From c4bc58c20842ff6dd4065064363005ac8c533c1c Mon Sep 17 00:00:00 2001 From: johann <johann> Date: Fri, 20 Jul 2007 08:54:08 +0000 Subject: [PATCH] Branch b1_6 b=11039 i=nathan i=scjody (get approval by email on rmg@) Remove obsolete kernel patches. --- .../patches/2.6-rhel4-kgdb-ga.patch | 6371 --- .../patches/8kstack-2.6-rhel4.patch | 13 - .../patches/bluesmoke-2.6-suse-lnxi.patch | 5485 --- .../patches/brk-locked-2.6-suse-lnxi.patch | 219 - .../compile-fixes-2.6.9-rhel4-22.patch | 76 - .../kernel_patches/patches/elevator-cfq.patch | 20 - .../ext3-check-jbd-errors-2.6-sles10.patch | 83 - .../ext3-extents-fixes-2.6.9-rhel4.patch | 86 - ...tents-multiblock-directio-2.6.5-suse.patch | 157 - ...ents-multiblock-directio-2.6.9-rhel4.patch | 149 - .../ext3-extents-search-2.6.9-rhel4.patch | 168 - .../patches/ext3-external-journal-2.6.9.patch | 150 - .../patches/ext3-filterdata-sles10.patch | 25 - .../patches/ext3-htree-dot-2.6.5-suse.patch | 23 - .../patches/ext3-htree-path-ops.patch | 894 - .../ext3-inode-version-2.6-sles10.patch | 426 - .../ext3-inode-version-2.6.18-vanilla.patch | 426 - .../patches/ext3-mballoc3-core.patch | 4528 -- .../patches/ext3-mballoc3-rhel4.patch | 396 - .../patches/ext3-mballoc3-sles10.patch | 377 - .../patches/ext3-mballoc3-suse.patch | 397 - .../ext3-multi-mount-protection-2.6-fc5.patch | 381 - ...ulti-mount-protection-2.6.18-vanilla.patch | 381 - .../patches/ext3-statfs-2.6.12.patch | 177 - .../patches/ext3-uninit-2.6-sles10.patch | 674 - .../patches/ext3-uninit-2.6-suse.patch | 653 - .../patches/ext3-uninit-2.6.9.patch | 664 - .../patches/ext3-wantedi-2.6.15.patch | 174 - .../grab_cache_page_nowait_gfp-2.6-suse.patch | 57 - .../patches/jbd-stats-2.6.13.4.patch | 735 - .../patches/kexec-2.6-suse-lnxi.patch | 1603 - .../patches/kjournald_affinity.patch | 52 - .../patches/link_notlast-susefix.patch | 16 - .../kernel_patches/patches/lustre_build.patch | 33 - .../patches/mtd-2.6-suse-lnxi.patch | 35414 ---------------- 
.../patches/nfs-cifs-intent-2.6-rhel4.patch | 123 - .../patches/perfctr-2.6-suse-lnxi.patch | 10070 ----- .../patches/uml-export-end_iomem.patch | 12 - .../patches/uml-exprt-clearuser-2.6.12.patch | 11 - .../patches/vfs_nointent-2.6-sles10.patch | 453 - 40 files changed, 72152 deletions(-) delete mode 100644 lustre/kernel_patches/patches/2.6-rhel4-kgdb-ga.patch delete mode 100644 lustre/kernel_patches/patches/8kstack-2.6-rhel4.patch delete mode 100644 lustre/kernel_patches/patches/bluesmoke-2.6-suse-lnxi.patch delete mode 100644 lustre/kernel_patches/patches/brk-locked-2.6-suse-lnxi.patch delete mode 100644 lustre/kernel_patches/patches/compile-fixes-2.6.9-rhel4-22.patch delete mode 100644 lustre/kernel_patches/patches/elevator-cfq.patch delete mode 100644 lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6-sles10.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-fixes-2.6.9-rhel4.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-multiblock-directio-2.6.5-suse.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-multiblock-directio-2.6.9-rhel4.patch delete mode 100644 lustre/kernel_patches/patches/ext3-extents-search-2.6.9-rhel4.patch delete mode 100644 lustre/kernel_patches/patches/ext3-external-journal-2.6.9.patch delete mode 100644 lustre/kernel_patches/patches/ext3-filterdata-sles10.patch delete mode 100644 lustre/kernel_patches/patches/ext3-htree-dot-2.6.5-suse.patch delete mode 100644 lustre/kernel_patches/patches/ext3-htree-path-ops.patch delete mode 100644 lustre/kernel_patches/patches/ext3-inode-version-2.6-sles10.patch delete mode 100644 lustre/kernel_patches/patches/ext3-inode-version-2.6.18-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/ext3-mballoc3-core.patch delete mode 100644 lustre/kernel_patches/patches/ext3-mballoc3-rhel4.patch delete mode 100644 lustre/kernel_patches/patches/ext3-mballoc3-sles10.patch delete mode 100644 lustre/kernel_patches/patches/ext3-mballoc3-suse.patch 
delete mode 100644 lustre/kernel_patches/patches/ext3-multi-mount-protection-2.6-fc5.patch delete mode 100644 lustre/kernel_patches/patches/ext3-multi-mount-protection-2.6.18-vanilla.patch delete mode 100644 lustre/kernel_patches/patches/ext3-statfs-2.6.12.patch delete mode 100644 lustre/kernel_patches/patches/ext3-uninit-2.6-sles10.patch delete mode 100644 lustre/kernel_patches/patches/ext3-uninit-2.6-suse.patch delete mode 100644 lustre/kernel_patches/patches/ext3-uninit-2.6.9.patch delete mode 100644 lustre/kernel_patches/patches/ext3-wantedi-2.6.15.patch delete mode 100644 lustre/kernel_patches/patches/grab_cache_page_nowait_gfp-2.6-suse.patch delete mode 100644 lustre/kernel_patches/patches/jbd-stats-2.6.13.4.patch delete mode 100644 lustre/kernel_patches/patches/kexec-2.6-suse-lnxi.patch delete mode 100644 lustre/kernel_patches/patches/kjournald_affinity.patch delete mode 100644 lustre/kernel_patches/patches/link_notlast-susefix.patch delete mode 100644 lustre/kernel_patches/patches/lustre_build.patch delete mode 100644 lustre/kernel_patches/patches/mtd-2.6-suse-lnxi.patch delete mode 100644 lustre/kernel_patches/patches/nfs-cifs-intent-2.6-rhel4.patch delete mode 100644 lustre/kernel_patches/patches/perfctr-2.6-suse-lnxi.patch delete mode 100644 lustre/kernel_patches/patches/uml-export-end_iomem.patch delete mode 100644 lustre/kernel_patches/patches/uml-exprt-clearuser-2.6.12.patch delete mode 100644 lustre/kernel_patches/patches/vfs_nointent-2.6-sles10.patch diff --git a/lustre/kernel_patches/patches/2.6-rhel4-kgdb-ga.patch b/lustre/kernel_patches/patches/2.6-rhel4-kgdb-ga.patch deleted file mode 100644 index f3067fadcf..0000000000 --- a/lustre/kernel_patches/patches/2.6-rhel4-kgdb-ga.patch +++ /dev/null @@ -1,6371 +0,0 @@ - - -This kgdb will get called and will trap almost any kernel -fault WITHOUT BEING ARMED. - -It is entered at boot time via "kgdb" in the boot string, -not "gdb". 
This entry occurs when the first setup on the -boot string is called, not sometime later. You will not -find a "waiting for gdb" on your console, as the console has -not yet been enabled at this time. (Note, this early stuff -is a bit fragile as the full trap table has yet to be -loaded, something I might address, sometime... So don't try -to look at memory that can not be reached, for example. -Once the full trap table is loaded this restriction goes -away.) - -If you hard code it, you can put a breakpoint() as the FIRST -LINE OF C CODE. - -It does NOT use the serial driver, but if the serial driver -is loaded, it tells it to release the port to avoid -conflict. - -The threads stuff is not configurable, does not require -redirection of schedule() calls and does back track to the -first non schedule() caller on the info threads command. If -you switch to the thread, however, it will show it in the -switch code (as it should). - -It is MUCH more aggressive and paranoid about grabbing the -other cpus on entry. It issues a "send_nmi_all_but_self()" -rather than depending on them to interrupt or hit an NMI -sometime in the distant future. If a cpu does not come to -the party, it will continue without it so all is not lost. - -It does not have anything to do with IOCTL calls, but does -do the control-C thing. - -There is a LOT of info in the patch which ends up in -.../Documentation/i386/kgdb/* - -There is a nifty little thing call kgdb_ts() (kgdb time -stamp) which is a function you can code calls to which puts -some useful stuff in a circular buffer which can be examined -with the supplied gdb macros. - -It also allows you do to do "p foobar(...)" i.e. to call a -function from gdb, just like gdb allows in program -debugging. - -In an SMP system, you can choose to "hold" any given set of -cpus. It also defaults to holding other cpus on single step -(this can be overridden). - -This said, you can imagine my consternation when I found it -"lost it" on continues on 2.5. 
I found and fixed this this -early pm, a hold cpu on exit goof on my part. - -Oh, and a final point, the configure options are more -extensive (the serial port is set up here, for example, (can -not wait for a command line to do this)). There is one to -do system call exit tests. This is VERY new and causes the -kernel to hit a hard "int 3" if a system call attempts to -exit with preempt count other than zero. This is a fault, -of course, but the current 2.5 is full of them so I don't -recommend turning this on. - - -DESC -kgdbL warning fix -EDESC -From: Ingo Molnar <mingo@elte.hu> - -this patch fixes a deprecated use of asm input operands. (and shuts up a -gcc 3.3 warning.) - -DESC -kgdb buffer overflow fix -EDESC -From: George Anzinger <george@mvista.com> - - -DESC -kgdbL warning fix -EDESC -From: Ingo Molnar <mingo@elte.hu> - -this patch fixes a deprecated use of asm input operands. (and shuts up a -gcc 3.3 warning.) - -DESC -kgdb: CONFIG_DEBUG_INFO fix -EDESC -From: Thomas Schlichter <schlicht@uni-mannheim.de> - -that patch sets DEBUG_INFO to y by default, even if whether DEBUG_KERNEL nor -KGDB is enabled. The attached patch changes this to enable DEBUG_INFO by -default only if KGDB is enabled. - -DESC -x86_64 fixes -EDESC -From Andi Kleen - -Fix x86_64 for kgdb. We forget why. -DESC -correct kgdb.txt Documentation link (against 2.6.1-rc1-mm2) -EDESC -From: Jesper Juhl <juhl-lkml@dif.dk> - -The help text for "config KGDB" in arch/i386/Kconfig refers to -Documentation/i386/kgdb.txt - the actual location is -Documentation/i386/kgdb/kgdb.txt - patch below to fix that. 
- -DESC -kgdb: fix for recent gcc -EDESC - -arch/i386/kernel/traps.c:97: error: conflicting types for 'int3' -arch/i386/kernel/traps.c:77: error: previous declaration of 'int3' was here -arch/i386/kernel/traps.c:97: error: conflicting types for 'int3' -arch/i386/kernel/traps.c:77: error: previous declaration of 'int3' was here -arch/i386/kernel/traps.c:99: error: conflicting types for 'debug' -arch/i386/kernel/traps.c:75: error: previous declaration of 'debug' was here -arch/i386/kernel/traps.c:99: error: conflicting types for 'debug' -arch/i386/kernel/traps.c:75: error: previous declaration of 'debug' was here - -DESC -kgdb warning fixes -EDESC - -arch/i386/kernel/kgdb_stub.c:1306: warning: 'time' might be used uninitialized in this function -arch/i386/kernel/kgdb_stub.c:1306: warning: 'dum' might be used uninitialized in this function -DESC -THREAD_SIZE fixes for kgdb -EDESC -From: Matt Mackall <mpm@selenic.com> - -Noticed the THREAD_SIZE clean-ups are in -mm now. Here are the missing -bits for kgdb, tested in -tiny with 4k stacks. -DESC -Fix stack overflow test for non-8k stacks -EDESC -From: Matt Mackall <mpm@selenic.com> - -This is needed to work properly with 4k and 16k stacks. -DESC -kgdb-ga.patch fix for i386 single-step into sysenter -EDESC -From: Roland McGrath <roland@redhat.com> - -Using kgdb-ga.patch from -mm, if userland single-steps (PTRACE_SINGLESTEP) -into the `sysenter' instruction, kgdb reports a bogus trap: - - Program received signal SIGTRAP, Trace/breakpoint trap. - sysenter_past_esp () at arch/i386/kernel/entry.S:249 - 1: x/i $pc 0xc0106023 <sysenter_past_esp>: sti - (gdb) - -The hackery in the "FIX_STACK" macro in entry.S changes the saved PC for a -the spurious kernel-mode debug trap when TF was set on user-mode execution -of `sysenter', so sysenter_past_esp is where it actually lies in this case. - The following patch removes the kgdb hiccup when userland -PTRACE_SINGLESTEP's into sysenter. 
-DESC -fix TRAP_BAD_SYSCALL_EXITS on i386 -EDESC -From: Andy Whitcroft <apw@shadowen.org> - -We are not using the right offset name, nor the right address when checking -for a non-zero preempt count. Move to TI_preempt_count(%ebp). - -Signed-off-by: Andy Whitcroft <apw@shadowen.org> -DESC -add TRAP_BAD_SYSCALL_EXITS config for i386 -EDESC -From: Andy Whitcroft <apw@shadowen.org> - -There seems to be code recently added to -bk and thereby -mm which supports -extra debug for preempt on system call exit. Oddly there doesn't seem to -be configuration options to enable them. Below is a possible patch to -allow enabling this on i386. Sadly the most obvious menu to add this to is -the Kernel Hacking menu, but that is defined in architecture specific -configuration. If this makes sense I could patch the other arches? - -Add a configuration option to allow enabling TRAP_BAD_SYSCALL_EXITS to the -Kernel Hacking menu. - -Signed-off-by: Andy Whitcroft <apw@shadowen.org> -Signed-off-by: Andrew Morton <akpm@osdl.org> ---- - - 25-akpm/Documentation/i386/kgdb/andthen | 100 + - 25-akpm/Documentation/i386/kgdb/debug-nmi.txt | 37 - 25-akpm/Documentation/i386/kgdb/gdb-globals.txt | 71 - 25-akpm/Documentation/i386/kgdb/gdbinit | 14 - 25-akpm/Documentation/i386/kgdb/gdbinit-modules | 146 + - 25-akpm/Documentation/i386/kgdb/gdbinit.hw | 117 + - 25-akpm/Documentation/i386/kgdb/kgdb.txt | 775 +++++++ - 25-akpm/Documentation/i386/kgdb/loadmodule.sh | 78 - 25-akpm/MAINTAINERS | 6 - 25-akpm/arch/i386/Kconfig | 8 - 25-akpm/arch/i386/Kconfig.debug | 2 - 25-akpm/arch/i386/Kconfig.kgdb | 175 + - 25-akpm/arch/i386/Makefile | 3 - 25-akpm/arch/i386/kernel/Makefile | 1 - 25-akpm/arch/i386/kernel/entry.S | 29 - 25-akpm/arch/i386/kernel/kgdb_stub.c | 2330 ++++++++++++++++++++++++ - 25-akpm/arch/i386/kernel/nmi.c | 25 - 25-akpm/arch/i386/kernel/smp.c | 12 - 25-akpm/arch/i386/kernel/traps.c | 77 - 25-akpm/arch/i386/lib/Makefile | 1 - 25-akpm/arch/i386/lib/kgdb_serial.c | 485 ++++ - 
25-akpm/arch/i386/mm/fault.c | 6 - 25-akpm/arch/x86_64/boot/compressed/head.S | 1 - 25-akpm/arch/x86_64/boot/compressed/misc.c | 1 - 25-akpm/drivers/char/keyboard.c | 3 - 25-akpm/drivers/char/sysrq.c | 23 - 25-akpm/drivers/serial/8250.c | 40 - 25-akpm/drivers/serial/serial_core.c | 5 - 25-akpm/include/asm-i386/bugs.h | 21 - 25-akpm/include/asm-i386/kgdb.h | 59 - 25-akpm/include/asm-i386/kgdb_local.h | 102 + - 25-akpm/include/linux/config.h | 3 - 25-akpm/include/linux/dwarf2-lang.h | 132 + - 25-akpm/include/linux/dwarf2.h | 738 +++++++ - 25-akpm/include/linux/serial_core.h | 4 - 25-akpm/include/linux/spinlock.h | 12 - 25-akpm/kernel/pid.c | 6 - 25-akpm/kernel/sched.c | 7 - 38 files changed, 5645 insertions(+), 10 deletions(-) - -diff -puN arch/i386/Kconfig~kgdb-ga arch/i386/Kconfig ---- 25/arch/i386/Kconfig~kgdb-ga 2004-10-21 14:54:15.256604136 -0700 -+++ 25-akpm/arch/i386/Kconfig 2004-10-21 14:54:15.295598208 -0700 -@@ -1184,6 +1184,14 @@ menu "Executable file formats" - - source "fs/Kconfig.binfmt" - -+config TRAP_BAD_SYSCALL_EXITS -+ bool "Debug bad system call exits" -+ depends on KGDB -+ help -+ If you say Y here the kernel will check for system calls which -+ return without clearing preempt. -+ default n -+ - endmenu - - source "drivers/Kconfig" -diff -puN arch/i386/kernel/entry.S~kgdb-ga arch/i386/kernel/entry.S ---- 25/arch/i386/kernel/entry.S~kgdb-ga 2004-10-21 14:54:15.257603984 -0700 -+++ 25-akpm/arch/i386/kernel/entry.S 2004-10-21 14:54:15.296598056 -0700 -@@ -48,6 +48,18 @@ - #include <asm/smp.h> - #include <asm/page.h> - #include "irq_vectors.h" -+ /* We do not recover from a stack overflow, but at least -+ * we know it happened and should be able to track it down. 
-+ */ -+#ifdef CONFIG_STACK_OVERFLOW_TEST -+#define STACK_OVERFLOW_TEST \ -+ testl $(THREAD_SIZE - 512),%esp; \ -+ jnz 10f; \ -+ call stack_overflow; \ -+10: -+#else -+#define STACK_OVERFLOW_TEST -+#endif - - #define nr_syscalls ((syscall_table_size)/4) - -@@ -94,7 +106,8 @@ VM_MASK = 0x00020000 - pushl %ebx; \ - movl $(__USER_DS), %edx; \ - movl %edx, %ds; \ -- movl %edx, %es; -+ movl %edx, %es; \ -+ STACK_OVERFLOW_TEST - - #define RESTORE_INT_REGS \ - popl %ebx; \ -@@ -198,6 +211,7 @@ need_resched: - # sysenter call handler stub - ENTRY(sysenter_entry) - movl TSS_sysenter_esp0(%esp),%esp -+ .globl sysenter_past_esp - sysenter_past_esp: - sti - pushl $(__USER_DS) -@@ -260,6 +274,19 @@ syscall_exit: - testw $_TIF_ALLWORK_MASK, %cx # current->work - jne syscall_exit_work - restore_all: -+#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS -+ movl EFLAGS(%esp), %eax # mix EFLAGS and CS -+ movb CS(%esp), %al -+ testl $(VM_MASK | 3), %eax -+ jz resume_kernelX # returning to kernel or vm86-space -+ -+ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? -+ jz resume_kernelX -+ -+ int $3 -+ -+resume_kernelX: -+#endif - RESTORE_ALL - - # perform work that needs to be done immediately before resumption -diff -puN /dev/null arch/i386/kernel/kgdb_stub.c ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/arch/i386/kernel/kgdb_stub.c 2004-10-21 14:54:15.307596384 -0700 -@@ -0,0 +1,2330 @@ -+/* -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the -+ * Free Software Foundation; either version 2, or (at your option) any -+ * later version. -+ * -+ * This program is distributed in the hope that it will be useful, but -+ * WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ */ -+ -+/* -+ * Copyright (c) 2000 VERITAS Software Corporation. 
-+ * -+ */ -+/**************************************************************************** -+ * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ -+ * -+ * Module name: remcom.c $ -+ * Revision: 1.34 $ -+ * Date: 91/03/09 12:29:49 $ -+ * Contributor: Lake Stevens Instrument Division$ -+ * -+ * Description: low level support for gdb debugger. $ -+ * -+ * Considerations: only works on target hardware $ -+ * -+ * Written by: Glenn Engel $ -+ * Updated by: David Grothe <dave@gcom.com> -+ * ModuleState: Experimental $ -+ * -+ * NOTES: See Below $ -+ * -+ * Modified for 386 by Jim Kingdon, Cygnus Support. -+ * Compatibility with 2.1.xx kernel by David Grothe <dave@gcom.com> -+ * -+ * Changes to allow auto initilization. All that is needed is that it -+ * be linked with the kernel and a break point (int 3) be executed. -+ * The header file <asm/kgdb.h> defines BREAKPOINT to allow one to do -+ * this. It should also be possible, once the interrupt system is up, to -+ * call putDebugChar("+"). Once this is done, the remote debugger should -+ * get our attention by sending a ^C in a packet. George Anzinger -+ * <george@mvista.com> -+ * Integrated into 2.2.5 kernel by Tigran Aivazian <tigran@sco.com> -+ * Added thread support, support for multiple processors, -+ * support for ia-32(x86) hardware debugging. -+ * Amit S. Kale ( akale@veritas.com ) -+ * -+ * -+ * To enable debugger support, two things need to happen. One, a -+ * call to set_debug_traps() is necessary in order to allow any breakpoints -+ * or error conditions to be properly intercepted and reported to gdb. -+ * Two, a breakpoint needs to be generated to begin communication. This -+ * is most easily accomplished by a call to breakpoint(). Breakpoint() -+ * simulates a breakpoint by executing an int 3. 
-+ * -+ ************* -+ * -+ * The following gdb commands are supported: -+ * -+ * command function Return value -+ * -+ * g return the value of the CPU registers hex data or ENN -+ * G set the value of the CPU registers OK or ENN -+ * -+ * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN -+ * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN -+ * -+ * c Resume at current address SNN ( signal NN) -+ * cAA..AA Continue at address AA..AA SNN -+ * -+ * s Step one instruction SNN -+ * sAA..AA Step one instruction from AA..AA SNN -+ * -+ * k kill -+ * -+ * ? What was the last sigval ? SNN (signal NN) -+ * -+ * All commands and responses are sent with a packet which includes a -+ * checksum. A packet consists of -+ * -+ * $<packet info>#<checksum>. -+ * -+ * where -+ * <packet info> :: <characters representing the command or response> -+ * <checksum> :: < two hex digits computed as modulo 256 sum of <packetinfo>> -+ * -+ * When a packet is received, it is first acknowledged with either '+' or '-'. -+ * '+' indicates a successful transfer. '-' indicates a failed transfer. 
-+ * -+ * Example: -+ * -+ * Host: Reply: -+ * $m0,10#2a +$00010203040506070809101112131415#42 -+ * -+ ****************************************************************************/ -+#define KGDB_VERSION "<20030915.1651.33>" -+#include <linux/config.h> -+#include <linux/types.h> -+#include <asm/string.h> /* for strcpy */ -+#include <linux/kernel.h> -+#include <linux/sched.h> -+#include <asm/vm86.h> -+#include <asm/system.h> -+#include <asm/ptrace.h> /* for linux pt_regs struct */ -+#include <asm/kgdb_local.h> -+#include <linux/list.h> -+#include <asm/atomic.h> -+#include <asm/processor.h> -+#include <linux/irq.h> -+#include <asm/desc.h> -+ -+/************************************************************************ -+ * -+ * external low-level support routines -+ */ -+typedef void (*Function) (void); /* pointer to a function */ -+ -+/* Thread reference */ -+typedef unsigned char threadref[8]; -+ -+extern void putDebugChar(int); /* write a single character */ -+extern int getDebugChar(void); /* read and return a single char */ -+ -+/************************************************************************/ -+/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ -+/* at least NUMREGBYTES*2 are needed for register packets */ -+/* Longer buffer is needed to list all threads */ -+#define BUFMAX 400 -+ -+char *kgdb_version = KGDB_VERSION; -+ -+/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ -+int debug_regs = 0; /* set to non-zero to print registers */ -+ -+/* filled in by an external module */ -+char *gdb_module_offsets; -+ -+static const char hexchars[] = "0123456789abcdef"; -+ -+/* Number of bytes of registers. */ -+#define NUMREGBYTES 64 -+/* -+ * Note that this register image is in a different order than -+ * the register image that Linux produces at interrupt time. -+ * -+ * Linux's register image is defined by struct pt_regs in ptrace.h. -+ * Just why GDB uses a different order is a historical mystery. 
-+ */ -+enum regnames { _EAX, /* 0 */ -+ _ECX, /* 1 */ -+ _EDX, /* 2 */ -+ _EBX, /* 3 */ -+ _ESP, /* 4 */ -+ _EBP, /* 5 */ -+ _ESI, /* 6 */ -+ _EDI, /* 7 */ -+ _PC /* 8 also known as eip */ , -+ _PS /* 9 also known as eflags */ , -+ _CS, /* 10 */ -+ _SS, /* 11 */ -+ _DS, /* 12 */ -+ _ES, /* 13 */ -+ _FS, /* 14 */ -+ _GS /* 15 */ -+}; -+ -+/*************************** ASSEMBLY CODE MACROS *************************/ -+/* -+ * Put the error code here just in case the user cares. -+ * Likewise, the vector number here (since GDB only gets the signal -+ * number through the usual means, and that's not very specific). -+ * The called_from is the return address so he can tell how we entered kgdb. -+ * This will allow him to seperate out the various possible entries. -+ */ -+#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ -+ -+#define PID_MAX PID_MAX_DEFAULT -+ -+#ifdef CONFIG_SMP -+void smp_send_nmi_allbutself(void); -+#define IF_SMP(x) x -+#undef MAX_NO_CPUS -+#ifndef CONFIG_NO_KGDB_CPUS -+#define CONFIG_NO_KGDB_CPUS 2 -+#endif -+#if CONFIG_NO_KGDB_CPUS > NR_CPUS -+#define MAX_NO_CPUS NR_CPUS -+#else -+#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS -+#endif -+#define hold_init hold_on_sstep: 1, -+#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) -+#define NUM_CPUS num_online_cpus() -+#else -+#define IF_SMP(x) -+#define hold_init -+#undef MAX_NO_CPUS -+#define MAX_NO_CPUS 1 -+#define NUM_CPUS 1 -+#endif -+#define NOCPU (struct task_struct *)0xbad1fbad -+/* *INDENT-OFF* */ -+struct kgdb_info { -+ int used_malloc; -+ void *called_from; -+ long long entry_tsc; -+ int errcode; -+ int vector; -+ int print_debug_info; -+#ifdef CONFIG_SMP -+ int hold_on_sstep; -+ struct { -+ volatile struct task_struct *task; -+ int pid; -+ int hold; -+ struct pt_regs *regs; -+ } cpus_waiting[MAX_NO_CPUS]; -+#endif -+} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; -+ -+/* *INDENT-ON* */ -+ -+#define used_m kgdb_info.used_malloc -+/* -+ 
* This is little area we set aside to contain the stack we -+ * need to build to allow gdb to call functions. We use one -+ * per cpu to avoid locking issues. We will do all this work -+ * with interrupts off so that should take care of the protection -+ * issues. -+ */ -+#define LOOKASIDE_SIZE 200 /* should be more than enough */ -+#define MALLOC_MAX 200 /* Max malloc size */ -+struct { -+ unsigned int esp; -+ int array[LOOKASIDE_SIZE]; -+} fn_call_lookaside[MAX_NO_CPUS]; -+ -+static int trap_cpu; -+static unsigned int OLD_esp; -+ -+#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] -+#define IF_BIT 0x200 -+#define TF_BIT 0x100 -+ -+#define MALLOC_ROUND 8-1 -+ -+static char malloc_array[MALLOC_MAX]; -+IF_SMP(static void to_gdb(const char *mess)); -+void * -+malloc(int size) -+{ -+ -+ if (size <= (MALLOC_MAX - used_m)) { -+ int old_used = used_m; -+ used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); -+ return &malloc_array[old_used]; -+ } else { -+ return NULL; -+ } -+} -+ -+/* -+ * Gdb calls functions by pushing agruments, including a return address -+ * on the stack and the adjusting EIP to point to the function. The -+ * whole assumption in GDB is that we are on a different stack than the -+ * one the "user" i.e. code that hit the break point, is on. This, of -+ * course is not true in the kernel. Thus various dodges are needed to -+ * do the call without directly messing with EIP (which we can not change -+ * as it is just a location and not a register. To adjust it would then -+ * require that we move every thing below EIP up or down as needed. This -+ * will not work as we may well have stack relative pointer on the stack -+ * (such as the pointer to regs, for example). -+ -+ * So here is what we do: -+ * We detect gdb attempting to store into the stack area and instead, store -+ * into the fn_call_lookaside.array at the same relative location as if it -+ * were the area ESP pointed at. 
We also trap ESP modifications -+ * and uses these to adjust fn_call_lookaside.esp. On entry -+ * fn_call_lookaside.esp will be set to point at the last entry in -+ * fn_call_lookaside.array. This allows us to check if it has changed, and -+ * if so, on exit, we add the registers we will use to do the move and a -+ * trap/ interrupt return exit sequence. We then adjust the eflags in the -+ * regs array (remember we now have a copy in the fn_call_lookaside.array) to -+ * kill the interrupt bit, AND we change EIP to point at our set up stub. -+ * As part of the register set up we preset the registers to point at the -+ * begining and end of the fn_call_lookaside.array, so all the stub needs to -+ * do is move words from the array to the stack until ESP= the desired value -+ * then do the rti. This will then transfer to the desired function with -+ * all the correct registers. Nifty huh? -+ */ -+extern asmlinkage void fn_call_stub(void); -+extern asmlinkage void fn_rtn_stub(void); -+/* *INDENT-OFF* */ -+__asm__("fn_rtn_stub:\n\t" -+ "movl %eax,%esp\n\t" -+ "fn_call_stub:\n\t" -+ "1:\n\t" -+ "addl $-4,%ebx\n\t" -+ "movl (%ebx), %eax\n\t" -+ "pushl %eax\n\t" -+ "cmpl %esp,%ecx\n\t" -+ "jne 1b\n\t" -+ "popl %eax\n\t" -+ "popl %ebx\n\t" -+ "popl %ecx\n\t" -+ "iret \n\t"); -+/* *INDENT-ON* */ -+#define gdb_i386vector kgdb_info.vector -+#define gdb_i386errcode kgdb_info.errcode -+#define waiting_cpus kgdb_info.cpus_waiting -+#define remote_debug kgdb_info.print_debug_info -+#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold -+/* gdb locks */ -+ -+#ifdef CONFIG_SMP -+static int in_kgdb_called; -+static spinlock_t waitlocks[MAX_NO_CPUS] = -+ {[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; -+/* -+ * The following array has the thread pointer of each of the "other" -+ * cpus. We make it global so it can be seen by gdb. 
-+ */ -+volatile int in_kgdb_entry_log[MAX_NO_CPUS]; -+volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; -+/* -+static spinlock_t continuelocks[MAX_NO_CPUS]; -+*/ -+spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; -+/* waiters on our spinlock plus us */ -+static atomic_t spinlock_waiters = ATOMIC_INIT(1); -+static int spinlock_count = 0; -+static int spinlock_cpu = 0; -+/* -+ * Note we use nested spin locks to account for the case where a break -+ * point is encountered when calling a function by user direction from -+ * kgdb. Also there is the memory exception recursion to account for. -+ * Well, yes, but this lets other cpus thru too. Lets add a -+ * cpu id to the lock. -+ */ -+#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ -+ spinlock_cpu != smp_processor_id()){\ -+ atomic_inc(&spinlock_waiters); \ -+ while (! spin_trylock(x)) {\ -+ in_kgdb(&regs);\ -+ }\ -+ atomic_dec(&spinlock_waiters); \ -+ spinlock_count = 1; \ -+ spinlock_cpu = smp_processor_id(); \ -+ }else{ \ -+ spinlock_count++; \ -+ } -+#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) -+#else -+unsigned kgdb_spinlock = 0; -+#define KGDB_SPIN_LOCK(x) --*x -+#define KGDB_SPIN_UNLOCK(x) ++*x -+#endif -+ -+int -+hex(char ch) -+{ -+ if ((ch >= 'a') && (ch <= 'f')) -+ return (ch - 'a' + 10); -+ if ((ch >= '0') && (ch <= '9')) -+ return (ch - '0'); -+ if ((ch >= 'A') && (ch <= 'F')) -+ return (ch - 'A' + 10); -+ return (-1); -+} -+ -+/* scan for the sequence $<data>#<checksum> */ -+void -+getpacket(char *buffer) -+{ -+ unsigned char checksum; -+ unsigned char xmitcsum; -+ int i; -+ int count; -+ char ch; -+ -+ do { -+ /* wait around for the start character, ignore all other characters */ -+ while ((ch = (getDebugChar() & 0x7f)) != '$') ; -+ checksum = 0; -+ xmitcsum = -1; -+ -+ count = 0; -+ -+ /* now, read until a # or end of buffer is found */ -+ while (count < BUFMAX) { -+ ch = getDebugChar() & 0x7f; -+ if (ch == '#') -+ break; -+ checksum = checksum + ch; -+ buffer[count] = 
ch; -+ count = count + 1; -+ } -+ buffer[count] = 0; -+ -+ if (ch == '#') { -+ xmitcsum = hex(getDebugChar() & 0x7f) << 4; -+ xmitcsum += hex(getDebugChar() & 0x7f); -+ if ((remote_debug) && (checksum != xmitcsum)) { -+ printk -+ ("bad checksum. My count = 0x%x, sent=0x%x. buf=%s\n", -+ checksum, xmitcsum, buffer); -+ } -+ -+ if (checksum != xmitcsum) -+ putDebugChar('-'); /* failed checksum */ -+ else { -+ putDebugChar('+'); /* successful transfer */ -+ /* if a sequence char is present, reply the sequence ID */ -+ if (buffer[2] == ':') { -+ putDebugChar(buffer[0]); -+ putDebugChar(buffer[1]); -+ /* remove sequence chars from buffer */ -+ count = strlen(buffer); -+ for (i = 3; i <= count; i++) -+ buffer[i - 3] = buffer[i]; -+ } -+ } -+ } -+ } while (checksum != xmitcsum); -+ -+ if (remote_debug) -+ printk("R:%s\n", buffer); -+} -+ -+/* send the packet in buffer. */ -+ -+void -+putpacket(char *buffer) -+{ -+ unsigned char checksum; -+ int count; -+ char ch; -+ -+ /* $<packet info>#<checksum>. 
*/ -+ do { -+ if (remote_debug) -+ printk("T:%s\n", buffer); -+ putDebugChar('$'); -+ checksum = 0; -+ count = 0; -+ -+ while ((ch = buffer[count])) { -+ putDebugChar(ch); -+ checksum += ch; -+ count += 1; -+ } -+ -+ putDebugChar('#'); -+ putDebugChar(hexchars[checksum >> 4]); -+ putDebugChar(hexchars[checksum % 16]); -+ -+ } while ((getDebugChar() & 0x7f) != '+'); -+ -+} -+ -+static char remcomInBuffer[BUFMAX]; -+static char remcomOutBuffer[BUFMAX]; -+static short error; -+ -+void -+debug_error(char *format, char *parm) -+{ -+ if (remote_debug) -+ printk(format, parm); -+} -+ -+static void -+print_regs(struct pt_regs *regs) -+{ -+ printk("EAX=%08lx ", regs->eax); -+ printk("EBX=%08lx ", regs->ebx); -+ printk("ECX=%08lx ", regs->ecx); -+ printk("EDX=%08lx ", regs->edx); -+ printk("\n"); -+ printk("ESI=%08lx ", regs->esi); -+ printk("EDI=%08lx ", regs->edi); -+ printk("EBP=%08lx ", regs->ebp); -+ printk("ESP=%08lx ", (long) &regs->esp); -+ printk("\n"); -+ printk(" DS=%08x ", regs->xds); -+ printk(" ES=%08x ", regs->xes); -+ printk(" SS=%08x ", __KERNEL_DS); -+ printk(" FL=%08lx ", regs->eflags); -+ printk("\n"); -+ printk(" CS=%08x ", regs->xcs); -+ printk(" IP=%08lx ", regs->eip); -+#if 0 -+ printk(" FS=%08x ", regs->fs); -+ printk(" GS=%08x ", regs->gs); -+#endif -+ printk("\n"); -+ -+} /* print_regs */ -+ -+#define NEW_esp fn_call_lookaside[trap_cpu].esp -+ -+static void -+regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs) -+{ -+ gdb_regs[_EAX] = regs->eax; -+ gdb_regs[_EBX] = regs->ebx; -+ gdb_regs[_ECX] = regs->ecx; -+ gdb_regs[_EDX] = regs->edx; -+ gdb_regs[_ESI] = regs->esi; -+ gdb_regs[_EDI] = regs->edi; -+ gdb_regs[_EBP] = regs->ebp; -+ gdb_regs[_DS] = regs->xds; -+ gdb_regs[_ES] = regs->xes; -+ gdb_regs[_PS] = regs->eflags; -+ gdb_regs[_CS] = regs->xcs; -+ gdb_regs[_PC] = regs->eip; -+ /* Note, as we are a debugging the kernel, we will always -+ * trap in kernel code, this means no priviledge change, -+ * and so the pt_regs structure is not completely 
valid. In a non -+ * privilege change trap, only EFLAGS, CS and EIP are put on the stack, -+ * SS and ESP are not stacked, this means that the last 2 elements of -+ * pt_regs is not valid (they would normally refer to the user stack) -+ * also, using regs+1 is no good because you end up will a value that is -+ * 2 longs (8) too high. This used to cause stepping over functions -+ * to fail, so my fix is to use the address of regs->esp, which -+ * should point at the end of the stack frame. Note I have ignored -+ * completely exceptions that cause an error code to be stacked, such -+ * as double fault. Stuart Hughes, Zentropix. -+ * original code: gdb_regs[_ESP] = (int) (regs + 1) ; -+ -+ * this is now done on entry and moved to OLD_esp (as well as NEW_esp). -+ */ -+ gdb_regs[_ESP] = NEW_esp; -+ gdb_regs[_SS] = __KERNEL_DS; -+ gdb_regs[_FS] = 0xFFFF; -+ gdb_regs[_GS] = 0xFFFF; -+} /* regs_to_gdb_regs */ -+ -+static void -+gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs) -+{ -+ regs->eax = gdb_regs[_EAX]; -+ regs->ebx = gdb_regs[_EBX]; -+ regs->ecx = gdb_regs[_ECX]; -+ regs->edx = gdb_regs[_EDX]; -+ regs->esi = gdb_regs[_ESI]; -+ regs->edi = gdb_regs[_EDI]; -+ regs->ebp = gdb_regs[_EBP]; -+ regs->xds = gdb_regs[_DS]; -+ regs->xes = gdb_regs[_ES]; -+ regs->eflags = gdb_regs[_PS]; -+ regs->xcs = gdb_regs[_CS]; -+ regs->eip = gdb_regs[_PC]; -+ NEW_esp = gdb_regs[_ESP]; /* keep the value */ -+#if 0 /* can't change these */ -+ regs->esp = gdb_regs[_ESP]; -+ regs->xss = gdb_regs[_SS]; -+ regs->fs = gdb_regs[_FS]; -+ regs->gs = gdb_regs[_GS]; -+#endif -+ -+} /* gdb_regs_to_regs */ -+ -+int thread_list = 0; -+ -+void -+get_gdb_regs(struct task_struct *p, struct pt_regs *regs, int *gdb_regs) -+{ -+ unsigned long stack_page; -+ int count = 0; -+ IF_SMP(int i); -+ if (!p || p == current) { -+ regs_to_gdb_regs(gdb_regs, regs); -+ return; -+ } -+#ifdef CONFIG_SMP -+ for (i = 0; i < MAX_NO_CPUS; i++) { -+ if (p == kgdb_info.cpus_waiting[i].task) { -+ regs_to_gdb_regs(gdb_regs, 
-+ kgdb_info.cpus_waiting[i].regs); -+ gdb_regs[_ESP] = -+ (int) &kgdb_info.cpus_waiting[i].regs->esp; -+ -+ return; -+ } -+ } -+#endif -+ memset(gdb_regs, 0, NUMREGBYTES); -+ gdb_regs[_ESP] = p->thread.esp; -+ gdb_regs[_PC] = p->thread.eip; -+ gdb_regs[_EBP] = *(int *) gdb_regs[_ESP]; -+ gdb_regs[_EDI] = *(int *) (gdb_regs[_ESP] + 4); -+ gdb_regs[_ESI] = *(int *) (gdb_regs[_ESP] + 8); -+ -+/* -+ * This code is to give a more informative notion of where a process -+ * is waiting. It is used only when the user asks for a thread info -+ * list. If he then switches to the thread, s/he will find the task -+ * is in schedule, but a back trace should show the same info we come -+ * up with. This code was shamelessly purloined from process.c. It was -+ * then enhanced to provide more registers than simply the program -+ * counter. -+ */ -+ -+ if (!thread_list) { -+ return; -+ } -+ -+ if (p->state == TASK_RUNNING) -+ return; -+ stack_page = (unsigned long) p->thread_info; -+ if (gdb_regs[_ESP] < stack_page || gdb_regs[_ESP] > -+ THREAD_SIZE - sizeof(long) + stack_page) -+ return; -+ /* include/asm-i386/system.h:switch_to() pushes ebp last. */ -+ do { -+ if (gdb_regs[_EBP] < stack_page || -+ gdb_regs[_EBP] > THREAD_SIZE - 2*sizeof(long) + stack_page) -+ return; -+ gdb_regs[_PC] = *(unsigned long *) (gdb_regs[_EBP] + 4); -+ gdb_regs[_ESP] = gdb_regs[_EBP] + 8; -+ gdb_regs[_EBP] = *(unsigned long *) gdb_regs[_EBP]; -+ if (!in_sched_functions(gdb_regs[_PC])) -+ return; -+ } while (count++ < 16); -+ return; -+} -+ -+/* Indicate to caller of mem2hex or hex2mem that there has been an -+ error. 
*/ -+static volatile int mem_err = 0; -+static volatile int mem_err_expected = 0; -+static volatile int mem_err_cnt = 0; -+static int garbage_loc = -1; -+ -+int -+get_char(char *addr) -+{ -+ return *addr; -+} -+ -+void -+set_char(char *addr, int val, int may_fault) -+{ -+ /* -+ * This code traps references to the area mapped to the kernel -+ * stack as given by the regs and, instead, stores to the -+ * fn_call_lookaside[cpu].array -+ */ -+ if (may_fault && -+ (unsigned int) addr < OLD_esp && -+ ((unsigned int) addr > (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { -+ addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); -+ } -+ *addr = val; -+} -+ -+/* convert the memory pointed to by mem into hex, placing result in buf */ -+/* return a pointer to the last char put in buf (null) */ -+/* If MAY_FAULT is non-zero, then we should set mem_err in response to -+ a fault; if zero treat a fault like any other fault in the stub. */ -+char * -+mem2hex(char *mem, char *buf, int count, int may_fault) -+{ -+ int i; -+ unsigned char ch; -+ -+ if (may_fault) { -+ mem_err_expected = 1; -+ mem_err = 0; -+ } -+ for (i = 0; i < count; i++) { -+ /* printk("%lx = ", mem) ; */ -+ -+ ch = get_char(mem++); -+ -+ /* printk("%02x\n", ch & 0xFF) ; */ -+ if (may_fault && mem_err) { -+ if (remote_debug) -+ printk("Mem fault fetching from addr %lx\n", -+ (long) (mem - 1)); -+ *buf = 0; /* truncate buffer */ -+ return (buf); -+ } -+ *buf++ = hexchars[ch >> 4]; -+ *buf++ = hexchars[ch % 16]; -+ } -+ *buf = 0; -+ if (may_fault) -+ mem_err_expected = 0; -+ return (buf); -+} -+ -+/* convert the hex array pointed to by buf into binary to be placed in mem */ -+/* return a pointer to the character AFTER the last byte written */ -+/* NOTE: We use the may fault flag to also indicate if the write is to -+ * the registers (0) or "other" memory (!=0) -+ */ -+char * -+hex2mem(char *buf, char *mem, int count, int may_fault) -+{ -+ int i; -+ unsigned char ch; -+ -+ if (may_fault) { -+ mem_err_expected = 
1; -+ mem_err = 0; -+ } -+ for (i = 0; i < count; i++) { -+ ch = hex(*buf++) << 4; -+ ch = ch + hex(*buf++); -+ set_char(mem++, ch, may_fault); -+ -+ if (may_fault && mem_err) { -+ if (remote_debug) -+ printk("Mem fault storing to addr %lx\n", -+ (long) (mem - 1)); -+ return (mem); -+ } -+ } -+ if (may_fault) -+ mem_err_expected = 0; -+ return (mem); -+} -+ -+/**********************************************/ -+/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ -+/* RETURN NUMBER OF CHARS PROCESSED */ -+/**********************************************/ -+int -+hexToInt(char **ptr, int *intValue) -+{ -+ int numChars = 0; -+ int hexValue; -+ -+ *intValue = 0; -+ -+ while (**ptr) { -+ hexValue = hex(**ptr); -+ if (hexValue >= 0) { -+ *intValue = (*intValue << 4) | hexValue; -+ numChars++; -+ } else -+ break; -+ -+ (*ptr)++; -+ } -+ -+ return (numChars); -+} -+ -+#define stubhex(h) hex(h) -+#ifdef old_thread_list -+ -+static int -+stub_unpack_int(char *buff, int fieldlength) -+{ -+ int nibble; -+ int retval = 0; -+ -+ while (fieldlength) { -+ nibble = stubhex(*buff++); -+ retval |= nibble; -+ fieldlength--; -+ if (fieldlength) -+ retval = retval << 4; -+ } -+ return retval; -+} -+#endif -+static char * -+pack_hex_byte(char *pkt, int byte) -+{ -+ *pkt++ = hexchars[(byte >> 4) & 0xf]; -+ *pkt++ = hexchars[(byte & 0xf)]; -+ return pkt; -+} -+ -+#define BUF_THREAD_ID_SIZE 16 -+ -+static char * -+pack_threadid(char *pkt, threadref * id) -+{ -+ char *limit; -+ unsigned char *altid; -+ -+ altid = (unsigned char *) id; -+ limit = pkt + BUF_THREAD_ID_SIZE; -+ while (pkt < limit) -+ pkt = pack_hex_byte(pkt, *altid++); -+ return pkt; -+} -+ -+#ifdef old_thread_list -+static char * -+unpack_byte(char *buf, int *value) -+{ -+ *value = stub_unpack_int(buf, 2); -+ return buf + 2; -+} -+ -+static char * -+unpack_threadid(char *inbuf, threadref * id) -+{ -+ char *altref; -+ char *limit = inbuf + BUF_THREAD_ID_SIZE; -+ int x, y; -+ -+ altref = (char *) id; -+ -+ while (inbuf < limit) { -+ x 
= stubhex(*inbuf++); -+ y = stubhex(*inbuf++); -+ *altref++ = (x << 4) | y; -+ } -+ return inbuf; -+} -+#endif -+void -+int_to_threadref(threadref * id, int value) -+{ -+ unsigned char *scan; -+ -+ scan = (unsigned char *) id; -+ { -+ int i = 4; -+ while (i--) -+ *scan++ = 0; -+ } -+ *scan++ = (value >> 24) & 0xff; -+ *scan++ = (value >> 16) & 0xff; -+ *scan++ = (value >> 8) & 0xff; -+ *scan++ = (value & 0xff); -+} -+int -+int_to_hex_v(unsigned char * id, int value) -+{ -+ unsigned char *start = id; -+ int shift; -+ int ch; -+ -+ for (shift = 28; shift >= 0; shift -= 4) { -+ if ((ch = (value >> shift) & 0xf) || (id != start)) { -+ *id = hexchars[ch]; -+ id++; -+ } -+ } -+ if (id == start) -+ *id++ = '0'; -+ return id - start; -+} -+#ifdef old_thread_list -+ -+static int -+threadref_to_int(threadref * ref) -+{ -+ int i, value = 0; -+ unsigned char *scan; -+ -+ scan = (char *) ref; -+ scan += 4; -+ i = 4; -+ while (i-- > 0) -+ value = (value << 8) | ((*scan++) & 0xff); -+ return value; -+} -+#endif -+static int -+cmp_str(char *s1, char *s2, int count) -+{ -+ while (count--) { -+ if (*s1++ != *s2++) -+ return 0; -+ } -+ return 1; -+} -+ -+#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ -+extern struct task_struct *kgdb_get_idle(int cpu); -+#define idle_task(cpu) kgdb_get_idle(cpu) -+#else -+#define idle_task(cpu) init_tasks[cpu] -+#endif -+ -+extern int kgdb_pid_init_done; -+ -+struct task_struct * -+getthread(int pid) -+{ -+ struct task_struct *thread; -+ if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { -+ -+ return idle_task(pid - PID_MAX); -+ } else { -+ /* -+ * find_task_by_pid is relatively safe all the time -+ * Other pid functions require lock downs which imply -+ * that we may be interrupting them (as we get here -+ * in the middle of most any lock down). -+ * Still we don't want to call until the table exists! 
-+ */ -+ if (kgdb_pid_init_done){ -+ thread = find_task_by_pid(pid); -+ if (thread) { -+ return thread; -+ } -+ } -+ } -+ return NULL; -+} -+/* *INDENT-OFF* */ -+struct hw_breakpoint { -+ unsigned enabled; -+ unsigned type; -+ unsigned len; -+ unsigned addr; -+} breakinfo[4] = { {enabled:0}, -+ {enabled:0}, -+ {enabled:0}, -+ {enabled:0}}; -+/* *INDENT-ON* */ -+unsigned hw_breakpoint_status; -+void -+correct_hw_break(void) -+{ -+ int breakno; -+ int correctit; -+ int breakbit; -+ unsigned dr7; -+ -+ asm volatile ("movl %%db7, %0\n":"=r" (dr7) -+ :); -+ /* *INDENT-OFF* */ -+ do { -+ unsigned addr0, addr1, addr2, addr3; -+ asm volatile ("movl %%db0, %0\n" -+ "movl %%db1, %1\n" -+ "movl %%db2, %2\n" -+ "movl %%db3, %3\n" -+ :"=r" (addr0), "=r"(addr1), -+ "=r"(addr2), "=r"(addr3) -+ :); -+ } while (0); -+ /* *INDENT-ON* */ -+ correctit = 0; -+ for (breakno = 0; breakno < 3; breakno++) { -+ breakbit = 2 << (breakno << 1); -+ if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { -+ correctit = 1; -+ dr7 |= breakbit; -+ dr7 &= ~(0xf0000 << (breakno << 2)); -+ dr7 |= (((breakinfo[breakno].len << 2) | -+ breakinfo[breakno].type) << 16) << -+ (breakno << 2); -+ switch (breakno) { -+ case 0: -+ asm volatile ("movl %0, %%dr0\n"::"r" -+ (breakinfo[breakno].addr)); -+ break; -+ -+ case 1: -+ asm volatile ("movl %0, %%dr1\n"::"r" -+ (breakinfo[breakno].addr)); -+ break; -+ -+ case 2: -+ asm volatile ("movl %0, %%dr2\n"::"r" -+ (breakinfo[breakno].addr)); -+ break; -+ -+ case 3: -+ asm volatile ("movl %0, %%dr3\n"::"r" -+ (breakinfo[breakno].addr)); -+ break; -+ } -+ } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { -+ correctit = 1; -+ dr7 &= ~breakbit; -+ dr7 &= ~(0xf0000 << (breakno << 2)); -+ } -+ } -+ if (correctit) { -+ asm volatile ("movl %0, %%db7\n"::"r" (dr7)); -+ } -+} -+ -+int -+remove_hw_break(unsigned breakno) -+{ -+ if (!breakinfo[breakno].enabled) { -+ return -1; -+ } -+ breakinfo[breakno].enabled = 0; -+ return 0; -+} -+ -+int -+set_hw_break(unsigned 
breakno, unsigned type, unsigned len, unsigned addr) -+{ -+ if (breakinfo[breakno].enabled) { -+ return -1; -+ } -+ breakinfo[breakno].enabled = 1; -+ breakinfo[breakno].type = type; -+ breakinfo[breakno].len = len; -+ breakinfo[breakno].addr = addr; -+ return 0; -+} -+ -+#ifdef CONFIG_SMP -+static int in_kgdb_console = 0; -+ -+int -+in_kgdb(struct pt_regs *regs) -+{ -+ unsigned flags; -+ int cpu = smp_processor_id(); -+ in_kgdb_called = 1; -+ if (!spin_is_locked(&kgdb_spinlock)) { -+ if (in_kgdb_here_log[cpu] || /* we are holding this cpu */ -+ in_kgdb_console) { /* or we are doing slow i/o */ -+ return 1; -+ } -+ return 0; -+ } -+ -+ /* As I see it the only reason not to let all cpus spin on -+ * the same spin_lock is to allow selected ones to proceed. -+ * This would be a good thing, so we leave it this way. -+ * Maybe someday.... Done ! -+ -+ * in_kgdb() is called from an NMI so we don't pretend -+ * to have any resources, like printk() for example. -+ */ -+ -+ kgdb_local_irq_save(flags); /* only local here, to avoid hanging */ -+ /* -+ * log arival of this cpu -+ * The NMI keeps on ticking. Protect against recurring more -+ * than once, and ignor the cpu that has the kgdb lock -+ */ -+ in_kgdb_entry_log[cpu]++; -+ in_kgdb_here_log[cpu] = regs; -+ if (cpu == spinlock_cpu || waiting_cpus[cpu].task) { -+ goto exit_in_kgdb; -+ } -+ /* -+ * For protection of the initilization of the spin locks by kgdb -+ * it locks the kgdb spinlock before it gets the wait locks set -+ * up. We wait here for the wait lock to be taken. If the -+ * kgdb lock goes away first?? Well, it could be a slow exit -+ * sequence where the wait lock is removed prior to the kgdb lock -+ * so if kgdb gets unlocked, we just exit. -+ */ -+ while (spin_is_locked(&kgdb_spinlock) && -+ !spin_is_locked(waitlocks + cpu)) ; -+ if (!spin_is_locked(&kgdb_spinlock)) { -+ goto exit_in_kgdb; -+ } -+ waiting_cpus[cpu].task = current; -+ waiting_cpus[cpu].pid = (current->pid) ? 
: (PID_MAX + cpu); -+ waiting_cpus[cpu].regs = regs; -+ -+ spin_unlock_wait(waitlocks + cpu); -+ /* -+ * log departure of this cpu -+ */ -+ waiting_cpus[cpu].task = 0; -+ waiting_cpus[cpu].pid = 0; -+ waiting_cpus[cpu].regs = 0; -+ correct_hw_break(); -+ exit_in_kgdb: -+ in_kgdb_here_log[cpu] = 0; -+ kgdb_local_irq_restore(flags); -+ return 1; -+ /* -+ spin_unlock(continuelocks + smp_processor_id()); -+ */ -+} -+ -+void -+smp__in_kgdb(struct pt_regs regs) -+{ -+ ack_APIC_irq(); -+ in_kgdb(®s); -+} -+#else -+int -+in_kgdb(struct pt_regs *regs) -+{ -+ return (kgdb_spinlock); -+} -+#endif -+ -+void -+printexceptioninfo(int exceptionNo, int errorcode, char *buffer) -+{ -+ unsigned dr6; -+ int i; -+ switch (exceptionNo) { -+ case 1: /* debug exception */ -+ break; -+ case 3: /* breakpoint */ -+ sprintf(buffer, "Software breakpoint"); -+ return; -+ default: -+ sprintf(buffer, "Details not available"); -+ return; -+ } -+ asm volatile ("movl %%db6, %0\n":"=r" (dr6) -+ :); -+ if (dr6 & 0x4000) { -+ sprintf(buffer, "Single step"); -+ return; -+ } -+ for (i = 0; i < 4; ++i) { -+ if (dr6 & (1 << i)) { -+ sprintf(buffer, "Hardware breakpoint %d", i); -+ return; -+ } -+ } -+ sprintf(buffer, "Unknown trap"); -+ return; -+} -+ -+/* -+ * This function does all command procesing for interfacing to gdb. -+ * -+ * NOTE: The INT nn instruction leaves the state of the interrupt -+ * enable flag UNCHANGED. That means that when this routine -+ * is entered via a breakpoint (INT 3) instruction from code -+ * that has interrupts enabled, then interrupts will STILL BE -+ * enabled when this routine is entered. The first thing that -+ * we do here is disable interrupts so as to prevent recursive -+ * entries and bothersome serial interrupts while we are -+ * trying to run the serial port in polled mode. -+ * -+ * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so -+ * it is always necessary to do a restore_flags before returning -+ * so as to let go of that lock. 
-+ */ -+int -+kgdb_handle_exception(int exceptionVector, -+ int signo, int err_code, struct pt_regs *linux_regs) -+{ -+ struct task_struct *usethread = NULL; -+ struct task_struct *thread_list_start = 0, *thread = NULL; -+ int addr, length; -+ int breakno, breaktype; -+ char *ptr; -+ int newPC; -+ threadref thref; -+ int threadid; -+ int thread_min = PID_MAX + MAX_NO_CPUS; -+#ifdef old_thread_list -+ int maxthreads; -+#endif -+ int nothreads; -+ unsigned long flags; -+ int gdb_regs[NUMREGBYTES / 4]; -+ int dr6; -+ IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ -+#define NO_NMI 1 -+#define NO_SYNC 2 -+#define regs (*linux_regs) -+#define NUMREGS NUMREGBYTES/4 -+ /* -+ * If the entry is not from the kernel then return to the Linux -+ * trap handler and let it process the interrupt normally. -+ */ -+ if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) { -+ printk("ignoring non-kernel exception\n"); -+ print_regs(®s); -+ return (0); -+ } -+ -+ kgdb_local_irq_save(flags); -+ -+ /* Get kgdb spinlock */ -+ -+ KGDB_SPIN_LOCK(&kgdb_spinlock); -+ rdtscll(kgdb_info.entry_tsc); -+ /* -+ * We depend on this spinlock and the NMI watch dog to control the -+ * other cpus. They will arrive at "in_kgdb()" as a result of the -+ * NMI and will wait there for the following spin locks to be -+ * released. -+ */ -+#ifdef CONFIG_SMP -+ -+#if 0 -+ if (cpu_callout_map & ~MAX_CPU_MASK) { -+ printk("kgdb : too many cpus, possibly not mapped" -+ " in contiguous space, change MAX_NO_CPUS" -+ " in kgdb_stub and make new kernel.\n" -+ " cpu_callout_map is %lx\n", cpu_callout_map); -+ goto exit_just_unlock; -+ } -+#endif -+ if (spinlock_count == 1) { -+ int time = 0, end_time, dum = 0; -+ int i; -+ int cpu_logged_in[MAX_NO_CPUS] = {[0 ... 
MAX_NO_CPUS - 1] = (0) -+ }; -+ if (remote_debug) { -+ printk("kgdb : cpu %d entry, syncing others\n", -+ smp_processor_id()); -+ } -+ for (i = 0; i < MAX_NO_CPUS; i++) { -+ /* -+ * Use trylock as we may already hold the lock if -+ * we are holding the cpu. Net result is all -+ * locked. -+ */ -+ spin_trylock(&waitlocks[i]); -+ } -+ for (i = 0; i < MAX_NO_CPUS; i++) -+ cpu_logged_in[i] = 0; -+ /* -+ * Wait for their arrival. We know the watch dog is active if -+ * in_kgdb() has ever been called, as it is always called on a -+ * watchdog tick. -+ */ -+ rdtsc(dum, time); -+ end_time = time + 2; /* Note: we use the High order bits! */ -+ i = 1; -+ if (num_online_cpus() > 1) { -+ int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; -+ smp_send_nmi_allbutself(); -+ while (i < num_online_cpus() && time != end_time) { -+ int j; -+ for (j = 0; j < MAX_NO_CPUS; j++) { -+ if (waiting_cpus[j].task && -+ !cpu_logged_in[j]) { -+ i++; -+ cpu_logged_in[j] = 1; -+ if (remote_debug) { -+ printk -+ ("kgdb : cpu %d arrived at kgdb\n", -+ j); -+ } -+ break; -+ } else if (!waiting_cpus[j].task && -+ !cpu_online(j)) { -+ waiting_cpus[j].task = NOCPU; -+ cpu_logged_in[j] = 1; -+ waiting_cpus[j].hold = 1; -+ break; -+ } -+ if (!waiting_cpus[j].task && -+ in_kgdb_here_log[j]) { -+ -+ int wait = 100000; -+ while (wait--) ; -+ if (!waiting_cpus[j].task && -+ in_kgdb_here_log[j]) { -+ printk -+ ("kgdb : cpu %d stall" -+ " in in_kgdb\n", -+ j); -+ i++; -+ cpu_logged_in[j] = 1; -+ waiting_cpus[j].task = -+ (struct task_struct -+ *) 1; -+ } -+ } -+ } -+ -+ if (in_kgdb_entry_log[smp_processor_id()] > -+ (me_in_kgdb + 10)) { -+ break; -+ } -+ -+ rdtsc(dum, time); -+ } -+ if (i < num_online_cpus()) { -+ printk -+ ("kgdb : time out, proceeding without sync\n"); -+#if 0 -+ printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", -+ waiting_cpus[0].task != 0, -+ waiting_cpus[1].task != 0); -+ printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", -+ cpu_logged_in[0], cpu_logged_in[1]); -+ printk -+ ("kgdb : 
in_kgdb_here_log in: 0 = %d, 1 = %d\n", -+ in_kgdb_here_log[0] != 0, -+ in_kgdb_here_log[1] != 0); -+#endif -+ entry_state = NO_SYNC; -+ } else { -+#if 0 -+ int ent = -+ in_kgdb_entry_log[smp_processor_id()] - -+ me_in_kgdb; -+ printk("kgdb : sync after %d entries\n", ent); -+#endif -+ } -+ } else { -+ if (remote_debug) { -+ printk -+ ("kgdb : %d cpus, but watchdog not active\n" -+ "proceeding without locking down other cpus\n", -+ num_online_cpus()); -+ entry_state = NO_NMI; -+ } -+ } -+ } -+#endif -+ -+ if (remote_debug) { -+ unsigned long *lp = (unsigned long *) &linux_regs; -+ -+ printk("handle_exception(exceptionVector=%d, " -+ "signo=%d, err_code=%d, linux_regs=%p)\n", -+ exceptionVector, signo, err_code, linux_regs); -+ if (debug_regs) { -+ print_regs(®s); -+ printk("Stk: %8lx %8lx %8lx %8lx" -+ " %8lx %8lx %8lx %8lx\n", -+ lp[0], lp[1], lp[2], lp[3], -+ lp[4], lp[5], lp[6], lp[7]); -+ printk(" %8lx %8lx %8lx %8lx" -+ " %8lx %8lx %8lx %8lx\n", -+ lp[8], lp[9], lp[10], lp[11], -+ lp[12], lp[13], lp[14], lp[15]); -+ printk(" %8lx %8lx %8lx %8lx " -+ "%8lx %8lx %8lx %8lx\n", -+ lp[16], lp[17], lp[18], lp[19], -+ lp[20], lp[21], lp[22], lp[23]); -+ printk(" %8lx %8lx %8lx %8lx " -+ "%8lx %8lx %8lx %8lx\n", -+ lp[24], lp[25], lp[26], lp[27], -+ lp[28], lp[29], lp[30], lp[31]); -+ } -+ } -+ -+ /* Disable hardware debugging while we are in kgdb */ -+ /* Get the debug register status register */ -+/* *INDENT-OFF* */ -+ __asm__("movl %0,%%db7" -+ : /* no output */ -+ :"r"(0)); -+ -+ asm volatile ("movl %%db6, %0\n" -+ :"=r" (hw_breakpoint_status) -+ :); -+ -+/* *INDENT-ON* */ -+ switch (exceptionVector) { -+ case 0: /* divide error */ -+ case 1: /* debug exception */ -+ case 2: /* NMI */ -+ case 3: /* breakpoint */ -+ case 4: /* overflow */ -+ case 5: /* bounds check */ -+ case 6: /* invalid opcode */ -+ case 7: /* device not available */ -+ case 8: /* double fault (errcode) */ -+ case 10: /* invalid TSS (errcode) */ -+ case 12: /* stack fault (errcode) */ -+ case 
16: /* floating point error */ -+ case 17: /* alignment check (errcode) */ -+ default: /* any undocumented */ -+ break; -+ case 11: /* segment not present (errcode) */ -+ case 13: /* general protection (errcode) */ -+ case 14: /* page fault (special errcode) */ -+ case 19: /* cache flush denied */ -+ if (mem_err_expected) { -+ /* -+ * This fault occured because of the -+ * get_char or set_char routines. These -+ * two routines use either eax of edx to -+ * indirectly reference the location in -+ * memory that they are working with. -+ * For a page fault, when we return the -+ * instruction will be retried, so we -+ * have to make sure that these -+ * registers point to valid memory. -+ */ -+ mem_err = 1; /* set mem error flag */ -+ mem_err_expected = 0; -+ mem_err_cnt++; /* helps in debugging */ -+ /* make valid address */ -+ regs.eax = (long) &garbage_loc; -+ /* make valid address */ -+ regs.edx = (long) &garbage_loc; -+ if (remote_debug) -+ printk("Return after memory error: " -+ "mem_err_cnt=%d\n", mem_err_cnt); -+ if (debug_regs) -+ print_regs(®s); -+ goto exit_kgdb; -+ } -+ break; -+ } -+ if (remote_debug) -+ printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); -+ -+ gdb_i386vector = exceptionVector; -+ gdb_i386errcode = err_code; -+ kgdb_info.called_from = __builtin_return_address(0); -+#ifdef CONFIG_SMP -+ /* -+ * OK, we can now communicate, lets tell gdb about the sync. -+ * but only if we had a problem. -+ */ -+ switch (entry_state) { -+ case NO_NMI: -+ to_gdb("NMI not active, other cpus not stopped\n"); -+ break; -+ case NO_SYNC: -+ to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); -+ default:; -+ } -+ -+#endif -+/* -+ * Set up the gdb function call area. 
-+ */ -+ trap_cpu = smp_processor_id(); -+ OLD_esp = NEW_esp = (int) (&linux_regs->esp); -+ -+ IF_SMP(once_again:) -+ /* reply to host that an exception has occurred */ -+ remcomOutBuffer[0] = 'S'; -+ remcomOutBuffer[1] = hexchars[signo >> 4]; -+ remcomOutBuffer[2] = hexchars[signo % 16]; -+ remcomOutBuffer[3] = 0; -+ -+ putpacket(remcomOutBuffer); -+ -+ while (1 == 1) { -+ error = 0; -+ remcomOutBuffer[0] = 0; -+ getpacket(remcomInBuffer); -+ switch (remcomInBuffer[0]) { -+ case '?': -+ remcomOutBuffer[0] = 'S'; -+ remcomOutBuffer[1] = hexchars[signo >> 4]; -+ remcomOutBuffer[2] = hexchars[signo % 16]; -+ remcomOutBuffer[3] = 0; -+ break; -+ case 'd': -+ remote_debug = !(remote_debug); /* toggle debug flag */ -+ printk("Remote debug %s\n", -+ remote_debug ? "on" : "off"); -+ break; -+ case 'g': /* return the value of the CPU registers */ -+ get_gdb_regs(usethread, ®s, gdb_regs); -+ mem2hex((char *) gdb_regs, -+ remcomOutBuffer, NUMREGBYTES, 0); -+ break; -+ case 'G': /* set the value of the CPU registers - return OK */ -+ hex2mem(&remcomInBuffer[1], -+ (char *) gdb_regs, NUMREGBYTES, 0); -+ if (!usethread || usethread == current) { -+ gdb_regs_to_regs(gdb_regs, ®s); -+ strcpy(remcomOutBuffer, "OK"); -+ } else { -+ strcpy(remcomOutBuffer, "E00"); -+ } -+ break; -+ -+ case 'P':{ /* set the value of a single CPU register - -+ return OK */ -+ /* -+ * For some reason, gdb wants to talk about psudo -+ * registers (greater than 15). These may have -+ * meaning for ptrace, but for us it is safe to -+ * ignor them. We do this by dumping them into -+ * _GS which we also ignor, but do have memory for. -+ */ -+ int regno; -+ -+ ptr = &remcomInBuffer[1]; -+ regs_to_gdb_regs(gdb_regs, ®s); -+ if ((!usethread || usethread == current) && -+ hexToInt(&ptr, ®no) && -+ *ptr++ == '=' && (regno >= 0)) { -+ regno = -+ (regno >= NUMREGS ? 
_GS : regno); -+ hex2mem(ptr, (char *) &gdb_regs[regno], -+ 4, 0); -+ gdb_regs_to_regs(gdb_regs, ®s); -+ strcpy(remcomOutBuffer, "OK"); -+ break; -+ } -+ strcpy(remcomOutBuffer, "E01"); -+ break; -+ } -+ -+ /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ -+ case 'm': -+ /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ -+ ptr = &remcomInBuffer[1]; -+ if (hexToInt(&ptr, &addr) && -+ (*(ptr++) == ',') && (hexToInt(&ptr, &length))) { -+ ptr = 0; -+ /* -+ * hex doubles the byte count -+ */ -+ if (length > (BUFMAX / 2)) -+ length = BUFMAX / 2; -+ mem2hex((char *) addr, -+ remcomOutBuffer, length, 1); -+ if (mem_err) { -+ strcpy(remcomOutBuffer, "E03"); -+ debug_error("memory fault\n", NULL); -+ } -+ } -+ -+ if (ptr) { -+ strcpy(remcomOutBuffer, "E01"); -+ debug_error -+ ("malformed read memory command: %s\n", -+ remcomInBuffer); -+ } -+ break; -+ -+ /* MAA..AA,LLLL: -+ Write LLLL bytes at address AA.AA return OK */ -+ case 'M': -+ /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ -+ ptr = &remcomInBuffer[1]; -+ if (hexToInt(&ptr, &addr) && -+ (*(ptr++) == ',') && -+ (hexToInt(&ptr, &length)) && (*(ptr++) == ':')) { -+ hex2mem(ptr, (char *) addr, length, 1); -+ -+ if (mem_err) { -+ strcpy(remcomOutBuffer, "E03"); -+ debug_error("memory fault\n", NULL); -+ } else { -+ strcpy(remcomOutBuffer, "OK"); -+ } -+ -+ ptr = 0; -+ } -+ if (ptr) { -+ strcpy(remcomOutBuffer, "E02"); -+ debug_error -+ ("malformed write memory command: %s\n", -+ remcomInBuffer); -+ } -+ break; -+ case 'S': -+ remcomInBuffer[0] = 's'; -+ case 'C': -+ /* Csig;AA..AA where ;AA..AA is optional -+ * continue with signal -+ * Since signals are meaning less to us, delete that -+ * part and then fall into the 'c' code. 
-+ */ -+ ptr = &remcomInBuffer[1]; -+ length = 2; -+ while (*ptr && *ptr != ';') { -+ length++; -+ ptr++; -+ } -+ if (*ptr) { -+ do { -+ ptr++; -+ *(ptr - length++) = *ptr; -+ } while (*ptr); -+ } else { -+ remcomInBuffer[1] = 0; -+ } -+ -+ /* cAA..AA Continue at address AA..AA(optional) */ -+ /* sAA..AA Step one instruction from AA..AA(optional) */ -+ /* D detach, reply OK and then continue */ -+ case 'c': -+ case 's': -+ case 'D': -+ -+ /* try to read optional parameter, -+ pc unchanged if no parm */ -+ ptr = &remcomInBuffer[1]; -+ if (hexToInt(&ptr, &addr)) { -+ if (remote_debug) -+ printk("Changing EIP to 0x%x\n", addr); -+ -+ regs.eip = addr; -+ } -+ -+ newPC = regs.eip; -+ -+ /* clear the trace bit */ -+ regs.eflags &= 0xfffffeff; -+ -+ /* set the trace bit if we're stepping */ -+ if (remcomInBuffer[0] == 's') -+ regs.eflags |= 0x100; -+ -+ /* detach is a friendly version of continue. Note that -+ debugging is still enabled (e.g hit control C) -+ */ -+ if (remcomInBuffer[0] == 'D') { -+ strcpy(remcomOutBuffer, "OK"); -+ putpacket(remcomOutBuffer); -+ } -+ -+ if (remote_debug) { -+ printk("Resuming execution\n"); -+ print_regs(®s); -+ } -+ asm volatile ("movl %%db6, %0\n":"=r" (dr6) -+ :); -+ if (!(dr6 & 0x4000)) { -+ for (breakno = 0; breakno < 4; ++breakno) { -+ if (dr6 & (1 << breakno) && -+ (breakinfo[breakno].type == 0)) { -+ /* Set restore flag */ -+ regs.eflags |= 0x10000; -+ break; -+ } -+ } -+ } -+ correct_hw_break(); -+ asm volatile ("movl %0, %%db6\n"::"r" (0)); -+ goto exit_kgdb; -+ -+ /* kill the program */ -+ case 'k': /* do nothing */ -+ break; -+ -+ /* query */ -+ case 'q': -+ nothreads = 0; -+ switch (remcomInBuffer[1]) { -+ case 'f': -+ threadid = 1; -+ thread_list = 2; -+ thread_list_start = (usethread ? 
: current); -+ case 's': -+ if (!cmp_str(&remcomInBuffer[2], -+ "ThreadInfo", 10)) -+ break; -+ -+ remcomOutBuffer[nothreads++] = 'm'; -+ for (; threadid < PID_MAX + MAX_NO_CPUS; -+ threadid++) { -+ thread = getthread(threadid); -+ if (thread) { -+ nothreads += int_to_hex_v( -+ &remcomOutBuffer[ -+ nothreads], -+ threadid); -+ if (thread_min > threadid) -+ thread_min = threadid; -+ remcomOutBuffer[ -+ nothreads] = ','; -+ nothreads++; -+ if (nothreads > BUFMAX - 10) -+ break; -+ } -+ } -+ if (remcomOutBuffer[nothreads - 1] == 'm') { -+ remcomOutBuffer[nothreads - 1] = 'l'; -+ } else { -+ nothreads--; -+ } -+ remcomOutBuffer[nothreads] = 0; -+ break; -+ -+#ifdef old_thread_list /* Old thread info request */ -+ case 'L': -+ /* List threads */ -+ thread_list = 2; -+ thread_list_start = (usethread ? : current); -+ unpack_byte(remcomInBuffer + 3, &maxthreads); -+ unpack_threadid(remcomInBuffer + 5, &thref); -+ do { -+ int buf_thread_limit = -+ (BUFMAX - 22) / BUF_THREAD_ID_SIZE; -+ if (maxthreads > buf_thread_limit) { -+ maxthreads = buf_thread_limit; -+ } -+ } while (0); -+ remcomOutBuffer[0] = 'q'; -+ remcomOutBuffer[1] = 'M'; -+ remcomOutBuffer[4] = '0'; -+ pack_threadid(remcomOutBuffer + 5, &thref); -+ -+ threadid = threadref_to_int(&thref); -+ for (nothreads = 0; -+ nothreads < maxthreads && -+ threadid < PID_MAX + MAX_NO_CPUS; -+ threadid++) { -+ thread = getthread(threadid); -+ if (thread) { -+ int_to_threadref(&thref, -+ threadid); -+ pack_threadid(remcomOutBuffer + -+ 21 + -+ nothreads * 16, -+ &thref); -+ nothreads++; -+ if (thread_min > threadid) -+ thread_min = threadid; -+ } -+ } -+ -+ if (threadid == PID_MAX + MAX_NO_CPUS) { -+ remcomOutBuffer[4] = '1'; -+ } -+ pack_hex_byte(remcomOutBuffer + 2, nothreads); -+ remcomOutBuffer[21 + nothreads * 16] = '\0'; -+ break; -+#endif -+ case 'C': -+ /* Current thread id */ -+ remcomOutBuffer[0] = 'Q'; -+ remcomOutBuffer[1] = 'C'; -+ threadid = current->pid; -+ if (!threadid) { -+ /* -+ * idle thread -+ */ -+ for 
(threadid = PID_MAX; -+ threadid < PID_MAX + MAX_NO_CPUS; -+ threadid++) { -+ if (current == -+ idle_task(threadid - -+ PID_MAX)) -+ break; -+ } -+ } -+ int_to_threadref(&thref, threadid); -+ pack_threadid(remcomOutBuffer + 2, &thref); -+ remcomOutBuffer[18] = '\0'; -+ break; -+ -+ case 'E': -+ /* Print exception info */ -+ printexceptioninfo(exceptionVector, -+ err_code, remcomOutBuffer); -+ break; -+ case 'T':{ -+ char * nptr; -+ /* Thread extra info */ -+ if (!cmp_str(&remcomInBuffer[2], -+ "hreadExtraInfo,", 15)) { -+ break; -+ } -+ ptr = &remcomInBuffer[17]; -+ hexToInt(&ptr, &threadid); -+ thread = getthread(threadid); -+ nptr = &thread->comm[0]; -+ length = 0; -+ ptr = &remcomOutBuffer[0]; -+ do { -+ length++; -+ ptr = pack_hex_byte(ptr, *nptr++); -+ } while (*nptr && length < 16); -+ /* -+ * would like that 16 to be the size of -+ * task_struct.comm but don't know the -+ * syntax.. -+ */ -+ *ptr = 0; -+ } -+ } -+ break; -+ -+ /* task related */ -+ case 'H': -+ switch (remcomInBuffer[1]) { -+ case 'g': -+ ptr = &remcomInBuffer[2]; -+ hexToInt(&ptr, &threadid); -+ thread = getthread(threadid); -+ if (!thread) { -+ remcomOutBuffer[0] = 'E'; -+ remcomOutBuffer[1] = '\0'; -+ break; -+ } -+ /* -+ * Just in case I forget what this is all about, -+ * the "thread info" command to gdb causes it -+ * to ask for a thread list. It then switches -+ * to each thread and asks for the registers. -+ * For this (and only this) usage, we want to -+ * fudge the registers of tasks not on the run -+ * list (i.e. waiting) to show the routine that -+ * called schedule. Also, gdb, is a minimalist -+ * in that if the current thread is the last -+ * it will not re-read the info when done. -+ * This means that in this case we must show -+ * the real registers. So here is how we do it: -+ * Each entry we keep track of the min -+ * thread in the list (the last that gdb will) -+ * get info for. We also keep track of the -+ * starting thread. 
-+ * "thread_list" is cleared when switching back -+ * to the min thread if it is was current, or -+ * if it was not current, thread_list is set -+ * to 1. When the switch to current comes, -+ * if thread_list is 1, clear it, else do -+ * nothing. -+ */ -+ usethread = thread; -+ if ((thread_list == 1) && -+ (thread == thread_list_start)) { -+ thread_list = 0; -+ } -+ if (thread_list && (threadid == thread_min)) { -+ if (thread == thread_list_start) { -+ thread_list = 0; -+ } else { -+ thread_list = 1; -+ } -+ } -+ /* follow through */ -+ case 'c': -+ remcomOutBuffer[0] = 'O'; -+ remcomOutBuffer[1] = 'K'; -+ remcomOutBuffer[2] = '\0'; -+ break; -+ } -+ break; -+ -+ /* Query thread status */ -+ case 'T': -+ ptr = &remcomInBuffer[1]; -+ hexToInt(&ptr, &threadid); -+ thread = getthread(threadid); -+ if (thread) { -+ remcomOutBuffer[0] = 'O'; -+ remcomOutBuffer[1] = 'K'; -+ remcomOutBuffer[2] = '\0'; -+ if (thread_min > threadid) -+ thread_min = threadid; -+ } else { -+ remcomOutBuffer[0] = 'E'; -+ remcomOutBuffer[1] = '\0'; -+ } -+ break; -+ -+ case 'Y': /* set up a hardware breakpoint */ -+ ptr = &remcomInBuffer[1]; -+ hexToInt(&ptr, &breakno); -+ ptr++; -+ hexToInt(&ptr, &breaktype); -+ ptr++; -+ hexToInt(&ptr, &length); -+ ptr++; -+ hexToInt(&ptr, &addr); -+ if (set_hw_break(breakno & 0x3, -+ breaktype & 0x3, -+ length & 0x3, addr) == 0) { -+ strcpy(remcomOutBuffer, "OK"); -+ } else { -+ strcpy(remcomOutBuffer, "ERROR"); -+ } -+ break; -+ -+ /* Remove hardware breakpoint */ -+ case 'y': -+ ptr = &remcomInBuffer[1]; -+ hexToInt(&ptr, &breakno); -+ if (remove_hw_break(breakno & 0x3) == 0) { -+ strcpy(remcomOutBuffer, "OK"); -+ } else { -+ strcpy(remcomOutBuffer, "ERROR"); -+ } -+ break; -+ -+ case 'r': /* reboot */ -+ strcpy(remcomOutBuffer, "OK"); -+ putpacket(remcomOutBuffer); -+ /*to_gdb("Rebooting\n"); */ -+ /* triplefault no return from here */ -+ { -+ static long no_idt[2]; -+ __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); -+ BREAKPOINT; -+ } -+ -+ } /* switch 
*/ -+ -+ /* reply to the request */ -+ putpacket(remcomOutBuffer); -+ } /* while(1==1) */ -+ /* -+ * reached by goto only. -+ */ -+ exit_kgdb: -+ /* -+ * Here is where we set up to trap a gdb function call. NEW_esp -+ * will be changed if we are trying to do this. We handle both -+ * adding and subtracting, thus allowing gdb to put grung on -+ * the stack which it removes later. -+ */ -+ if (NEW_esp != OLD_esp) { -+ int *ptr = END_OF_LOOKASIDE; -+ if (NEW_esp < OLD_esp) -+ ptr -= (OLD_esp - NEW_esp) / sizeof (int); -+ *--ptr = linux_regs->eflags; -+ *--ptr = linux_regs->xcs; -+ *--ptr = linux_regs->eip; -+ *--ptr = linux_regs->ecx; -+ *--ptr = linux_regs->ebx; -+ *--ptr = linux_regs->eax; -+ linux_regs->ecx = NEW_esp - (sizeof (int) * 6); -+ linux_regs->ebx = (unsigned int) END_OF_LOOKASIDE; -+ if (NEW_esp < OLD_esp) { -+ linux_regs->eip = (unsigned int) fn_call_stub; -+ } else { -+ linux_regs->eip = (unsigned int) fn_rtn_stub; -+ linux_regs->eax = NEW_esp; -+ } -+ linux_regs->eflags &= ~(IF_BIT | TF_BIT); -+ } -+#ifdef CONFIG_SMP -+ /* -+ * Release gdb wait locks -+ * Sanity check time. Must have at least one cpu to run. Also single -+ * step must not be done if the current cpu is on hold. -+ */ -+ if (spinlock_count == 1) { -+ int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; -+ int cpu_avail = 0; -+ int i; -+ -+ for (i = 0; i < MAX_NO_CPUS; i++) { -+ if (!cpu_online(i)) -+ break; -+ if (!hold_cpu(i)) { -+ cpu_avail = 1; -+ } -+ } -+ /* -+ * Early in the bring up there will be NO cpus on line... 
-+ */ -+ if (!cpu_avail && !cpus_empty(cpu_online_map)) { -+ to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); -+ goto once_again; -+ } -+ if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { -+ to_gdb -+ ("Current cpu must be unblocked to single step\n"); -+ goto once_again; -+ } -+ if (!(ss_hold)) { -+ int i; -+ for (i = 0; i < MAX_NO_CPUS; i++) { -+ if (!hold_cpu(i)) { -+ spin_unlock(&waitlocks[i]); -+ } -+ } -+ } else { -+ spin_unlock(&waitlocks[smp_processor_id()]); -+ } -+ /* Release kgdb spinlock */ -+ KGDB_SPIN_UNLOCK(&kgdb_spinlock); -+ /* -+ * If this cpu is on hold, this is where we -+ * do it. Note, the NMI will pull us out of here, -+ * but will return as the above lock is not held. -+ * We will stay here till another cpu releases the lock for us. -+ */ -+ spin_unlock_wait(waitlocks + smp_processor_id()); -+ kgdb_local_irq_restore(flags); -+ return (0); -+ } -+#if 0 -+exit_just_unlock: -+#endif -+#endif -+ /* Release kgdb spinlock */ -+ KGDB_SPIN_UNLOCK(&kgdb_spinlock); -+ kgdb_local_irq_restore(flags); -+ return (0); -+} -+ -+/* this function is used to set up exception handlers for tracing and -+ * breakpoints. -+ * This function is not needed as the above line does all that is needed. -+ * We leave it for backward compatitability... -+ */ -+void -+set_debug_traps(void) -+{ -+ /* -+ * linux_debug_hook is defined in traps.c. We store a pointer -+ * to our own exception handler into it. -+ -+ * But really folks, every hear of labeled common, an old Fortran -+ * concept. Lots of folks can reference it and it is define if -+ * anyone does. Only one can initialize it at link time. We do -+ * this with the hook. See the statement above. No need for any -+ * executable code and it is ready as soon as the kernel is -+ * loaded. Very desirable in kernel debugging. -+ -+ linux_debug_hook = handle_exception ; -+ */ -+ -+ /* In case GDB is started before us, ack any packets (presumably -+ "$?#xx") sitting there. 
-+ putDebugChar ('+'); -+ -+ initialized = 1; -+ */ -+} -+ -+/* This function will generate a breakpoint exception. It is used at the -+ beginning of a program to sync up with a debugger and can be used -+ otherwise as a quick means to stop program execution and "break" into -+ the debugger. */ -+/* But really, just use the BREAKPOINT macro. We will handle the int stuff -+ */ -+ -+#ifdef later -+/* -+ * possibly we should not go thru the traps.c code at all? Someday. -+ */ -+void -+do_kgdb_int3(struct pt_regs *regs, long error_code) -+{ -+ kgdb_handle_exception(3, 5, error_code, regs); -+ return; -+} -+#endif -+#undef regs -+#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS -+asmlinkage void -+bad_sys_call_exit(int stuff) -+{ -+ struct pt_regs *regs = (struct pt_regs *) &stuff; -+ printk("Sys call %d return with %x preempt_count\n", -+ (int) regs->orig_eax, preempt_count()); -+} -+#endif -+#ifdef CONFIG_STACK_OVERFLOW_TEST -+#include <asm/kgdb.h> -+asmlinkage void -+stack_overflow(void) -+{ -+#ifdef BREAKPOINT -+ BREAKPOINT; -+#else -+ printk("Kernel stack overflow, looping forever\n"); -+#endif -+ while (1) { -+ } -+} -+#endif -+ -+#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) -+char gdbconbuf[BUFMAX]; -+ -+static void -+kgdb_gdb_message(const char *s, unsigned count) -+{ -+ int i; -+ int wcount; -+ char *bufptr; -+ /* -+ * This takes care of NMI while spining out chars to gdb -+ */ -+ IF_SMP(in_kgdb_console = 1); -+ gdbconbuf[0] = 'O'; -+ bufptr = gdbconbuf + 1; -+ while (count > 0) { -+ if ((count << 1) > (BUFMAX - 2)) { -+ wcount = (BUFMAX - 2) >> 1; -+ } else { -+ wcount = count; -+ } -+ count -= wcount; -+ for (i = 0; i < wcount; i++) { -+ bufptr = pack_hex_byte(bufptr, s[i]); -+ } -+ *bufptr = '\0'; -+ s += wcount; -+ -+ putpacket(gdbconbuf); -+ -+ } -+ IF_SMP(in_kgdb_console = 0); -+} -+#endif -+#ifdef CONFIG_SMP -+static void -+to_gdb(const char *s) -+{ -+ int count = 0; -+ while (s[count] && (count++ < BUFMAX)) ; -+ kgdb_gdb_message(s, count); -+} -+#endif 
-+#ifdef CONFIG_KGDB_CONSOLE -+#include <linux/console.h> -+#include <linux/init.h> -+#include <linux/fs.h> -+#include <asm/uaccess.h> -+#include <asm/semaphore.h> -+ -+void -+kgdb_console_write(struct console *co, const char *s, unsigned count) -+{ -+ -+ if (gdb_i386vector == -1) { -+ /* -+ * We have not yet talked to gdb. What to do... -+ * lets break, on continue we can do the write. -+ * But first tell him whats up. Uh, well no can do, -+ * as this IS the console. Oh well... -+ * We do need to wait or the messages will be lost. -+ * Other option would be to tell the above code to -+ * ignore this breakpoint and do an auto return, -+ * but that might confuse gdb. Also this happens -+ * early enough in boot up that we don't have the traps -+ * set up yet, so... -+ */ -+ breakpoint(); -+ } -+ kgdb_gdb_message(s, count); -+} -+ -+/* -+ * ------------------------------------------------------------ -+ * Serial KGDB driver -+ * ------------------------------------------------------------ -+ */ -+ -+static struct console kgdbcons = { -+ name:"kgdb", -+ write:kgdb_console_write, -+#ifdef CONFIG_KGDB_USER_CONSOLE -+ device:kgdb_console_device, -+#endif -+ flags:CON_PRINTBUFFER | CON_ENABLED, -+ index:-1, -+}; -+ -+/* -+ * The trick here is that this file gets linked before printk.o -+ * That means we get to peer at the console info in the command -+ * line before it does. If we are up, we register, otherwise, -+ * do nothing. By returning 0, we allow printk to look also. 
-+ */ -+static int kgdb_console_enabled; -+ -+int __init -+kgdb_console_init(char *str) -+{ -+ if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { -+ register_console(&kgdbcons); -+ kgdb_console_enabled = 1; -+ } -+ return 0; /* let others look at the string */ -+} -+ -+__setup("console=", kgdb_console_init); -+ -+#ifdef CONFIG_KGDB_USER_CONSOLE -+static kdev_t kgdb_console_device(struct console *c); -+/* This stuff sort of works, but it knocks out telnet devices -+ * we are leaving it here in case we (or you) find time to figure it out -+ * better.. -+ */ -+ -+/* -+ * We need a real char device as well for when the console is opened for user -+ * space activities. -+ */ -+ -+static int -+kgdb_consdev_open(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static ssize_t -+kgdb_consdev_write(struct file *file, const char *buf, -+ size_t count, loff_t * ppos) -+{ -+ int size, ret = 0; -+ static char kbuf[128]; -+ static DECLARE_MUTEX(sem); -+ -+ /* We are not reentrant... */ -+ if (down_interruptible(&sem)) -+ return -ERESTARTSYS; -+ -+ while (count > 0) { -+ /* need to copy the data from user space */ -+ size = count; -+ if (size > sizeof (kbuf)) -+ size = sizeof (kbuf); -+ if (copy_from_user(kbuf, buf, size)) { -+ ret = -EFAULT; -+ break;; -+ } -+ kgdb_console_write(&kgdbcons, kbuf, size); -+ count -= size; -+ ret += size; -+ buf += size; -+ } -+ -+ up(&sem); -+ -+ return ret; -+} -+ -+struct file_operations kgdb_consdev_fops = { -+ open:kgdb_consdev_open, -+ write:kgdb_consdev_write -+}; -+static kdev_t -+kgdb_console_device(struct console *c) -+{ -+ return MKDEV(TTYAUX_MAJOR, 1); -+} -+ -+/* -+ * This routine gets called from the serial stub in the i386/lib -+ * This is so it is done late in bring up (just before the console open). 
-+ */ -+void -+kgdb_console_finit(void) -+{ -+ if (kgdb_console_enabled) { -+ char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); -+ char *cp = cptr; -+ while (*cptr && *cptr != '(') -+ cptr++; -+ *cptr = 0; -+ unregister_chrdev(TTYAUX_MAJOR, cp); -+ register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); -+ } -+} -+#endif -+#endif -+#ifdef CONFIG_KGDB_TS -+#include <asm/msr.h> /* time stamp code */ -+#include <asm/hardirq.h> /* in_interrupt */ -+#ifdef CONFIG_KGDB_TS_64 -+#define DATA_POINTS 64 -+#endif -+#ifdef CONFIG_KGDB_TS_128 -+#define DATA_POINTS 128 -+#endif -+#ifdef CONFIG_KGDB_TS_256 -+#define DATA_POINTS 256 -+#endif -+#ifdef CONFIG_KGDB_TS_512 -+#define DATA_POINTS 512 -+#endif -+#ifdef CONFIG_KGDB_TS_1024 -+#define DATA_POINTS 1024 -+#endif -+#ifndef DATA_POINTS -+#define DATA_POINTS 128 /* must be a power of two */ -+#endif -+#define INDEX_MASK (DATA_POINTS - 1) -+#if (INDEX_MASK & DATA_POINTS) -+#error "CONFIG_KGDB_TS_COUNT must be a power of 2" -+#endif -+struct kgdb_and_then_struct { -+#ifdef CONFIG_SMP -+ int on_cpu; -+#endif -+ struct task_struct *task; -+ long long at_time; -+ int from_ln; -+ char *in_src; -+ void *from; -+ int *with_shpf; -+ int data0; -+ int data1; -+}; -+struct kgdb_and_then_struct2 { -+#ifdef CONFIG_SMP -+ int on_cpu; -+#endif -+ struct task_struct *task; -+ long long at_time; -+ int from_ln; -+ char *in_src; -+ void *from; -+ int *with_shpf; -+ struct task_struct *t1; -+ struct task_struct *t2; -+}; -+struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; -+ -+struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; -+int kgdb_and_then_count; -+ -+void -+kgdb_tstamp(int line, char *source, int data0, int data1) -+{ -+ static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; -+ int flags; -+ kgdb_local_irq_save(flags); -+ spin_lock(&ts_spin); -+ rdtscll(kgdb_and_then->at_time); -+#ifdef CONFIG_SMP -+ kgdb_and_then->on_cpu = smp_processor_id(); -+#endif -+ kgdb_and_then->task = current; -+ kgdb_and_then->from_ln = line; -+ 
kgdb_and_then->in_src = source; -+ kgdb_and_then->from = __builtin_return_address(0); -+ kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | -+ (preempt_count() << 8)); -+ kgdb_and_then->data0 = data0; -+ kgdb_and_then->data1 = data1; -+ kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; -+ spin_unlock(&ts_spin); -+ kgdb_local_irq_restore(flags); -+#ifdef CONFIG_PREEMPT -+ -+#endif -+ return; -+} -+#endif -+typedef int gdb_debug_hook(int exceptionVector, -+ int signo, int err_code, struct pt_regs *linux_regs); -+gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... */ -diff -puN arch/i386/kernel/Makefile~kgdb-ga arch/i386/kernel/Makefile ---- 25/arch/i386/kernel/Makefile~kgdb-ga 2004-10-21 14:54:15.259603680 -0700 -+++ 25-akpm/arch/i386/kernel/Makefile 2004-10-21 14:54:15.308596232 -0700 -@@ -14,6 +14,7 @@ obj-y += timers/ - obj-$(CONFIG_ACPI_BOOT) += acpi/ - obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o - obj-$(CONFIG_MCA) += mca.o -+obj-$(CONFIG_KGDB) += kgdb_stub.o - obj-$(CONFIG_X86_MSR) += msr.o - obj-$(CONFIG_X86_CPUID) += cpuid.o - obj-$(CONFIG_MICROCODE) += microcode.o -diff -puN arch/i386/kernel/nmi.c~kgdb-ga arch/i386/kernel/nmi.c ---- 25/arch/i386/kernel/nmi.c~kgdb-ga 2004-10-21 14:54:15.261603376 -0700 -+++ 25-akpm/arch/i386/kernel/nmi.c 2004-10-21 14:54:15.308596232 -0700 -@@ -34,7 +34,17 @@ - - #include "mach_traps.h" - -+#ifdef CONFIG_KGDB -+#include <asm/kgdb.h> -+#ifdef CONFIG_SMP -+unsigned int nmi_watchdog = NMI_IO_APIC; -+#else -+unsigned int nmi_watchdog = NMI_LOCAL_APIC; -+#endif -+#else - unsigned int nmi_watchdog = NMI_NONE; -+#endif -+ - extern int unknown_nmi_panic; - static unsigned int nmi_hz = HZ; - static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ -@@ -466,6 +476,9 @@ void touch_nmi_watchdog (void) - for (i = 0; i < NR_CPUS; i++) - alert_counter[i] = 0; - } -+#ifdef CONFIG_KGDB -+int tune_watchdog = 5*HZ; -+#endif - - extern void die_nmi(struct pt_regs *, const 
char *msg); - -@@ -481,12 +494,24 @@ void nmi_watchdog_tick (struct pt_regs * - */ - sum = irq_stat[cpu].apic_timer_irqs; - -+#ifdef CONFIG_KGDB -+ if (!in_kgdb(regs) && last_irq_sums[cpu] == sum) { -+ -+#else - if (last_irq_sums[cpu] == sum) { -+#endif - /* - * Ayiee, looks like this CPU is stuck ... - * wait a few IRQs (5 seconds) before doing the oops ... - */ - alert_counter[cpu]++; -+#ifdef CONFIG_KGDB -+ if (alert_counter[cpu] == tune_watchdog) { -+ kgdb_handle_exception(2, SIGPWR, 0, regs); -+ last_irq_sums[cpu] = sum; -+ alert_counter[cpu] = 0; -+ } -+#endif - if (alert_counter[cpu] == 30*nmi_hz) - die_nmi(regs, "NMI Watchdog detected LOCKUP"); - } else { -diff -puN arch/i386/kernel/smp.c~kgdb-ga arch/i386/kernel/smp.c ---- 25/arch/i386/kernel/smp.c~kgdb-ga 2004-10-21 14:54:15.262603224 -0700 -+++ 25-akpm/arch/i386/kernel/smp.c 2004-10-21 14:54:15.309596080 -0700 -@@ -466,7 +466,17 @@ void flush_tlb_all(void) - { - on_each_cpu(do_flush_tlb_all, NULL, 1, 1); - } -- -+#ifdef CONFIG_KGDB -+/* -+ * By using the NMI code instead of a vector we just sneak thru the -+ * word generator coming out with just what we want. AND it does -+ * not matter if clustered_apic_mode is set or not. -+ */ -+void smp_send_nmi_allbutself(void) -+{ -+ send_IPI_allbutself(APIC_DM_NMI); -+} -+#endif - /* - * this function sends a 'reschedule' IPI to another CPU. 
- * it goes straight through and wastes no time serializing -diff -puN arch/i386/kernel/traps.c~kgdb-ga arch/i386/kernel/traps.c ---- 25/arch/i386/kernel/traps.c~kgdb-ga 2004-10-21 14:54:15.264602920 -0700 -+++ 25-akpm/arch/i386/kernel/traps.c 2004-10-21 14:54:15.311595776 -0700 -@@ -105,6 +105,39 @@ int register_die_notifier(struct notifie - return err; - } - -+#ifdef CONFIG_KGDB -+extern void sysenter_past_esp(void); -+#include <asm/kgdb.h> -+#include <linux/init.h> -+void set_intr_gate(unsigned int n, void *addr); -+static void set_intr_usr_gate(unsigned int n, void *addr); -+/* -+ * Should be able to call this breakpoint() very early in -+ * bring up. Just hard code the call where needed. -+ * The breakpoint() code is here because set_?_gate() functions -+ * are local (static) to trap.c. They need be done only once, -+ * but it does not hurt to do them over. -+ */ -+void breakpoint(void) -+{ -+ set_intr_usr_gate(3,&int3); /* disable ints on trap */ -+ set_intr_gate(1,&debug); -+ set_intr_gate(14,&page_fault); -+ -+ BREAKPOINT; -+} -+#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) \ -+ { \ -+ if (!user_mode(regs) ) \ -+ { \ -+ kgdb_handle_exception(trapnr, signr, error_code, regs); \ -+ after; \ -+ } else if ((trapnr == 3) && (regs->eflags &0x200)) local_irq_enable(); \ -+ } -+#else -+#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) -+#endif -+ - static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) - { - return p > (void *)tinfo && -@@ -332,6 +365,15 @@ void die(const char * str, struct pt_reg - #endif - if (nl) - printk("\n"); -+#ifdef CONFIG_KGDB -+ /* This is about the only place we want to go to kgdb even if in -+ * user mode. But we must go in via a trap so within kgdb we will -+ * always be in kernel mode. 
-+ */ -+ if (user_mode(regs)) -+ BREAKPOINT; -+#endif -+ CHK_REMOTE_DEBUG(0,SIGTRAP,err,regs,) - notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); - show_registers(regs); - } else -@@ -406,6 +448,7 @@ static inline void do_trap(int trapnr, i - #define DO_ERROR(trapnr, signr, str, name) \ - asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ - { \ -+ CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,) \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ -@@ -429,6 +472,7 @@ asmlinkage void do_##name(struct pt_regs - #define DO_VM86_ERROR(trapnr, signr, str, name) \ - asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ - { \ -+ CHK_REMOTE_DEBUG(trapnr, signr, error_code,regs, return) \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ - return; \ -@@ -512,7 +556,8 @@ gp_in_vm86: - - gp_in_kernel: - if (!fixup_exception(regs)) { - die: -+ CHK_REMOTE_DEBUG(13,SIGSEGV,error_code,regs,) - if (notify_die(DIE_GPF, "general protection fault", regs, - error_code, 13, SIGSEGV) == NOTIFY_STOP) - return; -@@ -721,8 +766,18 @@ asmlinkage void do_debug(struct pt_regs - * allowing programs to debug themselves without the ptrace() - * interface. - */ -+#ifdef CONFIG_KGDB -+ /* -+ * I think this is the only "real" case of a TF in the kernel -+ * that really belongs to user space. Others are -+ * "Ours all ours!" 
-+ */ -+ if (((regs->xcs & 3) == 0) && ((void *)regs->eip == sysenter_past_esp)) -+ goto clear_TF_reenable; -+#else - if ((regs->xcs & 3) == 0) - goto clear_TF_reenable; -+#endif - if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) - goto clear_TF; - } -@@ -734,6 +789,17 @@ asmlinkage void do_debug(struct pt_regs - info.si_errno = 0; - info.si_code = TRAP_BRKPT; - -+#ifdef CONFIG_KGDB -+ /* -+ * If this is a kernel mode trap, we need to reset db7 to allow us -+ * to continue sanely ALSO skip the signal delivery -+ */ -+ if ((regs->xcs & 3) == 0) -+ goto clear_dr7; -+ -+ /* if not kernel, allow ints but only if they were on */ -+ if ( regs->eflags & 0x200) local_irq_enable(); -+#endif - /* If this is a kernel mode trap, save the user PC on entry to - * the kernel, that's what the debugger can make sense of. - */ -@@ -748,6 +814,7 @@ clear_dr7: - __asm__("movl %0,%%db7" - : /* no output */ - : "r" (0)); -+ CHK_REMOTE_DEBUG(1,SIGTRAP,error_code,regs,) - return; - - debug_vm86: -@@ -1004,6 +1071,12 @@ static void __init set_task_gate(unsigne - { - _set_gate(idt_table+n,5,0,0,(gdt_entry<<3)); - } -+#ifdef CONFIG_KGDB -+void set_intr_usr_gate(unsigned int n, void *addr) -+{ -+ _set_gate(idt_table+n,14,3,addr,__KERNEL_CS); -+} -+#endif - - - void __init trap_init(void) -@@ -1021,7 +1094,11 @@ void __init trap_init(void) - set_trap_gate(0,÷_error); - set_intr_gate(1,&debug); - set_intr_gate(2,&nmi); -+#ifndef CONFIG_KGDB - set_system_intr_gate(3, &int3); /* int3-5 can be called from all */ -+#else -+ set_intr_usr_gate(3,&int3); /* int3-5 can be called from all */ -+#endif - set_system_gate(4,&overflow); - set_system_gate(5,&bounds); - set_trap_gate(6,&invalid_op); -diff -puN /dev/null arch/i386/lib/kgdb_serial.c ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/arch/i386/lib/kgdb_serial.c 2004-10-21 14:54:15.313595472 -0700 -@@ -0,0 +1,485 @@ -+/* -+ * Serial interface GDB stub -+ * -+ * Written (hacked together) by David Grothe (dave@gcom.com) -+ * 
Modified to allow invokation early in boot see also -+ * kgdb.h for instructions by George Anzinger(george@mvista.com) -+ * -+ */ -+ -+#include <linux/module.h> -+#include <linux/errno.h> -+#include <linux/signal.h> -+#include <linux/sched.h> -+#include <linux/timer.h> -+#include <linux/interrupt.h> -+#include <linux/tty.h> -+#include <linux/tty_flip.h> -+#include <linux/serial.h> -+#include <linux/serial_reg.h> -+#include <linux/config.h> -+#include <linux/major.h> -+#include <linux/string.h> -+#include <linux/fcntl.h> -+#include <linux/ptrace.h> -+#include <linux/ioport.h> -+#include <linux/mm.h> -+#include <linux/init.h> -+#include <linux/highmem.h> -+#include <asm/system.h> -+#include <asm/io.h> -+#include <asm/segment.h> -+#include <asm/bitops.h> -+#include <asm/system.h> -+#include <asm/kgdb_local.h> -+#ifdef CONFIG_KGDB_USER_CONSOLE -+extern void kgdb_console_finit(void); -+#endif -+#define PRNT_off -+#define TEST_EXISTANCE -+#ifdef PRNT -+#define dbprintk(s) printk s -+#else -+#define dbprintk(s) -+#endif -+#define TEST_INTERRUPT_off -+#ifdef TEST_INTERRUPT -+#define intprintk(s) printk s -+#else -+#define intprintk(s) -+#endif -+ -+#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT) -+ -+#define GDB_BUF_SIZE 512 /* power of 2, please */ -+ -+static char gdb_buf[GDB_BUF_SIZE]; -+static int gdb_buf_in_inx; -+static atomic_t gdb_buf_in_cnt; -+static int gdb_buf_out_inx; -+ -+struct async_struct *gdb_async_info; -+static int gdb_async_irq; -+ -+#define outb_px(a,b) outb_p(b,a) -+ -+static void program_uart(struct async_struct *info); -+static void write_char(struct async_struct *info, int chr); -+/* -+ * Get a byte from the hardware data buffer and return it -+ */ -+static int -+read_data_bfr(struct async_struct *info) -+{ -+ char it = inb_p(info->port + UART_LSR); -+ -+ if (it & UART_LSR_DR) -+ return (inb_p(info->port + UART_RX)); -+ /* -+ * If we have a framing error assume somebody messed with -+ * our uart. 
Reprogram it and send '-' both ways... -+ */ -+ if (it & 0xc) { -+ program_uart(info); -+ write_char(info, '-'); -+ return ('-'); -+ } -+ return (-1); -+ -+} /* read_data_bfr */ -+ -+/* -+ * Get a char if available, return -1 if nothing available. -+ * Empty the receive buffer first, then look at the interface hardware. -+ -+ * Locking here is a bit of a problem. We MUST not lock out communication -+ * if we are trying to talk to gdb about a kgdb entry. ON the other hand -+ * we can loose chars in the console pass thru if we don't lock. It is also -+ * possible that we could hold the lock or be waiting for it when kgdb -+ * NEEDS to talk. Since kgdb locks down the world, it does not need locks. -+ * We do, of course have possible issues with interrupting a uart operation, -+ * but we will just depend on the uart status to help keep that straight. -+ -+ */ -+static spinlock_t uart_interrupt_lock = SPIN_LOCK_UNLOCKED; -+#ifdef CONFIG_SMP -+extern spinlock_t kgdb_spinlock; -+#endif -+ -+static int -+read_char(struct async_struct *info) -+{ -+ int chr; -+ unsigned long flags; -+ local_irq_save(flags); -+#ifdef CONFIG_SMP -+ if (!spin_is_locked(&kgdb_spinlock)) { -+ spin_lock(&uart_interrupt_lock); -+ } -+#endif -+ if (atomic_read(&gdb_buf_in_cnt) != 0) { /* intr routine has q'd chars */ -+ chr = gdb_buf[gdb_buf_out_inx++]; -+ gdb_buf_out_inx &= (GDB_BUF_SIZE - 1); -+ atomic_dec(&gdb_buf_in_cnt); -+ } else { -+ chr = read_data_bfr(info); -+ } -+#ifdef CONFIG_SMP -+ if (!spin_is_locked(&kgdb_spinlock)) { -+ spin_unlock(&uart_interrupt_lock); -+ } -+#endif -+ local_irq_restore(flags); -+ return (chr); -+} -+ -+/* -+ * Wait until the interface can accept a char, then write it. 
-+ */ -+static void -+write_char(struct async_struct *info, int chr) -+{ -+ while (!(inb_p(info->port + UART_LSR) & UART_LSR_THRE)) ; -+ -+ outb_p(chr, info->port + UART_TX); -+ -+} /* write_char */ -+ -+/* -+ * Mostly we don't need a spinlock, but since the console goes -+ * thru here with interrutps on, well, we need to catch those -+ * chars. -+ */ -+/* -+ * This is the receiver interrupt routine for the GDB stub. -+ * It will receive a limited number of characters of input -+ * from the gdb host machine and save them up in a buffer. -+ * -+ * When the gdb stub routine getDebugChar() is called it -+ * draws characters out of the buffer until it is empty and -+ * then reads directly from the serial port. -+ * -+ * We do not attempt to write chars from the interrupt routine -+ * since the stubs do all of that via putDebugChar() which -+ * writes one byte after waiting for the interface to become -+ * ready. -+ * -+ * The debug stubs like to run with interrupts disabled since, -+ * after all, they run as a consequence of a breakpoint in -+ * the kernel. -+ * -+ * Perhaps someone who knows more about the tty driver than I -+ * care to learn can make this work for any low level serial -+ * driver. 
-+ */ -+static irqreturn_t -+gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs) -+{ -+ struct async_struct *info; -+ unsigned long flags; -+ -+ info = gdb_async_info; -+ if (!info || !info->tty || irq != gdb_async_irq) -+ return IRQ_NONE; -+ -+ local_irq_save(flags); -+ spin_lock(&uart_interrupt_lock); -+ do { -+ int chr = read_data_bfr(info); -+ intprintk(("Debug char on int: %x hex\n", chr)); -+ if (chr < 0) -+ continue; -+ -+ if (chr == 3) { /* Ctrl-C means remote interrupt */ -+ BREAKPOINT; -+ continue; -+ } -+ -+ if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) { -+ /* buffer overflow tosses early char */ -+ read_char(info); -+ } -+ gdb_buf[gdb_buf_in_inx++] = chr; -+ gdb_buf_in_inx &= (GDB_BUF_SIZE - 1); -+ } while (inb_p(info->port + UART_IIR) & UART_IIR_RDI); -+ spin_unlock(&uart_interrupt_lock); -+ local_irq_restore(flags); -+ return IRQ_HANDLED; -+} /* gdb_interrupt */ -+ -+/* -+ * Just a NULL routine for testing. -+ */ -+void -+gdb_null(void) -+{ -+} /* gdb_null */ -+ -+/* These structure are filled in with values defined in asm/kgdb_local.h -+ */ -+static struct serial_state state = SB_STATE; -+static struct async_struct local_info = SB_INFO; -+static int ok_to_enable_ints = 0; -+static void kgdb_enable_ints_now(void); -+ -+extern char *kgdb_version; -+/* -+ * Hook an IRQ for KGDB. -+ * -+ * This routine is called from putDebugChar, below. -+ */ -+static int ints_disabled = 1; -+int -+gdb_hook_interrupt(struct async_struct *info, int verb) -+{ -+ struct serial_state *state = info->state; -+ unsigned long flags; -+ int port; -+#ifdef TEST_EXISTANCE -+ int scratch, scratch2; -+#endif -+ -+ /* The above fails if memory managment is not set up yet. -+ * Rather than fail the set up, just keep track of the fact -+ * and pick up the interrupt thing later. 
-+ */ -+ gdb_async_info = info; -+ port = gdb_async_info->port; -+ gdb_async_irq = state->irq; -+ if (verb) { -+ printk("kgdb %s : port =%x, IRQ=%d, divisor =%d\n", -+ kgdb_version, -+ port, -+ gdb_async_irq, gdb_async_info->state->custom_divisor); -+ } -+ local_irq_save(flags); -+#ifdef TEST_EXISTANCE -+ /* Existance test */ -+ /* Should not need all this, but just in case.... */ -+ -+ scratch = inb_p(port + UART_IER); -+ outb_px(port + UART_IER, 0); -+ outb_px(0xff, 0x080); -+ scratch2 = inb_p(port + UART_IER); -+ outb_px(port + UART_IER, scratch); -+ if (scratch2) { -+ printk -+ ("gdb_hook_interrupt: Could not clear IER, not a UART!\n"); -+ local_irq_restore(flags); -+ return 1; /* We failed; there's nothing here */ -+ } -+ scratch2 = inb_p(port + UART_LCR); -+ outb_px(port + UART_LCR, 0xBF); /* set up for StarTech test */ -+ outb_px(port + UART_EFR, 0); /* EFR is the same as FCR */ -+ outb_px(port + UART_LCR, 0); -+ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO); -+ scratch = inb_p(port + UART_IIR) >> 6; -+ if (scratch == 1) { -+ printk("gdb_hook_interrupt: Undefined UART type!" -+ " Not a UART! \n"); -+ local_irq_restore(flags); -+ return 1; -+ } else { -+ dbprintk(("gdb_hook_interrupt: UART type " -+ "is %d where 0=16450, 2=16550 3=16550A\n", scratch)); -+ } -+ scratch = inb_p(port + UART_MCR); -+ outb_px(port + UART_MCR, UART_MCR_LOOP | scratch); -+ outb_px(port + UART_MCR, UART_MCR_LOOP | 0x0A); -+ scratch2 = inb_p(port + UART_MSR) & 0xF0; -+ outb_px(port + UART_MCR, scratch); -+ if (scratch2 != 0x90) { -+ printk("gdb_hook_interrupt: " -+ "Loop back test failed! 
Not a UART!\n"); -+ local_irq_restore(flags); -+ return scratch2 + 1000; /* force 0 to fail */ -+ } -+#endif /* test existance */ -+ program_uart(info); -+ local_irq_restore(flags); -+ -+ return (0); -+ -+} /* gdb_hook_interrupt */ -+ -+static void -+program_uart(struct async_struct *info) -+{ -+ int port = info->port; -+ -+ (void) inb_p(port + UART_RX); -+ outb_px(port + UART_IER, 0); -+ -+ (void) inb_p(port + UART_RX); /* serial driver comments say */ -+ (void) inb_p(port + UART_IIR); /* this clears the interrupt regs */ -+ (void) inb_p(port + UART_MSR); -+ outb_px(port + UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB); -+ outb_px(port + UART_DLL, info->state->custom_divisor & 0xff); /* LS */ -+ outb_px(port + UART_DLM, info->state->custom_divisor >> 8); /* MS */ -+ outb_px(port + UART_MCR, info->MCR); -+ -+ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1 | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); /* set fcr */ -+ outb_px(port + UART_LCR, UART_LCR_WLEN8); /* reset DLAB */ -+ outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1); /* set fcr */ -+ if (!ints_disabled) { -+ intprintk(("KGDB: Sending %d to port %x offset %d\n", -+ gdb_async_info->IER, -+ (int) gdb_async_info->port, UART_IER)); -+ outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); -+ } -+ return; -+} -+ -+/* -+ * getDebugChar -+ * -+ * This is a GDB stub routine. It waits for a character from the -+ * serial interface and then returns it. If there is no serial -+ * interface connection then it returns a bogus value which will -+ * almost certainly cause the system to hang. 
In the -+ */ -+int kgdb_in_isr = 0; -+int kgdb_in_lsr = 0; -+extern spinlock_t kgdb_spinlock; -+ -+/* Caller takes needed protections */ -+ -+int -+getDebugChar(void) -+{ -+ volatile int chr, dum, time, end_time; -+ -+ dbprintk(("getDebugChar(port %x): ", gdb_async_info->port)); -+ -+ if (gdb_async_info == NULL) { -+ gdb_hook_interrupt(&local_info, 0); -+ } -+ /* -+ * This trick says if we wait a very long time and get -+ * no char, return the -1 and let the upper level deal -+ * with it. -+ */ -+ rdtsc(dum, time); -+ end_time = time + 2; -+ while (((chr = read_char(gdb_async_info)) == -1) && -+ (end_time - time) > 0) { -+ rdtsc(dum, time); -+ }; -+ /* -+ * This covers our butts if some other code messes with -+ * our uart, hay, it happens :o) -+ */ -+ if (chr == -1) -+ program_uart(gdb_async_info); -+ -+ dbprintk(("%c\n", chr > ' ' && chr < 0x7F ? chr : ' ')); -+ return (chr); -+ -+} /* getDebugChar */ -+ -+static int count = 3; -+static spinlock_t one_at_atime = SPIN_LOCK_UNLOCKED; -+ -+static int __init -+kgdb_enable_ints(void) -+{ -+ if (gdb_async_info == NULL) { -+ gdb_hook_interrupt(&local_info, 1); -+ } -+ ok_to_enable_ints = 1; -+ kgdb_enable_ints_now(); -+#ifdef CONFIG_KGDB_USER_CONSOLE -+ kgdb_console_finit(); -+#endif -+ return 0; -+} -+ -+#ifdef CONFIG_SERIAL_8250 -+void shutdown_for_kgdb(struct async_struct *gdb_async_info); -+#endif -+ -+#ifdef CONFIG_DISCONTIGMEM -+static inline int kgdb_mem_init_done(void) -+{ -+ return highmem_start_page != NULL; -+} -+#else -+static inline int kgdb_mem_init_done(void) -+{ -+ return max_mapnr != 0; -+} -+#endif -+ -+static void -+kgdb_enable_ints_now(void) -+{ -+ if (!spin_trylock(&one_at_atime)) -+ return; -+ if (!ints_disabled) -+ goto exit; -+ if (kgdb_mem_init_done() && -+ ints_disabled) { /* don't try till mem init */ -+#ifdef CONFIG_SERIAL_8250 -+ /* -+ * The ifdef here allows the system to be configured -+ * without the serial driver. 
-+ * Don't make it a module, however, it will steal the port -+ */ -+ shutdown_for_kgdb(gdb_async_info); -+#endif -+ ints_disabled = request_irq(gdb_async_info->state->irq, -+ gdb_interrupt, -+ IRQ_T(gdb_async_info), -+ "KGDB-stub", NULL); -+ intprintk(("KGDB: request_irq returned %d\n", ints_disabled)); -+ } -+ if (!ints_disabled) { -+ intprintk(("KGDB: Sending %d to port %x offset %d\n", -+ gdb_async_info->IER, -+ (int) gdb_async_info->port, UART_IER)); -+ outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); -+ } -+ exit: -+ spin_unlock(&one_at_atime); -+} -+ -+/* -+ * putDebugChar -+ * -+ * This is a GDB stub routine. It waits until the interface is ready -+ * to transmit a char and then sends it. If there is no serial -+ * interface connection then it simply returns to its caller, having -+ * pretended to send the char. Caller takes needed protections. -+ */ -+void -+putDebugChar(int chr) -+{ -+ dbprintk(("putDebugChar(port %x): chr=%02x '%c', ints_on=%d\n", -+ gdb_async_info->port, -+ chr, -+ chr > ' ' && chr < 0x7F ? chr : ' ', ints_disabled ? 0 : 1)); -+ -+ if (gdb_async_info == NULL) { -+ gdb_hook_interrupt(&local_info, 0); -+ } -+ -+ write_char(gdb_async_info, chr); /* this routine will wait */ -+ count = (chr == '#') ? 0 : count + 1; -+ if ((count == 2)) { /* try to enable after */ -+ if (ints_disabled & ok_to_enable_ints) -+ kgdb_enable_ints_now(); /* try to enable after */ -+ -+ /* We do this a lot because, well we really want to get these -+ * interrupts. The serial driver will clear these bits when it -+ * initializes the chip. Every thing else it does is ok, -+ * but this. 
-+ */ -+ if (!ints_disabled) { -+ outb_px(gdb_async_info->port + UART_IER, -+ gdb_async_info->IER); -+ } -+ } -+ -+} /* putDebugChar */ -+ -+module_init(kgdb_enable_ints); -diff -puN arch/i386/lib/Makefile~kgdb-ga arch/i386/lib/Makefile ---- 25/arch/i386/lib/Makefile~kgdb-ga 2004-10-21 14:54:15.265602768 -0700 -+++ 25-akpm/arch/i386/lib/Makefile 2004-10-21 14:54:15.313595472 -0700 -@@ -8,3 +8,4 @@ lib-y = checksum.o delay.o usercopy.o ge - - lib-$(CONFIG_X86_USE_3DNOW) += mmx.o - lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o -+lib-$(CONFIG_KGDB) += kgdb_serial.o -diff -puN arch/i386/Makefile~kgdb-ga arch/i386/Makefile ---- 25/arch/i386/Makefile~kgdb-ga 2004-10-21 14:54:15.266602616 -0700 -+++ 25-akpm/arch/i386/Makefile 2004-10-21 14:54:15.314595320 -0700 -@@ -99,6 +99,9 @@ core-$(CONFIG_X86_ES7000) := arch/i386/m - # default subarch .h files - mflags-y += -Iinclude/asm-i386/mach-default - -+mflags-$(CONFIG_KGDB) += -gdwarf-2 -+mflags-$(CONFIG_KGDB_MORE) += $(shell echo $(CONFIG_KGDB_OPTIONS) | sed -e 's/"//g') -+ - head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o - - libs-y += arch/i386/lib/ -diff -puN arch/i386/mm/fault.c~kgdb-ga arch/i386/mm/fault.c ---- 25/arch/i386/mm/fault.c~kgdb-ga 2004-10-21 14:54:15.268602312 -0700 -+++ 25-akpm/arch/i386/mm/fault.c 2004-10-21 14:54:15.314595320 -0700 -@@ -430,6 +430,12 @@ no_context: - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. 
- */ -+#ifdef CONFIG_KGDB -+ if (!user_mode(regs)){ -+ kgdb_handle_exception(14,SIGBUS, error_code, regs); -+ return; -+ } -+#endif - - bust_spinlocks(1); - -diff -puN arch/x86_64/boot/compressed/head.S~kgdb-ga arch/x86_64/boot/compressed/head.S ---- 25/arch/x86_64/boot/compressed/head.S~kgdb-ga 2004-10-21 14:54:15.269602160 -0700 -+++ 25-akpm/arch/x86_64/boot/compressed/head.S 2004-10-21 14:54:15.315595168 -0700 -@@ -26,6 +26,7 @@ - .code32 - .text - -+#define IN_BOOTLOADER - #include <linux/linkage.h> - #include <asm/segment.h> - -diff -puN arch/x86_64/boot/compressed/misc.c~kgdb-ga arch/x86_64/boot/compressed/misc.c ---- 25/arch/x86_64/boot/compressed/misc.c~kgdb-ga 2004-10-21 14:54:15.270602008 -0700 -+++ 25-akpm/arch/x86_64/boot/compressed/misc.c 2004-10-21 14:54:15.315595168 -0700 -@@ -9,6 +9,7 @@ - * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 - */ - -+#define IN_BOOTLOADER - #include "miscsetup.h" - #include <asm/io.h> - -diff -puN /dev/null Documentation/i386/kgdb/andthen ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/andthen 2004-10-21 14:54:15.316595016 -0700 -@@ -0,0 +1,100 @@ -+ -+define set_andthen -+ set var $thp=0 -+ set var $thp=(struct kgdb_and_then_struct *)&kgdb_data[0] -+ set var $at_size = (sizeof kgdb_data)/(sizeof *$thp) -+ set var $at_oc=kgdb_and_then_count -+ set var $at_cc=$at_oc -+end -+ -+define andthen_next -+ set var $at_cc=$arg0 -+end -+ -+define andthen -+ andthen_set_edge -+ if ($at_cc >= $at_oc) -+ printf "Outside window. 
Window size is %d\n",($at_oc-$at_low) -+ else -+ printf "%d: ",$at_cc -+ output *($thp+($at_cc++ % $at_size )) -+ printf "\n" -+ end -+end -+define andthen_set_edge -+ set var $at_oc=kgdb_and_then_count -+ set var $at_low = $at_oc - $at_size -+ if ($at_low < 0 ) -+ set var $at_low = 0 -+ end -+ if (( $at_cc > $at_oc) || ($at_cc < $at_low)) -+ printf "Count outside of window, setting count to " -+ if ($at_cc >= $at_oc) -+ set var $at_cc = $at_oc -+ else -+ set var $at_cc = $at_low -+ end -+ printf "%d\n",$at_cc -+ end -+end -+ -+define beforethat -+ andthen_set_edge -+ if ($at_cc <= $at_low) -+ printf "Outside window. Window size is %d\n",($at_oc-$at_low) -+ else -+ printf "%d: ",$at_cc-1 -+ output *($thp+(--$at_cc % $at_size )) -+ printf "\n" -+ end -+end -+ -+document andthen_next -+ andthen_next <count> -+ . sets the number of the event to display next. If this event -+ . is not in the event pool, either andthen or beforethat will -+ . correct it to the nearest event pool edge. The event pool -+ . ends at the last event recorded and begins <number of events> -+ . prior to that. If beforethat is used next, it will display -+ . event <count> -1. -+. -+ andthen commands are: set_andthen, andthen_next, andthen and beforethat -+end -+ -+ -+document andthen -+ andthen -+. displays the next event in the list. <set_andthen> sets up to display -+. the oldest saved event first. -+. <count> (optional) count of the event to display. -+. note the number of events saved is specified at configure time. -+. if events are saved between calls to andthen the index will change -+. but the displayed event will be the next one (unless the event buffer -+. is overrun). -+. -+. andthen commands are: set_andthen, andthen_next, andthen and beforethat -+end -+ -+document set_andthen -+ set_andthen -+. sets up to use the <andthen> and <beforethat> commands. -+. if you have defined your own struct, use the above and -+. then enter the following: -+. 
p $thp=(struct kgdb_and_then_structX *)&kgdb_data[0] -+. where <kgdb_and_then_structX> is the name of your structure. -+. -+. andthen commands are: set_andthen, andthen_next, andthen and beforethat -+end -+ -+document beforethat -+ beforethat -+. displays the next prior event in the list. <set_andthen> sets up to -+. display the last occuring event first. -+. -+. note the number of events saved is specified at configure time. -+. if events are saved between calls to beforethat the index will change -+. but the displayed event will be the next one (unless the event buffer -+. is overrun). -+. -+. andthen commands are: set_andthen, andthen_next, andthen and beforethat -+end -diff -puN /dev/null Documentation/i386/kgdb/debug-nmi.txt ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/debug-nmi.txt 2004-10-21 14:54:15.316595016 -0700 -@@ -0,0 +1,37 @@ -+Subject: Debugging with NMI -+Date: Mon, 12 Jul 1999 11:28:31 -0500 -+From: David Grothe <dave@gcom.com> -+Organization: Gcom, Inc -+To: David Grothe <dave@gcom.com> -+ -+Kernel hackers: -+ -+Maybe this is old hat, but it is new to me -- -+ -+On an ISA bus machine, if you short out the A1 and B1 pins of an ISA -+slot you will generate an NMI to the CPU. This interrupts even a -+machine that is hung in a loop with interrupts disabled. Used in -+conjunction with kgdb < -+ftp://ftp.gcom.com/pub/linux/src/kgdb-2.3.35/kgdb-2.3.35.tgz > you can -+gain debugger control of a machine that is hung in the kernel! Even -+without kgdb the kernel will print a stack trace so you can find out -+where it was hung. -+ -+The A1/B1 pins are directly opposite one another and the farthest pins -+towards the bracket end of the ISA bus socket. You can stick a paper -+clip or multi-meter probe between them to short them out. -+ -+I had a spare ISA bus to PC104 bus adapter around. The PC104 end of the -+board consists of two rows of wire wrap pins. 
So I wired a push button -+between the A1/B1 pins and now have an ISA board that I can stick into -+any ISA bus slot for debugger entry. -+ -+Microsoft has a circuit diagram of a PCI card at -+http://www.microsoft.com/hwdev/DEBUGGING/DMPSW.HTM. If you want to -+build one you will have to mail them and ask for the PAL equations. -+Nobody makes one comercially. -+ -+[THIS TIP COMES WITH NO WARRANTY WHATSOEVER. It works for me, but if -+your machine catches fire, it is your problem, not mine.] -+ -+-- Dave (the kgdb guy) -diff -puN /dev/null Documentation/i386/kgdb/gdb-globals.txt ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/gdb-globals.txt 2004-10-21 14:54:15.317594864 -0700 -@@ -0,0 +1,71 @@ -+Sender: akale@veritas.com -+Date: Fri, 23 Jun 2000 19:26:35 +0530 -+From: "Amit S. Kale" <akale@veritas.com> -+Organization: Veritas Software (India) -+To: Dave Grothe <dave@gcom.com>, linux-kernel@vger.rutgers.edu -+CC: David Milburn <dmilburn@wirespeed.com>, -+ "Edouard G. Parmelan" <Edouard.Parmelan@quadratec.fr>, -+ ezannoni@cygnus.com, Keith Owens <kaos@ocs.com.au> -+Subject: Re: Module debugging using kgdb -+ -+Dave Grothe wrote: -+> -+> Amit: -+> -+> There is a 2.4.0 version of kgdb on our ftp site: -+> ftp://ftp.gcom.com/pub/linux/src/kgdb. I mirrored your version of gdb -+> and loadmodule.sh there. -+> -+> Have a look at the README file and see if I go it right. If not, send -+> me some corrections and I will update it. -+> -+> Does your version of gdb solve the global variable problem? -+ -+Yes. -+Thanks to Elena Zanoni, gdb (developement version) can now calculate -+correctly addresses of dynamically loaded object files. I have not been -+following gdb developement for sometime and am not sure when symbol -+address calculation fix is going to appear in a gdb stable version. -+ -+Elena, any idea when the fix will make it to a prebuilt gdb from a -+redhat release? 
-+ -+For the time being I have built a gdb developement version. It can be -+used for module debugging with loadmodule.sh script. -+ -+The problem with calculating of module addresses with previous versions -+of gdb was as follows: -+gdb did not use base address of a section while calculating address of -+a symbol in the section in an object file loaded via 'add-symbol-file'. -+It used address of .text segment instead. Due to this addresses of -+symbols in .data, .bss etc. (e.g. global variables) were calculated incorrectly. -+ -+Above mentioned fix allow gdb to use base address of a segment while -+calculating address of a symbol in it. It adds a parameter '-s' to -+'add-symbol-file' command for specifying base address of a segment. -+ -+loadmodule.sh script works as follows. -+ -+1. Copy a module file to target machine. -+2. Load the module on the target machine using insmod with -m parameter. -+insmod produces a module load map which contains base addresses of all -+sections in the module and addresses of symbols in the module file. -+3. Find all sections and their base addresses in the module from -+the module map. -+4. Generate a script that loads the module file. The script uses -+'add-symbol-file' and specifies address of text segment followed by -+addresses of all segments in the module. -+ -+Here is an example gdb script produced by loadmodule.sh script. -+ -+add-symbol-file foo 0xd082c060 -s .text.lock 0xd08cbfb5 -+-s .fixup 0xd08cfbdf -s .rodata 0xd08cfde0 -s __ex_table 0xd08e3b38 -+-s .data 0xd08e3d00 -s .bss 0xd08ec8c0 -s __ksymtab 0xd08ee838 -+ -+With this command gdb can calculate addresses of symbols in ANY segment -+in a module file. -+ -+Regards. 
-+-- -+Amit Kale -+Veritas Software ( http://www.veritas.com ) -diff -puN /dev/null Documentation/i386/kgdb/gdbinit ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/gdbinit 2004-10-21 14:54:15.317594864 -0700 -@@ -0,0 +1,14 @@ -+shell echo -e "\003" >/dev/ttyS0 -+set remotebaud 38400 -+target remote /dev/ttyS0 -+define si -+stepi -+printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx -+printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp -+x/i $eip -+end -+define ni -+nexti -+printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx -+printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp -+x/i $eip -diff -puN /dev/null Documentation/i386/kgdb/gdbinit.hw ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/gdbinit.hw 2004-10-21 14:54:15.318594712 -0700 -@@ -0,0 +1,117 @@ -+ -+#Using ia-32 hardware breakpoints. -+# -+#4 hardware breakpoints are available in ia-32 processors. These breakpoints -+#do not need code modification. They are set using debug registers. -+# -+#Each hardware breakpoint can be of one of the -+#three types: execution, write, access. -+#1. An Execution breakpoint is triggered when code at the breakpoint address is -+#executed. -+#2. A write breakpoint ( aka watchpoints ) is triggered when memory location -+#at the breakpoint address is written. -+#3. An access breakpoint is triggered when memory location at the breakpoint -+#address is either read or written. -+# -+#As hardware breakpoints are available in limited number, use software -+#breakpoints ( br command in gdb ) instead of execution hardware breakpoints. -+# -+#Length of an access or a write breakpoint defines length of the datatype to -+#be watched. Length is 1 for char, 2 short , 3 int. -+# -+#For placing execution, write and access breakpoints, use commands -+#hwebrk, hwwbrk, hwabrk -+#To remove a breakpoint use hwrmbrk command. 
-+# -+#These commands take following types of arguments. For arguments associated -+#with each command, use help command. -+#1. breakpointno: 0 to 3 -+#2. length: 1 to 3 -+#3. address: Memory location in hex ( without 0x ) e.g c015e9bc -+# -+#Use the command exinfo to find which hardware breakpoint occured. -+ -+#hwebrk breakpointno address -+define hwebrk -+ maintenance packet Y$arg0,0,0,$arg1 -+end -+document hwebrk -+ hwebrk <breakpointno> <address> -+ Places a hardware execution breakpoint -+ <breakpointno> = 0 - 3 -+ <address> = Hex digits without leading "0x". -+end -+ -+#hwwbrk breakpointno length address -+define hwwbrk -+ maintenance packet Y$arg0,1,$arg1,$arg2 -+end -+document hwwbrk -+ hwwbrk <breakpointno> <length> <address> -+ Places a hardware write breakpoint -+ <breakpointno> = 0 - 3 -+ <length> = 1 (1 byte), 2 (2 byte), 3 (4 byte) -+ <address> = Hex digits without leading "0x". -+end -+ -+#hwabrk breakpointno length address -+define hwabrk -+ maintenance packet Y$arg0,1,$arg1,$arg2 -+end -+document hwabrk -+ hwabrk <breakpointno> <length> <address> -+ Places a hardware access breakpoint -+ <breakpointno> = 0 - 3 -+ <length> = 1 (1 byte), 2 (2 byte), 3 (4 byte) -+ <address> = Hex digits without leading "0x". -+end -+ -+#hwrmbrk breakpointno -+define hwrmbrk -+ maintenance packet y$arg0 -+end -+document hwrmbrk -+ hwrmbrk <breakpointno> -+ <breakpointno> = 0 - 3 -+ Removes a hardware breakpoint -+end -+ -+define reboot -+ maintenance packet r -+end -+#exinfo -+define exinfo -+ maintenance packet qE -+end -+document exinfo -+ exinfo -+ Gives information about a breakpoint. -+end -+define get_th -+ p $th=(struct thread_info *)((int)$esp & ~8191) -+end -+document get_th -+ get_tu -+ Gets and prints the current thread_info pointer, Defines th to be it. -+end -+define get_cu -+ p $cu=((struct thread_info *)((int)$esp & ~8191))->task -+end -+document get_cu -+ get_cu -+ Gets and print the "current" value. Defines $cu to be it. 
-+end -+define int_off -+ set var $flags=$eflags -+ set $eflags=$eflags&~0x200 -+ end -+define int_on -+ set var $eflags|=$flags&0x200 -+ end -+document int_off -+ saves the current interrupt state and clears the processor interrupt -+ flag. Use int_on to restore the saved flag. -+end -+document int_on -+ Restores the interrupt flag saved by int_off. -+end -diff -puN /dev/null Documentation/i386/kgdb/gdbinit-modules ---- /dev/null Thu Apr 11 07:25:15 2002 -+++ 25-akpm/Documentation/i386/kgdb/gdbinit-modules Fri Jan 13 17:54:25 2006 -@@ -0,0 +1,149 @@ -+# -+# Usefull GDB user-command to debug Linux Kernel Modules with gdbstub. -+# -+# This don't work for Linux-2.0 or older. -+# -+# Author Edouard G. Parmelan <Edouard.Parmelan@quadratec.fr> -+# -+# -+# Fri Apr 30 20:33:29 CEST 1999 -+# First public release. -+# -+# Major cleanup after experiment Linux-2.0 kernel without success. -+# Symbols of a module are not in the correct order, I can't explain -+# why :( -+# -+# Fri Mar 19 15:41:40 CET 1999 -+# Initial version. -+# -+# Thu Jan 6 16:29:03 CST 2000 -+# A little fixing by Dave Grothe <dave@gcom.com> -+# -+# Mon Jun 19 09:33:13 CDT 2000 -+# Alignment changes from Edouard Parmelan -+# -+# The basic idea is to find where insmod load the module and inform -+# GDB to load the symbol table of the module with the GDB command -+# ``add-symbol-file <object> <address>''. -+# -+# The Linux kernel holds the list of all loaded modules in module_list, -+# this list end with &kernel_module (exactly with module->next == NULL, -+# but the last module is not a real module). -+# -+# Insmod allocates the struct module before the object file. Since -+# Linux-2.1, this structure contain his size. The real address of -+# the object file is then (char*)module + module->size_of_struct. -+# -+# You can use three user functions ``mod-list'', ``mod-print-symbols'' -+# and ``add-module-symbols''. 
-+# -+# mod-list list all loaded modules with the format: -+# <module-address> <module-name> -+# -+# As soon as you have found the address of your module, you can -+# print its exported symbols (mod-print-symbols) or inform GDB to add -+# symbols from your module file (mod-add-symbols). -+# -+# The argument that you give to mod-print-symbols or mod-add-symbols -+# is the <module-address> from the mod-list command. -+# -+# When using the mod-add-symbols command you must also give the full -+# pathname of the modules object code file. -+# -+# The command mod-add-lis is an example of how to make this easier. -+# You can edit this macro to contain the path name of your own -+# favorite module and then use it as a shorthand to load it. You -+# still need the module-address, however. -+# -+# The internal function ``mod-validate'' set the GDB variable $mod -+# as a ``struct module*'' if the kernel known the module otherwise -+# $mod is set to NULL. This ensure to not add symbols for a wrong -+# address. -+# -+# -+# Sat Feb 12 20:05:47 CET 2005 -+# -+# Adapted to the 2.6.* module data structure. -+# (Getting miffed at gdb for not having "offsetof" in the process :-/ ) -+# -+# Autogenerate add-symbol-file statements from the module list instead -+# of relying on a no-longer-working loadmodule.sh program. -+# -+# Matthias Urlichs <smurf@debian.org> -+# -+# -+# Have a nice hacking day ! -+# -+# -+define mod-list -+ set $lmod = modules->next -+ # This is a circular data structure -+ while $lmod != &modules -+ set $mod = (struct module *)(((char *)$lmod) - ((int)&(((struct module *)0) -> list))) -+ printf "%p\t%s\n", $mod, $mod->name -+ set $lmod = $lmod->next -+ end -+end -+document mod-list -+mod-list -+List all modules in the form: <module-address> <module-name> -+Use the <module-address> as the argument for the other -+mod-commands: mod-print-symbols, mod-add-symbols. 
-+end -+ -+define mod-list-syms -+ set $lmod = modules->next -+ # This is a circular data structure -+ while $lmod != &modules -+ set $mod = (struct module *)(((char *)$lmod) - ((int)&(((struct module *)0) -> list))) -+ printf "add-symbol-file %s.ko %p\n", $mod->name, $mod->module_core -+ set $lmod = $lmod->next -+ end -+end -+document mod-list-syms -+mod-list-syms -+List all modules in the form: add-symbol-file <module-path> <module-core> -+for adding modules' symbol tables without loadmodule.sh. -+end -+ -+define mod-validate -+ set $lmod = modules->next -+ set $mod = (struct module *)(((char *)$lmod) - ((int)&(((struct module *)0) -> list))) -+ while ($lmod != &modules) && ($mod != $arg0) -+ set $lmod = $lmod->next -+ set $mod = (struct module *)(((char *)$lmod) - ((int)&(((struct module *)0) -> list))) -+ end -+ if $lmod == &modules -+ set $mod = 0 -+ printf "%p is not a module\n", $arg0 -+ end -+end -+document mod-validate -+mod-validate <module-address> -+Internal user-command used to validate the module parameter. -+If <module> is a real loaded module, set $mod to it, otherwise set $mod -+to 0. -+end -+ -+define mod-print-symbols -+ mod-validate $arg0 -+ if $mod != 0 -+ set $i = 0 -+ while $i < $mod->num_syms -+ set $sym = $mod->syms[$i] -+ printf "%p\t%s\n", $sym->value, $sym->name -+ set $i = $i + 1 -+ end -+ set $i = 0 -+ while $i < $mod->num_gpl_syms -+ set $sym = $mod->gpl_syms[$i] -+ printf "%p\t%s\n", $sym->value, $sym->name -+ set $i = $i + 1 -+ end -+ end -+end -+document mod-print-symbols -+mod-print-symbols <module-address> -+Print all exported symbols of the module. See mod-list -+end -+ -diff -puN /dev/null Documentation/i386/kgdb/kgdb.txt ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/kgdb.txt 2004-10-21 14:54:15.324593800 -0700 -@@ -0,0 +1,775 @@ -+Last edit: <20030806.1637.12> -+This file has information specific to the i386 kgdb option. 
Other -+platforms with the kgdb option may behave in a similar fashion. -+ -+New features: -+============ -+20030806.1557.37 -+This version was made against the 2.6.0-test2 kernel. We have made the -+following changes: -+ -+- The getthread() code in the stub calls find_task_by_pid(). It fails -+ if we are early in the bring up such that the pid arrays have yet to -+ be allocated. We have added a line to kernel/pid.c to make -+ "kgdb_pid_init_done" true once the arrays are allocated. This way the -+ getthread() code knows not to call. This is only used by the thread -+ debugging stuff and threads will not yet exist at this point in the -+ boot. -+ -+- For some reason, gdb was not asking for a new thread list when the -+ "info thread" command was given. We changed to the newer version of -+ the thread info command and gdb now seems to ask when needed. Result, -+ we now get all threads in the thread list. -+ -+- We now respond to the ThreadExtraInfo request from gdb with the thread -+ name from task_struct .comm. This then appears in the thread list. -+ Thoughts on additional options for this are welcome. Things such as -+ "has BKL" and "Preempted" come to mind. I think we could have a flag -+ word that could enable different bits of info here. -+ -+- We now honor, sort of, the C and S commands. These are continue and -+ single set after delivering a signal. We ignore the signal and do the -+ requested action. This only happens when we told gdb that a signal -+ was the reason for entry, which is only done on memory faults. The -+ result is that you can now continue into the Oops. -+ -+- We changed the -g to -gdwarf-2. This seems to be the same as -ggdb, -+ but it is more exact on what language to use. -+ -+- We added two dwarf2 include files and a bit of code at the end of -+ entry.S. This does not yet work, so it is disabled. Still we want to -+ keep track of the code and "maybe" someone out there can fix it. 
-+ -+- Randy Dunlap sent some fix ups for this file which are now merged. -+ -+- Hugh Dickins sent a fix to a bit of code in traps.c that prevents a -+ compiler warning if CONFIG_KGDB is off (now who would do that :). -+ -+- Andrew Morton sent a fix for the serial driver which is now merged. -+ -+- Andrew also sent a change to the stub around the cpu managment code -+ which is also merged. -+ -+- Andrew also sent a patch to make "f" as well as "g" work as SysRq -+ commands to enter kgdb, merged. -+ -+- If CONFIG_KGDB and CONFIG_DEBUG_SPINLOCKS are both set we added a -+ "who" field to the spinlock data struct. This is filled with -+ "current" when ever the spinlock suceeds. Useful if you want to know -+ who has the lock. -+ -+_ And last, but not least, we fixed the "get_cu" macro to properly get -+ the current value of "current". -+ -+New features: -+============ -+20030505.1827.27 -+We are starting to align with the sourceforge version, at least in -+commands. To this end, the boot command string to start kgdb at -+boot time has been changed from "kgdb" to "gdb". -+ -+Andrew Morton sent a couple of patches which are now included as follows: -+1.) We now return a flag to the interrupt handler. -+2.) We no longer use smp_num_cpus (a conflict with the lock meter). -+3.) And from William Lee Irwin III <wli@holomorphy.com> code to make -+ sure high-mem is set up before we attempt to register our interrupt -+ handler. -+We now include asm/kgdb.h from config.h so you will most likely never -+have to include it. It also 'NULLS' the kgdb macros you might have in -+your code when CONFIG_KGDB is not defined. This allows you to just -+turn off CONFIG_KGDB to turn off all the kgdb_ts() calls and such. -+This include is conditioned on the machine being an x86 so as to not -+mess with other archs. -+ -+20020801.1129.03 -+This is currently the version for the 2.4.18 (and beyond?) kernel. -+ -+We have several new "features" beginning with this version: -+ -+1.) 
Kgdb now syncs the "other" CPUs with a cross-CPU NMI. No more -+ waiting and it will pull that guy out of an IRQ off spin lock :) -+ -+2.) We doctored up the code that tells where a task is waiting and -+ included it so that the "info thread" command will show a bit more -+ than "schedule()". Try it... -+ -+3.) Added the ability to call a function from gdb. All the standard gdb -+ issues apply, i.e. if you hit a breakpoint in the function, you are -+ not allowed to call another (gdb limitation, not kgdb). To help -+ this capability we added a memory allocation function. Gdb does not -+ return this memory (it is used for strings that you pass to that function -+ you are calling from gdb) so we fixed up a way to allow you to -+ manually return the memory (see below). -+ -+4.) Kgdb time stamps (kgdb_ts()) are enhanced to expand what was the -+ interrupt flag to now also include the preemption count and the -+ "in_interrupt" info. The flag is now called "with_pif" to indicate -+ the order, preempt_count, in_interrupt, flag. The preempt_count is -+ shifted left by 4 bits so you can read the count in hex by dropping -+ the low order digit. In_interrupt is in bit 1, and the flag is in -+ bit 0. -+ -+5.) The command: "p kgdb_info" is now expanded and prints something -+ like: -+(gdb) p kgdb_info -+$2 = {used_malloc = 0, called_from = 0xc0107506, entry_tsc = 67468627259, -+ errcode = 0, vector = 3, print_debug_info = 0, hold_on_sstep = 1, -+ cpus_waiting = {{task = 0xc027a000, pid = 32768, hold = 0, -+ regs = 0xc027bf84}, {task = 0x0, pid = 0, hold = 0, regs = 0x0}}} -+ -+ Things to note here: a.) used_malloc is the amount of memory that -+ has been malloc'ed to do calls from gdb. You can reclaim this -+ memory like this: "p kgdb_info.used_malloc=0" Cool, huh? b.) -+ cpus_waiting is now "sized" by the number of CPUs you enter at -+ configure time in the kgdb configure section. This is NOT used -+ anywhere else in the system, but it is "nice" here. c.) 
The task's -+ "pid" is now in the structure. This is the pid you will need to use -+ to decode to the thread id to get gdb to look at that thread. -+ Remember that the "info thread" command prints a list of threads -+ wherein it numbers each thread with its reference number followed -+ by the thread's pid. Note that the per-CPU idle threads actually -+ have pids of 0 (yes, there is more than one pid 0 in an SMP system). -+ To avoid confusion, kgdb numbers these threads with numbers beyond -+ the MAX_PID. That is why you see 32768 and above. -+ -+6.) A subtle change, we now provide the complete register set for tasks -+ that are active on the other CPUs. This allows better trace back on -+ those tasks. -+ -+ And, let's mention what we could not fix. Back-trace from all but the -+ thread that we trapped will, most likely, have a bogus entry in it. -+ The problem is that gdb does not recognize the entry code for -+ functions that use "current" near (at all?) the entry. The compiler -+ is putting the "current" decode as the first two instructions of the -+ function where gdb expects to find %ebp changing code. Back trace -+ also has trouble with interrupt frames. I am talking with Daniel -+ Jacobowitz about some way to fix this, but don't hold your breath. -+ -+20011220.0050.35 -+Major enhancement with this version is the ability to hold one or more -+CPUs in an SMP system while allowing the others to continue. Also, by -+default only the current CPU is enabled on single-step commands (please -+note that gdb issues single-step commands at times other than when you -+use the si command). -+ -+Another change is to collect some useful information in -+a global structure called "kgdb_info". You should be able to just: -+ -+p kgdb_info -+ -+although I have seen cases where the first time this is done gdb just -+prints the first member but prints the whole structure if you then enter -+CR (carriage return or enter). 
This also works: -+ -+p *&kgdb_info -+ -+Here is a sample: -+(gdb) p kgdb_info -+$4 = {called_from = 0xc010732c, entry_tsc = 32804123790856, errcode = 0, -+ vector = 3, print_debug_info = 0} -+ -+"Called_from" is the return address from the current entry into kgdb. -+Sometimes it is useful to know why you are in kgdb, for example, was -+it an NMI or a real breakpoint? The simple way to interrogate this -+return address is: -+ -+l *0xc010732c -+ -+which will print the surrounding few lines of source code. -+ -+"Entry_tsc" is the CPU TSC on entry to kgdb (useful to compare to the -+kgdb_ts entries). -+ -+"errcode" and "vector" are other entry parameters which may be helpful on -+some traps. -+ -+"print_debug_info" is the internal debugging kgdb print enable flag. Yes, -+you can modify it. -+ -+In SMP systems kgdb_info also includes the "cpus_waiting" structure and -+"hold_on_step": -+ -+(gdb) p kgdb_info -+$7 = {called_from = 0xc0112739, entry_tsc = 1034936624074, errcode = 0, -+ vector = 2, print_debug_info = 0, hold_on_sstep = 1, cpus_waiting = {{ -+ task = 0x0, hold = 0, regs = 0x0}, {task = 0xc71b8000, hold = 0, -+ regs = 0xc71b9f70}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, -+ hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, -+ hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, -+ hold = 0, regs = 0x0}}} -+ -+"Cpus_waiting" has an entry for each CPU other than the current one that -+has been stopped. Each entry contains the task_struct address for that -+CPU, the address of the regs for that task and a hold flag. All these -+have the proper typing so that, for example: -+ -+p *kgdb_info.cpus_waiting[1].regs -+ -+will print the registers for CPU 1. -+ -+"Hold_on_sstep" is a new feature with this version and comes up set or -+true. What this means is that whenever kgdb is asked to single-step all -+other CPUs are held (i.e. not allowed to execute). 
The flag applies to -+all but the current CPU and, again, can be changed: -+ -+p kgdb_info.hold_on_sstep=0 -+ -+restores the old behavior of letting all CPUs run during single-stepping. -+ -+Likewise, each CPU has a "hold" flag, which if set, locks that CPU out -+of execution. Note that this has some risk in cases where the CPUs need -+to communicate with each other. If kgdb finds no CPU available on exit, -+it will push a message thru gdb and stay in kgdb. Note that it is legal -+to hold the current CPU as long as at least one CPU can execute. -+ -+20010621.1117.09 -+This version implements an event queue. Events are signaled by calling -+a function in the kgdb stub and may be examined from gdb. See EVENTS -+below for details. This version also tightens up the interrupt and SMP -+handling to not allow interrupts on the way to kgdb from a breakpoint -+trap. It is fine to allow these interrupts for user code, but not -+system debugging. -+ -+Version -+======= -+ -+This version of the kgdb package was developed and tested on -+kernel version 2.4.16. It will not install on any earlier kernels. -+It is possible that it will continue to work on later versions -+of 2.4 and then versions of 2.5 (I hope). -+ -+ -+Debugging Setup -+=============== -+ -+Designate one machine as the "development" machine. This is the -+machine on which you run your compiles and which has your source -+code for the kernel. Designate a second machine as the "target" -+machine. This is the machine that will run your experimental -+kernel. -+ -+The two machines will be connected together via a serial line out -+one or the other of the COM ports of the PC. You will need the -+appropriate modem eliminator (null modem) cable(s) for this. -+ -+Decide on which tty port you want the machines to communicate, then -+connect them up back-to-back using the null modem cable. COM1 is -+/dev/ttyS0 and COM2 is /dev/ttyS1. 
You should test this connection -+with the two machines prior to trying to debug a kernel. Once you -+have it working, on the TARGET machine, enter: -+ -+setserial /dev/ttyS0 (or what ever tty you are using) -+ -+and record the port address and the IRQ number. -+ -+On the DEVELOPMENT machine you need to apply the patch for the kgdb -+hooks. You have probably already done that if you are reading this -+file. -+ -+On your DEVELOPMENT machine, go to your kernel source directory and do -+"make Xconfig" where X is one of "x", "menu", or "". If you are -+configuring in the standard serial driver, it must not be a module. -+Either yes or no is ok, but making the serial driver a module means it -+will initialize after kgdb has set up the UART interrupt code and may -+cause a failure of the control-C option discussed below. The configure -+question for the serial driver is under the "Character devices" heading -+and is: -+ -+"Standard/generic (8250/16550 and compatible UARTs) serial support" -+ -+Go down to the kernel debugging menu item and open it up. Enable the -+kernel kgdb stub code by selecting that item. You can also choose to -+turn on the "-ggdb -O1" compile options. The -ggdb causes the compiler -+to put more debug info (like local symbols) in the object file. On the -+i386 -g and -ggdb are the same so this option just reduces to "O1". The -+-O1 reduces the optimization level. This may be helpful in some cases, -+be aware, however, that this may also mask the problem you are looking -+for. -+ -+The baud rate. Default is 115200. What ever you choose be sure that -+the host machine is set to the same speed. I recommend the default. -+ -+The port. This is the I/O address of the serial UART that you should -+have gotten using setserial as described above. The standard COM1 port -+(3f8) using IRQ 4 is default. COM2 is 2f8 which by convention uses IRQ -+3. -+ -+The port IRQ (see above). -+ -+Stack overflow test. 
This option makes a minor change in the trap, -+system call and interrupt code to detect stack overflow and transfer -+control to kgdb if it happens. (Some platforms have this in the -+baseline code, but the i386 does not.) -+ -+You can also configure the system to recognize the boot option -+"console=kgdb" which if given will cause all console output during -+booting to be put thru gdb as well as other consoles. This option -+requires that gdb and kgdb be connected prior to sending console output -+so, if they are not, a breakpoint is executed to force the connection. -+This will happen before any kernel output (it is going thru gdb, right), -+and will stall the boot until the connection is made. -+ -+You can also configure in a patch to SysRq to enable the kGdb SysRq. -+This request generates a breakpoint. Since the serial port IRQ line is -+set up after any serial drivers, it is possible that this command will -+work when the control-C will not. -+ -+Save and exit the Xconfig program. Then do "make clean" , "make dep" -+and "make bzImage" (or whatever target you want to make). This gets the -+kernel compiled with the "-g" option set -- necessary for debugging. -+ -+You have just built the kernel on your DEVELOPMENT machine that you -+intend to run on your TARGET machine. -+ -+To install this new kernel, use the following installation procedure. -+Remember, you are on the DEVELOPMENT machine patching the kernel source -+for the kernel that you intend to run on the TARGET machine. -+ -+Copy this kernel to your target machine using your usual procedures. I -+usually arrange to copy development: -+/usr/src/linux/arch/i386/boot/bzImage to /vmlinuz on the TARGET machine -+via a LAN based NFS access. That is, I run the cp command on the target -+and copy from the development machine via the LAN. Run Lilo (see "man -+lilo" for details on how to set this up) on the new kernel on the target -+machine so that it will boot! Then boot the kernel on the target -+machine. 
-+ -+On the DEVELOPMENT machine, create a file called .gdbinit in the -+directory /usr/src/linux. An example .gdbinit file looks like this: -+ -+shell echo -e "\003" >/dev/ttyS0 -+set remotebaud 38400 (or what ever speed you have chosen) -+target remote /dev/ttyS0 -+ -+ -+Change the "echo" and "target" definition so that it specifies the tty -+port that you intend to use. Change the "remotebaud" definition to -+match the data rate that you are going to use for the com line. -+ -+You are now ready to try it out. -+ -+Boot your target machine with "kgdb" in the boot command i.e. something -+like: -+ -+lilo> test kgdb -+ -+or if you also want console output thru gdb: -+ -+lilo> test kgdb console=kgdb -+ -+You should see the lilo message saying it has loaded the kernel and then -+all output stops. The kgdb stub is trying to connect with gdb. Start -+gdb something like this: -+ -+ -+On your DEVELOPMENT machine, cd /usr/src/linux and enter "gdb vmlinux". -+When gdb gets the symbols loaded it will read your .gdbinit file and, if -+everything is working correctly, you should see gdb print out a few -+lines indicating that a breakpoint has been taken. It will actually -+show a line of code in the target kernel inside the kgdb activation -+code. -+ -+The gdb interaction should look something like this: -+ -+ linux-dev:/usr/src/linux# gdb vmlinux -+ GDB is free software and you are welcome to distribute copies of it -+ under certain conditions; type "show copying" to see the conditions. -+ There is absolutely no warranty for GDB; type "show warranty" for details. -+ GDB 4.15.1 (i486-slackware-linux), -+ Copyright 1995 Free Software Foundation, Inc... -+ breakpoint () at i386-stub.c:750 -+ 750 } -+ (gdb) -+ -+You can now use whatever gdb commands you like to set breakpoints. -+Enter "continue" to start your target machine executing again. 
At this -+point the target system will run at full speed until it encounters -+your breakpoint or gets a segment violation in the kernel, or whatever. -+ -+If you have the kgdb console enabled when you continue, gdb will print -+out all the console messages. -+ -+The above example caused a breakpoint relatively early in the boot -+process. For the i386 kgdb it is possible to code a break instruction -+as the first C-language point in init/main.c, i.e. as the first instruction -+in start_kernel(). This could be done as follows: -+ -+#include <asm/kgdb.h> -+ breakpoint(); -+ -+This breakpoint() is really a function that sets up the breakpoint and -+single-step hardware trap cells and then executes a breakpoint. Any -+early hard coded breakpoint will need to use this function. Once the -+trap cells are set up they need not be set again, but doing it again -+does not hurt anything, so you don't need to be concerned about which -+breakpoint is hit first. Once the trap cells are set up (and the kernel -+sets them up in due course even if breakpoint() is never called) the -+macro: -+ -+BREAKPOINT; -+ -+will generate an inline breakpoint. This may be more useful as it stops -+the processor at the instruction instead of in a function a step removed -+from the location of interest. In either case <asm/kgdb.h> must be -+included to define both breakpoint() and BREAKPOINT. -+ -+Triggering kgdbstub at other times -+================================== -+ -+Often you don't need to enter the debugger until much later in the boot -+or even after the machine has been running for some time. Once the -+kernel is booted and interrupts are on, you can force the system to -+enter the debugger by sending a control-C to the debug port. This is -+what the first line of the recommended .gdbinit file does. This allows -+you to start gdb any time after the system is up as well as when the -+system is already at a breakpoint. 
(In the case where the system is -+already at a breakpoint the control-C is not needed, however, it will -+be ignored by the target so no harm is done. Also note that the echo -+command assumes that the port speed is already set. This will be true -+once gdb has connected, but it is best to set the port speed before you -+run gdb.) -+ -+Another simple way to do this is to put the following file in your ~/bin -+directory: -+ -+#!/bin/bash -+echo -e "\003" > /dev/ttyS0 -+ -+Here, the ttyS0 should be replaced with what ever port you are using. -+The "\003" is control-C. Once you are connected with gdb, you can enter -+control-C at the command prompt. -+ -+An alternative way to get control to the debugger is to enable the kGdb -+SysRq command. Then you would enter Alt-SysRq-g (all three keys at the -+same time, but push them down in the order given). To refresh your -+memory of the available SysRq commands try Alt-SysRq-=. Actually any -+undefined command could replace the "=", but I like to KNOW that what I -+am pushing will never be defined. -+ -+Debugging hints -+=============== -+ -+You can break into the target machine at any time from the development -+machine by typing ^C (see above paragraph). If the target machine has -+interrupts enabled this will stop it in the kernel and enter the -+debugger. -+ -+There is unfortunately no way of breaking into the kernel if it is -+in a loop with interrupts disabled, so if this happens to you then -+you need to place exploratory breakpoints or printk's into the kernel -+to find out where it is looping. The exploratory breakpoints can be -+entered either thru gdb or hard coded into the source. This is very -+handy if you do something like: -+ -+if (<it hurts>) BREAKPOINT; -+ -+ -+There is a copy of an e-mail in the Documentation/i386/kgdb/ directory -+(debug-nmi.txt) which describes how to create an NMI on an ISA bus -+machine using a paper clip.
I have a sophisticated version of this made -+by wiring a push button switch into a PC104/ISA bus adapter card. The -+adapter card nicely furnishes wire wrap pins for all the ISA bus -+signals. -+ -+When you are done debugging the kernel on the target machine it is a -+good idea to leave it in a running state. This makes reboots faster, -+bypassing the fsck. So do a gdb "continue" as the last gdb command if -+this is possible. To terminate gdb itself on the development machine -+and leave the target machine running, first clear all breakpoints and -+continue, then type ^Z to suspend gdb and then kill it with "kill %1" or -+something similar. -+ -+If gdbstub Does Not Work -+======================== -+ -+If it doesn't work, you will have to troubleshoot it. Do the easy -+things first like double checking your cabling and data rates. You -+might try some non-kernel based programs to see if the back-to-back -+connection works properly. Just something simple like cat /etc/hosts -+>/dev/ttyS0 on one machine and cat /dev/ttyS0 on the other will tell you -+if you can send data from one machine to the other. Make sure it works -+in both directions. There is no point in tearing out your hair in the -+kernel if the line doesn't work. -+ -+All of the real action takes place in the file -+/usr/src/linux/arch/i386/kernel/kgdb_stub.c. That is the code on the target -+machine that interacts with gdb on the development machine. In gdb you can -+turn on a debug switch with the following command: -+ -+ set remotedebug -+ -+This will print out the protocol messages that gdb is exchanging with -+the target machine. -+ -+Another place to look is /usr/src/arch/i386/lib/kgdb_serial.c. This is -+the code that talks to the serial port on the target side. There might -+be a problem there. In particular there is a section of this code that -+tests the UART which will tell you what UART you have if you define -+"PRNT" (just remove "_off" from the #define PRNT_off). 
To view this -+report you will need to boot the system without any breakpoints. This -+allows the kernel to run to the point where it calls kgdb to set up -+interrupts. At this time kgdb will test the UART and print out the type -+it finds. (You need to wait so that the printks are actually being -+printed. Early in the boot they are cached, waiting for the console to -+be enabled. Also, if kgdb is entered thru a breakpoint it is possible -+to cause a deadlock by calling printk when the console is locked. The -+stub thus avoids doing printks from breakpoints, especially in the -+serial code.) At this time, if the UART fails to do the expected thing, -+kgdb will print out (using printk) information on what failed. (These -+messages will be buried in all the other boot up messages. Look for -+lines that start with "gdb_hook_interrupt:". You may want to use dmesg -+once the system is up to view the log.) If this fails or if you still -+don't connect, review your answers for the port address. Use: -+ -+setserial /dev/ttyS0 -+ -+to get the current port and IRQ information. This command will also -+tell you what the system found for the UART type. The stub recognizes -+the following UART types: -+ -+16450, 16550, and 16550A -+ -+If you are really desperate you can use printk debugging in the -+kgdbstub code in the target kernel until you get it working. In particular, -+there is a global variable in /usr/src/linux/arch/i386/kernel/kgdb_stub.c -+named "remote_debug". Compile your kernel with this set to 1, rather -+than 0 and the debug stub will print out lots of stuff as it does -+what it does. Likewise there are debug printks in the kgdb_serial.c -+code that can be turned on with simple changes in the macro defines.
-+ -+ -+Debugging Loadable Modules -+========================== -+ -+This technique comes courtesy of Edouard Parmelan -+<Edouard.Parmelan@quadratec.fr> -+ -+When you run gdb, enter the command -+ -+source gdbinit-modules -+ -+This will read in a file of gdb macros that was installed in your -+kernel source directory when kgdb was installed. This file implements -+the following commands: -+ -+mod-list -+ Lists the loaded modules in the form <module-address> <module-name> -+ -+mod-print-symbols <module-address> -+ Prints all the symbols in the indicated module. -+ -+mod-add-symbols <module-address> <object-file-path-name> -+ Loads the symbols from the object file and associates them -+ with the indicated module. -+ -+After you have loaded the module that you want to debug, use the command -+mod-list to find the <module-address> of your module. Then use that -+address in the mod-add-symbols command to load your module's symbols. -+From that point onward you can debug your module as if it were a part -+of the kernel. -+ -+The file gdbinit-modules also contains a command named mod-add-lis as -+an example of how to construct a command of your own to load your -+favorite module. The idea is to "can" the pathname of the module -+in the command so you don't have to type so much. -+ -+Threads -+======= -+ -+Each process in a target machine is seen as a gdb thread. gdb thread -+related commands (info threads, thread n) can be used. -+ -+ia-32 hardware breakpoints -+========================== -+ -+kgdb stub contains support for hardware breakpoints using debugging features -+of ia-32(x86) processors. These breakpoints do not need code modification. -+They use debugging registers. 4 hardware breakpoints are available in ia-32 -+processors. -+ -+Each hardware breakpoint can be of one of the following three types. -+ -+1. Execution breakpoint - An Execution breakpoint is triggered when code -+ at the breakpoint address is executed. 
-+ -+ As a limited number of hardware breakpoints are available, it is -+ advisable to use software breakpoints ( break command ) instead -+ of execution hardware breakpoints, unless modification of code -+ is to be avoided. -+ -+2. Write breakpoint - A write breakpoint is triggered when memory -+ location at the breakpoint address is written. -+ -+ A write breakpoint can be placed for data of variable length. Length of -+ a write breakpoint indicates length of the datatype to be -+ watched. Length is 1 for 1 byte data, 2 for 2 byte data, 3 for -+ 4 byte data. -+ -+3. Access breakpoint - An access breakpoint is triggered when memory -+ location at the breakpoint address is either read or written. -+ -+ Access breakpoints also have lengths similar to write breakpoints. -+ -+IO breakpoints in ia-32 are not supported. -+ -+Since gdb stub at present does not use the protocol used by gdb for hardware -+breakpoints, hardware breakpoints are accessed through gdb macros. gdb macros -+for hardware breakpoints are described below. -+ -+hwebrk - Places an execution breakpoint -+ hwebrk breakpointno address -+hwwbrk - Places a write breakpoint -+ hwwbrk breakpointno length address -+hwabrk - Places an access breakpoint -+ hwabrk breakpointno length address -+hwrmbrk - Removes a breakpoint -+ hwrmbrk breakpointno -+exinfo - Tells whether a software or hardware breakpoint has occurred. -+ Prints number of the hardware breakpoint if a hardware breakpoint has -+ occurred. -+ -+Arguments required by these commands are as follows -+breakpointno - 0 to 3 -+length - 1 to 3 -+address - Memory location in hex digits ( without 0x ) e.g. c015e9bc -+ -+SMP support -+========== -+ -+When a breakpoint occurs or user issues a break ( Ctrl + C ) to gdb -+client, all the processors are forced to enter the debugger. Current -+thread corresponds to the thread running on the processor where -+breakpoint occurred.
Threads running on other processor(s) appear -+similar to other non-running threads in the 'info threads' output. -+Within the kgdb stub there is a structure "waiting_cpus" in which kgdb -+records the values of "current" and "regs" for each CPU other than the -+one that hit the breakpoint. "current" is a pointer to the task -+structure for the task that CPU is running, while "regs" points to the -+saved registers for the task. This structure can be examined with the -+gdb "p" command. -+ -+ia-32 hardware debugging registers on all processors are set to same -+values. Hence any hardware breakpoints may occur on any processor. -+ -+gdb troubleshooting -+=================== -+ -+1. gdb hangs -+Kill it. restart gdb. Connect to target machine. -+ -+2. gdb cannot connect to target machine (after killing a gdb and -+restarting another) If the target machine was not inside debugger when -+you killed gdb, gdb cannot connect because the target machine won't -+respond. In this case echo "Ctrl+C"(ASCII 3) to the serial line. -+e.g. echo -e "\003" > /dev/ttyS1 -+This forces that target machine into the debugger, after which you -+can connect. -+ -+3. gdb cannot connect even after echoing Ctrl+C into serial line -+Try changing serial line settings min to 1 and time to 0 -+e.g. stty min 1 time 0 < /dev/ttyS1 -+Try echoing again -+ -+Check serial line speed and set it to correct value if required -+e.g. stty ispeed 115200 ospeed 115200 < /dev/ttyS1 -+ -+EVENTS -+====== -+ -+Ever want to know the order of things happening? Which CPU did what and -+when? How did the spinlock get the way it is? Then events are for -+you. Events are defined by calls to an event collection interface and -+saved for later examination. In this case, kgdb events are saved by a -+very fast bit of code in kgdb which is fully SMP and interrupt protected -+and they are examined by using gdb to display them. Kgdb keeps only -+the last N events, where N must be a power of two and is defined at -+configure time. 
-+ -+ -+Events are signaled to kgdb by calling: -+ -+kgdb_ts(data0,data1) -+ -+For each call kgdb records each call in an array along with other info. -+Here is the array definition: -+ -+struct kgdb_and_then_struct { -+#ifdef CONFIG_SMP -+ int on_cpu; -+#endif -+ long long at_time; -+ int from_ln; -+ char * in_src; -+ void *from; -+ int with_if; -+ int data0; -+ int data1; -+}; -+ -+For SMP machines the CPU is recorded, for all machines the TSC is -+recorded (gets a time stamp) as well as the line number and source file -+the call was made from. The address of the (from), the "if" (interrupt -+flag) and the two data items are also recorded. The macro kgdb_ts casts -+the types to int, so you can put any 32-bit values here. There is a -+configure option to select the number of events you want to keep. A -+nice number might be 128, but you can keep up to 1024 if you want. The -+number must be a power of two. An "andthen" macro library is provided -+for gdb to help you look at these events. It is also possible to define -+a different structure for the event storage and cast the data to this -+structure. For example the following structure is defined in kgdb: -+ -+struct kgdb_and_then_struct2 { -+#ifdef CONFIG_SMP -+ int on_cpu; -+#endif -+ long long at_time; -+ int from_ln; -+ char * in_src; -+ void *from; -+ int with_if; -+ struct task_struct *t1; -+ struct task_struct *t2; -+}; -+ -+If you use this for display, the data elements will be displayed as -+pointers to task_struct entries. You may want to define your own -+structure to use in casting. You should only change the last two items -+and you must keep the structure size the same. Kgdb will handle these -+as 32-bit ints, but within that constraint you can define a structure to -+cast to any 32-bit quantity. This need only be available to gdb and is -+only used for casting in the display code. -+ -+Final Items -+=========== -+ -+I picked up this code from Amit S. Kale and enhanced it. 
-+ -+If you make some really cool modification to this stuff, or if you -+fix a bug, please let me know. -+ -+George Anzinger -+<george@mvista.com> -+ -+Amit S. Kale -+<akale@veritas.com> -+ -+(First kgdb by David Grothe <dave@gcom.com>) -+ -+(modified by Tigran Aivazian <tigran@sco.com>) -+ Putting gdbstub into the kernel config menu. -+ -+(modified by Scott Foehner <sfoehner@engr.sgi.com>) -+ Hooks for entering gdbstub at boot time. -+ -+(modified by Amit S. Kale <akale@veritas.com>) -+ Threads, ia-32 hw debugging, mp support, console support, -+ nmi watchdog handling. -+ -+(modified by George Anzinger <george@mvista.com>) -+ Extended threads to include the idle threads. -+ Enhancements to allow breakpoint() at first C code. -+ Use of module_init() and __setup() to automate the configure. -+ Enhanced the cpu "collection" code to work in early bring-up. -+ Added ability to call functions from gdb -+ Print info thread stuff without going back to schedule() -+ Now collect the "other" cpus with an IPI/ NMI. -diff -puN /dev/null Documentation/i386/kgdb/loadmodule.sh ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/Documentation/i386/kgdb/loadmodule.sh 2004-10-21 14:54:15.325593648 -0700 -@@ -0,0 +1,78 @@ -+#/bin/sh -+# This script loads a module on a target machine and generates a gdb script. -+# source generated gdb script to load the module file at appropriate addresses -+# in gdb. -+# -+# Usage: -+# Loading the module on target machine and generating gdb script) -+# [foo]$ loadmodule.sh <modulename> -+# -+# Loading the module file into gdb -+# (gdb) source <gdbscriptpath> -+# -+# Modify following variables according to your setup. -+# TESTMACHINE - Name of the target machine -+# GDBSCRIPTS - The directory where a gdb script will be generated -+# -+# Author: Amit S. Kale (akale@veritas.com). -+# -+# If you run into problems, please check files pointed to by following -+# variables. 
-+# ERRFILE - /tmp/<modulename>.errs contains stderr output of insmod -+# MAPFILE - /tmp/<modulename>.map contains stdout output of insmod -+# GDBSCRIPT - $GDBSCRIPTS/load<modulename> gdb script. -+ -+TESTMACHINE=foo -+GDBSCRIPTS=/home/bar -+ -+if [ $# -lt 1 ] ; then { -+ echo Usage: $0 modulefile -+ exit -+} ; fi -+ -+MODULEFILE=$1 -+MODULEFILEBASENAME=`basename $1` -+ -+if [ $MODULEFILE = $MODULEFILEBASENAME ] ; then { -+ MODULEFILE=`pwd`/$MODULEFILE -+} fi -+ -+ERRFILE=/tmp/$MODULEFILEBASENAME.errs -+MAPFILE=/tmp/$MODULEFILEBASENAME.map -+GDBSCRIPT=$GDBSCRIPTS/load$MODULEFILEBASENAME -+ -+function findaddr() { -+ local ADDR=0x$(echo "$SEGMENTS" | \ -+ grep "$1" | sed 's/^[^ ]*[ ]*[^ ]*[ ]*//' | \ -+ sed 's/[ ]*[^ ]*$//') -+ echo $ADDR -+} -+ -+function checkerrs() { -+ if [ "`cat $ERRFILE`" != "" ] ; then { -+ cat $ERRFILE -+ exit -+ } fi -+} -+ -+#load the module -+echo Copying $MODULEFILE to $TESTMACHINE -+rcp $MODULEFILE root@${TESTMACHINE}: -+ -+echo Loading module $MODULEFILE -+rsh -l root $TESTMACHINE /sbin/insmod -m ./`basename $MODULEFILE` \ -+ > $MAPFILE 2> $ERRFILE -+checkerrs -+ -+SEGMENTS=`head -n 11 $MAPFILE | tail -n 10` -+TEXTADDR=$(findaddr "\\.text[^.]") -+LOADSTRING="add-symbol-file $MODULEFILE $TEXTADDR" -+SEGADDRS=`echo "$SEGMENTS" | awk '//{ -+ if ($1 != ".text" && $1 != ".this" && -+ $1 != ".kstrtab" && $1 != ".kmodtab") { -+ print " -s " $1 " 0x" $3 " " -+ } -+}'` -+LOADSTRING="$LOADSTRING $SEGADDRS" -+echo Generating script $GDBSCRIPT -+echo $LOADSTRING > $GDBSCRIPT -diff -puN drivers/char/keyboard.c~kgdb-ga drivers/char/keyboard.c ---- 25/drivers/char/keyboard.c~kgdb-ga 2004-10-21 14:54:15.273601552 -0700 -+++ 25-akpm/drivers/char/keyboard.c 2004-10-21 14:54:15.326593496 -0700 -@@ -1081,6 +1081,9 @@ void kbd_keycode(unsigned int keycode, i - } - if (sysrq_down && down && !rep) { - handle_sysrq(kbd_sysrq_xlate[keycode], regs, tty); -+#ifdef CONFIG_KGDB_SYSRQ -+ sysrq_down = 0; /* in case we miss the "up" event */ -+#endif - return; - } - 
#endif -diff -puN drivers/char/sysrq.c~kgdb-ga drivers/char/sysrq.c ---- 25/drivers/char/sysrq.c~kgdb-ga 2004-10-21 14:54:15.275601248 -0700 -+++ 25-akpm/drivers/char/sysrq.c 2004-10-21 14:54:15.326593496 -0700 -@@ -35,6 +35,25 @@ - #include <linux/spinlock.h> - - #include <asm/ptrace.h> -+#ifdef CONFIG_KGDB_SYSRQ -+ -+#define GDB_OP &kgdb_op -+static void kgdb_sysrq(int key, struct pt_regs *pt_regs, struct tty_struct *tty) -+{ -+ printk("kgdb sysrq\n"); -+ breakpoint(); -+} -+ -+static struct sysrq_key_op kgdb_op = { -+ .handler = kgdb_sysrq, -+ .help_msg = "kGdb|Fgdb", -+ .action_msg = "Debug breakpoint\n", -+}; -+ -+#else -+#define GDB_OP NULL -+#endif -+ - - extern void reset_vc(unsigned int); - -@@ -238,8 +257,8 @@ static struct sysrq_key_op *sysrq_key_ta - /* c */ NULL, - /* d */ NULL, - /* e */ &sysrq_term_op, --/* f */ NULL, --/* g */ NULL, -+/* f */ GDB_OP, -+/* g */ GDB_OP, - /* h */ NULL, - /* i */ &sysrq_kill_op, - /* j */ NULL, -diff -puN drivers/serial/8250.c~kgdb-ga drivers/serial/8250.c ---- 25/drivers/serial/8250.c~kgdb-ga 2004-10-21 14:54:15.276601096 -0700 -+++ 25-akpm/drivers/serial/8250.c 2004-10-21 14:54:15.328593192 -0700 -@@ -983,7 +983,7 @@ receive_chars(struct uart_8250_port *up, - if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { - tty->flip.work.func((void *)tty); - if (tty->flip.count >= TTY_FLIPBUF_SIZE) -- return; // if TTY_DONT_FLIP is set -+ return; /* if TTY_DONT_FLIP is set */ - } - ch = serial_inp(up, UART_RX); - *tty->flip.char_buf_ptr = ch; -@@ -1348,12 +1348,21 @@ static void serial8250_break_ctl(struct - spin_unlock_irqrestore(&up->port.lock, flags); - } - -+#ifdef CONFIG_KGDB -+static int kgdb_irq = -1; -+#endif -+ - static int serial8250_startup(struct uart_port *port) - { - struct uart_8250_port *up = (struct uart_8250_port *)port; - unsigned long flags; - int retval; - -+#ifdef CONFIG_KGDB -+ if (up->port.irq == kgdb_irq) -+ return -EBUSY; -+#endif -+ - up->capabilities = uart_config[up->port.type].flags; - up->mcr = 
0; - -@@ -1990,6 +1999,10 @@ serial8250_register_ports(struct uart_dr - for (i = 0; i < UART_NR; i++) { - struct uart_8250_port *up = &serial8250_ports[i]; - -+#ifdef CONFIG_KGDB -+ if (up->port.irq == kgdb_irq) -+ up->port.kgdb = 1; -+#endif - up->port.line = i; - up->port.ops = &serial8250_pops; - up->port.dev = dev; -@@ -2376,6 +2389,31 @@ void serial8250_unregister_port(int line - } - EXPORT_SYMBOL(serial8250_unregister_port); - -+#ifdef CONFIG_KGDB -+/* -+ * Find all the ports using the given irq and shut them down. -+ * Result should be that the irq will be released. -+ */ -+void shutdown_for_kgdb(struct async_struct * info) -+{ -+ int irq = info->state->irq; -+ struct uart_8250_port *up; -+ int ttyS; -+ -+ kgdb_irq = irq; /* save for later init */ -+ for (ttyS = 0; ttyS < UART_NR; ttyS++){ -+ up = &serial8250_ports[ttyS]; -+ if (up->port.irq == irq && (irq_lists + irq)->head) { -+#ifdef CONFIG_DEBUG_SPINLOCK /* ugly business... */ -+ if(up->port.lock.magic != SPINLOCK_MAGIC) -+ spin_lock_init(&up->port.lock); -+#endif -+ serial8250_shutdown(&up->port); -+ } -+ } -+} -+#endif /* CONFIG_KGDB */ -+ - static int __init serial8250_init(void) - { - int ret, i; -diff -puN drivers/serial/serial_core.c~kgdb-ga drivers/serial/serial_core.c ---- 25/drivers/serial/serial_core.c~kgdb-ga 2004-10-21 14:54:15.278600792 -0700 -+++ 25-akpm/drivers/serial/serial_core.c 2004-10-21 14:54:15.330592888 -0700 -@@ -1976,6 +1976,11 @@ uart_configure_port(struct uart_driver * - { - unsigned int flags; - -+#ifdef CONFIG_KGDB -+ if (port->kgdb) -+ return; -+#endif -+ - /* - * If there isn't a port here, don't do anything further. 
- */ -diff -puN include/asm-i386/bugs.h~kgdb-ga include/asm-i386/bugs.h ---- 25/include/asm-i386/bugs.h~kgdb-ga 2004-10-21 14:54:15.279600640 -0700 -+++ 25-akpm/include/asm-i386/bugs.h 2004-10-21 14:54:15.331592736 -0700 -@@ -1,11 +1,11 @@ - /* - * include/asm-i386/bugs.h - * -- * Copyright (C) 1994 Linus Torvalds -+ * Copyright (C) 1994 Linus Torvalds - * - * Cyrix stuff, June 1998 by: - * - Rafael R. Reilova (moved everything from head.S), -- * <rreilova@ececs.uc.edu> -+ * <rreilova@ececs.uc.edu> - * - Channing Corn (tests & fixes), - * - Andrew D. Balsa (code cleanup). - * -@@ -25,7 +25,20 @@ - #include <asm/processor.h> - #include <asm/i387.h> - #include <asm/msr.h> -- -+#ifdef CONFIG_KGDB -+/* -+ * Provied the command line "gdb" initial break -+ */ -+int __init kgdb_initial_break(char * str) -+{ -+ if (*str == '\0'){ -+ breakpoint(); -+ return 1; -+ } -+ return 0; -+} -+__setup("gdb",kgdb_initial_break); -+#endif - static int __init no_halt(char *s) - { - boot_cpu_data.hlt_works_ok = 0; -@@ -140,7 +153,7 @@ static void __init check_popad(void) - : "ecx", "edi" ); - /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ - if (res != 12345678) printk( "Buggy.\n" ); -- else printk( "OK.\n" ); -+ else printk( "OK.\n" ); - #endif - } - -diff -puN /dev/null include/asm-i386/kgdb.h ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/include/asm-i386/kgdb.h 2004-10-21 14:54:15.331592736 -0700 -@@ -0,0 +1,59 @@ -+#ifndef __KGDB -+#define __KGDB -+ -+/* -+ * This file should not include ANY others. This makes it usable -+ * most anywhere without the fear of include order or inclusion. -+ * Make it so! -+ * -+ * This file may be included all the time. It is only active if -+ * CONFIG_KGDB is defined, otherwise it stubs out all the macros -+ * and entry points. 
-+ */ -+#if defined(CONFIG_KGDB) && !defined(__ASSEMBLY__) -+ -+extern void breakpoint(void); -+#define INIT_KGDB_INTS kgdb_enable_ints() -+ -+#ifndef BREAKPOINT -+#define BREAKPOINT asm(" int $3") -+#endif -+/* -+ * GDB debug stub (or any debug stub) can point the 'linux_debug_hook' -+ * pointer to its routine and it will be entered as the first thing -+ * when a trap occurs. -+ * -+ * Return values are, at present, undefined. -+ * -+ * The debug hook routine does not necessarily return to its caller. -+ * It has the register image and thus may choose to resume execution -+ * anywhere it pleases. -+ */ -+struct pt_regs; -+ -+extern int kgdb_handle_exception(int trapno, -+ int signo, int err_code, struct pt_regs *regs); -+extern int in_kgdb(struct pt_regs *regs); -+ -+#ifdef CONFIG_KGDB_TS -+void kgdb_tstamp(int line, char *source, int data0, int data1); -+/* -+ * This is the time stamp function. The macro adds the source info and -+ * does a cast on the data to allow most any 32-bit value. -+ */ -+ -+#define kgdb_ts(data0,data1) kgdb_tstamp(__LINE__,__FILE__,(int)data0,(int)data1) -+#else -+#define kgdb_ts(data0,data1) -+#endif -+#else /* CONFIG_KGDB && ! __ASSEMBLY__ ,stubs follow... 
*/ -+#ifndef BREAKPOINT -+#define BREAKPOINT -+#endif -+#define kgdb_ts(data0,data1) -+#define in_kgdb -+#define kgdb_handle_exception -+#define breakpoint -+#define INIT_KGDB_INTS -+#endif -+#endif /* __KGDB */ -diff -puN /dev/null include/asm-i386/kgdb_local.h ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/include/asm-i386/kgdb_local.h 2004-10-21 14:54:15.332592584 -0700 -@@ -0,0 +1,102 @@ -+#ifndef __KGDB_LOCAL -+#define ___KGDB_LOCAL -+#include <linux/config.h> -+#include <linux/types.h> -+#include <linux/serial.h> -+#include <linux/serialP.h> -+#include <linux/spinlock.h> -+#include <asm/processor.h> -+#include <asm/msr.h> -+#include <asm/kgdb.h> -+ -+#define PORT 0x3f8 -+#ifdef CONFIG_KGDB_PORT -+#undef PORT -+#define PORT CONFIG_KGDB_PORT -+#endif -+#define IRQ 4 -+#ifdef CONFIG_KGDB_IRQ -+#undef IRQ -+#define IRQ CONFIG_KGDB_IRQ -+#endif -+#define SB_CLOCK 1843200 -+#define SB_BASE (SB_CLOCK/16) -+#define SB_BAUD9600 SB_BASE/9600 -+#define SB_BAUD192 SB_BASE/19200 -+#define SB_BAUD384 SB_BASE/38400 -+#define SB_BAUD576 SB_BASE/57600 -+#define SB_BAUD1152 SB_BASE/115200 -+#ifdef CONFIG_KGDB_9600BAUD -+#define SB_BAUD SB_BAUD9600 -+#endif -+#ifdef CONFIG_KGDB_19200BAUD -+#define SB_BAUD SB_BAUD192 -+#endif -+#ifdef CONFIG_KGDB_38400BAUD -+#define SB_BAUD SB_BAUD384 -+#endif -+#ifdef CONFIG_KGDB_57600BAUD -+#define SB_BAUD SB_BAUD576 -+#endif -+#ifdef CONFIG_KGDB_115200BAUD -+#define SB_BAUD SB_BAUD1152 -+#endif -+#ifndef SB_BAUD -+#define SB_BAUD SB_BAUD1152 /* Start with this if not given */ -+#endif -+ -+#ifndef CONFIG_X86_TSC -+#undef rdtsc -+#define rdtsc(a,b) if (a++ > 10000){a = 0; b++;} -+#undef rdtscll -+#define rdtscll(s) s++ -+#endif -+ -+#ifdef _raw_read_unlock /* must use a name that is "define"ed, not an inline */ -+#undef spin_lock -+#undef spin_trylock -+#undef spin_unlock -+#define spin_lock _raw_spin_lock -+#define spin_trylock _raw_spin_trylock -+#define spin_unlock _raw_spin_unlock -+#else -+#endif -+#undef spin_unlock_wait 
-+#define spin_unlock_wait(x) do { cpu_relax(); barrier();} \ -+ while(spin_is_locked(x)) -+ -+#define SB_IER 1 -+#define SB_MCR UART_MCR_OUT2 | UART_MCR_DTR | UART_MCR_RTS -+ -+#define FLAGS 0 -+#define SB_STATE { \ -+ magic: SSTATE_MAGIC, \ -+ baud_base: SB_BASE, \ -+ port: PORT, \ -+ irq: IRQ, \ -+ flags: FLAGS, \ -+ custom_divisor:SB_BAUD} -+#define SB_INFO { \ -+ magic: SERIAL_MAGIC, \ -+ port: PORT,0,FLAGS, \ -+ state: &state, \ -+ tty: (struct tty_struct *)&state, \ -+ IER: SB_IER, \ -+ MCR: SB_MCR} -+extern void putDebugChar(int); -+/* RTAI support needs us to really stop/start interrupts */ -+ -+#define kgdb_sti() __asm__ __volatile__("sti": : :"memory") -+#define kgdb_cli() __asm__ __volatile__("cli": : :"memory") -+#define kgdb_local_save_flags(x) __asm__ __volatile__(\ -+ "pushfl ; popl %0":"=g" (x): /* no input */) -+#define kgdb_local_irq_restore(x) __asm__ __volatile__(\ -+ "pushl %0 ; popfl": \ -+ /* no output */ :"g" (x):"memory", "cc") -+#define kgdb_local_irq_save(x) kgdb_local_save_flags(x); kgdb_cli() -+ -+#ifdef CONFIG_SERIAL -+extern void shutdown_for_kgdb(struct async_struct *info); -+#endif -+#define INIT_KDEBUG putDebugChar("+"); -+#endif /* __KGDB_LOCAL */ -diff -puN include/linux/config.h~kgdb-ga include/linux/config.h ---- 25/include/linux/config.h~kgdb-ga 2004-10-21 14:54:15.281600336 -0700 -+++ 25-akpm/include/linux/config.h 2004-10-21 14:54:15.332592584 -0700 -@@ -2,6 +2,9 @@ - #define _LINUX_CONFIG_H - - #include <linux/autoconf.h> -+#if defined(__i386__) && !defined(IN_BOOTLOADER) -+#include <asm/kgdb.h> -+#endif - #if !defined (__KERNEL__) && !defined(__KERNGLUE__) - #error including kernel header in userspace; use the glibc headers instead! 
- #endif -diff -puN /dev/null include/linux/dwarf2.h ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/include/linux/dwarf2.h 2004-10-21 14:54:15.336591976 -0700 -@@ -0,0 +1,738 @@ -+/* Declarations and definitions of codes relating to the DWARF2 symbolic -+ debugging information format. -+ Copyright (C) 1992, 1993, 1995, 1996, 1997, 1999, 2000, 2001, 2002 -+ Free Software Foundation, Inc. -+ -+ Written by Gary Funck (gary@intrepid.com) The Ada Joint Program -+ Office (AJPO), Florida State Unviversity and Silicon Graphics Inc. -+ provided support for this effort -- June 21, 1995. -+ -+ Derived from the DWARF 1 implementation written by Ron Guilmette -+ (rfg@netcom.com), November 1990. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it under -+ the terms of the GNU General Public License as published by the Free -+ Software Foundation; either version 2, or (at your option) any later -+ version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING. If not, write to the Free -+ Software Foundation, 59 Temple Place - Suite 330, Boston, MA -+ 02111-1307, USA. */ -+ -+/* This file is derived from the DWARF specification (a public document) -+ Revision 2.0.0 (July 27, 1993) developed by the UNIX International -+ Programming Languages Special Interest Group (UI/PLSIG) and distributed -+ by UNIX International. Copies of this specification are available from -+ UNIX International, 20 Waterview Boulevard, Parsippany, NJ, 07054. -+ -+ This file also now contains definitions from the DWARF 3 specification. */ -+ -+/* This file is shared between GCC and GDB, and should not contain -+ prototypes. 
*/ -+ -+#ifndef _ELF_DWARF2_H -+#define _ELF_DWARF2_H -+ -+/* Structure found in the .debug_line section. */ -+#ifndef __ASSEMBLY__ -+typedef struct -+{ -+ unsigned char li_length [4]; -+ unsigned char li_version [2]; -+ unsigned char li_prologue_length [4]; -+ unsigned char li_min_insn_length [1]; -+ unsigned char li_default_is_stmt [1]; -+ unsigned char li_line_base [1]; -+ unsigned char li_line_range [1]; -+ unsigned char li_opcode_base [1]; -+} -+DWARF2_External_LineInfo; -+ -+typedef struct -+{ -+ unsigned long li_length; -+ unsigned short li_version; -+ unsigned int li_prologue_length; -+ unsigned char li_min_insn_length; -+ unsigned char li_default_is_stmt; -+ int li_line_base; -+ unsigned char li_line_range; -+ unsigned char li_opcode_base; -+} -+DWARF2_Internal_LineInfo; -+ -+/* Structure found in .debug_pubnames section. */ -+typedef struct -+{ -+ unsigned char pn_length [4]; -+ unsigned char pn_version [2]; -+ unsigned char pn_offset [4]; -+ unsigned char pn_size [4]; -+} -+DWARF2_External_PubNames; -+ -+typedef struct -+{ -+ unsigned long pn_length; -+ unsigned short pn_version; -+ unsigned long pn_offset; -+ unsigned long pn_size; -+} -+DWARF2_Internal_PubNames; -+ -+/* Structure found in .debug_info section. 
*/ -+typedef struct -+{ -+ unsigned char cu_length [4]; -+ unsigned char cu_version [2]; -+ unsigned char cu_abbrev_offset [4]; -+ unsigned char cu_pointer_size [1]; -+} -+DWARF2_External_CompUnit; -+ -+typedef struct -+{ -+ unsigned long cu_length; -+ unsigned short cu_version; -+ unsigned long cu_abbrev_offset; -+ unsigned char cu_pointer_size; -+} -+DWARF2_Internal_CompUnit; -+ -+typedef struct -+{ -+ unsigned char ar_length [4]; -+ unsigned char ar_version [2]; -+ unsigned char ar_info_offset [4]; -+ unsigned char ar_pointer_size [1]; -+ unsigned char ar_segment_size [1]; -+} -+DWARF2_External_ARange; -+ -+typedef struct -+{ -+ unsigned long ar_length; -+ unsigned short ar_version; -+ unsigned long ar_info_offset; -+ unsigned char ar_pointer_size; -+ unsigned char ar_segment_size; -+} -+DWARF2_Internal_ARange; -+ -+#define ENUM(name) enum name { -+#define IF_NOT_ASM(a) a -+#define COMMA , -+#else -+#define ENUM(name) -+#define IF_NOT_ASM(a) -+#define COMMA -+ -+#endif -+ -+/* Tag names and codes. 
*/ -+ENUM(dwarf_tag) -+ -+ DW_TAG_padding = 0x00 COMMA -+ DW_TAG_array_type = 0x01 COMMA -+ DW_TAG_class_type = 0x02 COMMA -+ DW_TAG_entry_point = 0x03 COMMA -+ DW_TAG_enumeration_type = 0x04 COMMA -+ DW_TAG_formal_parameter = 0x05 COMMA -+ DW_TAG_imported_declaration = 0x08 COMMA -+ DW_TAG_label = 0x0a COMMA -+ DW_TAG_lexical_block = 0x0b COMMA -+ DW_TAG_member = 0x0d COMMA -+ DW_TAG_pointer_type = 0x0f COMMA -+ DW_TAG_reference_type = 0x10 COMMA -+ DW_TAG_compile_unit = 0x11 COMMA -+ DW_TAG_string_type = 0x12 COMMA -+ DW_TAG_structure_type = 0x13 COMMA -+ DW_TAG_subroutine_type = 0x15 COMMA -+ DW_TAG_typedef = 0x16 COMMA -+ DW_TAG_union_type = 0x17 COMMA -+ DW_TAG_unspecified_parameters = 0x18 COMMA -+ DW_TAG_variant = 0x19 COMMA -+ DW_TAG_common_block = 0x1a COMMA -+ DW_TAG_common_inclusion = 0x1b COMMA -+ DW_TAG_inheritance = 0x1c COMMA -+ DW_TAG_inlined_subroutine = 0x1d COMMA -+ DW_TAG_module = 0x1e COMMA -+ DW_TAG_ptr_to_member_type = 0x1f COMMA -+ DW_TAG_set_type = 0x20 COMMA -+ DW_TAG_subrange_type = 0x21 COMMA -+ DW_TAG_with_stmt = 0x22 COMMA -+ DW_TAG_access_declaration = 0x23 COMMA -+ DW_TAG_base_type = 0x24 COMMA -+ DW_TAG_catch_block = 0x25 COMMA -+ DW_TAG_const_type = 0x26 COMMA -+ DW_TAG_constant = 0x27 COMMA -+ DW_TAG_enumerator = 0x28 COMMA -+ DW_TAG_file_type = 0x29 COMMA -+ DW_TAG_friend = 0x2a COMMA -+ DW_TAG_namelist = 0x2b COMMA -+ DW_TAG_namelist_item = 0x2c COMMA -+ DW_TAG_packed_type = 0x2d COMMA -+ DW_TAG_subprogram = 0x2e COMMA -+ DW_TAG_template_type_param = 0x2f COMMA -+ DW_TAG_template_value_param = 0x30 COMMA -+ DW_TAG_thrown_type = 0x31 COMMA -+ DW_TAG_try_block = 0x32 COMMA -+ DW_TAG_variant_part = 0x33 COMMA -+ DW_TAG_variable = 0x34 COMMA -+ DW_TAG_volatile_type = 0x35 COMMA -+ /* DWARF 3. 
*/ -+ DW_TAG_dwarf_procedure = 0x36 COMMA -+ DW_TAG_restrict_type = 0x37 COMMA -+ DW_TAG_interface_type = 0x38 COMMA -+ DW_TAG_namespace = 0x39 COMMA -+ DW_TAG_imported_module = 0x3a COMMA -+ DW_TAG_unspecified_type = 0x3b COMMA -+ DW_TAG_partial_unit = 0x3c COMMA -+ DW_TAG_imported_unit = 0x3d COMMA -+ /* SGI/MIPS Extensions. */ -+ DW_TAG_MIPS_loop = 0x4081 COMMA -+ /* GNU extensions. */ -+ DW_TAG_format_label = 0x4101 COMMA /* For FORTRAN 77 and Fortran 90. */ -+ DW_TAG_function_template = 0x4102 COMMA /* For C++. */ -+ DW_TAG_class_template = 0x4103 COMMA /* For C++. */ -+ DW_TAG_GNU_BINCL = 0x4104 COMMA -+ DW_TAG_GNU_EINCL = 0x4105 COMMA -+ /* Extensions for UPC. See: http://upc.gwu.edu/~upc. */ -+ DW_TAG_upc_shared_type = 0x8765 COMMA -+ DW_TAG_upc_strict_type = 0x8766 COMMA -+ DW_TAG_upc_relaxed_type = 0x8767 -+IF_NOT_ASM(};) -+ -+#define DW_TAG_lo_user 0x4080 -+#define DW_TAG_hi_user 0xffff -+ -+/* Flag that tells whether entry has a child or not. */ -+#define DW_children_no 0 -+#define DW_children_yes 1 -+ -+/* Form names and codes. */ -+ENUM(dwarf_form) -+ -+ DW_FORM_addr = 0x01 COMMA -+ DW_FORM_block2 = 0x03 COMMA -+ DW_FORM_block4 = 0x04 COMMA -+ DW_FORM_data2 = 0x05 COMMA -+ DW_FORM_data4 = 0x06 COMMA -+ DW_FORM_data8 = 0x07 COMMA -+ DW_FORM_string = 0x08 COMMA -+ DW_FORM_block = 0x09 COMMA -+ DW_FORM_block1 = 0x0a COMMA -+ DW_FORM_data1 = 0x0b COMMA -+ DW_FORM_flag = 0x0c COMMA -+ DW_FORM_sdata = 0x0d COMMA -+ DW_FORM_strp = 0x0e COMMA -+ DW_FORM_udata = 0x0f COMMA -+ DW_FORM_ref_addr = 0x10 COMMA -+ DW_FORM_ref1 = 0x11 COMMA -+ DW_FORM_ref2 = 0x12 COMMA -+ DW_FORM_ref4 = 0x13 COMMA -+ DW_FORM_ref8 = 0x14 COMMA -+ DW_FORM_ref_udata = 0x15 COMMA -+ DW_FORM_indirect = 0x16 -+IF_NOT_ASM(};) -+ -+/* Attribute names and codes. 
*/ -+ -+ENUM(dwarf_attribute) -+ -+ DW_AT_sibling = 0x01 COMMA -+ DW_AT_location = 0x02 COMMA -+ DW_AT_name = 0x03 COMMA -+ DW_AT_ordering = 0x09 COMMA -+ DW_AT_subscr_data = 0x0a COMMA -+ DW_AT_byte_size = 0x0b COMMA -+ DW_AT_bit_offset = 0x0c COMMA -+ DW_AT_bit_size = 0x0d COMMA -+ DW_AT_element_list = 0x0f COMMA -+ DW_AT_stmt_list = 0x10 COMMA -+ DW_AT_low_pc = 0x11 COMMA -+ DW_AT_high_pc = 0x12 COMMA -+ DW_AT_language = 0x13 COMMA -+ DW_AT_member = 0x14 COMMA -+ DW_AT_discr = 0x15 COMMA -+ DW_AT_discr_value = 0x16 COMMA -+ DW_AT_visibility = 0x17 COMMA -+ DW_AT_import = 0x18 COMMA -+ DW_AT_string_length = 0x19 COMMA -+ DW_AT_common_reference = 0x1a COMMA -+ DW_AT_comp_dir = 0x1b COMMA -+ DW_AT_const_value = 0x1c COMMA -+ DW_AT_containing_type = 0x1d COMMA -+ DW_AT_default_value = 0x1e COMMA -+ DW_AT_inline = 0x20 COMMA -+ DW_AT_is_optional = 0x21 COMMA -+ DW_AT_lower_bound = 0x22 COMMA -+ DW_AT_producer = 0x25 COMMA -+ DW_AT_prototyped = 0x27 COMMA -+ DW_AT_return_addr = 0x2a COMMA -+ DW_AT_start_scope = 0x2c COMMA -+ DW_AT_stride_size = 0x2e COMMA -+ DW_AT_upper_bound = 0x2f COMMA -+ DW_AT_abstract_origin = 0x31 COMMA -+ DW_AT_accessibility = 0x32 COMMA -+ DW_AT_address_class = 0x33 COMMA -+ DW_AT_artificial = 0x34 COMMA -+ DW_AT_base_types = 0x35 COMMA -+ DW_AT_calling_convention = 0x36 COMMA -+ DW_AT_count = 0x37 COMMA -+ DW_AT_data_member_location = 0x38 COMMA -+ DW_AT_decl_column = 0x39 COMMA -+ DW_AT_decl_file = 0x3a COMMA -+ DW_AT_decl_line = 0x3b COMMA -+ DW_AT_declaration = 0x3c COMMA -+ DW_AT_discr_list = 0x3d COMMA -+ DW_AT_encoding = 0x3e COMMA -+ DW_AT_external = 0x3f COMMA -+ DW_AT_frame_base = 0x40 COMMA -+ DW_AT_friend = 0x41 COMMA -+ DW_AT_identifier_case = 0x42 COMMA -+ DW_AT_macro_info = 0x43 COMMA -+ DW_AT_namelist_items = 0x44 COMMA -+ DW_AT_priority = 0x45 COMMA -+ DW_AT_segment = 0x46 COMMA -+ DW_AT_specification = 0x47 COMMA -+ DW_AT_static_link = 0x48 COMMA -+ DW_AT_type = 0x49 COMMA -+ DW_AT_use_location = 0x4a COMMA -+ 
DW_AT_variable_parameter = 0x4b COMMA -+ DW_AT_virtuality = 0x4c COMMA -+ DW_AT_vtable_elem_location = 0x4d COMMA -+ /* DWARF 3 values. */ -+ DW_AT_allocated = 0x4e COMMA -+ DW_AT_associated = 0x4f COMMA -+ DW_AT_data_location = 0x50 COMMA -+ DW_AT_stride = 0x51 COMMA -+ DW_AT_entry_pc = 0x52 COMMA -+ DW_AT_use_UTF8 = 0x53 COMMA -+ DW_AT_extension = 0x54 COMMA -+ DW_AT_ranges = 0x55 COMMA -+ DW_AT_trampoline = 0x56 COMMA -+ DW_AT_call_column = 0x57 COMMA -+ DW_AT_call_file = 0x58 COMMA -+ DW_AT_call_line = 0x59 COMMA -+ /* SGI/MIPS extensions. */ -+ DW_AT_MIPS_fde = 0x2001 COMMA -+ DW_AT_MIPS_loop_begin = 0x2002 COMMA -+ DW_AT_MIPS_tail_loop_begin = 0x2003 COMMA -+ DW_AT_MIPS_epilog_begin = 0x2004 COMMA -+ DW_AT_MIPS_loop_unroll_factor = 0x2005 COMMA -+ DW_AT_MIPS_software_pipeline_depth = 0x2006 COMMA -+ DW_AT_MIPS_linkage_name = 0x2007 COMMA -+ DW_AT_MIPS_stride = 0x2008 COMMA -+ DW_AT_MIPS_abstract_name = 0x2009 COMMA -+ DW_AT_MIPS_clone_origin = 0x200a COMMA -+ DW_AT_MIPS_has_inlines = 0x200b COMMA -+ /* GNU extensions. */ -+ DW_AT_sf_names = 0x2101 COMMA -+ DW_AT_src_info = 0x2102 COMMA -+ DW_AT_mac_info = 0x2103 COMMA -+ DW_AT_src_coords = 0x2104 COMMA -+ DW_AT_body_begin = 0x2105 COMMA -+ DW_AT_body_end = 0x2106 COMMA -+ DW_AT_GNU_vector = 0x2107 COMMA -+ /* VMS extensions. */ -+ DW_AT_VMS_rtnbeg_pd_address = 0x2201 COMMA -+ /* UPC extension. */ -+ DW_AT_upc_threads_scaled = 0x3210 -+IF_NOT_ASM(};) -+ -+#define DW_AT_lo_user 0x2000 /* Implementation-defined range start. */ -+#define DW_AT_hi_user 0x3ff0 /* Implementation-defined range end. */ -+ -+/* Location atom names and codes. 
*/ -+ENUM(dwarf_location_atom) -+ -+ DW_OP_addr = 0x03 COMMA -+ DW_OP_deref = 0x06 COMMA -+ DW_OP_const1u = 0x08 COMMA -+ DW_OP_const1s = 0x09 COMMA -+ DW_OP_const2u = 0x0a COMMA -+ DW_OP_const2s = 0x0b COMMA -+ DW_OP_const4u = 0x0c COMMA -+ DW_OP_const4s = 0x0d COMMA -+ DW_OP_const8u = 0x0e COMMA -+ DW_OP_const8s = 0x0f COMMA -+ DW_OP_constu = 0x10 COMMA -+ DW_OP_consts = 0x11 COMMA -+ DW_OP_dup = 0x12 COMMA -+ DW_OP_drop = 0x13 COMMA -+ DW_OP_over = 0x14 COMMA -+ DW_OP_pick = 0x15 COMMA -+ DW_OP_swap = 0x16 COMMA -+ DW_OP_rot = 0x17 COMMA -+ DW_OP_xderef = 0x18 COMMA -+ DW_OP_abs = 0x19 COMMA -+ DW_OP_and = 0x1a COMMA -+ DW_OP_div = 0x1b COMMA -+ DW_OP_minus = 0x1c COMMA -+ DW_OP_mod = 0x1d COMMA -+ DW_OP_mul = 0x1e COMMA -+ DW_OP_neg = 0x1f COMMA -+ DW_OP_not = 0x20 COMMA -+ DW_OP_or = 0x21 COMMA -+ DW_OP_plus = 0x22 COMMA -+ DW_OP_plus_uconst = 0x23 COMMA -+ DW_OP_shl = 0x24 COMMA -+ DW_OP_shr = 0x25 COMMA -+ DW_OP_shra = 0x26 COMMA -+ DW_OP_xor = 0x27 COMMA -+ DW_OP_bra = 0x28 COMMA -+ DW_OP_eq = 0x29 COMMA -+ DW_OP_ge = 0x2a COMMA -+ DW_OP_gt = 0x2b COMMA -+ DW_OP_le = 0x2c COMMA -+ DW_OP_lt = 0x2d COMMA -+ DW_OP_ne = 0x2e COMMA -+ DW_OP_skip = 0x2f COMMA -+ DW_OP_lit0 = 0x30 COMMA -+ DW_OP_lit1 = 0x31 COMMA -+ DW_OP_lit2 = 0x32 COMMA -+ DW_OP_lit3 = 0x33 COMMA -+ DW_OP_lit4 = 0x34 COMMA -+ DW_OP_lit5 = 0x35 COMMA -+ DW_OP_lit6 = 0x36 COMMA -+ DW_OP_lit7 = 0x37 COMMA -+ DW_OP_lit8 = 0x38 COMMA -+ DW_OP_lit9 = 0x39 COMMA -+ DW_OP_lit10 = 0x3a COMMA -+ DW_OP_lit11 = 0x3b COMMA -+ DW_OP_lit12 = 0x3c COMMA -+ DW_OP_lit13 = 0x3d COMMA -+ DW_OP_lit14 = 0x3e COMMA -+ DW_OP_lit15 = 0x3f COMMA -+ DW_OP_lit16 = 0x40 COMMA -+ DW_OP_lit17 = 0x41 COMMA -+ DW_OP_lit18 = 0x42 COMMA -+ DW_OP_lit19 = 0x43 COMMA -+ DW_OP_lit20 = 0x44 COMMA -+ DW_OP_lit21 = 0x45 COMMA -+ DW_OP_lit22 = 0x46 COMMA -+ DW_OP_lit23 = 0x47 COMMA -+ DW_OP_lit24 = 0x48 COMMA -+ DW_OP_lit25 = 0x49 COMMA -+ DW_OP_lit26 = 0x4a COMMA -+ DW_OP_lit27 = 0x4b COMMA -+ DW_OP_lit28 = 0x4c COMMA -+ DW_OP_lit29 = 
0x4d COMMA -+ DW_OP_lit30 = 0x4e COMMA -+ DW_OP_lit31 = 0x4f COMMA -+ DW_OP_reg0 = 0x50 COMMA -+ DW_OP_reg1 = 0x51 COMMA -+ DW_OP_reg2 = 0x52 COMMA -+ DW_OP_reg3 = 0x53 COMMA -+ DW_OP_reg4 = 0x54 COMMA -+ DW_OP_reg5 = 0x55 COMMA -+ DW_OP_reg6 = 0x56 COMMA -+ DW_OP_reg7 = 0x57 COMMA -+ DW_OP_reg8 = 0x58 COMMA -+ DW_OP_reg9 = 0x59 COMMA -+ DW_OP_reg10 = 0x5a COMMA -+ DW_OP_reg11 = 0x5b COMMA -+ DW_OP_reg12 = 0x5c COMMA -+ DW_OP_reg13 = 0x5d COMMA -+ DW_OP_reg14 = 0x5e COMMA -+ DW_OP_reg15 = 0x5f COMMA -+ DW_OP_reg16 = 0x60 COMMA -+ DW_OP_reg17 = 0x61 COMMA -+ DW_OP_reg18 = 0x62 COMMA -+ DW_OP_reg19 = 0x63 COMMA -+ DW_OP_reg20 = 0x64 COMMA -+ DW_OP_reg21 = 0x65 COMMA -+ DW_OP_reg22 = 0x66 COMMA -+ DW_OP_reg23 = 0x67 COMMA -+ DW_OP_reg24 = 0x68 COMMA -+ DW_OP_reg25 = 0x69 COMMA -+ DW_OP_reg26 = 0x6a COMMA -+ DW_OP_reg27 = 0x6b COMMA -+ DW_OP_reg28 = 0x6c COMMA -+ DW_OP_reg29 = 0x6d COMMA -+ DW_OP_reg30 = 0x6e COMMA -+ DW_OP_reg31 = 0x6f COMMA -+ DW_OP_breg0 = 0x70 COMMA -+ DW_OP_breg1 = 0x71 COMMA -+ DW_OP_breg2 = 0x72 COMMA -+ DW_OP_breg3 = 0x73 COMMA -+ DW_OP_breg4 = 0x74 COMMA -+ DW_OP_breg5 = 0x75 COMMA -+ DW_OP_breg6 = 0x76 COMMA -+ DW_OP_breg7 = 0x77 COMMA -+ DW_OP_breg8 = 0x78 COMMA -+ DW_OP_breg9 = 0x79 COMMA -+ DW_OP_breg10 = 0x7a COMMA -+ DW_OP_breg11 = 0x7b COMMA -+ DW_OP_breg12 = 0x7c COMMA -+ DW_OP_breg13 = 0x7d COMMA -+ DW_OP_breg14 = 0x7e COMMA -+ DW_OP_breg15 = 0x7f COMMA -+ DW_OP_breg16 = 0x80 COMMA -+ DW_OP_breg17 = 0x81 COMMA -+ DW_OP_breg18 = 0x82 COMMA -+ DW_OP_breg19 = 0x83 COMMA -+ DW_OP_breg20 = 0x84 COMMA -+ DW_OP_breg21 = 0x85 COMMA -+ DW_OP_breg22 = 0x86 COMMA -+ DW_OP_breg23 = 0x87 COMMA -+ DW_OP_breg24 = 0x88 COMMA -+ DW_OP_breg25 = 0x89 COMMA -+ DW_OP_breg26 = 0x8a COMMA -+ DW_OP_breg27 = 0x8b COMMA -+ DW_OP_breg28 = 0x8c COMMA -+ DW_OP_breg29 = 0x8d COMMA -+ DW_OP_breg30 = 0x8e COMMA -+ DW_OP_breg31 = 0x8f COMMA -+ DW_OP_regx = 0x90 COMMA -+ DW_OP_fbreg = 0x91 COMMA -+ DW_OP_bregx = 0x92 COMMA -+ DW_OP_piece = 0x93 COMMA -+ 
DW_OP_deref_size = 0x94 COMMA -+ DW_OP_xderef_size = 0x95 COMMA -+ DW_OP_nop = 0x96 COMMA -+ /* DWARF 3 extensions. */ -+ DW_OP_push_object_address = 0x97 COMMA -+ DW_OP_call2 = 0x98 COMMA -+ DW_OP_call4 = 0x99 COMMA -+ DW_OP_call_ref = 0x9a COMMA -+ /* GNU extensions. */ -+ DW_OP_GNU_push_tls_address = 0xe0 -+IF_NOT_ASM(};) -+ -+#define DW_OP_lo_user 0xe0 /* Implementation-defined range start. */ -+#define DW_OP_hi_user 0xff /* Implementation-defined range end. */ -+ -+/* Type encodings. */ -+ENUM(dwarf_type) -+ -+ DW_ATE_void = 0x0 COMMA -+ DW_ATE_address = 0x1 COMMA -+ DW_ATE_boolean = 0x2 COMMA -+ DW_ATE_complex_float = 0x3 COMMA -+ DW_ATE_float = 0x4 COMMA -+ DW_ATE_signed = 0x5 COMMA -+ DW_ATE_signed_char = 0x6 COMMA -+ DW_ATE_unsigned = 0x7 COMMA -+ DW_ATE_unsigned_char = 0x8 COMMA -+ /* DWARF 3. */ -+ DW_ATE_imaginary_float = 0x9 -+IF_NOT_ASM(};) -+ -+#define DW_ATE_lo_user 0x80 -+#define DW_ATE_hi_user 0xff -+ -+/* Array ordering names and codes. */ -+ENUM(dwarf_array_dim_ordering) -+ -+ DW_ORD_row_major = 0 COMMA -+ DW_ORD_col_major = 1 -+IF_NOT_ASM(};) -+ -+/* Access attribute. */ -+ENUM(dwarf_access_attribute) -+ -+ DW_ACCESS_public = 1 COMMA -+ DW_ACCESS_protected = 2 COMMA -+ DW_ACCESS_private = 3 -+IF_NOT_ASM(};) -+ -+/* Visibility. */ -+ENUM(dwarf_visibility_attribute) -+ -+ DW_VIS_local = 1 COMMA -+ DW_VIS_exported = 2 COMMA -+ DW_VIS_qualified = 3 -+IF_NOT_ASM(};) -+ -+/* Virtuality. */ -+ENUM(dwarf_virtuality_attribute) -+ -+ DW_VIRTUALITY_none = 0 COMMA -+ DW_VIRTUALITY_virtual = 1 COMMA -+ DW_VIRTUALITY_pure_virtual = 2 -+IF_NOT_ASM(};) -+ -+/* Case sensitivity. */ -+ENUM(dwarf_id_case) -+ -+ DW_ID_case_sensitive = 0 COMMA -+ DW_ID_up_case = 1 COMMA -+ DW_ID_down_case = 2 COMMA -+ DW_ID_case_insensitive = 3 -+IF_NOT_ASM(};) -+ -+/* Calling convention. 
*/ -+ENUM(dwarf_calling_convention) -+ -+ DW_CC_normal = 0x1 COMMA -+ DW_CC_program = 0x2 COMMA -+ DW_CC_nocall = 0x3 -+IF_NOT_ASM(};) -+ -+#define DW_CC_lo_user 0x40 -+#define DW_CC_hi_user 0xff -+ -+/* Inline attribute. */ -+ENUM(dwarf_inline_attribute) -+ -+ DW_INL_not_inlined = 0 COMMA -+ DW_INL_inlined = 1 COMMA -+ DW_INL_declared_not_inlined = 2 COMMA -+ DW_INL_declared_inlined = 3 -+IF_NOT_ASM(};) -+ -+/* Discriminant lists. */ -+ENUM(dwarf_discrim_list) -+ -+ DW_DSC_label = 0 COMMA -+ DW_DSC_range = 1 -+IF_NOT_ASM(};) -+ -+/* Line number opcodes. */ -+ENUM(dwarf_line_number_ops) -+ -+ DW_LNS_extended_op = 0 COMMA -+ DW_LNS_copy = 1 COMMA -+ DW_LNS_advance_pc = 2 COMMA -+ DW_LNS_advance_line = 3 COMMA -+ DW_LNS_set_file = 4 COMMA -+ DW_LNS_set_column = 5 COMMA -+ DW_LNS_negate_stmt = 6 COMMA -+ DW_LNS_set_basic_block = 7 COMMA -+ DW_LNS_const_add_pc = 8 COMMA -+ DW_LNS_fixed_advance_pc = 9 COMMA -+ /* DWARF 3. */ -+ DW_LNS_set_prologue_end = 10 COMMA -+ DW_LNS_set_epilogue_begin = 11 COMMA -+ DW_LNS_set_isa = 12 -+IF_NOT_ASM(};) -+ -+/* Line number extended opcodes. */ -+ENUM(dwarf_line_number_x_ops) -+ -+ DW_LNE_end_sequence = 1 COMMA -+ DW_LNE_set_address = 2 COMMA -+ DW_LNE_define_file = 3 -+IF_NOT_ASM(};) -+ -+/* Call frame information. */ -+ENUM(dwarf_call_frame_info) -+ -+ DW_CFA_advance_loc = 0x40 COMMA -+ DW_CFA_offset = 0x80 COMMA -+ DW_CFA_restore = 0xc0 COMMA -+ DW_CFA_nop = 0x00 COMMA -+ DW_CFA_set_loc = 0x01 COMMA -+ DW_CFA_advance_loc1 = 0x02 COMMA -+ DW_CFA_advance_loc2 = 0x03 COMMA -+ DW_CFA_advance_loc4 = 0x04 COMMA -+ DW_CFA_offset_extended = 0x05 COMMA -+ DW_CFA_restore_extended = 0x06 COMMA -+ DW_CFA_undefined = 0x07 COMMA -+ DW_CFA_same_value = 0x08 COMMA -+ DW_CFA_register = 0x09 COMMA -+ DW_CFA_remember_state = 0x0a COMMA -+ DW_CFA_restore_state = 0x0b COMMA -+ DW_CFA_def_cfa = 0x0c COMMA -+ DW_CFA_def_cfa_register = 0x0d COMMA -+ DW_CFA_def_cfa_offset = 0x0e COMMA -+ -+ /* DWARF 3. 
*/ -+ DW_CFA_def_cfa_expression = 0x0f COMMA -+ DW_CFA_expression = 0x10 COMMA -+ DW_CFA_offset_extended_sf = 0x11 COMMA -+ DW_CFA_def_cfa_sf = 0x12 COMMA -+ DW_CFA_def_cfa_offset_sf = 0x13 COMMA -+ -+ /* SGI/MIPS specific. */ -+ DW_CFA_MIPS_advance_loc8 = 0x1d COMMA -+ -+ /* GNU extensions. */ -+ DW_CFA_GNU_window_save = 0x2d COMMA -+ DW_CFA_GNU_args_size = 0x2e COMMA -+ DW_CFA_GNU_negative_offset_extended = 0x2f -+IF_NOT_ASM(};) -+ -+#define DW_CIE_ID 0xffffffff -+#define DW_CIE_VERSION 1 -+ -+#define DW_CFA_extended 0 -+#define DW_CFA_lo_user 0x1c -+#define DW_CFA_hi_user 0x3f -+ -+#define DW_CHILDREN_no 0x00 -+#define DW_CHILDREN_yes 0x01 -+ -+#define DW_ADDR_none 0 -+ -+/* Source language names and codes. */ -+ENUM(dwarf_source_language) -+ -+ DW_LANG_C89 = 0x0001 COMMA -+ DW_LANG_C = 0x0002 COMMA -+ DW_LANG_Ada83 = 0x0003 COMMA -+ DW_LANG_C_plus_plus = 0x0004 COMMA -+ DW_LANG_Cobol74 = 0x0005 COMMA -+ DW_LANG_Cobol85 = 0x0006 COMMA -+ DW_LANG_Fortran77 = 0x0007 COMMA -+ DW_LANG_Fortran90 = 0x0008 COMMA -+ DW_LANG_Pascal83 = 0x0009 COMMA -+ DW_LANG_Modula2 = 0x000a COMMA -+ DW_LANG_Java = 0x000b COMMA -+ /* DWARF 3. */ -+ DW_LANG_C99 = 0x000c COMMA -+ DW_LANG_Ada95 = 0x000d COMMA -+ DW_LANG_Fortran95 = 0x000e COMMA -+ /* MIPS. */ -+ DW_LANG_Mips_Assembler = 0x8001 COMMA -+ /* UPC. */ -+ DW_LANG_Upc = 0x8765 -+IF_NOT_ASM(};) -+ -+#define DW_LANG_lo_user 0x8000 /* Implementation-defined range start. */ -+#define DW_LANG_hi_user 0xffff /* Implementation-defined range start. */ -+ -+/* Names and codes for macro information. */ -+ENUM(dwarf_macinfo_record_type) -+ -+ DW_MACINFO_define = 1 COMMA -+ DW_MACINFO_undef = 2 COMMA -+ DW_MACINFO_start_file = 3 COMMA -+ DW_MACINFO_end_file = 4 COMMA -+ DW_MACINFO_vendor_ext = 255 -+IF_NOT_ASM(};) -+ -+/* @@@ For use with GNU frame unwind information. 
*/ -+ -+#define DW_EH_PE_absptr 0x00 -+#define DW_EH_PE_omit 0xff -+ -+#define DW_EH_PE_uleb128 0x01 -+#define DW_EH_PE_udata2 0x02 -+#define DW_EH_PE_udata4 0x03 -+#define DW_EH_PE_udata8 0x04 -+#define DW_EH_PE_sleb128 0x09 -+#define DW_EH_PE_sdata2 0x0A -+#define DW_EH_PE_sdata4 0x0B -+#define DW_EH_PE_sdata8 0x0C -+#define DW_EH_PE_signed 0x08 -+ -+#define DW_EH_PE_pcrel 0x10 -+#define DW_EH_PE_textrel 0x20 -+#define DW_EH_PE_datarel 0x30 -+#define DW_EH_PE_funcrel 0x40 -+#define DW_EH_PE_aligned 0x50 -+ -+#define DW_EH_PE_indirect 0x80 -+ -+#endif /* _ELF_DWARF2_H */ -diff -puN /dev/null include/linux/dwarf2-lang.h ---- /dev/null 2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/include/linux/dwarf2-lang.h 2004-10-21 14:54:15.337591824 -0700 -@@ -0,0 +1,132 @@ -+#ifndef DWARF2_LANG -+#define DWARF2_LANG -+#include <linux/dwarf2.h> -+ -+/* -+ * This is free software; you can redistribute it and/or modify it under -+ * the terms of the GNU General Public License as published by the Free -+ * Software Foundation; either version 2, or (at your option) any later -+ * version. -+ */ -+/* -+ * This file defines macros that allow generation of DWARF debug records -+ * for asm files. This file is platform independent. Register numbers -+ * (which are about the only thing that is platform dependent) are to be -+ * supplied by a platform defined file. -+ */ -+#define DWARF_preamble() .section .debug_frame,"",@progbits -+/* -+ * This macro starts a debug frame section. The debug_frame describes -+ * where to find the registers that the enclosing function saved on -+ * entry. -+ * -+ * ORD is use by the label generator and should be the same as what is -+ * passed to CFI_postamble. -+ * -+ * pc, pc register gdb ordinal. -+ * -+ * code_align this is the factor used to define locations or regions -+ * where the given definitions apply. If you use labels to define these -+ * this should be 1. -+ * -+ * data_align this is the factor used to define register offsets. 
If -+ * you use struct offset, this should be the size of the register in -+ * bytes or the negative of that. This is how it is used: you will -+ * define a register as the reference register, say the stack pointer, -+ * then you will say where a register is located relative to this -+ * reference registers value, say 40 for register 3 (the gdb register -+ * number). The <40> will be multiplied by <data_align> to define the -+ * byte offset of the given register (3, in this example). So if your -+ * <40> is the byte offset and the reference register points at the -+ * begining, you would want 1 for the data_offset. If <40> was the 40th -+ * 4-byte element in that structure you would want 4. And if your -+ * reference register points at the end of the structure you would want -+ * a negative data_align value(and you would have to do other math as -+ * well). -+ */ -+ -+#define CFI_preamble(ORD, pc, code_align, data_align) \ -+.section .debug_frame,"",@progbits ; \ -+frame/**/_/**/ORD: \ -+ .long end/**/_/**/ORD-start/**/_/**/ORD; \ -+start/**/_/**/ORD: \ -+ .long DW_CIE_ID; \ -+ .byte DW_CIE_VERSION; \ -+ .byte 0 ; \ -+ .uleb128 code_align; \ -+ .sleb128 data_align; \ -+ .byte pc; -+ -+/* -+ * After the above macro and prior to the CFI_postamble, you need to -+ * define the initial state. This starts with defining the reference -+ * register and, usually the pc. Here are some helper macros: -+ */ -+ -+#define CFA_define_reference(reg, offset) \ -+ .byte DW_CFA_def_cfa; \ -+ .uleb128 reg; \ -+ .uleb128 (offset); -+ -+#define CFA_define_offset(reg, offset) \ -+ .byte (DW_CFA_offset + reg); \ -+ .uleb128 (offset); -+ -+#define CFI_postamble(ORD) \ -+ .align 4; \ -+end/**/_/**/ORD: -+/* -+ * So now your code pushs stuff on the stack, you need a new location -+ * and the rules for what to do. This starts a running description of -+ * the call frame. You need to describe what changes with respect to -+ * the call registers as the location of the pc moves through the code. 
-+ * The following builds an FDE (fram descriptor entry?). Like the -+ * above, it has a preamble and a postamble. It also is tied to the CFI -+ * above. -+ * The first entry after the preamble must be the location in the code -+ * that the call frame is being described for. -+ */ -+#define FDE_preamble(ORD, fde_no, initial_address, length) \ -+ .long FDE_end/**/_/**/fde_no-FDE_start/**/_/**/fde_no; \ -+FDE_start/**/_/**/fde_no: \ -+ .long frame/**/_/**/ORD; \ -+ .long initial_address; \ -+ .long length; -+ -+#define FDE_postamble(fde_no) \ -+ .align 4; \ -+FDE_end/**/_/**/fde_no: -+/* -+ * That done, you can now add registers, subtract registers, move the -+ * reference and even change the reference. You can also define a new -+ * area of code the info applies to. For discontinuous bits you should -+ * start a new FDE. You may have as many as you like. -+ */ -+ -+/* -+ * To advance the address by <bytes> -+ */ -+ -+#define FDE_advance(bytes) \ -+ .byte DW_CFA_advance_loc4 \ -+ .long bytes -+ -+ -+ -+/* -+ * With the above you can define all the register locations. But -+ * suppose the reference register moves... Takes the new offset NOT an -+ * increment. This is how esp is tracked if it is not saved. -+ */ -+ -+#define CFA_define_cfa_offset(offset) \ -+ .byte $DW_CFA_def_cfa_offset; \ -+ .uleb128 (offset); -+/* -+ * Or suppose you want to use a different reference register... 
-+ */ -+#define CFA_define_cfa_register(reg) \ -+ .byte DW_CFA_def_cfa_register; \ -+ .uleb128 reg; -+ -+#endif -diff -puN include/linux/serial_core.h~kgdb-ga include/linux/serial_core.h ---- 25/include/linux/serial_core.h~kgdb-ga 2004-10-21 14:54:15.282600184 -0700 -+++ 25-akpm/include/linux/serial_core.h 2004-10-21 14:54:15.338591672 -0700 -@@ -172,7 +172,9 @@ struct uart_port { - unsigned char x_char; /* xon/xoff char */ - unsigned char regshift; /* reg offset shift */ - unsigned char iotype; /* io access style */ -- -+#ifdef CONFIG_KGDB -+ int kgdb; /* in use by kgdb */ -+#endif - #define UPIO_PORT (0) - #define UPIO_HUB6 (1) - #define UPIO_MEM (2) -diff -puN include/linux/spinlock.h~kgdb-ga include/linux/spinlock.h ---- 25/include/linux/spinlock.h~kgdb-ga 2004-10-21 14:54:15.284599880 -0700 -+++ 25-akpm/include/linux/spinlock.h 2004-10-21 14:54:15.338591672 -0700 -@@ -15,6 +15,12 @@ - - #include <asm/processor.h> /* for cpu relax */ - #include <asm/system.h> -+#ifdef CONFIG_KGDB -+#include <asm/current.h> -+#define SET_WHO(x, him) (x)->who = him; -+#else -+#define SET_WHO(x, him) -+#endif - - /* - * Must define these before including other files, inline functions need them -@@ -88,6 +94,9 @@ typedef struct { - const char *module; - char *owner; - int oline; -+#ifdef CONFIG_KGDB -+ struct task_struct *who; -+#endif - } spinlock_t; - #define SPIN_LOCK_UNLOCKED (spinlock_t) { SPINLOCK_MAGIC, 0, 10, __FILE__ , NULL, 0} - -@@ -99,6 +108,7 @@ typedef struct { - (x)->module = __FILE__; \ - (x)->owner = NULL; \ - (x)->oline = 0; \ -+ SET_WHO(x, NULL) \ - } while (0) - - #define CHECK_LOCK(x) \ -@@ -121,6 +131,7 @@ typedef struct { - (x)->lock = 1; \ - (x)->owner = __FILE__; \ - (x)->oline = __LINE__; \ -+ SET_WHO(x, current) \ - } while (0) - - /* without debugging, spin_is_locked on UP always says -@@ -151,6 +162,7 @@ typedef struct { - (x)->lock = 1; \ - (x)->owner = __FILE__; \ - (x)->oline = __LINE__; \ -+ SET_WHO(x, current) \ - 1; \ - }) - -diff -puN 
kernel/pid.c~kgdb-ga kernel/pid.c ---- 25/kernel/pid.c~kgdb-ga 2004-10-21 14:54:15.285599728 -0700 -+++ 25-akpm/kernel/pid.c 2004-10-21 14:54:15.339591520 -0700 -@@ -252,6 +252,9 @@ void switch_exec_pids(task_t *leader, ta - * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or - * more. - */ -+#ifdef CONFIG_KGDB -+int kgdb_pid_init_done; /* so we don't call prior to... */ -+#endif - void __init pidhash_init(void) - { - int i, j, pidhash_size; -@@ -273,6 +276,9 @@ void __init pidhash_init(void) - for (j = 0; j < pidhash_size; j++) - INIT_HLIST_HEAD(&pid_hash[i][j]); - } -+#ifdef CONFIG_KGDB -+ kgdb_pid_init_done++; -+#endif - } - - void __init pidmap_init(void) -diff -puN kernel/sched.c~kgdb-ga kernel/sched.c ---- 25/kernel/sched.c~kgdb-ga 2004-10-21 14:54:15.287599424 -0700 -+++ 25-akpm/kernel/sched.c 2004-10-21 14:54:15.342591064 -0700 -@@ -2931,6 +2931,13 @@ out_unlock: - - EXPORT_SYMBOL(set_user_nice); - -+#ifdef CONFIG_KGDB -+struct task_struct *kgdb_get_idle(int this_cpu) -+{ -+ return cpu_rq(this_cpu)->idle; -+} -+#endif -+ - #ifdef __ARCH_WANT_SYS_NICE - - /* -diff -puN MAINTAINERS~kgdb-ga MAINTAINERS ---- 25/MAINTAINERS~kgdb-ga 2004-10-21 14:54:15.288599272 -0700 -+++ 25-akpm/MAINTAINERS 2004-10-21 14:54:15.344590760 -0700 -@@ -1242,6 +1242,12 @@ W: http://sf.net/projects/kernel-janitor - W: http://developer.osdl.org/rddunlap/kj-patches/ - S: Maintained - -+KGDB FOR I386 PLATFORM -+P: George Anzinger -+M: george@mvista.com -+L: linux-net@vger.kernel.org -+S: Supported -+ - KERNEL NFSD - P: Neil Brown - M: neilb@cse.unsw.edu.au -diff -puN arch/i386/Kconfig.debug~kgdb-ga arch/i386/Kconfig.debug ---- 25/arch/i386/Kconfig.debug~kgdb-ga 2004-10-21 14:54:15.290598968 -0700 -+++ 25-akpm/arch/i386/Kconfig.debug 2004-10-21 14:54:15.344590760 -0700 -@@ -65,4 +65,6 @@ config X86_MPPARSE - depends on X86_LOCAL_APIC && !X86_VISWS - default y - -+source "arch/i386/Kconfig.kgdb" -+ - endmenu -diff -puN /dev/null arch/i386/Kconfig.kgdb ---- /dev/null 
2003-09-15 06:40:47.000000000 -0700 -+++ 25-akpm/arch/i386/Kconfig.kgdb 2004-10-21 14:54:15.345590608 -0700 -@@ -0,0 +1,175 @@ -+config KGDB -+ bool "Include kgdb kernel debugger" -+ depends on DEBUG_KERNEL -+ help -+ If you say Y here, the system will be compiled with the debug -+ option (-g) and a debugging stub will be included in the -+ kernel. This stub communicates with gdb on another (host) -+ computer via a serial port. The host computer should have -+ access to the kernel binary file (vmlinux) and a serial port -+ that is connected to the target machine. Gdb can be made to -+ configure the serial port or you can use stty and setserial to -+ do this. See the 'target' command in gdb. This option also -+ configures in the ability to request a breakpoint early in the -+ boot process. To request the breakpoint just include 'kgdb' -+ as a boot option when booting the target machine. The system -+ will then break as soon as it looks at the boot options. This -+ option also installs a breakpoint in panic and sends any -+ kernel faults to the debugger. For more information see the -+ Documentation/i386/kgdb/kgdb.txt file. -+ -+choice -+ depends on KGDB -+ prompt "Debug serial port BAUD" -+ default KGDB_115200BAUD -+ help -+ Gdb and the kernel stub need to agree on the baud rate to be -+ used. Some systems (x86 family at this writing) allow this to -+ be configured. -+ -+config KGDB_9600BAUD -+ bool "9600" -+ -+config KGDB_19200BAUD -+ bool "19200" -+ -+config KGDB_38400BAUD -+ bool "38400" -+ -+config KGDB_57600BAUD -+ bool "57600" -+ -+config KGDB_115200BAUD -+ bool "115200" -+endchoice -+ -+config KGDB_PORT -+ hex "hex I/O port address of the debug serial port" -+ depends on KGDB -+ default 3f8 -+ help -+ Some systems (x86 family at this writing) allow the port -+ address to be configured. The number entered is assumed to be -+ hex, don't put 0x in front of it. The standard address are: -+ COM1 3f8 , irq 4 and COM2 2f8 irq 3. 
Setserial /dev/ttySx -+ will tell you what you have. It is good to test the serial -+ connection with a live system before trying to debug. -+ -+config KGDB_IRQ -+ int "IRQ of the debug serial port" -+ depends on KGDB -+ default 4 -+ help -+ This is the irq for the debug port. If everything is working -+ correctly and the kernel has interrupts on a control C to the -+ port should cause a break into the kernel debug stub. -+ -+config DEBUG_INFO -+ bool -+ depends on KGDB -+ default y -+ -+config KGDB_MORE -+ bool "Add any additional compile options" -+ depends on KGDB -+ default n -+ help -+ Saying yes here turns on the ability to enter additional -+ compile options. -+ -+ -+config KGDB_OPTIONS -+ depends on KGDB_MORE -+ string "Additional compile arguments" -+ default "-O1" -+ help -+ This option allows you enter additional compile options for -+ the whole kernel compile. Each platform will have a default -+ that seems right for it. For example on PPC "-ggdb -O1", and -+ for i386 "-O1". Note that by configuring KGDB "-g" is already -+ turned on. In addition, on i386 platforms -+ "-fomit-frame-pointer" is deleted from the standard compile -+ options. -+ -+config NO_KGDB_CPUS -+ int "Number of CPUs" -+ depends on KGDB && SMP -+ default NR_CPUS -+ help -+ -+ This option sets the number of cpus for kgdb ONLY. It is used -+ to prune some internal structures so they look "nice" when -+ displayed with gdb. This is to overcome possibly larger -+ numbers that may have been entered above. Enter the real -+ number to get nice clean kgdb_info displays. -+ -+config KGDB_TS -+ bool "Enable kgdb time stamp macros?" -+ depends on KGDB -+ default n -+ help -+ Kgdb event macros allow you to instrument your code with calls -+ to the kgdb event recording function. The event log may be -+ examined with gdb at a break point. Turning on this -+ capability also allows you to choose how many events to -+ keep. Kgdb always keeps the lastest events. 
-+ -+choice -+ depends on KGDB_TS -+ prompt "Max number of time stamps to save?" -+ default KGDB_TS_128 -+ -+config KGDB_TS_64 -+ bool "64" -+ -+config KGDB_TS_128 -+ bool "128" -+ -+config KGDB_TS_256 -+ bool "256" -+ -+config KGDB_TS_512 -+ bool "512" -+ -+config KGDB_TS_1024 -+ bool "1024" -+ -+endchoice -+ -+config STACK_OVERFLOW_TEST -+ bool "Turn on kernel stack overflow testing?" -+ depends on KGDB -+ default n -+ help -+ This option enables code in the front line interrupt handlers -+ to check for kernel stack overflow on interrupts and system -+ calls. This is part of the kgdb code on x86 systems. -+ -+config KGDB_CONSOLE -+ bool "Enable serial console thru kgdb port" -+ depends on KGDB -+ default n -+ help -+ This option enables the command line "console=kgdb" option. -+ When the system is booted with this option in the command line -+ all kernel printk output is sent to gdb (as well as to other -+ consoles). For this to work gdb must be connected. For this -+ reason, this command line option will generate a breakpoint if -+ gdb has not yet connected. After the gdb continue command is -+ given all pent up console output will be printed by gdb on the -+ host machine. Neither this option, nor KGDB require the -+ serial driver to be configured. -+ -+config KGDB_SYSRQ -+ bool "Turn on SysRq 'G' command to do a break?" -+ depends on KGDB -+ default y -+ help -+ This option includes an option in the SysRq code that allows -+ you to enter SysRq G which generates a breakpoint to the KGDB -+ stub. This will work if the keyboard is alive and can -+ interrupt the system. Because of constraints on when the -+ serial port interrupt can be enabled, this code may allow you -+ to interrupt the system before the serial port control C is -+ available. Just say yes here. 
-+ -_ diff --git a/lustre/kernel_patches/patches/8kstack-2.6-rhel4.patch b/lustre/kernel_patches/patches/8kstack-2.6-rhel4.patch deleted file mode 100644 index 36fea122e3..0000000000 --- a/lustre/kernel_patches/patches/8kstack-2.6-rhel4.patch +++ /dev/null @@ -1,13 +0,0 @@ -Index: linux-2.6.9-5.0.3.EL/include/asm-i386/thread_info.h -=================================================================== ---- linux-2.6.9-5.0.3.EL.orig/include/asm-i386/thread_info.h 2005-02-25 10:25:33.000000000 +0200 -+++ linux-2.6.9-5.0.3.EL/include/asm-i386/thread_info.h 2005-02-25 20:19:11.676139032 +0200 -@@ -54,7 +54,7 @@ - #endif - - #define PREEMPT_ACTIVE 0x4000000 --#define THREAD_SIZE (4096) -+#define THREAD_SIZE (8192) - - #define STACK_WARN (THREAD_SIZE/8) - /* diff --git a/lustre/kernel_patches/patches/bluesmoke-2.6-suse-lnxi.patch b/lustre/kernel_patches/patches/bluesmoke-2.6-suse-lnxi.patch deleted file mode 100644 index a6501a4c71..0000000000 --- a/lustre/kernel_patches/patches/bluesmoke-2.6-suse-lnxi.patch +++ /dev/null @@ -1,5485 +0,0 @@ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/Kconfig -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/Kconfig 2004-11-11 10:28:08.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/Kconfig 2004-12-17 12:45:23.000000000 -0500 -@@ -6,6 +6,8 @@ - - source "drivers/mtd/Kconfig" - -+source "drivers/bluesmoke/Kconfig" -+ - source "drivers/parport/Kconfig" - - source "drivers/pnp/Kconfig" -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/Makefile -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/Makefile 2004-11-11 10:28:16.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/Makefile 2004-12-17 12:45:23.000000000 -0500 -@@ -29,6 +29,7 @@ - obj-$(CONFIG_IEEE1394) += ieee1394/ - obj-y += cdrom/ video/ - 
obj-$(CONFIG_MTD) += mtd/ -+obj-$(CONFIG_BLUESMOKE) += bluesmoke/ - obj-$(CONFIG_PCMCIA) += pcmcia/ - obj-$(CONFIG_DIO) += dio/ - obj-$(CONFIG_SBUS) += sbus/ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/Kconfig -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/Kconfig 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/Kconfig 2004-12-17 12:46:23.000000000 -0500 -@@ -0,0 +1,72 @@ -+# -+# Bluesmoke Kconfig -+# Copyright (c) 2003 Linux Networx -+# Licensed and distributed under the GPL -+# -+# $Id: Kconfig,v 1.4 2004/11/10 01:12:35 thayne Exp $ -+# -+ -+menu 'Bluesmoke - error detection and reporting (RAS)' -+ -+config BLUESMOKE -+ tristate "Bluesmoke core system error reporting" -+ help -+ Bluesmoke is designed to report errors in the core system. -+ These are low-level errors that are reported in the CPU or -+ supporting chipset: memory errors, cache errors, PCI errors, -+ thermal throttling, etc.. If unsure, select 'Y'. -+ -+ -+comment "Reporting subsystems" -+ depends on BLUESMOKE -+ -+config BLUESMOKE_DEBUG -+ bool "Debugging" -+ depends on BLUESMOKE -+ help -+ This turns on debugging information for the entire Bluesmoke -+ sub-system. Usually you should select 'N'. -+ -+config BULESMOKE_DEBUG_VERBOSE -+ int "Debugging verbosity (0=quiet, 3=noisy)" -+ depends on BLUESMOKE_DEBUG -+ default "0" -+ help -+ Verbosity level of Bluesmoke debug messages. -+ -+config BLUESMOKE_MM_EDAC -+ tristate "Bluesmoke Main Memory EDAC (Error Detection And Correction) reporting" -+ depends on BLUESMOKE -+ help -+ Some systems are able to detect and correct errors in main -+ memory. Bluesmoke can report statistics on memory error -+ detection and correction (EDAC - or commonly referred to ECC -+ errors). 
Bluesmoke will also try to decode where these errors -+ occurred so that a particular failing memory module can be -+ replaced. If unsure, select 'Y'. -+ -+ -+comment "Bluesmoke system controller/chipset support" -+ depends on BLUESMOKE -+ -+config BLUESMOKE_AMD76X -+ tristate "AMD 76x (760, 762, 768)" -+ depends on BLUESMOKE -+ -+config BLUESMOKE_E7XXX -+ tristate "Intel e7xxx (e7205, e7500, e7501, e7505)" -+ depends on BLUESMOKE -+ -+config BLUESMOKE_E752X -+ tristate "Intel e752x (e7520)" -+ depends on BLUESMOKE -+ -+config BLUESMOKE_I82875P -+ tristate "Intel 82875p" -+ depends on BLUESMOKE -+ -+config BLUESMOKE_K8 -+ tristate "AMD K8 (Athlon FX, Athlon 64, Opteron)" -+ depends on BLUESMOKE -+ -+endmenu -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/Makefile -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/Makefile 2004-12-17 12:46:23.000000000 -0500 -@@ -0,0 +1,26 @@ -+# -+# Makefile for the Linux kernel bluesmoke drivers. -+# -+# Copyright 02 Jul 2003, Linux Networx (http://lnxi.com) -+# This file may be distributed under the terms of the -+# GNU General Public License. 
-+# -+# $Id: Makefile,v 1.4 2004/11/10 01:12:35 thayne Exp $ -+ -+ -+obj-$(CONFIG_BLUESMOKE_MM_EDAC) += bluesmoke_mc.o -+obj-$(CONFIG_BLUESMOKE_AMD76X) += bluesmoke_amd76x.o -+obj-$(CONFIG_BLUESMOKE_E7XXX) += bluesmoke_e7xxx.o -+obj-$(CONFIG_BLUESMOKE_E752X) += bluesmoke_e752x.o -+obj-$(CONFIG_BLUESMOKE_I82875P) += bluesmoke_i82875p.o -+obj-$(CONFIG_BLUESMOKE_K8) += bluesmoke_k8.o -+ -+ifeq ($(PATCHLEVEL),4) -+ -+export-objs := bluesmoke_mc.o -+ -+O_TARGET := bluesmokelink.o -+ -+include $(TOPDIR)/Rules.make -+ -+endif -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_amd76x.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/bluesmoke_amd76x.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_amd76x.c 2004-12-17 12:46:23.000000000 -0500 -@@ -0,0 +1,323 @@ -+/* -+ * AMD 76x Memory Controller kernel module -+ * (C) 2003 Linux Networx (http://lnxi.com) -+ * This file may be distributed under the terms of the -+ * GNU General Public License. -+ * -+ * Written by Thayne Harbaugh -+ * Based on work by Dan Hollis <goemon at anime dot net> and others. 
-+ * http://www.anime.net/~goemon/linux-ecc/ -+ * -+ * $Id: bluesmoke_amd76x.c,v 1.4 2004/11/10 01:12:35 thayne Exp $ -+ * -+ */ -+ -+ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/init.h> -+ -+#include <linux/pci.h> -+#include <linux/pci_ids.h> -+ -+#include <linux/slab.h> -+ -+#include "bluesmoke_mc.h" -+ -+ -+#define AMD76X_NR_CSROWS 8 -+#define AMD76X_NR_CHANS 1 -+#define AMD76X_NR_DIMMS 4 -+ -+ -+/* AMD 76x register addresses - device 0 function 0 - PCI bridge */ -+#define AMD76X_ECC_MODE_STATUS 0x48 /* Mode and status of ECC (32b) -+ * -+ * 31:16 reserved -+ * 15:14 SERR enabled: x1=ue 1x=ce -+ * 13 reserved -+ * 12 diag: disabled, enabled -+ * 11:10 mode: dis, EC, ECC, ECC+scrub -+ * 9:8 status: x1=ue 1x=ce -+ * 7:4 UE cs row -+ * 3:0 CE cs row -+ */ -+#define AMD76X_DRAM_MODE_STATUS 0x58 /* DRAM Mode and status (32b) -+ * -+ * 31:26 clock disable 5 - 0 -+ * 25 SDRAM init -+ * 24 reserved -+ * 23 mode register service -+ * 22:21 suspend to RAM -+ * 20 burst refresh enable -+ * 19 refresh disable -+ * 18 reserved -+ * 17:16 cycles-per-refresh -+ * 15:8 reserved -+ * 7:0 x4 mode enable 7 - 0 -+ */ -+#define AMD76X_MEM_BASE_ADDR 0xC0 /* Memory base address (8 x 32b) -+ * -+ * 31:23 chip-select base -+ * 22:16 reserved -+ * 15:7 chip-select mask -+ * 6:3 reserved -+ * 2:1 address mode -+ * 0 chip-select enable -+ */ -+ -+ -+enum amd76x_chips { -+ AMD761 = 0, -+ AMD762 -+}; -+ -+ -+struct amd76x_dev_info { -+ const char *ctl_name; -+}; -+ -+ -+static const struct amd76x_dev_info amd76x_devs[] = { -+ [AMD761] = { -+ .ctl_name = "AMD761" -+ }, -+ [AMD762] = { -+ .ctl_name = "AMD762" -+ }, -+}; -+ -+ -+static void amd76x_check(struct mem_ctl_info *mci) -+{ -+ u32 ems; -+ -+ debugf1( "MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__ ); -+ -+ pci_read_config_dword(mci->pdev, AMD76X_ECC_MODE_STATUS, &ems); -+ -+ if ( ems & BIT(8) ) { /* UE? 
*/ -+ u32 ems_ue_row = (ems >> 4) & 0xf; -+ -+ pci_write_bits32( mci->pdev, AMD76X_ECC_MODE_STATUS, -+ (u32)BIT(8), (u32)BIT(8) ); -+ -+ bluesmoke_mc_handle_ue( mci, mci->csrows[ems_ue_row].first_page, -+ 0, ems_ue_row, mci->ctl_name ); -+ } -+ -+ if ( ems & BIT(9) ) { /* CE? */ -+ u32 ems_ce_row = ems & 0xf; -+ -+ pci_write_bits32( mci->pdev, AMD76X_ECC_MODE_STATUS, -+ (u32)BIT(9), (u32)BIT(9) ); -+ -+ bluesmoke_mc_handle_ce( mci, mci->csrows[ems_ce_row].first_page, -+ 0, 0, ems_ce_row, 0, mci->ctl_name ); -+ } -+ return; -+} -+ -+ -+static int amd76x_probe1( struct pci_dev *pdev, int dev_idx ) -+{ -+ int rc = -ENODEV; -+ int index; -+ struct mem_ctl_info *mci = NULL; -+ enum edac_type ems_modes[] = { EDAC_NONE, -+ EDAC_EC, -+ EDAC_SECDED, -+ EDAC_SECDED }; -+ u32 ems; -+ u32 ems_mode; -+ -+ debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ pci_read_config_dword(pdev, AMD76X_ECC_MODE_STATUS, &ems); -+ ems_mode = ( ems >> 10 ) & 0x3; -+ -+ mci = bluesmoke_mc_init_structs(0, -+ AMD76X_NR_CSROWS, -+ AMD76X_NR_CHANS); -+ -+ if ( ! mci ) { -+ rc = -ENOMEM; -+ goto FAIL_FINISHED; -+ } -+ -+ debugf0( "MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci ); -+ -+ mci->pdev = pdev; -+ mci->mtype_cap = MEM_FLAG_RDDR; -+ -+ mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; -+ if ( ems_mode ) { -+ mci->edac_cap = EDAC_FLAG_EC | EDAC_FLAG_SECDED; -+ } else { -+ mci->edac_cap = EDAC_FLAG_NONE; -+ } -+ -+ mci->mod_name = BS_MOD_STR; -+ mci->mod_ver = "$Revision: 1.4 $"; -+ mci->ctl_name = amd76x_devs[dev_idx].ctl_name; -+ mci->edac_check = amd76x_check; -+ mci->clear_err = NULL; -+ mci->ctl_page_to_phys = NULL; -+ -+ for ( index = 0; index < mci->nr_csrows; index++ ) { -+ struct csrow_info *csrow = &mci->csrows[ index ]; -+ u32 mba; -+ u32 mba_base; -+ u32 mba_mask; -+ u32 dms; -+ -+ /* find the DRAM Chip Select Base address and mask */ -+ pci_read_config_dword( mci->pdev, -+ AMD76X_MEM_BASE_ADDR + (index*4), -+ &mba ); -+ -+ if ( ! 
(mba & BIT(0)) ) { -+ continue; -+ } -+ -+ mba_base = mba & 0xff800000UL; -+ mba_mask = ((mba & 0xff80) << 16) | 0x7fffffUL; -+ -+ pci_read_config_dword( mci->pdev, -+ AMD76X_DRAM_MODE_STATUS, -+ &dms ); -+ -+ csrow->first_page = mba_base >> PAGE_SHIFT; -+ csrow->nr_pages = (mba_mask + 1) >> PAGE_SHIFT; -+ csrow->last_page = csrow->first_page + csrow->nr_pages - 1; -+ csrow->page_mask = mba_mask >> PAGE_SHIFT; -+ csrow->grain = csrow->nr_pages << PAGE_SHIFT; -+ csrow->mtype = MEM_RDDR; -+ csrow->dtype = ((dms >> index) & 0x1) ? DEV_X4 : DEV_UNKNOWN; -+ csrow->edac_mode = ems_modes[ ems_mode ]; -+ } -+ -+ /* clear counters */ -+ pci_write_bits32( mci->pdev, AMD76X_ECC_MODE_STATUS, -+ (u32)(0x3 << 8), (u32)(0x3 << 8) ); -+ -+ if ( 0 != bluesmoke_mc_add_mc( mci ) ) { -+ debugf3( "MC: " __FILE__ -+ ": %s(): failed bluesmoke_mc_add_mc()\n", __func__ ); -+ goto FAIL_FINISHED; -+ } -+ -+ /* get this far and it's successful */ -+ debugf3( "MC: " __FILE__ ": %s(): success\n", __func__ ); -+ rc = 0; -+ goto FINISHED; -+ -+ FAIL_FINISHED: -+ if ( mci ) { -+ kfree( mci ); -+ } -+ -+ FINISHED: -+ return( rc ); -+} -+ -+ -+#ifdef CONFIG_PM -+ -+static int amd76x_suspend (struct pci_dev *pdev, u32 state) -+{ -+ debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); -+ -+ return -ENOSYS; -+} -+ -+ -+static int amd76x_resume (struct pci_dev *pdev) -+{ -+ debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); -+ -+ return -ENOSYS; -+} -+ -+#endif /* CONFIG_PM */ -+ -+ -+/* returns count (>= 0), or negative on error */ -+static int __devinit amd76x_init_one( struct pci_dev *pdev, -+ const struct pci_device_id *ent ) -+{ -+ debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ /* don't need to call pci_device_enable() */ -+ return amd76x_probe1( pdev, ent->driver_data ); -+} -+ -+ -+static void __devexit amd76x_remove_one( struct pci_dev *pdev ) -+{ -+ struct mem_ctl_info *mci; -+ -+ debugf0( __FILE__ ": %s()\n", __func__); -+ -+ if ( NULL == ( mci = bluesmoke_mc_find_mci_by_pdev( 
pdev ) ) ) { -+ goto FINISHED; -+ } -+ -+ if ( 0 != bluesmoke_mc_del_mc( mci ) ) { -+ goto FINISHED; -+ } -+ -+ kfree( mci ); -+ -+ FINISHED: -+ return; -+} -+ -+ -+static const struct pci_device_id amd76x_pci_tbl[] __devinitdata = { -+ { PCI_VEND_DEV( AMD, FE_GATE_700C ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, AMD762 }, -+ { PCI_VEND_DEV( AMD, FE_GATE_700E ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, AMD761 }, -+ {0,} /* 0 terminated list. */ -+}; -+ -+MODULE_DEVICE_TABLE(pci, amd76x_pci_tbl); -+ -+ -+static struct pci_driver amd76x_driver = { -+ .name = BS_MOD_STR, -+ .probe = amd76x_init_one, -+ .remove = __devexit_p(amd76x_remove_one), -+ .id_table = amd76x_pci_tbl, -+#ifdef CONFIG_PM -+ .suspend = amd76x_suspend, -+ .resume = amd76x_resume, -+#endif /* CONFIG_PM */ -+}; -+ -+ -+int __init amd76x_init(void) -+{ -+ int pci_rc; -+ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ pci_rc = pci_module_init( &amd76x_driver ); -+ if ( pci_rc < 0 ) return pci_rc; -+ -+ return 0; -+} -+ -+ -+static void __exit amd76x_exit(void) -+{ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ pci_unregister_driver( &amd76x_driver ); -+} -+ -+ -+module_init(amd76x_init); -+module_exit(amd76x_exit); -+ -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh"); -+MODULE_DESCRIPTION("MC support for AMD 76x memory controllers"); -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_e752x.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/bluesmoke_e752x.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_e752x.c 2004-12-17 12:46:23.000000000 -0500 -@@ -0,0 +1,1027 @@ -+/* -+ * Intel e752x Memory Controller kernel module -+ * (C) 2004 Linux Networx (http://lnxi.com) -+ * This file may be distributed under the terms of the -+ * GNU General Public License. 
-+ * -+ * See "enum e752x_chips" below for supported chipsets -+ * -+ * Written by Tom Zimmerman -+ * -+ * Contributors: -+ * Thayne Harbaugh (Linux Networx) -+ * -+ * $Id: bluesmoke_e752x.c,v 1.5 2004/11/18 22:19:46 thayne Exp $ -+ * -+ */ -+ -+ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/init.h> -+ -+#include <linux/pci.h> -+#include <linux/pci_ids.h> -+ -+#include <linux/slab.h> -+ -+#include "bluesmoke_mc.h" -+ -+ -+#ifndef PCI_DEVICE_ID_INTEL_7520_0 -+#define PCI_DEVICE_ID_INTEL_7520_0 0x3590 -+#endif /* PCI_DEVICE_ID_INTEL_7520_0 */ -+ -+#ifndef PCI_DEVICE_ID_INTEL_7520_1_ERR -+#define PCI_DEVICE_ID_INTEL_7520_1_ERR 0x3591 -+#endif /* PCI_DEVICE_ID_INTEL_7520_1_ERR */ -+ -+ -+#define E752X_NR_CSROWS 8 /* number of csrows */ -+ -+ -+/* E752X register addresses - device 0 function 0 */ -+#define E752X_DRB 0x60 /* DRAM row boundary register (8b) */ -+#define E752X_DRA 0x70 /* DRAM row attribute register (8b) */ -+ /* -+ * 31:30 Device width row 7 -+ * 01=x8 10=x4 11=x8 DDR2 -+ * 27:26 Device width row 6 -+ * 23:22 Device width row 5 -+ * 19:20 Device width row 4 -+ * 15:14 Device width row 3 -+ * 11:10 Device width row 2 -+ * 7:6 Device width row 1 -+ * 3:2 Device width row 0 -+ */ -+#define E752X_DRC 0x7C /* DRAM controller mode reg (32b) */ -+ /* -+ * 22 Number channels 0=1,1=2 -+ * 19:18 DRB Granularity 32/64MB -+ */ -+#define E752X_DRM 0x80 /* Dimm mapping register */ -+#define E752X_DDRCSR 0x9A /* DDR control and status reg (16b) */ -+ /* -+ * 14:12 1 single A, 2 single B, 3 dual -+ */ -+#define E752X_TOLM 0xC4 /* DRAM top of low memory reg (16b) */ -+#define E752X_REMAPBASE 0xC6 /* DRAM remap base address reg (16b) */ -+#define E752X_REMAPLIMIT 0xC8 /* DRAM remap limit address reg (16b) */ -+#define E752X_REMAPOFFSET 0xCA /* DRAM remap limit offset reg (16b) */ -+ -+/* E752X register addresses - device 0 function 1 */ -+#define E752X_FERR_GLOBAL 0x40 /* Global first error register (32b)*/ -+#define E752X_NERR_GLOBAL 0x44 /* 
Global next error register (32b) */ -+#define E752X_HI_FERR 0x50 /* Hub interface first error reg (8b)*/ -+#define E752X_HI_NERR 0x52 /* Hub interface next error reg (8b)*/ -+#define E752X_HI_ERRMASK 0x54 /* Hub interface error mask reg (8b)*/ -+#define E752X_HI_SMICMD 0x5A /* Hub interface SMI command reg (8b)*/ -+#define E752X_SYSBUS_FERR 0x60 /* System buss first error reg (16b)*/ -+#define E752X_SYSBUS_NERR 0x62 /* System buss next error reg (16b)*/ -+#define E752X_SYSBUS_ERRMASK 0x64 /* System buss error mask reg (16b) */ -+#define E752X_SYSBUS_SMICMD 0x6A /* System buss SMI command reg (16b) */ -+#define E752X_BUF_FERR 0x70 /* Memory buffer first error reg (8b)*/ -+#define E752X_BUF_NERR 0x72 /* Memory buffer next error reg (8b)*/ -+#define E752X_BUF_ERRMASK 0x74 /* Memory buffer error mask reg (8b)*/ -+#define E752X_BUF_SMICMD 0x7A /* Memory buffer SMI command reg (8b)*/ -+#define E752X_DRAM_FERR 0x80 /* DRAM first error register (16b) */ -+#define E752X_DRAM_NERR 0x82 /* DRAM next error register (16b) */ -+#define E752X_DRAM_ERRMASK 0x84 /* DRAM error mask register (8b) */ -+#define E752X_DRAM_SMICMD 0x8A /* DRAM SMI command register (8b) */ -+#define E752X_DRAM_RETRY_ADD 0xAC /* DRAM Retry address register (32b) */ -+#define E752X_DRAM_CELOG1_ADD 0xA0 /* DRAM first correctable memory */ -+ /* error address register (32b) */ -+ /* -+ * 31 Reserved -+ * 30:2 CE address (64 byte block 34:6) -+ * 1 Reserved -+ * 0 HiLoCS -+ */ -+#define E752X_DRAM_CELOG2_ADD 0xC8 /* DRAM first correctable memory */ -+ /* error address register (32b) */ -+ /* -+ * 31 Reserved -+ * 30:2 CE address (64 byte block 34:6) -+ * 1 Reserved -+ * 0 HiLoCS -+ */ -+#define E752X_DRAM_UELOG_ADD 0xA4 /* DRAM first uncorrectable memory */ -+ /* error address register (32b) */ -+ /* -+ * 31 Reserved -+ * 30:2 CE address (64 byte block 34:6) -+ * 1 Reserved -+ * 0 HiLoCS -+ */ -+#define E752X_DRAM_UELOGS_ADD 0xA8 /* DRAM first uncorrectable scrub memory */ -+ /* error address register (32b) */ 
-+ /* -+ * 31 Reserved -+ * 30:2 CE address (64 byte block 34:6) -+ * 1 Reserved -+ * 0 HiLoCS -+ */ -+#define E752X_DRAM_CELOG1_SYNDROME 0xC4 /* DRAM first correctable memory */ -+ /* error syndrome register (16b) */ -+#define E752X_DRAM_CELOG2_SYNDROME 0xC6 /* DRAM second correctable memory */ -+ /* error syndrome register (16b) */ -+#define E752X_DEVPRES1 0xF4 /* Device Present 1 register (8b) */ -+ -+/* ICH5R register addresses - device 30 function 0 */ -+#define ICH5R_PCI_STAT 0x06 /* PCI status register (16b) */ -+#define ICH5R_PCI_2ND_STAT 0x1E /* PCI status secondary reg (16b) */ -+#define ICH5R_PCI_BRIDGE_CTL 0x3E /* PCI bridge control register (16b) */ -+ -+enum e752x_chips { -+ E7520 = 0, -+}; -+ -+ -+struct e752x_pvt { -+ struct pci_dev *bridge_ck; -+ struct pci_dev *dev_d0f0; -+ struct pci_dev *dev_d0f1; -+ u32 tolm; -+ u32 remapbase; -+ u32 remaplimit; -+ int mc_symmetric; -+ u8 map[8]; -+ int map_type; -+ const struct e752x_dev_info *dev_info; -+}; -+ -+ -+struct e752x_dev_info { -+ u16 err_dev; -+ const char *ctl_name; -+}; -+ -+ -+static const struct e752x_dev_info e752x_devs[] = { -+ [E7520] = { -+ .err_dev = PCI_DEVICE_ID_INTEL_7520_1_ERR, -+ .ctl_name = "E7520" -+ }, -+}; -+ -+ -+/* FIXME - is this valid for both SECDED and S4ECD4ED? 
*/ -+static inline int e752x_find_channel(u16 syndrome) -+{ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ if((syndrome & 0xff00)==0) -+ return(0); -+ if((syndrome & 0x00ff)==0) -+ return(1); -+ if((syndrome & 0xf000)==0) -+ return(0); -+ if((syndrome & 0x0f00)==0) -+ return(0); -+ return(1); -+} -+ -+ -+static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci, -+ unsigned long page) -+{ -+ u32 remap; -+ struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; -+ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ if(page < pvt->tolm) -+ return(page); -+ if((page >= 0x100000)&&(page < pvt->remapbase)) -+ return(page); -+ remap = (page - pvt->tolm) + pvt->remapbase; -+ if(remap < pvt->remaplimit) -+ return(remap); -+ printk(KERN_ERR "Invalid page %lx - out of range\n", page); -+ return(pvt->tolm-1); -+} -+ -+ -+static void process_ce(struct mem_ctl_info *mci, u16 error_one, -+ u32 celog1_add, u16 celog1_syndrome) -+{ -+ u32 error_1b, page; -+ u16 syndrome; -+ int row; -+ int channel; -+ int i; -+ struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; -+ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ if(error_one&0x0101) { -+ /* read the error address */ -+// pci_read_config_dword(pvt->bridge_ck,E752X_DRAM_CELOG1_ADD, -+// &error_1b); -+ error_1b = celog1_add; -+ page = error_1b >> (PAGE_SHIFT-4); /* convert the addr to 4k page */ -+ /* read the syndrome */ -+// pci_read_config_word(pvt->bridge_ck,E752X_DRAM_CELOG1_SYNDROME, -+// &syndrome); -+ syndrome = celog1_syndrome; -+ /* FIXME - check for -1 */ -+ if (pvt->mc_symmetric) { -+ row = ((page >>1)&3); /* chip select are bits 14 & 13 */ -+ printk( KERN_WARNING -+ "Test row %d Table %d %d %d %d %d %d %d %d\n", -+ row,pvt->map[0],pvt->map[1],pvt->map[2],pvt->map[3],pvt->map[4], -+ pvt->map[5],pvt->map[6],pvt->map[7]); -+ -+ /* test for channel remapping */ -+ for(i=0;i<8;i++) { -+ if(pvt->map[i] == row) -+ break; -+ } -+ printk( KERN_WARNING -+ "Test computed row %d\n",i); -+ 
if(i<8) { -+ row = i; -+ } -+ else { -+ printk( KERN_WARNING -+ "MC%d: row %d not found in remap table\n", -+ mci->mc_idx,row); -+ } -+ -+ -+ } else { -+ row = bluesmoke_mc_find_csrow_by_page( mci, page ); -+ } -+ if(error_one&1) -+ channel = 0; /* 0 = channel A */ -+ else -+ channel = 1; /* 1 = channel B */ -+ -+ if(!pvt->map_type) -+ row = 7 - row; -+ bluesmoke_mc_handle_ce( mci, page, 0, syndrome, -+ row, channel, "e752x CE" ); -+ } -+} -+ -+ -+static void process_ue(struct mem_ctl_info *mci, u16 error_one, -+ u32 uelog_add, u32 uelogs_add) -+{ -+ u32 error_2b, block_page; -+ int row; -+ struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; -+ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ if(error_one & 0x0202) { -+ error_2b = uelog_add; -+ /* convert to 4k address */ -+ block_page = error_2b >> (PAGE_SHIFT - 4); -+ if (pvt->mc_symmetric) { -+ /* chip select are bits 14 & 13 */ -+ row = ((block_page >>1)&3); -+ } -+ else { -+ row = bluesmoke_mc_find_csrow_by_page(mci, block_page); -+ } -+ bluesmoke_mc_handle_ue( mci, block_page, 0, row, -+ "e752x UE from Read" ); -+ } -+ if(error_one & 0x0404) { -+ error_2b = uelogs_add; -+ /* convert to 4k address */ -+ block_page = error_2b >> (PAGE_SHIFT - 4); -+ if (pvt->mc_symmetric) { -+ /* chip select are bits 14 & 13 */ -+ row = ((block_page >>1)&3); -+ } -+ else { -+ row = bluesmoke_mc_find_csrow_by_page(mci, block_page); -+ } -+ bluesmoke_mc_handle_ue( mci, block_page, 0, row, -+ "e752x UE from Scruber" ); -+ } -+} -+ -+#if 0 -+static void process_ue_no_info(struct mem_ctl_info *mci) -+{ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ bluesmoke_mc_handle_ue_no_info( mci, "e752x UE log register overflow" ); -+} -+#endif -+ -+static void process_ue_no_info_wr(struct mem_ctl_info *mci) -+{ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ bluesmoke_mc_handle_ue_no_info( mci, "e752x UE log memory write" ); -+} -+ -+static void process_ded_retry(struct mem_ctl_info *mci,u16 error,u32 
retry_add) -+{ -+ u32 error_1b, page; -+ int row; -+ struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; -+ -+ error_1b = retry_add; -+ page = error_1b >> (PAGE_SHIFT-4); /* convert the addr to 4k page */ -+ if (pvt->mc_symmetric) { -+ row = ((page >>1)&3); /* chip select are bits 14 & 13 */ -+ } else { -+ row = bluesmoke_mc_find_csrow_by_page( mci, page ); -+ } -+ printk( KERN_WARNING -+ "MC%d: CE page 0x%lx, row %d : Memory read retry\n", -+ mci->mc_idx,(long unsigned int)page,row); -+} -+ -+static void process_threshold_ce(struct mem_ctl_info *mci,u16 error) -+{ -+ printk( KERN_WARNING -+ "MC%d: Memory threshold CE\n",mci->mc_idx); -+} -+ -+char *global_message[11]= {"PCI Express C1","PCI Express C","PCI Express B1", -+ "PCI Express B","PCI Express A1","PCI Express A", -+ "DMA Controler","HUB Interface","System Bus", -+ "DRAM Controler","Internal Buffer"}; -+char *fatal_message[2]={"Non-Fatal ","Fatal "}; -+ -+static void global_error(int fatal, u32 errors) -+{ -+ int i; -+ -+ for(i=0;i<11;i++) { -+ if(errors & (1<<i)) { -+ printk( KERN_WARNING "%sError %s\n", -+ fatal_message[fatal], -+ global_message[i]); -+ } -+ } -+} -+ -+char *hub_message[7]= {"HI Address or Command Parity","HI Illegal Access", -+ "HI Internal Parity","Out of Range Access", -+ "HI Data Parity","Enhanced Config Access", -+ "Hub Interface Target Abort"}; -+; -+static void hub_error(int fatal, u8 errors) -+{ -+ int i; -+ -+ for(i=0;i<7;i++) { -+ if(errors & (1<<i)) { -+ printk( KERN_WARNING "%sError %s\n", -+ fatal_message[fatal], -+ hub_message[i]); -+ } -+ } -+} -+ -+char *membuf_message[4]= {"Internal PMWB to DRAM parity", -+ "Internal PMWB to System Bus Parity", -+ "Internal System Bus or IO to PMWB Parity", -+ "Internal DRAM to PMWB Parity"}; -+; -+static void membuf_error(u8 errors) -+{ -+ int i; -+ -+ for(i=0;i<4;i++) { -+ if(errors & (1<<i)) { -+ printk( KERN_WARNING "Non-Fatal Error %s\n", -+ membuf_message[i]); -+ } -+ } -+} -+ -+char *sysbus_message[10]= {"Addr or Request 
Parity", -+ "Data Strobe Glitch", -+ "Addr Strobe Glitch", -+ "Data Parity", -+ "Addr Above TOM", -+ "Non DRAM Lock Error", -+ "MCERR", "BINIT", -+ "Memory Parity", -+ "IO Subsystem Parity"}; -+ -+static void sysbus_error(int fatal, u32 errors) -+{ -+ int i; -+ -+ for(i=0;i<10;i++) { -+ if(errors & (1<<i)) { -+ printk( KERN_WARNING "%sError System Bus %s\n", -+ fatal_message[fatal], -+ global_message[i]); -+ } -+ } -+} -+ -+static void e752x_check(struct mem_ctl_info *mci) -+{ -+ int i; -+ u8 stat8; -+ u16 error_one, error_next, stat; -+ u32 stat32,error32; -+ /* Snap shot of error registers */ -+ u8 hi_ferr; -+ u8 hi_nerr; -+ u16 sysbus_ferr; -+ u16 sysbus_nerr; -+ u8 buf_ferr; -+ u8 buf_nerr; -+ u16 dram_ferr; -+ u16 dram_nerr; -+ u32 celog1_add; -+ u32 celog2_add; -+ u16 celog1_syndrome; -+ u16 celog2_syndrome; -+ u32 retry_add; -+ u32 uelog_add; -+ u32 uelogs_add; -+ struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info; -+ struct pci_dev *pres_dev; -+ struct pci_dev *dev; -+ -+ /* clear snapshot */ -+ hi_ferr=hi_nerr=buf_ferr=buf_nerr=0; -+ sysbus_ferr=sysbus_nerr=dram_ferr=dram_nerr=0; -+ celog1_syndrome=celog2_syndrome=retry_add=0; -+ celog1_add=celog2_add=uelog_add=uelogs_add=0; -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ if (pvt->dev_d0f1 != NULL) { -+ dev = pvt->dev_d0f1; -+ pci_read_config_dword(dev,E752X_FERR_GLOBAL,&stat32); -+ if(stat32) { /* Error, so process */ -+#if 1 -+ /* dump d0f0 and d0f1 */ -+ printk("\nDevice 0 Function 0"); -+ for(i=0;i<0x100;i++) { -+ pci_read_config_byte(mci->pdev,i,&stat8); -+ if((i%16)==0) { -+ printk("\n%2.2x ",i); -+ } -+ printk("%2.2x ",stat8); -+ } -+ printk("\n"); -+ /* dump d0f0 and d0f1 */ -+ printk("\nDevice 0 Function 1"); -+ for(i=0;i<0x100;i++) { -+ pci_read_config_byte(dev,i,&stat8); -+ if((i%16)==0) { -+ printk("\n%2.2x ",i); -+ } -+ printk("%2.2x ",stat8); -+ } -+ printk("\n\n"); -+#endif -+ /* take a snap shot of first errors */ -+ pci_read_config_byte(dev,E752X_HI_FERR,&hi_ferr); -+ 
pci_read_config_word(dev,E752X_SYSBUS_FERR,&sysbus_ferr); -+ pci_read_config_byte(dev,E752X_BUF_FERR,&buf_ferr); -+ pci_read_config_word(dev,E752X_DRAM_FERR,&dram_ferr); -+ pci_read_config_dword(dev,E752X_DRAM_CELOG1_ADD, -+ &celog1_add); -+ pci_read_config_word(dev,E752X_DRAM_CELOG1_SYNDROME, -+ &celog1_syndrome); -+ pci_read_config_dword(dev,E752X_DRAM_UELOG_ADD, -+ &uelog_add); -+ pci_read_config_dword(dev,E752X_DRAM_UELOGS_ADD, -+ &uelogs_add); -+ pci_read_config_dword(dev,E752X_DRAM_RETRY_ADD, -+ &retry_add); -+ -+ pci_write_config_dword(dev,E752X_FERR_GLOBAL,stat32); -+ error32=(stat32>>18)&0x3ff; -+ stat32=(stat32>>4)&0x7ff; -+ if(error32) -+ global_error(1,error32); -+ if(stat32) -+ global_error(0,stat32); -+ } -+ -+ pci_read_config_dword(dev,E752X_NERR_GLOBAL,&stat32); -+ if(stat32) { /* Error, so process */ -+ /* take a snap shot of second errors */ -+ pci_read_config_byte(dev,E752X_HI_NERR,&hi_nerr); -+ pci_read_config_word(dev,E752X_SYSBUS_NERR,&sysbus_nerr); -+ pci_read_config_byte(dev,E752X_BUF_NERR,&buf_nerr); -+ pci_read_config_word(dev,E752X_DRAM_NERR,&dram_nerr); -+ pci_read_config_dword(dev,E752X_DRAM_CELOG2_ADD, -+ &celog2_add); -+ pci_read_config_word(dev,E752X_DRAM_CELOG2_SYNDROME, -+ &celog2_syndrome); -+ -+ pci_write_config_dword(dev,E752X_NERR_GLOBAL,stat32); -+ error32=(stat32>>18)&0x3ff; -+ stat32=(stat32>>4)&0x7ff; -+ if(error32) -+ global_error(1,error32); -+ if(stat32) -+ global_error(0,stat32); -+ } -+ -+// pci_read_config_byte(dev,E752X_HI_FERR,&stat8); -+ stat8=hi_ferr; -+ if(stat8&0x7f) { /* Error, so process */ -+ pci_write_config_dword(dev,E752X_HI_FERR,stat8); -+ stat8 &= 0x7f; -+ if(stat8&0x2b) -+ hub_error(1,(stat8&0x2b)); -+ if(stat8 & 0x54) -+ hub_error(0,(stat8&0x54)); -+ } -+// pci_read_config_byte(dev,E752X_HI_NERR,&stat8); -+ stat8=hi_nerr; -+ if(stat8&0x7f) { /* Error, so process */ -+ pci_write_config_dword(dev,E752X_HI_NERR,stat8); -+ stat8 &= 0x7f; -+ if(stat8&0x2b) -+ hub_error(1,(stat8&0x2b)); -+ if(stat8 & 0x54) 
-+ hub_error(0,(stat8&0x54)); -+ } -+// pci_read_config_dword(dev,E752X_SYSBUS_FERR,&stat32); -+ stat32 = sysbus_ferr + (sysbus_nerr <<16); -+ if(stat32) { /* Error, so process */ -+ pci_write_config_dword(dev,E752X_SYSBUS_FERR,stat32); -+ error32=(stat32>>16)&0x3ff; -+ stat32=stat32&0x3ff; -+ if(stat32 & 0x083) -+ sysbus_error(1,(stat32&0x083)); -+ if(stat32 & 0x37c) -+ sysbus_error(0,(stat32&0x37c)); -+ if(error32 & 0x083) -+ sysbus_error(1,(error32&0x083)); -+ if(error32 & 0x37c) -+ sysbus_error(0,(error32&0x37c)); -+ } -+// pci_read_config_byte(dev,E752X_BUF_FERR,&stat8); -+ stat8 = buf_ferr; -+ if(stat8&0x0f) { /* Error, so process */ -+ pci_write_config_dword(dev,E752X_BUF_FERR,stat8); -+ stat8 &= 0x0f; -+ membuf_error(stat8); -+ } -+// pci_read_config_byte(dev,E752X_BUF_NERR,&stat8); -+ stat8 = buf_nerr; -+ if(stat8&0x0f) { /* Error, so process */ -+ pci_write_config_dword(dev,E752X_BUF_NERR,stat8); -+ stat8 &= 0x0f; -+ membuf_error(stat8); -+ } -+ -+ -+ -+ -+// pci_read_config_word(pvt->bridge_ck,E752X_DRAM_FERR,&error_one); -+// pci_read_config_word(pvt->bridge_ck,E752X_DRAM_NERR,&error_next); -+ error_one = dram_ferr; -+ error_next = dram_nerr; -+ /* clear any error bits */ -+ if(error_one) { -+ pci_write_bits16(pvt->bridge_ck, E752X_DRAM_FERR, -+ error_one,error_one); -+ } -+ if(error_next) { -+ pci_write_bits16(pvt->bridge_ck, E752X_DRAM_NERR, -+ error_next, error_next); -+ } -+ -+ /* decode and report errors */ -+ if(error_one & 0x0101) { /* check first error correctable */ -+ process_ce(mci,error_one,celog1_add,celog1_syndrome); -+ } -+ if(error_next & 0x0101) { /* check next error correctable */ -+ process_ce(mci,error_next,celog2_add,celog2_syndrome); -+ } -+ if(error_one & 0x4040) { -+ process_ue_no_info_wr(mci); -+ } -+ if(error_next & 0x4040) { -+ process_ue_no_info_wr(mci); -+ } -+ if(error_one & 0x2020) { -+ process_ded_retry(mci,error_one,retry_add); -+ } -+ if(error_next & 0x2020) { -+ process_ded_retry(mci,error_next,retry_add); -+ } -+ 
if(error_one & 0x0808) { -+ process_threshold_ce(mci,error_one); -+ } -+ if(error_next & 0x0808) { -+ process_threshold_ce(mci,error_next); -+ } -+ if(error_one & 0x0606) { -+ process_ue(mci,error_one,uelog_add,uelogs_add); -+ } -+ if(error_next & 0x0606) { -+ process_ue(mci,error_next,uelog_add,uelogs_add); -+ } -+ -+ -+ } -+ /* Test for PCI Parity errors in the southbridge */ -+ if (pvt->dev_d0f0 != NULL) { -+ dev = pvt->dev_d0f0; -+ for(pres_dev = dev; -+ ((struct pci_dev*)pres_dev->global_list.next != dev); -+ pres_dev = (struct pci_dev*)pres_dev->global_list.next) { -+ pci_read_config_dword(pres_dev,PCI_COMMAND,&stat32); -+ stat = (u16)(stat32 >>16); -+ /* test for error any error bits */ -+ if(stat32 & ((1<<6)+(1<<8))) { /* error reporting dev */ -+ if(stat & ((1<<15)+(1<<14)+(1<<8))) { -+ pci_write_config_word(pres_dev,6,stat); -+ if(stat & (1<<14)) { -+ printk( KERN_WARNING -+ "System Error on %s %s\n", -+ pres_dev->slot_name, -+ pci_pretty_name(pres_dev)); -+ } -+ if(stat & ((1<<15)+(1<<8))) { -+ printk( KERN_WARNING -+ "Parity Error on %s %s\n", -+ pres_dev->slot_name, -+ pci_pretty_name(pres_dev)); -+ } -+ } -+ } -+ } -+ } -+} -+ -+ -+static int e752x_probe1( struct pci_dev *pdev, int dev_idx ) -+{ -+ int rc = -ENODEV; -+ int index; -+ u16 pci_data, stat; -+ u32 stat32; -+ u16 stat16; -+ u8 stat8; -+ struct mem_ctl_info *mci = NULL; -+ struct e752x_pvt *pvt = NULL; -+ u16 ddrcsr; -+ u32 drc; -+ int drc_chan; /* Number of channels 0=1chan,1=2chan */ -+ int drc_drbg; /* DRB granularity 0=32mb,1=64mb */ -+ int drc_ddim; /* DRAM Data Integrity Mode 0=none,2=edac */ -+ u32 dra; -+ unsigned long last_cumul_size; -+ struct pci_dev *pres_dev; -+ struct pci_dev *dev; -+ -+ debugf0( "MC: " __FILE__ ": %s(): mci\n", __func__ ); -+ printk( KERN_ERR "Starting Probe1\n" ); -+ -+ /* enable device 0 function 1 */ -+ pci_read_config_byte(pdev, E752X_DEVPRES1, &stat8); -+ stat8 |= (1<<5); -+ pci_write_config_byte(pdev, E752X_DEVPRES1, stat8); -+ -+ /* need to find out the 
number of channels */ -+ pci_read_config_dword(pdev, E752X_DRC, &drc); -+ pci_read_config_word(pdev, E752X_DDRCSR, &ddrcsr); -+ if(((ddrcsr>>12)&3)==3) -+ drc_chan = 1; /* Dual channel */ -+ else -+ drc_chan = 0; /* Single channel */ -+ drc_drbg = drc_chan + 1; /* 128 in dual mode, 64 in single */ -+ drc_ddim = ( drc >> 20 ) & 0x3; -+ -+ mci = bluesmoke_mc_init_structs(sizeof(*pvt), -+ E752X_NR_CSROWS, -+ drc_chan + 1); -+ -+ if ( ! mci ) { -+ rc = -ENOMEM; -+ goto FAIL_FINISHED; -+ } -+ -+ debugf3( "MC: " __FILE__ ": %s(): init mci\n", __func__ ); -+ -+ mci->mtype_cap = MEM_FLAG_RDDR; -+ mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED | EDAC_FLAG_S4ECD4ED; -+ /* FIXME - what if different memory types are in different csrows? */ -+ mci->mod_name = BS_MOD_STR; -+ mci->mod_ver = "$Revision: 1.5 $"; -+ mci->pdev = pdev; -+ -+ debugf3( "MC: " __FILE__ ": %s(): init pvt\n", __func__ ); -+ pvt = (struct e752x_pvt *)mci->pvt_info; -+ pvt->dev_info = &e752x_devs[dev_idx]; -+ pvt->bridge_ck = pci_find_device( PCI_VENDOR_ID_INTEL, -+ pvt->dev_info->err_dev, -+ pvt->bridge_ck ); -+ if ( ! pvt->bridge_ck ) { -+ pvt->bridge_ck = pci_scan_single_device(pdev->bus, PCI_DEVFN(0,1)); -+ } -+ if ( ! pvt->bridge_ck ) { -+ printk( KERN_ERR -+ "MC: error reporting device not found:" -+ "vendor %x device 0x%x (broken BIOS?)\n", -+ PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].err_dev ); -+ goto FAIL_FINISHED; -+ } -+ if(ddrcsr & 0x10) { -+ pvt->mc_symmetric = 1; -+ } else { -+ pvt->mc_symmetric =0; -+ } -+ -+ debugf3( "MC: " __FILE__ ": %s(): more mci init\n", __func__ ); -+ mci->ctl_name = pvt->dev_info->ctl_name; -+ -+ mci->edac_check = e752x_check; -+ /* FIXME - why isn't clear_err set to something? 
*/ -+ mci->clear_err = NULL; -+ mci->ctl_page_to_phys = ctl_page_to_phys; -+ -+ /* find out the device types */ -+ pci_read_config_dword(pdev, E752X_DRA, &dra); -+ -+ /* -+ * The dram row boundary (DRB) reg values are boundary address -+ * for each DRAM row with a granularity of 64 or 128MB (single/dual -+ * channel operation). DRB regs are cumulative; therefore DRB7 will -+ * contain the total memory contained in all eight rows. -+ */ -+ for( last_cumul_size = index = 0; index < mci->nr_csrows; index++ ) { -+ u8 value; -+ u32 cumul_size; -+ /* mem_dev 0=x8, 1=x4 */ -+ int mem_dev = ( dra >> ( index * 4 + 2 ) ) & 0x3; -+ struct csrow_info *csrow = &mci->csrows[ index ]; -+ -+ if(mem_dev == 2) -+ mem_dev = 1; -+ else -+ mem_dev = 0; -+ pci_read_config_byte(mci->pdev, E752X_DRB + index, &value); -+ /* convert a 128 or 64 MiB DRB to a page size. */ -+ cumul_size = value << (25 + drc_drbg - PAGE_SHIFT ); -+ debugf3( "MC: " __FILE__ ": %s(): (%d) cumul_size 0x%x\n", -+ __func__, index, cumul_size ); -+ if ( cumul_size == last_cumul_size ) { -+ continue; /* not populated */ -+ } -+ -+ csrow->first_page = last_cumul_size; -+ csrow->last_page = cumul_size - 1; -+ csrow->nr_pages = cumul_size - last_cumul_size; -+ last_cumul_size = cumul_size; -+ csrow->grain = 1 << 12; /* 4KiB - resolution of CELOG */ -+ csrow->mtype = MEM_RDDR; /* only one type supported */ -+ csrow->dtype = mem_dev ? 
DEV_X4 : DEV_X8; -+ -+ /* -+ * if single channel or x8 devices then SECDED -+ * if dual channel and x4 then S4ECD4ED -+ */ -+ if ( drc_ddim ) { -+ if ( drc_chan && mem_dev ) { -+ csrow->edac_mode = EDAC_S4ECD4ED; -+ mci->edac_cap |= EDAC_FLAG_S4ECD4ED; -+ } else { -+ csrow->edac_mode = EDAC_SECDED; -+ mci->edac_cap |= EDAC_FLAG_SECDED; -+ } -+ } else { -+ csrow->edac_mode = EDAC_NONE; -+ } -+ } -+ -+ /* Fill in the memory map table */ -+ { -+ u8 value; -+ u8 last=0; -+ u8 row=0; -+ for(index=0;index<8;index+=2) { -+ -+ pci_read_config_byte(mci->pdev, E752X_DRB + index, &value); -+ /* test if there is a dimm in this slot */ -+ if(value == last) { -+ /* no dimm in the slot, so flag it as empty */ -+ pvt->map[index]=0xff; -+ pvt->map[index+1]=0xff; -+ } -+ else { /* there is a dimm in the slot */ -+ pvt->map[index]=row; -+ row++; -+ last = value; -+ /* test the next value to see if the dimm is double sided */ -+ pci_read_config_byte(mci->pdev, E752X_DRB + index + 1, &value); -+ if(value == last) { -+ /* the dimm is single sided, so flag as empty */ -+ pvt->map[index+1]=0xff; -+ row++; -+ } -+ else { -+ /* this is a double sided dimm to save the next row # */ -+ pvt->map[index+1]=row; -+ row++; -+ } -+ last = value; -+ } -+ } -+ } -+ -+ /* set the map type. 
1 = normal, 0 = reversed */ -+ pci_read_config_byte(mci->pdev, E752X_DRM, &stat8); -+ if((stat8&0x0f) > ((stat8>>4)&0x0f)) { -+ /* map type is normal */ -+ pvt->map_type = 1; -+ } -+ else { -+ /* map type is reversed */ -+ pvt->map_type = 0; -+ } -+ -+ mci->edac_cap |= EDAC_FLAG_NONE; -+ -+ debugf3( "MC: " __FILE__ ": %s(): tolm, remapbase, remaplimit\n", __func__ ); -+ /* load the top of low memory, remap base, and remap limit vars */ -+ pci_read_config_word(mci->pdev, E752X_TOLM, &pci_data); -+ pvt->tolm = ((u32)pci_data) << 4; -+ pci_read_config_word(mci->pdev, E752X_REMAPBASE, &pci_data); -+ pvt->remapbase = ((u32)pci_data) << 14; -+ pci_read_config_word(mci->pdev, E752X_REMAPLIMIT, &pci_data); -+ pvt->remaplimit = ((u32)pci_data) << 14; -+ printk( "tolm = %x, remapbase = %x, remaplimit = %x\n", -+ pvt->tolm, pvt->remapbase, pvt->remaplimit); -+ -+ if ( 0 != bluesmoke_mc_add_mc( mci ) ) { -+ debugf3( "MC: " __FILE__ ": %s(): failed bluesmoke_mc_add_mc()\n", __func__ ); -+ goto FAIL_FINISHED; -+ } -+ -+ /* Walk through the PCI table and clear errors */ -+ dev = pci_find_device( PCI_VENDOR_ID_INTEL, -+ PCI_DEVICE_ID_INTEL_7520_0, NULL ); -+ pvt->dev_d0f0 = dev; -+ for(pres_dev = dev; -+ ((struct pci_dev*)pres_dev->global_list.next != dev); -+ pres_dev = (struct pci_dev*)pres_dev->global_list.next) { -+ pci_read_config_dword(pres_dev,PCI_COMMAND,&stat32); -+ stat = (u16)(stat32 >>16); -+ /* clear any error bits */ -+ if(stat32 & ((1<<6)+(1<<8))) { -+ pci_write_config_word(pres_dev,PCI_STATUS,stat); -+ } -+ } -+ /* find the error reporting device and clear errors */ -+ dev = pvt->dev_d0f1 = pvt->bridge_ck; -+ /* Turn off error disable & SMI in case the BIOS turned it on */ -+ pci_write_config_byte(dev,E752X_HI_ERRMASK,0x00); -+ pci_write_config_byte(dev,E752X_HI_SMICMD,0x00); -+ pci_write_config_word(dev,E752X_SYSBUS_ERRMASK,0x00); -+ pci_write_config_word(dev,E752X_SYSBUS_SMICMD,0x00); -+ pci_write_config_byte(dev,E752X_BUF_ERRMASK,0x00); -+ 
pci_write_config_byte(dev,E752X_BUF_SMICMD,0x00); -+ pci_write_config_byte(dev,E752X_DRAM_ERRMASK,0x00); -+ pci_write_config_byte(dev,E752X_DRAM_SMICMD,0x00); -+ /* clear other MCH errors */ -+ pci_read_config_dword(dev,E752X_FERR_GLOBAL,&stat32); -+ pci_write_config_dword(dev,E752X_FERR_GLOBAL,stat32); -+ pci_read_config_dword(dev,E752X_NERR_GLOBAL,&stat32); -+ pci_write_config_dword(dev,E752X_NERR_GLOBAL,stat32); -+ pci_read_config_byte(dev,E752X_HI_FERR,&stat8); -+ pci_write_config_byte(dev,E752X_HI_FERR,stat8); -+ pci_read_config_byte(dev,E752X_HI_NERR,&stat8); -+ pci_write_config_byte(dev,E752X_HI_NERR,stat8); -+ pci_read_config_dword(dev,E752X_SYSBUS_FERR,&stat32); -+ pci_write_config_dword(dev,E752X_SYSBUS_FERR,stat32); -+ pci_read_config_byte(dev,E752X_BUF_FERR,&stat8); -+ pci_write_config_byte(dev,E752X_BUF_FERR,stat8); -+ pci_read_config_byte(dev,E752X_BUF_NERR,&stat8); -+ pci_write_config_byte(dev,E752X_BUF_NERR,stat8); -+ pci_read_config_word(dev, E752X_DRAM_FERR, &stat16); -+ pci_write_config_word(dev, E752X_DRAM_FERR, stat16); -+ pci_read_config_word(dev, E752X_DRAM_NERR, &stat16); -+ pci_write_config_word(dev, E752X_DRAM_NERR, stat16); -+ -+ /* get this far and it's successful */ -+ debugf3( "MC: " __FILE__ ": %s(): success\n", __func__ ); -+ rc = 0; -+ goto FINISHED; -+ -+ FAIL_FINISHED: -+ if ( mci ) { -+ kfree( mci ); -+ } -+ FINISHED: -+ return( rc ); -+} -+ -+ -+#ifdef CONFIG_PM -+ -+static int e752x_suspend (struct pci_dev *pdev, u32 state) -+{ -+ debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); -+ -+ return -ENOSYS; -+} -+ -+ -+static int e752x_resume (struct pci_dev *pdev) -+{ -+ debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); -+ -+ return -ENOSYS; -+} -+ -+#endif /* CONFIG_PM */ -+ -+ -+/* returns count (>= 0), or negative on error */ -+static int __devinit e752x_init_one( struct pci_dev *pdev, -+ const struct pci_device_id *ent ) -+{ -+ int rc; -+ -+ debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ /* wake up and enable 
device */ -+ if (pci_enable_device (pdev)) { -+ rc = -EIO; -+ } else { -+ rc = e752x_probe1( pdev, ent->driver_data ); -+ } -+ return rc; -+} -+ -+ -+static void __devexit e752x_remove_one( struct pci_dev *pdev ) -+{ -+ struct mem_ctl_info *mci; -+ -+ debugf0( __FILE__ ": %s()\n", __func__); -+ -+ if ( NULL == ( mci = bluesmoke_mc_find_mci_by_pdev( pdev ) ) ) { -+ goto FINISHED; -+ } -+ -+ if ( 0 != bluesmoke_mc_del_mc( mci ) ) { -+ goto FINISHED; -+ } -+ -+ kfree( mci ); -+ -+ FINISHED: -+ return; -+} -+ -+ -+static const struct pci_device_id e752x_pci_tbl[] __devinitdata = { -+ { PCI_VEND_DEV( INTEL, 7520_0 ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7520 }, -+ {0,} /* 0 terminated list. */ -+}; -+ -+MODULE_DEVICE_TABLE(pci, e752x_pci_tbl); -+ -+ -+static struct pci_driver e752x_driver = { -+ name: BS_MOD_STR, -+ probe: e752x_init_one, -+ remove: __devexit_p(e752x_remove_one), -+ id_table: e752x_pci_tbl, -+#ifdef CONFIG_PM -+ suspend: e752x_suspend, -+ resume: e752x_resume, -+#endif /* CONFIG_PM */ -+}; -+ -+ -+int __init e752x_init(void) -+{ -+ int pci_rc; -+ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ pci_rc = pci_module_init( &e752x_driver ); -+ if ( pci_rc < 0 ) return pci_rc; -+ -+ return 0; -+} -+ -+ -+static void __exit e752x_exit(void) -+{ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ pci_unregister_driver( &e752x_driver ); -+} -+ -+ -+module_init(e752x_init); -+module_exit(e752x_exit); -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Linux Networx (http://lnxi.com) Tom Zimmerman\n"); -+MODULE_DESCRIPTION("MC support for Intel e752x memory controllers"); -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_e7xxx.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/bluesmoke_e7xxx.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_e7xxx.c 2004-12-17 
12:46:23.000000000 -0500 -@@ -0,0 +1,552 @@ -+/* -+ * Intel e7xxx Memory Controller kernel module -+ * (C) 2003 Linux Networx (http://lnxi.com) -+ * This file may be distributed under the terms of the -+ * GNU General Public License. -+ * -+ * See "enum e7xxx_chips" below for supported chipsets -+ * -+ * Written by Thayne Harbaugh -+ * Based on work by Dan Hollis <goemon at anime dot net> and others. -+ * http://www.anime.net/~goemon/linux-ecc/ -+ * -+ * Contributors: -+ * Eric Biederman (Linux Networx) -+ * Tom Zimmerman (Linux Networx) -+ * Jim Garlic (Lawrence Livermore National Labs) -+ * Dave Peterson (Lawrence Livermore National Labs) -+ * That One Guy (Some other place) -+ * -+ * $Id: bluesmoke_e7xxx.c,v 1.5 2004/11/18 22:19:46 thayne Exp $ -+ * -+ */ -+ -+ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/init.h> -+ -+#include <linux/pci.h> -+#include <linux/pci_ids.h> -+ -+#include <linux/slab.h> -+ -+#include "bluesmoke_mc.h" -+ -+ -+#ifndef PCI_DEVICE_ID_INTEL_7205_0 -+#define PCI_DEVICE_ID_INTEL_7205_0 0x255d -+#endif /* PCI_DEVICE_ID_INTEL_7205_0 */ -+ -+#ifndef PCI_DEVICE_ID_INTEL_7205_1_ERR -+#define PCI_DEVICE_ID_INTEL_7205_1_ERR 0x2551 -+#endif /* PCI_DEVICE_ID_INTEL_7205_1_ERR */ -+ -+#ifndef PCI_DEVICE_ID_INTEL_7500_0 -+#define PCI_DEVICE_ID_INTEL_7500_0 0x2540 -+#endif /* PCI_DEVICE_ID_INTEL_7500_0 */ -+ -+#ifndef PCI_DEVICE_ID_INTEL_7500_1_ERR -+#define PCI_DEVICE_ID_INTEL_7500_1_ERR 0x2541 -+#endif /* PCI_DEVICE_ID_INTEL_7500_1_ERR */ -+ -+#ifndef PCI_DEVICE_ID_INTEL_7501_0 -+#define PCI_DEVICE_ID_INTEL_7501_0 0x254c -+#endif /* PCI_DEVICE_ID_INTEL_7501_0 */ -+ -+#ifndef PCI_DEVICE_ID_INTEL_7501_1_ERR -+#define PCI_DEVICE_ID_INTEL_7501_1_ERR 0x2541 -+#endif /* PCI_DEVICE_ID_INTEL_7501_1_ERR */ -+ -+#ifndef PCI_DEVICE_ID_INTEL_7505_0 -+#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 -+#endif /* PCI_DEVICE_ID_INTEL_7505_0 */ -+ -+#ifndef PCI_DEVICE_ID_INTEL_7505_1_ERR -+#define PCI_DEVICE_ID_INTEL_7505_1_ERR 0x2551 -+#endif /* 
PCI_DEVICE_ID_INTEL_7505_1_ERR */ -+ -+ -+#define E7XXX_NR_CSROWS 8 /* number of csrows */ -+#define E7XXX_NR_DIMMS 8 /* FIXME - is this correct? */ -+ -+ -+/* E7XXX register addresses - device 0 function 0 */ -+#define E7XXX_DRB 0x60 /* DRAM row boundary register (8b) */ -+#define E7XXX_DRA 0x70 /* DRAM row attribute register (8b) */ -+ /* -+ * 31 Device width row 7 0=x8 1=x4 -+ * 27 Device width row 6 -+ * 23 Device width row 5 -+ * 19 Device width row 4 -+ * 15 Device width row 3 -+ * 11 Device width row 2 -+ * 7 Device width row 1 -+ * 3 Device width row 0 -+ */ -+#define E7XXX_DRC 0x7C /* DRAM controller mode reg (32b) */ -+ /* -+ * 22 Number channels 0=1,1=2 -+ * 19:18 DRB Granularity 32/64MB -+ */ -+#define E7XXX_TOLM 0xC4 /* DRAM top of low memory reg (16b) */ -+#define E7XXX_REMAPBASE 0xC6 /* DRAM remap base address reg (16b) */ -+#define E7XXX_REMAPLIMIT 0xC8 /* DRAM remap limit address reg (16b) */ -+ -+/* E7XXX register addresses - device 0 function 1 */ -+#define E7XXX_DRAM_FERR 0x80 /* DRAM first error register (8b) */ -+#define E7XXX_DRAM_NERR 0x82 /* DRAM next error register (8b) */ -+#define E7XXX_DRAM_CELOG_ADD 0xA0 /* DRAM first correctable memory */ -+ /* error address register (32b) */ -+ /* -+ * 31:28 Reserved -+ * 27:6 CE address (4k block 33:12) -+ * 5:0 Reserved -+ */ -+#define E7XXX_DRAM_UELOG_ADD 0xB0 /* DRAM first uncorrectable memory */ -+ /* error address register (32b) */ -+ /* -+ * 31:28 Reserved -+ * 27:6 CE address (4k block 33:12) -+ * 5:0 Reserved -+ */ -+#define E7XXX_DRAM_CELOG_SYNDROME 0xD0 /* DRAM first correctable memory */ -+ /* error syndrome register (16b) */ -+ -+enum e7xxx_chips { -+ E7500 = 0, -+ E7501, -+ E7505, -+ E7205, -+}; -+ -+ -+struct e7xxx_pvt { -+ struct pci_dev *bridge_ck; -+ u32 tolm; -+ u32 remapbase; -+ u32 remaplimit; -+ const struct e7xxx_dev_info *dev_info; -+}; -+ -+ -+struct e7xxx_dev_info { -+ u16 err_dev; -+ const char *ctl_name; -+}; -+ -+ -+static const struct e7xxx_dev_info e7xxx_devs[] = { -+ 
[E7500] = { -+ .err_dev = PCI_DEVICE_ID_INTEL_7500_1_ERR, -+ .ctl_name = "E7500" -+ }, -+ [E7501] = { -+ .err_dev = PCI_DEVICE_ID_INTEL_7501_1_ERR, -+ .ctl_name = "E7501" -+ }, -+ [E7505] = { -+ .err_dev = PCI_DEVICE_ID_INTEL_7505_1_ERR, -+ .ctl_name = "E7505" -+ }, -+ [E7205] = { -+ .err_dev = PCI_DEVICE_ID_INTEL_7205_1_ERR, -+ .ctl_name = "E7205" -+ }, -+}; -+ -+ -+/* FIXME - is this valid for both SECDED and S4ECD4ED? */ -+static inline int e7xxx_find_channel(u16 syndrome) -+{ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ if((syndrome & 0xff00)==0) -+ return(0); -+ if((syndrome & 0x00ff)==0) -+ return(1); -+ if((syndrome & 0xf000)==0) -+ return(0); -+ if((syndrome & 0x0f00)==0) -+ return(0); -+ return(1); -+} -+ -+ -+static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci, -+ unsigned long page) -+{ -+ u32 remap; -+ struct e7xxx_pvt *pvt = (struct e7xxx_pvt *)mci->pvt_info; -+ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ if(page < pvt->tolm) -+ return(page); -+ if((page >= 0x100000)&&(page < pvt->remapbase)) -+ return(page); -+ remap = (page - pvt->tolm) + pvt->remapbase; -+ if(remap < pvt->remaplimit) -+ return(remap); -+ printk(KERN_ERR "Invalid page %lx - out of range\n", page); -+ return(pvt->tolm-1); -+} -+ -+ -+static void process_ce(struct mem_ctl_info *mci) -+{ -+ u32 error_1b, page; -+ u16 syndrome; -+ int row; -+ int channel; -+ struct e7xxx_pvt *pvt = (struct e7xxx_pvt *)mci->pvt_info; -+ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ /* read the error address */ -+ pci_read_config_dword(pvt->bridge_ck,E7XXX_DRAM_CELOG_ADD, -+ &error_1b); -+ /* FIXME - should use PAGE_SHIFT */ -+ page = error_1b >>6; /* convert the address to 4k page */ -+ /* read the syndrome */ -+ pci_read_config_word(pvt->bridge_ck,E7XXX_DRAM_CELOG_SYNDROME, -+ &syndrome); -+ /* FIXME - check for -1 */ -+ row = bluesmoke_mc_find_csrow_by_page( mci, page ); -+ channel = e7xxx_find_channel(syndrome); /* convert syndrome to channel */ -+ 
bluesmoke_mc_handle_ce( mci, page, 0, syndrome, -+ row, channel, "e7xxx CE" ); -+} -+ -+ -+static void process_ce_no_info(struct mem_ctl_info *mci) -+{ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ bluesmoke_mc_handle_ce_no_info( mci, "e7xxx CE log register overflow" ); -+} -+ -+ -+static void process_ue(struct mem_ctl_info *mci) -+{ -+ u32 error_2b, block_page; -+ int row; -+ struct e7xxx_pvt *pvt = (struct e7xxx_pvt *)mci->pvt_info; -+ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ /* read the error address */ -+ pci_read_config_dword( pvt->bridge_ck, E7XXX_DRAM_UELOG_ADD, -+ &error_2b ); -+ /* FIXME - should use PAGE_SHIFT */ -+ block_page = error_2b >>6; /* convert to 4k address */ -+ row = bluesmoke_mc_find_csrow_by_page( mci, block_page ); -+ bluesmoke_mc_handle_ue( mci, block_page, 0, row, "e7xxx UE" ); -+} -+ -+ -+static void process_ue_no_info(struct mem_ctl_info *mci) -+{ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ bluesmoke_mc_handle_ue_no_info( mci, "e7xxx UE log register overflow" ); -+} -+ -+ -+static void e7xxx_check(struct mem_ctl_info *mci) -+{ -+ u8 error_one, error_next; -+ struct e7xxx_pvt *pvt = (struct e7xxx_pvt *)mci->pvt_info; -+ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ pci_read_config_byte(pvt->bridge_ck,E7XXX_DRAM_FERR,&error_one); -+ pci_read_config_byte(pvt->bridge_ck,E7XXX_DRAM_NERR,&error_next); -+ -+ /* clear any error bits */ -+ if(error_one & 3) { -+ pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_FERR, 0x03, 0x03); -+ } -+ if(error_next & 3) { -+ pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_NERR, 0x03, 0x03); -+ } -+ -+ /* decode and report errors */ -+ if(error_one & 1) { /* check first error correctable */ -+ process_ce(mci); -+ if(error_next & 1) { /* check next error correctable */ -+ process_ce_no_info(mci); -+ } -+ if(error_next & 2) { /* check next error uncorrectable */ -+ process_ue(mci); -+ } -+ } else if(error_one & 2) { /* check first error uncorrectable */ -+ 
process_ue(mci); -+ if(error_next & 1) { /* check next error correctable */ -+ process_ce(mci); -+ } -+ if(error_next & 2) { /* check next error uncorrectable */ -+ process_ue_no_info(mci); -+ } -+ } -+} -+ -+ -+static int e7xxx_probe1( struct pci_dev *pdev, int dev_idx ) -+{ -+ int rc = -ENODEV; -+ int index; -+ u16 pci_data; -+ struct mem_ctl_info *mci = NULL; -+ struct e7xxx_pvt *pvt = NULL; -+ u32 drc; -+ int drc_chan; /* Number of channels 0=1chan,1=2chan */ -+ int drc_drbg; /* DRB granularity 0=32mb,1=64mb */ -+ int drc_ddim; /* DRAM Data Integrity Mode 0=none,2=edac */ -+ u32 dra; -+ unsigned long last_cumul_size; -+ -+ -+ debugf0( "MC: " __FILE__ ": %s(): mci\n", __func__ ); -+ -+ /* need to find out the number of channels */ -+ pci_read_config_dword(pdev, E7XXX_DRC, &drc); -+ drc_chan = ( ( drc >> 22 ) & 0x1 ); -+ drc_drbg = ( drc >> 18 ) & 0x3; -+ drc_ddim = ( drc >> 20 ) & 0x3; -+ -+ mci = bluesmoke_mc_init_structs(sizeof(*pvt), -+ E7XXX_NR_CSROWS, -+ drc_chan + 1); -+ -+ if ( ! mci ) { -+ rc = -ENOMEM; -+ goto FAIL_FINISHED; -+ } -+ -+ debugf3( "MC: " __FILE__ ": %s(): init mci\n", __func__ ); -+ -+ mci->mtype_cap = MEM_FLAG_RDDR; -+ mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED | EDAC_FLAG_S4ECD4ED; -+ /* FIXME - what if different memory types are in different csrows? */ -+ mci->mod_name = BS_MOD_STR; -+ mci->mod_ver = "$Revision: 1.5 $"; -+ mci->pdev = pdev; -+ -+ debugf3( "MC: " __FILE__ ": %s(): init pvt\n", __func__ ); -+ pvt = (struct e7xxx_pvt *)mci->pvt_info; -+ pvt->dev_info = &e7xxx_devs[dev_idx]; -+ pvt->bridge_ck = pci_find_device( PCI_VENDOR_ID_INTEL, -+ pvt->dev_info->err_dev, -+ pvt->bridge_ck ); -+ if ( ! 
pvt->bridge_ck ) { -+ printk( KERN_ERR -+ "MC: error reporting device not found:" -+ "vendor %x device 0x%x (broken BIOS?)\n", -+ PCI_VENDOR_ID_INTEL, e7xxx_devs[dev_idx].err_dev ); -+ goto FAIL_FINISHED; -+ } -+ -+ debugf3( "MC: " __FILE__ ": %s(): more mci init\n", __func__ ); -+ mci->ctl_name = pvt->dev_info->ctl_name; -+ -+ mci->edac_check = e7xxx_check; -+ /* FIXME - why isn't clear_err set to something? */ -+ mci->clear_err = NULL; -+ mci->ctl_page_to_phys = ctl_page_to_phys; -+ -+ /* find out the device types */ -+ pci_read_config_dword(pdev, E7XXX_DRA, &dra); -+ -+ /* -+ * The dram row boundary (DRB) reg values are boundary address -+ * for each DRAM row with a granularity of 32 or 64MB (single/dual -+ * channel operation). DRB regs are cumulative; therefore DRB7 will -+ * contain the total memory contained in all eight rows. -+ */ -+ for( last_cumul_size = index = 0; index < mci->nr_csrows; index++ ) { -+ u8 value; -+ u32 cumul_size; -+ /* mem_dev 0=x8, 1=x4 */ -+ int mem_dev = ( dra >> ( index * 4 + 3 ) ) & 0x1; -+ struct csrow_info *csrow = &mci->csrows[ index ]; -+ -+ pci_read_config_byte(mci->pdev, E7XXX_DRB + index, &value); -+ /* convert a 64 or 32 MiB DRB to a page size. */ -+ cumul_size = value << (25 + drc_drbg - PAGE_SHIFT ); -+ debugf3( "MC: " __FILE__ ": %s(): (%d) cumul_size 0x%x\n", -+ __func__, index, cumul_size ); -+ if ( cumul_size == last_cumul_size ) { -+ continue; /* not populated */ -+ } -+ -+ csrow->first_page = last_cumul_size; -+ csrow->last_page = cumul_size - 1; -+ csrow->nr_pages = cumul_size - last_cumul_size; -+ last_cumul_size = cumul_size; -+ csrow->grain = 1 << 12; /* 4KiB - resolution of CELOG */ -+ csrow->mtype = MEM_RDDR; /* only one type supported */ -+ csrow->dtype = mem_dev ? 
DEV_X4 : DEV_X8; -+ -+ /* -+ * if single channel or x8 devices then SECDED -+ * if dual channel and x4 then S4ECD4ED -+ */ -+ if ( drc_ddim ) { -+ if ( drc_chan && mem_dev ) { -+ csrow->edac_mode = EDAC_S4ECD4ED; -+ mci->edac_cap |= EDAC_FLAG_S4ECD4ED; -+ } else { -+ csrow->edac_mode = EDAC_SECDED; -+ mci->edac_cap |= EDAC_FLAG_SECDED; -+ } -+ } else { -+ csrow->edac_mode = EDAC_NONE; -+ } -+ } -+ -+ mci->edac_cap |= EDAC_FLAG_NONE; -+ -+ debugf3( "MC: " __FILE__ ": %s(): tolm, remapbase, remaplimit\n", __func__ ); -+ /* load the top of low memory, remap base, and remap limit vars */ -+ pci_read_config_word(mci->pdev, E7XXX_TOLM, &pci_data); -+ pvt->tolm = ((u32)pci_data) << 4; -+ pci_read_config_word(mci->pdev, E7XXX_REMAPBASE, &pci_data); -+ pvt->remapbase = ((u32)pci_data) << 14; -+ pci_read_config_word(mci->pdev, E7XXX_REMAPLIMIT, &pci_data); -+ pvt->remaplimit = ((u32)pci_data) << 14; -+ printk( "tolm = %x, remapbase = %x, remaplimit = %x\n", -+ pvt->tolm, pvt->remapbase, pvt->remaplimit); -+ -+ /* clear any pending errors, or initial state bits */ -+ pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_FERR, 0x03, 0x03); -+ pci_write_bits8(pvt->bridge_ck, E7XXX_DRAM_NERR, 0x03, 0x03); -+ -+ if ( 0 != bluesmoke_mc_add_mc( mci ) ) { -+ debugf3( "MC: " __FILE__ ": %s(): failed bluesmoke_mc_add_mc()\n", __func__ ); -+ goto FAIL_FINISHED; -+ } -+ -+ /* get this far and it's successful */ -+ debugf3( "MC: " __FILE__ ": %s(): success\n", __func__ ); -+ rc = 0; -+ goto FINISHED; -+ -+ FAIL_FINISHED: -+ if ( mci ) { -+ kfree( mci ); -+ } -+ FINISHED: -+ return( rc ); -+} -+ -+ -+#ifdef CONFIG_PM -+ -+static int e7xxx_suspend (struct pci_dev *pdev, u32 state) -+{ -+ debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); -+ -+ return -ENOSYS; -+} -+ -+ -+static int e7xxx_resume (struct pci_dev *pdev) -+{ -+ debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); -+ -+ return -ENOSYS; -+} -+ -+#endif /* CONFIG_PM */ -+ -+ -+/* returns count (>= 0), or negative on error */ 
-+static int __devinit e7xxx_init_one( struct pci_dev *pdev, -+ const struct pci_device_id *ent ) -+{ -+ int rc; -+ -+ debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ /* wake up and enable device */ -+ if (pci_enable_device (pdev)) { -+ rc = -EIO; -+ } else { -+ rc = e7xxx_probe1( pdev, ent->driver_data ); -+ } -+ return rc; -+} -+ -+ -+static void __devexit e7xxx_remove_one( struct pci_dev *pdev ) -+{ -+ struct mem_ctl_info *mci; -+ -+ debugf0( __FILE__ ": %s()\n", __func__); -+ -+ if ( NULL == ( mci = bluesmoke_mc_find_mci_by_pdev( pdev ) ) ) { -+ goto FINISHED; -+ } -+ -+ if ( 0 != bluesmoke_mc_del_mc( mci ) ) { -+ goto FINISHED; -+ } -+ -+ kfree( mci ); -+ -+ FINISHED: -+ return; -+} -+ -+ -+static const struct pci_device_id e7xxx_pci_tbl[] __devinitdata = { -+ { PCI_VEND_DEV( INTEL, 7205_0 ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7205 }, -+ { PCI_VEND_DEV( INTEL, 7500_0 ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7500 }, -+ { PCI_VEND_DEV( INTEL, 7501_0 ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7501 }, -+ { PCI_VEND_DEV( INTEL, 7505_0 ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7505 }, -+ {0,} /* 0 terminated list. 
*/ -+}; -+ -+MODULE_DEVICE_TABLE(pci, e7xxx_pci_tbl); -+ -+ -+static struct pci_driver e7xxx_driver = { -+ .name = BS_MOD_STR, -+ .probe = e7xxx_init_one, -+ .remove = __devexit_p(e7xxx_remove_one), -+ .id_table = e7xxx_pci_tbl, -+#ifdef CONFIG_PM -+ .suspend = e7xxx_suspend, -+ .resume = e7xxx_resume, -+#endif /* CONFIG_PM */ -+}; -+ -+ -+int __init e7xxx_init(void) -+{ -+ int pci_rc; -+ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ pci_rc = pci_module_init( &e7xxx_driver ); -+ if ( pci_rc < 0 ) return pci_rc; -+ -+ return 0; -+} -+ -+ -+static void __exit e7xxx_exit(void) -+{ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ pci_unregister_driver( &e7xxx_driver ); -+} -+ -+ -+module_init(e7xxx_init); -+module_exit(e7xxx_exit); -+ -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n" -+ "Based on.work by Dan Hollis et al"); -+MODULE_DESCRIPTION("MC support for Intel e7xxx memory controllers"); -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_i82875p.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/bluesmoke_i82875p.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_i82875p.c 2004-12-17 12:46:23.000000000 -0500 -@@ -0,0 +1,510 @@ -+/* -+ * AMD 76x Memory Controller kernel module -+ * (C) 2003 Linux Networx (http://lnxi.com) -+ * This file may be distributed under the terms of the -+ * GNU General Public License. 
-+ * -+ * Written by Thayne Harbaugh -+ * -+ * $Id: bluesmoke_i82875p.c,v 1.5 2004/11/18 22:19:46 thayne Exp $ -+ * -+ */ -+ -+ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/init.h> -+ -+#include <linux/pci.h> -+#include <linux/pci_ids.h> -+ -+#include <linux/slab.h> -+ -+#include "bluesmoke_mc.h" -+ -+ -+#ifndef PCI_DEVICE_ID_INTEL_82875_0 -+#define PCI_DEVICE_ID_INTEL_82875_0 0x2578 -+#endif /* PCI_DEVICE_ID_INTEL_82875_0 */ -+ -+#ifndef PCI_DEVICE_ID_INTEL_82875_6 -+#define PCI_DEVICE_ID_INTEL_82875_6 0x257e -+#endif /* PCI_DEVICE_ID_INTEL_82875_6 */ -+ -+ -+/* four csrows in dual channel, eight in single channel */ -+#define I82875P_NR_CSROWS(nr_chans) (8/(nr_chans)) -+ -+ -+/* Intel 82875p register addresses - device 0 function 0 - DRAM Controller */ -+#define I82875P_EAP 0x58 /* Error Address Pointer (32b) -+ * -+ * 31:12 block address -+ * 11:0 reserved -+ */ -+ -+#define I82875P_DERRSYN 0x5c /* DRAM Error Syndrome (8b) -+ * -+ * 7:0 DRAM ECC Syndrome -+ */ -+ -+#define I82875P_DES 0x5d /* DRAM Error Status (8b) -+ * -+ * 7:1 reserved -+ * 0 Error channel 0/1 -+ */ -+ -+#define I82875P_ERRSTS 0xc8 /* Error Status Register (16b) -+ * -+ * 15:10 reserved -+ * 9 non-DRAM lock error (ndlock) -+ * 8 Sftwr Generated SMI -+ * 7 ECC UE -+ * 6 reserved -+ * 5 MCH detects unimplemented cycle -+ * 4 AGP access outside GA -+ * 3 Invalid AGP access -+ * 2 Invalid GA translation table -+ * 1 Unsupported AGP command -+ * 0 ECC CE -+ */ -+ -+#define I82875P_ERRCMD 0xca /* Error Command (16b) -+ * -+ * 15:10 reserved -+ * 9 SERR on non-DRAM lock -+ * 8 SERR on ECC UE -+ * 7 SERR on ECC CE -+ * 6 target abort on high exception -+ * 5 detect unimplemented cyc -+ * 4 AGP access outside of GA -+ * 3 SERR on invalid AGP access -+ * 2 invalid translation table -+ * 1 SERR on unsupported AGP command -+ * 0 reserved -+ */ -+ -+ -+/* Intel 82875p register addresses - device 6 function 0 - DRAM Controller */ -+#define I82875P_PCICMD6 0x04 /* PCI Command 
Register (16b) -+ * -+ * 15:10 reserved -+ * 9 fast back-to-back - ro 0 -+ * 8 SERR enable - ro 0 -+ * 7 addr/data stepping - ro 0 -+ * 6 parity err enable - ro 0 -+ * 5 VGA palette snoop - ro 0 -+ * 4 mem wr & invalidate - ro 0 -+ * 3 special cycle - ro 0 -+ * 2 bus master - ro 0 -+ * 1 mem access dev6 - 0(dis),1(en) -+ * 0 IO access dev3 - 0(dis),1(en) -+ */ -+ -+#define I82875P_BAR6 0x10 /* Mem Delays Base ADDR Reg (32b) -+ * -+ * 31:12 mem base addr [31:12] -+ * 11:4 address mask - ro 0 -+ * 3 prefetchable - ro 0(non),1(pre) -+ * 2:1 mem type - ro 0 -+ * 0 mem space - ro 0 -+ */ -+ -+/* Intel 82875p MMIO register space - device 0 function 0 - MMR space */ -+ -+#define I82875P_DRB_SHIFT 26 /* 64MiB grain */ -+#define I82875P_DRB 0x00 /* DRAM Row Boundary (8b x 8) -+ * -+ * 7 reserved -+ * 6:0 64MiB row boundary addr -+ */ -+ -+#define I82875P_DRA 0x10 /* DRAM Row Attribute (4b x 8) -+ * -+ * 7 reserved -+ * 6:4 row attr row 1 -+ * 3 reserved -+ * 2:0 row attr row 0 -+ * -+ * 000 = 4KiB -+ * 001 = 8KiB -+ * 010 = 16KiB -+ * 011 = 32KiB -+ */ -+ -+#define I82875P_DRC 0x68 /* DRAM Controller Mode (32b) -+ * -+ * 31:30 reserved -+ * 29 init complete -+ * 28:23 reserved -+ * 22:21 nr chan 00=1,01=2 -+ * 20 reserved -+ * 19:18 Data Integ Mode 00=none,01=ecc -+ * 17:11 reserved -+ * 10:8 refresh mode -+ * 7 reserved -+ * 6:4 mode select -+ * 3:2 reserved -+ * 1:0 DRAM type 01=DDR -+ */ -+ -+ -+enum i82875p_chips { -+ I82875P = 0, -+}; -+ -+ -+struct i82875p_pvt { -+ struct pci_dev *ovrfl_pdev; -+ void *ovrfl_window; -+}; -+ -+ -+struct i82875p_dev_info { -+ const char *ctl_name; -+}; -+ -+ -+static const struct i82875p_dev_info i82875p_devs[] = { -+ [I82875P] = { -+ .ctl_name = "i828875p" -+ }, -+}; -+ -+ -+static void i82875p_check(struct mem_ctl_info *mci) -+{ -+ u8 des; -+ u8 derrsyn; -+ u16 errsts, errsts2; -+ u32 eap; -+ int row; -+ int multi_chan = mci->csrows[0].nr_channels - 1; -+ -+ debugf1( "MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__ ); -+ -+ /* -+ * 
This is a mess because there is no atomic way to read all -+ * the registers at once and the registers can transition -+ * from CE being overwritten by UE. -+ */ -+ pci_read_config_word( mci->pdev, I82875P_ERRSTS, &errsts ); -+ pci_read_config_dword( mci->pdev, I82875P_EAP, &eap ); -+ pci_read_config_byte( mci->pdev, I82875P_DES, &des ); -+ pci_read_config_byte( mci->pdev, I82875P_DERRSYN, &derrsyn ); -+ pci_read_config_word( mci->pdev, I82875P_ERRSTS, &errsts2 ); -+ -+ pci_write_bits16( mci->pdev, I82875P_ERRSTS, 0x0081, 0x0081 ); -+ -+ /* -+ * If the error is the same then we can for both reads then -+ * the first set of reads is valid. If there is a change then -+ * there is a CE no info and the second set of reads is valid -+ * and should be UE info. -+ */ -+ if (! (errsts2 & 0x0081) ) return; -+ if ( (errsts ^ errsts2) & 0x0081 ) { -+ bluesmoke_mc_handle_ce_no_info( mci, "UE overwrote CE" ); -+ errsts = errsts2; -+ pci_read_config_dword( mci->pdev, I82875P_EAP, &eap ); -+ pci_read_config_byte( mci->pdev, I82875P_DES, &des ); -+ pci_read_config_byte( mci->pdev, I82875P_DERRSYN, &derrsyn ); -+ } -+ -+ eap >>= PAGE_SHIFT; -+ row = bluesmoke_mc_find_csrow_by_page( mci, eap ); -+ -+ if ( errsts & 0x0080 ) { -+ bluesmoke_mc_handle_ue( mci, eap, 0, row, "i82875p UE" ); -+ } else { -+ bluesmoke_mc_handle_ce( mci, eap, 0, derrsyn, row, -+ multi_chan ? 
(des & 0x1) : 0, -+ "i82875p UE" ); -+ } -+ -+ return; -+} -+ -+ -+static int i82875p_probe1( struct pci_dev *pdev, int dev_idx ) -+{ -+ int rc = -ENODEV; -+ int index; -+ struct mem_ctl_info *mci = NULL; -+ struct i82875p_pvt *pvt = NULL; -+ unsigned long last_cumul_size; -+ struct pci_dev *ovrfl_pdev; -+ void *ovrfl_window = NULL; -+ -+ u32 drc; -+ u32 drc_chan; /* Number of channels 0=1chan,1=2chan */ -+ u32 nr_chans; -+ u32 drc_ddim; /* DRAM Data Integrity Mode 0=none,2=edac */ -+ -+ debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ ovrfl_pdev = pci_find_device( PCI_VEND_DEV( INTEL, 82875_6 ), NULL ); -+ -+ if ( ! ovrfl_pdev ) { -+ /* -+ * Intel tells BIOS developers to hide device 6 which -+ * configures the overflow device access containing -+ * the DRBs - this is where we expose device 6. -+ * http://www.x86-secret.com/articles/tweak/pat/patsecrets-2.htm -+ */ -+ pci_write_bits8( pdev, 0xf4, 0x2, 0x2 ); -+ ovrfl_pdev = pci_scan_single_device( pdev->bus, PCI_DEVFN( 6, 0 ) ); -+ if ( ! 
ovrfl_pdev ) { -+ goto FAIL_FINISHED; -+ } -+ } -+ -+#ifdef CONFIG_PROC_FS -+ if ( !ovrfl_pdev->procent && pci_proc_attach_device(ovrfl_pdev)) { -+ printk( KERN_ERR "MC: " __FILE__ -+ ": %s(): Failed to attach overflow device\n", -+ __func__ ); -+ goto FAIL_FINISHED; -+ } -+#endif /* CONFIG_PROC_FS */ -+ if (pci_enable_device(ovrfl_pdev)) { -+ printk( KERN_ERR "MC: " __FILE__ -+ ": %s(): Failed to enable overflow device\n", -+ __func__ ); -+ goto FAIL_FINISHED; -+ } -+ if (pci_request_regions(ovrfl_pdev, pci_name(ovrfl_pdev))) { -+ printk( KERN_ERR "MC: " __FILE__ -+ ": %s(): Failed to reserve regions - broken BIOS?\n", -+ __func__ ); -+#ifdef CORRECT_BIOS -+ goto FAIL_FINISHED; -+#endif /* CORRECT_BIOS */ -+ } -+ -+ /* cache is irrelevant for PCI bus reads/writes */ -+ ovrfl_window = ioremap_nocache(pci_resource_start(ovrfl_pdev, 0), -+ pci_resource_len(ovrfl_pdev, 0)); -+ -+ if (!ovrfl_window) { -+ printk( KERN_ERR "MC: " __FILE__ -+ ": %s(): Failed to ioremap bar6\n", -+ __func__ ); -+ goto FAIL_FINISHED; -+ } -+ -+ /* need to find out the number of channels */ -+ drc = readl(ovrfl_window + I82875P_DRC); -+ drc_chan = ( ( drc >> 21 ) & 0x1 ); -+ nr_chans = drc_chan + 1; -+ drc_ddim = ( drc >> 18 ) & 0x1; -+ -+ mci = bluesmoke_mc_init_structs(sizeof(*pvt), -+ I82875P_NR_CSROWS(nr_chans), -+ nr_chans ); -+ -+ if ( ! 
mci ) { -+ rc = -ENOMEM; -+ goto FAIL_FINISHED; -+ } -+ -+ debugf3( "MC: " __FILE__ ": %s(): init mci\n", __func__ ); -+ -+ mci->pdev = pdev; -+ mci->mtype_cap = MEM_FLAG_RDDR; -+ -+ mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; -+ mci->edac_cap = EDAC_FLAG_UNKNOWN; -+ /* adjust FLAGS */ -+ -+ mci->mod_name = BS_MOD_STR; -+ mci->mod_ver = "$Revision: 1.5 $"; -+ mci->ctl_name = i82875p_devs[dev_idx].ctl_name; -+ mci->edac_check = i82875p_check; -+ mci->clear_err = NULL; -+ mci->ctl_page_to_phys = NULL; -+ -+ debugf3( "MC: " __FILE__ ": %s(): init pvt\n", __func__ ); -+ -+ pvt = (struct i82875p_pvt *)mci->pvt_info; -+ pvt->ovrfl_pdev = ovrfl_pdev; -+ pvt->ovrfl_window = ovrfl_window; -+ -+ /* -+ * The dram row boundary (DRB) reg values are boundary address -+ * for each DRAM row with a granularity of 32 or 64MB (single/dual -+ * channel operation). DRB regs are cumulative; therefore DRB7 will -+ * contain the total memory contained in all eight rows. -+ */ -+ for( last_cumul_size = index = 0; index < mci->nr_csrows; index++ ) { -+ u8 value; -+ u32 cumul_size; -+ struct csrow_info *csrow = &mci->csrows[ index ]; -+ -+ value = readb(ovrfl_window + I82875P_DRB + index); -+ cumul_size = value << ( I82875P_DRB_SHIFT - PAGE_SHIFT ); -+ debugf3( "MC: " __FILE__ ": %s(): (%d) cumul_size 0x%x\n", -+ __func__, index, cumul_size ); -+ if ( cumul_size == last_cumul_size ) { -+ continue; /* not populated */ -+ } -+ -+ csrow->first_page = last_cumul_size; -+ csrow->last_page = cumul_size - 1; -+ csrow->nr_pages = cumul_size - last_cumul_size; -+ last_cumul_size = cumul_size; -+ csrow->grain = 1 << 12; /* I82875P_EAP has 4KiB reolution */ -+ csrow->mtype = MEM_DDR; -+ csrow->dtype = DEV_UNKNOWN; -+ csrow->edac_mode = drc_ddim ? 
EDAC_SECDED : EDAC_NONE; -+ } -+ -+ /* clear counters */ -+ pci_write_bits16( mci->pdev, I82875P_ERRSTS, 0x0081, 0x0081 ); -+ -+ if ( 0 != bluesmoke_mc_add_mc( mci ) ) { -+ debugf3( "MC: " __FILE__ -+ ": %s(): failed bluesmoke_mc_add_mc()\n", __func__ ); -+ goto FAIL_FINISHED; -+ } -+ -+ /* get this far and it's successful */ -+ debugf3( "MC: " __FILE__ ": %s(): success\n", __func__ ); -+ rc = 0; -+ goto FINISHED; -+ -+ FAIL_FINISHED: -+ if ( mci ) { -+ kfree( mci ); -+ } -+ -+ if (ovrfl_window) { -+ iounmap(ovrfl_window); -+ } -+ -+ if (ovrfl_pdev) { -+ pci_release_regions( ovrfl_pdev ); -+ pci_disable_device( ovrfl_pdev ); -+ } -+ -+ FINISHED: -+ /* NOTE: the ovrfl proc entry and pci_dev are intentionally left */ -+ return( rc ); -+} -+ -+ -+#ifdef CONFIG_PM -+ -+static int i82875p_suspend (struct pci_dev *pdev, u32 state) -+{ -+ debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); -+ -+ return -ENOSYS; -+} -+ -+ -+static int i82875p_resume (struct pci_dev *pdev) -+{ -+ debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); -+ -+ return -ENOSYS; -+} -+ -+#endif /* CONFIG_PM */ -+ -+ -+/* returns count (>= 0), or negative on error */ -+static int __devinit i82875p_init_one( struct pci_dev *pdev, -+ const struct pci_device_id *ent ) -+{ -+ int rc; -+ -+ debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ if (pci_enable_device (pdev)) { -+ rc = -EIO; -+ } else { -+ rc = i82875p_probe1( pdev, ent->driver_data ); -+ } -+ return rc; -+} -+ -+ -+static void __devexit i82875p_remove_one( struct pci_dev *pdev ) -+{ -+ struct mem_ctl_info *mci; -+ struct i82875p_pvt *pvt = NULL; -+ -+ debugf0( __FILE__ ": %s()\n", __func__); -+ -+ if ( NULL == ( mci = bluesmoke_mc_find_mci_by_pdev( pdev ) ) ) { -+ goto FINISHED; -+ } -+ -+ pvt = (struct i82875p_pvt *)mci->pvt_info; -+ if (pvt->ovrfl_window) { -+ iounmap(pvt->ovrfl_window); -+ } -+ -+ if (pvt->ovrfl_pdev) { -+ pci_release_regions( pvt->ovrfl_pdev ); -+ pci_disable_device( pvt->ovrfl_pdev ); -+ } -+ -+ if ( 0 != 
bluesmoke_mc_del_mc( mci ) ) { -+ goto FINISHED; -+ } -+ -+ kfree( mci ); -+ -+ FINISHED: -+ return; -+} -+ -+ -+static const struct pci_device_id i82875p_pci_tbl[] __devinitdata = { -+ { PCI_VEND_DEV( INTEL, 82875_0 ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82875P }, -+ {0,} /* 0 terminated list. */ -+}; -+ -+MODULE_DEVICE_TABLE(pci, i82875p_pci_tbl); -+ -+ -+static struct pci_driver i82875p_driver = { -+ .name = BS_MOD_STR, -+ .probe = i82875p_init_one, -+ .remove = __devexit_p(i82875p_remove_one), -+ .id_table = i82875p_pci_tbl, -+#ifdef CONFIG_PM -+ .suspend = i82875p_suspend, -+ .resume = i82875p_resume, -+#endif /* CONFIG_PM */ -+}; -+ -+ -+int __init i82875p_init(void) -+{ -+ int pci_rc; -+ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ pci_rc = pci_module_init( &i82875p_driver ); -+ if ( pci_rc < 0 ) return pci_rc; -+ -+ return 0; -+} -+ -+ -+static void __exit i82875p_exit(void) -+{ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ pci_unregister_driver( &i82875p_driver ); -+} -+ -+ -+module_init(i82875p_init); -+module_exit(i82875p_exit); -+ -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh"); -+MODULE_DESCRIPTION("MC support for Intel 82875 memory hub controllers"); -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_k8.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/bluesmoke_k8.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_k8.c 2004-12-17 12:46:23.000000000 -0500 -@@ -0,0 +1,1252 @@ -+/* -+ * AMD K8 class Memory Controller kernel module -+ * (C) 2003 Linux Networx (http://lnxi.com) -+ * This file may be distributed under the terms of the -+ * GNU General Public License. 
-+ * -+ * Written by Thayne Harbaugh -+ * -+ * $Id: bluesmoke_k8.c,v 1.6 2004/11/23 01:34:25 thayne Exp $ -+ * -+ */ -+ -+ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/init.h> -+ -+#include <linux/pci.h> -+#include <linux/pci_ids.h> -+ -+#include <linux/slab.h> -+ -+#include "bluesmoke_mc.h" -+ -+ -+#ifndef PCI_DEVICE_ID_AMD_OPT_0_HT -+#define PCI_DEVICE_ID_AMD_OPT_0_HT 0x1100 -+#endif /* PCI_DEVICE_ID_AMD_OPT_0_HT */ -+ -+#ifndef PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP -+#define PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP 0x1101 -+#endif /* PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP */ -+ -+#ifndef PCI_DEVICE_ID_AMD_OPT_2_MEMCTL -+#define PCI_DEVICE_ID_AMD_OPT_2_MEMCTL 0x1102 -+#endif /* PCI_DEVICE_ID_AMD_OPT_2_MEMCTL */ -+ -+#ifndef PCI_DEVICE_ID_AMD_OPT_3_MISCCTL -+#define PCI_DEVICE_ID_AMD_OPT_3_MISCCTL 0x1103 -+#endif /* PCI_DEVICE_ID_AMD_OPT_3_MISCCTL */ -+ -+ -+#define K8_NR_CSROWS 8 -+ -+ -+/* K8 register addresses - device 0 function 1 - Address Map */ -+#define K8_DBR 0x40 /* DRAM Base Register (8 x 32b -+ * interlaced with K8_DLR) -+ * -+ * 31:16 DRAM Base addr 39:24 -+ * 15:11 reserved -+ * 10:8 interleave enable -+ * 7:2 reserved -+ * 1 write enable -+ * 0 read enable -+ */ -+#define K8_DLR 0x44 /* DRAM Limit Register (8 x 32b -+ * interlaced with K8_DBR) -+ * -+ * 31:16 DRAM Limit addr 32:24 -+ * 15:11 reserved -+ * 10:8 interleave select -+ * 7:3 reserved -+ * 2:0 destination node ID -+ */ -+ -+ -+/* K8 register addresses - device 0 function 2 - DRAM controller */ -+#define K8_DCSB 0x40 /* DRAM Chip-Select Base (8 x 32b) -+ * -+ * 31:21 Base addr high 35:25 -+ * 20:16 reserved -+ * 15:9 Base addr low 19:13 (interlvd) -+ * 8:1 reserved -+ * 0 chip-select bank enable -+ */ -+#define K8_DCSM 0x60 /* DRAM Chip-Select Mask (8 x 32b) -+ * -+ * 31:30 reserved -+ * 29:21 addr mask high 33:25 -+ * 20:16 reserved -+ * 15:9 addr mask low 19:13 -+ * 8:0 reserved -+ */ -+ -+#define K8_DBAM 0x80 /* DRAM Base Addr Mapping (32b) */ -+#define K8_DCL 0x90 /* DRAM 
configuration low reg (32b) -+ * -+ * 31:28 reserved -+ * 27:25 Bypass Max: 000b=respect -+ * 24 Dissable receivers - no sockets -+ * 23:20 x4 DIMMS -+ * 19 32byte chunks -+ * 18 Unbuffered -+ * 17 ECC enabled -+ * 16 128/64 bit (dual/single chan) -+ * 15:14 R/W Queue bypass count -+ * 13 Self refresh -+ * 12 exit self refresh -+ * 11 mem clear status -+ * 10 DRAM enable -+ * 9 reserved -+ * 8 DRAM init -+ * 7:4 reserved -+ * 3 dis DQS hysteresis -+ * 2 QFC enabled -+ * 1 DRAM drive strength -+ * 0 Digital Locked Loop disable -+ */ -+ -+ -+/* K8 register addresses - device 0 function 3 - Misc Control */ -+#define K8_NBCTL 0x40 /* MCA NB Control (32b) -+ * -+ * 1 MCA UE Reporting -+ * 0 MCA CE Reporting -+ */ -+#define K8_NBCFG 0x44 /* MCA NB Config (32b) -+ * -+ * 23 Chip-kill x4 ECC enable -+ * 22 ECC enable -+ * 1 CPU ECC enable -+ */ -+#define K8_NBSL 0x48 /* MCA NB Status Low (32b) -+ * -+ * 31:24 Syndrome 15:8 chip-kill x4 -+ * 23:20 reserved -+ * 19:16 Extended err code -+ * 15:0 Err code -+ */ -+#define K8_NBSH 0x4C /* MCA NB Status High (32b) -+ * -+ * 31 Err valid -+ * 30 Err overflow -+ * 29 Uncorrected err -+ * 28 Err enable -+ * 27 Misc err reg valid -+ * 26 Err addr valid -+ * 25 proc context corrupt -+ * 24:23 reserved -+ * 22:15 Syndrome 7:0 -+ * 14 CE -+ * 13 UE -+ * 12:9 reserved -+ * 8 err found by scrubber -+ * 7 reserved -+ * 6:4 Hyper-transport link number -+ * 3:2 reserved -+ * 1 Err CPU 1 -+ * 0 Err CPU 0 -+ */ -+#define K8_NBEAL 0x50 /* MCA NB err addr low (32b) -+ * -+ * 31:3 Err addr low 31:3 -+ * 2:0 reserved -+ */ -+#define K8_NBEAH 0x54 /* MCA NB err addr high (32b) -+ * -+ * 31:8 reserved -+ * 7:0 Err addr high 39:32 -+ */ -+#define K8_NBCAP 0xE8 /* MCA NB capabilities (32b) -+ * -+ * 31:9 reserved -+ * 4 S4ECD4ED capable -+ * 3 SECDED capable -+ */ -+ -+ -+ /* MSR's */ -+ /* -+ * K8_MSR_MCxCTL (64b) -+ * (0x400,404,408,40C,410) -+ * 63 Enable reporting source 63 -+ * . -+ * . -+ * . 
-+ * 2 Enable error source 2 -+ * 1 Enable error source 1 -+ * 0 Enable error source 0 -+ */ -+ /* -+ * K8_MSR_MCxSTAT (64b) -+ * (0x401,405,409,40D,411) -+ * 63 Error valid -+ * 62 Status overflow -+ * 61 UE -+ * 60 Enabled error condition -+ * 59 Misc register valid (not used) -+ * 58 Err addr register valid -+ * 57 Processor context corrupt -+ * 56:32 Other information -+ * 31:16 Model specific error code -+ * 15:0 MCA err code -+ */ -+ /* -+ * K8_MSR_MCxADDR (64b) -+ * (0x402,406,40A,40E,412) -+ * 63:48 reserved -+ * 47:0 Address -+ */ -+ /* -+ * K8_MSR_MCxMISC (64b) -+ * (0x403,407,40B,40F,413) -+ * Unused on Athlon64 and K8 -+ */ -+ -+#define K8_MSR_MCGCTL 0x017b /* Machine Chk Global report ctl (64b) -+ * -+ * 31:5 reserved -+ * 4 North Bridge -+ * 3 Load/Store -+ * 2 Bus Unit -+ * 1 Instruction Cache -+ * 0 Data Cache -+ */ -+#define K8_MSR_MC4CTL 0x0410 /* North Bridge Check report ctl (64b) */ -+#define K8_MSR_MC4STAT 0x0411 /* North Bridge status (64b) */ -+#define K8_MSR_MC4ADDR 0x0412 /* North Bridge Address (64b) */ -+ -+ -+#define MCI2NID(mci) (PCI_SLOT(mci->pdev->devfn) - 0x18) -+ -+ -+enum k8_chips { -+ OPTERON = 0, -+}; -+ -+ -+struct k8_pvt { -+ struct pci_dev *addr_map; -+ struct pci_dev *misc_ctl; -+}; -+ -+ -+struct k8_dev_info { -+ const char *ctl_name; -+ u16 addr_map; -+ u16 misc_ctl; -+}; -+ -+ -+static const struct k8_dev_info k8_devs[] = { -+ [OPTERON] = { -+ .ctl_name = "Athlon64/Opteron", -+ .addr_map = PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP, -+ .misc_ctl = PCI_DEVICE_ID_AMD_OPT_3_MISCCTL -+ }, -+}; -+ -+ -+static inline void pci_find_related_function( unsigned int vendor, -+ unsigned int device, -+ struct pci_dev **from, -+ struct pci_dev *related ) -+{ -+ do { -+ *from = pci_find_device( vendor, device, *from ); -+ if ( ! 
*from ) return; -+ -+ if ( ((*from)->bus->number == related->bus->number) -+ && (PCI_SLOT((*from)->devfn) -+ == PCI_SLOT(related->devfn)) ) { -+ return; -+ } -+ } while ( 1 ); -+} -+ -+ -+/* FIXME - stolen from msr.c - the calls in msr.c could be exported */ -+#ifdef CONFIG_SMP -+ -+struct msr_command { -+ int cpu; -+ int err; -+ u32 reg; -+ u32 data[2]; -+}; -+ -+ -+static void msr_smp_wrmsr(void *cmd_block) -+{ -+ struct msr_command *cmd = (struct msr_command *) cmd_block; -+ -+ debugf1( "MC: " __FILE__ ": %s(): %d ? %d\n", -+ __func__, cmd->cpu, smp_processor_id() ); -+ -+ if ( cmd->cpu == smp_processor_id() ) { -+ debugf1( "MC: " __FILE__ ": %s(): Matched %d\n", -+ __func__, cmd->cpu ); -+ wrmsr(cmd->reg, cmd->data[0], cmd->data[1]); -+ } -+} -+ -+ -+static void msr_smp_rdmsr(void *cmd_block) -+{ -+ struct msr_command *cmd = (struct msr_command *) cmd_block; -+ -+ debugf1( "MC: " __FILE__ ": %s(): %d ? %d\n", -+ __func__, cmd->cpu, smp_processor_id() ); -+ -+ if ( cmd->cpu == smp_processor_id() ) { -+ debugf1( "MC: " __FILE__ ": %s(): Matched %d\n", -+ __func__, cmd->cpu ); -+ rdmsr(cmd->reg, cmd->data[0], cmd->data[1]); -+ } -+} -+ -+ -+static inline void do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) -+{ -+ struct msr_command cmd; -+ -+ debugf0( "MC: " __FILE__ ": %s(): %d\n", __func__, cpu ); -+ -+ if ( cpu == smp_processor_id() ) { -+ wrmsr(reg, eax, edx); -+ } else { -+ cmd.cpu = cpu; -+ cmd.reg = reg; -+ cmd.data[0] = eax; -+ cmd.data[1] = edx; -+ -+ smp_call_function(msr_smp_wrmsr, &cmd, 1, 1); -+ } -+} -+ -+ -+static inline void do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx) -+{ -+ struct msr_command cmd; -+ -+ debugf0( "MC: " __FILE__ ": %s(): %d\n", __func__, cpu ); -+ -+ if ( cpu == smp_processor_id() ) { -+ rdmsr(reg, eax, edx); -+ } else { -+ cmd.cpu = cpu; -+ cmd.reg = reg; -+ -+ smp_call_function(msr_smp_rdmsr, &cmd, 1, 1); -+ -+ *eax = cmd.data[0]; -+ *edx = cmd.data[1]; -+ } -+} -+ -+#else /* ! 
CONFIG_SMP */ -+ -+static inline void do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) -+{ -+ debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); -+ wrmsr(reg, eax, edx); -+} -+ -+ -+static inline void do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx) -+{ -+ debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); -+ rdmsr(reg, eax, edx); -+} -+ -+#endif /* ! CONFIG_SMP */ -+ -+ -+/* -+ * FIXME - This is a large chunk of memory to suck up just to decode the -+ * syndrome. It would be nice to discover a patter in the syndromes that -+ * could be used to quickly identify the channel. The big problems with -+ * this table is memory usage, lookup speed (could sort and binary search), -+ * correctness (there could be a transcription error). A zero in any nibble -+ * for a syndrom is always channel 0, but that only decodes some of the -+ * syndromes. Can anyone find any other patterns? -+ */ -+/* -+ * The comment in the left column is the nibble that is in error. The least -+ * significant nibble of the syndrome is the mask for the bits that are -+ * in error (need to be toggled) for the particular nibble. 
-+ */ -+#define SYNDROME_TABLE_SIZE 270 -+static const unsigned long syndromes_chan0[SYNDROME_TABLE_SIZE] = { -+ /*0*/ 0xe821, 0x7c32, 0x9413, 0xbb44, 0x5365, 0xc776, 0x2f57, 0xdd88, 0x35a9, 0xa1ba, 0x499b, 0x66cc, 0x8eed, 0x1afe, 0xf2df, -+ /*1*/ 0x5d31, 0xa612, 0xfb23, 0x9584, 0xc8b5, 0x3396, 0x6ea7, 0xeac8, 0xb7f9, 0x4cda, 0x11eb, 0x7f4c, 0x227d, 0xd95e, 0x846f, -+ /*2*/ 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, -+ /*3*/ 0x2021, 0x3032, 0x1013, 0x4044, 0x6065, 0x7076, 0x5057, 0x8088, 0xa0a9, 0xb0ba, 0x909b, 0xc0cc, 0xe0ed, 0xf0fe, 0xd0df, -+ /*4*/ 0x5041, 0xa082, 0xf0c3, 0x9054, 0xc015, 0x30d6, 0x6097, 0xe0a8, 0xb0e9, 0x402a, 0x106b, 0x70fc, 0x20bd, 0xd07e, 0x803f, -+ /*5*/ 0xbe21, 0xd732, 0x6913, 0x2144, 0x9f65, 0xf676, 0x4857, 0x3288, 0x8ca9, 0xe5ba, 0x5b9b, 0x13cc, 0xaded, 0xc4fe, 0x7adf, -+ /*6*/ 0x4951, 0x8ea2, 0xc7f3, 0x5394, 0x1ac5, 0xdd36, 0x9467, 0xa1e8, 0xe8b9, 0x2f4a, 0x661b, 0xf27c, 0xbb2d, 0x7cde, 0x358f, -+ /*7*/ 0x74e1, 0x9872, 0xec93, 0xd6b4, 0xa255, 0x4ec6, 0x3a27, 0x6bd8, 0x1f39, 0xf3aa, 0x874b, 0xbd6c, 0xc98d, 0x251e, 0x51ff, -+ /*8*/ 0x15c1, 0x2a42, 0x3f83, 0xcef4, 0xdb35, 0xe4b6, 0xf177, 0x4758, 0x5299, 0x6d1a, 0x78db, 0x89ac, 0x9c6d, 0xa3ee, 0xb62f, -+ /*9*/ 0x3d01, 0x1602, 0x2b03, 0x8504, 0xb805, 0x9306, 0xae07, 0xca08, 0xf709, 0xdc0a, 0xe10b, 0x4f0c, 0x720d, 0x590e, 0x640f, -+ /*a*/ 0x9801, 0xec02, 0x7403, 0x6b04, 0xf305, 0x8706, 0x1f07, 0xbd08, 0x2509, 0x510a, 0xc90b, 0xd60c, 0x4e0d, 0x3a0e, 0xa20f, -+ /*b*/ 0xd131, 0x6212, 0xb323, 0x3884, 0xe9b5, 0x5a96, 0x8ba7, 0x1cc8, 0xcdf9, 0x7eda, 0xafeb, 0x244c, 0xf57d, 0x465e, 0x976f, -+ /*c*/ 0xe1d1, 0x7262, 0x93b3, 0xb834, 0x59e5, 0xca56, 0x2b87, 0xdc18, 0x3dc9, 0xae7a, 0x4fab, 0x542c, 0x85fd, 0x164e, 0xf79f, -+ /*d*/ 0x6051, 0xb0a2, 0xd0f3, 0x1094, 0x70c5, 0xa036, 0xc067, 0x20e8, 0x40b9, 0x904a, 0x601b, 0x307c, 0x502d, 0x80de, 0xe08f, -+ /*e*/ 0xa4c1, 0xf842, 0x5c83, 0xe6f4, 0x4235, 0x1eb6, 0xba77, 0x7b58, 0xdf99, 
0x831a, 0x27db, 0x9dac, 0x396d, 0x65ee, 0xc12f, -+ /*f*/ 0x11c1, 0x2242, 0x3383, 0xc8f4, 0xd935, 0xeab6, 0xfb77, 0x4c58, 0x5d99, 0x6e1a, 0x7fdb, 0x84ac, 0x9562, 0xa6ee, 0xb72f, -+ -+ /*20*/ 0xbe01, 0xd702, 0x6903, 0x2104, 0x9f05, 0xf606, 0x4807, 0x3208, 0x8c09, 0xe50a, 0x5b0b, 0x130c, 0xad0d, 0xc40e, 0x7a0f, -+ /*21*/ 0x4101, 0x8202, 0xc303, 0x5804, 0x1905, 0xda06, 0x9b07, 0xac08, 0xed09, 0x2e0a, 0x6f0b, 0x640c, 0xb50d, 0x760e, 0x370f -+}; -+ -+static const unsigned long syndromes_chan1[SYNDROME_TABLE_SIZE] = { -+ /*10*/ 0x45d1, 0x8a62, 0xcfb3, 0x5e34, 0x1be5, 0xd456, 0x9187, 0xa718, 0xe2c9, 0x2d7a, 0x68ab, 0xf92c, 0xbcfd, 0x734e, 0x369f, -+ /*11*/ 0x63e1, 0xb172, 0xd293, 0x14b4, 0x7755, 0xa5c6, 0xc627, 0x28d8, 0x4b39, 0x99aa, 0xfa4b, 0x3c6c, 0x5f8d, 0x8d1e, 0xeeff, -+ /*12*/ 0xb741, 0xd982, 0x6ec3, 0x2254, 0x9515, 0xfbd6, 0x4c97, 0x33a8, 0x84e9, 0xea2a, 0x5d6b, 0x11fc, 0xa6bd, 0xc87e, 0x7f3f, -+ /*13*/ 0xdd41, 0x6682, 0xbbc3, 0x3554, 0xe815, 0x53d6, 0xce97, 0x1aa8, 0xc7e9, 0x7c2a, 0xa1fb, 0x2ffc, 0xf2bd, 0x497e, 0x943f, -+ /*14*/ 0x2bd1, 0x3d62, 0x16b3, 0x4f34, 0x64e5, 0x7256, 0x5987, 0x8518, 0xaec9, 0xb87a, 0x93ab, 0xca2c, 0xe1fd, 0xf74e, 0xdc9f, -+ /*15*/ 0x83c1, 0xc142, 0x4283, 0xa4f4, 0x2735, 0x65b6, 0xe677, 0xf858, 0x7b99, 0x391a, 0xbadb, 0x5cac, 0xdf6d, 0x9dee, 0x1e2f, -+ /*16*/ 0x8fd1, 0xc562, 0x4ab3, 0xa934, 0x26e5, 0x6c56, 0xe387, 0xfe18, 0x71c9, 0x3b7a, 0xb4ab, 0x572c, 0xd8fd, 0x924e, 0x1d9f, -+ /*17*/ 0x4791, 0x89e2, 0xce73, 0x5264, 0x15f5, 0xdb86, 0x9c17, 0xa3b8, 0xe429, 0x2a5a, 0x6dcb, 0xf1dc, 0xb64d, 0x783e, 0x3faf, -+ /*18*/ 0x5781, 0xa9c2, 0xfe43, 0x92a4, 0xc525, 0x3b66, 0x6ce7, 0xe3f8, 0xb479, 0x4a3a, 0x1dbb, 0x715c, 0x26dd, 0xd89e, 0x8f1f, -+ /*19*/ 0xbf41, 0xd582, 0x6ac3, 0x2954, 0x9615, 0xfcd6, 0x4397, 0x3ea8, 0x81e9, 0xeb2a, 0x546b, 0x17fc, 0xa8bd, 0xc27e, 0x7d3f, -+ /*1a*/ 0x9891, 0xe1e2, 0x7273, 0x6464, 0xf7f5, 0x8586, 0x1617, 0xb8b8, 0x2b29, 0x595a, 0xcacb, 0xdcdc, 0x4f4d, 0x3d3e, 0xaeaf, -+ /*1b*/ 0xcce1, 0x4472, 0x8893, 0xfdb4, 0x3f55, 
0xb9c6, 0x7527, 0x56d8, 0x9a39, 0x12aa, 0xde4b, 0xab6c, 0x678d, 0xef1e, 0x23ff, -+ /*1c*/ 0xa761, 0xf9b2, 0x5ed3, 0xe214, 0x4575, 0x1ba6, 0xbcc7, 0x7328, 0xd449, 0x8a9a, 0x2dfb, 0x913c, 0x365d, 0x688e, 0xcfef, -+ /*1d*/ 0xff61, 0x55b2, 0xaad3, 0x7914, 0x8675, 0x2ca6, 0xd3c7, 0x9e28, 0x6149, 0xcb9a, 0x34fb, 0xe73c, 0x185d, 0xb28e, 0x4def, -+ /*1e*/ 0x5451, 0xa8a2, 0xfcf3, 0x9694, 0xc2c5, 0x3e36, 0x6a67, 0xebe8, 0xbfb9, 0x434a, 0x171b, 0x7d7c, 0x292d, 0xd5de, 0x818f, -+ /*1f*/ 0x6fc1, 0xb542, 0xda83, 0x19f4, 0x7635, 0xacb6, 0xc377, 0x2e58, 0x4199, 0x9b1a, 0xf4db, 0x37ac, 0x586d, 0x82ee, 0xed2f, -+ -+ /*22*/ 0xc441, 0x4882, 0x8cc3, 0xf654, 0x3215, 0xbed6, 0x7a97, 0x5ba8, 0x9fe9, 0x132a, 0xd76b, 0xadfc, 0x69bd, 0xe57e, 0x213f, -+ /*23*/ 0x7621, 0x9b32, 0xed13, 0xda44, 0xac65, 0x4176, 0x3757, 0x6f88, 0x19a9, 0xf4ba, 0x829b, 0xb5cc, 0xc3ed, 0x2efe, 0x58df -+}; -+ -+ -+/* -+ * FIXME - either the above table is borken or something is incorrect with -+ * the way the syndrome is read out of the NB. 
-+ */ -+static int chan_from_syndrome( unsigned long syndrome ) -+{ -+ int i; -+ -+ debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ for ( i = 0; i < SYNDROME_TABLE_SIZE; i++ ) { -+ if ( syndromes_chan0[i] == syndrome ) return 0; -+ if ( syndromes_chan1[i] == syndrome ) return 1; -+ } -+ -+ debugf0( "MC: " __FILE__ ": %s(): syndrome(%lx) not found\n", -+ __func__, syndrome ); -+ return -1; -+} -+ -+ -+static const char *tt_msgs[] = { /* transaction type */ -+ "inst", -+ "data", -+ "generic", -+ "reserved" -+}; -+ -+ -+static const char *ll_msgs[] = { /* cache level */ -+ "0", -+ "1", -+ "2", -+ "generic" -+}; -+ -+ -+static const char *memtt_msgs[] = { -+ "generic", -+ "generic read", -+ "generic write", -+ "data read", -+ "data write", -+ "inst fetch", -+ "prefetch", -+ "evict", -+ "snoop", -+ "unknown error 9", -+ "unknown error 10", -+ "unknown error 11", -+ "unknown error 12", -+ "unknown error 13", -+ "unknown error 14", -+ "unknown error 15" -+}; -+ -+ -+static const char *pp_msgs[] = { /* participating processor */ -+ "local node origin", -+ "local node response", -+ "local node observed", -+ "generic" -+}; -+ -+ -+static const char *to_msgs[] = { -+ "no timeout", -+ "timed out" -+}; -+ -+ -+static const char *ii_msgs[] = { /* memory or i/o */ -+ "mem access", -+ "reserved", -+ "i/o access", -+ "generic" -+}; -+ -+ -+static const char *ext_msgs[] = { /* extended error */ -+ "ECC error", -+ "CRC error", -+ "sync error", -+ "mst abort", -+ "tgt abort", -+ "GART error", -+ "RMW error", -+ "watchdog error", -+ "ECC chipkill x4 error", -+ "unknown error 9", -+ "unknown error 10", -+ "unknown error 11", -+ "unknown error 12", -+ "unknown error 13", -+ "unknown error 14", -+ "unknown error 15" -+}; -+ -+ -+static const char *htlink_msgs[] = { -+ "none", -+ "1", -+ "2", -+ "1 2", -+ "3", -+ "1 3", -+ "2 3", -+ "1 2 3" -+}; -+ -+ -+static inline void decode_gart_tlb_error( struct mem_ctl_info *mci, -+ u32 nbeah, u32 nbeal, -+ u32 nbsh, u32 nbsl, -+ u32 nbcfg ) 
-+{ -+ u32 err_code; -+ u32 ec_tt; /* error code transaction type (2b) */ -+ u32 ec_ll; /* error code cache level (2b) */ -+ -+ debugf0( "MC%d: " __FILE__ ": %s(): FIXME\n", mci->mc_idx, __func__ ); -+ -+ err_code = nbsl & 0xffffUL; -+ ec_tt = ( err_code >> 2 ) & 0x03UL; -+ ec_ll = ( err_code >> 0 ) & 0x03UL; -+ -+ printk( "BS%d: GART TLB errorr:" -+ " transaction type(%s)," -+ " cache level(%s)\n", -+ mci->mc_idx, -+ tt_msgs[ec_tt], -+ ll_msgs[ec_ll] ); -+} -+ -+ -+static inline void decode_cache_error( struct mem_ctl_info *mci, -+ u32 nbeah, u32 nbeal, -+ u32 nbsh, u32 nbsl, -+ u32 nbcfg ) -+{ -+ u32 err_code; -+ u32 ec_rrrr; /* error code memory transaction (4b) */ -+ u32 ec_tt; /* error code transaction type (2b) */ -+ u32 ec_ll; /* error code cache level (2b) */ -+ -+ debugf0( "MC%d: " __FILE__ ": %s(): FIXME\n", mci->mc_idx, __func__ ); -+ -+ err_code = nbsl & 0xffffUL; -+ ec_rrrr = ( err_code >> 4 ) & 0x0fUL; -+ ec_tt = ( err_code >> 2 ) & 0x03UL; -+ ec_ll = ( err_code >> 0 ) & 0x03UL; -+ -+ printk( "BS%d: cache heirarchy error:" -+ " memory transaction type(%s)," -+ " transaction type(%s)," -+ " cache level(%s)\n", -+ mci->mc_idx, -+ memtt_msgs[ ec_rrrr ], -+ tt_msgs[ ec_tt ], -+ ll_msgs[ ec_ll ] ); -+} -+ -+ -+static inline void decode_bus_error( struct mem_ctl_info *mci, -+ u32 nbeah, u32 nbeal, -+ u32 nbsh, u32 nbsl, -+ u32 nbcfg ) -+{ -+ u32 page, offset; -+ u32 err_code, ext_ec; -+ int row = 0; -+ u32 ec_pp; /* error code participating processor (2p) */ -+ u32 ec_to; /* error code timed out (1b) */ -+ u32 ec_rrrr; /* error code memory transaction (4b) */ -+ u32 ec_ii; /* error code memory or I/O (2b) */ -+ u32 ec_ll; /* error code cache level (2b) */ -+ char msg[1024] = ""; -+ u32 msg_idx = 0; -+ -+ debugf0( "MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__ ); -+ -+ msg_idx = snprintf( msg, 1024, "%s", BS_MOD_STR ); -+ -+ err_code = nbsl & 0xffffUL; -+ ec_pp = ( err_code >> 9 ) & 0x03UL; -+ ec_to = ( err_code >> 8 ) & 0x01UL; -+ ec_rrrr = ( err_code 
>> 4 ) & 0x0fUL; -+ ec_ii = ( err_code >> 2 ) & 0x03UL; -+ ec_ll = ( err_code >> 0 ) & 0x03UL; -+ -+ ext_ec = ( nbsl >> 16 ) & 0xfUL; -+ -+ /* FIXME - these should report through bluesmoke channels */ -+ -+ printk( "BS%d: general bus error:" -+ " participating processor(%s)," -+ " time-out(%s)," -+ " memory transaction type(%s)," -+ " mem or i/o(%s)," -+ " cache level(%s)\n", -+ mci->mc_idx, -+ pp_msgs[ ec_pp ], -+ to_msgs[ ec_to ], -+ memtt_msgs[ ec_rrrr ], -+ ii_msgs[ ec_ii ], -+ ll_msgs[ ec_ll ] ); -+ -+ /* FIXME - other errors should have other error handling mechanisms. */ -+ if ( ( 0 != ext_ec ) && ( 0x8 != ext_ec ) ) { -+ printk( "BS%d: no special error handling for this error\n", -+ mci->mc_idx ); -+ return; -+ } -+ -+ if ( ec_pp & 0x02 ) { -+ /* We aren't the node involved */ -+ return; -+ } -+ -+ offset = nbeal & ~PAGE_MASK & ~0x7UL; -+ page = ( ( nbeah & 0xff ) << ( 40 - PAGE_SHIFT ) ) -+ | ( ( nbeal & PAGE_MASK ) >> PAGE_SHIFT ); -+ -+ /* process any errors */ -+ if ( nbsh & BIT(14) ) { /* CE */ -+ unsigned long syndrome; -+ int chan = 0; -+ -+ syndrome = ( nbsh >> 15 ) & 0x00ffUL; /* bits 7:0 */ -+ if ( nbcfg & BIT(23) ) { -+ syndrome |= ( nbsl >> 16 ) & 0xff00UL; /* bits 15:8 */ -+ chan = chan_from_syndrome( syndrome ); -+ } -+ -+ if ( 0 > chan ) { -+ /* -+ * If the syndrome couldn't be found then -+ * the race condition for error reporting -+ * registers likely occurred. There's alot -+ * more in doubt than just the channel. -+ * Might as well just log the error without -+ * any info. -+ */ -+ msg_idx += snprintf( &msg[ msg_idx ], 1024 - msg_idx, -+ " unknown syndrome 0x%lx - " -+ " possible error reporting race", -+ syndrome ); -+ bluesmoke_mc_handle_ce_no_info( mci, msg ); -+ } else if ( nbsh & BIT(26) ) { /* valid address? 
*/ -+ row = bluesmoke_mc_find_csrow_by_page( mci, page ); -+ if ( -1 == row ) { -+ bluesmoke_mc_handle_ce_no_info( mci, msg ); -+ } else { -+ bluesmoke_mc_handle_ce( mci, page, offset, -+ syndrome, row, chan, -+ msg ); -+ } -+ } else { -+ bluesmoke_mc_handle_ce_no_info( mci, msg ); -+ } -+ } else if ( nbsh & BIT(13) ) { /* UE */ -+ if ( nbsh & BIT(26) ) { /* valid address? */ -+ row = bluesmoke_mc_find_csrow_by_page( mci, page ); -+ if ( -1 == row ) { -+ bluesmoke_mc_handle_ue_no_info( mci, msg ); -+ } else { -+ bluesmoke_mc_handle_ue( mci, page, offset, -+ row, msg ); -+ } -+ } else { -+ bluesmoke_mc_handle_ue_no_info( mci, msg ); -+ } -+ } -+ -+ if ( nbsh & BIT(30) ) { -+ /* -+ * If main error is CE then overflow must be CE. -+ * If main error is UE then overflow is unknown. -+ * We'll call the overflow a CE - if panic_on_ue -+ * is set then we're already panic'ed and won't -+ * arrive here. If panic_on_ue is not set then -+ * apparently someone doesn't think that -+ * UE's are catastrophic. -+ */ -+ bluesmoke_mc_handle_ce_no_info( mci, BS_MOD_STR ); -+ } -+} -+ -+ -+static void k8_check(struct mem_ctl_info *mci) -+{ -+ struct k8_pvt *pvt = (struct k8_pvt *)mci->pvt_info; -+ u32 nbsl1, nbsh1, nbeal1, nbeah1, nbcfg1; -+ u32 nbsl2, nbsh2, nbeal2, nbeah2, nbcfg2; -+ u32 err_code; -+ u32 ext_ec; -+ -+ debugf1( "MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__ ); -+ -+ /* check for an error */ -+ pci_read_config_dword(pvt->misc_ctl, K8_NBSH, &nbsh1); -+ if ( ! (nbsh1 & BIT(31) ) ) { /* err valid? 
*/ -+ return; -+ } -+ -+ /* might as well slurp in everything at once */ -+ pci_read_config_dword(pvt->misc_ctl, K8_NBSL, &nbsl1); -+ pci_read_config_dword(pvt->misc_ctl, K8_NBEAL, &nbeal1); -+ pci_read_config_dword(pvt->misc_ctl, K8_NBEAH, &nbeah1); -+ pci_read_config_dword(pvt->misc_ctl, K8_NBCFG, &nbcfg1); -+ debugf1( KERN_WARNING -+ "NorthBridge ERROR: mci(0x%p) node(%d) nbeah(0x%.8x)" -+ " nbeal(0x%.8x) nbsh(0x%.8x) nbsl(0x%.8x): ", -+ mci, MCI2NID(mci), nbeah1, nbeal1, nbsh1, nbsl1 ); -+ -+ /* -+ * Here's the problem with the K8's EDAC reporting: -+ * There are four registers which report pieces of error -+ * information. These four registers are shared between -+ * CEs and UEs. Furthermore, contrary to what is stated in -+ * the OBKG, the overflow bit is never used! Every error -+ * always updates the reporting registers. -+ * -+ * Can you see the race condition? All four error reporting -+ * registers must be read before a new error updates them! -+ * There is no way to read all four registers atomically. The -+ * best than can be done is to detect that a race has occured -+ * and then report the error without any kind of precision. -+ * -+ * What is still positive is that errors are -+ * still reported and thus problems can still be detected - -+ * just not localized because the syndrome and address are -+ * spread out across registers. -+ * -+ * Grrrrr!!!!! Here's hoping that AMD fixes this in some -+ * future K8 rev. UEs and CEs should have separate -+ * register sets with proper overflow bits that are used! -+ * At very least the problem can be fixed by honoring the -+ * ErrValid bit in nbsh and not updating registers - just -+ * set the overflow bit - unless the current error is CE -+ * and the new error is UE which would be the only situation -+ * for overwriting the current values. 
-+ */ -+ pci_read_config_dword(pvt->misc_ctl, K8_NBSH, &nbsh2); -+ pci_read_config_dword(pvt->misc_ctl, K8_NBSL, &nbsl2); -+ pci_read_config_dword(pvt->misc_ctl, K8_NBEAL, &nbeal2); -+ pci_read_config_dword(pvt->misc_ctl, K8_NBEAH, &nbeah2); -+ pci_read_config_dword(pvt->misc_ctl, K8_NBCFG, &nbcfg2); -+ debugf1( KERN_WARNING -+ "NorthBridge ERROR2: mci(0x%p) node(%d) nbeah2(0x%.8x)" -+ " nbeal2(0x%.8x) nbsh2(0x%.8x) nbsl2(0x%.8x): ", -+ mci, MCI2NID(mci), nbeah2, nbeal2, nbsh2, nbsl2 ); -+ -+ /* clear the error */ -+ pci_write_bits32( pvt->misc_ctl, K8_NBSH, 0, BIT(31) ); -+ -+ if ( ( nbsh1 != nbsh2 ) -+ || ( nbsl1 != nbsl2 ) -+ || ( nbeah1 != nbeah2 ) -+ || ( nbeal1 != nbeal2 ) ) { -+ printk( KERN_WARNING "MC%d: race condition detected!\n", -+ mci->mc_idx ); -+ } -+ -+ err_code = nbsl2 & 0xffffUL; -+ ext_ec = (nbsl2 >> 16) & 0x0fUL; -+ -+ /* Use info from the second read - most current */ -+ if ( 0x0010UL == ( err_code & 0xfff0UL ) ) { -+ debugf1( "GART TLB error\n" ); -+ decode_gart_tlb_error( mci, nbeah2, nbeal2, nbsh2, nbsl2, nbcfg2 ); -+ } else if ( 0x0100UL == ( err_code & 0xff00UL ) ) { -+ debugf1( "Cache error\n" ); -+ decode_cache_error( mci, nbeah2, nbeal2, nbsh2, nbsl2, nbcfg2 ); -+ } else if ( 0x0800UL == ( err_code & 0xf800UL ) ) { -+ debugf1( "Bus error\n" ); -+ decode_bus_error( mci, nbeah2, nbeal2, nbsh2, nbsl2, nbcfg2 ); -+ } else { -+ /* shouldn't reach here! 
*/ -+ printk( KERN_WARNING "MC%d: " __FILE__ -+ ": %s(): unknown MCE error 0x%x\n", -+ mci->mc_idx, __func__, err_code ); -+ } -+ -+ printk( "BS%d: extended error code: %s\n", -+ mci->mc_idx, -+ ext_msgs[ ext_ec ] ); -+ -+ if ( ((ext_ec >=1 && ext_ec <= 4) || (ext_ec == 6)) -+ && ((nbsh2 >> 4) & 0x03UL) ) { -+ /* need to decode which hypertransport link had the error */ -+ u32 htln = (nbsh2 >> 4) & 0x03UL; -+ printk( "BS%d: Error on hypertransport link: %s\n", -+ mci->mc_idx, htlink_msgs[ htln ] ); -+ } -+ -+ /* -+ * If the processor context is corrupt or the error is -+ * uncorrectable then panic - why would you want to continue -+ * with something seriosly broken? -+ */ -+ if ( nbsh2 & ( BIT(29) | BIT(25) ) ) { -+ if ( nbsh2 & BIT(29) ) -+ printk( "BS%d: uncorrected error\n", mci->mc_idx ); -+ -+ if ( nbsh2 & BIT(25) ) -+ printk( "BS%d: processor context corrupt\n", -+ mci->mc_idx ); -+ -+ panic( "BS%d: cannot recover\n", mci->mc_idx ); -+ }; -+} -+ -+ -+static int k8_probe1( struct pci_dev *pdev, int dev_idx ) -+{ -+ int rc = -ENODEV; -+ int index; -+ struct mem_ctl_info *mci = NULL; -+ struct k8_pvt *pvt = NULL; -+ int nid; -+ u32 dram_pg_base = 0; -+ u32 dram_pg_limit = 0; -+ u32 dcl; -+ u32 dcl_chans; -+ u32 dcl_unbuf; -+ u32 dcl_x4; -+ u32 dcl_eccen; -+ u32 dbam; -+ u32 nbcfg; -+ u32 nbcfg_ckx4en; -+ u32 nbcfg_eccen; -+ u32 nbcap; -+ u32 nbcap_ckx4; -+ u32 nbcap_ecc; -+ u32 csrows_loaded = 0; -+ u32 mcgctl_l, mcgctl_h; -+ u32 mc4ctl_l, mc4ctl_h; -+ const struct k8_dev_info *k8_dev = &k8_devs[dev_idx]; -+ -+ debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ pci_read_config_dword(pdev, K8_DCL, &dcl); -+ dcl_chans = ( dcl >> 16 ) & 0x1; -+ dcl_unbuf = ( dcl >> 18 ) & 0x1; -+ dcl_x4 = ( dcl >> 20 ) & 0xf; -+ dcl_eccen = ( dcl >> 17 ) & 0x1; -+ pci_read_config_dword(pdev, K8_DBAM, &dbam); -+ -+ mci = bluesmoke_mc_init_structs(sizeof(*pvt), -+ K8_NR_CSROWS, -+ dcl_chans + 1); -+ -+ if ( ! 
mci ) { -+ rc = -ENOMEM; -+ goto FAIL_FINISHED; -+ } -+ -+ debugf0( "MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci ); -+ -+ pvt = (struct k8_pvt *)mci->pvt_info; -+ -+ mci->pdev = pdev; -+ nid = MCI2NID(mci); -+ -+ /* setup private structure */ -+ /* -+ * The address mapping device provides a table that indicates -+ * which physical address ranges are owned by which node. -+ * Each node's memory controller has memory controller addresses -+ * that begin at 0x0. Locally, the memory controller address -+ * must be added to the mapping device address to convert to -+ * physical address. -+ */ -+ pci_find_related_function( PCI_VENDOR_ID_AMD, -+ k8_dev->addr_map, -+ &pvt->addr_map, -+ mci->pdev ); -+ -+ if ( ! pvt->addr_map ) { -+ printk( KERN_ERR -+ "MC: error address map device not found:" -+ "vendor %x device 0x%x (broken BIOS?)\n", -+ PCI_VENDOR_ID_AMD, -+ k8_dev->addr_map ); -+ goto FAIL_FINISHED; -+ } -+ -+ debugf1( "Addr Map device PCI Bus ID:\t%s\n", pvt->addr_map->name ); -+ -+ /* -+ * Sift through address mapper DRAM table - the documentation isn't -+ * explicit, but it is believed to be an error if there are multiple -+ * entries for the same node. -+ */ -+ for ( index = 0; index < 8; index++ ) { -+ u32 dbr; -+ u32 dbr_base = 0; -+ u32 dbr_inten; -+ u32 dbr_wen; -+ u32 dbr_ren; -+ u32 dlr; -+ u32 dlr_limit = 0; -+ u32 dlr_intsel; -+ u32 dlr_nid; -+ -+ pci_read_config_dword( pvt->addr_map, -+ K8_DLR + (8 * index), -+ &dlr ); -+ -+ dlr_nid = dlr & 0x7; -+ -+ if ( dlr_nid != nid ) continue; -+ -+ /* -+ * dlr_limit has all the low-order bits 1 while dbr_base -+ * has all the low-order bits 0. Here we do some bit -+ * jockeying to set all the low-order bits of dlr_limit. 
-+ */ -+ dlr_limit = ((((dlr >> 16) & 0xffff) + 1) -+ << (24 - PAGE_SHIFT)) - 1; -+ dlr_intsel = (dlr >> 8) & 0x1f; -+ -+ pci_read_config_dword( pvt->addr_map, -+ K8_DBR + (8 * index), -+ &dbr ); -+ -+ dbr_base = ((dbr >> 16) & 0xffff) << (24 - PAGE_SHIFT); -+ dbr_inten = (dbr >> 8) & 0x7; -+ dbr_wen = (dbr >> 1) & 0x1; -+ dbr_ren = dbr & 0x1; -+ -+ debugf1( "\tAddr Map: %d:0x%x - 0x%x\n", -+ dlr_nid, dbr_base, dlr_limit ); -+ -+ if ( dram_pg_limit ) { -+ printk( KERN_ERR -+ "MC: multiple entries for node %d found" -+ " in Address Mapping device %s:" -+ " PROBE FAILED!\n", -+ nid, pci_name(pvt->misc_ctl) ); -+ goto FAIL_FINISHED; -+ } -+ -+ dram_pg_limit = dlr_limit; -+ dram_pg_base = dbr_base; -+ } -+ -+ if (! dram_pg_limit) { -+ printk( KERN_ERR -+ "MC: no DRAM entry found for node %d in Address" -+ " Mapping device: %s: POBE FAILED!\n", -+ nid, pci_name(pvt->misc_ctl) ); -+ goto FAIL_FINISHED; -+ } -+ -+ pci_find_related_function( PCI_VENDOR_ID_AMD, -+ k8_dev->misc_ctl, -+ &pvt->misc_ctl, -+ mci->pdev ); -+ -+ if ( ! 
pvt->misc_ctl ) { -+ printk( KERN_ERR -+ "MC: error miscellaneous device not found:" -+ "vendor %x device 0x%x (broken BIOS?)\n", -+ PCI_VENDOR_ID_AMD, -+ k8_dev->misc_ctl ); -+ goto FAIL_FINISHED; -+ } -+ -+ debugf1( "Misc device PCI Bus ID:\t\t%.2x:%.2x.%.1x\n", -+ pvt->misc_ctl->name ); -+ -+ pci_read_config_dword( pvt->misc_ctl, K8_NBCFG, &nbcfg ); -+ nbcfg_ckx4en = nbcfg & BIT(23); -+ nbcfg_eccen = nbcfg & BIT(22); -+ -+ mci->mtype_cap = MEM_FLAG_DDR | MEM_FLAG_RDDR; -+ -+ pci_read_config_dword( pvt->misc_ctl, K8_NBCAP, &nbcap ); -+ nbcap_ckx4 = ( nbcap >> 4 ) & 0x1; -+ nbcap_ecc = ( nbcap >> 3 ) & 0x1; -+ mci->edac_ctl_cap = EDAC_FLAG_NONE; -+ if ( nbcap_ecc ) mci->edac_ctl_cap |= EDAC_FLAG_SECDED; -+ if ( nbcap_ckx4 ) mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED; -+ -+ mci->edac_cap = EDAC_FLAG_NONE; -+ if ( dcl_eccen ) { -+ mci->edac_cap |= EDAC_FLAG_SECDED; -+ if ( dcl_chans ) { -+ mci->edac_cap |= EDAC_FLAG_S4ECD4ED; -+ } -+ } -+ -+ mci->mod_name = BS_MOD_STR; -+ mci->mod_ver = "$Revision: 1.6 $"; -+ mci->ctl_name = k8_devs[dev_idx].ctl_name; -+ mci->edac_check = k8_check; -+ mci->clear_err = NULL; -+ mci->ctl_page_to_phys = NULL; -+ -+ for ( index = 0; index < mci->nr_csrows; index++ ) { -+ struct csrow_info *csrow = &mci->csrows[ index ]; -+ u32 dcsb; -+ u32 dcsb_bah; -+ u32 dcsb_bal; -+ u32 dcsm; -+ u32 dcsm_amh; -+ u32 dcsm_aml; -+ u32 aml; -+ u32 device_shift = 0; -+ u32 intlv_shift = 0; -+ int i; -+ -+ /* find the DRAM Chip Select Base address for this row */ -+ pci_read_config_dword(mci->pdev, K8_DCSB + (index*4), &dcsb); -+ if ( ! 
(dcsb & 0x1) ) { -+ continue; /* empty */ -+ } -+ csrows_loaded++; -+ dcsb_bal = ((dcsb >> 9) & 0x7fUL) << (13 - PAGE_SHIFT); -+ dcsb_bah = ((dcsb >> 21) & 0x7ffUL) << (25 - PAGE_SHIFT); -+ -+ pci_read_config_dword(mci->pdev, K8_DCSM + (index*4), &dcsm); -+ dcsm_aml = ((~dcsm >> 9) & 0x7fUL) << (13 - PAGE_SHIFT); -+ dcsm_amh = ((dcsm >> 21) & 0x1ffUL) << (25 - PAGE_SHIFT); -+ -+ debugf2( "\t%d: dcsb(%x) dcsm(%x)\n", index, dcsb, dcsm ); -+ -+ /* 25 is 32MiB minimum DIMM size */ -+ csrow->first_page = (dcsb_bah | dcsb_bal) + dram_pg_base; -+ csrow->nr_pages = 1 << ((( dbam >> ((index / 2)*4) ) & 0x7) -+ + 25 - PAGE_SHIFT + dcl_chans); -+ if ( dcsm_aml ) { -+ aml = dcsm_aml; -+ i = 0; -+ while ( ! (aml & 0x1UL) ) { -+ i++; -+ aml >>= 1; -+ } -+ device_shift = i; -+ -+ i = 0; -+ while ( aml & 0x1UL ) { -+ i++; -+ aml >>= 1; -+ } -+ intlv_shift = i; -+ -+ csrow->last_page = csrow->first_page -+ + ( csrow->nr_pages << intlv_shift ) -+ - ( (1 << device_shift) | 0x1UL ); -+ } else { -+ csrow->last_page = csrow->first_page -+ + csrow->nr_pages - 1; -+ } -+ -+ csrow->page_mask = dcsm_aml; -+ csrow->grain = 8; /* 8 bytes of resolution */ -+ csrow->mtype = dcl_unbuf ? MEM_DDR : MEM_RDDR; -+ if ( ( dcl_x4 >> (index / 2 ) ) & 0x1 ) { -+ csrow->dtype = DEV_X4; -+ } else { -+ csrow->dtype = DEV_UNKNOWN; -+ } -+ -+ if ( nbcfg_eccen ) { -+ if ( nbcfg_ckx4en ) { -+ csrow->edac_mode = EDAC_S4ECD4ED; -+ } else { -+ csrow->edac_mode = EDAC_SECDED; -+ } -+ } else { -+ csrow->edac_mode = EDAC_NONE; -+ } -+ } -+ -+ /* clear any pending errors, or initial state bits */ -+ /* FIXME - should log what is already there */ -+ pci_write_bits32( pvt->misc_ctl, K8_NBSH, 0, BIT(31) ); -+ -+ if ( ! 
csrows_loaded ) { -+ mci->edac_cap = EDAC_FLAG_NONE; -+ } else { -+ /* turn on error reporting */ -+ pci_write_bits32( pvt->misc_ctl, K8_NBCTL, 0x3UL, 0x3UL ); -+ -+ pci_write_bits32( pvt->misc_ctl, K8_NBCTL, 0x3UL, 0x3UL ); -+ -+ do_rdmsr( nid, K8_MSR_MC4CTL, &mc4ctl_l, &mc4ctl_h ); -+ mc4ctl_l |= BIT(0) | BIT(1); -+ do_wrmsr( nid, K8_MSR_MC4CTL, mc4ctl_l, mc4ctl_h ); -+ do_rdmsr( nid, K8_MSR_MC4CTL, &mc4ctl_l, &mc4ctl_h ); -+ -+ do_rdmsr( nid, K8_MSR_MCGCTL, &mcgctl_l, &mcgctl_h ); -+ mcgctl_l |= BIT(4); -+ do_wrmsr( nid, K8_MSR_MCGCTL, mcgctl_l, mcgctl_h ); -+ do_rdmsr( nid, K8_MSR_MCGCTL, &mcgctl_l, &mcgctl_h ); -+ } -+ -+ if ( 0 != bluesmoke_mc_add_mc( mci ) ) { -+ debugf3( "MC: " __FILE__ -+ ": %s(): failed bluesmoke_mc_add_mc()\n", __func__ ); -+ goto FAIL_FINISHED; -+ } -+ -+ /* get this far and it's successful */ -+ debugf3( "MC: " __FILE__ ": %s(): success\n", __func__ ); -+ rc = 0; -+ goto FINISHED; -+ -+ FAIL_FINISHED: -+ if ( mci ) { -+ kfree( mci ); -+ } -+ -+ FINISHED: -+ return( rc ); -+} -+ -+ -+#ifdef CONFIG_PM -+ -+static int k8_suspend (struct pci_dev *pdev, u32 state) -+{ -+ debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); -+ -+ return -ENOSYS; -+} -+ -+ -+static int k8_resume (struct pci_dev *pdev) -+{ -+ debugf0( "MC: " __FILE__ ": %s(): FIXME\n", __func__ ); -+ -+ return -ENOSYS; -+} -+ -+#endif /* CONFIG_PM */ -+ -+ -+/* returns count (>= 0), or negative on error */ -+static int __devinit k8_init_one( struct pci_dev *pdev, -+ const struct pci_device_id *ent ) -+{ -+ int rc; -+ -+ debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ /* wake up and enable device */ -+ if (pci_enable_device (pdev)) { -+ rc = -EIO; -+ } else { -+ rc = k8_probe1( pdev, ent->driver_data ); -+ } -+ return rc; -+} -+ -+ -+static void __devexit k8_remove_one( struct pci_dev *pdev ) -+{ -+ struct mem_ctl_info *mci; -+ -+ debugf0( __FILE__ ": %s()\n", __func__); -+ -+ if ( NULL == ( mci = bluesmoke_mc_find_mci_by_pdev( pdev ) ) ) { -+ goto FINISHED; -+ } -+ 
-+ if ( 0 != bluesmoke_mc_del_mc( mci ) ) { -+ goto FINISHED; -+ } -+ -+ kfree( mci ); -+ -+ FINISHED: -+ return; -+} -+ -+ -+static const struct pci_device_id k8_pci_tbl[] __devinitdata = { -+ { PCI_VEND_DEV( AMD, OPT_2_MEMCTL ), PCI_ANY_ID, PCI_ANY_ID, 0, 0, OPTERON }, -+ {0,} /* 0 terminated list. */ -+}; -+ -+MODULE_DEVICE_TABLE(pci, k8_pci_tbl); -+ -+ -+static struct pci_driver k8_driver = { -+ .name = BS_MOD_STR, -+ .probe = k8_init_one, -+ .remove = __devexit_p(k8_remove_one), -+ .id_table = k8_pci_tbl, -+#ifdef CONFIG_PM -+ .suspend = k8_suspend, -+ .resume = k8_resume, -+#endif /* CONFIG_PM */ -+}; -+ -+ -+int __init k8_init(void) -+{ -+ int pci_rc; -+ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ pci_rc = pci_module_init( &k8_driver ); -+ if ( pci_rc < 0 ) return pci_rc; -+ -+ return 0; -+} -+ -+ -+static void __exit k8_exit(void) -+{ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ pci_unregister_driver( &k8_driver ); -+} -+ -+ -+module_init(k8_init); -+module_exit(k8_exit); -+ -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh"); -+MODULE_DESCRIPTION("MC support for AMD K8 memory controllers"); -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_mc.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/bluesmoke_mc.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_mc.c 2004-12-17 12:46:23.000000000 -0500 -@@ -0,0 +1,1112 @@ -+/* -+ * bluesmoke_mc kernel module -+ * (C) 2003 Linux Networx (http://lnxi.com) -+ * This file may be distributed under the terms of the -+ * GNU General Public License. -+ * -+ * Written by Thayne Harbaugh -+ * Based on work by Dan Hollis <goemon at anime dot net> and others. 
-+ * http://www.anime.net/~goemon/linux-ecc/ -+ * -+ * $Id: bluesmoke_mc.c,v 1.9 2004/12/13 22:19:40 thayne Exp $ -+ * -+ */ -+ -+ -+#include <linux/config.h> -+#include <linux/version.h> -+#include <linux/module.h> -+#include <linux/proc_fs.h> -+#include <linux/kernel.h> -+#include <linux/init.h> -+#include <linux/sysctl.h> -+#include <linux/highmem.h> -+#include <linux/timer.h> -+#include <linux/slab.h> -+ -+#include <asm/uaccess.h> -+#include <asm/page.h> -+ -+#include "bluesmoke_mc.h" -+ -+ -+#ifndef pfn_to_page -+#define pfn_to_page(pfn) (mem_map + (pfn)) -+#endif /* pfn_to_page */ -+ -+#define MC_PROC_DIR "mc" -+ -+/* /proc/mc dir */ -+static struct proc_dir_entry *proc_mc; -+ -+/* Setable by module parameter and sysctl */ -+#if SCRUB -+/* FIXME - do something with scrubbing */ -+static int mc_scrub = -1; -+#endif /* SCRUB */ -+static int panic_on_ue = 1; -+static int log_ue = 1; -+static int log_ce = 1; -+static int poll_msec = 1000; -+static struct timer_list timer; -+ -+static DECLARE_MUTEX(mem_ctls_mutex); -+ -+/* FIXME - use list.h */ -+/* FIXME - should be dynamic */ -+static struct mem_ctl_info *mcis[MAX_MC_DEVICES]; -+ -+ -+#ifdef CONFIG_SYSCTL -+static void dimm_labels( char *buf, void *data ) -+{ -+ int mcidx, ridx, chidx; -+ char *mcstr, *rstr, *chstr, *lstr, *p; -+ -+ lstr = buf; -+ -+ mcstr = strsep( &lstr, "." ); -+ if (! lstr) -+ return; -+ mcidx = simple_strtol( mcstr, &p, 0 ); -+ if ( *p ) -+ return; -+ if ( mcidx >= MAX_MC_DEVICES || ! mcis[mcidx] ) -+ return; -+ -+ rstr = strsep( &lstr, "." ); -+ if (! lstr) -+ return; -+ ridx = simple_strtol( rstr, &p, 0 ); -+ if ( *p ) -+ return; -+ if ( ridx >= mcis[mcidx]->nr_csrows -+ || ! mcis[mcidx]->csrows ) -+ return; -+ -+ chstr = strsep( &lstr, ":" ); -+ if (! lstr) -+ return; -+ chidx = simple_strtol( chstr, &p, 0 ); -+ if ( *p ) -+ return; -+ if ( chidx >= mcis[mcidx]->csrows[ridx].nr_channels -+ || ! 
mcis[mcidx]->csrows[ridx].channels ) -+ return; -+ -+ debugf1( "%d:%d.%d:%s\n", -+ mcidx, ridx, chidx, lstr ); -+ -+ strncpy(mcis[mcidx]->csrows[ridx].channels[chidx].label, -+ lstr, BLUESMOKE_MC_LABEL_LEN + 1); -+ /* -+ * no need to NUL terminate label since -+ * get_user_tok() NUL terminates. -+ */ -+} -+ -+ -+static void counter_reset( char *buf, void *data ) -+{ -+ char *p = buf; -+ int mcidx, row, chan; -+ struct mem_ctl_info *mci; -+ -+ mcidx = simple_strtol( buf, &p, 0 ); -+ if ( *p ) -+ return; -+ if ( mcidx >= MAX_MC_DEVICES || ! mcis[mcidx] ) -+ return; -+ -+ mci = mcis[mcidx]; -+ mci->ue_noinfo_count = 0; -+ mci->ce_noinfo_count = 0; -+ mci->ue_count = 0; -+ mci->ce_count = 0; -+ for ( row = 0; row < mci->nr_csrows; row++ ) { -+ struct csrow_info *ri = &mci->csrows[row]; -+ -+ ri->ue_count = 0; -+ ri->ce_count = 0; -+ for ( chan = 0; chan < ri->nr_channels; chan++ ) { -+ ri->channels[chan].ce_count = 0; -+ } -+ } -+ do_gettimeofday( &mci->tv ); -+} -+ -+ -+struct actionvec_info { -+ void (*action)(char *str, void *data); -+ char separator; -+ char *usage; -+ void *data; -+}; -+ -+ -+static struct actionvec_info dimm_labels_avi = { -+ .action = dimm_labels, -+ .separator = ',', -+ .usage = "<mc>.<row>.<chan>:<label>" -+ "[,<mc>.<row>.<chan>:<label>[,...]]\n", -+ .data = NULL -+}; -+ -+ -+static struct actionvec_info counter_reset_avi = { -+ .action = counter_reset, -+ .separator = ',', -+ .usage = "<mc>[,<mc>[,...]]\n", -+ .data = NULL -+}; -+ -+ -+static int proc_actionvec( ctl_table *table, int write, struct file *filp, -+ void *buffer, size_t *lenp ) -+{ -+ size_t len; -+ char *p, c, *buf, *tok, sep[] = " "; -+ struct actionvec_info *avi; -+ -+ debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ if ( !table->data || !*lenp || (filp->f_pos && !write)) { -+ *lenp = 0; -+ return 0; -+ } -+ -+ avi = (struct actionvec_info *)table->data; -+ -+ if (write) { -+ /* dup the string from user space */ -+ len = 0; -+ p = buffer; -+ while (len < *lenp) { -+ if 
(get_user(c, p++)) -+ return -EFAULT; -+ if (c == 0 || c == '\n') -+ break; -+ len++; -+ } -+ if (! (buf = kmalloc(len + 1, GFP_KERNEL))) -+ return -EFAULT; -+ if (copy_from_user(buf, buffer, len)) { -+ kfree(buf); -+ return -EFAULT; -+ } -+ buf[len] = '\0'; -+ filp->f_pos += *lenp; -+ /* working copy can now be segmented for processing */ -+ p = buf; -+ sep[0] = avi->separator; -+ while ((tok = strsep(&p, sep))) -+ avi->action(tok, avi->data); -+ kfree(buf); -+ } else { -+ len = strlen(avi->usage); -+ if (len > *lenp) -+ len = *lenp; -+ if (len) -+ if(copy_to_user(buffer, avi->usage, len)) -+ return -EFAULT; -+ *lenp = len; -+ filp->f_pos += len; -+ } -+ return 0; -+} -+ -+ -+static ctl_table mc_table[] = { -+ {-1, "panic_on_ue", &panic_on_ue, -+ sizeof(int), 0644, NULL, proc_dointvec}, -+ {-2, "log_ue", &log_ue, -+ sizeof(int), 0644, NULL, proc_dointvec}, -+ {-3, "log_ce", &log_ce, -+ sizeof(int), 0644, NULL, proc_dointvec}, -+ {-4, "poll_msec", &poll_msec, -+ sizeof(int), 0644, NULL, proc_dointvec}, -+ {-5, "dimm_labels", &dimm_labels_avi, -+ 0, 0644, NULL, proc_actionvec}, -+ {-6, "counter_reset", &counter_reset_avi, -+ 0, 0644, NULL, proc_actionvec}, -+ {0} -+}; -+ -+ -+static ctl_table mc_root_table[] = { -+ {CTL_DEBUG, MC_PROC_DIR, NULL, 0, 0555, mc_table}, -+ {0} -+}; -+ -+ -+static struct ctl_table_header *mc_sysctl_header = NULL; -+#endif /* CONFIG_SYSCTL */ -+ -+ -+#ifdef CONFIG_PROC_FS -+static const char *mem_types[] = { -+ [MEM_EMPTY] = "Empty", -+ [MEM_RESERVED] = "Reserved", -+ [MEM_UNKNOWN] = "Unknown", -+ [MEM_FPM] = "FPM", -+ [MEM_EDO] = "EDO", -+ [MEM_BEDO] = "BEDO", -+ [MEM_SDR] = "Unbuffered-SDR", -+ [MEM_RDR] = "Registered-SDR", -+ [MEM_DDR] = "Unbuffered-DDR", -+ [MEM_RDDR] = "Registered-DDR", -+ [MEM_RMBS] = "RMBS" -+}; -+ -+static const char *dev_types[] = { -+ [DEV_UNKNOWN] = "Unknown", -+ [DEV_X1] = "x1", -+ [DEV_X2] = "x2", -+ [DEV_X4] = "x4", -+ [DEV_X8] = "x8", -+ [DEV_X16] = "x16", -+ [DEV_X32] = "x32", -+ [DEV_X64] = "x64" -+}; -+ 
-+static const char *edac_caps[] = { -+ [EDAC_UNKNOWN] = "Unknown", -+ [EDAC_NONE] = "None", -+ [EDAC_RESERVED] = "Reserved", -+ [EDAC_PARITY] = "PARITY", -+ [EDAC_EC] = "EC", -+ [EDAC_SECDED] = "SECDED", -+ [EDAC_S2ECD2ED] = "S2ECD2ED", -+ [EDAC_S4ECD4ED] = "S4ECD4ED", -+ [EDAC_S8ECD8ED] = "S8ECD8ED", -+ [EDAC_S16ECD16ED] = "S16ECD16ED" -+}; -+ -+ -+#if UNUSED -+static const char *scrub_caps[] = { -+ [SCRUB_UNKNOWN] = "Unknown", -+ [SCRUB_NONE] = "None", -+ [SCRUB_SW_PROG] = "SProg", -+ [SCRUB_SW_SRC] = "SSrc", -+ [SCRUB_SW_PROG_SRC] = "SProg+Src", -+ [SCRUB_SW_TUNABLE] = "STun", -+ [SCRUB_HW_PROG] = "HProg", -+ [SCRUB_HW_SRC] = "HSrc", -+ [SCRUB_HW_PROG_SRC] = "HProg+Src", -+ [SCRUB_HW_TUNABLE] = "HTun" -+}; -+#endif /* UNUSED */ -+ -+ -+/* FIXME - CHANNEL_PREFIX is pretty bad */ -+#define CHANNEL_PREFIX(...) \ -+ do { \ -+ p += sprintf( p, "%d.%d:%s", \ -+ chan->csrow->csrow_idx, \ -+ chan->chan_idx, \ -+ chan->label ); \ -+ p += sprintf( p, ":" __VA_ARGS__ ); \ -+ } while ( 0 ) -+ -+ -+static inline int mc_proc_output_channel(char *buf, struct channel_info *chan) -+{ -+ char *p = buf; -+ -+ CHANNEL_PREFIX( "CE:\t\t%d\n", chan->ce_count ); -+ -+ return p - buf; -+} -+ -+#undef CHANNEL_PREFIX -+ -+ -+#define CSROW_PREFIX(...) 
\ -+ do { \ -+ int i; \ -+ p += sprintf( p, "%d:", csrow->csrow_idx ); \ -+ p += sprintf( p, "%s", csrow->channels[0].label ); \ -+ for ( i = 1; i < csrow->nr_channels; i++ ) { \ -+ p += sprintf( p, "|%s", csrow->channels[i].label ); \ -+ } \ -+ p += sprintf( p, ":" __VA_ARGS__ ); \ -+ } while ( 0 ) -+ -+ -+static inline int mc_proc_output_csrow(char *buf, struct csrow_info *csrow) -+{ -+ char *p = buf; -+ int chan_idx; -+ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ CSROW_PREFIX( "Memory Size:\t%d MiB\n", -+ (u32)PAGES_TO_MiB(csrow->nr_pages) ); -+ CSROW_PREFIX( "Mem Type:\t\t%s\n", mem_types[csrow->mtype] ); -+ CSROW_PREFIX( "Dev Type:\t\t%s\n", dev_types[csrow->dtype] ); -+ CSROW_PREFIX( "EDAC Mode:\t\t%s\n", edac_caps[csrow->edac_mode] ); -+ CSROW_PREFIX( "UE:\t\t\t%d\n", csrow->ue_count ); -+ CSROW_PREFIX( "CE:\t\t\t%d\n", csrow->ce_count ); -+ -+ for ( chan_idx = 0; chan_idx < csrow->nr_channels; chan_idx++ ) { -+ p += mc_proc_output_channel( p, &csrow->channels[chan_idx] ); -+ } -+ p += sprintf( p, "\n" ); -+ return p - buf; -+} -+ -+#undef CSROW_PREFIX -+ -+ -+static inline int mc_proc_output_edac_cap(char *buf, unsigned long edac_cap) -+{ -+ char *p = buf; -+ int bit_idx; -+ -+ for ( bit_idx = 0; bit_idx < 8 * sizeof(edac_cap); bit_idx++ ) { -+ if ( ( edac_cap >> bit_idx ) & 0x1 ) { -+ p += sprintf( p, "%s ", edac_caps[ bit_idx ] ); -+ } -+ } -+ -+ return p - buf; -+} -+ -+ -+static inline int mc_proc_output_mtype_cap(char *buf, unsigned long mtype_cap) -+{ -+ char *p = buf; -+ int bit_idx; -+ -+ for ( bit_idx = 0; bit_idx < 8 * sizeof(mtype_cap); bit_idx++ ) { -+ if ( ( mtype_cap >> bit_idx ) & 0x1 ) { -+ p += sprintf( p, "%s ", mem_types[ bit_idx ] ); -+ } -+ } -+ -+ return p - buf; -+} -+ -+ -+static int mc_proc_output(struct mem_ctl_info *mci, char *buf) -+{ -+ int csrow_idx; -+ u32 total_pages; -+ char *p = buf; -+ struct timeval tv; -+ -+ debugf3( "MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__ ); -+ -+ do_gettimeofday( &tv ); -+ -+ 
p += sprintf( p, "Panic UE:\t\t%d\n", panic_on_ue ); -+ p += sprintf( p, "Log UE:\t\t\t%d\n", log_ue ); -+ p += sprintf( p, "Log CE:\t\t\t%d\n", log_ce ); -+ p += sprintf( p, "Poll msec:\t\t%d\n", poll_msec ); -+ -+ p += sprintf( p, "\n" ); -+ -+ p += sprintf( p, "MC Module:\t\t%s %s\n", mci->mod_name, mci->mod_ver ); -+ p += sprintf( p, "Memory Controller:\t%s\n", mci->ctl_name ); -+ p += sprintf( p, "PCI Bus ID:\t\t%s (%s)\n", -+ mci->pdev->slot_name, pci_name(mci->pdev) ); -+ -+ p += sprintf( p, "EDAC capability:\t" ); -+ p += mc_proc_output_edac_cap( p, mci->edac_ctl_cap ); -+ p += sprintf( p, "\n" ); -+ -+ p += sprintf( p, "Current EDAC capability:\t" ); -+ p += mc_proc_output_edac_cap( p, mci->edac_cap ); -+ p += sprintf( p, "\n" ); -+ -+ p += sprintf( p, "Supported Mem Types:\t" ); -+ p += mc_proc_output_mtype_cap( p, mci->mtype_cap ); -+ p += sprintf( p, "\n" ); -+ -+ p += sprintf( p, "\n" ); -+ -+ for ( total_pages = csrow_idx = 0; -+ csrow_idx < mci->nr_csrows; -+ csrow_idx++ ) { -+ struct csrow_info *csrow = &mci->csrows[csrow_idx]; -+ -+ if ( ! 
csrow->nr_pages ) continue; -+ total_pages += csrow->nr_pages; -+ p += mc_proc_output_csrow( p, csrow ); -+ } -+ -+ p += sprintf( p, "Total Memory Size:\t%d MiB\n", -+ (u32)PAGES_TO_MiB(total_pages) ); -+ p += sprintf( p, "Seconds since reset:\t%ld\n", -+ tv.tv_sec - mci->tv.tv_sec ); -+ p += sprintf( p, "UE No Info:\t\t%d\n", mci->ue_noinfo_count ); -+ p += sprintf( p, "CE No Info:\t\t%d\n", mci->ce_noinfo_count ); -+ p += sprintf( p, "Total UE:\t\t%d\n", mci->ue_count ); -+ p += sprintf( p, "Total CE:\t\t%d\n", mci->ce_count ); -+ return p - buf; -+} -+ -+ -+static int mc_read_proc(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ struct mem_ctl_info *mci = (struct mem_ctl_info *)data; -+ -+ debugf3( "MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__ ); -+ -+ down(&mem_ctls_mutex); -+ len = mc_proc_output(mci, page); -+ up(&mem_ctls_mutex); -+ if (len <= off+count) *eof = 1; -+ *start = page + off; -+ len -= off; -+ if (len>count) len = count; -+ if (len<0) len = 0; -+ -+ -+ return len; -+} -+#endif /* CONFIG_PROC_FS */ -+ -+ -+#if CONFIG_BLUESMOKE_DEBUG -+ -+ -+EXPORT_SYMBOL(bluesmoke_mc_dump_channel); -+ -+void bluesmoke_mc_dump_channel( struct channel_info *chan ) -+{ -+ printk( KERN_INFO "\tchannel = %p\n", chan ); -+ printk( KERN_INFO "\tchannel->chan_idx = %d\n", chan->chan_idx ); -+ printk( KERN_INFO "\tchannel->ce_count = %d\n", chan->ce_count ); -+ printk( KERN_INFO "\tchannel->label = '%s'\n", chan->label ); -+ printk( KERN_INFO "\tchannel->csrow = %p\n\n", chan->csrow ); -+} -+ -+ -+EXPORT_SYMBOL(bluesmoke_mc_dump_csrow); -+ -+void bluesmoke_mc_dump_csrow( struct csrow_info *csrow ) -+{ -+ printk( KERN_INFO "\tcsrow = %p\n", csrow ); -+ printk( KERN_INFO "\tcsrow->csrow_idx = %d\n", csrow->csrow_idx ); -+ printk( KERN_INFO "\tcsrow->first_page = 0x%lx\n", csrow->first_page ); -+ printk( KERN_INFO "\tcsrow->last_page = 0x%lx\n", csrow->last_page ); -+ printk( KERN_INFO "\tcsrow->page_mask = 0x%lx\n", 
csrow->page_mask ); -+ printk( KERN_INFO "\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages ); -+ printk( KERN_INFO "\tcsrow->nr_channels = %d\n", csrow->nr_channels ); -+ printk( KERN_INFO "\tcsrow->channels = %p\n", csrow->channels ); -+ printk( KERN_INFO "\tcsrow->mci = %p\n\n", csrow->mci ); -+} -+ -+ -+EXPORT_SYMBOL(bluesmoke_mc_dump_mci); -+ -+void bluesmoke_mc_dump_mci( struct mem_ctl_info *mci ) -+{ -+ printk( KERN_INFO "\tmci = %p\n", mci ); -+ printk( KERN_INFO "\tmci->mtype_cap = %lx\n", mci->mtype_cap ); -+ printk( KERN_INFO "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap ); -+ printk( KERN_INFO "\tmci->edac_cap = %lx\n", mci->edac_cap ); -+ printk( KERN_INFO "\tmci->edac_check = %p\n", mci->edac_check ); -+ printk( KERN_INFO "\tmci->clear_err = %p\n", mci->clear_err ); -+ printk( KERN_INFO "\tmci->nr_csrows = %d, csrows = %p\n", -+ mci->nr_csrows, mci->csrows ); -+ printk( KERN_INFO "\tpdev = %p\n", mci->pdev ); -+ printk( KERN_INFO "\tmod_name:ctl_name = %s:%s\n", -+ mci->mod_name, mci->ctl_name ); -+ printk( KERN_INFO "\tproc_name = %s, proc_ent = %p\n", -+ mci->proc_name, mci->proc_ent ); -+ printk( KERN_INFO "\tpvt_info = %p\n\n", mci->pvt_info ); -+} -+ -+ -+#endif /* CONFIG_BLUESMOKE_DEBUG */ -+ -+ -+EXPORT_SYMBOL(bluesmoke_mc_init_structs); -+ -+/* -+ * Everything is kmalloc'ed as one big chunk - more efficient. -+ * Only can be used if all structures have the same lifetime - otherwise -+ * you have to allocate and initialize your own structures. -+ * -+ * kmalloc'ed memory must be free'ed by caller. -+ */ -+struct mem_ctl_info *bluesmoke_mc_init_structs(u32 sz_pvt, -+ u32 nr_csrows, -+ u32 nr_chans) -+{ -+ struct mem_ctl_info *mci; -+ struct channel_info *chi; -+ u32 malloc_size; -+ int row, chn; -+ -+ malloc_size = -+ sizeof(struct mem_ctl_info) -+ + sz_pvt -+ + nr_csrows * sizeof(struct csrow_info) -+ + nr_chans * nr_csrows * sizeof(struct channel_info); -+ -+ if (! 
(mci = kmalloc(malloc_size, GFP_KERNEL))) -+ goto done; -+ -+ memset( mci, 0, malloc_size); -+ -+ /* set all the pointers to the correct offset in the malloc'ed block */ -+ if (sz_pvt) -+ mci->pvt_info = (pvt_info_t)((char *)mci + sizeof(*mci)); -+ -+ mci->csrows = (struct csrow_info *)((char *)mci + sizeof(*mci) + sz_pvt); -+ mci->nr_csrows = nr_csrows; -+ -+ chi = (struct channel_info *)((char *)mci->csrows -+ + sizeof(*mci->csrows) * nr_csrows); -+ -+ for (row = 0; row < nr_csrows; row++) { -+ struct csrow_info *csrow = &mci->csrows[row]; -+ -+ csrow->csrow_idx = row; -+ csrow->mci = mci; -+ csrow->nr_channels = nr_chans; -+ csrow->channels = &chi[ row * nr_chans ]; -+ -+ for (chn = 0; chn < nr_chans; chn++) { -+ struct channel_info *chan = &csrow->channels[ chn ]; -+ -+ chan->chan_idx = chn; -+ chan->csrow = csrow; -+ } -+ } -+ -+ done: -+ return mci; -+} -+ -+ -+EXPORT_SYMBOL(bluesmoke_mc_find_mci_by_pdev); -+ -+struct mem_ctl_info *bluesmoke_mc_find_mci_by_pdev(struct pci_dev *pdev ) -+{ -+ int i; -+ struct mem_ctl_info *mci = NULL; -+ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ for (i=0; i < MAX_MC_DEVICES; i++) { -+ if ( ! mcis[ i ] ) continue; -+ if ( pdev == mcis[ i ]->pdev ) { -+ mci = mcis[ i ]; -+ break; -+ } -+ } -+ -+ return mci; -+} -+ -+ -+EXPORT_SYMBOL(bluesmoke_mc_add_mc); -+ -+/* FIXME - should a warning be printed if no error detection? correction? 
*/ -+int bluesmoke_mc_add_mc(struct mem_ctl_info *mci) -+{ -+ int i; -+ int rc = 1; -+ -+ debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); -+#if CONFIG_BLUESMOKE_DEBUG -+#if 1 <= CONFIG_BLUESMOKE_DEBUG_VERBOSE -+ bluesmoke_mc_dump_mci( mci ); -+#endif /* 1 <= CONFIG_BLUESMOKE_DEBUG_VERBOSE */ -+#if 2 <= CONFIG_BLUESMOKE_DEBUG_VERBOSE -+ for ( i = 0; i < mci->nr_csrows; i++ ) { -+ int j; -+ bluesmoke_mc_dump_csrow( &mci->csrows[i] ); -+ for ( j = 0; j < mci->csrows[i].nr_channels; j++ ) { -+ bluesmoke_mc_dump_channel( &mci->csrows[i].channels[j] ); -+ } -+ } -+#endif /* 2 <= CONFIG_BLUESMOKE_DEBUG_VERBOSE */ -+#endif /* CONFIG_BLUESMOKE_DEBUG */ -+ down( &mem_ctls_mutex ); -+ -+ if ( bluesmoke_mc_find_mci_by_pdev( mci->pdev ) ) { -+ printk( KERN_WARNING -+ "MC: %s (%s) %s %s already assigned %d\n", -+ mci->pdev->slot_name, pci_name(mci->pdev), -+ mci->mod_name, -+ mci->ctl_name, -+ mci->mc_idx ); -+ goto FINISH; -+ } -+ -+ for (i=0; i < MAX_MC_DEVICES; i++) { -+ if ( ! mcis[ i ] ) break; -+ } -+ -+ if ( MAX_MC_DEVICES == i ) { -+ printk( KERN_WARNING -+ "MC: out of slots in mem_ctls for %s %s\n", -+ mci->mod_name, mci->ctl_name); -+ goto FINISH; -+ } -+ -+ mcis[i] = mci; -+ mci->mc_idx = i; -+ printk( KERN_INFO -+ "MC%d: Giving out device %d to %s %s: PCI %s (%s)\n", -+ mci->mc_idx, -+ i, mci->mod_name, mci->ctl_name, -+ mci->pdev->slot_name, pci_name(mci->pdev) ); -+ __module_get(THIS_MODULE); -+ -+ /* set load time so that error rate can be tracked */ -+ do_gettimeofday(&mci->tv); -+ -+#ifdef CONFIG_PROC_FS -+ if ( snprintf( mci->proc_name, MC_PROC_NAME_MAX_LEN, "%d", i ) -+ == MC_PROC_NAME_MAX_LEN ) { -+ printk( KERN_WARNING -+ "MC%d: proc entry too long for device %d \n", -+ mci->mc_idx, i ); -+ /* FIXME - should there be an error code and unwind? 
*/ -+ goto FINISH; -+ } -+ -+ mci->proc_ent = create_proc_read_entry( mci->proc_name, 0, proc_mc, -+ mc_read_proc, (void *)mci ); -+ -+ if ( NULL == mci->proc_ent ) { -+ printk( KERN_WARNING -+ "MC%d: failed to create proc entry for controller %d \n", -+ mci->mc_idx, i ); -+ /* FIXME - should there be an error code and unwind? */ -+ goto FINISH; -+ } -+#endif /* CONFIG_PROC_FS */ -+ -+ rc = 0; -+ -+ FINISH: -+ up( &mem_ctls_mutex ); -+ return rc; -+} -+ -+ -+EXPORT_SYMBOL(bluesmoke_mc_del_mc); -+ -+int bluesmoke_mc_del_mc(struct mem_ctl_info *mci) -+{ -+ int rc = 1; -+ -+ debugf0( "MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__ ); -+ down( &mem_ctls_mutex ); -+ -+ if ( mcis[mci->mc_idx] != mci ) { -+ printk( KERN_WARNING -+ "MC%d: index of mci for %s %s doesn't match" -+ " entry in mem_ctls\n", -+ mci->mc_idx, mci->mod_name, mci->ctl_name); -+ rc = -ENODEV; -+ goto FINISHED; -+ } -+ -+ mcis[mci->mc_idx] = NULL; -+ module_put(THIS_MODULE); -+ if ( ! module_refcount(THIS_MODULE) ) del_timer( &timer ); -+#ifdef CONFIG_PROC_FS -+ remove_proc_entry( mci->proc_name, proc_mc ); -+#endif -+ -+ printk( KERN_INFO -+ "MC%d: Removed device %d for %s %s: PCI %s (%s)\n", -+ mci->mc_idx, mci->mc_idx, mci->mod_name, mci->ctl_name, -+ mci->pdev->slot_name, pci_name(mci->pdev) ); -+ -+ rc = 0; -+ -+ FINISHED: -+ up( &mem_ctls_mutex ); -+ return rc; -+} -+ -+ -+/* -+ * FIXME - what happens when grain > PAGE_SIZE? 
-+ * Need multiple kmap_atomic() -+ */ -+/* FIXME - this should go in an arch dependant file */ -+EXPORT_SYMBOL(bluesmoke_mc_scrub_block); -+ -+void bluesmoke_mc_scrub_block(unsigned long page, unsigned long offset, u32 size) -+{ -+ struct page *pg; -+ volatile unsigned long *virt_addr; -+ int i; -+ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+#ifndef CONFIG_DISCONTIGMEM -+ if(page > max_mapnr) -+ return; /* pointer is beyond memory, so bail */ -+#else -+/* FIXME - use the per-pgdat data instead for discontigmem */ -+#endif -+ pg = pfn_to_page(page); -+ -+ virt_addr = kmap_atomic(pg, KM_BOUNCE_READ) + offset; -+ -+ for(i = 0; i < size / sizeof(unsigned long); i++, virt_addr++) { -+ /* Very carefully read and write to memory atomically -+ * so we are interrupt and smp safe. -+ */ -+ __asm__ __volatile__( -+ "lock; addl $0, %0" -+ :: "m" (*virt_addr)); -+ } -+ kunmap_atomic(pg, KM_BOUNCE_READ); -+} -+ -+ -+/* FIXME - put in a util library? */ -+/* FIXME - should return -1 */ -+EXPORT_SYMBOL(bluesmoke_mc_find_csrow_by_page); -+ -+int bluesmoke_mc_find_csrow_by_page( struct mem_ctl_info *mci, -+ unsigned long page ) -+{ -+ struct csrow_info *csrows = mci->csrows; -+ int row = -1, i; -+ -+ debugf1( "MC%d: " __FILE__ ": %s(): 0x%lx\n", -+ mci->mc_idx, __func__, page ); -+ -+ for ( i = 0; i < mci->nr_csrows; i++ ) { -+ struct csrow_info *csrow = &csrows[i]; -+ -+ if ( 0 == csrow->nr_pages ) continue; -+ -+ debugf3( "MC%d: " __FILE__ -+ ": %s(): first(0x%lx) page(0x%lx)" -+ " last(0x%lx) mask(0x%lx)\n", -+ mci->mc_idx, __func__, -+ csrow->first_page, -+ page, -+ csrow->last_page, -+ csrow->page_mask ); -+ -+ if ( ( page >= csrow->first_page ) -+ && ( page <= csrow->last_page ) -+ && ((page & csrow->page_mask) -+ == (csrow->first_page & csrow->page_mask)) ) { -+ row = i; -+ break; -+ } -+ } -+ -+ if (row == -1) { -+ printk( KERN_ERR -+ "MC%d: could not look up page error address %lx\n", -+ mci->mc_idx, (unsigned long)page); -+ } -+ -+ return row; -+} -+ -+ 
-+EXPORT_SYMBOL(bluesmoke_mc_handle_ce); -+ -+/* FIXME - setable log (warning/emerg) levels */ -+/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */ -+void bluesmoke_mc_handle_ce(struct mem_ctl_info *mci, -+ unsigned long page_frame_number, -+ unsigned long offset_in_page, -+ unsigned long syndrome, -+ int row, -+ int channel, -+ const char *msg) -+{ -+ unsigned long remapped_page; -+ -+ debugf3( "MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__ ); -+ -+ /* FIXME - maybe make panic on INTERNAL ERROR an option */ -+ if ( row >= mci->nr_csrows || row < 0 ) { -+ /* something is wrong */ -+ printk( KERN_ERR -+ "MC%d: INTERNAL ERROR: row out of range (%d >= %d)\n", -+ mci->mc_idx, -+ row, mci->nr_csrows ); -+ bluesmoke_mc_handle_ce_no_info( mci, "INTERNAL ERROR" ); -+ return; -+ } -+ if ( channel >= mci->csrows[row].nr_channels || channel < 0 ) { -+ /* something is wrong */ -+ printk( KERN_ERR -+ "MC%d: INTERNAL ERROR: channel out of range (%d >= %d)\n", -+ mci->mc_idx, -+ channel, mci->csrows[row].nr_channels ); -+ bluesmoke_mc_handle_ce_no_info( mci, "INTERNAL ERROR" ); -+ return; -+ } -+ -+ if ( log_ce ) { -+ /* FIXME - put in DIMM location */ -+ printk( KERN_WARNING -+ "MC%d: CE page 0x%lx, offset 0x%lx," -+ " grain %d, syndrome 0x%lx, row %d, channel %d," -+ " label \"%s\": %s\n", -+ mci->mc_idx, -+ page_frame_number, -+ offset_in_page, -+ mci->csrows[row].grain, -+ syndrome, -+ row, -+ channel, -+ mci->csrows[row].channels[channel].label, -+ msg ); -+ } -+ -+ mci->ce_count++; -+ mci->csrows[row].ce_count++; -+ mci->csrows[row].channels[channel].ce_count++; -+ -+ if ( mci->scrub_mode & SCRUB_SW_SRC ) { -+ /* -+ * Some MC's can remap memory so that it is still available -+ * at a different address when PCI devices map into memory. -+ * MC's that can't do this lose the memory where PCI devices -+ * are mapped. 
This mapping is MC dependant and so we call -+ * back into the MC driver for it to map the MC page to -+ * a physical (CPU) page which can then be mapped to a virtual -+ * page - which can then be scrubbed. -+ */ -+ if ( mci->ctl_page_to_phys ) { -+ remapped_page = mci->ctl_page_to_phys(mci, -+ page_frame_number); -+ } else { -+ remapped_page = page_frame_number; -+ } -+ bluesmoke_mc_scrub_block(remapped_page, -+ offset_in_page, -+ mci->csrows[row].grain); -+ } -+} -+ -+ -+EXPORT_SYMBOL(bluesmoke_mc_handle_ce_no_info); -+ -+void bluesmoke_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg) -+{ -+ if ( log_ce ) { -+ printk( KERN_WARNING -+ "MC%d: CE - no information available: %s\n", -+ mci->mc_idx, msg ); -+ } -+ mci->ce_noinfo_count++; -+ mci->ce_count++; -+} -+ -+ -+EXPORT_SYMBOL(bluesmoke_mc_handle_ue); -+ -+void bluesmoke_mc_handle_ue(struct mem_ctl_info *mci, -+ unsigned long page_frame_number, -+ unsigned long offset_in_page, -+ int row, -+ const char *msg) -+{ -+ int len = BLUESMOKE_MC_LABEL_LEN * 4; -+ char labels[len + 1]; -+ char *pos = labels; -+ int chan; -+ int chars; -+ -+ debugf3( "MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__ ); -+ -+ /* FIXME - maybe make panic on INTERNAL ERROR an option */ -+ if ( row >= mci->nr_csrows || row < 0 ) { -+ /* something is wrong */ -+ printk( KERN_ERR -+ "MC%d: INTERNAL ERROR: row out of range (%d >= %d)\n", -+ mci->mc_idx, -+ row, mci->nr_csrows ); -+ bluesmoke_mc_handle_ue_no_info( mci, "INTERNAL ERROR" ); -+ return; -+ } -+ -+ chars = snprintf( pos, len + 1, "%s", -+ mci->csrows[row].channels[0].label ); -+ len -= chars; -+ pos += chars; -+ for ( chan = 1; -+ (chan < mci->csrows[row].nr_channels) && (len > 0); -+ chan++ ) { -+ chars = snprintf( pos, len + 1, ":%s", -+ mci->csrows[row].channels[chan].label ); -+ len -= chars; -+ pos += chars; -+ } -+ -+ if ( log_ue ) { -+ printk( KERN_EMERG -+ "MC%d: UE page 0x%lx, offset 0x%lx, grain %d, row %d," -+ " labels \"%s\": %s\n", -+ mci->mc_idx, -+ 
page_frame_number, -+ offset_in_page, -+ mci->csrows[row].grain, -+ row, -+ labels, -+ msg ); -+ } -+ -+ if (panic_on_ue) { -+ panic( "MC%d: UE page 0x%lx, offset 0x%lx, grain %d, row %d," -+ " labels \"%s\": %s\n", -+ mci->mc_idx, -+ page_frame_number, -+ offset_in_page, -+ mci->csrows[row].grain, -+ row, -+ labels, -+ msg ); -+ } -+ -+ mci->ue_count++; -+ mci->csrows[row].ue_count++; -+} -+ -+ -+EXPORT_SYMBOL(bluesmoke_mc_handle_ue_no_info); -+ -+void bluesmoke_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg) -+{ -+ if (panic_on_ue) panic("MC%d: Uncorrected Error", mci->mc_idx); -+ -+ if ( log_ue ) { -+ printk( KERN_WARNING -+ "MC%d: UE - no information available: %s\n", -+ mci->mc_idx, msg ); -+ } -+ mci->ue_noinfo_count++; -+ mci->ue_count++; -+} -+ -+ -+/* -+ * Check MC status every poll_msec. -+ * SMP safe, doesn't use NMI, and auto-rate-limits. -+ */ -+static void check_mc(unsigned long dummy) -+{ -+ int i; -+ -+ debugf3( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+ if (in_atomic()) { -+ if (down_trylock(&mem_ctls_mutex)) { -+ if (timer_pending(&timer)) { -+ mod_timer(&timer, jiffies + (HZ * poll_msec) / 1000); -+ } else { -+ timer.function = check_mc; -+ timer.expires = jiffies + (HZ * poll_msec) / 1000; -+ add_timer(&timer); -+ } -+ return; -+ } -+ } else -+ down(&mem_ctls_mutex); -+ -+ for ( i = 0; i < MAX_MC_DEVICES; i++ ) { -+ struct mem_ctl_info *mci = mcis[ i ]; -+ -+ if ( NULL == mci ) continue; -+ -+ /* FIXME - should check scrub flag */ -+ if ( ! 
mci->scrub_needed -+ && mci->edac_check ) { -+ mci->edac_check(mci); -+ } -+ -+ if ( mci->clear_err ) mci->clear_err(mci); -+ } -+ -+ if (timer_pending(&timer)) { -+ mod_timer(&timer, jiffies + (HZ * poll_msec) / 1000); -+ } else { -+ timer.function = check_mc; -+ timer.expires = jiffies + (HZ * poll_msec) /1000; -+ add_timer(&timer); -+ } -+ -+ up( &mem_ctls_mutex ); -+} -+ -+ -+int __init bluesmoke_mc_init(void) -+{ -+ int rc = -ENODEV; -+ -+ debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); -+ printk( KERN_INFO "MC: " __FILE__ " version " BLUESMOKE_MC_VER "\n" ); -+ -+ memset( mcis, 0, sizeof(mcis) ); -+ -+ check_mc(0); -+ -+#ifdef CONFIG_PROC_FS -+ if ( NULL == (proc_mc = proc_mkdir( MC_PROC_DIR, &proc_root ) ) ) { -+ goto FINISHED; -+ } -+#endif /* CONFIG_PROC_FS */ -+ -+#ifdef CONFIG_SYSCTL -+ mc_sysctl_header = register_sysctl_table(mc_root_table, 1); -+#endif /* CONFIG_SYSCTL */ -+ -+ rc = 0; -+ -+ FINISHED: -+ return rc; -+} -+ -+ -+static void __exit bluesmoke_mc_exit(void) -+{ -+ debugf0( "MC: " __FILE__ ": %s()\n", __func__ ); -+ -+#ifdef CONFIG_PROC_FS -+ if ( proc_mc ) remove_proc_entry( MC_PROC_DIR, &proc_root ); -+#endif /* CONFIG_PROC_FS */ -+ -+#ifdef CONFIG_SYSCTL -+ if (mc_sysctl_header) { -+ unregister_sysctl_table(mc_sysctl_header); -+ mc_sysctl_header = NULL; -+ } -+#endif /* CONFIG_SYSCTL */ -+} -+ -+ -+module_init(bluesmoke_mc_init); -+module_exit(bluesmoke_mc_exit); -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n" -+ "Based on.work by Dan Hollis et al"); -+MODULE_DESCRIPTION("Core library routines for MC reporting"); -+ -+#if SCRUB -+MODULE_PARM(mc_scrub, "i"); -+MODULE_PARM_DESC(mc_scrub, "Force MC scrubbing: 0=off 1=on"); -+#endif /* SCRUB */ -+MODULE_PARM(panic_on_ue, "i"); -+MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on"); -+MODULE_PARM(log_ue, "i"); -+MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on"); -+MODULE_PARM(log_ce, "i"); 
-+MODULE_PARM_DESC(log_ce, "Log correctable error to console: 0=off 1=on"); -+MODULE_PARM(poll_msec, "i"); -+MODULE_PARM_DESC(poll_msec, "Polling period in milliseconds"); -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_mc.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/bluesmoke_mc.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/bluesmoke_mc.h 2004-12-17 12:46:23.000000000 -0500 -@@ -0,0 +1,435 @@ -+/* -+ * MC kernel module -+ * (C) 2003 Linux Networx (http://lnxi.com) -+ * This file may be distributed under the terms of the -+ * GNU General Public License. -+ * -+ * Written by Thayne Harbaugh -+ * Based on work by Dan Hollis <goemon at anime dot net> and others. -+ * http://www.anime.net/~goemon/linux-ecc/ -+ * -+ * $Id: bluesmoke_mc.h,v 1.4 2004/11/10 01:12:36 thayne Exp $ -+ * -+ */ -+ -+ -+#ifndef _BLUESMOKE_MC_H_ -+#define _BLUESMOKE_MC_H_ -+ -+ -+#include <linux/pci.h> -+#include <linux/time.h> -+ -+ -+#define BLUESMOKE_MC_VER "MC $Revision: 1.4 $" -+#define MAX_MC_DEVICES 4 /* FIXME - this should be dynamic */ -+#define BLUESMOKE_MC_LABEL_LEN 31 -+#define MC_PROC_NAME_MAX_LEN 7 -+ -+#if PAGE_SHIFT < 20 -+#define PAGES_TO_MiB( pages ) ( ( pages ) >> ( 20 - PAGE_SHIFT ) ) -+#else /* PAGE_SHIFT > 20 */ -+#define PAGES_TO_MiB( pages ) ( ( pages ) << ( PAGE_SHIFT - 20 ) ) -+#endif -+ -+#if defined(CONFIG_BLUESMOKE_DEBUG) \ -+ && defined(CONFIG_BLUESMOKE_DEBUG_VERBOSE) -+#if 0 <= CONFIG_BLUESMOKE_DEBUG_VERBOSE -+#define debugf0( ... ) printk( __VA_ARGS__ ) -+#else -+#define debugf0( ... ) -+#endif -+ -+#if 1 <= CONFIG_BLUESMOKE_DEBUG_VERBOSE -+#define debugf1( ... ) printk( __VA_ARGS__ ) -+#else -+#define debugf1( ... ) -+#endif -+ -+#if 2 <= CONFIG_BLUESMOKE_DEBUG_VERBOSE -+#define debugf2( ... ) printk( __VA_ARGS__ ) -+#else -+#define debugf2( ... 
) -+#endif -+ -+#if 3 <= CONFIG_BLUESMOKE_DEBUG_VERBOSE -+#define debugf3( ... ) printk( __VA_ARGS__ ) -+#else -+#define debugf3( ... ) -+#endif -+ -+#else /* !CONFIG_BLUESMOKE_DEBUG || !CONFIG_BLUESMOKE_DEBUG_VERBOSE */ -+ -+#define debugf0( ... ) -+#define debugf1( ... ) -+#define debugf2( ... ) -+#define debugf3( ... ) -+#endif /* !CONFIG_BLUESMOKE_DEBUG || !CONFIG_BLUESMOKE_DEBUG_VERBOSE */ -+ -+ -+#define bs_xstr(s) bs_str(s) -+#define bs_str(s) #s -+#define BS_MOD_STR bs_xstr(KBUILD_BASENAME) -+ -+#define BIT(x) (1 << (x)) -+ -+#define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, PCI_DEVICE_ID_ ## vend ## _ ## dev -+ -+/* memory devices */ -+enum dev_type { -+ DEV_UNKNOWN = 0, -+ DEV_X1, -+ DEV_X2, -+ DEV_X4, -+ DEV_X8, -+ DEV_X16, -+ DEV_X32, /* Do these parts exist? */ -+ DEV_X64 /* Do these parts exist? */ -+}; -+ -+#define DEV_FLAG_UNKNOWN BIT(DEV_UNKNOWN) -+#define DEV_FLAG_X1 BIT(DEV_X1) -+#define DEV_FLAG_X2 BIT(DEV_X2) -+#define DEV_FLAG_X4 BIT(DEV_X4) -+#define DEV_FLAG_X8 BIT(DEV_X8) -+#define DEV_FLAG_X16 BIT(DEV_X16) -+#define DEV_FLAG_X32 BIT(DEV_X32) -+#define DEV_FLAG_X64 BIT(DEV_X64) -+ -+/* memory types */ -+enum mem_type { -+ MEM_EMPTY = 0, /* Empty csrow */ -+ MEM_RESERVED, /* Reserved csrow type */ -+ MEM_UNKNOWN, /* Unknown csrow type */ -+ MEM_FPM, /* Fast page mode */ -+ MEM_EDO, /* Extended data out */ -+ MEM_BEDO, /* Burst Extended data out */ -+ MEM_SDR, /* Single data rate SDRAM */ -+ MEM_RDR, /* Registered single data rate SDRAM */ -+ MEM_DDR, /* Double data rate SDRAM */ -+ MEM_RDDR, /* Registered Double data rate SDRAM */ -+ MEM_RMBS /* Rambus DRAM */ -+}; -+ -+#define MEM_FLAG_EMPTY BIT(MEM_EMPTY) -+#define MEM_FLAG_RESERVED BIT(MEM_RESERVED) -+#define MEM_FLAG_UNKNOWN BIT(MEM_UNKNOWN) -+#define MEM_FLAG_FPM BIT(MEM_FPM) -+#define MEM_FLAG_EDO BIT(MEM_EDO) -+#define MEM_FLAG_BEDO BIT(MEM_BEDO) -+#define MEM_FLAG_SDR BIT(MEM_SDR) -+#define MEM_FLAG_RDR BIT(MEM_RDR) -+#define MEM_FLAG_DDR BIT(MEM_DDR) -+#define MEM_FLAG_RDDR 
BIT(MEM_RDDR) -+#define MEM_FLAG_RMBS BIT(MEM_RMBS) -+ -+ -+/* chipset Error Detection and Correction capabilities and mode */ -+enum edac_type { -+ EDAC_UNKNOWN = 0,/* Unknown if ECC is available */ -+ EDAC_NONE, /* Doesnt support ECC */ -+ EDAC_RESERVED, /* Reserved ECC type */ -+ EDAC_PARITY, /* Detects parity errors */ -+ EDAC_EC, /* Error Checking - no correction */ -+ EDAC_SECDED, /* Single bit error correction, Double detection */ -+ EDAC_S2ECD2ED, /* Chipkill x2 devices - do these exist? */ -+ EDAC_S4ECD4ED, /* Chipkill x4 devices */ -+ EDAC_S8ECD8ED, /* Chipkill x8 devices */ -+ EDAC_S16ECD16ED,/* Chipkill x16 devices */ -+}; -+ -+#define EDAC_FLAG_UNKNOWN BIT(EDAC_UNKNOWN) -+#define EDAC_FLAG_NONE BIT(EDAC_NONE) -+#define EDAC_FLAG_PARITY BIT(EDAC_PARITY) -+#define EDAC_FLAG_EC BIT(EDAC_EC) -+#define EDAC_FLAG_SECDED BIT(EDAC_SECDED) -+#define EDAC_FLAG_S2ECD2ED BIT(EDAC_S2ECD2ED) -+#define EDAC_FLAG_S4ECD4ED BIT(EDAC_S4ECD4ED) -+#define EDAC_FLAG_S8ECD8ED BIT(EDAC_S8ECD8ED) -+#define EDAC_FLAG_S16ECD16ED BIT(EDAC_S16ECD16ED) -+ -+ -+/* scrubbing capabilities */ -+enum scrub_type { -+ SCRUB_UNKNOWN = 0, /* Unknown if scrubber is available */ -+ SCRUB_NONE, /* No scrubber */ -+ SCRUB_SW_PROG, /* Software progressive (sequential) scrubbing */ -+ SCRUB_SW_SRC, /* Software scrub only errors */ -+ SCRUB_SW_PROG_SRC, /* Progressive software scrub from an error */ -+ SCRUB_SW_TUNABLE, /* Software scrub frequency is tunable */ -+ SCRUB_HW_PROG, /* Hardware progressive (sequential) scrubbing */ -+ SCRUB_HW_SRC, /* Hardware scrub only errors */ -+ SCRUB_HW_PROG_SRC, /* Progressive hardware scrub from an error */ -+ SCRUB_HW_TUNABLE /* Hardware scrub frequency is tunable */ -+}; -+ -+#define SCRUB_FLAG_SW_PROG BIT(SCRUB_SW_PROG) -+#define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC_CORR) -+#define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC_CORR) -+#define SCRUB_FLAG_SW_TUN BIT(SCRUB_SW_SCRUB_TUNABLE) -+#define SCRUB_FLAG_HW_PROG BIT(SCRUB_HW_PROG) -+#define 
SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC_CORR) -+#define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC_CORR) -+#define SCRUB_FLAG_HW_TUN BIT(SCRUB_HW_TUNABLE) -+ -+ -+/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */ -+ -+ -+/* -+ * There are several things to be aware of that aren't at all obvious: -+ * -+ * -+ * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc.. -+ * -+ * These are some of the many terms that are thrown about that don't always -+ * mean what people think they mean (Inconceivable!). In the interest of -+ * creating a common ground for discussion, terms and their definitions -+ * will be established. -+ * -+ * Memory devices: The individual chip on a memory stick. These devices -+ * commonly output 4 and 8 bits each. Grouping several -+ * of these in parallel provides 64 bits which is common -+ * for a memory stick. -+ * -+ * Memory Stick: A printed circuit board that agregates multiple -+ * memory devices in parallel. This is the atomic -+ * memory component that is purchaseable by Joe consumer -+ * and loaded int a memory socket. -+ * -+ * Socket: A physical connector on the motherboard that accepts -+ * a single memory stick. -+ * -+ * Channel: Set of memory devices on a memory stick that must be -+ * grouped in parallel with one or more additional -+ * channels from other memory sticks. This parallel -+ * grouping of the output from multiple channels are -+ * necessary for the smallest granularity of memory access. -+ * Some memory controllers are capable of single channel - -+ * which means that memory sticks can be loaded -+ * individually. Other memory controllers are only -+ * capable of dual channel - which means that memory -+ * sticks must be loaded as pairs (see "socket set"). -+ * -+ * Chip-select row: All of the memory devices that are selected together. -+ * for a single, minimum grain of memory access. -+ * This selects all of the parallel memory devices across -+ * all of the parallel channels. 
Common chip-select rows -+ * for single channel are 64 bits, for dual channel 128 -+ * bits. -+ * -+ * Double-sided stick: A double-sided stick has two chip-select rows which -+ * access different sets of memory devices. The two -+ * rows cannot be accessed concurrently. "Double-sided" -+ * is irrespective of the memory devices being mounted -+ * on both sides of the memory stick. -+ * -+ * Socket set: All of the memory sticks that are required for for -+ * a single memory access or all of the memory sticks -+ * spanned by a chip-select row. A single socket set -+ * has two chip-select rows if double-sided sticks are -+ * used. -+ * -+ * Bank: This term is avoided because it is unclear when -+ * needing to distinguish between chip-select rows and -+ * socket sets. -+ * -+ * -+ * Controller pages: -+ * -+ * Physical pages: -+ * -+ * Virtual pages: -+ * -+ * -+ * STRUCTURE ORGANIZATION AND CHOICES -+ * -+ * -+ * -+ * PS - I enjoyed writing all that about as much as you enjoyed reading it. 
-+ */ -+ -+ -+struct channel_info { -+ int chan_idx; /* channel index */ -+ u32 ce_count; /* Correctable Errors for this CHANNEL */ -+ char label[BLUESMOKE_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */ -+ struct csrow_info *csrow;/* the parent */ -+}; -+ -+ -+struct csrow_info { -+ unsigned long first_page;/* first page number in dimm */ -+ unsigned long last_page;/* last page number in dimm */ -+ unsigned long page_mask;/* used for interleaving - 0UL for non intlv */ -+ u32 nr_pages; /* number of pages in csrow */ -+ u32 grain; /* granularity of reported error in bytes */ -+ int csrow_idx; /* the chip-select row */ -+ enum dev_type dtype; /* memory device type */ -+ u32 ue_count; /* Uncorrectable Errors for this csrow */ -+ u32 ce_count; /* Correctable Errors for this csrow */ -+ enum mem_type mtype; /* memory csrow type */ -+ enum edac_type edac_mode;/* EDAC mode for this csrow */ -+ struct mem_ctl_info *mci;/* the parent */ -+ /* FIXME the number of CHANNELs might need to become dynamic */ -+ u32 nr_channels; -+ struct channel_info *channels; -+}; -+ -+ -+typedef void *pvt_info_t; -+ -+ -+struct mem_ctl_info { -+ unsigned long mtype_cap;/* memory types supported by mc */ -+ unsigned long edac_ctl_cap;/* Memory controller EDAC capabilities */ -+ unsigned long edac_cap; /* configuration capabilities - this is -+ closely related to edac_ctl_cap. The -+ difference is that the controller -+ may be capable of s4ecd4ed which would -+ be listed in edac_ctl_cap, but if -+ channels aren't capable of s4ecd4ed then the -+ edac_cap would not have that capability. */ -+ unsigned long scrub_cap;/* chipset scrub capabilities */ -+ enum scrub_type scrub_mode;/* current scrub mode */ -+ /* pointer to edac checking routine */ -+ void (*edac_check)(struct mem_ctl_info *mci); -+ /* pointer to error clear routine */ -+ void (*clear_err)(struct mem_ctl_info *mci); -+ /* -+ * Remaps memory pages: controller pages to physical pages. -+ * For most MC's, this will be NULL. 
-+ */ -+ /* FIXME - why not send the phys page to begin with? */ -+ unsigned long (*ctl_page_to_phys)(struct mem_ctl_info *mci, -+ unsigned long page); -+ int mc_idx; -+ int nr_csrows; -+ struct csrow_info *csrows; -+ /* -+ * FIXME - what about controllers on other busses? - IDs must be -+ * unique. pdev pointer should be sufficiently unique, but -+ * BUS:SLOT.FUNC numbers may not be unique. -+ */ -+ struct pci_dev *pdev; -+ const char *mod_name; -+ const char *mod_ver; -+ const char *ctl_name; -+ char proc_name[MC_PROC_NAME_MAX_LEN + 1]; -+#ifdef CONFIG_PROC_FS -+ struct proc_dir_entry *proc_ent; -+#endif -+ pvt_info_t pvt_info; -+ int scrub_needed; -+ u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */ -+ u32 ce_noinfo_count; /* Correctable Errors w/o info */ -+ u32 ue_count; /* Total Uncorrectable Errors for this MC */ -+ u32 ce_count; /* Total Correctable Errors for this MC */ -+ struct timeval tv; /* time when counters were zeroed */ -+}; -+ -+ -+/* write all or some bits in a byte-register*/ -+static inline void pci_write_bits8( struct pci_dev *pdev, int offset, -+ u8 value, u8 mask ) -+{ -+ if ( mask != 0xff ){ -+ u8 buf; -+ pci_read_config_byte( pdev, offset, &buf); -+ value &= mask; -+ buf &= ~mask; -+ value |= buf; -+ } -+ pci_write_config_byte( pdev, offset, value ); -+} -+ -+ -+/* write all or some bits in a word-register*/ -+static inline void pci_write_bits16( struct pci_dev *pdev, int offset, -+ u16 value, u16 mask ) -+{ -+ if ( mask != 0xffff ){ -+ u16 buf; -+ pci_read_config_word( pdev, offset, &buf ); -+ value &= mask; -+ buf &= ~mask; -+ value |= buf; -+ } -+ pci_write_config_word( pdev, offset, value); -+} -+ -+ -+/* write all or some bits in a dword-register*/ -+static inline void pci_write_bits32( struct pci_dev *pdev, int offset, -+ u32 value, u32 mask ) -+{ -+ if ( mask != 0xffff ){ -+ u32 buf; -+ pci_read_config_dword( pdev, offset, &buf ); -+ value &= mask; -+ buf &= ~mask; -+ value |= buf; -+ } -+ pci_write_config_dword( pdev, 
offset, value ); -+} -+ -+ -+#if CONFIG_BLUESMOKE_DEBUG -+void bluesmoke_mc_dump_channel( struct channel_info *chan ); -+void bluesmoke_mc_dump_mci( struct mem_ctl_info *mci ); -+void bluesmoke_mc_dump_csrow( struct csrow_info *csrow ); -+#endif /* CONFIG_BLUESMOKE_DEBUG */ -+ -+extern int bluesmoke_mc_add_mc(struct mem_ctl_info *mci); -+extern int bluesmoke_mc_del_mc(struct mem_ctl_info *mci); -+ -+extern int bluesmoke_mc_find_csrow_by_page( struct mem_ctl_info *mci, -+ unsigned long page ); -+ -+extern struct mem_ctl_info *bluesmoke_mc_find_mci_by_pdev(struct pci_dev *pdev ); -+ -+extern void bluesmoke_mc_scrub_block(unsigned long page, -+ unsigned long offset, -+ u32 size); -+ -+/* -+ * The no info errors are used when error overflows are reported. -+ * There are a limited number of error logging registers that can -+ * be exausted. When all registers are exhausted and an additional -+ * error occurs then an error overflow register records that an -+ * error occured and the type of error, but doesn't have any -+ * further information. The ce/ue versions make for cleaner -+ * reporting logic and function interface - reduces conditional -+ * statement clutter and extra function arguments. -+ */ -+extern void bluesmoke_mc_handle_ce(struct mem_ctl_info *mci, -+ unsigned long page_frame_number, -+ unsigned long offset_in_page, -+ unsigned long syndrome, -+ int row, -+ int channel, -+ const char *msg); -+ -+extern void bluesmoke_mc_handle_ce_no_info(struct mem_ctl_info *mci, -+ const char *msg); -+ -+extern void bluesmoke_mc_handle_ue(struct mem_ctl_info *mci, -+ unsigned long page_frame_number, -+ unsigned long offset_in_page, -+ int row, -+ const char *msg); -+ -+extern void bluesmoke_mc_handle_ue_no_info(struct mem_ctl_info *mci, -+ const char *msg); -+ -+/* -+ * This kmalloc's and initializes all the structures. -+ * Can't be used if all structures don't have the same lifetime. 
-+ */ -+extern struct mem_ctl_info *bluesmoke_mc_init_structs(u32 sz_pvt, -+ u32 nr_csrows, -+ u32 nr_chans); -+ -+#include "compatmac.h" -+ -+#endif /* _BLUESMOKE_MC_H_ */ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/compatmac.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/bluesmoke/compatmac.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/bluesmoke/compatmac.h 2004-12-17 12:46:23.000000000 -0500 -@@ -0,0 +1,63 @@ -+#ifndef __LINUX_BLUESMOKE_COMPATMAC_H__ -+#define __LINUX_BLUESMOKE_COMPATMAC_H__ -+ -+#include <linux/version.h> -+#include <linux/pci.h> -+ -+/* -+ * Assume that if the kernel isn't 2.6.x then it is a 2.4.x - this will -+ * obviously break 2.5.x but we don't care! -+ */ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) -+ -+#define pci_name(pci_dev) ((pci_dev)->slot_name) -+ -+#ifndef pci_pretty_name -+# define pci_pretty_name(pdev) "" -+#endif -+ -+#define try_module_get(m) try_inc_mod_count(m) -+#define __module_get(m) do { if (!try_inc_mod_count(m)) BUG(); } while(0) -+#define module_put(m) do { if (m) __MOD_DEC_USE_COUNT((struct module *)(m)); } while(0) -+#define module_refcount(m) (MOD_IN_USE) -+#define set_module_owner(x) do { x->owner = THIS_MODULE; } while(0) -+ -+ -+/* -+ * The real pci_scan_single_device() in the 2.6.x series -+ * has a few more features. It calls pci_name_device() and -+ * pci_fixup_device(). Unfortunately neither of those are -+ * exported symbols. The pci_name is a nicety that we can -+ * live without. As far as pci quirks, if your device has them -+ * then you better just fix them in your driver rather than -+ * trying to call some generic kernel code. 
-+ */ -+static inline struct pci_dev *pci_scan_single_device( struct pci_bus *bus, -+ int devfn ) -+{ -+ struct pci_dev tmp_pdev, *pdev = NULL; -+ -+ memset( &tmp_pdev, 0, sizeof(tmp_pdev) ); -+ tmp_pdev.bus = bus; -+ tmp_pdev.devfn = devfn; -+ if ( (pdev = pci_scan_device( &tmp_pdev ) ) ) { -+ pci_insert_device( pdev, bus ); -+ } -+ return pdev; -+} -+ -+ -+#else -+ -+#ifndef pci_pretty_name -+# define pci_pretty_name(pdev) ((pdev)->pretty_name) -+#endif -+ -+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) */ -+ -+#ifndef in_atomic -+#define in_atomic() 0 -+#define down_trylock(mtx) 1 -+#endif /* in_atomic */ -+ -+#endif /* __LINUX_BLUESMOKE_COMPATMAC_H__ */ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/pci_ids.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/linux/pci_ids.h 2004-11-11 10:28:34.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/pci_ids.h 2004-12-17 12:45:23.000000000 -0500 -@@ -454,6 +454,10 @@ - #define PCI_DEVICE_ID_AMI_MEGARAID2 0x9060 - - #define PCI_VENDOR_ID_AMD 0x1022 -+#define PCI_DEVICE_ID_AMD_OPT_0_HT 0x1100 -+#define PCI_DEVICE_ID_AMD_OPT_1_ADDRMAP 0x1101 -+#define PCI_DEVICE_ID_AMD_OPT_2_MEMCTL 0x1102 -+#define PCI_DEVICE_ID_AMD_OPT_3_MISCCTL 0x1103 - #define PCI_DEVICE_ID_AMD_LANCE 0x2000 - #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 - #define PCI_DEVICE_ID_AMD_SCSI 0x2020 -@@ -2078,6 +2082,22 @@ - #define PCI_DEVICE_ID_INTEL_82801EB_7 0x24d7 - #define PCI_DEVICE_ID_INTEL_82801EB_11 0x24db - #define PCI_DEVICE_ID_INTEL_82801EB_13 0x24dd -+#define PCI_DEVICE_ID_INTEL_7205_0 0x255d -+#define PCI_DEVICE_ID_INTEL_7205_1_ERR 0x2551 -+#define PCI_DEVICE_ID_INTEL_7500_0 0x2540 -+#define PCI_DEVICE_ID_INTEL_7500_1_ERR 0x2541 -+#define PCI_DEVICE_ID_INTEL_7501_0 0x254c -+#define PCI_DEVICE_ID_INTEL_7501_1_ERR 0x2541 -+#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 -+#define PCI_DEVICE_ID_INTEL_7505_1_ERR 
0x2551 -+#define PCI_DEVICE_ID_INTEL_7520_0 0x3590 -+#define PCI_DEVICE_ID_INTEL_7520_1_ERR 0x3591 -+#define PCI_DEVICE_ID_INTEL_ICH5R_D30_F0 0x244e -+#define PCI_DEVICE_ID_INTEL_ICH5R_D31_F0 0x24D0 -+#define PCI_DEVICE_ID_INTEL_82875_0 0x2578 -+#define PCI_DEVICE_ID_INTEL_82875_6 0x257e -+#define PCI_DEVICE_ID_INTEL_7520_0 0x3590 -+#define PCI_DEVICE_ID_INTEL_7520_1_ERR 0x3591 - #define PCI_DEVICE_ID_INTEL_ESB_0 0x25a0 - #define PCI_DEVICE_ID_INTEL_ESB_1 0x25a1 - #define PCI_DEVICE_ID_INTEL_ESB_2 0x25a2 diff --git a/lustre/kernel_patches/patches/brk-locked-2.6-suse-lnxi.patch b/lustre/kernel_patches/patches/brk-locked-2.6-suse-lnxi.patch deleted file mode 100644 index 950b6eba85..0000000000 --- a/lustre/kernel_patches/patches/brk-locked-2.6-suse-lnxi.patch +++ /dev/null @@ -1,219 +0,0 @@ -diff -urN clean/arch/mips/kernel/irixelf.c linux-2.6.5-SLES9_SP1_BRANCH_91/arch/mips/kernel/irixelf.c ---- clean/arch/mips/kernel/irixelf.c 2005-01-09 12:25:26.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_91/arch/mips/kernel/irixelf.c 2005-01-19 19:29:59.909824951 -0500 -@@ -128,7 +128,7 @@ - end = PAGE_ALIGN(end); - if (end <= start) - return; -- do_brk(start, end - start); -+ do_brk_locked(start, end - start); - } - - -diff -urN clean/arch/x86_64/ia32/ia32_aout.c linux-2.6.5-SLES9_SP1_BRANCH_91/arch/x86_64/ia32/ia32_aout.c ---- clean/arch/x86_64/ia32/ia32_aout.c 2005-01-09 12:25:33.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_91/arch/x86_64/ia32/ia32_aout.c 2005-01-19 19:30:50.255145196 -0500 -@@ -114,7 +114,7 @@ - start = PAGE_ALIGN(start); - end = PAGE_ALIGN(end); - if (end > start) { -- unsigned long addr = do_brk(start, end - start); -+ unsigned long addr = do_brk_locked(start, end - start); - if (BAD_ADDR(addr)) - return addr; - } -@@ -327,7 +327,7 @@ - pos = 32; - map_size = ex.a_text+ex.a_data; - -- error = do_brk(text_addr & PAGE_MASK, map_size); -+ error = do_brk_locked(text_addr & PAGE_MASK, map_size); - if (error != (text_addr & PAGE_MASK)) { - 
send_sig(SIGKILL, current, 0); - return error; -@@ -363,7 +363,7 @@ - - if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { - loff_t pos = fd_offset; -- do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); -+ do_brk_locked(N_TXTADDR(ex), ex.a_text+ex.a_data); - bprm->file->f_op->read(bprm->file,(char *)N_TXTADDR(ex), - ex.a_text+ex.a_data, &pos); - flush_icache_range((unsigned long) N_TXTADDR(ex), -@@ -476,7 +476,7 @@ - } - #endif - -- do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); -+ do_brk_locked(start_addr, ex.a_text + ex.a_data + ex.a_bss); - - file->f_op->read(file, (char *)start_addr, - ex.a_text + ex.a_data, &pos); -@@ -500,7 +500,7 @@ - len = PAGE_ALIGN(ex.a_text + ex.a_data); - bss = ex.a_text + ex.a_data + ex.a_bss; - if (bss > len) { -- error = do_brk(start_addr + len, bss - len); -+ error = do_brk_locked(start_addr + len, bss - len); - retval = error; - if (error != start_addr + len) - goto out; -diff -urN clean/fs/binfmt_aout.c linux-2.6.5-SLES9_SP1_BRANCH_91/fs/binfmt_aout.c ---- clean/fs/binfmt_aout.c 2005-01-09 12:25:33.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_91/fs/binfmt_aout.c 2005-01-19 19:31:40.480490745 -0500 -@@ -51,7 +51,7 @@ - start = PAGE_ALIGN(start); - end = PAGE_ALIGN(end); - if (end > start) { -- unsigned long addr = do_brk(start, end - start); -+ unsigned long addr = do_brk_locked(start, end - start); - if (BAD_ADDR(addr)) - return addr; - } -@@ -323,10 +323,10 @@ - loff_t pos = fd_offset; - /* Fuck me plenty... 
*/ - /* <AOL></AOL> */ -- error = do_brk(N_TXTADDR(ex), ex.a_text); -+ error = do_brk_locked(N_TXTADDR(ex), ex.a_text); - bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex), - ex.a_text, &pos); -- error = do_brk(N_DATADDR(ex), ex.a_data); -+ error = do_brk_locked(N_DATADDR(ex), ex.a_data); - bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex), - ex.a_data, &pos); - goto beyond_if; -@@ -347,7 +347,7 @@ - map_size = ex.a_text+ex.a_data; - #endif - -- error = do_brk(text_addr & PAGE_MASK, map_size); -+ error = do_brk_locked(text_addr & PAGE_MASK, map_size); - if (error != (text_addr & PAGE_MASK)) { - send_sig(SIGKILL, current, 0); - return error; -@@ -381,7 +381,7 @@ - - if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { - loff_t pos = fd_offset; -- do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); -+ do_brk_locked(N_TXTADDR(ex), ex.a_text+ex.a_data); - bprm->file->f_op->read(bprm->file,(char *)N_TXTADDR(ex), - ex.a_text+ex.a_data, &pos); - flush_icache_range((unsigned long) N_TXTADDR(ex), -@@ -486,7 +486,7 @@ - error_time = jiffies; - } - -- do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); -+ do_brk_locked(start_addr, ex.a_text + ex.a_data + ex.a_bss); - - file->f_op->read(file, (char *)start_addr, - ex.a_text + ex.a_data, &pos); -@@ -510,7 +510,7 @@ - len = PAGE_ALIGN(ex.a_text + ex.a_data); - bss = ex.a_text + ex.a_data + ex.a_bss; - if (bss > len) { -- error = do_brk(start_addr + len, bss - len); -+ error = do_brk_locked(start_addr + len, bss - len); - retval = error; - if (error != start_addr + len) - goto out; -diff -urN clean/fs/binfmt_elf.c linux-2.6.5-SLES9_SP1_BRANCH_91/fs/binfmt_elf.c ---- clean/fs/binfmt_elf.c 2005-01-09 12:25:33.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_91/fs/binfmt_elf.c 2005-01-19 19:29:59.914823791 -0500 -@@ -88,7 +88,7 @@ - start = ELF_PAGEALIGN(start); - end = ELF_PAGEALIGN(end); - if (end > start) { -- unsigned long addr = do_brk(start, end - start); -+ unsigned long addr = do_brk_locked(start, end - 
start); - if (BAD_ADDR(addr)) - return addr; - } -@@ -406,7 +406,7 @@ - - /* Map the last of the bss segment */ - if (last_bss > elf_bss) { -- error = do_brk(elf_bss, last_bss - elf_bss); -+ error = do_brk_locked(elf_bss, last_bss - elf_bss); - if (BAD_ADDR(error)) - goto out_close; - } -@@ -446,7 +446,7 @@ - goto out; - } - -- do_brk(0, text_data); -+ do_brk_locked(0, text_data); - if (!interpreter->f_op || !interpreter->f_op->read) - goto out; - if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0) -@@ -454,7 +454,7 @@ - flush_icache_range((unsigned long)addr, - (unsigned long)addr + text_data); - -- do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1), -+ do_brk_locked(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1), - interp_ex->a_bss); - elf_entry = interp_ex->a_entry; - -@@ -1006,7 +1006,7 @@ - len = ELF_PAGESTART(elf_phdata->p_filesz + elf_phdata->p_vaddr + ELF_MIN_ALIGN - 1); - bss = elf_phdata->p_memsz + elf_phdata->p_vaddr; - if (bss > len) -- do_brk(len, bss - len); -+ do_brk_locked(len, bss - len); - error = 0; - - out_free_ph: -diff -urN clean/include/linux/mm.h linux-2.6.5-SLES9_SP1_BRANCH_91/include/linux/mm.h ---- clean/include/linux/mm.h 2005-01-09 12:25:34.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_91/include/linux/mm.h 2005-01-19 19:29:59.915823559 -0500 -@@ -821,6 +821,7 @@ - extern int do_munmap(struct mm_struct *, unsigned long, size_t); - - extern unsigned long do_brk(unsigned long, unsigned long); -+extern unsigned long do_brk_locked(unsigned long, unsigned long); - - /* vma merging helpers */ - static inline void -diff -urN clean/mm/mmap.c linux-2.6.5-SLES9_SP1_BRANCH_91/mm/mmap.c ---- clean/mm/mmap.c 2005-01-09 12:25:34.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_91/mm/mmap.c 2005-01-19 19:29:59.917823095 -0500 -@@ -1658,6 +1658,20 @@ - - EXPORT_SYMBOL(do_brk); - -+/* locking version of do_brk. 
*/ -+unsigned long do_brk_locked(unsigned long addr, unsigned long len) -+{ -+ unsigned long ret; -+ -+ down_write(¤t->mm->mmap_sem); -+ ret = do_brk(addr, len); -+ up_write(¤t->mm->mmap_sem); -+ -+ return ret; -+} -+ -+EXPORT_SYMBOL(do_brk_locked); -+ - /* Release all mmaps. */ - void exit_mmap(struct mm_struct *mm) - { -diff -urN clean/mm/nommu.c linux-2.6.5-SLES9_SP1_BRANCH_91/mm/nommu.c ---- clean/mm/nommu.c 2005-01-09 12:25:27.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_91/mm/nommu.c 2005-01-19 19:29:59.918822864 -0500 -@@ -227,6 +227,11 @@ - return audit_lresult(mm->brk = brk); - } - -+unsigned long do_brk_locked(unsigned long addr, unsigned long len) -+{ -+ return -ENOMEM; -+} -+ - /* - * Combine the mmap "prot" and "flags" argument into one "vm_flags" used - * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits diff --git a/lustre/kernel_patches/patches/compile-fixes-2.6.9-rhel4-22.patch b/lustre/kernel_patches/patches/compile-fixes-2.6.9-rhel4-22.patch deleted file mode 100644 index 98b8715e92..0000000000 --- a/lustre/kernel_patches/patches/compile-fixes-2.6.9-rhel4-22.patch +++ /dev/null @@ -1,76 +0,0 @@ ---- linux-2.6.9/arch/i386/kernel/apic.c.orig 2005-08-04 08:11:13.000000000 -0400 -+++ linux-2.6.9/arch/i386/kernel/apic.c 2005-08-04 08:27:04.000000000 -0400 -@@ -1125,8 +1125,10 @@ asmlinkage void smp_local_timer_interrup - - void smp_apic_timer_interrupt(struct pt_regs regs) - { -+#ifdef CONFIG_4KSTACKS - union irq_ctx *curctx; - union irq_ctx *irqctx; -+#endif - int cpu; - u32 *isp; - -@@ -1147,11 +1149,11 @@ void smp_apic_timer_interrupt(struct pt_ - * interrupt lock, which is the WrongThing (tm) to do. 
- */ - irq_enter(); -+ -+#ifdef CONFIG_4KSTACKS - curctx = (union irq_ctx *) current_thread_info(); - irqctx = hardirq_ctx[cpu]; -- if (curctx == irqctx) { -- smp_local_timer_interrupt(®s); -- } else { -+ if (curctx != irqctx) { - /* build the stack frame on the IRQ stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); - irqctx->tinfo.task = curctx->tinfo.task; -@@ -1167,7 +1169,10 @@ void smp_apic_timer_interrupt(struct pt_ - : : "b"(isp) - : "memory", "cc", "edx", "ecx" - ); -- } -+ } else -+#endif -+ smp_local_timer_interrupt(®s); -+ - irq_exit(); - } - ---- linux-2.6.9/include/asm-i386/crashdump.h.orig 2005-08-04 08:11:22.000000000 -0400 -+++ linux-2.6.9/include/asm-i386/crashdump.h 2005-08-04 08:27:04.000000000 -0400 -@@ -48,12 +48,14 @@ extern unsigned long next_ram_page (unsi - - static inline void platform_init_stack(void **stackptr) - { -+#ifdef CONFIG_4KSTACKS - *stackptr = (void *)kmalloc(sizeof(union irq_ctx), GFP_KERNEL); - if (*stackptr) - memset(*stackptr, 0, sizeof(union irq_ctx)); - else - printk(KERN_WARNING - "crashdump: unable to allocate separate stack\n"); -+#endif - } - - typedef asmlinkage void (*crashdump_func_t)(struct pt_regs *, void *); -@@ -62,6 +64,7 @@ static inline void platform_start_crashd - crashdump_func_t dumpfunc, - struct pt_regs *regs) - { -+#ifdef CONFIG_4KSTACKS - u32 *dsp; - union irq_ctx * curctx; - union irq_ctx * dumpctx; -@@ -90,6 +93,10 @@ static inline void platform_start_crashd - : "memory", "cc", "edx", "ecx" - ); - } -+#else -+ dumpfunc(regs, NULL); -+#endif -+ - } - - #define platform_cleanup_stack(stackptr) \ diff --git a/lustre/kernel_patches/patches/elevator-cfq.patch b/lustre/kernel_patches/patches/elevator-cfq.patch deleted file mode 100644 index a13194ef64..0000000000 --- a/lustre/kernel_patches/patches/elevator-cfq.patch +++ /dev/null @@ -1,20 +0,0 @@ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/block/ll_rw_blk.c -=================================================================== 
---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/block/ll_rw_blk.c 2005-06-28 01:53:39.000000000 -0600 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/block/ll_rw_blk.c 2005-06-28 01:58:45.000000000 -0600 -@@ -1380,12 +1380,12 @@ - static int __make_request(request_queue_t *, struct bio *); - - static elevator_t *chosen_elevator = --#if defined(CONFIG_IOSCHED_AS) -+#if defined(CONFIG_IOSCHED_CFQ) -+ &iosched_cfq; -+#elif defined(CONFIG_IOSCHED_AS) - &iosched_as; - #elif defined(CONFIG_IOSCHED_DEADLINE) - &iosched_deadline; --#elif defined(CONFIG_IOSCHED_CFQ) -- &iosched_cfq; - #elif defined(CONFIG_IOSCHED_NOOP) - &elevator_noop; - #else diff --git a/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6-sles10.patch b/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6-sles10.patch deleted file mode 100644 index 3724fd9a68..0000000000 --- a/lustre/kernel_patches/patches/ext3-check-jbd-errors-2.6-sles10.patch +++ /dev/null @@ -1,83 +0,0 @@ -Index: linux-stage/include/linux/ext3_fs.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h -+++ linux-stage/include/linux/ext3_fs.h -@@ -921,6 +921,7 @@ extern unsigned ext3_list_backups(struct - unsigned *five, unsigned *seven); - - /* super.c */ -+extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int); - extern void ext3_error (struct super_block *, const char *, const char *, ...) 
- __attribute__ ((format (printf, 3, 4))); - extern void __ext3_std_error (struct super_block *, const char *, int); -Index: linux-stage/fs/ext3/super.c -=================================================================== ---- linux-stage.orig/fs/ext3/super.c -+++ linux-stage/fs/ext3/super.c -@@ -47,9 +47,6 @@ static int ext3_load_journal(struct supe - unsigned long journal_devnum); - static int ext3_create_journal(struct super_block *, struct ext3_super_block *, - int); --static void ext3_commit_super (struct super_block * sb, -- struct ext3_super_block * es, -- int sync); - static void ext3_mark_recovery_complete(struct super_block * sb, - struct ext3_super_block * es); - static void ext3_clear_journal_err(struct super_block * sb, -@@ -2175,7 +2172,7 @@ static int ext3_create_journal(struct su - return 0; - } - --static void ext3_commit_super (struct super_block * sb, -+void ext3_commit_super (struct super_block * sb, - struct ext3_super_block * es, - int sync) - { -Index: linux-stage/fs/ext3/namei.c -=================================================================== ---- linux-stage.orig/fs/ext3/namei.c -+++ linux-stage/fs/ext3/namei.c -@@ -1591,7 +1591,7 @@ static int ext3_delete_entry (handle_t * - struct buffer_head * bh) - { - struct ext3_dir_entry_2 * de, * pde; -- int i; -+ int i, err; - - i = 0; - pde = NULL; -@@ -1601,7 +1601,9 @@ static int ext3_delete_entry (handle_t * - return -EIO; - if (de == de_del) { - BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) -+ return err; - if (pde) - pde->rec_len = - cpu_to_le16(le16_to_cpu(pde->rec_len) + -Index: linux-stage/fs/ext3/inode.c -=================================================================== ---- linux-stage.orig/fs/ext3/inode.c -+++ linux-stage/fs/ext3/inode.c -@@ -1838,8 +1838,18 @@ ext3_clear_blocks(handle_t *handle, stru - ext3_mark_inode_dirty(handle, inode); - ext3_journal_test_restart(handle, 
inode); - if (bh) { -+ int err; - BUFFER_TRACE(bh, "retaking write access"); -- ext3_journal_get_write_access(handle, bh); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) { -+ struct super_block *sb = inode->i_sb; -+ struct ext3_super_block *es = EXT3_SB(sb)->s_es; -+ printk (KERN_CRIT"EXT3-fs: can't continue truncate\n"); -+ EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; -+ es->s_state |= cpu_to_le16(EXT3_ERROR_FS); -+ ext3_commit_super(sb, es, 1); -+ return; -+ } - } - } - diff --git a/lustre/kernel_patches/patches/ext3-extents-fixes-2.6.9-rhel4.patch b/lustre/kernel_patches/patches/ext3-extents-fixes-2.6.9-rhel4.patch deleted file mode 100644 index ffb9700950..0000000000 --- a/lustre/kernel_patches/patches/ext3-extents-fixes-2.6.9-rhel4.patch +++ /dev/null @@ -1,86 +0,0 @@ - - minor fixes - - multiblock get_block() for direct I/O - -Index: linux-2.6.9-full/include/linux/ext3_extents.h -=================================================================== ---- linux-2.6.9-full.orig/include/linux/ext3_extents.h 2007-03-23 15:57:00.000000000 +0300 -+++ linux-2.6.9-full/include/linux/ext3_extents.h 2007-03-28 00:59:32.000000000 +0400 -@@ -205,7 +205,7 @@ typedef int (*ext_prepare_callback)(stru - #define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) - #define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) - --#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); -+#define EXT_ASSERT(__x__) if (unlikely(!(__x__))) BUG(); - - #define EXT_CHECK_PATH(tree,path) \ - { \ -Index: linux-2.6.9-full/fs/ext3/extents.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/extents.c 2007-03-23 15:57:00.000000000 +0300 -+++ linux-2.6.9-full/fs/ext3/extents.c 2007-03-28 00:59:41.000000000 +0400 -@@ -895,6 +895,8 @@ repeat: - /* if we found index with free entry, then use that - * entry: create all needed subtree and add new leaf */ - err = ext3_ext_split(handle, tree, path, newext, i); -+ if (err) -+ 
goto out; - - /* refill path */ - ext3_ext_drop_refs(path); -@@ -904,12 +906,16 @@ repeat: - } else { - /* tree is full, time to grow in depth */ - err = ext3_ext_grow_indepth(handle, tree, path, newext); -+ if (err) -+ goto out; - - /* refill path */ - ext3_ext_drop_refs(path); - path = ext3_ext_find_extent(tree, newext->ee_block, path); -- if (IS_ERR(path)) -+ if (IS_ERR(path)) { - err = PTR_ERR(path); -+ goto out; -+ } - - /* - * only first (depth 0 -> 1) produces free space -@@ -922,10 +928,8 @@ repeat: - } - } - -- if (err) -- return err; -- -- return 0; -+out: -+ return err; - } - - /* -@@ -1992,21 +1996,10 @@ static int ext3_new_block_cb(handle_t *h - EXT_ASSERT(ex->ee_start); - EXT_ASSERT(ex->ee_len); - -- /* reuse block from the extent to order data/metadata */ -- newblock = ex->ee_start++; -- ex->ee_len--; -- if (ex->ee_len == 0) { -- ex->ee_len = 1; -- /* allocate new block for the extent */ -- goal = ext3_ext_find_goal(inode, path, ex->ee_block); -- ex->ee_start = ext3_new_block(handle, inode, goal, err); -- ex->ee_start_hi = 0; -- if (ex->ee_start == 0) { -- /* error occured: restore old extent */ -- ex->ee_start = newblock; -- return 0; -- } -- } -+ /* allocate new block for the extent */ -+ goal = ext3_ext_find_goal(inode, path, ex->ee_block); -+ newblock = ext3_new_block(handle, inode, goal, err); -+ - return newblock; - } - diff --git a/lustre/kernel_patches/patches/ext3-extents-multiblock-directio-2.6.5-suse.patch b/lustre/kernel_patches/patches/ext3-extents-multiblock-directio-2.6.5-suse.patch deleted file mode 100644 index 744cc45892..0000000000 --- a/lustre/kernel_patches/patches/ext3-extents-multiblock-directio-2.6.5-suse.patch +++ /dev/null @@ -1,157 +0,0 @@ -Index: linux-2.6.5-7.283-full/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.5-7.283-full.orig/include/linux/ext3_fs.h 2007-03-28 02:13:37.000000000 +0400 -+++ linux-2.6.5-7.283-full/include/linux/ext3_fs.h 2007-03-28 
02:21:37.000000000 +0400 -@@ -815,7 +815,7 @@ extern struct inode_operations ext3_fast - - /* extents.c */ - extern int ext3_ext_writepage_trans_blocks(struct inode *, int); --extern int ext3_ext_get_block(handle_t *, struct inode *, long, -+extern int ext3_ext_get_block(handle_t *, struct inode *, long, int, - struct buffer_head *, int, int); - extern void ext3_ext_truncate(struct inode *, struct page *); - extern void ext3_ext_init(struct super_block *); -Index: linux-2.6.5-7.283-full/fs/ext3/extents.c -=================================================================== ---- linux-2.6.5-7.283-full.orig/fs/ext3/extents.c 2007-03-28 02:14:25.000000000 +0400 -+++ linux-2.6.5-7.283-full/fs/ext3/extents.c 2007-03-28 02:21:37.000000000 +0400 -@@ -2024,7 +2024,8 @@ void ext3_init_tree_desc(struct ext3_ext - } - - int ext3_ext_get_block(handle_t *handle, struct inode *inode, -- long iblock, struct buffer_head *bh_result, -+ long iblock, int max_blocks, -+ struct buffer_head *bh_result, - int create, int extend_disksize) - { - struct ext3_ext_path *path = NULL; -@@ -2032,6 +2033,11 @@ int ext3_ext_get_block(handle_t *handle, - struct ext3_extent *ex; - int goal, newblock, err = 0, depth; - struct ext3_extents_tree tree; -+ unsigned long next; -+ int allocated = 0; -+ -+ /* until we have multiblock allocation */ -+ max_blocks = 1; - - __clear_bit(BH_New, &bh_result->b_state); - ext3_init_tree_desc(&tree, inode); -@@ -2051,6 +2057,9 @@ int ext3_ext_get_block(handle_t *handle, - } else if (goal == EXT3_EXT_CACHE_EXTENT) { - /* block is already allocated */ - newblock = iblock - newex.ee_block + newex.ee_start; -+ /* number of remaining blocks in the extent */ -+ EXT_ASSERT(iblock >= newex.ee_block); -+ allocated = newex.ee_len - (iblock - newex.ee_block); - goto out; - } else { - EXT_ASSERT(0); -@@ -2078,6 +2087,8 @@ int ext3_ext_get_block(handle_t *handle, - /* if found exent covers block, simple return it */ - if (iblock >= ex->ee_block && iblock < ex->ee_block + 
ex->ee_len) { - newblock = iblock - ex->ee_block + ex->ee_start; -+ /* number of remaining blocks in the extent */ -+ allocated = ex->ee_len - (iblock - ex->ee_block); - ext_debug(&tree, "%d fit into %d:%d -> %d\n", - (int) iblock, ex->ee_block, ex->ee_len, - newblock); -@@ -2098,6 +2109,15 @@ int ext3_ext_get_block(handle_t *handle, - goto out2; - } - -+ /* find next allocated block so that we know how many -+ * blocks we can allocate without ovelapping next extent */ -+ EXT_ASSERT(iblock >= ex->ee_block + ex->ee_len); -+ next = ext3_ext_next_allocated_block(path); -+ EXT_ASSERT(next > iblock); -+ allocated = next - iblock; -+ if (allocated > max_blocks) -+ allocated = max_blocks; -+ - /* allocate new block */ - goal = ext3_ext_find_goal(inode, path, iblock); - newblock = ext3_new_block(handle, inode, goal, &err); -@@ -2112,8 +2132,11 @@ int ext3_ext_get_block(handle_t *handle, - newex.ee_start_hi = 0; - newex.ee_len = 1; - err = ext3_ext_insert_extent(handle, &tree, path, &newex); -- if (err) -+ if (err) { -+ /* free data blocks we just allocated */ -+ ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len); - goto out2; -+ } - - if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize) - EXT3_I(inode)->i_disksize = inode->i_size; -@@ -2125,10 +2148,13 @@ int ext3_ext_get_block(handle_t *handle, - ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, - newex.ee_start, EXT3_EXT_CACHE_EXTENT); - out: -+ if (allocated > max_blocks) -+ allocated = max_blocks; - ext3_ext_show_leaf(&tree, path); - __set_bit(BH_Mapped, &bh_result->b_state); - bh_result->b_bdev = inode->i_sb->s_bdev; - bh_result->b_blocknr = newblock; -+ bh_result->b_size = (allocated << inode->i_blkbits); - out2: - if (path) { - ext3_ext_drop_refs(path); -Index: linux-2.6.5-7.283-full/fs/ext3/inode.c -=================================================================== ---- linux-2.6.5-7.283-full.orig/fs/ext3/inode.c 2007-03-28 02:13:37.000000000 +0400 -+++ 
linux-2.6.5-7.283-full/fs/ext3/inode.c 2007-03-28 02:50:19.000000000 +0400 -@@ -800,13 +800,17 @@ changed: - - static inline int - ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, -- struct buffer_head *bh, int create, int extend_disksize) -+ int max_blocks, struct buffer_head *bh, int create, -+ int extend_disksize) - { -+ int ret; - if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -- return ext3_ext_get_block(handle, inode, block, bh, create, -- extend_disksize); -- return ext3_get_block_handle(handle, inode, block, bh, create, -+ return ext3_ext_get_block(handle, inode, block, max_blocks, -+ bh, create, extend_disksize); -+ ret = ext3_get_block_handle(handle, inode, block, bh, create, - extend_disksize); -+ bh->b_size = (1 << inode->i_blkbits); -+ return ret; - } - - static int ext3_get_block(struct inode *inode, sector_t iblock, -@@ -819,7 +823,7 @@ static int ext3_get_block(struct inode * - handle = ext3_journal_current_handle(); - J_ASSERT(handle != 0); - } -- ret = ext3_get_block_wrap(handle, inode, iblock, -+ ret = ext3_get_block_wrap(handle, inode, iblock, 1, - bh_result, create, 1); - return ret; - } -@@ -847,10 +851,8 @@ ext3_direct_io_get_blocks(struct inode * - } - } - if (ret == 0) -- ret = ext3_get_block_wrap(handle, inode, iblock, -+ ret = ext3_get_block_wrap(handle, inode, iblock, max_blocks, - bh_result, create, 0); -- if (ret == 0) -- bh_result->b_size = (1 << inode->i_blkbits); - return ret; - } - -@@ -869,7 +871,7 @@ struct buffer_head *ext3_getblk(handle_t - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); -- *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); -+ *errp = ext3_get_block_wrap(handle, inode, block, 1, &dummy, create, 1); - if (!*errp && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); diff --git a/lustre/kernel_patches/patches/ext3-extents-multiblock-directio-2.6.9-rhel4.patch 
b/lustre/kernel_patches/patches/ext3-extents-multiblock-directio-2.6.9-rhel4.patch deleted file mode 100644 index 726a473738..0000000000 --- a/lustre/kernel_patches/patches/ext3-extents-multiblock-directio-2.6.9-rhel4.patch +++ /dev/null @@ -1,149 +0,0 @@ ---- linux-2.6.9-full/include/linux/ext3_fs.h 2007-03-23 15:57:00.000000000 +0300 -+++ linux-2.6.9-full/include/linux/ext3_fs.h 2007-02-16 17:16:23.000000000 +0300 -@@ -850,7 +850,7 @@ extern struct inode_operations ext3_fast - - /* extents.c */ - extern int ext3_ext_writepage_trans_blocks(struct inode *, int); --extern int ext3_ext_get_block(handle_t *, struct inode *, long, -+extern int ext3_ext_get_block(handle_t *, struct inode *, long, int, - struct buffer_head *, int, int); - extern void ext3_ext_truncate(struct inode *, struct page *); - extern void ext3_ext_init(struct super_block *); - ---- linux-2.6.9-full/fs/ext3/extents.c 2007-03-23 15:57:00.000000000 +0300 -+++ linux-2.6.9-full/fs/ext3/extents.c 2007-02-22 17:45:05.000000000 +0300 -@@ -2031,7 +2168,8 @@ void ext3_init_tree_desc(struct ext3_ext - } - - int ext3_ext_get_block(handle_t *handle, struct inode *inode, -- long iblock, struct buffer_head *bh_result, -+ long iblock, int max_blocks, -+ struct buffer_head *bh_result, - int create, int extend_disksize) - { - struct ext3_ext_path *path = NULL; -@@ -2039,6 +2177,11 @@ int ext3_ext_get_block(handle_t *handle, - struct ext3_extent *ex; - int goal, newblock, err = 0, depth; - struct ext3_extents_tree tree; -+ unsigned long next; -+ int allocated = 0; -+ -+ /* until we have multiblock allocation */ -+ max_blocks = 1; - - clear_buffer_new(bh_result); - ext3_init_tree_desc(&tree, inode); -@@ -2058,6 +2201,9 @@ int ext3_ext_get_block(handle_t *handle, - } else if (goal == EXT3_EXT_CACHE_EXTENT) { - /* block is already allocated */ - newblock = iblock - newex.ee_block + newex.ee_start; -+ /* number of remaining blocks in the extent */ -+ EXT_ASSERT(iblock >= newex.ee_block); -+ allocated = newex.ee_len - 
(iblock - newex.ee_block); - goto out; - } else { - EXT_ASSERT(0); -@@ -2085,6 +2231,8 @@ int ext3_ext_get_block(handle_t *handle, - /* if found exent covers block, simple return it */ - if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { - newblock = iblock - ex->ee_block + ex->ee_start; -+ /* number of remaining blocks in the extent */ -+ allocated = ex->ee_len - (iblock - ex->ee_block); - ext_debug(&tree, "%d fit into %d:%d -> %d\n", - (int) iblock, ex->ee_block, ex->ee_len, - newblock); -@@ -2105,6 +2253,15 @@ int ext3_ext_get_block(handle_t *handle, - goto out2; - } - -+ /* find next allocated block so that we know how many -+ * blocks we can allocate without ovelapping next extent */ -+ EXT_ASSERT(iblock >= ex->ee_block + ex->ee_len); -+ next = ext3_ext_next_allocated_block(path); -+ EXT_ASSERT(next > iblock); -+ allocated = next - iblock; -+ if (allocated > max_blocks) -+ allocated = max_blocks; -+ - /* allocate new block */ - goal = ext3_ext_find_goal(inode, path, iblock); - newblock = ext3_new_block(handle, inode, goal, &err); -@@ -2119,8 +2276,11 @@ int ext3_ext_get_block(handle_t *handle, - newex.ee_start_hi = 0; - newex.ee_len = 1; - err = ext3_ext_insert_extent(handle, &tree, path, &newex); -- if (err) -+ if (err) { -+ /* free data blocks we just allocated */ -+ ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len); - goto out2; -+ } - - if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize) - EXT3_I(inode)->i_disksize = inode->i_size; -@@ -2132,8 +2292,11 @@ int ext3_ext_get_block(handle_t *handle, - ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, - newex.ee_start, EXT3_EXT_CACHE_EXTENT); - out: -+ if (allocated > max_blocks) -+ allocated = max_blocks; - ext3_ext_show_leaf(&tree, path); - map_bh(bh_result, inode->i_sb, newblock); -+ bh_result->b_size = (allocated << inode->i_blkbits); - out2: - if (path) { - ext3_ext_drop_refs(path); ---- linux-2.6.9-full/fs/ext3/inode.c 2007-03-23 15:57:00.000000000 +0300 
-+++ linux-2.6.9-full/fs/ext3/inode.c 2007-02-16 17:17:03.000000000 +0300 -@@ -798,13 +798,17 @@ changed: - - static inline int - ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, -- struct buffer_head *bh, int create, int extend_disksize) -+ int max_blocks, struct buffer_head *bh, int create, -+ int extend_disksize) - { -+ int ret; - if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -- return ext3_ext_get_block(handle, inode, block, bh, create, -- extend_disksize); -- return ext3_get_block_handle(handle, inode, block, bh, create, -+ return ext3_ext_get_block(handle, inode, block, max_blocks, -+ bh, create, extend_disksize); -+ ret = ext3_get_block_handle(handle, inode, block, bh, create, - extend_disksize); -+ bh->b_size = (1 << inode->i_blkbits); -+ return ret; - } - - static int ext3_get_block(struct inode *inode, sector_t iblock, -@@ -817,7 +821,7 @@ static int ext3_get_block(struct inode * - handle = ext3_journal_current_handle(); - J_ASSERT(handle != 0); - } -- ret = ext3_get_block_wrap(handle, inode, iblock, -+ ret = ext3_get_block_wrap(handle, inode, iblock, 1, - bh_result, create, 1); - return ret; - } -@@ -862,9 +866,8 @@ ext3_direct_io_get_blocks(struct inode * - - get_block: - if (ret == 0) -- ret = ext3_get_block_wrap(handle, inode, iblock, -+ ret = ext3_get_block_wrap(handle, inode, iblock, max_blocks, - bh_result, create, 0); -- bh_result->b_size = (1 << inode->i_blkbits); - return ret; - } - -@@ -882,7 +885,7 @@ struct buffer_head *ext3_getblk(handle_t - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); -- *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); -+ *errp = ext3_get_block_wrap(handle, inode, block, 1, &dummy, create, 1); - if (!*errp && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); diff --git a/lustre/kernel_patches/patches/ext3-extents-search-2.6.9-rhel4.patch 
b/lustre/kernel_patches/patches/ext3-extents-search-2.6.9-rhel4.patch deleted file mode 100644 index 2ad69c8c4c..0000000000 --- a/lustre/kernel_patches/patches/ext3-extents-search-2.6.9-rhel4.patch +++ /dev/null @@ -1,168 +0,0 @@ -Index: linux-2.6.9-full/include/linux/ext3_extents.h -=================================================================== ---- linux-2.6.9-full.orig/include/linux/ext3_extents.h 2007-03-23 15:57:00.000000000 +0300 -+++ linux-2.6.9-full/include/linux/ext3_extents.h 2007-03-26 22:08:16.000000000 +0400 -@@ -242,6 +242,8 @@ struct ext3_extent_tree_stats { - int leaf_num; - }; - -+extern int ext3_ext_search_left(struct ext3_extents_tree *, struct ext3_ext_path *, unsigned long *, unsigned long *); -+extern int ext3_ext_search_right(struct ext3_extents_tree *, struct ext3_ext_path *, unsigned long *, unsigned long *); - extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); - extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *); - extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *); -Index: linux-2.6.9-full/fs/ext3/extents.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/extents.c 2007-03-23 15:57:00.000000000 +0300 -+++ linux-2.6.9-full/fs/ext3/extents.c 2007-03-26 22:07:37.000000000 +0400 -@@ -929,6 +929,150 @@ repeat: - } - - /* -+ * search the closest allocated block to the left for *logical -+ * and returns it at @logical + it's physical address at @phys -+ * if *logical is the smallest allocated block, the function -+ * returns 0 at @phys -+ * return value contains 0 (success) or error code -+ */ -+int -+ext3_ext_search_left(struct ext3_extents_tree *tree, struct ext3_ext_path *path, -+ unsigned long *logical, unsigned long *phys) -+{ -+ struct ext3_extent_idx *ix; -+ struct ext3_extent *ex; -+ int depth; -+ -+ BUG_ON(path == NULL); -+ depth = path->p_depth; -+ *phys = 0; -+ -+ if (depth == 0 && 
path->p_ext == NULL) -+ return 0; -+ -+ /* usually extent in the path covers blocks smaller -+ * then *logical, but it can be that extent is the -+ * first one in the file */ -+ -+ ex = path[depth].p_ext; -+ if (*logical < ex->ee_block) { -+ BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); -+ while (--depth >= 0) { -+ ix = path[depth].p_idx; -+ BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); -+ } -+ return 0; -+ } -+ -+ BUG_ON(*logical < ex->ee_block + ex->ee_len); -+ -+ *logical = ex->ee_block + ex->ee_len - 1; -+ *phys = ex->ee_start + ex->ee_len - 1; -+ return 0; -+} -+EXPORT_SYMBOL(ext3_ext_search_left); -+ -+/* -+ * search the closest allocated block to the right for *logical -+ * and returns it at @logical + it's physical address at @phys -+ * if *logical is the smallest allocated block, the function -+ * returns 0 at @phys -+ * return value contains 0 (success) or error code -+ */ -+int -+ext3_ext_search_right(struct ext3_extents_tree *tree, struct ext3_ext_path *path, -+ unsigned long *logical, unsigned long *phys) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_extent_header *eh; -+ struct ext3_extent_idx *ix; -+ struct ext3_extent *ex; -+ unsigned long block; -+ int depth; -+ -+ BUG_ON(path == NULL); -+ depth = path->p_depth; -+ *phys = 0; -+ -+ if (depth == 0 && path->p_ext == NULL) -+ return 0; -+ -+ /* usually extent in the path covers blocks smaller -+ * then *logical, but it can be that extent is the -+ * first one in the file */ -+ -+ ex = path[depth].p_ext; -+ if (*logical < ex->ee_block) { -+ BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); -+ while (--depth >= 0) { -+ ix = path[depth].p_idx; -+ BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); -+ } -+ *logical = ex->ee_block; -+ *phys = ex->ee_start; -+ return 0; -+ } -+ -+ BUG_ON(*logical < ex->ee_block + ex->ee_len); -+ -+ if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { -+ /* next allocated block in this leaf */ -+ ex++; -+ *logical = ex->ee_block; -+ *phys = ex->ee_start; -+ return 
0; -+ } -+ -+ /* go up and search for index to the right */ -+ while (--depth >= 0) { -+ ix = path[depth].p_idx; -+ if (ix != EXT_LAST_INDEX(path[depth].p_hdr)) -+ break; -+ } -+ -+ if (depth < 0) { -+ /* we've gone up to the root and -+ * found no index to the right */ -+ return 0; -+ } -+ -+ /* we've found index to the right, let's -+ * follow it and find the closest allocated -+ * block to the right */ -+ ix++; -+ block = ix->ei_leaf; -+ while (++depth < path->p_depth) { -+ bh = sb_bread(tree->inode->i_sb, block); -+ if (bh == NULL) -+ return -EIO; -+ eh = EXT_BLOCK_HDR(bh); -+ if (ext3_ext_check_header(eh)) { -+ brelse(bh); -+ return -EIO; -+ } -+ ix = EXT_FIRST_INDEX(eh); -+ block = ix->ei_leaf; -+ brelse(bh); -+ } -+ -+ bh = sb_bread(tree->inode->i_sb, block); -+ if (bh == NULL) -+ return -EIO; -+ eh = EXT_BLOCK_HDR(bh); -+ if (ext3_ext_check_header(eh)) { -+ brelse(bh); -+ return -EIO; -+ } -+ ex = EXT_FIRST_EXTENT(eh); -+ *logical = ex->ee_block; -+ *phys = ex->ee_start; -+ brelse(bh); -+ return 0; -+ -+} -+EXPORT_SYMBOL(ext3_ext_search_right); -+ -+/* - * returns allocated block in subsequent extent or EXT_MAX_BLOCK - * NOTE: it consider block number from index entry as - * allocated block. 
thus, index entries have to be consistent diff --git a/lustre/kernel_patches/patches/ext3-external-journal-2.6.9.patch b/lustre/kernel_patches/patches/ext3-external-journal-2.6.9.patch deleted file mode 100644 index 7cc86f2f02..0000000000 --- a/lustre/kernel_patches/patches/ext3-external-journal-2.6.9.patch +++ /dev/null @@ -1,150 +0,0 @@ -Signed-off-by: Johann Lombardi <johann.lombardi@bull.net> - -Index: linux-2.6.9-full/fs/ext3/super.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/super.c 2006-05-20 01:14:14.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/super.c 2006-05-20 01:17:10.000000000 +0400 -@@ -39,7 +39,8 @@ - #include "xattr.h" - #include "acl.h" - --static int ext3_load_journal(struct super_block *, struct ext3_super_block *); -+static int ext3_load_journal(struct super_block *, struct ext3_super_block *, -+ unsigned long journal_devnum); - static int ext3_create_journal(struct super_block *, struct ext3_super_block *, - int); - static void ext3_commit_super (struct super_block * sb, -@@ -591,7 +592,7 @@ enum { - Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, - Opt_reservation, Opt_noreservation, Opt_noload, -- Opt_commit, Opt_journal_update, Opt_journal_inum, -+ Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, - Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, -@@ -630,6 +631,7 @@ static match_table_t tokens = { - {Opt_commit, "commit=%u"}, - {Opt_journal_update, "journal=update"}, - {Opt_journal_inum, "journal=%u"}, -+ {Opt_journal_dev, "journal_dev=%u"}, - {Opt_abort, "abort"}, - {Opt_data_journal, "data=journal"}, - {Opt_data_ordered, "data=ordered"}, -@@ -675,8 +677,9 @@ static unsigned long get_sb_block(void * - return sb_block; - } - --static int parse_options (char * 
options, struct super_block *sb, -- unsigned long * inum, unsigned long *n_blocks_count, int is_remount) -+static int parse_options (char *options, struct super_block *sb, -+ unsigned long *inum, unsigned long *journal_devnum, -+ unsigned long *n_blocks_count, int is_remount) - { - struct ext3_sb_info *sbi = EXT3_SB(sb); - char * p; -@@ -816,6 +819,16 @@ static int parse_options (char * options - return 0; - *inum = option; - break; -+ case Opt_journal_dev: -+ if (is_remount) { -+ printk(KERN_ERR "EXT3-fs: cannot specify " -+ "journal on remount\n"); -+ return 0; -+ } -+ if (match_int(&args[0], &option)) -+ return 0; -+ *journal_devnum = option; -+ break; - case Opt_noload: - set_opt (sbi->s_mount_opt, NOLOAD); - break; -@@ -1278,6 +1291,7 @@ static int ext3_fill_super (struct super - unsigned long logic_sb_block; - unsigned long offset = 0; - unsigned long journal_inum = 0; -+ unsigned long journal_devnum = 0; - unsigned long def_mount_opts; - struct inode *root; - int blocksize; -@@ -1361,7 +1375,8 @@ static int ext3_fill_super (struct super - - set_opt(sbi->s_mount_opt, RESERVATION); - -- if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0)) -+ if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, -+ NULL, 0)) - goto failed_mount; - - set_sb_time_gran(sb, 1000000000U); -@@ -1567,7 +1582,7 @@ static int ext3_fill_super (struct super - */ - if (!test_opt(sb, NOLOAD) && - EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { -- if (ext3_load_journal(sb, es)) -+ if (ext3_load_journal(sb, es, journal_devnum)) - goto failed_mount2; - } else if (journal_inum) { - if (ext3_create_journal(sb, es, journal_inum)) -@@ -1831,15 +1846,24 @@ out_bdev: - return NULL; - } - --static int ext3_load_journal(struct super_block * sb, -- struct ext3_super_block * es) -+static int ext3_load_journal(struct super_block *sb, -+ struct ext3_super_block *es, -+ unsigned long journal_devnum) - { - journal_t *journal; - int journal_inum = 
le32_to_cpu(es->s_journal_inum); -- dev_t journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); -+ dev_t journal_dev; - int err = 0; - int really_read_only; - -+ if (journal_devnum && -+ journal_devnum != le32_to_cpu(es->s_journal_dev)) { -+ printk(KERN_INFO "EXT3-fs: external journal device major/minor " -+ "numbers have changed\n"); -+ journal_dev = new_decode_dev(journal_devnum); -+ } else -+ journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); -+ - really_read_only = bdev_read_only(sb->s_bdev); - - /* -@@ -1898,6 +1922,16 @@ static int ext3_load_journal(struct supe - - EXT3_SB(sb)->s_journal = journal; - ext3_clear_journal_err(sb, es); -+ -+ if (journal_devnum && -+ journal_devnum != le32_to_cpu(es->s_journal_dev)) { -+ es->s_journal_dev = cpu_to_le32(journal_devnum); -+ sb->s_dirt = 1; -+ -+ /* Make sure we flush the recovery flag to disk. */ -+ ext3_commit_super(sb, es, 1); -+ } -+ - return 0; - } - -@@ -2105,13 +2139,13 @@ int ext3_remount (struct super_block * s - { - struct ext3_super_block * es; - struct ext3_sb_info *sbi = EXT3_SB(sb); -- unsigned long tmp; -+ unsigned long tmp1, tmp2; - unsigned long n_blocks_count = 0; - - /* - * Allow the "check" option to be passed as a remount option. 
- */ -- if (!parse_options(data, sb, &tmp, &n_blocks_count, 1)) -+ if (!parse_options(data, sb, &tmp1, &tmp2, &n_blocks_count, 1)) - return -EINVAL; - - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) diff --git a/lustre/kernel_patches/patches/ext3-filterdata-sles10.patch b/lustre/kernel_patches/patches/ext3-filterdata-sles10.patch deleted file mode 100644 index 5f7c8c9b99..0000000000 --- a/lustre/kernel_patches/patches/ext3-filterdata-sles10.patch +++ /dev/null @@ -1,25 +0,0 @@ -Index: linux-2.6.16.27-0.9-full/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/include/linux/ext3_fs_i.h 2007-03-28 16:03:20.000000000 +0400 -+++ linux-2.6.16.27-0.9-full/include/linux/ext3_fs_i.h 2007-03-28 19:40:53.000000000 +0400 -@@ -139,6 +139,8 @@ struct ext3_inode_info { - /* mballoc */ - struct list_head i_prealloc_list; - spinlock_t i_prealloc_lock; -+ -+ void *i_filterdata; - }; - - #endif /* _LINUX_EXT3_FS_I */ -Index: linux-2.6.16.27-0.9-full/fs/ext3/super.c -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/fs/ext3/super.c 2007-03-28 18:20:17.000000000 +0400 -+++ linux-2.6.16.27-0.9-full/fs/ext3/super.c 2007-03-28 19:40:53.000000000 +0400 -@@ -462,6 +462,7 @@ static struct inode *ext3_alloc_inode(st - memset(&ei->i_cached_extent, 0, sizeof(ei->i_cached_extent)); - INIT_LIST_HEAD(&ei->i_prealloc_list); - spin_lock_init(&ei->i_prealloc_lock); -+ ei->i_filterdata = NULL; - return &ei->vfs_inode; - } - diff --git a/lustre/kernel_patches/patches/ext3-htree-dot-2.6.5-suse.patch b/lustre/kernel_patches/patches/ext3-htree-dot-2.6.5-suse.patch deleted file mode 100644 index e8ed1920fe..0000000000 --- a/lustre/kernel_patches/patches/ext3-htree-dot-2.6.5-suse.patch +++ /dev/null @@ -1,23 +0,0 @@ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/fs/ext3/namei.c -=================================================================== ---- 
linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/fs/ext3/namei.c 2005-04-04 05:06:46.000000000 -0600 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/fs/ext3/namei.c 2005-04-04 05:09:18.000000000 -0600 -@@ -926,8 +926,16 @@ - struct inode *dir = dentry->d_parent->d_inode; - - sb = dir->i_sb; -- if (!(frame = dx_probe (dentry, 0, &hinfo, frames, err))) -- return NULL; -+ /* NFS may look up ".." - look at dx_root directory block */ -+ if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ -+ if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) -+ return NULL; -+ } else { -+ frame = frames; -+ frame->bh = NULL; /* for dx_release() */ -+ frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ -+ dx_set_block(frame->at, 0); /* dx_root block is 0 */ -+ } - hash = hinfo.hash; - do { - block = dx_get_block(frame->at); diff --git a/lustre/kernel_patches/patches/ext3-htree-path-ops.patch b/lustre/kernel_patches/patches/ext3-htree-path-ops.patch deleted file mode 100644 index 9a2edbd3c4..0000000000 --- a/lustre/kernel_patches/patches/ext3-htree-path-ops.patch +++ /dev/null @@ -1,894 +0,0 @@ -Index: iam-src/fs/ext3/namei.c -=================================================================== ---- iam-src.orig/fs/ext3/namei.c 2006-02-12 16:43:57.000000000 +0300 -+++ iam-src/fs/ext3/namei.c 2006-02-12 23:22:12.000000000 +0300 -@@ -83,22 +83,21 @@ static struct buffer_head *ext3_append(h - #define dxtrace(command) - #endif - --struct fake_dirent --{ -+struct fake_dirent { - __le32 inode; - __le16 rec_len; - u8 name_len; - u8 file_type; - }; - --struct dx_countlimit --{ -+struct dx_countlimit { - __le16 limit; - __le16 count; - }; - --struct dx_entry --{ -+struct dx_entry; /* incomplete type */ -+ -+struct dx_entry_compat { - __le32 hash; - __le32 block; - }; -@@ -109,8 +108,7 @@ struct dx_entry - * hash version mod 4 should never be 0. Sincerely, the paranoia department. 
- */ - --struct dx_root --{ -+struct dx_root { - struct fake_dirent dot; - char dot_name[4]; - struct fake_dirent dotdot; -@@ -124,13 +122,13 @@ struct dx_root - u8 unused_flags; - } - info; -- struct dx_entry entries[0]; -+ struct {} entries[0]; - }; - - struct dx_node - { - struct fake_dirent fake; -- struct dx_entry entries[0]; -+ struct {} entries[0]; - }; - - -@@ -147,38 +145,76 @@ struct dx_map_entry - u32 offs; - }; - -+struct dx_path; -+struct dx_param { -+ size_t dpo_key_size; -+ size_t dpo_ptr_size; -+ size_t dpo_node_gap; -+ size_t dpo_root_gap; -+ -+ u32 (*dpo_root_ptr)(struct dx_path *path); -+ int (*dpo_node_check)(struct dx_path *path, -+ struct dx_frame *frame, void *cookie); -+ int (*dpo_node_init)(struct dx_path *path, -+ struct buffer_head *bh, int root); -+}; -+ - /* - * Structure to keep track of a path drilled through htree. - */ - struct dx_path { -- struct inode *dp_object; -- struct dx_frame dp_frames[DX_MAX_TREE_HEIGHT]; -- struct dx_frame *dp_frame; -+ struct inode *dp_object; -+ struct dx_param *dp_param; -+ int dp_indirect; -+ struct dx_frame dp_frames[DX_MAX_TREE_HEIGHT]; -+ struct dx_frame *dp_frame; -+ void *dp_key_target; -+ void *dp_key; - }; - -+static u32 htree_root_ptr(struct dx_path *p); -+static int htree_node_check(struct dx_path *path, -+ struct dx_frame *frame, void *cookie); -+static int htree_node_init(struct dx_path *path, -+ struct buffer_head *bh, int root); -+ -+static struct dx_param htree_compat_param = { -+ .dpo_key_size = sizeof ((struct dx_map_entry *)NULL)->hash, -+ .dpo_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs, -+ .dpo_node_gap = offsetof(struct dx_node, entries), -+ .dpo_root_gap = offsetof(struct dx_root, entries), -+ -+ .dpo_root_ptr = htree_root_ptr, -+ .dpo_node_check = htree_node_check, -+ .dpo_node_init = htree_node_init -+}; -+ -+ - #ifdef CONFIG_EXT3_INDEX --static inline unsigned dx_get_block (struct dx_entry *entry); --static void dx_set_block (struct dx_entry *entry, unsigned value); 
--static inline unsigned dx_get_hash (struct dx_entry *entry); --static void dx_set_hash (struct dx_entry *entry, unsigned value); --static unsigned dx_get_count (struct dx_entry *entries); --static unsigned dx_get_limit (struct dx_entry *entries); --static void dx_set_count (struct dx_entry *entries, unsigned value); --static void dx_set_limit (struct dx_entry *entries, unsigned value); --static unsigned dx_root_limit (struct inode *dir, unsigned infosize); --static unsigned dx_node_limit (struct inode *dir); --static struct dx_frame *dx_probe(struct dentry *dentry, -- struct inode *dir, -- struct dx_hash_info *hinfo, -- struct dx_path *path, -- int *err); -+static inline unsigned dx_get_block(struct dx_path *p, struct dx_entry *entry); -+static void dx_set_block(struct dx_path *p, -+ struct dx_entry *entry, unsigned value); -+static inline void *dx_get_key(struct dx_path *p, -+ struct dx_entry *entry, void *key); -+static void dx_set_key(struct dx_path *p, struct dx_entry *entry, void *key); -+static unsigned dx_get_count(struct dx_entry *entries); -+static unsigned dx_get_limit(struct dx_entry *entries); -+static void dx_set_count(struct dx_entry *entries, unsigned value); -+static void dx_set_limit(struct dx_entry *entries, unsigned value); -+static unsigned dx_root_limit(struct dx_path *p); -+static unsigned dx_node_limit(struct dx_path *p); -+static int dx_probe(struct dentry *dentry, -+ struct inode *dir, -+ struct dx_hash_info *hinfo, -+ struct dx_path *path); - static int dx_make_map (struct ext3_dir_entry_2 *de, int size, - struct dx_hash_info *hinfo, struct dx_map_entry map[]); - static void dx_sort_map(struct dx_map_entry *map, unsigned count); - static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, - struct dx_map_entry *offsets, int count); - static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); --static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); -+static void dx_insert_block (struct dx_path 
*path, -+ struct dx_frame *frame, u32 hash, u32 block); - static int ext3_htree_next_block(struct inode *dir, __u32 hash, - struct dx_path *path, __u32 *start_hash); - static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, -@@ -186,29 +222,65 @@ static struct buffer_head * ext3_dx_find - static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, - struct inode *inode); - -+static inline void dx_path_init(struct dx_path *path, struct inode *inode); -+static inline void dx_path_fini(struct dx_path *path); -+ -+ - /* - * Future: use high four bits of block for coalesce-on-delete flags - * Mask them off for now. - */ - --static inline unsigned dx_get_block (struct dx_entry *entry) -+static inline void *entry_off(struct dx_entry *entry, ptrdiff_t off) -+{ -+ return (void *)((char *)entry + off); -+} -+ -+static inline size_t dx_entry_size(struct dx_path *p) - { -- return le32_to_cpu(entry->block) & 0x00ffffff; -+ return p->dp_param->dpo_key_size + p->dp_param->dpo_ptr_size; - } - --static inline void dx_set_block (struct dx_entry *entry, unsigned value) -+static inline struct dx_entry *dx_entry_shift(struct dx_path *p, -+ struct dx_entry *entry, int shift) - { -- entry->block = cpu_to_le32(value); -+ void *e = entry; -+ return e + shift * dx_entry_size(p); - } - --static inline unsigned dx_get_hash (struct dx_entry *entry) -+static inline ptrdiff_t dx_entry_diff(struct dx_path *p, -+ struct dx_entry *e1, struct dx_entry *e2) - { -- return le32_to_cpu(entry->hash); -+ ptrdiff_t diff; -+ -+ diff = (void *)e1 - (void *)e2; -+ assert(diff / dx_entry_size(p) * dx_entry_size(p) == diff); -+ return diff / dx_entry_size(p); -+} -+ -+static inline unsigned dx_get_block(struct dx_path *p, struct dx_entry *entry) -+{ -+ return le32_to_cpu(*(u32 *)entry_off(entry, p->dp_param->dpo_key_size)) -+ & 0x00ffffff; - } - --static inline void dx_set_hash (struct dx_entry *entry, unsigned value) -+static inline void dx_set_block(struct dx_path *p, -+ struct 
dx_entry *entry, unsigned value) - { -- entry->hash = cpu_to_le32(value); -+ *(u32*)entry_off(entry, p->dp_param->dpo_key_size) = cpu_to_le32(value); -+} -+ -+static inline void *dx_get_key(struct dx_path *p, -+ struct dx_entry *entry, void *key) -+{ -+ memcpy(key, entry, p->dp_param->dpo_key_size); -+ return key; -+} -+ -+static inline void dx_set_key(struct dx_path *p, -+ struct dx_entry *entry, void *key) -+{ -+ memcpy(entry, key, p->dp_param->dpo_key_size); - } - - static inline unsigned dx_get_count (struct dx_entry *entries) -@@ -231,17 +303,123 @@ static inline void dx_set_limit (struct - ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); - } - --static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) -+static inline unsigned dx_root_limit(struct dx_path *p) - { -- unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - -- EXT3_DIR_REC_LEN(2) - infosize; -- return 0? 20: entry_space / sizeof(struct dx_entry); -+ struct dx_param *param = p->dp_param; -+ unsigned entry_space = p->dp_object->i_sb->s_blocksize - -+ param->dpo_root_gap; -+ return entry_space / (param->dpo_key_size + param->dpo_ptr_size); -+} -+ -+static inline unsigned dx_node_limit(struct dx_path *p) -+{ -+ struct dx_param *param = p->dp_param; -+ unsigned entry_space = p->dp_object->i_sb->s_blocksize - -+ param->dpo_node_gap; -+ return entry_space / (param->dpo_key_size + param->dpo_ptr_size); -+} -+ -+static inline int dx_index_is_compat(struct dx_path *path) -+{ -+ return path->dp_param == &htree_compat_param; -+} -+ -+static struct dx_entry *dx_get_entries(struct dx_path *path, void *data, -+ int root) -+{ -+ return data + -+ (root ? 
-+ path->dp_param->dpo_root_gap : path->dp_param->dpo_node_gap); -+} -+ -+static struct dx_entry *dx_node_get_entries(struct dx_path *path, -+ struct dx_frame *frame) -+{ -+ return dx_get_entries(path, -+ frame->bh->b_data, frame == path->dp_frames); -+} -+ -+static u32 htree_root_ptr(struct dx_path *path) -+{ -+ return 0; -+} -+ -+struct htree_cookie { -+ struct dx_hash_info *hinfo; -+ struct dentry *dentry; -+}; -+ -+static int htree_node_check(struct dx_path *path, struct dx_frame *frame, -+ void *cookie) -+{ -+ void *data; -+ struct dx_entry *entries; -+ struct super_block *sb; -+ -+ data = frame->bh->b_data; -+ entries = dx_node_get_entries(path, frame); -+ sb = path->dp_object->i_sb; -+ if (frame == path->dp_frames) { -+ /* root node */ -+ struct dx_root *root; -+ struct htree_cookie *hc = cookie; -+ -+ root = data; -+ if (root->info.hash_version != DX_HASH_TEA && -+ root->info.hash_version != DX_HASH_HALF_MD4 && -+ root->info.hash_version != DX_HASH_R5 && -+ root->info.hash_version != DX_HASH_LEGACY) { -+ ext3_warning(sb, __FUNCTION__, -+ "Unrecognised inode hash code %d", -+ root->info.hash_version); -+ return ERR_BAD_DX_DIR; -+ } -+ -+ if (root->info.unused_flags & 1) { -+ ext3_warning(sb, __FUNCTION__, -+ "Unimplemented inode hash flags: %#06x", -+ root->info.unused_flags); -+ return ERR_BAD_DX_DIR; -+ } -+ -+ path->dp_indirect = root->info.indirect_levels; -+ if (path->dp_indirect > DX_MAX_TREE_HEIGHT - 1) { -+ ext3_warning(sb, __FUNCTION__, -+ "Unimplemented inode hash depth: %#06x", -+ root->info.indirect_levels); -+ return ERR_BAD_DX_DIR; -+ } -+ -+ assert((char *)entries == (((char *)&root->info) + -+ root->info.info_length)); -+ assert(dx_get_limit(entries) == dx_root_limit(path)); -+ -+ hc->hinfo->hash_version = root->info.hash_version; -+ hc->hinfo->seed = EXT3_SB(sb)->s_hash_seed; -+ if (hc->dentry) -+ ext3fs_dirhash(hc->dentry->d_name.name, -+ hc->dentry->d_name.len, hc->hinfo); -+ path->dp_key_target = &hc->hinfo->hash; -+ } else { -+ /* 
non-root index */ -+ assert(entries == data + path->dp_param->dpo_node_gap); -+ assert(dx_get_limit(entries) == dx_node_limit(path)); -+ } -+ frame->entries = frame->at = entries; -+ return 0; - } - --static inline unsigned dx_node_limit (struct inode *dir) -+static int htree_node_init(struct dx_path *path, -+ struct buffer_head *bh, int root) - { -- unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); -- return 0? 22: entry_space / sizeof(struct dx_entry); -+ struct dx_node *node; -+ -+ assert(!root); -+ -+ node = (void *)bh->b_data; -+ node->fake.rec_len = cpu_to_le16(path->dp_object->i_sb->s_blocksize); -+ node->fake.inode = 0; -+ return 0; - } - - /* -@@ -327,123 +505,101 @@ struct stats dx_show_entries(struct dx_h - } - #endif /* DX_DEBUG */ - --/* -- * Probe for a directory leaf block to search. -- * -- * dx_probe can return ERR_BAD_DX_DIR, which means there was a format -- * error in the directory index, and the caller should fall back to -- * searching the directory normally. The callers of dx_probe **MUST** -- * check for this error code, and make sure it never gets reflected -- * back to userspace. 
-- */ --static struct dx_frame * --dx_probe(struct dentry *dentry, struct inode *dir, -- struct dx_hash_info *hinfo, struct dx_path *path, int *err) --{ -- unsigned count, indirect; -- struct dx_entry *at, *entries, *p, *q, *m; -- struct dx_root *root; -- struct buffer_head *bh; -- struct dx_frame *frame = path->dp_frames; -- u32 hash; -+static int dx_lookup(struct dx_path *path, void *cookie) -+{ -+ u32 ptr; -+ int err; -+ int i; - -- frame->bh = NULL; -- if (dentry) -- dir = dentry->d_parent->d_inode; -- if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) -- goto fail; -- root = (struct dx_root *) bh->b_data; -- if (root->info.hash_version != DX_HASH_TEA && -- root->info.hash_version != DX_HASH_HALF_MD4 && -- root->info.hash_version != DX_HASH_R5 && -- root->info.hash_version != DX_HASH_LEGACY) { -- ext3_warning(dir->i_sb, __FUNCTION__, -- "Unrecognised inode hash code %d", root->info.hash_version); -- brelse(bh); -- *err = ERR_BAD_DX_DIR; -- goto fail; -- } -- hinfo->hash_version = root->info.hash_version; -- hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed; -- if (dentry) -- ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); -- hash = hinfo->hash; -- -- if (root->info.unused_flags & 1) { -- ext3_warning(dir->i_sb, __FUNCTION__, -- "Unimplemented inode hash flags: %#06x", -- root->info.unused_flags); -- brelse(bh); -- *err = ERR_BAD_DX_DIR; -- goto fail; -- } -+ struct dx_param *param; -+ struct dx_frame *frame; - -- if ((indirect = root->info.indirect_levels) > DX_MAX_TREE_HEIGHT - 1) { -- ext3_warning(dir->i_sb, __FUNCTION__, -- "Unimplemented inode hash depth: %#06x", -- root->info.indirect_levels); -- brelse(bh); -- *err = ERR_BAD_DX_DIR; -- goto fail; -- } -+ param = path->dp_param; - -- entries = (struct dx_entry *) (((char *)&root->info) + -- root->info.info_length); -- assert(dx_get_limit(entries) == dx_root_limit(dir, -- root->info.info_length)); -- dxtrace (printk("Look up %x", hash)); -- while (1) -- { -+ for (frame = path->dp_frames, i = 0, -+ 
ptr = param->dpo_root_ptr(path); i <= path->dp_indirect; -+ ptr = dx_get_block(path, frame->at), ++frame, ++i) { -+ struct dx_entry *entries; -+ struct dx_entry *p; -+ struct dx_entry *q; -+ struct dx_entry *m; -+ unsigned count; -+ -+ frame->bh = ext3_bread(NULL, path->dp_object, ptr, 0, &err); -+ if (frame->bh == NULL) { -+ err = -EIO; -+ break; -+ } -+ err = param->dpo_node_check(path, frame, cookie); -+ if (err != 0) -+ break; -+ -+ entries = frame->entries; - count = dx_get_count(entries); -- assert (count && count <= dx_get_limit(entries)); -- p = entries + 1; -- q = entries + count - 1; -- while (p <= q) -- { -- m = p + (q - p)/2; -+ assert(count && count <= dx_get_limit(entries)); -+ p = dx_entry_shift(path, entries, 1); -+ q = dx_entry_shift(path, entries, count - 1); -+ while (p <= q) { -+ m = dx_entry_shift(path, -+ p, dx_entry_diff(path, q, p) / 2); - dxtrace(printk(".")); -- if (dx_get_hash(m) > hash) -- q = m - 1; -+ if (memcmp(dx_get_key(path, m, path->dp_key), -+ path->dp_key_target, -+ param->dpo_key_size) > 0) -+ q = dx_entry_shift(path, m, -1); - else -- p = m + 1; -+ p = dx_entry_shift(path, m, +1); - } - -- if (0) // linear search cross check -- { -+ frame->at = dx_entry_shift(path, p, -1); -+ if (1) { // linear search cross check - unsigned n = count - 1; -+ struct dx_entry *at; -+ - at = entries; -- while (n--) -- { -+ while (n--) { - dxtrace(printk(",")); -- if (dx_get_hash(++at) > hash) -- { -- at--; -+ at = dx_entry_shift(path, at, +1); -+ if (memcmp(dx_get_key(path, at, path->dp_key), -+ path->dp_key_target, -+ param->dpo_key_size) > 0) { -+ at = dx_entry_shift(path, at, -1); - break; - } - } -- assert (at == p - 1); -+ assert(at == frame->at); - } -- -- at = p - 1; -- dxtrace(printk(" %x->%u\n", at == entries? 
0: dx_get_hash(at), dx_get_block(at))); -- frame->bh = bh; -- frame->entries = entries; -- frame->at = at; -- if (!indirect--) -- return path->dp_frame = frame; -- if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) -- goto fail2; -- at = entries = ((struct dx_node *) bh->b_data)->entries; -- assert (dx_get_limit(entries) == dx_node_limit (dir)); -- frame++; -- } --fail2: -- while (frame >= path->dp_frames) { -- brelse(frame->bh); -- frame--; - } --fail: -- return NULL; -+ if (err != 0) -+ dx_path_fini(path); -+ path->dp_frame = --frame; -+ return err; -+} -+ -+/* -+ * Probe for a directory leaf block to search. -+ * -+ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format -+ * error in the directory index, and the caller should fall back to -+ * searching the directory normally. The callers of dx_probe **MUST** -+ * check for this error code, and make sure it never gets reflected -+ * back to userspace. -+ */ -+static int dx_probe(struct dentry *dentry, struct inode *dir, -+ struct dx_hash_info *hinfo, struct dx_path *path) -+{ -+ int err; -+ __u32 hash_storage; -+ struct htree_cookie hc = { -+ .dentry = dentry, -+ .hinfo = hinfo -+ }; -+ -+ assert(dx_index_is_compat(path)); -+ path->dp_key = &hash_storage; -+ err = dx_lookup(path, &hc); -+ assert(err != 0 || path->dp_frames[path->dp_indirect].bh != NULL); -+ return err; - } - - static inline void dx_path_init(struct dx_path *path, struct inode *inode) -@@ -458,8 +614,10 @@ static inline void dx_path_fini(struct d - int i; - - for (i = 0; i < ARRAY_SIZE(path->dp_frames); i--) { -- if (path->dp_frames[i].bh != NULL) -+ if (path->dp_frames[i].bh != NULL) { - brelse(path->dp_frames[i].bh); -+ path->dp_frames[i].bh = NULL; -+ } - } - } - -@@ -488,6 +646,8 @@ static int ext3_htree_next_block(struct - int err, num_frames = 0; - __u32 bhash; - -+ assert(dx_index_is_compat(path)); -+ - p = path->dp_frame; - /* - * Find the next leaf page by incrementing the frame pointer. 
-@@ -497,7 +657,9 @@ static int ext3_htree_next_block(struct - * nodes need to be read. - */ - while (1) { -- if (++(p->at) < p->entries + dx_get_count(p->entries)) -+ p->at = dx_entry_shift(path, p->at, +1); -+ if (p->at < dx_entry_shift(path, p->entries, -+ dx_get_count(p->entries))) - break; - if (p == path->dp_frames) - return 0; -@@ -512,7 +674,7 @@ static int ext3_htree_next_block(struct - * desired contiuation hash. If it doesn't, return since - * there's no point to read in the successive index pages. - */ -- bhash = dx_get_hash(p->at); -+ dx_get_key(path, p->at, &bhash); - if (start_hash) - *start_hash = bhash; - if ((hash & 1) == 0) { -@@ -524,12 +686,13 @@ static int ext3_htree_next_block(struct - * block so no check is necessary - */ - while (num_frames--) { -- if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), 0, &err))) -+ if (!(bh = ext3_bread(NULL, dir, -+ dx_get_block(path, p->at), 0, &err))) - return err; /* Failure */ - ++p; - brelse (p->bh); - p->bh = bh; -- p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; -+ p->at = p->entries = dx_node_get_entries(path, p); - } - return 1; - } -@@ -609,6 +772,7 @@ int ext3_htree_fill_tree(struct file *di - start_minor_hash)); - dir = dir_file->f_dentry->d_inode; - dx_path_init(&path, dir); -+ path.dp_param = &htree_compat_param; - if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) { - hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; - hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed; -@@ -619,7 +783,8 @@ int ext3_htree_fill_tree(struct file *di - } - hinfo.hash = start_hash; - hinfo.minor_hash = 0; -- if (!dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, &path, &err)) -+ err = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, &path); -+ if (err != 0) - return err; - - /* Add '.' and '..' 
from the htree header */ -@@ -634,7 +799,7 @@ int ext3_htree_fill_tree(struct file *di - } - - while (1) { -- block = dx_get_block(path.dp_frame->at); -+ block = dx_get_block(&path, path.dp_frame->at); - ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo, - start_hash, start_minor_hash); - if (ret < 0) { -@@ -722,17 +887,19 @@ static void dx_sort_map (struct dx_map_e - } while(more); - } - --static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) -+static void dx_insert_block(struct dx_path *path, -+ struct dx_frame *frame, u32 hash, u32 block) - { - struct dx_entry *entries = frame->entries; -- struct dx_entry *old = frame->at, *new = old + 1; -+ struct dx_entry *old = frame->at, *new = dx_entry_shift(path, old, +1); - int count = dx_get_count(entries); - - assert(count < dx_get_limit(entries)); -- assert(old < entries + count); -- memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); -- dx_set_hash(new, hash); -- dx_set_block(new, block); -+ assert(old < dx_entry_shift(path, entries, count)); -+ memmove(dx_entry_shift(path, new, 1), new, -+ (char *)dx_entry_shift(path, entries, count) - (char *)new); -+ dx_set_key(path, new, &hash); -+ dx_set_block(path, new, block); - dx_set_count(entries, count + 1); - } - #endif -@@ -934,7 +1101,9 @@ static struct buffer_head * ext3_dx_find - struct dx_hash_info hinfo; - u32 hash; - struct dx_path path; -- struct dx_entry dummy_dot; -+ struct dx_entry_compat dummy_dot = { -+ .block = 0 -+ }; - struct ext3_dir_entry_2 *de, *top; - struct buffer_head *bh; - unsigned long block; -@@ -944,19 +1113,21 @@ static struct buffer_head * ext3_dx_find - struct inode *dir = dentry->d_parent->d_inode; - - dx_path_init(&path, dir); -+ path.dp_param = &htree_compat_param; -+ - sb = dir->i_sb; - /* NFS may look up ".." - look at dx_root directory block */ - if (namelen > 2 || name[0] != '.'||(name[1] != '.' 
&& name[1] != '\0')){ -- if (!(dx_probe(dentry, NULL, &hinfo, &path, err))) -+ *err = dx_probe(dentry, NULL, &hinfo, &path); -+ if (*err != 0) - return NULL; - } else { -- path.dp_frame->bh = NULL; /* for dx_path_fini() */ -- path.dp_frame->at = &dummy_dot; /* hack for zero entry*/ -- dx_set_block(path.dp_frame->at, 0); /* dx_root block is 0 */ -+ path.dp_frame->bh = NULL; /* for dx_path_fini() */ -+ path.dp_frame->at = (void *)&dummy_dot; /* hack for zero entry*/ - } - hash = hinfo.hash; - do { -- block = dx_get_block(path.dp_frame->at); -+ block = dx_get_block(&path, path.dp_frame->at); - if (!(bh = ext3_bread (NULL,dir, block, 0, err))) - goto errout; - de = (struct ext3_dir_entry_2 *) bh->b_data; -@@ -1115,10 +1286,11 @@ static struct ext3_dir_entry_2* dx_pack_ - - /* Allocate new node, and split leaf node @bh into it, inserting new pointer - * into parent node identified by @frame */ --static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, -+static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct dx_path *path, - struct buffer_head **bh,struct dx_frame *frame, - struct dx_hash_info *hinfo, int *error) - { -+ struct inode *dir = path->dp_object; - unsigned blocksize = dir->i_sb->s_blocksize; - unsigned count, continued; - struct buffer_head *bh2; -@@ -1180,7 +1352,7 @@ static struct ext3_dir_entry_2 *do_split - swap(*bh, bh2); - de = de2; - } -- dx_insert_block (frame, hash2 + continued, newblock); -+ dx_insert_block(path, frame, hash2 + continued, newblock); - err = ext3_journal_dirty_metadata (handle, bh2); - if (err) - goto journal_error; -@@ -1315,6 +1487,7 @@ static int make_indexed_dir(handle_t *ha - struct fake_dirent *fde; - - dx_path_init(&path, dir); -+ path.dp_param = &htree_compat_param; - blocksize = dir->i_sb->s_blocksize; - dxtrace(printk("Creating index\n")); - retval = ext3_journal_get_write_access(handle, bh); -@@ -1350,10 +1523,10 @@ static int make_indexed_dir(handle_t *ha - root->info.info_length = 
sizeof(root->info); - root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; - root->info.hash_version = DX_HASH_R5; -- entries = root->entries; -- dx_set_block (entries, 1); -+ entries = (void *)root->entries; -+ dx_set_block (&path, entries, 1); - dx_set_count (entries, 1); -- dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); -+ dx_set_limit (entries, dx_root_limit(&path)); - - /* Initialize as for dx_probe */ - hinfo.hash_version = root->info.hash_version; -@@ -1363,7 +1536,7 @@ static int make_indexed_dir(handle_t *ha - path.dp_frame->at = entries; - path.dp_frame->bh = bh; - bh = bh2; -- de = do_split(handle,dir, &bh, path.dp_frame, &hinfo, &retval); -+ de = do_split(handle, &path, &bh, path.dp_frame, &hinfo, &retval); - dx_path_fini(&path); - if (!de) - return retval; -@@ -1446,8 +1619,8 @@ static int ext3_dx_add_entry(handle_t *h - struct inode *inode) - { - struct dx_path path; -+ struct dx_param *param; - struct dx_frame *frame, *safe; -- struct dx_node *node2; - struct dx_entry *entries; /* old block contents */ - struct dx_entry *entries2; /* new block contents */ - struct dx_hash_info hinfo; -@@ -1463,7 +1636,10 @@ static int ext3_dx_add_entry(handle_t *h - size_t isize; - - dx_path_init(&path, dir); -- if (!dx_probe(dentry, NULL, &hinfo, &path, &err)) -+ param = path.dp_param = &htree_compat_param; -+ -+ err = dx_probe(dentry, NULL, &hinfo, &path); -+ if (err != 0) - return err; - frame = path.dp_frame; - entries = frame->entries; -@@ -1471,7 +1647,8 @@ static int ext3_dx_add_entry(handle_t *h - /* XXX nikita: global serialization! */ - isize = dir->i_size; - -- if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err))) -+ if (!(bh = ext3_bread(handle, dir, -+ dx_get_block(&path, frame->at), 0, &err))) - goto cleanup; - - BUFFER_TRACE(bh, "get_write_access"); -@@ -1519,12 +1696,9 @@ static int ext3_dx_add_entry(handle_t *h - * transaction... 
*/ - for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) { - bh_new[i] = ext3_append (handle, dir, &newblock[i], &err); -- if (!bh_new[i]) -+ if (!bh_new[i] || -+ param->dpo_node_init(&path, bh_new[i], 0) != 0) - goto cleanup; -- node2 = (struct dx_node *)(bh_new[i]->b_data); -- entries2 = node2->entries; -- node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); -- node2->fake.inode = 0; - BUFFER_TRACE(frame->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, frame->bh); - if (err) -@@ -1545,11 +1719,10 @@ static int ext3_dx_add_entry(handle_t *h - - entries = frame->entries; - count = dx_get_count(entries); -- idx = frame->at - entries; -+ idx = dx_entry_diff(&path, frame->at, entries); - - bh2 = bh_new[i]; -- node2 = (struct dx_node *)(bh2->b_data); -- entries2 = node2->entries; -+ entries2 = dx_get_entries(&path, bh2->b_data, 0); - - if (frame == path.dp_frames) { - /* splitting root node. Tricky point: -@@ -1571,19 +1744,19 @@ static int ext3_dx_add_entry(handle_t *h - indirects = root->info.indirect_levels; - dxtrace(printk("Creating new root %d\n", indirects)); - memcpy((char *) entries2, (char *) entries, -- count * sizeof(struct dx_entry)); -- dx_set_limit(entries2, dx_node_limit(dir)); -+ count * dx_entry_size(&path)); -+ dx_set_limit(entries2, dx_node_limit(&path)); - - /* Set up root */ - dx_set_count(entries, 1); -- dx_set_block(entries + 0, newblock[i]); -+ dx_set_block(&path, entries, newblock[i]); - root->info.indirect_levels = indirects + 1; - - /* Shift frames in the path */ - memmove(frames + 2, frames + 1, - (sizeof path.dp_frames) - 2 * sizeof frames[0]); - /* Add new access path frame */ -- frames[1].at = entries2 + idx; -+ frames[1].at = dx_entry_shift(&path, entries2, idx); - frames[1].entries = entries = entries2; - frames[1].bh = bh2; - ++ frame; -@@ -1594,23 +1767,30 @@ static int ext3_dx_add_entry(handle_t *h - } else { - /* splitting non-root index node. 
*/ - unsigned count1 = count/2, count2 = count - count1; -- unsigned hash2 = dx_get_hash(entries + count1); -+ unsigned hash2; -+ -+ dx_get_key(&path, -+ dx_entry_shift(&path, entries, count1), -+ &hash2); -+ - dxtrace(printk("Split index %i/%i\n", count1, count2)); - -- memcpy ((char *) entries2, (char *) (entries + count1), -- count2 * sizeof(struct dx_entry)); -+ memcpy ((char *) entries2, -+ (char *) dx_entry_shift(&path, entries, count1), -+ count2 * dx_entry_size(&path)); - dx_set_count (entries, count1); - dx_set_count (entries2, count2); -- dx_set_limit (entries2, dx_node_limit(dir)); -+ dx_set_limit (entries2, dx_node_limit(&path)); - - /* Which index block gets the new entry? */ - if (idx >= count1) { -- frame->at = entries2 + idx - count1; -+ frame->at = dx_entry_shift(&path, entries2, -+ idx - count1); - frame->entries = entries = entries2; - swap(frame->bh, bh2); - bh_new[i] = bh2; - } -- dx_insert_block (frame - 1, hash2, newblock[i]); -+ dx_insert_block(&path, frame - 1, hash2, newblock[i]); - dxtrace(dx_show_index ("node", frame->entries)); - dxtrace(dx_show_index ("node", - ((struct dx_node *) bh2->b_data)->entries)); -@@ -1619,7 +1799,7 @@ static int ext3_dx_add_entry(handle_t *h - goto journal_error; - } - } -- de = do_split(handle, dir, &bh, --frame, &hinfo, &err); -+ de = do_split(handle, &path, &bh, --frame, &hinfo, &err); - if (!de) - goto cleanup; - err = add_dirent_to_buf(handle, dentry, inode, de, bh); diff --git a/lustre/kernel_patches/patches/ext3-inode-version-2.6-sles10.patch b/lustre/kernel_patches/patches/ext3-inode-version-2.6-sles10.patch deleted file mode 100644 index 7b6b1b8890..0000000000 --- a/lustre/kernel_patches/patches/ext3-inode-version-2.6-sles10.patch +++ /dev/null @@ -1,426 +0,0 @@ -Index: linux-2.6.16-sles10/fs/ext3/inode.c -=================================================================== ---- linux-2.6.16-sles10.orig/fs/ext3/inode.c -+++ linux-2.6.16-sles10/fs/ext3/inode.c -@@ -2558,6 +2558,13 @@ void 
ext3_read_inode(struct inode * inod - EXT3_INODE_GET_XTIME(i_atime, inode, raw_inode); - EXT3_EINODE_GET_XTIME(i_crtime, ei, raw_inode); - -+ ei->i_fs_version = le32_to_cpu(raw_inode->i_disk_version); -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) { -+ if (EXT3_FITS_IN_INODE(raw_inode, ei, i_version_hi)) -+ ei->i_fs_version |= (__u64)(le32_to_cpu(raw_inode->i_version_hi)) -+ << 32; -+ } -+ - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; -@@ -2696,8 +2703,14 @@ static int ext3_do_update_inode(handle_t - } else for (block = 0; block < EXT3_N_BLOCKS; block++) - raw_inode->i_block[block] = ei->i_data[block]; - -- if (ei->i_extra_isize) -+ raw_inode->i_disk_version = cpu_to_le32(ei->i_fs_version); -+ if (ei->i_extra_isize) { -+ if (EXT3_FITS_IN_INODE(raw_inode, ei, i_version_hi)) { -+ raw_inode->i_version_hi = cpu_to_le32(ei->i_fs_version -+ >> 32); -+ } - raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); -+ } - - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - rc = ext3_journal_dirty_metadata(handle, bh); -@@ -2971,10 +2984,32 @@ ext3_reserve_inode_write(handle_t *handl - int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) - { - struct ext3_iloc iloc; -- int err; -+ int err, ret; -+ static int expand_message; - - might_sleep(); - err = ext3_reserve_inode_write(handle, inode, &iloc); -+ if (EXT3_I(inode)->i_extra_isize < -+ EXT3_SB(inode->i_sb)->s_want_extra_isize && -+ !(EXT3_I(inode)->i_state & EXT3_STATE_NO_EXPAND)) { -+ /* We need extra buffer credits since we may write into EA block -+ * with this same handle */ -+ if ((ext3_journal_extend(handle, -+ EXT3_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) { -+ ret = ext3_expand_extra_isize(inode, -+ EXT3_SB(inode->i_sb)->s_want_extra_isize, -+ iloc, handle); -+ if (ret) { -+ EXT3_I(inode)->i_state |= EXT3_STATE_NO_EXPAND; -+ if (!expand_message) { -+ ext3_warning(inode->i_sb, __FUNCTION__, -+ "Unable to expand 
inode %lu. Delete some" -+ " EAs or run e2fsck.", inode->i_ino); -+ expand_message = 1; -+ } -+ } -+ } -+ } - if (!err) - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - return err; -Index: linux-2.6.16-sles10/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.16-sles10.orig/include/linux/ext3_fs.h -+++ linux-2.6.16-sles10/include/linux/ext3_fs.h -@@ -205,6 +205,7 @@ struct ext3_group_desc - #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ - #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ - #define EXT3_STATE_XATTR 0x00000004 /* has in-inode xattrs */ -+#define EXT3_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ - - /* Used to pass group descriptor data when online resize is done */ - struct ext3_new_group_input { -@@ -281,7 +282,7 @@ struct ext3_inode { - __le32 i_flags; /* File flags */ - union { - struct { -- __u32 l_i_reserved1; -+ __u32 l_i_version; - } linux1; - struct { - __u32 h_i_translator; -@@ -326,6 +327,7 @@ struct ext3_inode { - __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ - __le32 i_crtime; /* File Creation time */ - __le32 i_crtime_extra; /* extra File Creation time (nsec << 2 | epoch) */ -+ __le32 i_version_hi; /* high 32 bits for 64-bit version */ - }; - - #define i_size_high i_dir_acl -@@ -388,6 +390,8 @@ do { \ - raw_inode->xtime ## _extra); \ - } while (0) - -+#define i_disk_version osd1.linux1.l_i_version -+ - #if defined(__KERNEL__) || defined(__linux__) - #define i_reserved1 osd1.linux1.l_i_reserved1 - #define i_frag osd2.linux2.l_i_frag -Index: linux-2.6.16-sles10/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.16-sles10.orig/include/linux/ext3_fs_i.h -+++ linux-2.6.16-sles10/include/linux/ext3_fs_i.h -@@ -20,6 +20,8 @@ - #include <linux/rbtree.h> - #include <linux/seqlock.h> - -+#define HAVE_DISK_INODE_VERSION -+ - struct ext3_reserve_window { - __u32 
_rsv_start; /* First byte reserved */ - __u32 _rsv_end; /* Last byte reserved or 0 */ -@@ -138,6 +140,8 @@ struct ext3_inode_info { - __u32 i_cached_extent[4]; - - void *i_filterdata; -+ -+ __u64 i_fs_version; - }; - - #endif /* _LINUX_EXT3_FS_I */ -Index: linux-2.6.16-sles10/fs/ext3/xattr.c -=================================================================== ---- linux-2.6.16-sles10.orig/fs/ext3/xattr.c -+++ linux-2.6.16-sles10/fs/ext3/xattr.c -@@ -505,6 +505,20 @@ ext3_xattr_release_block(handle_t *handl - } - } - -+static inline size_t ext3_xattr_free_space(struct ext3_xattr_entry *last, -+ size_t *min_offs, void *base, int *total) -+{ -+ for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) { -+ *total += EXT3_XATTR_LEN(last->e_name_len); -+ if (!last->e_value_block && last->e_value_size) { -+ size_t offs = le16_to_cpu(last->e_value_offs); -+ if (offs < *min_offs) -+ *min_offs = offs; -+ } -+ } -+ return (*min_offs - ((void *)last - base) - sizeof(__u32)); -+} -+ - struct ext3_xattr_info { - int name_index; - const char *name; -@@ -1007,6 +1021,8 @@ ext3_xattr_set_handle(handle_t *handle, - if (!error) { - ext3_xattr_update_super_block(handle, inode->i_sb); - inode->i_ctime = ext3_current_time(inode); -+ if (!value) -+ EXT3_I(inode)->i_state &= ~EXT3_STATE_NO_EXPAND; - ext3_mark_inode_dirty(handle, inode); - /* - * The bh is consumed by ext3_mark_iloc_dirty, even with -@@ -1059,6 +1075,249 @@ retry: - return error; - } - -+static void ext3_xattr_shift_entries(struct ext3_xattr_entry *entry, -+ int value_offs_shift, void *to, -+ void *from, size_t n, int blocksize) -+{ -+ struct ext3_xattr_entry *last = entry; -+ int new_offs; -+ -+ /* Adjust the value offsets of the entries */ -+ for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) { -+ if (!last->e_value_block && last->e_value_size) { -+ new_offs = le16_to_cpu(last->e_value_offs) + -+ value_offs_shift; -+ BUG_ON(new_offs + le32_to_cpu(last->e_value_size) > -+ blocksize); -+ last->e_value_offs = 
cpu_to_le16(new_offs); -+ } -+ } -+ /* Shift the entries by n bytes */ -+ memmove(to, from, n); -+} -+ -+/* Expand an inode by new_extra_isize bytes. -+ * Returns 0 on success or negative error number on failure. -+ */ -+int ext3_expand_extra_isize(struct inode *inode, int new_extra_isize, -+ struct ext3_iloc iloc, handle_t *handle) -+{ -+ struct ext3_inode *raw_inode; -+ struct ext3_xattr_ibody_header *header; -+ struct ext3_xattr_entry *entry, *last, *first; -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_ibody_find *is = NULL; -+ struct ext3_xattr_block_find *bs = NULL; -+ char *buffer = NULL, *b_entry_name = NULL; -+ size_t min_offs, free; -+ int total_ino, total_blk; -+ void *base, *start, *end; -+ int extra_isize = 0, error = 0, tried_min_extra_isize = 0; -+ int s_min_extra_isize = EXT3_SB(inode->i_sb)->s_es->s_min_extra_isize; -+ -+ down_write(&EXT3_I(inode)->xattr_sem); -+ -+retry: -+ if (EXT3_I(inode)->i_extra_isize >= new_extra_isize) { -+ up_write(&EXT3_I(inode)->xattr_sem); -+ return 0; -+ } -+ -+ raw_inode = ext3_raw_inode(&iloc); -+ -+ header = IHDR(inode, raw_inode); -+ entry = IFIRST(header); -+ -+ /* No extended attributes present */ -+ if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR) || -+ header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC)) { -+ memset((void *)raw_inode + EXT3_GOOD_OLD_INODE_SIZE, 0, -+ new_extra_isize); -+ EXT3_I(inode)->i_extra_isize = new_extra_isize; -+ goto cleanup; -+ } -+ -+ /* -+ * Check if enough free space is available in the inode to shift the -+ * entries ahead by new_extra_isize. 
-+ */ -+ -+ base = start = entry; -+ end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ min_offs = end - base; -+ last = entry; -+ total_ino = sizeof(struct ext3_xattr_ibody_header); -+ -+ free = ext3_xattr_free_space(last, &min_offs, base, &total_ino); -+ if (free >= new_extra_isize) { -+ entry = IFIRST(header); -+ ext3_xattr_shift_entries(entry, EXT3_I(inode)->i_extra_isize - -+ new_extra_isize, (void *)raw_inode + -+ EXT3_GOOD_OLD_INODE_SIZE + new_extra_isize, -+ (void *)header, total_ino, -+ inode->i_sb->s_blocksize); -+ EXT3_I(inode)->i_extra_isize = new_extra_isize; -+ error = 0; -+ goto cleanup; -+ } -+ -+ /* -+ * Enough free space isn't available in the inode, check if -+ * EA block can hold new_extra_isize bytes. -+ */ -+ if (EXT3_I(inode)->i_file_acl) { -+ bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ if (ext3_xattr_check_block(bh)) { -+ ext3_error(inode->i_sb, __FUNCTION__, -+ "inode %lu: bad block %d", inode->i_ino, -+ EXT3_I(inode)->i_file_acl); -+ error = -EIO; -+ goto cleanup; -+ } -+ base = BHDR(bh); -+ first = BFIRST(bh); -+ end = bh->b_data + bh->b_size; -+ min_offs = end - base; -+ free = ext3_xattr_free_space(first, &min_offs, base, -+ &total_blk); -+ if (free < new_extra_isize) { -+ if (!tried_min_extra_isize && s_min_extra_isize) { -+ tried_min_extra_isize++; -+ new_extra_isize = s_min_extra_isize; -+ goto retry; -+ } -+ error = -1; -+ goto cleanup; -+ } -+ } else { -+ free = inode->i_sb->s_blocksize; -+ } -+ -+ while (new_extra_isize > 0) { -+ size_t offs, size, entry_size; -+ struct ext3_xattr_entry *small_entry = NULL; -+ struct ext3_xattr_info i = { -+ .value = NULL, -+ .value_len = 0, -+ }; -+ unsigned int total_size, shift_bytes, temp = ~0U; -+ -+ is = (struct ext3_xattr_ibody_find *) kmalloc(sizeof(struct -+ ext3_xattr_ibody_find), GFP_KERNEL); -+ bs = (struct ext3_xattr_block_find *) kmalloc(sizeof(struct -+ ext3_xattr_block_find), GFP_KERNEL); -+ memset((void 
*)is, 0, sizeof(struct ext3_xattr_ibody_find)); -+ memset((void *)bs, 0, sizeof(struct ext3_xattr_block_find)); -+ -+ is->s.not_found = bs->s.not_found = -ENODATA; -+ is->iloc.bh = NULL; -+ bs->bh = NULL; -+ -+ last = IFIRST(header); -+ /* Find the entry best suited to be pushed into EA block */ -+ entry = NULL; -+ for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) { -+ total_size = EXT3_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + -+ EXT3_XATTR_LEN(last->e_name_len); -+ if (total_size <= free && total_size < temp) { -+ if (total_size < new_extra_isize) { -+ small_entry = last; -+ } else { -+ entry = last; -+ temp = total_size; -+ } -+ } -+ } -+ -+ if (entry == NULL) { -+ if (small_entry) { -+ entry = small_entry; -+ } else { -+ if (!tried_min_extra_isize && -+ s_min_extra_isize) { -+ tried_min_extra_isize++; -+ new_extra_isize = s_min_extra_isize; -+ goto retry; -+ } -+ error = -1; -+ goto cleanup; -+ } -+ } -+ offs = le16_to_cpu(entry->e_value_offs); -+ size = le32_to_cpu(entry->e_value_size); -+ entry_size = EXT3_XATTR_LEN(entry->e_name_len); -+ i.name_index = entry->e_name_index, -+ buffer = kmalloc(EXT3_XATTR_SIZE(size), GFP_KERNEL); -+ b_entry_name = kmalloc(entry->e_name_len + 1, GFP_KERNEL); -+ /* Save the entry name and the entry value */ -+ memcpy((void *)buffer, (void *)IFIRST(header) + offs, -+ EXT3_XATTR_SIZE(size)); -+ memcpy((void *)b_entry_name, (void *)entry->e_name, -+ entry->e_name_len); -+ b_entry_name[entry->e_name_len] = '\0'; -+ i.name = b_entry_name; -+ -+ error = ext3_get_inode_loc(inode, &is->iloc); -+ if (error) -+ goto cleanup; -+ -+ error = ext3_xattr_ibody_find(inode, &i, is); -+ if (error) -+ goto cleanup; -+ -+ /* Remove the chosen entry from the inode */ -+ error = ext3_xattr_ibody_set(handle, inode, &i, is); -+ -+ entry = IFIRST(header); -+ if (entry_size + EXT3_XATTR_SIZE(size) >= new_extra_isize) -+ shift_bytes = new_extra_isize; -+ else -+ shift_bytes = entry_size + size; -+ /* Adjust the offsets and shift the remaining 
entries ahead */ -+ ext3_xattr_shift_entries(entry, EXT3_I(inode)->i_extra_isize - -+ shift_bytes, (void *)raw_inode + -+ EXT3_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes, -+ (void *)header, total_ino - entry_size, -+ inode->i_sb->s_blocksize); -+ -+ extra_isize += shift_bytes; -+ new_extra_isize -= shift_bytes; -+ EXT3_I(inode)->i_extra_isize = extra_isize; -+ -+ i.name = b_entry_name; -+ i.value = buffer; -+ i.value_len = cpu_to_le32(size); -+ error = ext3_xattr_block_find(inode, &i, bs); -+ if (error) -+ goto cleanup; -+ -+ /* Add entry which was removed from the inode into the block */ -+ error = ext3_xattr_block_set(handle, inode, &i, bs); -+ if (error) -+ goto cleanup; -+ } -+ -+cleanup: -+ if (b_entry_name) -+ kfree(b_entry_name); -+ if (buffer) -+ kfree(buffer); -+ if (is) { -+ brelse(is->iloc.bh); -+ kfree(is); -+ } -+ if (bs) -+ kfree(bs); -+ brelse(bh); -+ up_write(&EXT3_I(inode)->xattr_sem); -+ return error; -+} -+ -+ -+ - /* - * ext3_xattr_delete_inode() - * -Index: linux-2.6.16-sles10/fs/ext3/xattr.h -=================================================================== ---- linux-2.6.16-sles10.orig/fs/ext3/xattr.h -+++ linux-2.6.16-sles10/fs/ext3/xattr.h -@@ -75,6 +75,9 @@ extern int ext3_xattr_set_handle(handle_ - extern void ext3_xattr_delete_inode(handle_t *, struct inode *); - extern void ext3_xattr_put_super(struct super_block *); - -+int ext3_expand_extra_isize(struct inode *inode, int new_extra_isize, -+ struct ext3_iloc iloc, handle_t *handle); -+ - extern int init_ext3_xattr(void); - extern void exit_ext3_xattr(void); - diff --git a/lustre/kernel_patches/patches/ext3-inode-version-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/ext3-inode-version-2.6.18-vanilla.patch deleted file mode 100644 index 26f71acf45..0000000000 --- a/lustre/kernel_patches/patches/ext3-inode-version-2.6.18-vanilla.patch +++ /dev/null @@ -1,426 +0,0 @@ -Index: linux-2.6.18/fs/ext3/inode.c -=================================================================== ---- 
linux-2.6.18.orig/fs/ext3/inode.c -+++ linux-2.6.18/fs/ext3/inode.c -@@ -2703,6 +2703,13 @@ void ext3_read_inode(struct inode * inod - EXT3_INODE_GET_XTIME(i_atime, inode, raw_inode); - EXT3_EINODE_GET_XTIME(i_crtime, ei, raw_inode); - -+ ei->i_fs_version = le32_to_cpu(raw_inode->i_disk_version); -+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) { -+ if (EXT3_FITS_IN_INODE(raw_inode, ei, i_version_hi)) -+ ei->i_fs_version |= (__u64)(le32_to_cpu(raw_inode->i_version_hi)) -+ << 32; -+ } -+ - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; -@@ -2841,8 +2848,14 @@ static int ext3_do_update_inode(handle_t - } else for (block = 0; block < EXT3_N_BLOCKS; block++) - raw_inode->i_block[block] = ei->i_data[block]; - -- if (ei->i_extra_isize) -+ raw_inode->i_disk_version = cpu_to_le32(ei->i_fs_version); -+ if (ei->i_extra_isize) { -+ if (EXT3_FITS_IN_INODE(raw_inode, ei, i_version_hi)) { -+ raw_inode->i_version_hi = cpu_to_le32(ei->i_fs_version -+ >> 32); -+ } - raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); -+ } - - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - rc = ext3_journal_dirty_metadata(handle, bh); -@@ -3116,10 +3129,32 @@ ext3_reserve_inode_write(handle_t *handl - int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) - { - struct ext3_iloc iloc; -- int err; -+ int err, ret; -+ static int expand_message; - - might_sleep(); - err = ext3_reserve_inode_write(handle, inode, &iloc); -+ if (EXT3_I(inode)->i_extra_isize < -+ EXT3_SB(inode->i_sb)->s_want_extra_isize && -+ !(EXT3_I(inode)->i_state & EXT3_STATE_NO_EXPAND)) { -+ /* We need extra buffer credits since we may write into EA block -+ * with this same handle */ -+ if ((ext3_journal_extend(handle, -+ EXT3_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) { -+ ret = ext3_expand_extra_isize(inode, -+ EXT3_SB(inode->i_sb)->s_want_extra_isize, -+ iloc, handle); -+ if (ret) { -+ EXT3_I(inode)->i_state |= 
EXT3_STATE_NO_EXPAND; -+ if (!expand_message) { -+ ext3_warning(inode->i_sb, __FUNCTION__, -+ "Unable to expand inode %lu. Delete some" -+ " EAs or run e2fsck.", inode->i_ino); -+ expand_message = 1; -+ } -+ } -+ } -+ } - if (!err) - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - return err; -Index: linux-2.6.18/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.18.orig/include/linux/ext3_fs.h -+++ linux-2.6.18/include/linux/ext3_fs.h -@@ -201,6 +201,7 @@ struct ext3_group_desc - #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ - #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ - #define EXT3_STATE_XATTR 0x00000004 /* has in-inode xattrs */ -+#define EXT3_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ - - /* Used to pass group descriptor data when online resize is done */ - struct ext3_new_group_input { -@@ -277,7 +278,7 @@ struct ext3_inode { - __le32 i_flags; /* File flags */ - union { - struct { -- __u32 l_i_reserved1; -+ __u32 l_i_version; - } linux1; - struct { - __u32 h_i_translator; -@@ -322,6 +323,7 @@ struct ext3_inode { - __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ - __le32 i_crtime; /* File Creation time */ - __le32 i_crtime_extra; /* extra File Creation time (nsec << 2 | epoch) */ -+ __le32 i_version_hi; /* high 32 bits for 64-bit version */ - }; - - #define i_size_high i_dir_acl -@@ -384,6 +386,8 @@ do { \ - raw_inode->xtime ## _extra); \ - } while (0) - -+#define i_disk_version osd1.linux1.l_i_version -+ - #if defined(__KERNEL__) || defined(__linux__) - #define i_reserved1 osd1.linux1.l_i_reserved1 - #define i_frag osd2.linux2.l_i_frag -Index: linux-2.6.18/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.18.orig/include/linux/ext3_fs_i.h -+++ linux-2.6.18/include/linux/ext3_fs_i.h -@@ -21,6 +21,8 @@ - #include <linux/seqlock.h> - #include <linux/mutex.h> - -+#define 
HAVE_DISK_INODE_VERSION -+ - /* data type for block offset of block group */ - typedef int ext3_grpblk_t; - -@@ -147,6 +149,8 @@ struct ext3_inode_info { - struct timespec i_crtime; - - void *i_filterdata; -+ -+ __u64 i_fs_version; - }; - - #endif /* _LINUX_EXT3_FS_I */ -Index: linux-2.6.18/fs/ext3/xattr.c -=================================================================== ---- linux-2.6.18.orig/fs/ext3/xattr.c -+++ linux-2.6.18/fs/ext3/xattr.c -@@ -505,6 +505,20 @@ ext3_xattr_release_block(handle_t *handl - } - } - -+static inline size_t ext3_xattr_free_space(struct ext3_xattr_entry *last, -+ size_t *min_offs, void *base, int *total) -+{ -+ for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) { -+ *total += EXT3_XATTR_LEN(last->e_name_len); -+ if (!last->e_value_block && last->e_value_size) { -+ size_t offs = le16_to_cpu(last->e_value_offs); -+ if (offs < *min_offs) -+ *min_offs = offs; -+ } -+ } -+ return (*min_offs - ((void *)last - base) - sizeof(__u32)); -+} -+ - struct ext3_xattr_info { - int name_index; - const char *name; -@@ -1008,6 +1022,8 @@ ext3_xattr_set_handle(handle_t *handle, - if (!error) { - ext3_xattr_update_super_block(handle, inode->i_sb); - inode->i_ctime = ext3_current_time(inode); -+ if (!value) -+ EXT3_I(inode)->i_state &= ~EXT3_STATE_NO_EXPAND; - error = ext3_mark_iloc_dirty(handle, inode, &is.iloc); - /* - * The bh is consumed by ext3_mark_iloc_dirty, even with -@@ -1060,6 +1076,249 @@ retry: - return error; - } - -+static void ext3_xattr_shift_entries(struct ext3_xattr_entry *entry, -+ int value_offs_shift, void *to, -+ void *from, size_t n, int blocksize) -+{ -+ struct ext3_xattr_entry *last = entry; -+ int new_offs; -+ -+ /* Adjust the value offsets of the entries */ -+ for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) { -+ if (!last->e_value_block && last->e_value_size) { -+ new_offs = le16_to_cpu(last->e_value_offs) + -+ value_offs_shift; -+ BUG_ON(new_offs + le32_to_cpu(last->e_value_size) > -+ blocksize); -+ 
last->e_value_offs = cpu_to_le16(new_offs); -+ } -+ } -+ /* Shift the entries by n bytes */ -+ memmove(to, from, n); -+} -+ -+/* Expand an inode by new_extra_isize bytes. -+ * Returns 0 on success or negative error number on failure. -+ */ -+int ext3_expand_extra_isize(struct inode *inode, int new_extra_isize, -+ struct ext3_iloc iloc, handle_t *handle) -+{ -+ struct ext3_inode *raw_inode; -+ struct ext3_xattr_ibody_header *header; -+ struct ext3_xattr_entry *entry, *last, *first; -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_ibody_find *is = NULL; -+ struct ext3_xattr_block_find *bs = NULL; -+ char *buffer = NULL, *b_entry_name = NULL; -+ size_t min_offs, free; -+ int total_ino, total_blk; -+ void *base, *start, *end; -+ int extra_isize = 0, error = 0, tried_min_extra_isize = 0; -+ int s_min_extra_isize = EXT3_SB(inode->i_sb)->s_es->s_min_extra_isize; -+ -+ down_write(&EXT3_I(inode)->xattr_sem); -+ -+retry: -+ if (EXT3_I(inode)->i_extra_isize >= new_extra_isize) { -+ up_write(&EXT3_I(inode)->xattr_sem); -+ return 0; -+ } -+ -+ raw_inode = ext3_raw_inode(&iloc); -+ -+ header = IHDR(inode, raw_inode); -+ entry = IFIRST(header); -+ -+ /* No extended attributes present */ -+ if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR) || -+ header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC)) { -+ memset((void *)raw_inode + EXT3_GOOD_OLD_INODE_SIZE, 0, -+ new_extra_isize); -+ EXT3_I(inode)->i_extra_isize = new_extra_isize; -+ goto cleanup; -+ } -+ -+ /* -+ * Check if enough free space is available in the inode to shift the -+ * entries ahead by new_extra_isize. 
-+ */ -+ -+ base = start = entry; -+ end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; -+ min_offs = end - base; -+ last = entry; -+ total_ino = sizeof(struct ext3_xattr_ibody_header); -+ -+ free = ext3_xattr_free_space(last, &min_offs, base, &total_ino); -+ if (free >= new_extra_isize) { -+ entry = IFIRST(header); -+ ext3_xattr_shift_entries(entry, EXT3_I(inode)->i_extra_isize - -+ new_extra_isize, (void *)raw_inode + -+ EXT3_GOOD_OLD_INODE_SIZE + new_extra_isize, -+ (void *)header, total_ino, -+ inode->i_sb->s_blocksize); -+ EXT3_I(inode)->i_extra_isize = new_extra_isize; -+ error = 0; -+ goto cleanup; -+ } -+ -+ /* -+ * Enough free space isn't available in the inode, check if -+ * EA block can hold new_extra_isize bytes. -+ */ -+ if (EXT3_I(inode)->i_file_acl) { -+ bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ if (ext3_xattr_check_block(bh)) { -+ ext3_error(inode->i_sb, __FUNCTION__, -+ "inode %lu: bad block "E3FSBLK, inode->i_ino, -+ EXT3_I(inode)->i_file_acl); -+ error = -EIO; -+ goto cleanup; -+ } -+ base = BHDR(bh); -+ first = BFIRST(bh); -+ end = bh->b_data + bh->b_size; -+ min_offs = end - base; -+ free = ext3_xattr_free_space(first, &min_offs, base, -+ &total_blk); -+ if (free < new_extra_isize) { -+ if (!tried_min_extra_isize && s_min_extra_isize) { -+ tried_min_extra_isize++; -+ new_extra_isize = s_min_extra_isize; -+ goto retry; -+ } -+ error = -1; -+ goto cleanup; -+ } -+ } else { -+ free = inode->i_sb->s_blocksize; -+ } -+ -+ while (new_extra_isize > 0) { -+ size_t offs, size, entry_size; -+ struct ext3_xattr_entry *small_entry = NULL; -+ struct ext3_xattr_info i = { -+ .value = NULL, -+ .value_len = 0, -+ }; -+ unsigned int total_size, shift_bytes, temp = ~0U; -+ -+ is = (struct ext3_xattr_ibody_find *) kmalloc(sizeof(struct -+ ext3_xattr_ibody_find), GFP_KERNEL); -+ bs = (struct ext3_xattr_block_find *) kmalloc(sizeof(struct -+ ext3_xattr_block_find), GFP_KERNEL); -+ memset((void 
*)is, 0, sizeof(struct ext3_xattr_ibody_find)); -+ memset((void *)bs, 0, sizeof(struct ext3_xattr_block_find)); -+ -+ is->s.not_found = bs->s.not_found = -ENODATA; -+ is->iloc.bh = NULL; -+ bs->bh = NULL; -+ -+ last = IFIRST(header); -+ /* Find the entry best suited to be pushed into EA block */ -+ entry = NULL; -+ for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) { -+ total_size = EXT3_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + -+ EXT3_XATTR_LEN(last->e_name_len); -+ if (total_size <= free && total_size < temp) { -+ if (total_size < new_extra_isize) { -+ small_entry = last; -+ } else { -+ entry = last; -+ temp = total_size; -+ } -+ } -+ } -+ -+ if (entry == NULL) { -+ if (small_entry) { -+ entry = small_entry; -+ } else { -+ if (!tried_min_extra_isize && -+ s_min_extra_isize) { -+ tried_min_extra_isize++; -+ new_extra_isize = s_min_extra_isize; -+ goto retry; -+ } -+ error = -1; -+ goto cleanup; -+ } -+ } -+ offs = le16_to_cpu(entry->e_value_offs); -+ size = le32_to_cpu(entry->e_value_size); -+ entry_size = EXT3_XATTR_LEN(entry->e_name_len); -+ i.name_index = entry->e_name_index, -+ buffer = kmalloc(EXT3_XATTR_SIZE(size), GFP_KERNEL); -+ b_entry_name = kmalloc(entry->e_name_len + 1, GFP_KERNEL); -+ /* Save the entry name and the entry value */ -+ memcpy((void *)buffer, (void *)IFIRST(header) + offs, -+ EXT3_XATTR_SIZE(size)); -+ memcpy((void *)b_entry_name, (void *)entry->e_name, -+ entry->e_name_len); -+ b_entry_name[entry->e_name_len] = '\0'; -+ i.name = b_entry_name; -+ -+ error = ext3_get_inode_loc(inode, &is->iloc); -+ if (error) -+ goto cleanup; -+ -+ error = ext3_xattr_ibody_find(inode, &i, is); -+ if (error) -+ goto cleanup; -+ -+ /* Remove the chosen entry from the inode */ -+ error = ext3_xattr_ibody_set(handle, inode, &i, is); -+ -+ entry = IFIRST(header); -+ if (entry_size + EXT3_XATTR_SIZE(size) >= new_extra_isize) -+ shift_bytes = new_extra_isize; -+ else -+ shift_bytes = entry_size + size; -+ /* Adjust the offsets and shift the remaining 
entries ahead */ -+ ext3_xattr_shift_entries(entry, EXT3_I(inode)->i_extra_isize - -+ shift_bytes, (void *)raw_inode + -+ EXT3_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes, -+ (void *)header, total_ino - entry_size, -+ inode->i_sb->s_blocksize); -+ -+ extra_isize += shift_bytes; -+ new_extra_isize -= shift_bytes; -+ EXT3_I(inode)->i_extra_isize = extra_isize; -+ -+ i.name = b_entry_name; -+ i.value = buffer; -+ i.value_len = cpu_to_le32(size); -+ error = ext3_xattr_block_find(inode, &i, bs); -+ if (error) -+ goto cleanup; -+ -+ /* Add entry which was removed from the inode into the block */ -+ error = ext3_xattr_block_set(handle, inode, &i, bs); -+ if (error) -+ goto cleanup; -+ } -+ -+cleanup: -+ if (b_entry_name) -+ kfree(b_entry_name); -+ if (buffer) -+ kfree(buffer); -+ if (is) { -+ brelse(is->iloc.bh); -+ kfree(is); -+ } -+ if (bs) -+ kfree(bs); -+ brelse(bh); -+ up_write(&EXT3_I(inode)->xattr_sem); -+ return error; -+} -+ -+ -+ - /* - * ext3_xattr_delete_inode() - * -Index: linux-2.6.18/fs/ext3/xattr.h -=================================================================== ---- linux-2.6.18.orig/fs/ext3/xattr.h -+++ linux-2.6.18/fs/ext3/xattr.h -@@ -74,6 +74,9 @@ extern int ext3_xattr_set_handle(handle_ - extern void ext3_xattr_delete_inode(handle_t *, struct inode *); - extern void ext3_xattr_put_super(struct super_block *); - -+int ext3_expand_extra_isize(struct inode *inode, int new_extra_isize, -+ struct ext3_iloc iloc, handle_t *handle); -+ - extern int init_ext3_xattr(void); - extern void exit_ext3_xattr(void); - diff --git a/lustre/kernel_patches/patches/ext3-mballoc3-core.patch b/lustre/kernel_patches/patches/ext3-mballoc3-core.patch deleted file mode 100644 index a6033d1d19..0000000000 --- a/lustre/kernel_patches/patches/ext3-mballoc3-core.patch +++ /dev/null @@ -1,4528 +0,0 @@ -Index: linux-2.6.9-full/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.6.9-full.orig/include/linux/ext3_fs_sb.h 
2006-05-18 23:57:04.000000000 +0400 -+++ linux-2.6.9-full/include/linux/ext3_fs_sb.h 2007-03-28 15:42:16.000000000 +0400 -@@ -81,6 +81,61 @@ struct ext3_sb_info { - char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ - int s_jquota_fmt; /* Format of quota to use */ - #endif -+ -+ /* for buddy allocator */ -+ struct ext3_group_info ***s_group_info; -+ struct inode *s_buddy_cache; -+ long s_blocks_reserved; -+ spinlock_t s_reserve_lock; -+ struct list_head s_active_transaction; -+ struct list_head s_closed_transaction; -+ struct list_head s_committed_transaction; -+ spinlock_t s_md_lock; -+ tid_t s_last_transaction; -+ unsigned short *s_mb_offsets, *s_mb_maxs; -+ -+ /* tunables */ -+ unsigned long s_mb_factor; -+ unsigned long s_stripe; -+ unsigned long s_mb_stream_request; -+ unsigned long s_mb_max_to_scan; -+ unsigned long s_mb_min_to_scan; -+ unsigned long s_mb_max_groups_to_scan; -+ unsigned long s_mb_stats; -+ unsigned long s_mb_order2_reqs; -+ -+ /* history to debug policy */ -+ struct ext3_mb_history *s_mb_history; -+ int s_mb_history_cur; -+ int s_mb_history_max; -+ int s_mb_history_num; -+ struct proc_dir_entry *s_mb_proc; -+ spinlock_t s_mb_history_lock; -+ int s_mb_history_filter; -+ -+ /* stats for buddy allocator */ -+ spinlock_t s_mb_pa_lock; -+ atomic_t s_bal_reqs; /* number of reqs with len > 1 */ -+ atomic_t s_bal_success; /* we found long enough chunks */ -+ atomic_t s_bal_allocated; /* in blocks */ -+ atomic_t s_bal_ex_scanned; /* total extents scanned */ -+ atomic_t s_bal_goals; /* goal hits */ -+ atomic_t s_bal_breaks; /* too long searches */ -+ atomic_t s_bal_2orders; /* 2^order hits */ -+ spinlock_t s_bal_lock; -+ unsigned long s_mb_buddies_generated; -+ unsigned long long s_mb_generation_time; -+ atomic_t s_mb_lost_chunks; -+ atomic_t s_mb_preallocated; -+ atomic_t s_mb_discarded; -+ -+ /* locality groups */ -+ struct ext3_locality_group *s_locality_groups; -+ - }; - -+#define EXT3_GROUP_INFO(sb, group) \ -+ 
EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \ -+ [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)] -+ - #endif /* _LINUX_EXT3_FS_SB */ -Index: linux-2.6.9-full/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.9-full.orig/include/linux/ext3_fs.h 2007-03-28 01:29:39.000000000 +0400 -+++ linux-2.6.9-full/include/linux/ext3_fs.h 2007-03-28 15:45:07.000000000 +0400 -@@ -57,6 +57,30 @@ struct statfs; - #define ext3_debug(f, a...) do {} while (0) - #endif - -+#define EXT3_MULTIBLOCK_ALLOCATOR 1 -+ -+#define EXT3_MB_HINT_MERGE 1 /* prefer goal again. length */ -+#define EXT3_MB_HINT_RESERVED 2 /* blocks already reserved */ -+#define EXT3_MB_HINT_METADATA 4 /* metadata is being allocated */ -+#define EXT3_MB_HINT_FIRST 8 /* first blocks in the file */ -+#define EXT3_MB_HINT_BEST 16 /* search for the best chunk */ -+#define EXT3_MB_HINT_DATA 32 /* data is being allocated */ -+#define EXT3_MB_HINT_NOPREALLOC 64 /* don't preallocate (for tails) */ -+#define EXT3_MB_HINT_GROUP_ALLOC 128 /* allocate for locality group */ -+#define EXT3_MB_HINT_GOAL_ONLY 256 /* allocate goal blocks or none */ -+ -+struct ext3_allocation_request { -+ struct inode *inode; /* target inode for block we're allocating */ -+ unsigned long logical; /* logical block in target inode */ -+ unsigned long goal; /* phys. target (a hint) */ -+ unsigned long lleft; /* the closest logical allocated block to the left */ -+ unsigned long pleft; /* phys. block for ^^^ */ -+ unsigned long lright; /* the closest logical allocated block to the right */ -+ unsigned long pright; /* phys. block for ^^^ */ -+ unsigned long len; /* how many blocks we want to allocate */ -+ unsigned long flags; /* flags. 
see above EXT3_MB_HINT_* */ -+}; -+ - /* - * Special inodes numbers - */ -@@ -404,6 +413,14 @@ - #define ext3_find_first_zero_bit ext2_find_first_zero_bit - #define ext3_find_next_zero_bit ext2_find_next_zero_bit - -+#ifndef ext2_find_next_le_bit -+#ifdef __LITTLE_ENDIAN -+#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off)) -+#else -+#error "mballoc needs a patch for big-endian systems - CFS bug 10634" -+#endif /* __LITTLE_ENDIAN */ -+#endif /* !ext2_find_next_le_bit */ -+ - /* - * Maximal mount counts between two filesystem checks - */ -@@ -763,6 +787,20 @@ extern unsigned long ext3_count_dirs (st - extern void ext3_check_inodes_bitmap (struct super_block *); - extern unsigned long ext3_count_free (struct buffer_head *, unsigned); - -+/* mballoc.c */ -+extern long ext3_mb_stats; -+extern long ext3_mb_max_to_scan; -+extern int ext3_mb_init(struct super_block *, int); -+extern int ext3_mb_release(struct super_block *); -+extern unsigned long ext3_mb_new_blocks(handle_t *, struct ext3_allocation_request *, int *); -+extern int ext3_mb_reserve_blocks(struct super_block *, int); -+extern void ext3_mb_release_blocks(struct super_block *, int); -+extern void ext3_mb_release_blocks(struct super_block *, int); -+extern void ext3_mb_discard_inode_preallocations(struct inode *); -+extern int __init init_ext3_proc(void); -+extern void exit_ext3_proc(void); -+extern void ext3_mb_free_blocks(handle_t *, struct inode *, unsigned long, unsigned long, int, int *); -+ - - /* inode.c */ - extern int ext3_block_truncate_page(handle_t *, struct page *, -Index: linux-2.6.9-full/fs/ext3/super.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/super.c 2007-03-28 01:29:38.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/super.c 2007-03-28 15:42:16.000000000 +0400 -@@ -394,6 +394,7 @@ void ext3_put_super (struct super_block - struct ext3_super_block *es = sbi->s_es; - int i; - -+ ext3_mb_release(sb); - 
ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); -@@ -463,6 +464,8 @@ static struct inode *ext3_alloc_inode(st - ei->vfs_inode.i_version = 1; - - memset(&ei->i_cached_extent, 0, sizeof(ei->i_cached_extent)); -+ INIT_LIST_HEAD(&ei->i_prealloc_list); -+ spin_lock_init(&ei->i_prealloc_lock); - return &ei->vfs_inode; - } - -@@ -2433,7 +2436,13 @@ static struct file_system_type ext3_fs_t - - static int __init init_ext3_fs(void) - { -- int err = init_ext3_xattr(); -+ int err; -+ -+ err = init_ext3_proc(); -+ if (err) -+ return err; -+ -+ err = init_ext3_xattr(); - if (err) - return err; - err = init_inodecache(); -@@ -2455,6 +2464,7 @@ static void __exit exit_ext3_fs(void) - unregister_filesystem(&ext3_fs_type); - destroy_inodecache(); - exit_ext3_xattr(); -+ exit_ext3_proc(); - } - - int ext3_prep_san_write(struct inode *inode, long *blocks, -Index: linux-2.6.9-full/fs/ext3/mballoc.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/mballoc.c 2007-02-13 18:39:59.640066087 +0300 -+++ linux-2.6.9-full/fs/ext3/mballoc.c 2007-03-29 00:28:40.000000000 +0400 -@@ -0,0 +1,4342 @@ -+/* -+ * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas <alex@clusterfs.com> -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+ -+/* -+ * mballoc.c contains the multiblocks allocation routines -+ */ -+ -+#include <linux/time.h> -+#include <linux/fs.h> -+#include <linux/namei.h> -+#include <linux/ext3_jbd.h> -+#include <linux/jbd.h> -+#include <linux/ext3_fs.h> -+#include <linux/quotaops.h> -+#include <linux/buffer_head.h> -+#include <linux/module.h> -+#include <linux/swap.h> -+#include <linux/proc_fs.h> -+#include <linux/pagemap.h> -+#include <linux/seq_file.h> -+#include <linux/version.h> -+ -+/* -+ * MUSTDO: -+ * - test ext3_ext_search_left() and ext3_ext_search_right() -+ * - search for metadata in few groups -+ * -+ * TODO v4: -+ * - normalization should take into account whether file is still open -+ * - discard preallocations if no free space left (policy?) -+ * - don't normalize tails -+ * - quota -+ * - reservation for superuser -+ * -+ * TODO v3: -+ * - bitmap read-ahead (proposed by Oleg Drokin aka green) -+ * - track min/max extents in each group for better group selection -+ * - mb_mark_used() may allocate chunk right after splitting buddy -+ * - tree of groups sorted by number of free blocks -+ * - error handling -+ */ -+ -+/* -+ * mballoc operates on the following data: -+ * - on-disk bitmap -+ * - in-core buddy (actually includes buddy and bitmap) -+ * - preallocation descriptors (PAs) -+ * -+ * there are two types of preallocations: -+ * - inode -+ * assiged to specific inode and can be used for this inode only. -+ * it describes part of inode's space preallocated to specific -+ * physical blocks. any block from that preallocated can be used -+ * independent. the descriptor just tracks number of blocks left -+ * unused. so, before taking some block from descriptor, one must -+ * make sure corresponded logical block isn't allocated yet. 
this -+ * also means that freeing any block within descriptor's range -+ * must discard all preallocated blocks. -+ * - locality group -+ * assigned to specific locality group which does not translate to -+ * permanent set of inodes: inode can join and leave group. space -+ * from this type of preallocation can be used for any inode. thus -+ * it's consumed from the beginning to the end. -+ * -+ * relation between them can be expressed as: -+ * in-core buddy = on-disk bitmap + preallocation descriptors -+ * -+ * this mean blocks mballoc considers used are: -+ * - allocated blocks (persistent) -+ * - preallocated blocks (non-persistent) -+ * -+ * consistency in mballoc world means that at any time a block is either -+ * free or used in ALL structures. notice: "any time" should not be read -+ * literally -- time is discrete and delimited by locks. -+ * -+ * to keep it simple, we don't use block numbers, instead we count number of -+ * blocks: how many blocks marked used/free in on-disk bitmap, buddy and PA. -+ * -+ * all operations can be expressed as: -+ * - init buddy: buddy = on-disk + PAs -+ * - new PA: buddy += N; PA = N -+ * - use inode PA: on-disk += N; PA -= N -+ * - discard inode PA buddy -= on-disk - PA; PA = 0 -+ * - use locality group PA on-disk += N; PA -= N -+ * - discard locality group PA buddy -= PA; PA = 0 -+ * note: 'buddy -= on-disk - PA' is used to show that on-disk bitmap -+ * is used in real operation because we can't know actual used -+ * bits from PA, only from on-disk bitmap -+ * -+ * if we follow this strict logic, then all operations above should be atomic. -+ * given some of them can block, we'd have to use something like semaphores -+ * killing performance on high-end SMP hardware. 
let's try to relax it using -+ * the following knowledge: -+ * 1) if buddy is referenced, it's already initialized -+ * 2) while block is used in buddy and the buddy is referenced, -+ * nobody can re-allocate that block -+ * 3) we work on bitmaps and '+' actually means 'set bits'. if on-disk has -+ * bit set and PA claims same block, it's OK. IOW, one can set bit in -+ * on-disk bitmap if buddy has same bit set or/and PA covers corresponded -+ * block -+ * -+ * so, now we're building a concurrency table: -+ * - init buddy vs. -+ * - new PA -+ * blocks for PA are allocated in the buddy, buddy must be referenced -+ * until PA is linked to allocation group to avoid concurrent buddy init -+ * - use inode PA -+ * we need to make sure that either on-disk bitmap or PA has uptodate data -+ * given (3) we care that PA-=N operation doesn't interfere with init -+ * - discard inode PA -+ * the simplest way would be to have buddy initialized by the discard -+ * - use locality group PA -+ * again PA-=N must be serialized with init -+ * - discard locality group PA -+ * the simplest way would be to have buddy initialized by the discard -+ * - new PA vs. 
-+ * - use inode PA -+ * i_truncate_mutex serializes them -+ * - discard inode PA -+ * discard process must wait until PA isn't used by another process -+ * - use locality group PA -+ * some mutex should serialize them -+ * - discard locality group PA -+ * discard process must wait until PA isn't used by another process -+ * - use inode PA -+ * - use inode PA -+ * i_truncate_mutex or another mutex should serializes them -+ * - discard inode PA -+ * discard process must wait until PA isn't used by another process -+ * - use locality group PA -+ * nothing wrong here -- they're different PAs covering different blocks -+ * - discard locality group PA -+ * discard process must wait until PA isn't used by another process -+ * -+ * now we're ready to make few consequences: -+ * - PA is referenced and while it is no discard is possible -+ * - PA is referenced until block isn't marked in on-disk bitmap -+ * - PA changes only after on-disk bitmap -+ * - discard must not compete with init. either init is done before -+ * any discard or they're serialized somehow -+ * - buddy init as sum of on-disk bitmap and PAs is done atomically -+ * -+ * a special case when we've used PA to emptiness. 
no need to modify buddy -+ * in this case, but we should care about concurrent init -+ * -+ */ -+ -+ /* -+ * Logic in few words: -+ * -+ * - allocation: -+ * load group -+ * find blocks -+ * mark bits in on-disk bitmap -+ * release group -+ * -+ * - use preallocation: -+ * find proper PA (per-inode or group) -+ * load group -+ * mark bits in on-disk bitmap -+ * release group -+ * release PA -+ * -+ * - free: -+ * load group -+ * mark bits in on-disk bitmap -+ * release group -+ * -+ * - discard preallocations in group: -+ * mark PAs deleted -+ * move them onto local list -+ * load on-disk bitmap -+ * load group -+ * remove PA from object (inode or locality group) -+ * mark free blocks in-core -+ * -+ * - discard inode's preallocations: -+ */ -+ -+/* -+ * Locking rules -+ * -+ * Locks: -+ * - bitlock on a group (group) -+ * - object (inode/locality) (object) -+ * - per-pa lock (pa) -+ * -+ * Paths: -+ * - new pa -+ * object -+ * group -+ * -+ * - find and use pa: -+ * pa -+ * -+ * - release consumed pa: -+ * pa -+ * group -+ * object -+ * -+ * - generate in-core bitmap: -+ * group -+ * pa -+ * -+ * - discard all for given object (inode, locality group): -+ * object -+ * pa -+ * group -+ * -+ * - discard all for given group: -+ * group -+ * pa -+ * group -+ * object -+ * -+ */ -+ -+/* -+ * with AGGRESSIVE_CHECK allocator runs consistency checks over -+ * structures. these checks slow things down a lot -+ */ -+#define AGGRESSIVE_CHECK__ -+ -+/* -+ * with DOUBLE_CHECK defined mballoc creates persistent in-core -+ * bitmaps, maintains and uses them to check for double allocations -+ */ -+#define DOUBLE_CHECK__ -+ -+/* -+ */ -+#define MB_DEBUG__ -+#ifdef MB_DEBUG -+#define mb_debug(fmt,a...) printk(fmt, ##a) -+#else -+#define mb_debug(fmt,a...) 
-+#endif -+ -+/* -+ * with EXT3_MB_HISTORY mballoc stores last N allocations in memory -+ * and you can monitor it in /proc/fs/ext3/<dev>/mb_history -+ */ -+#define EXT3_MB_HISTORY -+#define EXT3_MB_HISTORY_ALLOC 1 /* allocation */ -+#define EXT3_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */ -+#define EXT3_MB_HISTORY_DISCARD 4 /* preallocation discarded */ -+#define EXT3_MB_HISTORY_FREE 8 /* free */ -+ -+#define EXT3_MB_HISTORY_DEFAULT (EXT3_MB_HISTORY_ALLOC | \ -+ EXT3_MB_HISTORY_PREALLOC | \ -+ EXT3_MB_HISTORY_DISCARD | \ -+ EXT3_MB_HISTORY_FREE) -+ -+/* -+ * How long mballoc can look for a best extent (in found extents) -+ */ -+#define MB_DEFAULT_MAX_TO_SCAN 200 -+ -+/* -+ * How long mballoc must look for a best extent -+ */ -+#define MB_DEFAULT_MIN_TO_SCAN 10 -+ -+/* -+ * How many groups mballoc will scan looking for the best chunk -+ */ -+#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5 -+ -+/* -+ * with 'ext3_mb_stats' allocator will collect stats that will be -+ * shown at umount. The collecting costs though! 
-+ */ -+#define MB_DEFAULT_STATS 1 -+ -+/* -+ * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served -+ * by the stream allocator, which purpose is to pack requests -+ * as close each to other as possible to produce smooth I/O traffic -+ */ -+#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */ -+ -+/* -+ * for which requests use 2^N search using buddies -+ */ -+#define MB_DEFAULT_ORDER2_REQS 8 -+ -+/* -+ * default stripe size = 1MB -+ */ -+#define MB_DEFAULT_STRIPE 256 -+ -+static kmem_cache_t *ext3_pspace_cachep = NULL; -+ -+#ifdef EXT3_BB_MAX_BLOCKS -+#undef EXT3_BB_MAX_BLOCKS -+#endif -+#define EXT3_BB_MAX_BLOCKS 30 -+ -+struct ext3_free_metadata { -+ unsigned short group; -+ unsigned short num; -+ unsigned short blocks[EXT3_BB_MAX_BLOCKS]; -+ struct list_head list; -+}; -+ -+struct ext3_group_info { -+ unsigned long bb_state; -+ unsigned long bb_tid; -+ struct ext3_free_metadata *bb_md_cur; -+ unsigned short bb_first_free; -+ unsigned short bb_free; -+ unsigned short bb_fragments; -+ struct list_head bb_prealloc_list; -+#ifdef DOUBLE_CHECK -+ void *bb_bitmap; -+#endif -+ unsigned short bb_counters[]; -+}; -+ -+#define EXT3_GROUP_INFO_NEED_INIT_BIT 0 -+#define EXT3_GROUP_INFO_LOCKED_BIT 1 -+ -+#define EXT3_MB_GRP_NEED_INIT(grp) \ -+ (test_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &(grp)->bb_state)) -+ -+ -+struct ext3_prealloc_space { -+ struct list_head pa_inode_list; -+ struct list_head pa_group_list; -+ union { -+ struct list_head pa_tmp_list; -+ struct rcu_head pa_rcu; -+ } u; -+ spinlock_t pa_lock; -+ atomic_t pa_count; -+ unsigned pa_deleted; -+ unsigned long pa_pstart; /* phys. block */ -+ unsigned long pa_lstart; /* log. 
block */ -+ unsigned short pa_len; /* len of preallocated chunk */ -+ unsigned short pa_free; /* how many blocks are free */ -+ unsigned short pa_linear; /* consumed in one direction -+ * strictly, for group prealloc */ -+ spinlock_t *pa_obj_lock; -+ struct inode *pa_inode; /* hack, for history only */ -+}; -+ -+ -+struct ext3_free_extent { -+ unsigned long fe_logical; -+ unsigned long fe_start; -+ unsigned long fe_group; -+ unsigned long fe_len; -+}; -+ -+/* -+ * Locality group: -+ * we try to group all related changes together -+ * so that writeback can flush/allocate them together as well -+ */ -+struct ext3_locality_group { -+ /* for allocator */ -+ struct semaphore lg_sem; /* to serialize allocates */ -+ struct list_head lg_prealloc_list;/* list of preallocations */ -+ spinlock_t lg_prealloc_lock; -+}; -+ -+struct ext3_allocation_context { -+ struct inode *ac_inode; -+ struct super_block *ac_sb; -+ -+ /* original request */ -+ struct ext3_free_extent ac_o_ex; -+ -+ /* goal request (after normalization) */ -+ struct ext3_free_extent ac_g_ex; -+ -+ /* the best found extent */ -+ struct ext3_free_extent ac_b_ex; -+ -+ /* copy of the bext found extent taken before preallocation efforts */ -+ struct ext3_free_extent ac_f_ex; -+ -+ /* number of iterations done. 
we have to track to limit searching */ -+ unsigned long ac_ex_scanned; -+ __u16 ac_groups_scanned; -+ __u16 ac_found; -+ __u16 ac_tail; -+ __u16 ac_buddy; -+ __u16 ac_flags; /* allocation hints */ -+ __u8 ac_status; -+ __u8 ac_criteria; -+ __u8 ac_repeats; -+ __u8 ac_2order; /* if request is to allocate 2^N blocks and -+ * N > 0, the field stores N, otherwise 0 */ -+ __u8 ac_op; /* operation, for history only */ -+ struct page *ac_bitmap_page; -+ struct page *ac_buddy_page; -+ struct ext3_prealloc_space *ac_pa; -+ struct ext3_locality_group *ac_lg; -+}; -+ -+#define AC_STATUS_CONTINUE 1 -+#define AC_STATUS_FOUND 2 -+#define AC_STATUS_BREAK 3 -+ -+struct ext3_mb_history { -+ struct ext3_free_extent orig; /* orig allocation */ -+ struct ext3_free_extent goal; /* goal allocation */ -+ struct ext3_free_extent result; /* result allocation */ -+ unsigned pid; -+ unsigned ino; -+ __u16 found; /* how many extents have been found */ -+ __u16 groups; /* how many groups have been scanned */ -+ __u16 tail; /* what tail broke some buddy */ -+ __u16 buddy; /* buddy the tail ^^^ broke */ -+ __u16 flags; -+ __u8 cr:3; /* which phase the result extent was found at */ -+ __u8 op:4; -+ __u8 merged:1; -+}; -+ -+struct ext3_buddy { -+ struct page *bd_buddy_page; -+ void *bd_buddy; -+ struct page *bd_bitmap_page; -+ void *bd_bitmap; -+ struct ext3_group_info *bd_info; -+ struct super_block *bd_sb; -+ __u16 bd_blkbits; -+ __u16 bd_group; -+}; -+#define EXT3_MB_BITMAP(e3b) ((e3b)->bd_bitmap) -+#define EXT3_MB_BUDDY(e3b) ((e3b)->bd_buddy) -+ -+#ifndef EXT3_MB_HISTORY -+#define ext3_mb_store_history(ac) -+#else -+static void ext3_mb_store_history(struct ext3_allocation_context *ac); -+#endif -+ -+#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) -+ -+static struct proc_dir_entry *proc_root_ext3; -+ -+int ext3_create (struct inode *, struct dentry *, int, struct nameidata *); -+struct buffer_head * read_block_bitmap(struct super_block *, unsigned int); -+unsigned 
long ext3_new_blocks_old(handle_t *handle, struct inode *inode, -+ unsigned long goal, unsigned long *count, int *errp); -+void ext3_mb_release_blocks(struct super_block *, int); -+void ext3_mb_poll_new_transaction(struct super_block *, handle_t *); -+void ext3_mb_free_committed_blocks(struct super_block *); -+void ext3_mb_generate_from_pa(struct super_block *sb, void *bitmap, int group); -+void ext3_mb_free_consumed_preallocations(struct ext3_allocation_context *ac); -+void ext3_mb_return_to_preallocation(struct inode *inode, struct ext3_buddy *e3b, -+ sector_t block, int count); -+void ext3_mb_show_ac(struct ext3_allocation_context *ac); -+void ext3_mb_check_with_pa(struct ext3_buddy *e3b, int first, int count); -+void ext3_mb_put_pa(struct ext3_allocation_context *, struct super_block *, struct ext3_prealloc_space *pa); -+int ext3_mb_init_per_dev_proc(struct super_block *sb); -+int ext3_mb_destroy_per_dev_proc(struct super_block *sb); -+ -+/* -+ * Calculate the block group number and offset, given a block number -+ */ -+static void ext3_get_group_no_and_offset(struct super_block *sb, -+ unsigned long blocknr, -+ unsigned long *blockgrpp, -+ unsigned long *offsetp) -+{ -+ struct ext3_super_block *es = EXT3_SB(sb)->s_es; -+ unsigned long offset; -+ -+ blocknr = blocknr - le32_to_cpu(es->s_first_data_block); -+ offset = blocknr % EXT3_BLOCKS_PER_GROUP(sb); -+ blocknr = blocknr / EXT3_BLOCKS_PER_GROUP(sb); -+ if (offsetp) -+ *offsetp = offset; -+ if (blockgrpp) -+ *blockgrpp = blocknr; -+ -+} -+ -+static inline void -+ext3_lock_group(struct super_block *sb, int group) -+{ -+ bit_spin_lock(EXT3_GROUP_INFO_LOCKED_BIT, -+ &EXT3_GROUP_INFO(sb, group)->bb_state); -+} -+ -+static inline void -+ext3_unlock_group(struct super_block *sb, int group) -+{ -+ bit_spin_unlock(EXT3_GROUP_INFO_LOCKED_BIT, -+ &EXT3_GROUP_INFO(sb, group)->bb_state); -+} -+ -+static inline int -+ext3_is_group_locked(struct super_block *sb, int group) -+{ -+ return 
bit_spin_is_locked(EXT3_GROUP_INFO_LOCKED_BIT, -+ &EXT3_GROUP_INFO(sb, group)->bb_state); -+} -+ -+unsigned long ext3_grp_offs_to_block(struct super_block *sb, -+ struct ext3_free_extent *fex) -+{ -+ unsigned long block; -+ -+ block = (unsigned long) fex->fe_group * EXT3_BLOCKS_PER_GROUP(sb) -+ + fex->fe_start -+ + le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block); -+ return block; -+} -+ -+#if BITS_PER_LONG == 64 -+#define mb_correct_addr_and_bit(bit,addr) \ -+{ \ -+ bit += ((unsigned long) addr & 7UL) << 3; \ -+ addr = (void *) ((unsigned long) addr & ~7UL); \ -+} -+#elif BITS_PER_LONG == 32 -+#define mb_correct_addr_and_bit(bit,addr) \ -+{ \ -+ bit += ((unsigned long) addr & 3UL) << 3; \ -+ addr = (void *) ((unsigned long) addr & ~3UL); \ -+} -+#else -+#error "how many bits you are?!" -+#endif -+ -+static inline int mb_test_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ return ext2_test_bit(bit, addr); -+} -+ -+static inline void mb_set_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_set_bit(bit, addr); -+} -+ -+static inline void mb_set_bit_atomic(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_set_bit_atomic(NULL, bit, addr); -+} -+ -+static inline void mb_clear_bit(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_clear_bit(bit, addr); -+} -+ -+static inline void mb_clear_bit_atomic(int bit, void *addr) -+{ -+ mb_correct_addr_and_bit(bit,addr); -+ ext2_clear_bit_atomic(NULL, bit, addr); -+} -+ -+static inline int mb_find_next_zero_bit(void *addr, int max, int start) -+{ -+ int fix; -+#if BITS_PER_LONG == 64 -+ fix = ((unsigned long) addr & 7UL) << 3; -+ addr = (void *) ((unsigned long) addr & ~7UL); -+#elif BITS_PER_LONG == 32 -+ fix = ((unsigned long) addr & 3UL) << 3; -+ addr = (void *) ((unsigned long) addr & ~3UL); -+#else -+#error "how many bits you are?!" 
-+#endif -+ max += fix; -+ start += fix; -+ return ext2_find_next_zero_bit(addr, max, start) - fix; -+} -+ -+static inline int mb_find_next_bit(void *addr, int max, int start) -+{ -+ int fix; -+#if BITS_PER_LONG == 64 -+ fix = ((unsigned long) addr & 7UL) << 3; -+ addr = (void *) ((unsigned long) addr & ~7UL); -+#elif BITS_PER_LONG == 32 -+ fix = ((unsigned long) addr & 3UL) << 3; -+ addr = (void *) ((unsigned long) addr & ~3UL); -+#else -+#error "how many bits you are?!" -+#endif -+ max += fix; -+ start += fix; -+ -+#ifdef __BIG_ENDIAN -+#else -+ return find_next_bit(addr, max, start) - fix; -+#endif -+} -+ -+static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max) -+{ -+ char *bb; -+ -+ BUG_ON(EXT3_MB_BITMAP(e3b) == EXT3_MB_BUDDY(e3b)); -+ BUG_ON(max == NULL); -+ -+ if (order > e3b->bd_blkbits + 1) { -+ *max = 0; -+ return NULL; -+ } -+ -+ /* at order 0 we see each particular block */ -+ *max = 1 << (e3b->bd_blkbits + 3); -+ if (order == 0) -+ return EXT3_MB_BITMAP(e3b); -+ -+ bb = EXT3_MB_BUDDY(e3b) + EXT3_SB(e3b->bd_sb)->s_mb_offsets[order]; -+ *max = EXT3_SB(e3b->bd_sb)->s_mb_maxs[order]; -+ -+ return bb; -+} -+ -+#ifdef DOUBLE_CHECK -+void mb_free_blocks_double(struct ext3_buddy *e3b, int first, int count) -+{ -+ int i; -+ if (unlikely(e3b->bd_info->bb_bitmap == NULL)) -+ return; -+ BUG_ON(!ext3_is_group_locked(e3b->bd_sb, e3b->bd_group)); -+ for (i = 0; i < count; i++) { -+ BUG_ON(!mb_test_bit(first + i, e3b->bd_info->bb_bitmap)); -+ mb_clear_bit(first + i, e3b->bd_info->bb_bitmap); -+ } -+} -+ -+void mb_mark_used_double(struct ext3_buddy *e3b, int first, int count) -+{ -+ int i; -+ if (unlikely(e3b->bd_info->bb_bitmap == NULL)) -+ return; -+ BUG_ON(!ext3_is_group_locked(e3b->bd_sb, e3b->bd_group)); -+ for (i = 0; i < count; i++) { -+ BUG_ON(mb_test_bit(first + i, e3b->bd_info->bb_bitmap)); -+ mb_set_bit(first + i, e3b->bd_info->bb_bitmap); -+ } -+} -+ -+void mb_cmp_bitmaps(struct ext3_buddy *e3b, void *bitmap) -+{ -+ if 
(memcmp(e3b->bd_info->bb_bitmap, bitmap, e3b->bd_sb->s_blocksize)) { -+ unsigned char *b1, *b2; -+ int i; -+ b1 = (unsigned char *) e3b->bd_info->bb_bitmap; -+ b2 = (unsigned char *) bitmap; -+ for (i = 0; i < e3b->bd_sb->s_blocksize; i++) { -+ if (b1[i] != b2[i]) { -+ printk("corruption in group %u at byte %u(%u): " -+ "%x in copy != %x on disk/prealloc\n", -+ e3b->bd_group, i, i * 8, b1[i], b2[i]); -+ BUG(); -+ } -+ } -+ } -+} -+ -+#else -+#define mb_free_blocks_double(a,b,c) -+#define mb_mark_used_double(a,b,c) -+#define mb_cmp_bitmaps(a,b) -+#endif -+ -+#ifdef AGGRESSIVE_CHECK -+ -+#define MB_CHECK_ASSERT(assert) \ -+do { \ -+ if (!(assert)) { \ -+ printk (KERN_EMERG \ -+ "Assertion failure in %s() at %s:%d: \"%s\"\n", \ -+ function, file, line, # assert); \ -+ BUG(); \ -+ } \ -+} while (0) -+ -+static int __mb_check_buddy(struct ext3_buddy *e3b, char *file, -+ const char *function, int line) -+{ -+ struct super_block *sb = e3b->bd_sb; -+ int order = e3b->bd_blkbits + 1; -+ int max, max2, i, j, k, count; -+ struct ext3_group_info *grp; -+ int fragments = 0, fstart; -+ struct list_head *cur; -+ void *buddy, *buddy2; -+ -+ if (!test_opt(sb, MBALLOC)) -+ return 0; -+ -+ { -+ static int mb_check_counter = 0; -+ if (mb_check_counter++ % 100 != 0) -+ return 0; -+ } -+ -+ while (order > 1) { -+ buddy = mb_find_buddy(e3b, order, &max); -+ MB_CHECK_ASSERT(buddy); -+ buddy2 = mb_find_buddy(e3b, order - 1, &max2); -+ MB_CHECK_ASSERT(buddy2); -+ MB_CHECK_ASSERT(buddy != buddy2); -+ MB_CHECK_ASSERT(max * 2 == max2); -+ -+ count = 0; -+ for (i = 0; i < max; i++) { -+ -+ if (mb_test_bit(i, buddy)) { -+ /* only single bit in buddy2 may be 1 */ -+ if (!mb_test_bit(i << 1, buddy2)) -+ MB_CHECK_ASSERT(mb_test_bit((i<<1)+1, buddy2)); -+ else if (!mb_test_bit((i << 1) + 1, buddy2)) -+ MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2)); -+ continue; -+ } -+ -+ /* both bits in buddy2 must be 0 */ -+ MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2)); -+ MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, 
buddy2)); -+ -+ for (j = 0; j < (1 << order); j++) { -+ k = (i * (1 << order)) + j; -+ MB_CHECK_ASSERT(!mb_test_bit(k, EXT3_MB_BITMAP(e3b))); -+ } -+ count++; -+ } -+ MB_CHECK_ASSERT(e3b->bd_info->bb_counters[order] == count); -+ order--; -+ } -+ -+ fstart = -1; -+ buddy = mb_find_buddy(e3b, 0, &max); -+ for (i = 0; i < max; i++) { -+ if (!mb_test_bit(i, buddy)) { -+ MB_CHECK_ASSERT(i >= e3b->bd_info->bb_first_free); -+ if (fstart == -1) { -+ fragments++; -+ fstart = i; -+ } -+ continue; -+ } -+ fstart = -1; -+ /* check used bits only */ -+ for (j = 0; j < e3b->bd_blkbits + 1; j++) { -+ buddy2 = mb_find_buddy(e3b, j, &max2); -+ k = i >> j; -+ MB_CHECK_ASSERT(k < max2); -+ MB_CHECK_ASSERT(mb_test_bit(k, buddy2)); -+ } -+ } -+ MB_CHECK_ASSERT(!EXT3_MB_GRP_NEED_INIT(e3b->bd_info)); -+ MB_CHECK_ASSERT(e3b->bd_info->bb_fragments == fragments); -+ -+ grp = EXT3_GROUP_INFO(sb, e3b->bd_group); -+ buddy = mb_find_buddy(e3b, 0, &max); -+ list_for_each(cur, &grp->bb_prealloc_list) { -+ unsigned long groupnr; -+ struct ext3_prealloc_space *pa; -+ pa = list_entry(cur, struct ext3_prealloc_space, group_list); -+ ext3_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k); -+ MB_CHECK_ASSERT(groupnr == e3b->bd_group); -+ for (i = 0; i < pa->len; i++) -+ MB_CHECK_ASSERT(mb_test_bit(k + i, buddy)); -+ } -+ return 0; -+} -+#undef MB_CHECK_ASSERT -+#define mb_check_buddy(e3b) __mb_check_buddy(e3b,__FILE__,__FUNCTION__,__LINE__) -+#else -+#define mb_check_buddy(e3b) -+#endif -+ -+/* find most significant bit */ -+static int inline fmsb(unsigned short word) -+{ -+ int order; -+ -+ if (word > 255) { -+ order = 7; -+ word >>= 8; -+ } else { -+ order = -1; -+ } -+ -+ do { -+ order++; -+ word >>= 1; -+ } while (word != 0); -+ -+ return order; -+} -+ -+static void inline -+ext3_mb_mark_free_simple(struct super_block *sb, void *buddy, unsigned first, -+ int len, struct ext3_group_info *grp) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ unsigned short min, max, chunk, border; -+ -+ 
BUG_ON(len >= EXT3_BLOCKS_PER_GROUP(sb)); -+ -+ border = 2 << sb->s_blocksize_bits; -+ -+ while (len > 0) { -+ /* find how many blocks can be covered since this position */ -+ max = ffs(first | border) - 1; -+ -+ /* find how many blocks of power 2 we need to mark */ -+ min = fmsb(len); -+ -+ if (max < min) -+ min = max; -+ chunk = 1 << min; -+ -+ /* mark multiblock chunks only */ -+ grp->bb_counters[min]++; -+ if (min > 0) -+ mb_clear_bit(first >> min, buddy + sbi->s_mb_offsets[min]); -+ -+ len -= chunk; -+ first += chunk; -+ } -+} -+ -+static void -+ext3_mb_generate_buddy(struct super_block *sb, void *buddy, void *bitmap, -+ int group) -+{ -+ struct ext3_group_info *grp = EXT3_GROUP_INFO(sb, group); -+ unsigned short max = EXT3_BLOCKS_PER_GROUP(sb); -+ unsigned short i = 0, first, len; -+ unsigned free = 0, fragments = 0; -+ unsigned long long period = get_cycles(); -+ -+ /* initialize buddy from bitmap which is aggregation -+ * of on-disk bitmap and preallocations */ -+ i = mb_find_next_zero_bit(bitmap, max, 0); -+ grp->bb_first_free = i; -+ while (i < max) { -+ fragments++; -+ first = i; -+ i = ext2_find_next_le_bit(bitmap, max, i); -+ len = i - first; -+ free += len; -+ if (len > 1) -+ ext3_mb_mark_free_simple(sb, buddy, first, len, grp); -+ else -+ grp->bb_counters[0]++; -+ if (i < max) -+ i = mb_find_next_zero_bit(bitmap, max, i); -+ } -+ grp->bb_fragments = fragments; -+ -+ if (free != grp->bb_free) { -+ printk("EXT3-fs: group %u: %u blocks in bitmap, %u in gd\n", -+ group, free, grp->bb_free); -+ grp->bb_free = free; -+ } -+ -+ clear_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &grp->bb_state); -+ -+ period = get_cycles() - period; -+ spin_lock(&EXT3_SB(sb)->s_bal_lock); -+ EXT3_SB(sb)->s_mb_buddies_generated++; -+ EXT3_SB(sb)->s_mb_generation_time += period; -+ spin_unlock(&EXT3_SB(sb)->s_bal_lock); -+} -+ -+static int ext3_mb_init_cache(struct page *page, char *incore) -+{ -+ int blocksize, blocks_per_page, groups_per_page; -+ int err = 0, i, first_group, 
first_block; -+ struct super_block *sb; -+ struct buffer_head *bhs; -+ struct buffer_head **bh; -+ struct inode *inode; -+ char *data, *bitmap; -+ -+ mb_debug("init page %lu\n", page->index); -+ -+ inode = page->mapping->host; -+ sb = inode->i_sb; -+ blocksize = 1 << inode->i_blkbits; -+ blocks_per_page = PAGE_CACHE_SIZE / blocksize; -+ -+ groups_per_page = blocks_per_page >> 1; -+ if (groups_per_page == 0) -+ groups_per_page = 1; -+ -+ /* allocate buffer_heads to read bitmaps */ -+ if (groups_per_page > 1) { -+ err = -ENOMEM; -+ i = sizeof(struct buffer_head *) * groups_per_page; -+ bh = kmalloc(i, GFP_NOFS); -+ if (bh == NULL) -+ goto out; -+ memset(bh, 0, i); -+ } else -+ bh = &bhs; -+ -+ first_group = page->index * blocks_per_page / 2; -+ -+ /* read all groups the page covers into the cache */ -+ for (i = 0; i < groups_per_page; i++) { -+ struct ext3_group_desc * desc; -+ -+ if (first_group + i >= EXT3_SB(sb)->s_groups_count) -+ break; -+ -+ err = -EIO; -+ desc = ext3_get_group_desc(sb, first_group + i, NULL); -+ if (desc == NULL) -+ goto out; -+ -+ err = -ENOMEM; -+ bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ if (bh[i] == NULL) -+ goto out; -+ -+ if (buffer_uptodate(bh[i])) -+ continue; -+ -+ lock_buffer(bh[i]); -+ if (buffer_uptodate(bh[i])) { -+ unlock_buffer(bh[i]); -+ continue; -+ } -+ -+ get_bh(bh[i]); -+ bh[i]->b_end_io = end_buffer_read_sync; -+ submit_bh(READ, bh[i]); -+ mb_debug("read bitmap for group %u\n", first_group + i); -+ } -+ -+ /* wait for I/O completion */ -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ wait_on_buffer(bh[i]); -+ -+ err = -EIO; -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ if (!buffer_uptodate(bh[i])) -+ goto out; -+ -+ first_block = page->index * blocks_per_page; -+ for (i = 0; i < blocks_per_page; i++) { -+ int group; -+ -+ group = (first_block + i) >> 1; -+ if (group >= EXT3_SB(sb)->s_groups_count) -+ break; -+ -+ data = page_address(page) + (i * blocksize); -+ bitmap = bh[group - 
first_group]->b_data; -+ -+ if ((first_block + i) & 1) { -+ /* this is block of buddy */ -+ BUG_ON(incore == NULL); -+ mb_debug("put buddy for group %u in page %lu/%x\n", -+ group, page->index, i * blocksize); -+ memset(data, 0xff, blocksize); -+ EXT3_GROUP_INFO(sb, group)->bb_fragments = 0; -+ memset(EXT3_GROUP_INFO(sb, group)->bb_counters, 0, -+ sizeof(unsigned short)*(sb->s_blocksize_bits+2)); -+ ext3_mb_generate_buddy(sb, data, incore, group); -+ incore = NULL; -+ } else { -+ /* this is block of bitmap */ -+ BUG_ON(incore != NULL); -+ mb_debug("put bitmap for group %u in page %lu/%x\n", -+ group, page->index, i * blocksize); -+ -+ /* see comments in ext3_mb_put_pa() */ -+ ext3_lock_group(sb, group); -+ memcpy(data, bitmap, blocksize); -+ -+ /* mark all preallocated blocks used in in-core bitmap */ -+ ext3_mb_generate_from_pa(sb, data, group); -+ ext3_unlock_group(sb, group); -+ -+ incore = data; -+ } -+ } -+ SetPageUptodate(page); -+ -+out: -+ if (bh) { -+ for (i = 0; i < groups_per_page && bh[i]; i++) -+ brelse(bh[i]); -+ if (bh != &bhs) -+ kfree(bh); -+ } -+ return err; -+} -+ -+static int ext3_mb_load_buddy(struct super_block *sb, int group, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct inode *inode = sbi->s_buddy_cache; -+ int blocks_per_page, block, pnum, poff; -+ struct page *page; -+ -+ mb_debug("load group %u\n", group); -+ -+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; -+ -+ e3b->bd_blkbits = sb->s_blocksize_bits; -+ e3b->bd_info = EXT3_GROUP_INFO(sb, group); -+ e3b->bd_sb = sb; -+ e3b->bd_group = group; -+ e3b->bd_buddy_page = NULL; -+ e3b->bd_bitmap_page = NULL; -+ -+ block = group * 2; -+ pnum = block / blocks_per_page; -+ poff = block % blocks_per_page; -+ -+ /* we could use find_or_create_page(), but it locks page -+ * what we'd like to avoid in fast path ... 
*/ -+ page = find_get_page(inode->i_mapping, pnum); -+ if (page == NULL || !PageUptodate(page)) { -+ if (page) -+ page_cache_release(page); -+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); -+ if (page) { -+ BUG_ON(page->mapping != inode->i_mapping); -+ if (!PageUptodate(page)) { -+ ext3_mb_init_cache(page, NULL); -+ mb_cmp_bitmaps(e3b, page_address(page) + -+ (poff * sb->s_blocksize)); -+ } -+ unlock_page(page); -+ } -+ } -+ if (page == NULL || !PageUptodate(page)) -+ goto err; -+ e3b->bd_bitmap_page = page; -+ e3b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); -+ mark_page_accessed(page); -+ -+ block++; -+ pnum = block / blocks_per_page; -+ poff = block % blocks_per_page; -+ -+ page = find_get_page(inode->i_mapping, pnum); -+ if (page == NULL || !PageUptodate(page)) { -+ if (page) -+ page_cache_release(page); -+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); -+ if (page) { -+ BUG_ON(page->mapping != inode->i_mapping); -+ if (!PageUptodate(page)) -+ ext3_mb_init_cache(page, e3b->bd_bitmap); -+ -+ unlock_page(page); -+ } -+ } -+ if (page == NULL || !PageUptodate(page)) -+ goto err; -+ e3b->bd_buddy_page = page; -+ e3b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); -+ mark_page_accessed(page); -+ -+ BUG_ON(e3b->bd_bitmap_page == NULL); -+ BUG_ON(e3b->bd_buddy_page == NULL); -+ -+ return 0; -+ -+err: -+ if (e3b->bd_bitmap_page) -+ page_cache_release(e3b->bd_bitmap_page); -+ if (e3b->bd_buddy_page) -+ page_cache_release(e3b->bd_buddy_page); -+ e3b->bd_buddy = NULL; -+ e3b->bd_bitmap = NULL; -+ return -EIO; -+} -+ -+static void ext3_mb_release_desc(struct ext3_buddy *e3b) -+{ -+ if (e3b->bd_bitmap_page) -+ page_cache_release(e3b->bd_bitmap_page); -+ if (e3b->bd_buddy_page) -+ page_cache_release(e3b->bd_buddy_page); -+} -+ -+ -+static int mb_find_order_for_block(struct ext3_buddy *e3b, int block) -+{ -+ int order = 1; -+ void *bb; -+ -+ BUG_ON(EXT3_MB_BITMAP(e3b) == EXT3_MB_BUDDY(e3b)); -+ BUG_ON(block >= (1 << 
(e3b->bd_blkbits + 3))); -+ -+ bb = EXT3_MB_BUDDY(e3b); -+ while (order <= e3b->bd_blkbits + 1) { -+ block = block >> 1; -+ if (!mb_test_bit(block, bb)) { -+ /* this block is part of buddy of order 'order' */ -+ return order; -+ } -+ bb += 1 << (e3b->bd_blkbits - order); -+ order++; -+ } -+ return 0; -+} -+ -+static inline void mb_clear_bits(void *bm, int cur, int len) -+{ -+ __u32 *addr; -+ -+ len = cur + len; -+ while (cur < len) { -+ if ((cur & 31) == 0 && (len - cur) >= 32) { -+ /* fast path: clear whole word at once */ -+ addr = bm + (cur >> 3); -+ *addr = 0; -+ cur += 32; -+ continue; -+ } -+ mb_clear_bit_atomic(cur, bm); -+ cur++; -+ } -+} -+ -+static inline void mb_set_bits(void *bm, int cur, int len) -+{ -+ __u32 *addr; -+ -+ len = cur + len; -+ while (cur < len) { -+ if ((cur & 31) == 0 && (len - cur) >= 32) { -+ /* fast path: clear whole word at once */ -+ addr = bm + (cur >> 3); -+ *addr = 0xffffffff; -+ cur += 32; -+ continue; -+ } -+ mb_set_bit_atomic(cur, bm); -+ cur++; -+ } -+} -+ -+static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count) -+{ -+ int block = 0, max = 0, order; -+ void *buddy, *buddy2; -+ -+ BUG_ON(first + count > (e3b->bd_sb->s_blocksize << 3)); -+ BUG_ON(!ext3_is_group_locked(e3b->bd_sb, e3b->bd_group)); -+ mb_check_buddy(e3b); -+ mb_free_blocks_double(e3b, first, count); -+ -+ e3b->bd_info->bb_free += count; -+ if (first < e3b->bd_info->bb_first_free) -+ e3b->bd_info->bb_first_free = first; -+ -+ /* let's maintain fragments counter */ -+ if (first != 0) -+ block = !mb_test_bit(first - 1, EXT3_MB_BITMAP(e3b)); -+ if (first + count < EXT3_SB(e3b->bd_sb)->s_mb_maxs[0]) -+ max = !mb_test_bit(first + count, EXT3_MB_BITMAP(e3b)); -+ if (block && max) -+ e3b->bd_info->bb_fragments--; -+ else if (!block && !max) -+ e3b->bd_info->bb_fragments++; -+ -+ /* let's maintain buddy itself */ -+ while (count-- > 0) { -+ block = first++; -+ order = 0; -+ -+ BUG_ON(!mb_test_bit(block, EXT3_MB_BITMAP(e3b))); -+ mb_clear_bit(block, 
EXT3_MB_BITMAP(e3b)); -+ e3b->bd_info->bb_counters[order]++; -+ -+ /* start of the buddy */ -+ buddy = mb_find_buddy(e3b, order, &max); -+ -+ do { -+ block &= ~1UL; -+ if (mb_test_bit(block, buddy) || -+ mb_test_bit(block + 1, buddy)) -+ break; -+ -+ /* both the buddies are free, try to coalesce them */ -+ buddy2 = mb_find_buddy(e3b, order + 1, &max); -+ -+ if (!buddy2) -+ break; -+ -+ if (order > 0) { -+ /* for special purposes, we don't set -+ * free bits in bitmap */ -+ mb_set_bit(block, buddy); -+ mb_set_bit(block + 1, buddy); -+ } -+ e3b->bd_info->bb_counters[order]--; -+ e3b->bd_info->bb_counters[order]--; -+ -+ block = block >> 1; -+ order++; -+ e3b->bd_info->bb_counters[order]++; -+ -+ mb_clear_bit(block, buddy2); -+ buddy = buddy2; -+ } while (1); -+ } -+ mb_check_buddy(e3b); -+ -+ return 0; -+} -+ -+static int mb_find_extent(struct ext3_buddy *e3b, int order, int block, -+ int needed, struct ext3_free_extent *ex) -+{ -+ int next = block, max, ord; -+ void *buddy; -+ -+ BUG_ON(!ext3_is_group_locked(e3b->bd_sb, e3b->bd_group)); -+ BUG_ON(ex == NULL); -+ -+ buddy = mb_find_buddy(e3b, order, &max); -+ BUG_ON(buddy == NULL); -+ BUG_ON(block >= max); -+ if (mb_test_bit(block, buddy)) { -+ ex->fe_len = 0; -+ ex->fe_start = 0; -+ ex->fe_group = 0; -+ return 0; -+ } -+ -+ if (likely(order == 0)) { -+ /* find actual order */ -+ order = mb_find_order_for_block(e3b, block); -+ block = block >> order; -+ } -+ -+ ex->fe_len = 1 << order; -+ ex->fe_start = block << order; -+ ex->fe_group = e3b->bd_group; -+ -+ /* calc difference from given start */ -+ next = next - ex->fe_start; -+ ex->fe_len -= next; -+ ex->fe_start += next; -+ -+ while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) { -+ -+ if (block + 1 >= max) -+ break; -+ -+ next = (block + 1) * (1 << order); -+ if (mb_test_bit(next, EXT3_MB_BITMAP(e3b))) -+ break; -+ -+ ord = mb_find_order_for_block(e3b, next); -+ -+ order = ord; -+ block = next >> order; -+ ex->fe_len += 1 << order; -+ } -+ -+ 
BUG_ON(ex->fe_start + ex->fe_len > (1 << (e3b->bd_blkbits + 3))); -+ return ex->fe_len; -+} -+ -+static int mb_mark_used(struct ext3_buddy *e3b, struct ext3_free_extent *ex) -+{ -+ int ord, mlen = 0, max = 0, cur; -+ int start = ex->fe_start; -+ int len = ex->fe_len; -+ unsigned ret = 0; -+ int len0 = len; -+ void *buddy; -+ -+ BUG_ON(start + len > (e3b->bd_sb->s_blocksize << 3)); -+ BUG_ON(e3b->bd_group != ex->fe_group); -+ BUG_ON(!ext3_is_group_locked(e3b->bd_sb, e3b->bd_group)); -+ mb_check_buddy(e3b); -+ mb_mark_used_double(e3b, start, len); -+ -+ e3b->bd_info->bb_free -= len; -+ if (e3b->bd_info->bb_first_free == start) -+ e3b->bd_info->bb_first_free += len; -+ -+ /* let's maintain fragments counter */ -+ if (start != 0) -+ mlen = !mb_test_bit(start - 1, EXT3_MB_BITMAP(e3b)); -+ if (start + len < EXT3_SB(e3b->bd_sb)->s_mb_maxs[0]) -+ max = !mb_test_bit(start + len, EXT3_MB_BITMAP(e3b)); -+ if (mlen && max) -+ e3b->bd_info->bb_fragments++; -+ else if (!mlen && !max) -+ e3b->bd_info->bb_fragments--; -+ -+ /* let's maintain buddy itself */ -+ while (len) { -+ ord = mb_find_order_for_block(e3b, start); -+ -+ if (((start >> ord) << ord) == start && len >= (1 << ord)) { -+ /* the whole chunk may be allocated at once! 
*/ -+ mlen = 1 << ord; -+ buddy = mb_find_buddy(e3b, ord, &max); -+ BUG_ON((start >> ord) >= max); -+ mb_set_bit(start >> ord, buddy); -+ e3b->bd_info->bb_counters[ord]--; -+ start += mlen; -+ len -= mlen; -+ BUG_ON(len < 0); -+ continue; -+ } -+ -+ /* store for history */ -+ if (ret == 0) -+ ret = len | (ord << 16); -+ -+ /* we have to split large buddy */ -+ BUG_ON(ord <= 0); -+ buddy = mb_find_buddy(e3b, ord, &max); -+ mb_set_bit(start >> ord, buddy); -+ e3b->bd_info->bb_counters[ord]--; -+ -+ ord--; -+ cur = (start >> ord) & ~1U; -+ buddy = mb_find_buddy(e3b, ord, &max); -+ mb_clear_bit(cur, buddy); -+ mb_clear_bit(cur + 1, buddy); -+ e3b->bd_info->bb_counters[ord]++; -+ e3b->bd_info->bb_counters[ord]++; -+ } -+ -+ mb_set_bits(EXT3_MB_BITMAP(e3b), ex->fe_start, len0); -+ mb_check_buddy(e3b); -+ -+ return ret; -+} -+ -+/* -+ * Must be called under group lock! -+ */ -+static void ext3_mb_use_best_found(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ unsigned long ret; -+ -+ BUG_ON(ac->ac_b_ex.fe_group != e3b->bd_group); -+ BUG_ON(ac->ac_status == AC_STATUS_FOUND); -+ -+ ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len); -+ ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical; -+ ret = mb_mark_used(e3b, &ac->ac_b_ex); -+ -+ /* preallocation can change ac_b_ex, thus we store actually -+ * allocated blocks for history */ -+ ac->ac_f_ex = ac->ac_b_ex; -+ -+ ac->ac_status = AC_STATUS_FOUND; -+ ac->ac_tail = ret & 0xffff; -+ ac->ac_buddy = ret >> 16; -+ -+ /* XXXXXXX: SUCH A HORRIBLE **CK */ -+ ac->ac_bitmap_page = e3b->bd_bitmap_page; -+ get_page(ac->ac_bitmap_page); -+ ac->ac_buddy_page = e3b->bd_buddy_page; -+ get_page(ac->ac_buddy_page); -+} -+ -+/* -+ * regular allocator, for general purposes allocation -+ */ -+ -+void ext3_mb_check_limits(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b, -+ int finish_group) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb); -+ struct ext3_free_extent *bex = &ac->ac_b_ex; -+ struct 
ext3_free_extent *gex = &ac->ac_g_ex; -+ struct ext3_free_extent ex; -+ int max; -+ -+ /* -+ * We don't want to scan for a whole year -+ */ -+ if (ac->ac_found > sbi->s_mb_max_to_scan && -+ !(ac->ac_flags & EXT3_MB_HINT_FIRST)) { -+ ac->ac_status = AC_STATUS_BREAK; -+ return; -+ } -+ -+ /* -+ * Haven't found good chunk so far, let's continue -+ */ -+ if (bex->fe_len < gex->fe_len) -+ return; -+ -+ if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan) -+ && bex->fe_group == e3b->bd_group) { -+ /* recheck chunk's availability - we don't know -+ * when it was found (within this lock-unlock -+ * period or not) */ -+ max = mb_find_extent(e3b, 0, bex->fe_start, gex->fe_len, &ex); -+ if (max >= gex->fe_len) { -+ ext3_mb_use_best_found(ac, e3b); -+ return; -+ } -+ } -+} -+ -+/* -+ * The routine checks whether found extent is good enough. If it is, -+ * then the extent gets marked used and flag is set to the context -+ * to stop scanning. Otherwise, the extent is compared with the -+ * previous found extent and if new one is better, then it's stored -+ * in the context. Later, the best found extent will be used, if -+ * mballoc can't find good enough extent. -+ * -+ * FIXME: real allocation policy is to be designed yet! 
-+ */ -+static void ext3_mb_measure_extent(struct ext3_allocation_context *ac, -+ struct ext3_free_extent *ex, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_free_extent *bex = &ac->ac_b_ex; -+ struct ext3_free_extent *gex = &ac->ac_g_ex; -+ -+ BUG_ON(ex->fe_len <= 0); -+ BUG_ON(ex->fe_len >= (1 << ac->ac_sb->s_blocksize_bits) * 8); -+ BUG_ON(ex->fe_start >= (1 << ac->ac_sb->s_blocksize_bits) * 8); -+ BUG_ON(ac->ac_status != AC_STATUS_CONTINUE); -+ -+ ac->ac_found++; -+ -+ /* -+ * The special case - take what you catch first -+ */ -+ if (unlikely(ac->ac_flags & EXT3_MB_HINT_FIRST)) { -+ *bex = *ex; -+ ext3_mb_use_best_found(ac, e3b); -+ return; -+ } -+ -+ /* -+ * Let's check whether the chuck is good enough -+ */ -+ if (ex->fe_len == gex->fe_len) { -+ *bex = *ex; -+ ext3_mb_use_best_found(ac, e3b); -+ return; -+ } -+ -+ /* -+ * If this is first found extent, just store it in the context -+ */ -+ if (bex->fe_len == 0) { -+ *bex = *ex; -+ return; -+ } -+ -+ /* -+ * If new found extent is better, store it in the context -+ */ -+ if (bex->fe_len < gex->fe_len) { -+ /* if the request isn't satisfied, any found extent -+ * larger than previous best one is better */ -+ if (ex->fe_len > bex->fe_len) -+ *bex = *ex; -+ } else if (ex->fe_len > gex->fe_len) { -+ /* if the request is satisfied, then we try to find -+ * an extent that still satisfy the request, but is -+ * smaller than previous one */ -+ *bex = *ex; -+ } -+ -+ ext3_mb_check_limits(ac, e3b, 0); -+} -+ -+static int ext3_mb_try_best_found(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct ext3_free_extent ex = ac->ac_b_ex; -+ int group = ex.fe_group, max, err; -+ -+ BUG_ON(ex.fe_len <= 0); -+ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b); -+ if (err) -+ return err; -+ -+ ext3_lock_group(ac->ac_sb, group); -+ max = mb_find_extent(e3b, 0, ex.fe_start, ex.fe_len, &ex); -+ -+ if (max > 0) { -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ -+ ext3_unlock_group(ac->ac_sb, 
group); -+ ext3_mb_release_desc(e3b); -+ -+ return 0; -+} -+ -+static int ext3_mb_find_by_goal(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ int group = ac->ac_g_ex.fe_group, max, err; -+ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb); -+ struct ext3_super_block *es = sbi->s_es; -+ struct ext3_free_extent ex; -+ -+ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b); -+ if (err) -+ return err; -+ -+ ext3_lock_group(ac->ac_sb, group); -+ max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start, -+ ac->ac_g_ex.fe_len, &ex); -+ -+ if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { -+ unsigned long start; -+ start = (e3b->bd_group * EXT3_BLOCKS_PER_GROUP(ac->ac_sb) + -+ ex.fe_start + le32_to_cpu(es->s_first_data_block)); -+ if (start % sbi->s_stripe == 0) { -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ } else if (max >= ac->ac_g_ex.fe_len) { -+ BUG_ON(ex.fe_len <= 0); -+ BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group); -+ BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start); -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) { -+ /* Sometimes, caller may want to merge even small -+ * number of blocks to an existing extent */ -+ BUG_ON(ex.fe_len <= 0); -+ BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group); -+ BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start); -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ } -+ ext3_unlock_group(ac->ac_sb, group); -+ ext3_mb_release_desc(e3b); -+ -+ return 0; -+} -+ -+/* -+ * The routine scans buddy structures (not bitmap!) 
from given order -+ * to max order and tries to find big enough chunk to satisfy the req -+ */ -+static void ext3_mb_simple_scan_group(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ struct ext3_group_info *grp = e3b->bd_info; -+ void *buddy; -+ int i, k, max; -+ -+ BUG_ON(ac->ac_2order <= 0); -+ for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) { -+ if (grp->bb_counters[i] == 0) -+ continue; -+ -+ buddy = mb_find_buddy(e3b, i, &max); -+ BUG_ON(buddy == NULL); -+ -+ k = mb_find_next_zero_bit(buddy, max, 0); -+ BUG_ON(k >= max); -+ -+ ac->ac_found++; -+ -+ ac->ac_b_ex.fe_len = 1 << i; -+ ac->ac_b_ex.fe_start = k << i; -+ ac->ac_b_ex.fe_group = e3b->bd_group; -+ -+ ext3_mb_use_best_found(ac, e3b); -+ -+ BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len); -+ -+ if (EXT3_SB(sb)->s_mb_stats) -+ atomic_inc(&EXT3_SB(sb)->s_bal_2orders); -+ -+ break; -+ } -+} -+ -+/* -+ * The routine scans the group and measures all found extents. -+ * In order to optimize scanning, caller must pass number of -+ * free blocks in the group, so the routine can know upper limit. 
-+ */ -+static void ext3_mb_complex_scan_group(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ void *bitmap = EXT3_MB_BITMAP(e3b); -+ struct ext3_free_extent ex; -+ int i, free; -+ -+ free = e3b->bd_info->bb_free; -+ BUG_ON(free <= 0); -+ -+ i = e3b->bd_info->bb_first_free; -+ -+ while (free && ac->ac_status == AC_STATUS_CONTINUE) { -+ i = mb_find_next_zero_bit(bitmap, sb->s_blocksize * 8, i); -+ if (i >= sb->s_blocksize * 8) { -+ BUG_ON(free != 0); -+ break; -+ } -+ -+ mb_find_extent(e3b, 0, i, ac->ac_g_ex.fe_len, &ex); -+ BUG_ON(ex.fe_len <= 0); -+ BUG_ON(free < ex.fe_len); -+ -+ ext3_mb_measure_extent(ac, &ex, e3b); -+ -+ i += ex.fe_len; -+ free -= ex.fe_len; -+ } -+ -+ ext3_mb_check_limits(ac, e3b, 1); -+} -+ -+/* -+ * This is a special case for storages like raid5 -+ * we try to find stripe-aligned chunks for stripe-size requests -+ */ -+static void ext3_mb_scan_aligned(struct ext3_allocation_context *ac, -+ struct ext3_buddy *e3b) -+{ -+ struct super_block *sb = ac->ac_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ void *bitmap = EXT3_MB_BITMAP(e3b); -+ struct ext3_free_extent ex; -+ unsigned long i, max; -+ -+ BUG_ON(sbi->s_stripe == 0); -+ -+ /* find first stripe-aligned block */ -+ i = e3b->bd_group * EXT3_BLOCKS_PER_GROUP(sb) -+ + le32_to_cpu(sbi->s_es->s_first_data_block); -+ i = ((i + sbi->s_stripe - 1) / sbi->s_stripe) * sbi->s_stripe; -+ i = (i - le32_to_cpu(sbi->s_es->s_first_data_block)) -+ % EXT3_BLOCKS_PER_GROUP(sb); -+ -+ while (i < sb->s_blocksize * 8) { -+ if (!mb_test_bit(i, bitmap)) { -+ max = mb_find_extent(e3b, 0, i, sbi->s_stripe, &ex); -+ if (max >= sbi->s_stripe) { -+ ac->ac_found++; -+ ac->ac_b_ex = ex; -+ ext3_mb_use_best_found(ac, e3b); -+ break; -+ } -+ } -+ i += sbi->s_stripe; -+ } -+} -+ -+static int ext3_mb_good_group(struct ext3_allocation_context *ac, -+ int group, int cr) -+{ -+ struct ext3_group_info *grp = EXT3_GROUP_INFO(ac->ac_sb, group); -+ unsigned free, 
fragments, i, bits; -+ -+ BUG_ON(cr < 0 || cr >= 4); -+ BUG_ON(EXT3_MB_GRP_NEED_INIT(grp)); -+ -+ free = grp->bb_free; -+ fragments = grp->bb_fragments; -+ if (free == 0) -+ return 0; -+ if (fragments == 0) -+ return 0; -+ -+ switch (cr) { -+ case 0: -+ BUG_ON(ac->ac_2order == 0); -+ bits = ac->ac_sb->s_blocksize_bits + 1; -+ for (i = ac->ac_2order; i <= bits; i++) -+ if (grp->bb_counters[i] > 0) -+ return 1; -+ break; -+ case 1: -+ if ((free / fragments) >= ac->ac_g_ex.fe_len) -+ return 1; -+ break; -+ case 2: -+ if (free >= ac->ac_g_ex.fe_len) -+ return 1; -+ break; -+ case 3: -+ return 1; -+ default: -+ BUG(); -+ } -+ -+ return 0; -+} -+ -+int ext3_mb_regular_allocator(struct ext3_allocation_context *ac) -+{ -+ int group, i, cr, err = 0; -+ struct ext3_sb_info *sbi; -+ struct super_block *sb; -+ struct ext3_buddy e3b; -+ -+ sb = ac->ac_sb; -+ sbi = EXT3_SB(sb); -+ BUG_ON(ac->ac_status == AC_STATUS_FOUND); -+ -+ /* first, try the goal */ -+ err = ext3_mb_find_by_goal(ac, &e3b); -+ if (err || ac->ac_status == AC_STATUS_FOUND) -+ goto out; -+ -+ if (unlikely(ac->ac_flags & EXT3_MB_HINT_GOAL_ONLY)) -+ goto out; -+ -+ i = ffs(ac->ac_g_ex.fe_len); -+ ac->ac_2order = 0; -+ if (i >= sbi->s_mb_order2_reqs) { -+ i--; -+ if ((ac->ac_g_ex.fe_len & (~(1 << i))) == 0) -+ ac->ac_2order = i; -+ } -+ -+ group = ac->ac_g_ex.fe_group; -+ -+ /* Let's just scan groups to find more-less suitable blocks */ -+ cr = ac->ac_2order ? 
0 : 1; -+repeat: -+ for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) { -+ ac->ac_criteria = cr; -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) { -+ struct ext3_group_info *grp; -+ -+ if (group == EXT3_SB(sb)->s_groups_count) -+ group = 0; -+ -+ /* quick check to skip empty groups */ -+ grp = EXT3_GROUP_INFO(ac->ac_sb, group); -+ if (grp->bb_free == 0) -+ continue; -+ -+ if (EXT3_MB_GRP_NEED_INIT(EXT3_GROUP_INFO(sb, group))) { -+ /* we need full data about the group -+ * to make a good selection */ -+ err = ext3_mb_load_buddy(sb, group, &e3b); -+ if (err) -+ goto out; -+ ext3_mb_release_desc(&e3b); -+ } -+ -+ /* check is group good for our criteries */ -+ if (!ext3_mb_good_group(ac, group, cr)) -+ continue; -+ -+ err = ext3_mb_load_buddy(sb, group, &e3b); -+ if (err) -+ goto out; -+ -+ ext3_lock_group(sb, group); -+ if (!ext3_mb_good_group(ac, group, cr)) { -+ /* someone did allocation from this group */ -+ ext3_unlock_group(sb, group); -+ ext3_mb_release_desc(&e3b); -+ continue; -+ } -+ -+ ac->ac_groups_scanned++; -+ if (cr == 0) -+ ext3_mb_simple_scan_group(ac, &e3b); -+ else if (cr == 1 && ac->ac_g_ex.fe_len == sbi->s_stripe) -+ ext3_mb_scan_aligned(ac, &e3b); -+ else -+ ext3_mb_complex_scan_group(ac, &e3b); -+ -+ ext3_unlock_group(sb, group); -+ ext3_mb_release_desc(&e3b); -+ -+ if (ac->ac_status != AC_STATUS_CONTINUE) -+ break; -+ } -+ } -+ -+ if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND && -+ !(ac->ac_flags & EXT3_MB_HINT_FIRST)) { -+ /* -+ * We've been searching too long. Let's try to allocate -+ * the best chunk we've found so far -+ */ -+ -+ ext3_mb_try_best_found(ac, &e3b); -+ if (ac->ac_status != AC_STATUS_FOUND) { -+ /* -+ * Someone more lucky has already allocated it. 
-+ * The only thing we can do is just take first -+ * found block(s) -+ printk(KERN_DEBUG "EXT3-fs: someone won our chunk\n"); -+ */ -+ ac->ac_b_ex.fe_group = 0; -+ ac->ac_b_ex.fe_start = 0; -+ ac->ac_b_ex.fe_len = 0; -+ ac->ac_status = AC_STATUS_CONTINUE; -+ ac->ac_flags |= EXT3_MB_HINT_FIRST; -+ cr = 3; -+ atomic_inc(&sbi->s_mb_lost_chunks); -+ goto repeat; -+ } -+ } -+out: -+ return err; -+} -+ -+#ifdef EXT3_MB_HISTORY -+struct ext3_mb_proc_session { -+ struct ext3_mb_history *history; -+ struct super_block *sb; -+ int start; -+ int max; -+}; -+ -+static void *ext3_mb_history_skip_empty(struct ext3_mb_proc_session *s, -+ struct ext3_mb_history *hs, -+ int first) -+{ -+ if (hs == s->history + s->max) -+ hs = s->history; -+ if (!first && hs == s->history + s->start) -+ return NULL; -+ while (hs->orig.fe_len == 0) { -+ hs++; -+ if (hs == s->history + s->max) -+ hs = s->history; -+ if (hs == s->history + s->start) -+ return NULL; -+ } -+ return hs; -+} -+ -+static void *ext3_mb_seq_history_start(struct seq_file *seq, loff_t *pos) -+{ -+ struct ext3_mb_proc_session *s = seq->private; -+ struct ext3_mb_history *hs; -+ int l = *pos; -+ -+ if (l == 0) -+ return SEQ_START_TOKEN; -+ hs = ext3_mb_history_skip_empty(s, s->history + s->start, 1); -+ if (!hs) -+ return NULL; -+ while (--l && (hs = ext3_mb_history_skip_empty(s, ++hs, 0)) != NULL); -+ return hs; -+} -+ -+static void *ext3_mb_seq_history_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ struct ext3_mb_proc_session *s = seq->private; -+ struct ext3_mb_history *hs = v; -+ -+ ++*pos; -+ if (v == SEQ_START_TOKEN) -+ return ext3_mb_history_skip_empty(s, s->history + s->start, 1); -+ else -+ return ext3_mb_history_skip_empty(s, ++hs, 0); -+} -+ -+static int ext3_mb_seq_history_show(struct seq_file *seq, void *v) -+{ -+ char buf[25], buf2[25], buf3[25], *fmt; -+ struct ext3_mb_history *hs = v; -+ -+ if (v == SEQ_START_TOKEN) { -+ seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s " -+ "%-5s %-2s %-5s %-5s %-5s 
%-6s\n", -+ "pid", "inode", "original", "goal", "result","found", -+ "grps", "cr", "flags", "merge", "tail", "broken"); -+ return 0; -+ } -+ -+ if (hs->op == EXT3_MB_HISTORY_ALLOC) { -+ fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " -+ "%-5u %-5s %-5u %-6u\n"; -+ sprintf(buf2, "%lu/%lu/%lu@%lu", hs->result.fe_group, -+ hs->result.fe_start, hs->result.fe_len, -+ hs->result.fe_logical); -+ sprintf(buf, "%lu/%lu/%lu@%lu", hs->orig.fe_group, -+ hs->orig.fe_start, hs->orig.fe_len, -+ hs->orig.fe_logical); -+ sprintf(buf3, "%lu/%lu/%lu@%lu", hs->goal.fe_group, -+ hs->goal.fe_start, hs->goal.fe_len, -+ hs->goal.fe_logical); -+ seq_printf(seq, fmt, hs->pid, hs->ino, buf, buf3, buf2, -+ hs->found, hs->groups, hs->cr, hs->flags, -+ hs->merged ? "M" : "", hs->tail, -+ hs->buddy ? 1 << hs->buddy : 0); -+ } else if (hs->op == EXT3_MB_HISTORY_PREALLOC) { -+ fmt = "%-5u %-8u %-23s %-23s %-23s\n"; -+ sprintf(buf2, "%lu/%lu/%lu@%lu", hs->result.fe_group, -+ hs->result.fe_start, hs->result.fe_len, -+ hs->result.fe_logical); -+ sprintf(buf, "%lu/%lu/%lu@%lu", hs->orig.fe_group, -+ hs->orig.fe_start, hs->orig.fe_len, -+ hs->orig.fe_logical); -+ seq_printf(seq, fmt, hs->pid, hs->ino, buf, "", buf2); -+ } else if (hs->op == EXT3_MB_HISTORY_DISCARD) { -+ sprintf(buf2, "%lu/%lu/%lu", hs->result.fe_group, -+ hs->result.fe_start, hs->result.fe_len); -+ seq_printf(seq, "%-5u %-8u %-23s discard\n", -+ hs->pid, hs->ino, buf2); -+ } else if (hs->op == EXT3_MB_HISTORY_FREE) { -+ sprintf(buf2, "%lu/%lu/%lu", hs->result.fe_group, -+ hs->result.fe_start, hs->result.fe_len); -+ seq_printf(seq, "%-5u %-8u %-23s free\n", -+ hs->pid, hs->ino, buf2); -+ } -+ return 0; -+} -+ -+static void ext3_mb_seq_history_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static struct seq_operations ext3_mb_seq_history_ops = { -+ .start = ext3_mb_seq_history_start, -+ .next = ext3_mb_seq_history_next, -+ .stop = ext3_mb_seq_history_stop, -+ .show = ext3_mb_seq_history_show, -+}; -+ -+static int 
ext3_mb_seq_history_open(struct inode *inode, struct file *file) -+{ -+ struct super_block *sb = PDE(inode)->data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_mb_proc_session *s; -+ int rc, size; -+ -+ s = kmalloc(sizeof(*s), GFP_KERNEL); -+ if (s == NULL) -+ return -ENOMEM; -+ s->sb = sb; -+ size = sizeof(struct ext3_mb_history) * sbi->s_mb_history_max; -+ s->history = kmalloc(size, GFP_KERNEL); -+ if (s->history == NULL) { -+ kfree(s); -+ return -ENOMEM; -+ } -+ -+ spin_lock(&sbi->s_mb_history_lock); -+ memcpy(s->history, sbi->s_mb_history, size); -+ s->max = sbi->s_mb_history_max; -+ s->start = sbi->s_mb_history_cur % s->max; -+ spin_unlock(&sbi->s_mb_history_lock); -+ -+ rc = seq_open(file, &ext3_mb_seq_history_ops); -+ if (rc == 0) { -+ struct seq_file *m = (struct seq_file *)file->private_data; -+ m->private = s; -+ } else { -+ kfree(s->history); -+ kfree(s); -+ } -+ return rc; -+ -+} -+ -+static int ext3_mb_seq_history_release(struct inode *inode, struct file *file) -+{ -+ struct seq_file *seq = (struct seq_file *)file->private_data; -+ struct ext3_mb_proc_session *s = seq->private; -+ kfree(s->history); -+ kfree(s); -+ return seq_release(inode, file); -+} -+ -+static ssize_t ext3_mb_seq_history_write(struct file *file, -+ const char __user *buffer, -+ size_t count, loff_t *ppos) -+{ -+ struct seq_file *seq = (struct seq_file *)file->private_data; -+ struct ext3_mb_proc_session *s = seq->private; -+ struct super_block *sb = s->sb; -+ char str[32]; -+ int value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ "mb_history", (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ value = simple_strtol(str, NULL, 0); -+ if (value < 0) -+ return -ERANGE; -+ EXT3_SB(sb)->s_mb_history_filter = value; -+ -+ return count; -+} -+ -+static struct file_operations ext3_mb_seq_history_fops = { -+ .owner = THIS_MODULE, -+ .open = 
ext3_mb_seq_history_open, -+ .read = seq_read, -+ .write = ext3_mb_seq_history_write, -+ .llseek = seq_lseek, -+ .release = ext3_mb_seq_history_release, -+}; -+ -+static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) -+{ -+ struct super_block *sb = seq->private; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ long group; -+ -+ if (*pos < 0 || *pos >= sbi->s_groups_count) -+ return NULL; -+ -+ group = *pos + 1; -+ return (void *) group; -+} -+ -+static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ struct super_block *sb = seq->private; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ long group; -+ -+ ++*pos; -+ if (*pos < 0 || *pos >= sbi->s_groups_count) -+ return NULL; -+ group = *pos + 1; -+ return (void *) group;; -+} -+ -+static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v) -+{ -+ struct super_block *sb = seq->private; -+ long group = (long) v; -+ int i, err; -+ struct ext3_buddy e3b; -+ struct sg { -+ struct ext3_group_info info; -+ unsigned short counters[16]; -+ } sg; -+ -+ group--; -+ if (group == 0) -+ seq_printf(seq, "#%-5s: %-5s %-5s %-5s " -+ "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s " -+ "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n", -+ "group", "free", "frags", "first", -+ "2^0", "2^1", "2^2", "2^3", "2^4", "2^5","2^6", -+ "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13"); -+ -+ i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + -+ sizeof(struct ext3_group_info); -+ err = ext3_mb_load_buddy(sb, group, &e3b); -+ if (err) { -+ seq_printf(seq, "#%-5lu: I/O error\n", group); -+ return 0; -+ } -+ ext3_lock_group(sb, group); -+ memcpy(&sg, EXT3_GROUP_INFO(sb, group), i); -+ ext3_unlock_group(sb, group); -+ ext3_mb_release_desc(&e3b); -+ -+ seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free, -+ sg.info.bb_fragments, sg.info.bb_first_free); -+ for (i = 0; i <= 13; i++) -+ seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? 
-+ sg.info.bb_counters[i] : 0); -+ seq_printf(seq, " ]\n"); -+ -+ return 0; -+} -+ -+static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static struct seq_operations ext3_mb_seq_groups_ops = { -+ .start = ext3_mb_seq_groups_start, -+ .next = ext3_mb_seq_groups_next, -+ .stop = ext3_mb_seq_groups_stop, -+ .show = ext3_mb_seq_groups_show, -+}; -+ -+static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file) -+{ -+ struct super_block *sb = PDE(inode)->data; -+ int rc; -+ -+ rc = seq_open(file, &ext3_mb_seq_groups_ops); -+ if (rc == 0) { -+ struct seq_file *m = (struct seq_file *)file->private_data; -+ m->private = sb; -+ } -+ return rc; -+ -+} -+ -+static struct file_operations ext3_mb_seq_groups_fops = { -+ .owner = THIS_MODULE, -+ .open = ext3_mb_seq_groups_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+ -+static void ext3_mb_history_release(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ remove_proc_entry("mb_groups", sbi->s_mb_proc); -+ remove_proc_entry("mb_history", sbi->s_mb_proc); -+ -+ if (sbi->s_mb_history) -+ kfree(sbi->s_mb_history); -+} -+ -+static void ext3_mb_history_init(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int i; -+ -+ if (sbi->s_mb_proc != NULL) { -+ struct proc_dir_entry *p; -+ p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); -+ if (p) { -+ p->proc_fops = &ext3_mb_seq_history_fops; -+ p->data = sb; -+ } -+ p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc); -+ if (p) { -+ p->proc_fops = &ext3_mb_seq_groups_fops; -+ p->data = sb; -+ } -+ } -+ -+ sbi->s_mb_history_max = 1000; -+ sbi->s_mb_history_cur = 0; -+ spin_lock_init(&sbi->s_mb_history_lock); -+ i = sbi->s_mb_history_max * sizeof(struct ext3_mb_history); -+ sbi->s_mb_history = kmalloc(i, GFP_KERNEL); -+ memset(sbi->s_mb_history, 0, i); -+ /* if we can't allocate history, then we simple won't use it */ -+} -+ -+static void 
-+ext3_mb_store_history(struct ext3_allocation_context *ac) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb); -+ struct ext3_mb_history h; -+ -+ if (likely(sbi->s_mb_history == NULL)) -+ return; -+ -+ if (!(ac->ac_op & sbi->s_mb_history_filter)) -+ return; -+ -+ h.op = ac->ac_op; -+ h.pid = current->pid; -+ h.ino = ac->ac_inode ? ac->ac_inode->i_ino : 0; -+ h.orig = ac->ac_o_ex; -+ h.result = ac->ac_b_ex; -+ h.flags = ac->ac_flags; -+ h.merged = 0; -+ if (ac->ac_op == EXT3_MB_HISTORY_ALLOC) { -+ if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && -+ ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) -+ h.merged = 1; -+ h.goal = ac->ac_g_ex; -+ h.result = ac->ac_f_ex; -+ } -+ -+ spin_lock(&sbi->s_mb_history_lock); -+ memcpy(sbi->s_mb_history + sbi->s_mb_history_cur, &h, sizeof(h)); -+ if (++sbi->s_mb_history_cur >= sbi->s_mb_history_max) -+ sbi->s_mb_history_cur = 0; -+ spin_unlock(&sbi->s_mb_history_lock); -+} -+ -+#else -+#define ext3_mb_history_release(sb) -+#define ext3_mb_history_init(sb) -+#endif -+ -+int ext3_mb_init_backend(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int i, j, len, metalen; -+ int num_meta_group_infos = -+ (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) >> -+ EXT3_DESC_PER_BLOCK_BITS(sb); -+ struct ext3_group_info **meta_group_info; -+ -+ /* An 8TB filesystem with 64-bit pointers requires a 4096 byte -+ * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. -+ * So a two level scheme suffices for now. 
*/ -+ sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) * -+ num_meta_group_infos, GFP_KERNEL); -+ if (sbi->s_group_info == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate buddy meta group\n"); -+ return -ENOMEM; -+ } -+ sbi->s_buddy_cache = new_inode(sb); -+ if (sbi->s_buddy_cache == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't get new inode\n"); -+ goto err_freesgi; -+ } -+ EXT3_I(sbi->s_buddy_cache)->i_disksize = 0; -+ -+ metalen = sizeof(*meta_group_info) << EXT3_DESC_PER_BLOCK_BITS(sb); -+ for (i = 0; i < num_meta_group_infos; i++) { -+ if ((i + 1) == num_meta_group_infos) -+ metalen = sizeof(*meta_group_info) * -+ (sbi->s_groups_count - -+ (i << EXT3_DESC_PER_BLOCK_BITS(sb))); -+ meta_group_info = kmalloc(metalen, GFP_KERNEL); -+ if (meta_group_info == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate mem for a " -+ "buddy group\n"); -+ goto err_freemeta; -+ } -+ sbi->s_group_info[i] = meta_group_info; -+ } -+ -+ /* -+ * calculate needed size. if change bb_counters size, -+ * don't forget about ext3_mb_generate_buddy() -+ */ -+ len = sizeof(struct ext3_group_info); -+ len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2); -+ for (i = 0; i < sbi->s_groups_count; i++) { -+ struct ext3_group_desc * desc; -+ -+ meta_group_info = -+ sbi->s_group_info[i >> EXT3_DESC_PER_BLOCK_BITS(sb)]; -+ j = i & (EXT3_DESC_PER_BLOCK(sb) - 1); -+ -+ meta_group_info[j] = kmalloc(len, GFP_KERNEL); -+ if (meta_group_info[j] == NULL) { -+ printk(KERN_ERR "EXT3-fs: can't allocate buddy mem\n"); -+ i--; -+ goto err_freebuddy; -+ } -+ desc = ext3_get_group_desc(sb, i, NULL); -+ if (desc == NULL) { -+ printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i); -+ goto err_freebuddy; -+ } -+ memset(meta_group_info[j], 0, len); -+ set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, -+ &meta_group_info[j]->bb_state); -+ -+ /* initialize bb_free to be able to skip -+ * empty groups without initialization */ -+ meta_group_info[j]->bb_free = -+ le16_to_cpu(desc->bg_free_blocks_count); -+ -+ 
INIT_LIST_HEAD(&meta_group_info[j]->bb_prealloc_list); -+ -+#ifdef DOUBLE_CHECK -+ { -+ struct buffer_head *bh; -+ meta_group_info[j]->bb_bitmap = -+ kmalloc(sb->s_blocksize, GFP_KERNEL); -+ BUG_ON(meta_group_info[j]->bb_bitmap == NULL); -+ bh = read_block_bitmap(sb, i); -+ BUG_ON(bh == NULL); -+ memcpy(meta_group_info[j]->bb_bitmap, bh->b_data, -+ sb->s_blocksize); -+ brelse(bh); -+ } -+#endif -+ -+ } -+ -+ return 0; -+ -+err_freebuddy: -+ while (i >= 0) { -+ kfree(EXT3_GROUP_INFO(sb, i)); -+ i--; -+ } -+ i = num_meta_group_infos; -+err_freemeta: -+ while (--i >= 0) -+ kfree(sbi->s_group_info[i]); -+ iput(sbi->s_buddy_cache); -+err_freesgi: -+ kfree(sbi->s_group_info); -+ return -ENOMEM; -+} -+ -+int ext3_mb_init(struct super_block *sb, int needs_recovery) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ unsigned i, offset, max; -+ -+ if (!test_opt(sb, MBALLOC)) -+ return 0; -+ -+ i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); -+ -+ sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); -+ if (sbi->s_mb_offsets == NULL) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ return -ENOMEM; -+ } -+ sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); -+ if (sbi->s_mb_maxs == NULL) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ kfree(sbi->s_mb_maxs); -+ return -ENOMEM; -+ } -+ -+ /* order 0 is regular bitmap */ -+ sbi->s_mb_maxs[0] = sb->s_blocksize << 3; -+ sbi->s_mb_offsets[0] = 0; -+ -+ i = 1; -+ offset = 0; -+ max = sb->s_blocksize << 2; -+ do { -+ sbi->s_mb_offsets[i] = offset; -+ sbi->s_mb_maxs[i] = max; -+ offset += 1 << (sb->s_blocksize_bits - i); -+ max = max >> 1; -+ i++; -+ } while (i <= sb->s_blocksize_bits + 1); -+ -+ /* init file for buddy data */ -+ if ((i = ext3_mb_init_backend(sb))) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ kfree(sbi->s_mb_offsets); -+ kfree(sbi->s_mb_maxs); -+ return i; -+ } -+ -+ spin_lock_init(&sbi->s_md_lock); -+ INIT_LIST_HEAD(&sbi->s_active_transaction); -+ INIT_LIST_HEAD(&sbi->s_closed_transaction); -+ 
INIT_LIST_HEAD(&sbi->s_committed_transaction); -+ spin_lock_init(&sbi->s_bal_lock); -+ -+ sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; -+ sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN; -+ sbi->s_mb_max_groups_to_scan = MB_DEFAULT_MAX_GROUPS_TO_SCAN; -+ sbi->s_mb_stats = MB_DEFAULT_STATS; -+ sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; -+ sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; -+ sbi->s_mb_history_filter = EXT3_MB_HISTORY_DEFAULT; -+ -+ i = sizeof(struct ext3_locality_group) * NR_CPUS; -+ sbi->s_locality_groups = kmalloc(i, GFP_NOFS); -+ if (sbi->s_locality_groups == NULL) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ kfree(sbi->s_mb_offsets); -+ kfree(sbi->s_mb_maxs); -+ return -ENOMEM; -+ } -+ for (i = 0; i < NR_CPUS; i++) { -+ struct ext3_locality_group *lg; -+ lg = &sbi->s_locality_groups[i]; -+ sema_init(&lg->lg_sem, 1); -+ INIT_LIST_HEAD(&lg->lg_prealloc_list); -+ spin_lock_init(&lg->lg_prealloc_lock); -+ } -+ -+ ext3_mb_init_per_dev_proc(sb); -+ ext3_mb_history_init(sb); -+ -+ printk("EXT3-fs: mballoc enabled\n"); -+ return 0; -+} -+ -+void ext3_mb_cleanup_pa(struct ext3_group_info *grp) -+{ -+ struct ext3_prealloc_space *pa; -+ struct list_head *cur, *tmp; -+ int count = 0; -+ -+ list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) { -+ pa = list_entry(cur, struct ext3_prealloc_space, pa_group_list); -+ list_del_rcu(&pa->pa_group_list); -+ count++; -+ kfree(pa); -+ } -+ if (count) -+ mb_debug("mballoc: %u PAs left\n", count); -+ -+} -+ -+int ext3_mb_release(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int i, num_meta_group_infos; -+ -+ if (!test_opt(sb, MBALLOC)) -+ return 0; -+ -+ /* release freed, non-committed blocks */ -+ spin_lock(&sbi->s_md_lock); -+ list_splice_init(&sbi->s_closed_transaction, -+ &sbi->s_committed_transaction); -+ list_splice_init(&sbi->s_active_transaction, -+ &sbi->s_committed_transaction); -+ spin_unlock(&sbi->s_md_lock); -+ ext3_mb_free_committed_blocks(sb); -+ -+ if 
(sbi->s_group_info) { -+ for (i = 0; i < sbi->s_groups_count; i++) { -+#ifdef DOUBLE_CHECK -+ if (EXT3_GROUP_INFO(sb, i)->bb_bitmap) -+ kfree(EXT3_GROUP_INFO(sb, i)->bb_bitmap); -+#endif -+ ext3_mb_cleanup_pa(EXT3_GROUP_INFO(sb, i)); -+ kfree(EXT3_GROUP_INFO(sb, i)); -+ } -+ num_meta_group_infos = (sbi->s_groups_count + -+ EXT3_DESC_PER_BLOCK(sb) - 1) >> -+ EXT3_DESC_PER_BLOCK_BITS(sb); -+ for (i = 0; i < num_meta_group_infos; i++) -+ kfree(sbi->s_group_info[i]); -+ kfree(sbi->s_group_info); -+ } -+ if (sbi->s_mb_offsets) -+ kfree(sbi->s_mb_offsets); -+ if (sbi->s_mb_maxs) -+ kfree(sbi->s_mb_maxs); -+ if (sbi->s_buddy_cache) -+ iput(sbi->s_buddy_cache); -+ if (sbi->s_mb_stats) { -+ printk("EXT3-fs: mballoc: %u blocks %u reqs (%u success)\n", -+ atomic_read(&sbi->s_bal_allocated), -+ atomic_read(&sbi->s_bal_reqs), -+ atomic_read(&sbi->s_bal_success)); -+ printk("EXT3-fs: mballoc: %u extents scanned, %u goal hits, " -+ "%u 2^N hits, %u breaks, %u lost\n", -+ atomic_read(&sbi->s_bal_ex_scanned), -+ atomic_read(&sbi->s_bal_goals), -+ atomic_read(&sbi->s_bal_2orders), -+ atomic_read(&sbi->s_bal_breaks), -+ atomic_read(&sbi->s_mb_lost_chunks)); -+ printk("EXT3-fs: mballoc: %lu generated and it took %Lu\n", -+ sbi->s_mb_buddies_generated++, -+ sbi->s_mb_generation_time); -+ printk("EXT3-fs: mballoc: %u preallocated, %u discarded\n", -+ atomic_read(&sbi->s_mb_preallocated), -+ atomic_read(&sbi->s_mb_discarded)); -+ } -+ -+ if (sbi->s_locality_groups) -+ kfree(sbi->s_locality_groups); -+ -+ ext3_mb_history_release(sb); -+ ext3_mb_destroy_per_dev_proc(sb); -+ -+ return 0; -+} -+ -+void ext3_mb_free_committed_blocks(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int err, i, count = 0, count2 = 0; -+ struct ext3_free_metadata *md; -+ struct ext3_buddy e3b; -+ -+ if (list_empty(&sbi->s_committed_transaction)) -+ return; -+ -+ /* there is committed blocks to be freed yet */ -+ do { -+ /* get next array of blocks */ -+ md = NULL; -+ 
spin_lock(&sbi->s_md_lock); -+ if (!list_empty(&sbi->s_committed_transaction)) { -+ md = list_entry(sbi->s_committed_transaction.next, -+ struct ext3_free_metadata, list); -+ list_del(&md->list); -+ } -+ spin_unlock(&sbi->s_md_lock); -+ -+ if (md == NULL) -+ break; -+ -+ mb_debug("gonna free %u blocks in group %u (0x%p):", -+ md->num, md->group, md); -+ -+ err = ext3_mb_load_buddy(sb, md->group, &e3b); -+ /* we expect to find existing buddy because it's pinned */ -+ BUG_ON(err != 0); -+ -+ /* there are blocks to put in buddy to make them really free */ -+ count += md->num; -+ count2++; -+ ext3_lock_group(sb, md->group); -+ for (i = 0; i < md->num; i++) { -+ mb_debug(" %u", md->blocks[i]); -+ err = mb_free_blocks(&e3b, md->blocks[i], 1); -+ BUG_ON(err != 0); -+ } -+ mb_debug("\n"); -+ ext3_unlock_group(sb, md->group); -+ -+ /* balance refcounts from ext3_mb_free_metadata() */ -+ page_cache_release(e3b.bd_buddy_page); -+ page_cache_release(e3b.bd_bitmap_page); -+ -+ kfree(md); -+ ext3_mb_release_desc(&e3b); -+ -+ } while (md); -+ -+ mb_debug("freed %u blocks in %u structures\n", count, count2); -+} -+ -+#define EXT3_ROOT "ext3" -+#define EXT3_MB_STATS_NAME "stats" -+#define EXT3_MB_MAX_TO_SCAN_NAME "max_to_scan" -+#define EXT3_MB_MIN_TO_SCAN_NAME "min_to_scan" -+#define EXT3_MB_ORDER2_REQ "order2_req" -+#define EXT3_MB_STREAM_REQ "stream_req" -+ -+static int ext3_mb_stats_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ struct ext3_sb_info *sbi = data; -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", sbi->s_mb_stats); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_stats_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ struct ext3_sb_info *sbi = data; -+ char str[32]; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_STATS_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ 
-+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ sbi->s_mb_stats = (simple_strtol(str, NULL, 0) != 0); -+ return count; -+} -+ -+static int ext3_mb_max_to_scan_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ struct ext3_sb_info *sbi = data; -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", sbi->s_mb_max_to_scan); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_max_to_scan_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ struct ext3_sb_info *sbi = data; -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MAX_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ sbi->s_mb_max_to_scan = value; -+ -+ return count; -+} -+ -+static int ext3_mb_min_to_scan_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ struct ext3_sb_info *sbi = data; -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", sbi->s_mb_min_to_scan); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_order2_req_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ struct ext3_sb_info *sbi = data; -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if 
(value <= 0) -+ return -ERANGE; -+ -+ sbi->s_mb_order2_reqs = value; -+ -+ return count; -+} -+ -+static int ext3_mb_order2_req_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ struct ext3_sb_info *sbi = data; -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", sbi->s_mb_order2_reqs); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ struct ext3_sb_info *sbi = data; -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ sbi->s_mb_min_to_scan = value; -+ -+ return count; -+} -+ -+static int ext3_mb_stream_req_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ struct ext3_sb_info *sbi = data; -+ int len; -+ -+ *eof = 1; -+ if (off != 0) -+ return 0; -+ -+ len = sprintf(page, "%ld\n", sbi->s_mb_stream_request); -+ *start = page; -+ return len; -+} -+ -+static int ext3_mb_stream_req_write(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ struct ext3_sb_info *sbi = data; -+ char str[32]; -+ long value; -+ -+ if (count >= sizeof(str)) { -+ printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n", -+ EXT3_MB_STREAM_REQ, (int)sizeof(str)); -+ return -EOVERFLOW; -+ } -+ -+ if (copy_from_user(str, buffer, count)) -+ return -EFAULT; -+ -+ /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */ -+ value = simple_strtol(str, NULL, 0); -+ if (value <= 0) -+ return -ERANGE; -+ -+ sbi->s_mb_stream_request = value; -+ -+ return count; -+} -+ -+int 
ext3_mb_init_per_dev_proc(struct super_block *sb) -+{ -+ mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct proc_dir_entry *proc; -+ char devname[64], *name; -+ -+ snprintf(devname, sizeof(devname) - 1, "%s", -+ bdevname(sb->s_bdev, devname)); -+ sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext3); -+ -+ name = EXT3_MB_STATS_NAME; -+ proc = create_proc_entry(name, mode, sbi->s_mb_proc); -+ if (proc == NULL) -+ goto err_out; -+ proc->data = sbi; -+ proc->read_proc = ext3_mb_stats_read; -+ proc->write_proc = ext3_mb_stats_write; -+ -+ name = EXT3_MB_MAX_TO_SCAN_NAME; -+ proc = create_proc_entry(name, mode, sbi->s_mb_proc); -+ if (proc == NULL) -+ goto err_out; -+ proc->data = sbi; -+ proc->read_proc = ext3_mb_max_to_scan_read; -+ proc->write_proc = ext3_mb_max_to_scan_write; -+ -+ name = EXT3_MB_MIN_TO_SCAN_NAME; -+ proc = create_proc_entry(name, mode, sbi->s_mb_proc); -+ if (proc == NULL) -+ goto err_out; -+ proc->data = sbi; -+ proc->read_proc = ext3_mb_min_to_scan_read; -+ proc->write_proc = ext3_mb_min_to_scan_write; -+ -+ name = EXT3_MB_ORDER2_REQ; -+ proc = create_proc_entry(name, mode, sbi->s_mb_proc); -+ if (proc == NULL) -+ goto err_out; -+ proc->data = sbi; -+ proc->read_proc = ext3_mb_order2_req_read; -+ proc->write_proc = ext3_mb_order2_req_write; -+ -+ name = EXT3_MB_STREAM_REQ; -+ proc = create_proc_entry(name, mode, sbi->s_mb_proc); -+ if (proc == NULL) -+ goto err_out; -+ proc->data = sbi; -+ proc->read_proc = ext3_mb_stream_req_read; -+ proc->write_proc = ext3_mb_stream_req_write; -+ -+ return 0; -+ -+err_out: -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", name); -+ remove_proc_entry(EXT3_MB_STREAM_REQ, sbi->s_mb_proc); -+ remove_proc_entry(EXT3_MB_ORDER2_REQ, sbi->s_mb_proc); -+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); -+ remove_proc_entry(EXT3_MB_STATS_NAME, sbi->s_mb_proc); -+ remove_proc_entry(devname, 
proc_root_ext3); -+ sbi->s_mb_proc = NULL; -+ -+ return -ENOMEM; -+} -+ -+int ext3_mb_destroy_per_dev_proc(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ char devname[64]; -+ -+ if (sbi->s_mb_proc == NULL) -+ return -EINVAL; -+ -+ snprintf(devname, sizeof(devname) - 1, "%s", -+ bdevname(sb->s_bdev, devname)); -+ remove_proc_entry(EXT3_MB_STREAM_REQ, sbi->s_mb_proc); -+ remove_proc_entry(EXT3_MB_ORDER2_REQ, sbi->s_mb_proc); -+ remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); -+ remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); -+ remove_proc_entry(EXT3_MB_STATS_NAME, sbi->s_mb_proc); -+ remove_proc_entry(devname, proc_root_ext3); -+ -+ return 0; -+} -+ -+int __init init_ext3_proc(void) -+{ -+ ext3_pspace_cachep = -+ kmem_cache_create("ext3_prealloc_space", -+ sizeof(struct ext3_prealloc_space), -+ 0, SLAB_RECLAIM_ACCOUNT, NULL, NULL); -+ if (ext3_pspace_cachep == NULL) -+ return -ENOMEM; -+ -+ proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs); -+ if (proc_root_ext3 == NULL) -+ printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT); -+ -+ return 0; -+} -+ -+void exit_ext3_proc(void) -+{ -+ /* XXX: synchronize_rcu(); */ -+ kmem_cache_destroy(ext3_pspace_cachep); -+ remove_proc_entry(EXT3_ROOT, proc_root_fs); -+} -+ -+ -+/* -+ * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps -+ * Returns 0 if success or error code -+ */ -+int ext3_mb_mark_diskspace_used(struct ext3_allocation_context *ac, handle_t *handle) -+{ -+ struct buffer_head *bitmap_bh = NULL; -+ struct ext3_super_block *es; -+ struct ext3_group_desc *gdp; -+ struct buffer_head *gdp_bh; -+ struct ext3_sb_info *sbi; -+ struct super_block *sb; -+ sector_t block; -+ int len, err; -+ -+ BUG_ON(ac->ac_status != AC_STATUS_FOUND); -+ BUG_ON(ac->ac_b_ex.fe_len <= 0); -+ -+ sb = ac->ac_sb; -+ sbi = EXT3_SB(sb); -+ es = sbi->s_es; -+ -+ ext3_debug("using block group %d(%d)\n", ac->ac_b_group.group, -+ gdp->bg_free_blocks_count); -+ -+ /* time 
to check quota, we can't do this before because -+ * having quota spent on preallocated-unused-yet blocks -+ * would be wrong */ -+ len = ac->ac_b_ex.fe_len; -+ while (len && DQUOT_ALLOC_BLOCK(ac->ac_inode, len)) len--; -+ if (ac->ac_b_ex.fe_len != len) { -+ /* some blocks can't be allocated due to quota -+ * we have to return them back */ -+ BUG(); -+ } -+ err = -EDQUOT; -+ if (len == 0) -+ goto out_err; -+ -+ err = -EIO; -+ bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group); -+ if (!bitmap_bh) -+ goto out_err; -+ -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) -+ goto out_err; -+ -+ err = -EIO; -+ gdp = ext3_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh); -+ if (!gdp) -+ goto out_err; -+ -+ err = ext3_journal_get_write_access(handle, gdp_bh); -+ if (err) -+ goto out_err; -+ -+ block = ac->ac_b_ex.fe_group * EXT3_BLOCKS_PER_GROUP(sb) -+ + ac->ac_b_ex.fe_start -+ + le32_to_cpu(es->s_first_data_block); -+ -+ if (block == le32_to_cpu(gdp->bg_block_bitmap) || -+ block == le32_to_cpu(gdp->bg_inode_bitmap) || -+ in_range(block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group)) -+ ext3_error(sb, __FUNCTION__, -+ "Allocating block in system zone - block = %lu", -+ (unsigned long) block); -+#ifdef AGGRESSIVE_CHECK -+ { -+ int i; -+ for (i = 0; i < ac->ac_b_ex.fe_len; i++) { -+ BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i, -+ bitmap_bh->b_data)); -+ } -+ } -+#endif -+ mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); -+ -+ spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -+ - ac->ac_b_ex.fe_len); -+ spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, - ac->ac_b_ex.fe_len); -+ -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ if (err) -+ goto out_err; -+ err = ext3_journal_dirty_metadata(handle, gdp_bh); -+ -+out_err: -+ sb->s_dirt = 1; -+ brelse(bitmap_bh); 
-+ return err; -+} -+ -+/* -+ * here we normalize request for locality group -+ * XXX: should we try to preallocate more than the group has now? -+ */ -+void ext3_mb_normalize_group_request(struct ext3_allocation_context *ac) -+{ -+ struct super_block *sb = ac->ac_sb; -+ struct ext3_locality_group *lg = ac->ac_lg; -+ -+ BUG_ON(lg == NULL); -+ if (EXT3_SB(sb)->s_stripe) -+ ac->ac_g_ex.fe_len = EXT3_SB(sb)->s_stripe; -+ else -+ ac->ac_g_ex.fe_len = (1024 * 1024) >> sb->s_blocksize_bits; -+ -+ mb_debug("#%u: goal %u blocks for locality group\n", -+ current->pid, ac->ac_g_ex.fe_len); -+} -+ -+/* -+ * Normalization means making request better in terms of -+ * size and alignment -+ */ -+void ext3_mb_normalize_request(struct ext3_allocation_context *ac, -+ struct ext3_allocation_request *ar) -+{ -+ struct ext3_inode_info *ei = EXT3_I(ac->ac_inode); -+ loff_t start, end, size, orig_size, orig_start; -+ struct list_head *cur; -+ int bsbits; -+ -+ /* do normalize only data requests, metadata requests -+ do not need preallocation */ -+ if (!(ac->ac_flags & EXT3_MB_HINT_DATA)) -+ return; -+ -+ /* sometime caller may want exact blocks */ -+ if (unlikely(ac->ac_flags & EXT3_MB_HINT_GOAL_ONLY)) -+ return; -+ -+ /* caller may indicate that preallocation isn't -+ * required (it's a tail, for example) */ -+ if (ac->ac_flags & EXT3_MB_HINT_NOPREALLOC) -+ return; -+ -+ if (ac->ac_flags & EXT3_MB_HINT_GROUP_ALLOC) -+ return ext3_mb_normalize_group_request(ac); -+ -+ bsbits = ac->ac_sb->s_blocksize_bits; -+ -+ /* first, let's learn actual file size -+ * given current request is allocated */ -+ size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; -+ size = size << bsbits; -+ if (size < i_size_read(ac->ac_inode)) -+ size = i_size_read(ac->ac_inode); -+ -+ /* first, try to predict filesize */ -+ /* XXX: should this table be tunable? 
*/ -+ start = 0; -+ if (size <= 16 * 1024) { -+ size = 16 * 1024; -+ } else if (size <= 32 * 1024) { -+ size = 32 * 1024; -+ } else if (size <= 64 * 1024) { -+ size = 64 * 1024; -+ } else if (size <= 128 * 1024) { -+ size = 128 * 1024; -+ } else if (size <= 256 * 1024) { -+ size = 256 * 1024; -+ } else if (size <= 512 * 1024) { -+ size = 512 * 1024; -+ } else if (size <= 1024 * 1024) { -+ size = 1024 * 1024; -+ } else if (size < 4 * 1024 * 1024) { -+ start = ac->ac_o_ex.fe_logical << bsbits; -+ start = (start / (1024 * 1024)) * (1024 * 1024); -+ size = 1024 * 1024; -+ } else if (size < 8 * 1024 * 1024) { -+ start = ac->ac_o_ex.fe_logical << bsbits; -+ start = (start / (4 * (1024 * 1024))) * 4 * (1024 * 1024); -+ size = 4 * 1024 * 1024; -+ } else if (ac->ac_o_ex.fe_len < ((8 << 20) >> bsbits)) { -+ start = ac->ac_o_ex.fe_logical; -+ start = start << bsbits; -+ start = (start / (8 * (1024 * 1024))) * 8 * (1024 * 1024); -+ size = 8 * 1024 * 1024; -+ } else { -+ start = ac->ac_o_ex.fe_logical; -+ start = start << bsbits; -+ size = ac->ac_o_ex.fe_len << bsbits; -+ } -+ orig_size = size = size >> bsbits; -+ orig_start = start = start >> bsbits; -+ -+ /* don't cover already allocated blocks in selected range */ -+ if (ar->pleft && start <= ar->lleft) { -+ size -= ar->lleft + 1 - start; -+ start = ar->lleft + 1; -+ } -+ if (ar->pright && start + size - 1 >= ar->lright) -+ size -= start + size - ar->lright; -+ -+ end = start + size; -+ -+ /* check we don't cross already preallocated blocks */ -+ rcu_read_lock(); -+ list_for_each_rcu(cur, &ei->i_prealloc_list) { -+ struct ext3_prealloc_space *pa; -+ unsigned long pa_end; -+ -+ pa = list_entry(cur, struct ext3_prealloc_space, pa_inode_list); -+ pa_end = pa->pa_lstart + pa->pa_len; -+ -+ /* PA must not overlap original request */ -+ BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || -+ ac->ac_o_ex.fe_logical < pa->pa_lstart)); -+ -+ /* skip PA normalized request doesn't overlap with */ -+ if (pa->pa_lstart >= end) -+ continue; -+ if 
(pa_end <= start) -+ continue; -+ BUG_ON(pa->pa_lstart <= start && pa_end >= end); -+ -+ if (pa_end <= ac->ac_o_ex.fe_logical) { -+ BUG_ON(pa_end < start); -+ start = pa_end; -+ } -+ -+ if (pa->pa_lstart > ac->ac_o_ex.fe_logical) { -+ BUG_ON(pa->pa_lstart > end); -+ end = pa->pa_lstart; -+ } -+ } -+ rcu_read_unlock(); -+ size = end - start; -+ -+ /* XXX: extra loop to check we really don't overlap preallocations */ -+ rcu_read_lock(); -+ list_for_each_rcu(cur, &ei->i_prealloc_list) { -+ struct ext3_prealloc_space *pa; -+ unsigned long pa_end; -+ pa = list_entry(cur, struct ext3_prealloc_space, pa_inode_list); -+ pa_end = pa->pa_lstart + pa->pa_len; -+ BUG_ON(!(start >= pa_end || end <= pa->pa_lstart)); -+ } -+ rcu_read_unlock(); -+ -+ if (start + size <= ac->ac_o_ex.fe_logical && -+ start > ac->ac_o_ex.fe_logical) { -+ printk("start %lu, size %lu, fe_logical %lu\n", -+ (unsigned long) start, (unsigned long) size, -+ (unsigned long) ac->ac_o_ex.fe_logical); -+ } -+ BUG_ON(start + size <= ac->ac_o_ex.fe_logical && -+ start > ac->ac_o_ex.fe_logical); -+ -+ /* now prepare goal request */ -+ BUG_ON(size <= 0 || size >= EXT3_BLOCKS_PER_GROUP(ac->ac_sb)); -+ if (size < ac->ac_o_ex.fe_len) { -+ /* XXX: don't normalize tails? 
*/ -+ } -+ -+ /* XXX: is it better to align blocks WRT to logical placement -+ * or satisfy big request as is */ -+ ac->ac_g_ex.fe_logical = start; -+ ac->ac_g_ex.fe_len = size; -+ -+ mb_debug("goal: %u(was %u) blocks at %u\n", (unsigned) size, -+ (unsigned) orig_size, (unsigned) start); -+} -+ -+void ext3_mb_collect_stats(struct ext3_allocation_context *ac) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb); -+ -+ if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) { -+ atomic_inc(&sbi->s_bal_reqs); -+ atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated); -+ if (ac->ac_o_ex.fe_len >= ac->ac_g_ex.fe_len) -+ atomic_inc(&sbi->s_bal_success); -+ atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned); -+ if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && -+ ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) -+ atomic_inc(&sbi->s_bal_goals); -+ if (ac->ac_found > sbi->s_mb_max_to_scan) -+ atomic_inc(&sbi->s_bal_breaks); -+ } -+ -+ ext3_mb_store_history(ac); -+} -+ -+/* -+ * use blocks preallocated to inode -+ */ -+void ext3_mb_use_inode_pa(struct ext3_allocation_context *ac, -+ struct ext3_prealloc_space *pa) -+{ -+ unsigned long start, len; -+ -+ /* found preallocated blocks, use them */ -+ start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart); -+ len = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len); -+ len = len - start; -+ ext3_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group, -+ &ac->ac_b_ex.fe_start); -+ ac->ac_b_ex.fe_len = len; -+ ac->ac_status = AC_STATUS_FOUND; -+ ac->ac_pa = pa; -+ -+ BUG_ON(start < pa->pa_pstart); -+ BUG_ON(start + len > pa->pa_pstart + pa->pa_len); -+ BUG_ON(pa->pa_free < len); -+ pa->pa_free -= len; -+ -+ mb_debug("use %lu/%lu from inode pa %p\n", start, len, pa); -+} -+ -+/* -+ * use blocks preallocated to locality group -+ */ -+void ext3_mb_use_group_pa(struct ext3_allocation_context *ac, -+ struct ext3_prealloc_space *pa) -+{ -+ unsigned len = ac->ac_o_ex.fe_len; -+ -+ ext3_get_group_no_and_offset(ac->ac_sb, 
pa->pa_pstart, -+ &ac->ac_b_ex.fe_group, -+ &ac->ac_b_ex.fe_start); -+ ac->ac_b_ex.fe_len = len; -+ ac->ac_status = AC_STATUS_FOUND; -+ ac->ac_pa = pa; -+ -+ /* we don't correct pa_pstart or pa_plen here to avoid -+ * possible race when tte group is being loaded concurrently -+ * instead we correct pa later, after blocks are marked -+ * in on-disk bitmap -- see ext3_mb_release_context() */ -+ mb_debug("use %lu/%lu from group pa %p\n", pa->pa_lstart-len, len, pa); -+} -+ -+/* -+ * search goal blocks in preallocated space -+ */ -+int ext3_mb_use_preallocated(struct ext3_allocation_context *ac) -+{ -+ struct ext3_inode_info *ei = EXT3_I(ac->ac_inode); -+ struct ext3_locality_group *lg; -+ struct ext3_prealloc_space *pa; -+ struct list_head *cur; -+ -+ /* only data can be preallocated */ -+ if (!(ac->ac_flags & EXT3_MB_HINT_DATA)) -+ return 0; -+ -+ /* first, try per-file preallocation */ -+ rcu_read_lock(); -+ list_for_each_rcu(cur, &ei->i_prealloc_list) { -+ pa = list_entry(cur, struct ext3_prealloc_space, pa_inode_list); -+ -+ /* all fields in this condition don't change, -+ * so we can skip locking for them */ -+ if (ac->ac_o_ex.fe_logical < pa->pa_lstart || -+ ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) -+ continue; -+ -+ /* found preallocated blocks, use them */ -+ spin_lock(&pa->pa_lock); -+ if (pa->pa_deleted == 0 && pa->pa_free) { -+ atomic_inc(&pa->pa_count); -+ ext3_mb_use_inode_pa(ac, pa); -+ spin_unlock(&pa->pa_lock); -+ ac->ac_criteria = 10; -+ rcu_read_unlock(); -+ return 1; -+ } -+ spin_unlock(&pa->pa_lock); -+ } -+ rcu_read_unlock(); -+ -+ /* can we use group allocation? 
*/ -+ if (!(ac->ac_flags & EXT3_MB_HINT_GROUP_ALLOC)) -+ return 0; -+ -+ /* inode may have no locality group for some reason */ -+ lg = ac->ac_lg; -+ if (lg == NULL) -+ return 0; -+ -+ rcu_read_lock(); -+ list_for_each_rcu(cur, &lg->lg_prealloc_list) { -+ pa = list_entry(cur, struct ext3_prealloc_space, pa_inode_list); -+ spin_lock(&pa->pa_lock); -+ if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { -+ atomic_inc(&pa->pa_count); -+ ext3_mb_use_group_pa(ac, pa); -+ spin_unlock(&pa->pa_lock); -+ ac->ac_criteria = 20; -+ rcu_read_unlock(); -+ return 1; -+ } -+ spin_unlock(&pa->pa_lock); -+ } -+ rcu_read_unlock(); -+ -+ return 0; -+} -+ -+/* -+ * the function goes through all preallocation in this group and marks them -+ * used in in-core bitmap. buddy must be generated from this bitmap -+ */ -+void ext3_mb_generate_from_pa(struct super_block *sb, void *bitmap, int group) -+{ -+ struct ext3_group_info *grp = EXT3_GROUP_INFO(sb, group); -+ struct ext3_prealloc_space *pa; -+ struct list_head *cur; -+ unsigned long groupnr; -+ unsigned long start; -+ int preallocated = 0, count = 0, len; -+ -+ /* all form of preallocation discards first load group, -+ * so the only competing code is preallocation use. 
-+ * we don't need any locking here -+ * notice we do NOT ignore preallocations with pa_deleted -+ * otherwise we could leave used blocks available for -+ * allocation in buddy when concurrent ext3_mb_put_pa() -+ * is dropping preallocation -+ */ -+ list_for_each_rcu(cur, &grp->bb_prealloc_list) { -+ pa = list_entry(cur, struct ext3_prealloc_space, pa_group_list); -+ spin_lock(&pa->pa_lock); -+ ext3_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &start); -+ len = pa->pa_len; -+ spin_unlock(&pa->pa_lock); -+ BUG_ON(groupnr != group); -+ mb_set_bits(bitmap, start, len); -+ preallocated += len; -+ count++; -+ } -+ mb_debug("prellocated %u for group %u\n", preallocated, group); -+} -+ -+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,5) -+static void ext3_mb_pa_callback(struct rcu_head *head) -+{ -+ struct ext3_prealloc_space *pa; -+ pa = container_of(head, struct ext3_prealloc_space, u.pa_rcu); -+ kmem_cache_free(ext3_pspace_cachep, pa); -+} -+#define mb_call_rcu(__pa) call_rcu(&(__pa)->u.pa_rcu, ext3_mb_pa_callback) -+#else -+static void ext3_mb_pa_callback(void *pa) -+{ -+ kmem_cache_free(ext3_pspace_cachep, pa); -+} -+#define mb_call_rcu(__pa) call_rcu(&(__pa)->u.pa_rcu, ext3_mb_pa_callback, pa) -+#endif -+ -+/* -+ * drops a reference to preallocated space descriptor -+ * if this was the last reference and the space is consumed -+ */ -+void ext3_mb_put_pa(struct ext3_allocation_context *ac, -+ struct super_block *sb, struct ext3_prealloc_space *pa) -+{ -+ unsigned long grp; -+ -+ if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) -+ return; -+ -+ /* in this short window concurrent discard can set pa_deleted */ -+ spin_lock(&pa->pa_lock); -+ if (pa->pa_deleted == 0) { -+ spin_unlock(&pa->pa_lock); -+ return; -+ } -+ -+ pa->pa_deleted = 1; -+ spin_unlock(&pa->pa_lock); -+ -+ /* -1 is to protect from crossing allocation group */ -+ ext3_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL); -+ -+ /* -+ * possible race: -+ * -+ * P1 (buddy init) P2 
(regular allocation) -+ * find block B in PA -+ * copy on-disk bitmap to buddy -+ * mark B in on-disk bitmap -+ * drop PA from group -+ * mark all PAs in buddy -+ * -+ * thus, P1 initializes buddy with B available. to prevent this -+ * we make "copy" and "mark all PAs" atomic and serialize "drop PA" -+ * against that pair -+ */ -+ ext3_lock_group(sb, grp); -+ list_del_rcu(&pa->pa_group_list); -+ ext3_unlock_group(sb, grp); -+ -+ spin_lock(pa->pa_obj_lock); -+ list_del_rcu(&pa->pa_inode_list); -+ spin_unlock(pa->pa_obj_lock); -+ -+ mb_call_rcu(pa); -+} -+ -+/* -+ * creates new preallocated space for given inode -+ */ -+int ext3_mb_new_inode_pa(struct ext3_allocation_context *ac) -+{ -+ struct super_block *sb = ac->ac_sb; -+ struct ext3_prealloc_space *pa; -+ struct ext3_group_info *grp; -+ struct ext3_inode_info *ei; -+ -+ /* preallocate only when found space is larger then requested */ -+ BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len); -+ BUG_ON(ac->ac_status != AC_STATUS_FOUND); -+ BUG_ON(!S_ISREG(ac->ac_inode->i_mode)); -+ -+ pa = kmem_cache_alloc(ext3_pspace_cachep, SLAB_NOFS); -+ if (pa == NULL) -+ return -ENOMEM; -+ -+ if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) { -+ int winl, wins, win, offs; -+ -+ /* we can't allocate as much as normalizer wants. 
-+ * so, found space must get proper lstart -+ * to cover original request */ -+ BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical); -+ BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len); -+ -+ /* we're limited by original request in that -+ * logical block must be covered any way -+ * winl is window we can move our chunk within */ -+ winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical; -+ -+ /* also, we should cover whole original request */ -+ wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len; -+ -+ /* the smallest one defines real window */ -+ win = min(winl, wins); -+ -+ offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len; -+ if (offs && offs < win) -+ win = offs; -+ -+ ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win; -+ BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); -+ BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); -+ } -+ -+ /* preallocation can change ac_b_ex, thus we store actually -+ * allocated blocks for history */ -+ ac->ac_f_ex = ac->ac_b_ex; -+ -+ pa->pa_lstart = ac->ac_b_ex.fe_logical; -+ pa->pa_pstart = ext3_grp_offs_to_block(sb, &ac->ac_b_ex); -+ pa->pa_len = ac->ac_b_ex.fe_len; -+ pa->pa_free = pa->pa_len; -+ atomic_set(&pa->pa_count, 1); -+ spin_lock_init(&pa->pa_lock); -+ pa->pa_deleted = 0; -+ pa->pa_linear = 0; -+ -+ mb_debug("new inode pa %p: %lu/%lu for %lu\n", pa, -+ pa->pa_pstart, pa->pa_len, pa->pa_lstart); -+ -+ ext3_mb_use_inode_pa(ac, pa); -+ atomic_add(pa->pa_free, &EXT3_SB(sb)->s_mb_preallocated); -+ -+ ei = EXT3_I(ac->ac_inode); -+ grp = EXT3_GROUP_INFO(sb, ac->ac_b_ex.fe_group); -+ -+ pa->pa_obj_lock = &ei->i_prealloc_lock; -+ pa->pa_inode = ac->ac_inode; -+ -+ ext3_lock_group(sb, ac->ac_b_ex.fe_group); -+ list_add_rcu(&pa->pa_group_list, &grp->bb_prealloc_list); -+ ext3_unlock_group(sb, ac->ac_b_ex.fe_group); -+ -+ spin_lock(pa->pa_obj_lock); -+ list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list); -+ spin_unlock(pa->pa_obj_lock); -+ -+ return 0; -+} -+ -+/* -+ * creates new preallocated space for locality 
group inodes belongs to -+ */ -+int ext3_mb_new_group_pa(struct ext3_allocation_context *ac) -+{ -+ struct super_block *sb = ac->ac_sb; -+ struct ext3_locality_group *lg; -+ struct ext3_prealloc_space *pa; -+ struct ext3_group_info *grp; -+ -+ /* preallocate only when found space is larger then requested */ -+ BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len); -+ BUG_ON(ac->ac_status != AC_STATUS_FOUND); -+ BUG_ON(!S_ISREG(ac->ac_inode->i_mode)); -+ -+ BUG_ON(ext3_pspace_cachep == NULL); -+ pa = kmem_cache_alloc(ext3_pspace_cachep, SLAB_NOFS); -+ if (pa == NULL) -+ return -ENOMEM; -+ -+ /* preallocation can change ac_b_ex, thus we store actually -+ * allocated blocks for history */ -+ ac->ac_f_ex = ac->ac_b_ex; -+ -+ pa->pa_pstart = ext3_grp_offs_to_block(sb, &ac->ac_b_ex); -+ pa->pa_lstart = pa->pa_pstart; -+ pa->pa_len = ac->ac_b_ex.fe_len; -+ pa->pa_free = pa->pa_len; -+ atomic_set(&pa->pa_count, 1); -+ spin_lock_init(&pa->pa_lock); -+ pa->pa_deleted = 0; -+ pa->pa_linear = 1; -+ -+ mb_debug("new group pa %p: %lu/%lu for %lu\n", pa, -+ pa->pa_pstart, pa->pa_len, pa->pa_lstart); -+ -+ ext3_mb_use_group_pa(ac, pa); -+ atomic_add(pa->pa_free, &EXT3_SB(sb)->s_mb_preallocated); -+ -+ grp = EXT3_GROUP_INFO(sb, ac->ac_b_ex.fe_group); -+ lg = ac->ac_lg; -+ BUG_ON(lg == NULL); -+ -+ pa->pa_obj_lock = &lg->lg_prealloc_lock; -+ pa->pa_inode = NULL; -+ -+ ext3_lock_group(sb, ac->ac_b_ex.fe_group); -+ list_add_rcu(&pa->pa_group_list, &grp->bb_prealloc_list); -+ ext3_unlock_group(sb, ac->ac_b_ex.fe_group); -+ -+ spin_lock(pa->pa_obj_lock); -+ list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list); -+ spin_unlock(pa->pa_obj_lock); -+ -+ return 0; -+} -+ -+int ext3_mb_new_preallocation(struct ext3_allocation_context *ac) -+{ -+ int err; -+ -+ if (ac->ac_flags & EXT3_MB_HINT_GROUP_ALLOC) -+ err = ext3_mb_new_group_pa(ac); -+ else -+ err = ext3_mb_new_inode_pa(ac); -+ return err; -+} -+ -+/* -+ * finds all unused blocks in on-disk bitmap, frees them in -+ * in-core bitmap 
and buddy. -+ * @pa must be unlinked from inode and group lists, so that -+ * nobody else can find/use it. -+ * the caller MUST hold group/inode locks. -+ * TODO: optimize the case when there are no in-core structures yet -+ */ -+int ext3_mb_release_inode_pa(struct ext3_buddy *e3b, -+ struct buffer_head *bitmap_bh, -+ struct ext3_prealloc_space *pa) -+{ -+ struct ext3_allocation_context ac; -+ struct super_block *sb = e3b->bd_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ unsigned long bit, end, next, group; -+ sector_t start; -+ int err = 0, free = 0; -+ -+ BUG_ON(pa->pa_deleted == 0); -+ ext3_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); -+ BUG_ON(group != e3b->bd_group); -+ end = bit + pa->pa_len; -+ -+ ac.ac_sb = sb; -+ ac.ac_inode = pa->pa_inode; -+ ac.ac_op = EXT3_MB_HISTORY_DISCARD; -+ -+ while (bit < end) { -+ bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); -+ if (bit >= end) -+ break; -+ next = mb_find_next_bit(bitmap_bh->b_data, end, bit); -+ if (next > end) -+ next = end; -+ start = group * EXT3_BLOCKS_PER_GROUP(sb) + bit + -+ le32_to_cpu(sbi->s_es->s_first_data_block); -+ mb_debug(" free preallocated %u/%u in group %u\n", -+ (unsigned) start, (unsigned) next - bit, -+ (unsigned) group); -+ free += next - bit; -+ -+ ac.ac_b_ex.fe_group = group; -+ ac.ac_b_ex.fe_start = bit; -+ ac.ac_b_ex.fe_len = next - bit; -+ ac.ac_b_ex.fe_logical = 0; -+ ext3_mb_store_history(&ac); -+ -+ mb_free_blocks(e3b, bit, next - bit); -+ bit = next + 1; -+ } -+ if (free != pa->pa_free) { -+ printk("pa %p: logic %lu, phys. 
%lu, len %lu\n", -+ pa, (unsigned long) pa->pa_lstart, -+ (unsigned long) pa->pa_pstart, -+ (unsigned long) pa->pa_len); -+ printk("free %u, pa_free %u\n", free, pa->pa_free); -+ } -+ BUG_ON(free != pa->pa_free); -+ atomic_add(free, &sbi->s_mb_discarded); -+ -+ return err; -+} -+ -+int ext3_mb_release_group_pa(struct ext3_buddy *e3b, -+ struct ext3_prealloc_space *pa) -+{ -+ struct ext3_allocation_context ac; -+ struct super_block *sb = e3b->bd_sb; -+ unsigned long bit, group; -+ -+ ac.ac_op = EXT3_MB_HISTORY_DISCARD; -+ -+ BUG_ON(pa->pa_deleted == 0); -+ ext3_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); -+ BUG_ON(group != e3b->bd_group); -+ mb_free_blocks(e3b, bit, pa->pa_len); -+ atomic_add(pa->pa_len, &EXT3_SB(sb)->s_mb_discarded); -+ -+ ac.ac_sb = sb; -+ ac.ac_inode = NULL; -+ ac.ac_b_ex.fe_group = group; -+ ac.ac_b_ex.fe_start = bit; -+ ac.ac_b_ex.fe_len = pa->pa_len; -+ ac.ac_b_ex.fe_logical = 0; -+ ext3_mb_store_history(&ac); -+ -+ return 0; -+} -+ -+/* -+ * releases all preallocations in given group -+ * -+ * first, we need to decide discard policy: -+ * - when do we discard -+ * 1) ENOSPC -+ * - how many do we discard -+ * 1) how many requested -+ */ -+int ext3_mb_discard_group_preallocations(struct super_block *sb, -+ int group, int needed) -+{ -+ struct ext3_group_info *grp = EXT3_GROUP_INFO(sb, group); -+ struct buffer_head *bitmap_bh = NULL; -+ struct ext3_prealloc_space *pa, *tmp; -+ struct list_head list; -+ struct ext3_buddy e3b; -+ int err, busy, free = 0; -+ -+ mb_debug("discard preallocation for group %lu\n", group); -+ -+ if (list_empty(&grp->bb_prealloc_list)) -+ return 0; -+ -+ bitmap_bh = read_block_bitmap(sb, group); -+ if (bitmap_bh == NULL) { -+ /* error handling here */ -+ ext3_mb_release_desc(&e3b); -+ BUG_ON(bitmap_bh == NULL); -+ } -+ -+ err = ext3_mb_load_buddy(sb, group, &e3b); -+ BUG_ON(err != 0); /* error handling here */ -+ -+ if (needed == 0) -+ needed = EXT3_BLOCKS_PER_GROUP(sb) + 1; -+ -+ grp = EXT3_GROUP_INFO(sb, 
group); -+ INIT_LIST_HEAD(&list); -+ -+repeat: -+ busy = 0; -+ ext3_lock_group(sb, group); -+ list_for_each_entry_safe (pa, tmp, &grp->bb_prealloc_list, pa_group_list) { -+ spin_lock(&pa->pa_lock); -+ if (atomic_read(&pa->pa_count)) { -+ spin_unlock(&pa->pa_lock); -+ printk("uh! busy PA\n"); -+ dump_stack(); -+ busy = 1; -+ continue; -+ } -+ if (pa->pa_deleted) { -+ spin_unlock(&pa->pa_lock); -+ continue; -+ } -+ -+ /* seems this one can be freed ... */ -+ pa->pa_deleted = 1; -+ -+ /* we can trust pa_free ... */ -+ free += pa->pa_free; -+ -+ spin_unlock(&pa->pa_lock); -+ -+ list_del_rcu(&pa->pa_group_list); -+ list_add(&pa->u.pa_tmp_list, &list); -+ } -+ -+ /* if we still need more blocks and some PAs were used, try again */ -+ if (free < needed && busy) -+ goto repeat; -+ -+ /* found anything to free? */ -+ if (list_empty(&list)) { -+ BUG_ON(free != 0); -+ goto out; -+ } -+ -+ /* now free all selected PAs */ -+ list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) { -+ -+ /* remove from object (inode or locality group) */ -+ spin_lock(pa->pa_obj_lock); -+ list_del_rcu(&pa->pa_inode_list); -+ spin_unlock(pa->pa_obj_lock); -+ -+ if (pa->pa_linear) -+ ext3_mb_release_group_pa(&e3b, pa); -+ else -+ ext3_mb_release_inode_pa(&e3b, bitmap_bh, pa); -+ -+ list_del(&pa->u.pa_tmp_list); -+ mb_call_rcu(pa); -+ } -+ -+out: -+ ext3_unlock_group(sb, group); -+ ext3_mb_release_desc(&e3b); -+ brelse(bitmap_bh); -+ return free; -+} -+ -+/* -+ * releases all non-used preallocated blocks for given inode -+ */ -+void ext3_mb_discard_inode_preallocations(struct inode *inode) -+{ -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bitmap_bh = NULL; -+ struct ext3_prealloc_space *pa, *tmp; -+ unsigned long group = 0; -+ struct list_head list; -+ struct ext3_buddy e3b; -+ int err; -+ -+ if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) { -+ /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ -+ return; -+ } -+ -+ 
mb_debug("discard preallocation for inode %lu\n", inode->i_ino); -+ -+ INIT_LIST_HEAD(&list); -+ -+repeat: -+ /* first, collect all pa's in the inode */ -+ spin_lock(&ei->i_prealloc_lock); -+ while (!list_empty(&ei->i_prealloc_list)) { -+ pa = list_entry(ei->i_prealloc_list.next, -+ struct ext3_prealloc_space, pa_inode_list); -+ BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock); -+ spin_lock(&pa->pa_lock); -+ if (atomic_read(&pa->pa_count)) { -+ /* this shouldn't happen often - nobody should -+ * use preallocation while we're discarding it */ -+ spin_unlock(&pa->pa_lock); -+ spin_unlock(&ei->i_prealloc_lock); -+ printk("uh-oh! used pa while discarding\n"); -+ dump_stack(); -+ current->state = TASK_UNINTERRUPTIBLE; -+ schedule_timeout(HZ); -+ goto repeat; -+ -+ } -+ if (pa->pa_deleted == 0) { -+ pa->pa_deleted = 1; -+ spin_unlock(&pa->pa_lock); -+ list_del_rcu(&pa->pa_inode_list); -+ list_add(&pa->u.pa_tmp_list, &list); -+ continue; -+ } -+ -+ /* someone is deleting pa right now */ -+ spin_unlock(&pa->pa_lock); -+ spin_unlock(&ei->i_prealloc_lock); -+ -+ /* we have to wait here because pa_deleted -+ * doesn't mean pa is already unlinked from -+ * the list. as we might be called from -+ * ->clear_inode() the inode will get freed -+ * and concurrent thread which is unlinking -+ * pa from inode's list may access already -+ * freed memory, bad-bad-bad */ -+ -+ /* XXX: if this happens too often, we can -+ * add a flag to force wait only in case -+ * of ->clear_inode(), but not in case of -+ * regular truncate */ -+ printk("uh-oh! 
some one just deleted it\n"); -+ dump_stack(); -+ current->state = TASK_UNINTERRUPTIBLE; -+ schedule_timeout(HZ); -+ goto repeat; -+ } -+ spin_unlock(&ei->i_prealloc_lock); -+ -+ list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) { -+ BUG_ON(pa->pa_linear != 0); -+ ext3_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); -+ -+ err = ext3_mb_load_buddy(sb, group, &e3b); -+ BUG_ON(err != 0); /* error handling here */ -+ -+ bitmap_bh = read_block_bitmap(sb, group); -+ if (bitmap_bh == NULL) { -+ /* error handling here */ -+ ext3_mb_release_desc(&e3b); -+ BUG_ON(bitmap_bh == NULL); -+ } -+ -+ ext3_lock_group(sb, group); -+ list_del_rcu(&pa->pa_group_list); -+ ext3_mb_release_inode_pa(&e3b, bitmap_bh, pa); -+ ext3_unlock_group(sb, group); -+ -+ ext3_mb_release_desc(&e3b); -+ brelse(bitmap_bh); -+ -+ list_del(&pa->u.pa_tmp_list); -+ mb_call_rcu(pa); -+ } -+} -+ -+/* -+ * finds all preallocated spaces and return blocks being freed to them -+ * if preallocated space becomes full (no block is used from the space) -+ * then the function frees space in buddy -+ * XXX: at the moment, truncate (which is the only way to free blocks) -+ * discards all preallocations -+ */ -+void ext3_mb_return_to_preallocation(struct inode *inode, struct ext3_buddy *e3b, -+ sector_t block, int count) -+{ -+ BUG_ON(!list_empty(&EXT3_I(inode)->i_prealloc_list)); -+} -+ -+void ext3_mb_show_ac(struct ext3_allocation_context *ac) -+{ -+#if 0 -+ struct super_block *sb = ac->ac_sb; -+ int i; -+ -+ printk(KERN_ERR "EXT3-fs: can't allocate: status %d flags %d\n", -+ ac->ac_status, ac->ac_flags); -+ printk(KERN_ERR "EXT3-fs: orig %lu/%lu/%lu@%lu, goal %lu/%lu/%lu@%lu, " -+ "best %lu/%lu/%lu@%lu cr %d\n", -+ ac->ac_o_ex.fe_group, ac->ac_o_ex.fe_start, -+ ac->ac_o_ex.fe_len, ac->ac_o_ex.fe_logical, -+ ac->ac_g_ex.fe_group, ac->ac_g_ex.fe_start, -+ ac->ac_g_ex.fe_len, ac->ac_g_ex.fe_logical, -+ ac->ac_b_ex.fe_group, ac->ac_b_ex.fe_start, -+ ac->ac_b_ex.fe_len, ac->ac_b_ex.fe_logical, -+ 
ac->ac_criteria); -+ printk(KERN_ERR "EXT3-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, -+ ac->ac_found); -+ printk("EXT3-fs: groups: "); -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { -+ struct ext3_group_info *grp = EXT3_GROUP_INFO(sb, i); -+ struct ext3_prealloc_space *pa; -+ unsigned long start; -+ struct list_head *cur; -+ list_for_each_rcu(cur, &grp->bb_prealloc_list) { -+ pa = list_entry(cur, struct ext3_prealloc_space, -+ pa_group_list); -+ spin_lock(&pa->pa_lock); -+ ext3_get_group_no_and_offset(sb, pa->pa_pstart, NULL, &start); -+ spin_unlock(&pa->pa_lock); -+ printk("PA:%u:%lu:%u ", i, start, pa->pa_len); -+ } -+ -+ if (grp->bb_free == 0) -+ continue; -+ printk("%d: %d/%d ", i, grp->bb_free, grp->bb_fragments); -+ } -+ printk("\n"); -+ //dump_stack(); -+#endif -+} -+ -+void ext3_mb_group_or_file(struct ext3_allocation_context *ac) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb); -+ int bsbits = ac->ac_sb->s_blocksize_bits; -+ loff_t size, isize; -+ -+ if (!(ac->ac_flags & EXT3_MB_HINT_DATA)) -+ return; -+ -+ size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; -+ isize = i_size_read(ac->ac_inode) >> bsbits; -+ if (size < isize) -+ size = isize; -+ -+ /* don't use group allocation for large files */ -+ if (size >= sbi->s_mb_stream_request) -+ return; -+ -+ if (unlikely(ac->ac_flags & EXT3_MB_HINT_GOAL_ONLY)) -+ return; -+ -+ BUG_ON(ac->ac_lg != NULL); -+ ac->ac_lg = &sbi->s_locality_groups[smp_processor_id()]; -+ -+ /* we're going to use group allocation */ -+ ac->ac_flags |= EXT3_MB_HINT_GROUP_ALLOC; -+ -+ /* serialize all allocations in the group */ -+ down(&ac->ac_lg->lg_sem); -+} -+ -+int ext3_mb_initialize_context(struct ext3_allocation_context *ac, -+ struct ext3_allocation_request *ar) -+{ -+ struct super_block *sb = ar->inode->i_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_super_block *es = sbi->s_es; -+ unsigned long group, len, goal; -+ unsigned long block; -+ -+ /* we can't allocate > group size */ -+ len = 
ar->len; -+ if (len >= EXT3_BLOCKS_PER_GROUP(sb) - 10) -+ len = EXT3_BLOCKS_PER_GROUP(sb) - 10; -+ -+ /* start searching from the goal */ -+ goal = ar->goal; -+ if (goal < le32_to_cpu(es->s_first_data_block) || -+ goal >= le32_to_cpu(es->s_blocks_count)) -+ goal = le32_to_cpu(es->s_first_data_block); -+ ext3_get_group_no_and_offset(sb, goal, &group, &block); -+ -+ /* set up allocation goals */ -+ ac->ac_b_ex.fe_logical = ar->logical; -+ ac->ac_b_ex.fe_group = 0; -+ ac->ac_b_ex.fe_start = 0; -+ ac->ac_b_ex.fe_len = 0; -+ ac->ac_status = AC_STATUS_CONTINUE; -+ ac->ac_groups_scanned = 0; -+ ac->ac_ex_scanned = 0; -+ ac->ac_found = 0; -+ ac->ac_sb = sb; -+ ac->ac_inode = ar->inode; -+ ac->ac_o_ex.fe_logical = ar->logical; -+ ac->ac_o_ex.fe_group = group; -+ ac->ac_o_ex.fe_start = block; -+ ac->ac_o_ex.fe_len = len; -+ ac->ac_g_ex.fe_logical = ar->logical; -+ ac->ac_g_ex.fe_group = group; -+ ac->ac_g_ex.fe_start = block; -+ ac->ac_g_ex.fe_len = len; -+ ac->ac_f_ex.fe_len = 0; -+ ac->ac_flags = ar->flags; -+ ac->ac_2order = 0; -+ ac->ac_criteria = 0; -+ ac->ac_pa = NULL; -+ ac->ac_bitmap_page = NULL; -+ ac->ac_buddy_page = NULL; -+ ac->ac_lg = NULL; -+ -+ /* we have to define context: we'll we work with a file or -+ * locality group. this is a policy, actually */ -+ ext3_mb_group_or_file(ac); -+ -+ mb_debug("init ac: %u blocks @ %llu, goal %llu, flags %x, 2^%d, " -+ "left: %llu/%llu, right %llu/%llu to %swritable\n", -+ (unsigned) ar->len, (unsigned) ar->logical, -+ (unsigned) ar->goal, ac->ac_flags, ac->ac_2order, -+ (unsigned) ar->lleft, (unsigned) ar->pleft, -+ (unsigned) ar->lright, (unsigned) ar->pright, -+ atomic_read(&ar->inode->i_writecount) ? 
"" : "non-"); -+ return 0; -+ -+} -+ -+/* -+ * release all resource we used in allocation -+ */ -+int ext3_mb_release_context(struct ext3_allocation_context *ac) -+{ -+ if (ac->ac_pa) { -+ if (ac->ac_pa->pa_linear) { -+ /* see comment in ext3_mb_use_group_pa() */ -+ spin_lock(&ac->ac_pa->pa_lock); -+ ac->ac_pa->pa_pstart += ac->ac_b_ex.fe_len; -+ ac->ac_pa->pa_lstart += ac->ac_b_ex.fe_len; -+ ac->ac_pa->pa_free -= ac->ac_b_ex.fe_len; -+ ac->ac_pa->pa_len -= ac->ac_b_ex.fe_len; -+ spin_unlock(&ac->ac_pa->pa_lock); -+ } -+ ext3_mb_put_pa(ac, ac->ac_sb, ac->ac_pa); -+ } -+ if (ac->ac_bitmap_page) -+ page_cache_release(ac->ac_bitmap_page); -+ if (ac->ac_buddy_page) -+ page_cache_release(ac->ac_buddy_page); -+ if (ac->ac_flags & EXT3_MB_HINT_GROUP_ALLOC) -+ up(&ac->ac_lg->lg_sem); -+ ext3_mb_collect_stats(ac); -+ return 0; -+} -+ -+int ext3_mb_discard_preallocations(struct super_block *sb, int needed) -+{ -+ int i, ret, freed = 0; -+ -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count && needed > 0; i++) { -+ ret = ext3_mb_discard_group_preallocations(sb, i, needed); -+ freed += ret; -+ needed -= ret; -+ } -+ -+ return freed; -+} -+ -+/* -+ * Main entry point into mballoc to allocate blocks -+ * it tries to use preallocation first, then falls back -+ * to usual allocation -+ */ -+unsigned long ext3_mb_new_blocks(handle_t *handle, -+ struct ext3_allocation_request *ar, int *errp) -+{ -+ struct ext3_allocation_context ac; -+ struct ext3_sb_info *sbi; -+ struct super_block *sb; -+ unsigned long block; -+ int err, freed; -+ -+ sb = ar->inode->i_sb; -+ sbi = EXT3_SB(sb); -+ -+ if (!test_opt(sb, MBALLOC)) { -+ static int ext3_mballoc_warning = 0; -+ if (ext3_mballoc_warning++ == 0) -+ printk(KERN_ERR "EXT3-fs: multiblock request with " -+ "mballoc disabled!\n"); -+ ar->len = 1; -+ err = ext3_new_block_old(handle, ar->inode, ar->goal, errp); -+ return err; -+ } -+ -+ ext3_mb_poll_new_transaction(sb, handle); -+ -+ if ((err = ext3_mb_initialize_context(&ac, ar))) -+ return err; -+ 
-+ ac.ac_op = EXT3_MB_HISTORY_PREALLOC; -+ if (!ext3_mb_use_preallocated(&ac)) { -+ -+ ac.ac_op = EXT3_MB_HISTORY_ALLOC; -+ ext3_mb_normalize_request(&ac, ar); -+ -+repeat: -+ /* allocate space in core */ -+ ext3_mb_regular_allocator(&ac); -+ -+ /* as we've just preallocated more space than -+ * user requested orinally, we store allocated -+ * space in a special descriptor */ -+ if (ac.ac_status == AC_STATUS_FOUND && -+ ac.ac_o_ex.fe_len < ac.ac_b_ex.fe_len) -+ ext3_mb_new_preallocation(&ac); -+ } -+ -+ if (likely(ac.ac_status == AC_STATUS_FOUND)) { -+ ext3_mb_mark_diskspace_used(&ac, handle); -+ *errp = 0; -+ block = ext3_grp_offs_to_block(sb, &ac.ac_b_ex); -+ ar->len = ac.ac_b_ex.fe_len; -+ } else { -+ freed = ext3_mb_discard_preallocations(sb, ac.ac_o_ex.fe_len); -+ if (freed) -+ goto repeat; -+ *errp = -ENOSPC; -+ ac.ac_b_ex.fe_len = 0; -+ block = 0; -+ ext3_mb_show_ac(&ac); -+ } -+ -+ ext3_mb_release_context(&ac); -+ -+ return block; -+} -+EXPORT_SYMBOL(ext3_mb_new_blocks); -+ -+int ext3_new_block(handle_t *handle, struct inode *inode, -+ unsigned long goal, int *errp) -+{ -+ struct ext3_allocation_request ar; -+ unsigned long ret; -+ -+ if (!test_opt(inode->i_sb, MBALLOC)) { -+ ret = ext3_new_block_old(handle, inode, goal, errp); -+ return ret; -+ } -+ -+ ar.inode = inode; -+ ar.goal = goal; -+ ar.len = 1; -+ ar.logical = 0; -+ ar.lleft = 0; -+ ar.pleft = 0; -+ ar.lright = 0; -+ ar.pright = 0; -+ ar.flags = 0; -+ ret = ext3_mb_new_blocks(handle, &ar, errp); -+ return ret; -+} -+ -+void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ if (sbi->s_last_transaction == handle->h_transaction->t_tid) -+ return; -+ -+ /* new transaction! time to close last one and free blocks for -+ * committed transaction. we know that only transaction can be -+ * active, so previos transaction can be being logged and we -+ * know that transaction before previous is known to be already -+ * logged. 
this means that now we may free blocks freed in all -+ * transactions before previous one. hope I'm clear enough ... */ -+ -+ spin_lock(&sbi->s_md_lock); -+ if (sbi->s_last_transaction != handle->h_transaction->t_tid) { -+ mb_debug("new transaction %lu, old %lu\n", -+ (unsigned long) handle->h_transaction->t_tid, -+ (unsigned long) sbi->s_last_transaction); -+ list_splice_init(&sbi->s_closed_transaction, -+ &sbi->s_committed_transaction); -+ list_splice_init(&sbi->s_active_transaction, -+ &sbi->s_closed_transaction); -+ sbi->s_last_transaction = handle->h_transaction->t_tid; -+ } -+ spin_unlock(&sbi->s_md_lock); -+ -+ ext3_mb_free_committed_blocks(sb); -+} -+ -+int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b, -+ int group, int block, int count) -+{ -+ struct ext3_group_info *db = e3b->bd_info; -+ struct super_block *sb = e3b->bd_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_free_metadata *md; -+ int i; -+ -+ BUG_ON(e3b->bd_bitmap_page == NULL); -+ BUG_ON(e3b->bd_buddy_page == NULL); -+ -+ ext3_lock_group(sb, group); -+ for (i = 0; i < count; i++) { -+ md = db->bb_md_cur; -+ if (md && db->bb_tid != handle->h_transaction->t_tid) { -+ db->bb_md_cur = NULL; -+ md = NULL; -+ } -+ -+ if (md == NULL) { -+ ext3_unlock_group(sb, group); -+ md = kmalloc(sizeof(*md), GFP_KERNEL); -+ if (md == NULL) -+ return -ENOMEM; -+ md->num = 0; -+ md->group = group; -+ -+ ext3_lock_group(sb, group); -+ if (db->bb_md_cur == NULL) { -+ spin_lock(&sbi->s_md_lock); -+ list_add(&md->list, &sbi->s_active_transaction); -+ spin_unlock(&sbi->s_md_lock); -+ /* protect buddy cache from being freed, -+ * otherwise we'll refresh it from -+ * on-disk bitmap and lose not-yet-available -+ * blocks */ -+ page_cache_get(e3b->bd_buddy_page); -+ page_cache_get(e3b->bd_bitmap_page); -+ db->bb_md_cur = md; -+ db->bb_tid = handle->h_transaction->t_tid; -+ mb_debug("new md 0x%p for group %u\n", -+ md, md->group); -+ } else { -+ kfree(md); -+ md = db->bb_md_cur; -+ } -+ } -+ -+ 
BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS); -+ md->blocks[md->num] = block + i; -+ md->num++; -+ if (md->num == EXT3_BB_MAX_BLOCKS) { -+ /* no more space, put full container on a sb's list */ -+ db->bb_md_cur = NULL; -+ } -+ } -+ ext3_unlock_group(sb, group); -+ return 0; -+} -+ -+/* -+ * Main entry point into mballoc to free blocks -+ */ -+void ext3_mb_free_blocks(handle_t *handle, struct inode *inode, -+ unsigned long block, unsigned long count, -+ int metadata, int *freed) -+{ -+ struct buffer_head *bitmap_bh = NULL; -+ struct super_block *sb = inode->i_sb; -+ struct ext3_allocation_context ac; -+ struct ext3_group_desc *gdp; -+ struct ext3_super_block *es; -+ unsigned long bit, overflow; -+ struct buffer_head *gd_bh; -+ unsigned long block_group; -+ struct ext3_sb_info *sbi; -+ struct ext3_buddy e3b; -+ int err = 0, ret; -+ -+ *freed = 0; -+ -+ ext3_mb_poll_new_transaction(sb, handle); -+ -+ sbi = EXT3_SB(sb); -+ es = EXT3_SB(sb)->s_es; -+ if (block < le32_to_cpu(es->s_first_data_block) || -+ block + count < block || -+ block + count > le32_to_cpu(es->s_blocks_count)) { -+ ext3_error (sb, __FUNCTION__, -+ "Freeing blocks not in datazone - " -+ "block = %lu, count = %lu", block, count); -+ goto error_return; -+ } -+ -+ ext3_debug("freeing block %lu\n", block); -+ -+ ac.ac_op = EXT3_MB_HISTORY_FREE; -+ ac.ac_inode = inode; -+ ac.ac_sb = sb; -+ -+do_more: -+ overflow = 0; -+ ext3_get_group_no_and_offset(sb, block, &block_group, &bit); -+ -+ /* -+ * Check to see if we are freeing blocks across a group -+ * boundary. 
-+ */ -+ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) { -+ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb); -+ count -= overflow; -+ } -+ brelse(bitmap_bh); -+ bitmap_bh = read_block_bitmap(sb, block_group); -+ if (!bitmap_bh) -+ goto error_return; -+ gdp = ext3_get_group_desc (sb, block_group, &gd_bh); -+ if (!gdp) -+ goto error_return; -+ -+ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || -+ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || -+ in_range (block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group) || -+ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group)) -+ ext3_error(sb, __FUNCTION__, -+ "Freeing blocks in system zone - " -+ "Block = %lu, count = %lu", block, count); -+ -+ BUFFER_TRACE(bitmap_bh, "getting write access"); -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) -+ goto error_return; -+ -+ /* -+ * We are about to modify some metadata. Call the journal APIs -+ * to unshare ->b_data if a currently-committing transaction is -+ * using it -+ */ -+ BUFFER_TRACE(gd_bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, gd_bh); -+ if (err) -+ goto error_return; -+ -+ err = ext3_mb_load_buddy(sb, block_group, &e3b); -+ if (err) -+ goto error_return; -+ -+#ifdef AGGRESSIVE_CHECK -+ { -+ int i; -+ for (i = 0; i < count; i++) -+ BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); -+ } -+#endif -+ mb_clear_bits(bitmap_bh->b_data, bit, count); -+ -+ /* We dirtied the bitmap block */ -+ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ -+ ac.ac_b_ex.fe_group = block_group; -+ ac.ac_b_ex.fe_start = bit; -+ ac.ac_b_ex.fe_len = count; -+ ext3_mb_store_history(&ac); -+ -+ if (metadata) { -+ /* blocks being freed are metadata. 
these blocks shouldn't -+ * be used until this transaction is committed */ -+ ext3_mb_free_metadata(handle, &e3b, block_group, bit, count); -+ } else { -+ ext3_lock_group(sb, block_group); -+ err = mb_free_blocks(&e3b, bit, count); -+ ext3_mb_return_to_preallocation(inode, &e3b, block, count); -+ ext3_unlock_group(sb, block_group); -+ BUG_ON(err != 0); -+ } -+ -+ spin_lock(sb_bgl_lock(sbi, block_group)); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); -+ spin_unlock(sb_bgl_lock(sbi, block_group)); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, count); -+ -+ ext3_mb_release_desc(&e3b); -+ -+ *freed += count; -+ -+ /* And the group descriptor block */ -+ BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); -+ ret = ext3_journal_dirty_metadata(handle, gd_bh); -+ if (!err) err = ret; -+ -+ if (overflow && !err) { -+ block += count; -+ count = overflow; -+ goto do_more; -+ } -+ sb->s_dirt = 1; -+error_return: -+ brelse(bitmap_bh); -+ ext3_std_error(sb, err); -+ return; -+} diff --git a/lustre/kernel_patches/patches/ext3-mballoc3-rhel4.patch b/lustre/kernel_patches/patches/ext3-mballoc3-rhel4.patch deleted file mode 100644 index 910df7c0d0..0000000000 --- a/lustre/kernel_patches/patches/ext3-mballoc3-rhel4.patch +++ /dev/null @@ -1,396 +0,0 @@ -Index: linux-2.6.9-full/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.9-full.orig/include/linux/ext3_fs_i.h 2007-03-28 01:29:38.000000000 +0400 -+++ linux-2.6.9-full/include/linux/ext3_fs_i.h 2007-03-28 15:45:41.000000000 +0400 -@@ -130,6 +130,10 @@ struct ext3_inode_info { - struct inode vfs_inode; - - __u32 i_cached_extent[4]; -+ -+ /* mballoc */ -+ struct list_head i_prealloc_list; -+ spinlock_t i_prealloc_lock; - }; - - #endif /* _LINUX_EXT3_FS_I */ -Index: linux-2.6.9-full/include/linux/ext3_fs_sb.h -=================================================================== ---- 
linux-2.6.9-full.orig/include/linux/ext3_fs_sb.h 2007-03-28 15:42:16.000000000 +0400 -+++ linux-2.6.9-full/include/linux/ext3_fs_sb.h 2007-03-28 15:45:41.000000000 +0400 -@@ -23,9 +23,16 @@ - #define EXT_INCLUDE - #include <linux/blockgroup_lock.h> - #include <linux/percpu_counter.h> -+#include <linux/list.h> - #endif - #endif - #include <linux/rbtree.h> -+#include <linux/proc_fs.h> -+ -+struct ext3_buddy_group_blocks; -+struct ext3_locality_group; -+struct ext3_mb_history; -+#define EXT3_BB_MAX_BLOCKS - - /* - * third extended-fs super-block data in memory -Index: linux-2.6.9-full/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.9-full.orig/include/linux/ext3_fs.h 2007-03-28 15:45:07.000000000 +0400 -+++ linux-2.6.9-full/include/linux/ext3_fs.h 2007-03-28 15:45:41.000000000 +0400 -@@ -389,6 +389,7 @@ struct ext3_inode { - #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ - #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ - #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ -+#define EXT3_MOUNT_MBALLOC 0x800000/* Buddy allocation support */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt -@@ -749,8 +750,9 @@ struct dir_private_info { - extern int ext3_bg_has_super(struct super_block *sb, int group); - extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); - extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); -+extern int ext3_new_block_old (handle_t *, struct inode *, unsigned long, int *); - extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, -- unsigned long); -+ unsigned long, int); - extern void ext3_free_blocks_sb (handle_t *, struct super_block *, - unsigned long, unsigned long, int *); - extern unsigned long ext3_count_free_blocks (struct super_block *); -Index: linux-2.6.9-full/fs/ext3/super.c 
-=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/super.c 2007-03-28 15:42:16.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/super.c 2007-03-28 15:45:41.000000000 +0400 -@@ -600,6 +600,7 @@ enum { - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, - Opt_extents, Opt_noextents, Opt_extdebug, -+ Opt_mballoc, Opt_nomballoc, Opt_stripe, - }; - - static match_table_t tokens = { -@@ -653,6 +654,9 @@ static match_table_t tokens = { - {Opt_noextents, "noextents"}, - {Opt_extdebug, "extdebug"}, - {Opt_barrier, "barrier=%u"}, -+ {Opt_mballoc, "mballoc"}, -+ {Opt_nomballoc, "nomballoc"}, -+ {Opt_stripe, "stripe=%u"}, - {Opt_err, NULL}, - {Opt_resize, "resize"}, - }; -@@ -965,6 +969,19 @@ clear_qf_name: - case Opt_extdebug: - set_opt (sbi->s_mount_opt, EXTDEBUG); - break; -+ case Opt_mballoc: -+ set_opt(sbi->s_mount_opt, MBALLOC); -+ break; -+ case Opt_nomballoc: -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ break; -+ case Opt_stripe: -+ if (match_int(&args[0], &option)) -+ return 0; -+ if (option < 0) -+ return 0; -+ sbi->s_stripe = option; -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1654,6 +1671,7 @@ static int ext3_fill_super (struct super - ext3_count_dirs(sb)); - - ext3_ext_init(sb); -+ ext3_mb_init(sb, needs_recovery); - - return 0; - -Index: linux-2.6.9-full/fs/ext3/extents.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/extents.c 2007-03-28 01:29:41.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/extents.c 2007-03-28 15:45:41.000000000 +0400 -@@ -779,7 +779,7 @@ cleanup: - for (i = 0; i < depth; i++) { - if (!ablocks[i]) - continue; -- ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); - } - } - kfree(ablocks); -@@ -1586,7 +1586,7 @@ int ext3_ext_rm_idx(handle_t *handle, st - path->p_idx->ei_leaf); - bh = 
sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); - ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); - return err; - } - -@@ -2071,10 +2071,12 @@ ext3_remove_blocks(struct ext3_extents_t - int needed = ext3_remove_blocks_credits(tree, ex, from, to); - handle_t *handle = ext3_journal_start(tree->inode, needed); - struct buffer_head *bh; -- int i; -+ int i, metadata = 0; - - if (IS_ERR(handle)) - return PTR_ERR(handle); -+ if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode)) -+ metadata = 1; - if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { - /* tail removal */ - unsigned long num, start; -@@ -2086,7 +2088,7 @@ ext3_remove_blocks(struct ext3_extents_t - bh = sb_find_get_block(tree->inode->i_sb, start + i); - ext3_forget(handle, 0, tree->inode, bh, start + i); - } -- ext3_free_blocks(handle, tree->inode, start, num); -+ ext3_free_blocks(handle, tree->inode, start, num, metadata); - } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { - printk("strange request: removal %lu-%lu from %u:%u\n", - from, to, ex->ee_block, ex->ee_len); -@@ -2177,11 +2179,8 @@ int ext3_ext_get_block(handle_t *handle, - struct ext3_extent *ex; - int goal, newblock, err = 0, depth; - struct ext3_extents_tree tree; -- unsigned long next; -- int allocated = 0; -- -- /* until we have multiblock allocation */ -- max_blocks = 1; -+ unsigned long allocated = 0; -+ struct ext3_allocation_request ar; - - clear_buffer_new(bh_result); - ext3_init_tree_desc(&tree, inode); -@@ -2253,18 +2252,33 @@ int ext3_ext_get_block(handle_t *handle, - goto out2; - } - -+ /* find neighbour allocated blocks */ -+ ar.lleft = iblock; -+ err = ext3_ext_search_left(&tree, path, &ar.lleft, &ar.pleft); -+ if (err) -+ goto out2; -+ ar.lright = iblock; -+ err = ext3_ext_search_right(&tree, path, &ar.lright, 
&ar.pright); -+ if (err) -+ goto out2; -+ - /* find next allocated block so that we know how many - * blocks we can allocate without ovelapping next extent */ -- EXT_ASSERT(iblock >= ex->ee_block + ex->ee_len); -- next = ext3_ext_next_allocated_block(path); -- EXT_ASSERT(next > iblock); -- allocated = next - iblock; -+ EXT_ASSERT(ar.pright == 0 || ar.lright > iblock); -+ if (ar.pright == 0) -+ allocated = EXT_MAX_BLOCK - iblock; -+ else -+ allocated = ar.lright - iblock; - if (allocated > max_blocks) - allocated = max_blocks; - - /* allocate new block */ -- goal = ext3_ext_find_goal(inode, path, iblock); -- newblock = ext3_new_block(handle, inode, goal, &err); -+ ar.inode = inode; -+ ar.goal = ext3_ext_find_goal(inode, path, iblock); -+ ar.logical = iblock; -+ ar.len = allocated; -+ ar.flags = EXT3_MB_HINT_DATA; -+ newblock = ext3_mb_new_blocks(handle, &ar, &err); - if (!newblock) - goto out2; - ext_debug(&tree, "allocate new block: goal %d, found %d\n", -@@ -2274,11 +2288,14 @@ int ext3_ext_get_block(handle_t *handle, - newex.ee_block = iblock; - newex.ee_start = newblock; - newex.ee_start_hi = 0; -- newex.ee_len = 1; -+ newex.ee_len = ar.len; - err = ext3_ext_insert_extent(handle, &tree, path, &newex); - if (err) { - /* free data blocks we just allocated */ -- ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len); -+ /* not a good idea to call discard here directly, -+ * but otherwise we'd need to call it every free() */ -+ ext3_mb_discard_inode_preallocations(inode); -+ ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len, 0); - goto out2; - } - -@@ -2287,6 +2304,7 @@ int ext3_ext_get_block(handle_t *handle, - - /* previous routine could use block we allocated */ - newblock = newex.ee_start; -+ allocated = newex.ee_len; - set_buffer_new(bh_result); - - ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, -@@ -2339,6 +2357,9 @@ void ext3_ext_truncate(struct inode * in - down(&EXT3_I(inode)->truncate_sem); - ext3_ext_invalidate_cache(&tree); 
- -+ /* it's important to discard preallocations under truncate_sem */ -+ ext3_mb_discard_inode_preallocations(inode); -+ - /* - * TODO: optimization is possible here - * probably we need not scaning at all, -Index: linux-2.6.9-full/fs/ext3/Makefile -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/Makefile 2007-03-28 01:29:38.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/Makefile 2007-03-28 15:45:41.000000000 +0400 -@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o \ -- extents.o -+ extents.o mballoc.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: linux-2.6.9-full/fs/ext3/xattr.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/xattr.c 2006-05-18 23:57:04.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/xattr.c 2007-03-28 15:45:41.000000000 +0400 -@@ -1281,7 +1281,7 @@ ext3_xattr_set_handle2(handle_t *handle, - new_bh = sb_getblk(sb, block); - if (!new_bh) { - getblk_failed: -- ext3_free_blocks(handle, inode, block, 1); -+ ext3_free_blocks(handle, inode, block, 1, 1); - error = -EIO; - goto cleanup; - } -@@ -1328,7 +1328,7 @@ getblk_failed: - if (ce) - mb_cache_entry_free(ce); - ea_bdebug(old_bh, "freeing"); -- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); -+ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1); - - /* ext3_forget() calls bforget() for us, but we - let our caller release old_bh, so we need to -@@ -1427,7 +1427,7 @@ ext3_xattr_delete_inode(handle_t *handle - if (HDR(bh)->h_refcount == cpu_to_le32(1)) { - if (ce) - mb_cache_entry_free(ce); -- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); -+ ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1); - get_bh(bh); - ext3_forget(handle, 1, inode, bh, 
EXT3_I(inode)->i_file_acl); - } else { -Index: linux-2.6.9-full/fs/ext3/balloc.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/balloc.c 2006-03-10 18:20:03.000000000 +0300 -+++ linux-2.6.9-full/fs/ext3/balloc.c 2007-03-28 15:45:41.000000000 +0400 -@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_ - * - * Return buffer_head on success or NULL in case of failure. - */ --static struct buffer_head * -+struct buffer_head * - read_block_bitmap(struct super_block *sb, unsigned int block_group) - { - struct ext3_group_desc * desc; -@@ -267,6 +267,8 @@ void ext3_discard_reservation(struct ino - struct reserve_window_node *rsv = &ei->i_rsv_window; - spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; - -+ ext3_mb_discard_inode_preallocations(inode); -+ - if (!rsv_is_empty(&rsv->rsv_window)) { - spin_lock(rsv_lock); - if (!rsv_is_empty(&rsv->rsv_window)) -@@ -451,21 +453,25 @@ error_return: - return; - } - --/* Free given blocks, update quota and i_blocks field */ --void ext3_free_blocks(handle_t *handle, struct inode *inode, -- unsigned long block, unsigned long count) -+void ext3_free_blocks(handle_t *handle, struct inode * inode, -+ unsigned long block, unsigned long count, int metadata) - { -- struct super_block * sb; -- int dquot_freed_blocks; -+ struct super_block *sb; -+ int freed; -+ -+ /* this isn't the right place to decide whether block is metadata -+ * inode.c/extents.c knows better, but for safety ... 
*/ -+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || -+ ext3_should_journal_data(inode)) -+ metadata = 1; - - sb = inode->i_sb; -- if (!sb) { -- printk ("ext3_free_blocks: nonexistent device"); -- return; -- } -- ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); -- if (dquot_freed_blocks) -- DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); -+ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) -+ ext3_free_blocks_sb(handle, sb, block, count, &freed); -+ else -+ ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); -+ if (freed) -+ DQUOT_FREE_BLOCK(inode, freed); - return; - } - -@@ -1131,7 +1137,7 @@ int ext3_should_retry_alloc(struct super - * bitmap, and then for any free bit if that fails. - * This function also updates quota and i_blocks field. - */ --int ext3_new_block(handle_t *handle, struct inode *inode, -+int ext3_new_block_old(handle_t *handle, struct inode *inode, - unsigned long goal, int *errp) - { - struct buffer_head *bitmap_bh = NULL; -Index: linux-2.6.9-full/fs/ext3/inode.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/inode.c 2007-03-28 01:29:39.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/inode.c 2007-03-28 15:45:41.000000000 +0400 -@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h - ext3_journal_forget(handle, branch[i].bh); - } - for (i = 0; i < keys; i++) -- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); -+ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 0); - return err; - } - -@@ -673,7 +673,7 @@ err_out: - if (err == -EAGAIN) - for (i = 0; i < num; i++) - ext3_free_blocks(handle, inode, -- le32_to_cpu(where[i].key), 1); -+ le32_to_cpu(where[i].key), 1, 0); - return err; - } - -@@ -1834,7 +1834,7 @@ ext3_clear_blocks(handle_t *handle, stru - } - } - -- ext3_free_blocks(handle, inode, block_to_free, count); -+ ext3_free_blocks(handle, inode, block_to_free, count, 0); - } - - /** -@@ -2007,7 +2007,7 
@@ static void ext3_free_branches(handle_t - ext3_journal_test_restart(handle, inode); - } - -- ext3_free_blocks(handle, inode, nr, 1); -+ ext3_free_blocks(handle, inode, nr, 1, 1); - - if (parent_bh) { - /* diff --git a/lustre/kernel_patches/patches/ext3-mballoc3-sles10.patch b/lustre/kernel_patches/patches/ext3-mballoc3-sles10.patch deleted file mode 100644 index 373f0c6680..0000000000 --- a/lustre/kernel_patches/patches/ext3-mballoc3-sles10.patch +++ /dev/null @@ -1,377 +0,0 @@ -Index: linux-2.6.16.27-0.9-full/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/include/linux/ext3_fs_i.h 2007-03-28 05:12:50.000000000 +0400 -+++ linux-2.6.16.27-0.9-full/include/linux/ext3_fs_i.h 2007-03-28 16:03:20.000000000 +0400 -@@ -135,6 +135,10 @@ struct ext3_inode_info { - struct inode vfs_inode; - - __u32 i_cached_extent[4]; -+ -+ /* mballoc */ -+ struct list_head i_prealloc_list; -+ spinlock_t i_prealloc_lock; - }; - - #endif /* _LINUX_EXT3_FS_I */ -Index: linux-2.6.16.27-0.9-full/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/include/linux/ext3_fs_sb.h 2007-03-28 16:03:19.000000000 +0400 -+++ linux-2.6.16.27-0.9-full/include/linux/ext3_fs_sb.h 2007-03-28 16:03:20.000000000 +0400 -@@ -21,8 +21,15 @@ - #include <linux/wait.h> - #include <linux/blockgroup_lock.h> - #include <linux/percpu_counter.h> -+#include <linux/list.h> - #endif - #include <linux/rbtree.h> -+#include <linux/proc_fs.h> -+ -+struct ext3_buddy_group_blocks; -+struct ext3_locality_group; -+struct ext3_mb_history; -+#define EXT3_BB_MAX_BLOCKS - - /* - * third extended-fs super-block data in memory -Index: linux-2.6.16.27-0.9-full/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/include/linux/ext3_fs.h 2007-03-28 16:03:19.000000000 +0400 -+++ 
linux-2.6.16.27-0.9-full/include/linux/ext3_fs.h 2007-03-28 16:03:20.000000000 +0400 -@@ -407,6 +407,7 @@ struct ext3_inode { - #define EXT3_MOUNT_IOPEN_NOPRIV 0x800000/* Make iopen world-readable */ - #define EXT3_MOUNT_EXTENTS 0x1000000/* Extents support */ - #define EXT3_MOUNT_EXTDEBUG 0x2000000/* Extents debug */ -+#define EXT3_MOUNT_MBALLOC 0x4000000/* Buddy allocation support */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt -@@ -767,8 +768,9 @@ struct dir_private_info { - extern int ext3_bg_has_super(struct super_block *sb, int group); - extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); - extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); -+extern int ext3_new_block_old (handle_t *, struct inode *, unsigned long, int *); - extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, -- unsigned long); -+ unsigned long, int); - extern void ext3_free_blocks_sb (handle_t *, struct super_block *, - unsigned long, unsigned long, int *); - extern unsigned long ext3_count_free_blocks (struct super_block *); -Index: linux-2.6.16.27-0.9-full/fs/ext3/super.c -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/fs/ext3/super.c 2007-03-28 16:03:19.000000000 +0400 -+++ linux-2.6.16.27-0.9-full/fs/ext3/super.c 2007-03-28 16:03:20.000000000 +0400 -@@ -688,6 +688,7 @@ enum { - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, - Opt_extents, Opt_noextents, Opt_extdebug, -+ Opt_mballoc, Opt_nomballoc, Opt_stripe, - Opt_grpquota - }; - -@@ -743,6 +744,9 @@ static match_table_t tokens = { - {Opt_noextents, "noextents"}, - {Opt_extdebug, "extdebug"}, - {Opt_barrier, "barrier=%u"}, -+ {Opt_mballoc, "mballoc"}, -+ {Opt_nomballoc, "nomballoc"}, -+ {Opt_stripe, "stripe=%u"}, - {Opt_err, NULL}, - {Opt_resize, "resize"}, - }; -@@ -1092,6 +1096,19 @@ clear_qf_name: - 
case Opt_extdebug: - set_opt (sbi->s_mount_opt, EXTDEBUG); - break; -+ case Opt_mballoc: -+ set_opt(sbi->s_mount_opt, MBALLOC); -+ break; -+ case Opt_nomballoc: -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ break; -+ case Opt_stripe: -+ if (match_int(&args[0], &option)) -+ return 0; -+ if (option < 0) -+ return 0; -+ sbi->s_stripe = option; -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1819,6 +1836,7 @@ static int ext3_fill_super (struct super - ext3_count_dirs(sb)); - - ext3_ext_init(sb); -+ ext3_mb_init(sb, needs_recovery); - lock_kernel(); - return 0; - -Index: linux-2.6.16.27-0.9-full/fs/ext3/extents.c -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/fs/ext3/extents.c 2007-03-28 05:13:39.000000000 +0400 -+++ linux-2.6.16.27-0.9-full/fs/ext3/extents.c 2007-03-28 16:03:20.000000000 +0400 -@@ -779,7 +779,7 @@ cleanup: - for (i = 0; i < depth; i++) { - if (!ablocks[i]) - continue; -- ext3_free_blocks(handle, tree->inode, ablocks[i], 1); -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); - } - } - kfree(ablocks); -@@ -1586,7 +1586,7 @@ int ext3_ext_rm_idx(handle_t *handle, st - path->p_idx->ei_leaf); - bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); - ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); - return err; - } - -@@ -2071,10 +2071,12 @@ ext3_remove_blocks(struct ext3_extents_t - int needed = ext3_remove_blocks_credits(tree, ex, from, to); - handle_t *handle = ext3_journal_start(tree->inode, needed); - struct buffer_head *bh; -- int i; -+ int i, metadata = 0; - - if (IS_ERR(handle)) - return PTR_ERR(handle); -+ if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode)) -+ metadata = 1; - if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { - /* tail removal */ - 
unsigned long num, start; -@@ -2086,7 +2088,7 @@ ext3_remove_blocks(struct ext3_extents_t - bh = sb_find_get_block(tree->inode->i_sb, start + i); - ext3_forget(handle, 0, tree->inode, bh, start + i); - } -- ext3_free_blocks(handle, tree->inode, start, num); -+ ext3_free_blocks(handle, tree->inode, start, num, metadata); - } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { - printk("strange request: removal %lu-%lu from %u:%u\n", - from, to, ex->ee_block, ex->ee_len); -@@ -2177,11 +2179,8 @@ int ext3_ext_get_block(handle_t *handle, - struct ext3_extent *ex; - int goal, newblock, err = 0, depth; - struct ext3_extents_tree tree; -- unsigned long next; -- int allocated = 0; -- -- /* until we have multiblock allocation */ -- max_blocks = 1; -+ unsigned long allocated = 0; -+ struct ext3_allocation_request ar; - - clear_buffer_new(bh_result); - ext3_init_tree_desc(&tree, inode); -@@ -2253,18 +2252,33 @@ int ext3_ext_get_block(handle_t *handle, - goto out2; - } - -+ /* find neighbour allocated blocks */ -+ ar.lleft = iblock; -+ err = ext3_ext_search_left(&tree, path, &ar.lleft, &ar.pleft); -+ if (err) -+ goto out2; -+ ar.lright = iblock; -+ err = ext3_ext_search_right(&tree, path, &ar.lright, &ar.pright); -+ if (err) -+ goto out2; -+ - /* find next allocated block so that we know how many - * blocks we can allocate without ovelapping next extent */ -- EXT_ASSERT(iblock >= ex->ee_block + ex->ee_len); -- next = ext3_ext_next_allocated_block(path); -- EXT_ASSERT(next > iblock); -- allocated = next - iblock; -+ EXT_ASSERT(ar.pright == 0 || ar.lright > iblock); -+ if (ar.pright == 0) -+ allocated = EXT_MAX_BLOCK - iblock; -+ else -+ allocated = ar.lright - iblock; - if (allocated > max_blocks) - allocated = max_blocks; - - /* allocate new block */ -- goal = ext3_ext_find_goal(inode, path, iblock); -- newblock = ext3_new_block(handle, inode, goal, &err); -+ ar.inode = inode; -+ ar.goal = ext3_ext_find_goal(inode, path, iblock); -+ ar.logical = iblock; -+ 
ar.len = allocated; -+ ar.flags = EXT3_MB_HINT_DATA; -+ newblock = ext3_mb_new_blocks(handle, &ar, &err); - if (!newblock) - goto out2; - ext_debug(&tree, "allocate new block: goal %d, found %d\n", -@@ -2274,11 +2288,14 @@ int ext3_ext_get_block(handle_t *handle, - newex.ee_block = iblock; - newex.ee_start = newblock; - newex.ee_start_hi = 0; -- newex.ee_len = 1; -+ newex.ee_len = ar.len; - err = ext3_ext_insert_extent(handle, &tree, path, &newex); - if (err) { - /* free data blocks we just allocated */ -- ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len); -+ /* not a good idea to call discard here directly, -+ * but otherwise we'd need to call it every free() */ -+ ext3_mb_discard_inode_preallocations(inode); -+ ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len, 0); - goto out2; - } - -@@ -2287,6 +2304,7 @@ int ext3_ext_get_block(handle_t *handle, - - /* previous routine could use block we allocated */ - newblock = newex.ee_start; -+ allocated = newex.ee_len; - set_buffer_new(bh_result); - - ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, -@@ -2339,6 +2357,9 @@ void ext3_ext_truncate(struct inode * in - down(&EXT3_I(inode)->truncate_sem); - ext3_ext_invalidate_cache(&tree); - -+ /* it's important to discard preallocations under truncate_sem */ -+ ext3_mb_discard_inode_preallocations(inode); -+ - /* - * TODO: optimization is possible here - * probably we need not scaning at all, -Index: linux-2.6.16.27-0.9-full/fs/ext3/Makefile -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/fs/ext3/Makefile 2007-03-28 05:12:50.000000000 +0400 -+++ linux-2.6.16.27-0.9-full/fs/ext3/Makefile 2007-03-28 16:03:20.000000000 +0400 -@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o \ -- extents.o -+ extents.o mballoc.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o 
xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: linux-2.6.16.27-0.9-full/fs/ext3/xattr.c -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/fs/ext3/xattr.c 2007-03-13 02:56:52.000000000 +0300 -+++ linux-2.6.16.27-0.9-full/fs/ext3/xattr.c 2007-03-28 16:03:20.000000000 +0400 -@@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl - ea_bdebug(bh, "refcount now=0; freeing"); - if (ce) - mb_cache_entry_free(ce); -- ext3_free_blocks(handle, inode, bh->b_blocknr, 1); -+ ext3_free_blocks(handle, inode, bh->b_blocknr, 1, 1); - get_bh(bh); - ext3_forget(handle, 1, inode, bh, bh->b_blocknr); - } else { -@@ -804,7 +804,7 @@ inserted: - new_bh = sb_getblk(sb, block); - if (!new_bh) { - getblk_failed: -- ext3_free_blocks(handle, inode, block, 1); -+ ext3_free_blocks(handle, inode, block, 1, 1); - error = -EIO; - goto cleanup; - } -Index: linux-2.6.16.27-0.9-full/fs/ext3/balloc.c -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/fs/ext3/balloc.c 2007-03-13 02:56:52.000000000 +0300 -+++ linux-2.6.16.27-0.9-full/fs/ext3/balloc.c 2007-03-28 16:03:20.000000000 +0400 -@@ -80,7 +80,7 @@ struct ext3_group_desc * ext3_get_group_ - * - * Return buffer_head on success or NULL in case of failure. 
- */ --static struct buffer_head * -+struct buffer_head * - read_block_bitmap(struct super_block *sb, unsigned int block_group) - { - struct ext3_group_desc * desc; -@@ -296,6 +296,8 @@ void ext3_discard_reservation(struct ino - struct ext3_reserve_window_node *rsv; - spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; - -+ ext3_mb_discard_inode_preallocations(inode); -+ - if (!block_i) - return; - -@@ -491,21 +493,25 @@ error_return: - return; - } - --/* Free given blocks, update quota and i_blocks field */ --void ext3_free_blocks(handle_t *handle, struct inode *inode, -- unsigned long block, unsigned long count) -+void ext3_free_blocks(handle_t *handle, struct inode * inode, -+ unsigned long block, unsigned long count, int metadata) - { -- struct super_block * sb; -- int dquot_freed_blocks; -+ struct super_block *sb; -+ int freed; -+ -+ /* this isn't the right place to decide whether block is metadata -+ * inode.c/extents.c knows better, but for safety ... */ -+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || -+ ext3_should_journal_data(inode)) -+ metadata = 1; - - sb = inode->i_sb; -- if (!sb) { -- printk ("ext3_free_blocks: nonexistent device"); -- return; -- } -- ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); -- if (dquot_freed_blocks) -- DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); -+ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) -+ ext3_free_blocks_sb(handle, sb, block, count, &freed); -+ else -+ ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); -+ if (freed) -+ DQUOT_FREE_BLOCK(inode, freed); - return; - } - -@@ -1154,7 +1160,7 @@ int ext3_should_retry_alloc(struct super - * bitmap, and then for any free bit if that fails. - * This function also updates quota and i_blocks field. 
- */ --int ext3_new_block(handle_t *handle, struct inode *inode, -+int ext3_new_block_old(handle_t *handle, struct inode *inode, - unsigned long goal, int *errp) - { - struct buffer_head *bitmap_bh = NULL; -Index: linux-2.6.16.27-0.9-full/fs/ext3/inode.c -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/fs/ext3/inode.c 2007-03-28 05:13:38.000000000 +0400 -+++ linux-2.6.16.27-0.9-full/fs/ext3/inode.c 2007-03-28 16:03:20.000000000 +0400 -@@ -568,7 +568,7 @@ static int ext3_alloc_branch(handle_t *h - ext3_journal_forget(handle, branch[i].bh); - } - for (i = 0; i < keys; i++) -- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); -+ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 0); - return err; - } - -@@ -1865,7 +1865,7 @@ ext3_clear_blocks(handle_t *handle, stru - } - } - -- ext3_free_blocks(handle, inode, block_to_free, count); -+ ext3_free_blocks(handle, inode, block_to_free, count, 0); - } - - /** -@@ -2038,7 +2038,7 @@ static void ext3_free_branches(handle_t - ext3_journal_test_restart(handle, inode); - } - -- ext3_free_blocks(handle, inode, nr, 1); -+ ext3_free_blocks(handle, inode, nr, 1, 1); - - if (parent_bh) { - /* diff --git a/lustre/kernel_patches/patches/ext3-mballoc3-suse.patch b/lustre/kernel_patches/patches/ext3-mballoc3-suse.patch deleted file mode 100644 index dd07148688..0000000000 --- a/lustre/kernel_patches/patches/ext3-mballoc3-suse.patch +++ /dev/null @@ -1,397 +0,0 @@ -Index: linux-2.6.5-7.283-full/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.5-7.283-full.orig/include/linux/ext3_fs_i.h 2007-03-28 02:13:37.000000000 +0400 -+++ linux-2.6.5-7.283-full/include/linux/ext3_fs_i.h 2007-03-28 15:46:02.000000000 +0400 -@@ -131,6 +131,10 @@ struct ext3_inode_info { - struct inode vfs_inode; - - struct ext3_ext_cache i_cached_extent; -+ -+ /* mballoc */ -+ struct list_head i_prealloc_list; -+ spinlock_t 
i_prealloc_lock; - }; - - #endif /* _LINUX_EXT3_FS_I */ -Index: linux-2.6.5-7.283-full/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.6.5-7.283-full.orig/include/linux/ext3_fs_sb.h 2007-03-28 15:46:00.000000000 +0400 -+++ linux-2.6.5-7.283-full/include/linux/ext3_fs_sb.h 2007-03-28 15:46:02.000000000 +0400 -@@ -23,9 +23,16 @@ - #define EXT_INCLUDE - #include <linux/blockgroup_lock.h> - #include <linux/percpu_counter.h> -+#include <linux/list.h> - #endif - #endif - #include <linux/rbtree.h> -+#include <linux/proc_fs.h> -+ -+struct ext3_buddy_group_blocks; -+struct ext3_locality_group; -+struct ext3_mb_history; -+#define EXT3_BB_MAX_BLOCKS - - /* - * third extended-fs super-block data in memory -Index: linux-2.6.5-7.283-full/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.5-7.283-full.orig/include/linux/ext3_fs.h 2007-03-28 15:46:00.000000000 +0400 -+++ linux-2.6.5-7.283-full/include/linux/ext3_fs.h 2007-03-28 15:46:02.000000000 +0400 -@@ -363,6 +363,7 @@ struct ext3_inode { - #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ - #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ - #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ -+#define EXT3_MOUNT_MBALLOC 0x800000/* Buddy allocation support */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt -@@ -723,8 +724,9 @@ struct dir_private_info { - extern int ext3_bg_has_super(struct super_block *sb, int group); - extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); - extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); -+extern int ext3_new_block_old (handle_t *, struct inode *, unsigned long, int *); - extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, -- unsigned long); -+ unsigned long, int); - extern unsigned long ext3_count_free_blocks (struct 
super_block *); - extern void ext3_check_blocks_bitmap (struct super_block *); - extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, -Index: linux-2.6.5-7.283-full/fs/ext3/super.c -=================================================================== ---- linux-2.6.5-7.283-full.orig/fs/ext3/super.c 2007-03-28 15:46:00.000000000 +0400 -+++ linux-2.6.5-7.283-full/fs/ext3/super.c 2007-03-28 15:46:02.000000000 +0400 -@@ -622,6 +622,7 @@ enum { - Opt_err, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, - Opt_extents, Opt_noextents, Opt_extdebug, -+ Opt_mballoc, Opt_nomballoc, Opt_stripe, - }; - - static match_table_t tokens = { -@@ -669,6 +670,9 @@ static match_table_t tokens = { - {Opt_noextents, "noextents"}, - {Opt_extdebug, "extdebug"}, - {Opt_barrier, "barrier=%u"}, -+ {Opt_mballoc, "mballoc"}, -+ {Opt_nomballoc, "nomballoc"}, -+ {Opt_stripe, "stripe=%u"}, - {Opt_err, NULL} - }; - -@@ -893,6 +897,19 @@ static int parse_options (char * options - case Opt_extdebug: - set_opt (sbi->s_mount_opt, EXTDEBUG); - break; -+ case Opt_mballoc: -+ set_opt(sbi->s_mount_opt, MBALLOC); -+ break; -+ case Opt_nomballoc: -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ break; -+ case Opt_stripe: -+ if (match_int(&args[0], &option)) -+ return 0; -+ if (option < 0) -+ return 0; -+ sbi->s_stripe = option; -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1548,6 +1565,7 @@ static int ext3_fill_super (struct super - ext3_count_dirs(sb)); - - ext3_ext_init(sb); -+ ext3_mb_init(sb, needs_recovery); - - return 0; - -Index: linux-2.6.5-7.283-full/fs/ext3/extents.c -=================================================================== ---- linux-2.6.5-7.283-full.orig/fs/ext3/extents.c 2007-03-28 03:18:19.000000000 +0400 -+++ linux-2.6.5-7.283-full/fs/ext3/extents.c 2007-03-28 15:46:02.000000000 +0400 -@@ -779,7 +779,7 @@ cleanup: - for (i = 0; i < depth; i++) { - if (!ablocks[i]) - continue; -- ext3_free_blocks(handle, tree->inode, 
ablocks[i], 1); -+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); - } - } - kfree(ablocks); -@@ -1586,7 +1586,7 @@ int ext3_ext_rm_idx(handle_t *handle, st - path->p_idx->ei_leaf); - bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); - ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); -- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); -+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); - return err; - } - -@@ -2071,10 +2071,12 @@ ext3_remove_blocks(struct ext3_extents_t - int needed = ext3_remove_blocks_credits(tree, ex, from, to); - handle_t *handle = ext3_journal_start(tree->inode, needed); - struct buffer_head *bh; -- int i; -+ int i, metadata = 0; - - if (IS_ERR(handle)) - return PTR_ERR(handle); -+ if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode)) -+ metadata = 1; - if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { - /* tail removal */ - unsigned long num, start; -@@ -2086,7 +2088,7 @@ ext3_remove_blocks(struct ext3_extents_t - bh = sb_find_get_block(tree->inode->i_sb, start + i); - ext3_forget(handle, 0, tree->inode, bh, start + i); - } -- ext3_free_blocks(handle, tree->inode, start, num); -+ ext3_free_blocks(handle, tree->inode, start, num, metadata); - } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { - printk("strange request: removal %lu-%lu from %u:%u\n", - from, to, ex->ee_block, ex->ee_len); -@@ -2177,11 +2179,8 @@ int ext3_ext_get_block(handle_t *handle, - struct ext3_extent *ex; - int goal, newblock, err = 0, depth; - struct ext3_extents_tree tree; -- unsigned long next; -- int allocated = 0; -- -- /* until we have multiblock allocation */ -- max_blocks = 1; -+ unsigned long allocated = 0; -+ struct ext3_allocation_request ar; - - __clear_bit(BH_New, &bh_result->b_state); - ext3_init_tree_desc(&tree, inode); -@@ -2253,18 +2252,33 @@ int ext3_ext_get_block(handle_t *handle, - goto out2; - } - -+ /* find neighbour allocated 
blocks */ -+ ar.lleft = iblock; -+ err = ext3_ext_search_left(&tree, path, &ar.lleft, &ar.pleft); -+ if (err) -+ goto out2; -+ ar.lright = iblock; -+ err = ext3_ext_search_right(&tree, path, &ar.lright, &ar.pright); -+ if (err) -+ goto out2; -+ - /* find next allocated block so that we know how many - * blocks we can allocate without ovelapping next extent */ -- EXT_ASSERT(iblock >= ex->ee_block + ex->ee_len); -- next = ext3_ext_next_allocated_block(path); -- EXT_ASSERT(next > iblock); -- allocated = next - iblock; -+ EXT_ASSERT(ar.pright == 0 || ar.lright > iblock); -+ if (ar.pright == 0) -+ allocated = EXT_MAX_BLOCK - iblock; -+ else -+ allocated = ar.lright - iblock; - if (allocated > max_blocks) - allocated = max_blocks; - - /* allocate new block */ -- goal = ext3_ext_find_goal(inode, path, iblock); -- newblock = ext3_new_block(handle, inode, goal, &err); -+ ar.inode = inode; -+ ar.goal = ext3_ext_find_goal(inode, path, iblock); -+ ar.logical = iblock; -+ ar.len = allocated; -+ ar.flags = EXT3_MB_HINT_DATA; -+ newblock = ext3_mb_new_blocks(handle, &ar, &err); - if (!newblock) - goto out2; - ext_debug(&tree, "allocate new block: goal %d, found %d\n", -@@ -2274,11 +2288,14 @@ int ext3_ext_get_block(handle_t *handle, - newex.ee_block = iblock; - newex.ee_start = newblock; - newex.ee_start_hi = 0; -- newex.ee_len = 1; -+ newex.ee_len = ar.len; - err = ext3_ext_insert_extent(handle, &tree, path, &newex); - if (err) { - /* free data blocks we just allocated */ -- ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len); -+ /* not a good idea to call discard here directly, -+ * but otherwise we'd need to call it every free() */ -+ ext3_mb_discard_inode_preallocations(inode); -+ ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len, 0); - goto out2; - } - -@@ -2287,6 +2304,7 @@ int ext3_ext_get_block(handle_t *handle, - - /* previous routine could use block we allocated */ - newblock = newex.ee_start; -+ allocated = newex.ee_len; - __set_bit(BH_New, 
&bh_result->b_state); - - ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, -@@ -2341,6 +2359,9 @@ void ext3_ext_truncate(struct inode * in - down(&EXT3_I(inode)->truncate_sem); - ext3_ext_invalidate_cache(&tree); - -+ /* it's important to discard preallocations under truncate_sem */ -+ ext3_mb_discard_inode_preallocations(inode); -+ - /* - * TODO: optimization is possible here - * probably we need not scaning at all, -Index: linux-2.6.5-7.283-full/fs/ext3/Makefile -=================================================================== ---- linux-2.6.5-7.283-full.orig/fs/ext3/Makefile 2007-03-28 15:27:39.000000000 +0400 -+++ linux-2.6.5-7.283-full/fs/ext3/Makefile 2007-03-28 15:46:02.000000000 +0400 -@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o \ -- extents.o -+ extents.o mballoc.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: linux-2.6.5-7.283-full/fs/ext3/xattr.c -=================================================================== ---- linux-2.6.5-7.283-full.orig/fs/ext3/xattr.c 2007-03-28 02:13:37.000000000 +0400 -+++ linux-2.6.5-7.283-full/fs/ext3/xattr.c 2007-03-28 15:46:02.000000000 +0400 -@@ -1371,7 +1371,7 @@ ext3_xattr_set_handle2(handle_t *handle, - new_bh = sb_getblk(sb, block); - if (!new_bh) { - getblk_failed: -- ext3_free_blocks(handle, inode, block, 1); -+ ext3_free_blocks(handle, inode, block, 1, 1); - error = -EIO; - goto cleanup; - } -@@ -1411,7 +1411,7 @@ getblk_failed: - if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { - /* Free the old block. 
*/ - ea_bdebug(old_bh, "freeing"); -- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); -+ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1); - - /* ext3_forget() calls bforget() for us, but we - let our caller release old_bh, so we need to -@@ -1519,7 +1519,7 @@ ext3_xattr_delete_inode(handle_t *handle - mb_cache_entry_free(ce); - ce = NULL; - } -- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); -+ ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1); - get_bh(bh); - ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl); - } else { -Index: linux-2.6.5-7.283-full/fs/ext3/balloc.c -=================================================================== ---- linux-2.6.5-7.283-full.orig/fs/ext3/balloc.c 2006-12-01 18:39:48.000000000 +0300 -+++ linux-2.6.5-7.283-full/fs/ext3/balloc.c 2007-03-28 15:46:02.000000000 +0400 -@@ -78,7 +78,7 @@ struct ext3_group_desc * ext3_get_group_ - * - * Return buffer_head on success or NULL in case of failure. - */ --static struct buffer_head * -+struct buffer_head * - read_block_bitmap(struct super_block *sb, unsigned int block_group) - { - struct ext3_group_desc * desc; -@@ -266,6 +266,8 @@ void ext3_discard_reservation(struct ino - struct reserve_window_node *rsv = &ei->i_rsv_window; - spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; - -+ ext3_mb_discard_inode_preallocations(inode); -+ - if (!rsv_is_empty(&rsv->rsv_window)) { - spin_lock(rsv_lock); - rsv_window_remove(inode->i_sb, rsv); -@@ -274,7 +276,7 @@ void ext3_discard_reservation(struct ino - } - - /* Free given blocks, update quota and i_blocks field */ --void ext3_free_blocks(handle_t *handle, struct inode *inode, -+void ext3_free_blocks_old(handle_t *handle, struct inode *inode, - unsigned long block, unsigned long count) - { - struct buffer_head *bitmap_bh = NULL; -@@ -456,6 +458,29 @@ error_return: - return; - } - -+void ext3_free_blocks(handle_t *handle, struct inode * inode, -+ unsigned long block, unsigned long 
count, int metadata) -+{ -+ struct super_block *sb; -+ int freed; -+ -+ /* this isn't the right place to decide whether block is metadata -+ * inode.c/extents.c knows better, but for safety ... */ -+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || -+ ext3_should_journal_data(inode)) -+ metadata = 1; -+ -+ sb = inode->i_sb; -+ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) -+ ext3_free_blocks_old(handle, inode, block, count); -+ else { -+ ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); -+ if (freed) -+ DQUOT_FREE_BLOCK(inode, freed); -+ } -+ return; -+} -+ - /* - * For ext3 allocations, we must not reuse any blocks which are - * allocated in the bitmap buffer's "last committed data" copy. This -@@ -1142,7 +1167,7 @@ int ext3_should_retry_alloc(struct super - * bitmap, and then for any free bit if that fails. - * This function also updates quota and i_blocks field. - */ --int ext3_new_block(handle_t *handle, struct inode *inode, -+int ext3_new_block_old(handle_t *handle, struct inode *inode, - unsigned long goal, int *errp) - { - struct buffer_head *bitmap_bh = NULL; -Index: linux-2.6.5-7.283-full/fs/ext3/inode.c -=================================================================== ---- linux-2.6.5-7.283-full.orig/fs/ext3/inode.c 2007-03-28 02:50:19.000000000 +0400 -+++ linux-2.6.5-7.283-full/fs/ext3/inode.c 2007-03-28 15:46:02.000000000 +0400 -@@ -574,7 +574,7 @@ static int ext3_alloc_branch(handle_t *h - ext3_journal_forget(handle, branch[i].bh); - } - for (i = 0; i < keys; i++) -- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); -+ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 0); - return err; - } - -@@ -675,7 +675,7 @@ err_out: - if (err == -EAGAIN) - for (i = 0; i < num; i++) - ext3_free_blocks(handle, inode, -- le32_to_cpu(where[i].key), 1); -+ le32_to_cpu(where[i].key), 1, 0); - return err; - } - -@@ -1839,7 +1839,7 @@ ext3_clear_blocks(handle_t *handle, stru - } - } - -- 
ext3_free_blocks(handle, inode, block_to_free, count); -+ ext3_free_blocks(handle, inode, block_to_free, count, 0); - } - - /** -@@ -2010,7 +2010,7 @@ static void ext3_free_branches(handle_t - ext3_journal_test_restart(handle, inode); - } - -- ext3_free_blocks(handle, inode, nr, 1); -+ ext3_free_blocks(handle, inode, nr, 1, 1); - - if (parent_bh) { - /* diff --git a/lustre/kernel_patches/patches/ext3-multi-mount-protection-2.6-fc5.patch b/lustre/kernel_patches/patches/ext3-multi-mount-protection-2.6-fc5.patch deleted file mode 100644 index 0c41b47614..0000000000 --- a/lustre/kernel_patches/patches/ext3-multi-mount-protection-2.6-fc5.patch +++ /dev/null @@ -1,381 +0,0 @@ -Index: mmp/fs/ext3/al.h -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ mmp/fs/ext3/al.h 2006-07-24 10:39:26.000000000 +0800 -@@ -0,0 +1,11 @@ -+/* -+ * (C) 2006 Qi Yong <qiyong@clusterfs.com> -+ */ -+ -+#define ALIVE_MAGIC 0xA1153C29 -+struct alive_struct { -+ __le32 al_magic; -+ __le32 al_seq; -+ __le32 al_time; -+ char al_nodename[65]; -+}; -Index: mmp/fs/ext3/namei.c -=================================================================== ---- mmp.orig/fs/ext3/namei.c 2006-07-24 10:34:41.000000000 +0800 -+++ mmp/fs/ext3/namei.c 2006-07-24 10:39:26.000000000 +0800 -@@ -805,7 +805,7 @@ static inline int search_dirblock(struct - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. 
- */ --static struct buffer_head * ext3_find_entry (struct dentry *dentry, -+struct buffer_head * ext3_find_entry (struct dentry *dentry, - struct ext3_dir_entry_2 ** res_dir) - { - struct super_block * sb; -Index: mmp/fs/ext3/super.c -=================================================================== ---- mmp.orig/fs/ext3/super.c 2006-07-24 10:34:41.000000000 +0800 -+++ mmp/fs/ext3/super.c 2006-07-24 10:45:19.000000000 +0800 -@@ -36,12 +36,14 @@ - #include <linux/namei.h> - #include <linux/quotaops.h> - #include <linux/seq_file.h> -+#include <linux/kthread.h> - - #include <asm/uaccess.h> - - #include "xattr.h" - #include "acl.h" - #include "namei.h" -+#include "al.h" - - static int ext3_load_journal(struct super_block *, struct ext3_super_block *, - unsigned long journal_devnum); -@@ -62,6 +64,8 @@ static int ext3_statfs (struct super_blo - static void ext3_unlockfs(struct super_block *sb); - static void ext3_write_super (struct super_block * sb); - static void ext3_write_super_lockfs(struct super_block *sb); -+struct buffer_head * ext3_find_entry (struct dentry *dentry, -+ struct ext3_dir_entry_2 ** res_dir); - - /* - * Wrappers for journal_start/end. 
-@@ -435,6 +439,9 @@ static void ext3_put_super (struct super - invalidate_bdev(sbi->journal_bdev, 0); - ext3_blkdev_remove(sbi); - } -+ if (sbi->s_alive_tsk) -+ kthread_stop(sbi->s_alive_tsk); -+ - sb->s_fs_info = NULL; - kfree(sbi); - return; -@@ -1369,6 +1376,261 @@ static unsigned long descriptor_loc(stru - return (first_data_block + has_super + (bg * sbi->s_blocks_per_group)); - } - -+static int write_alive(struct buffer_head * bh) -+{ -+ lock_buffer(bh); -+ bh->b_end_io = end_buffer_write_sync; -+ get_bh(bh); -+ submit_bh(WRITE, bh); -+ wait_on_buffer(bh); -+ if (unlikely(!buffer_uptodate(bh))) -+ return 1; -+ return 0; -+} -+ -+static int read_alive_again(struct buffer_head * bh) -+{ -+ lock_buffer(bh); -+ bh->b_end_io = end_buffer_read_sync; -+ get_bh(bh); -+ submit_bh(READ, bh); -+ wait_on_buffer(bh); -+ if (!buffer_uptodate(bh)) { -+ brelse(bh); -+ return 1; -+ } -+ return 0; -+} -+ -+/* -+ * The caller must have a ref on the buffer_head. -+ */ -+static int kalived(void *data) -+{ -+ struct buffer_head * bh; -+ struct alive_struct * alive; -+ char b[BDEVNAME_SIZE]; -+ u32 seq = 0; -+ -+ bh = (struct buffer_head *)data; -+ bdevname(bh->b_bdev, b); -+ -+ alive = (struct alive_struct *)(bh->b_data); -+ alive->al_magic = cpu_to_le32(ALIVE_MAGIC); -+ alive->al_time = cpu_to_le32(get_seconds()); -+ -+ down_read(&uts_sem); -+ memcpy(alive->al_nodename, system_utsname.nodename, 65); -+ up_read(&uts_sem); -+ -+ while (!kthread_should_stop()) { -+ if (++seq == 0) -+ ++seq; -+ -+ alive->al_seq = cpu_to_le32(seq); -+ alive->al_time = cpu_to_le32(get_seconds()); -+ -+ if (unlikely(write_alive(bh))) { -+ /* panic here? 
*/ -+ printk(KERN_ERR "Alive (device %s): " -+ "can't write alive block\n", b); -+ continue; -+ } -+ -+ schedule_timeout_interruptible(5 * HZ); -+ } -+ -+ alive->al_seq = 0; -+ alive->al_time = cpu_to_le32(get_seconds()); -+ -+ if (unlikely(write_alive(bh))) -+ printk(KERN_ERR "Alive (device %s): " -+ "can't reset alive block\n", b); -+ brelse(bh); -+ return 0; -+} -+ -+static unsigned long get_alive_ino(struct super_block *sb) -+{ -+ unsigned long ino = 0; -+ struct dentry alive; -+ struct dentry * root; -+ struct inode * root_inode; -+ struct ext3_dir_entry_2 * de; -+ struct buffer_head * bh; -+ -+ root_inode = iget(sb, EXT3_ROOT_INO); -+ root = d_alloc_root(root_inode); -+ if (!root) { -+ printk(KERN_ERR "Alive (device %s): get root inode failed\n", -+ sb->s_id); -+ iput(root_inode); -+ goto out; -+ } -+ -+ alive.d_name.name = ".alive"; -+ alive.d_name.len = 6; -+ alive.d_parent = root; -+ -+ bh = ext3_find_entry(&alive, &de); -+ dput(root); -+ -+ if (!bh) { -+ printk(KERN_WARNING "Alive (device %s): alive lookup failed\n", -+ sb->s_id); -+ goto out; -+ } -+ -+ ino = le32_to_cpu(de->inode); -+ brelse (bh); -+ pr_debug("Alive (device %s): alive_ino=%lu\n", sb->s_id, ino); -+out: -+ return ino; -+} -+ -+/* check alive file */ -+static int check_alive(struct super_block *sb, struct ext3_sb_info *sbi) -+{ -+ unsigned long ino; -+ struct buffer_head * bh; -+ struct ext3_inode_info * ei; -+ struct inode * alive_inode; -+ struct alive_struct * alive; -+ u32 alive_block; -+ u32 seq; -+ -+ ino = get_alive_ino(sb); -+ if (!ino) -+ goto failed; -+ -+ alive_inode = iget(sb, ino); -+ if (!alive_inode) { -+ iput(alive_inode); -+ printk(KERN_ERR "Alive (device %s): get alive inode failed\n", -+ sb->s_id); -+ goto failed; -+ } -+ if (!alive_inode->i_nlink) { -+ make_bad_inode(alive_inode); -+ iput(alive_inode); -+ printk(KERN_ERR "Alive (device %s): alive inode is deleted\n", -+ sb->s_id); -+ goto failed; -+ } -+ if (!S_ISREG(alive_inode->i_mode)) { -+ iput(alive_inode); -+ 
printk(KERN_ERR "Alive (device %s): invalid alive inode\n", -+ sb->s_id); -+ goto failed; -+ } -+ if (EXT3_I(alive_inode)->i_flags & EXT3_EXTENTS_FL) { -+ iput(alive_inode); -+ printk(KERN_ERR "Alive (device %s): invalid alive inode, " -+ "in extents format\n", sb->s_id); -+ goto failed; -+ } -+ -+ ei = EXT3_I(alive_inode); -+ alive_block = ei->i_data[0]; -+ iput(alive_inode); -+ -+ pr_debug("Alive (device %s): read in alive block #%u\n", -+ sb->s_id, alive_block); -+ -+ /* first read */ -+ bh = sb_bread(sb, alive_block); -+ if (!bh) { -+ printk(KERN_ERR "Alive (device %s): " -+ "can't read alive block #%u\n", sb->s_id, alive_block); -+ goto failed; -+ } -+ -+ alive = (struct alive_struct *)(bh->b_data); -+ if (le32_to_cpu(alive->al_magic) != ALIVE_MAGIC) { -+ printk(KERN_ERR "Alive (device %s): " -+ "magic mismatch\n", sb->s_id); -+ brelse(bh); -+ goto failed; -+ } -+ -+ seq = le32_to_cpu(alive->al_seq); -+ pr_debug("Alive (device %s): seq=%u\n", sb->s_id, seq); -+ pr_info ("Alive (device %s): last touched by node: %s, " -+ "%li seconds ago\n", sb->s_id, alive->al_nodename, -+ get_seconds() - le32_to_cpu(alive->al_time)); -+ -+ if (seq == 0) -+ goto skip; -+ -+ /* wait 8s */ -+ pr_info("Alive (device %s): wait for 8 seconds...\n", sb->s_id); -+ schedule_timeout_uninterruptible(HZ * 8); -+ -+ /* read again */ -+ if (read_alive_again(bh)) { -+ printk(KERN_ERR "Alive (device %s): " -+ "can't read alive block #%u\n", -+ sb->s_id, alive_block); -+ goto failed; -+ } -+ -+ alive = (struct alive_struct *)(bh->b_data); -+ pr_debug("Alive (device %s): seq=%u\n", -+ sb->s_id, le32_to_cpu(alive->al_seq)); -+ -+ if (seq != le32_to_cpu(alive->al_seq)) { -+ printk(KERN_WARNING "Alive (device %s): " -+ "still active on node %s\n", -+ sb->s_id, alive->al_nodename); -+ brelse(bh); -+ goto failed; -+ } -+skip: -+ /* write a new random seq */ -+ get_random_bytes(&seq, sizeof(u32)); -+ alive->al_seq = cpu_to_le32(seq); -+ if (unlikely(write_alive(bh))) { -+ printk(KERN_ERR "Alive 
(device %s): " -+ "can't write alive block\n", sb->s_id); -+ goto failed; -+ } -+ pr_debug("Alive (device %s): write random seq=%u\n", sb->s_id, seq); -+ -+ /* wait 6s */ -+ pr_info("Alive (device %s): wait for 6 seconds...\n", sb->s_id); -+ schedule_timeout_uninterruptible(HZ * 6); -+ -+ /* read again */ -+ if (read_alive_again(bh)) { -+ printk(KERN_ERR "Alive (device %s): " -+ "can't read alive block #%u\n", -+ sb->s_id, alive_block); -+ goto failed; -+ } -+ -+ alive = (struct alive_struct *)(bh->b_data); -+ pr_debug("Alive (device %s): seq=%u\n", -+ sb->s_id, le32_to_cpu(alive->al_seq)); -+ -+ if (seq != le32_to_cpu(alive->al_seq)) { -+ printk(KERN_WARNING "Alive (device %s): " -+ "still active on node %s\n", -+ sb->s_id, alive->al_nodename); -+ brelse(bh); -+ goto failed; -+ } -+ -+ /* succeed */ -+ pr_info("Alive (device %s): alive check passed!\n", sb->s_id); -+ sbi->s_alive_tsk = kthread_run(kalived, bh, "kalived"); -+ return 0; -+ -+failed: -+ printk(KERN_WARNING "Alive (device %s): alive check failed!\n", -+ sb->s_id); -+ return 1; -+} -+ - - static int ext3_fill_super (struct super_block *sb, void *data, int silent) - { -@@ -1668,6 +1930,10 @@ static int ext3_fill_super (struct super - EXT3_HAS_INCOMPAT_FEATURE(sb, - EXT3_FEATURE_INCOMPAT_RECOVER)); - -+ if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_ALIVE)) -+ if (check_alive(sb, sbi)) -+ goto failed_mount2; -+ - /* - * The first inode we look at is the journal inode. Don't try - * root first: it may be modified in the journal! 
-@@ -1785,6 +2051,8 @@ cantfind_ext3: - - failed_mount3: - journal_destroy(sbi->s_journal); -+ if (sbi->s_alive_tsk) -+ kthread_stop(sbi->s_alive_tsk); - failed_mount2: - for (i = 0; i < db_count; i++) - brelse(sbi->s_group_desc[i]); -Index: mmp/include/linux/ext3_fs.h -=================================================================== ---- mmp.orig/include/linux/ext3_fs.h 2006-07-24 10:34:41.000000000 +0800 -+++ mmp/include/linux/ext3_fs.h 2006-07-24 10:39:26.000000000 +0800 -@@ -581,12 +581,14 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 - #define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ -+#define EXT3_FEATURE_INCOMPAT_ALIVE 0x0080 - - #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER| \ - EXT3_FEATURE_INCOMPAT_META_BG| \ -- EXT3_FEATURE_INCOMPAT_EXTENTS) -+ EXT3_FEATURE_INCOMPAT_EXTENTS| \ -+ EXT3_FEATURE_INCOMPAT_ALIVE) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -Index: mmp/include/linux/ext3_fs_sb.h -=================================================================== ---- mmp.orig/include/linux/ext3_fs_sb.h 2006-07-24 10:34:41.000000000 +0800 -+++ mmp/include/linux/ext3_fs_sb.h 2006-07-24 10:39:26.000000000 +0800 -@@ -86,6 +86,7 @@ struct ext3_sb_info { - char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ - int s_jquota_fmt; /* Format of quota to use */ - #endif -+ struct task_struct * s_alive_tsk; - - /* for buddy allocator */ - struct ext3_group_info **s_group_info; diff --git a/lustre/kernel_patches/patches/ext3-multi-mount-protection-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/ext3-multi-mount-protection-2.6.18-vanilla.patch deleted file mode 100644 index 
989ca26c45..0000000000 --- a/lustre/kernel_patches/patches/ext3-multi-mount-protection-2.6.18-vanilla.patch +++ /dev/null @@ -1,381 +0,0 @@ -Index: mmp/fs/ext3/al.h -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ mmp/fs/ext3/al.h 2006-07-18 20:43:51.000000000 +0800 -@@ -0,0 +1,11 @@ -+/* -+ * (C) 2006 Qi Yong <qiyong@clusterfs.com> -+ */ -+ -+#define ALIVE_MAGIC 0xA1153C29 -+struct alive_struct { -+ __le32 al_magic; -+ __le32 al_seq; -+ __le32 al_time; -+ char al_nodename[65]; -+}; -Index: mmp/fs/ext3/namei.c -=================================================================== ---- mmp.orig/fs/ext3/namei.c 2006-07-18 20:43:51.000000000 +0800 -+++ mmp/fs/ext3/namei.c 2006-07-18 20:43:51.000000000 +0800 -@@ -805,7 +805,7 @@ static inline int search_dirblock(struct - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. - */ --static struct buffer_head * ext3_find_entry (struct dentry *dentry, -+struct buffer_head * ext3_find_entry (struct dentry *dentry, - struct ext3_dir_entry_2 ** res_dir) - { - struct super_block * sb; -Index: mmp/fs/ext3/super.c -=================================================================== ---- mmp.orig/fs/ext3/super.c 2006-07-18 20:43:51.000000000 +0800 -+++ mmp/fs/ext3/super.c 2006-07-18 23:49:54.000000000 +0800 -@@ -35,12 +35,14 @@ - #include <linux/namei.h> - #include <linux/quotaops.h> - #include <linux/seq_file.h> -+#include <linux/kthread.h> - - #include <asm/uaccess.h> - - #include "xattr.h" - #include "acl.h" - #include "namei.h" -+#include "al.h" - - static int ext3_load_journal(struct super_block *, struct ext3_super_block *, - unsigned long journal_devnum); -@@ -61,6 +63,8 @@ static int ext3_statfs (struct dentry * - static void ext3_unlockfs(struct super_block *sb); - static void ext3_write_super (struct super_block * sb); - static void ext3_write_super_lockfs(struct super_block *sb); -+struct 
buffer_head * ext3_find_entry (struct dentry *dentry, -+ struct ext3_dir_entry_2 ** res_dir); - - /* - * Wrappers for journal_start/end. -@@ -434,6 +438,9 @@ static void ext3_put_super (struct super - invalidate_bdev(sbi->journal_bdev, 0); - ext3_blkdev_remove(sbi); - } -+ if (sbi->s_alive_tsk) -+ kthread_stop(sbi->s_alive_tsk); -+ - sb->s_fs_info = NULL; - kfree(sbi); - return; -@@ -1374,6 +1381,261 @@ static ext3_fsblk_t descriptor_loc(struc - return (has_super + ext3_group_first_block_no(sb, bg)); - } - -+static int write_alive(struct buffer_head * bh) -+{ -+ lock_buffer(bh); -+ bh->b_end_io = end_buffer_write_sync; -+ get_bh(bh); -+ submit_bh(WRITE, bh); -+ wait_on_buffer(bh); -+ if (unlikely(!buffer_uptodate(bh))) -+ return 1; -+ return 0; -+} -+ -+static int read_alive_again(struct buffer_head * bh) -+{ -+ lock_buffer(bh); -+ bh->b_end_io = end_buffer_read_sync; -+ get_bh(bh); -+ submit_bh(READ, bh); -+ wait_on_buffer(bh); -+ if (!buffer_uptodate(bh)) { -+ brelse(bh); -+ return 1; -+ } -+ return 0; -+} -+ -+/* -+ * The caller must have a ref on the buffer_head. -+ */ -+static int kalived(void *data) -+{ -+ struct buffer_head * bh; -+ struct alive_struct * alive; -+ char b[BDEVNAME_SIZE]; -+ u32 seq = 0; -+ -+ bh = (struct buffer_head *)data; -+ bdevname(bh->b_bdev, b); -+ -+ alive = (struct alive_struct *)(bh->b_data); -+ alive->al_magic = cpu_to_le32(ALIVE_MAGIC); -+ alive->al_time = cpu_to_le32(get_seconds()); -+ -+ down_read(&uts_sem); -+ memcpy(alive->al_nodename, system_utsname.nodename, 65); -+ up_read(&uts_sem); -+ -+ while (!kthread_should_stop()) { -+ if (++seq == 0) -+ ++seq; -+ -+ alive->al_seq = cpu_to_le32(seq); -+ alive->al_time = cpu_to_le32(get_seconds()); -+ -+ if (unlikely(write_alive(bh))) { -+ /* panic here? 
*/ -+ printk(KERN_ERR "Alive (device %s): " -+ "can't write alive block\n", b); -+ continue; -+ } -+ -+ schedule_timeout_interruptible(5 * HZ); -+ } -+ -+ alive->al_seq = 0; -+ alive->al_time = cpu_to_le32(get_seconds()); -+ -+ if (unlikely(write_alive(bh))) -+ printk(KERN_ERR "Alive (device %s): " -+ "can't reset alive block\n", b); -+ brelse(bh); -+ return 0; -+} -+ -+static unsigned long get_alive_ino(struct super_block *sb) -+{ -+ unsigned long ino = 0; -+ struct dentry alive; -+ struct dentry * root; -+ struct inode * root_inode; -+ struct ext3_dir_entry_2 * de; -+ struct buffer_head * bh; -+ -+ root_inode = iget(sb, EXT3_ROOT_INO); -+ root = d_alloc_root(root_inode); -+ if (!root) { -+ printk(KERN_ERR "Alive (device %s): get root inode failed\n", -+ sb->s_id); -+ iput(root_inode); -+ goto out; -+ } -+ -+ alive.d_name.name = ".alive"; -+ alive.d_name.len = 6; -+ alive.d_parent = root; -+ -+ bh = ext3_find_entry(&alive, &de); -+ dput(root); -+ -+ if (!bh) { -+ printk(KERN_WARNING "Alive (device %s): alive lookup failed\n", -+ sb->s_id); -+ goto out; -+ } -+ -+ ino = le32_to_cpu(de->inode); -+ brelse (bh); -+ pr_debug("Alive (device %s): alive_ino=%lu\n", sb->s_id, ino); -+out: -+ return ino; -+} -+ -+/* check alive file */ -+static int check_alive(struct super_block *sb, struct ext3_sb_info *sbi) -+{ -+ unsigned long ino; -+ struct buffer_head * bh; -+ struct ext3_inode_info * ei; -+ struct inode * alive_inode; -+ struct alive_struct * alive; -+ u32 alive_block; -+ u32 seq; -+ -+ ino = get_alive_ino(sb); -+ if (!ino) -+ goto failed; -+ -+ alive_inode = iget(sb, ino); -+ if (!alive_inode) { -+ iput(alive_inode); -+ printk(KERN_ERR "Alive (device %s): get alive inode failed\n", -+ sb->s_id); -+ goto failed; -+ } -+ if (!alive_inode->i_nlink) { -+ make_bad_inode(alive_inode); -+ iput(alive_inode); -+ printk(KERN_ERR "Alive (device %s): alive inode is deleted\n", -+ sb->s_id); -+ goto failed; -+ } -+ if (!S_ISREG(alive_inode->i_mode)) { -+ iput(alive_inode); -+ 
printk(KERN_ERR "Alive (device %s): invalid alive inode\n", -+ sb->s_id); -+ goto failed; -+ } -+ if (EXT3_I(alive_inode)->i_flags & EXT3_EXTENTS_FL) { -+ iput(alive_inode); -+ printk(KERN_ERR "Alive (device %s): invalid alive inode, " -+ "in extents format\n", sb->s_id); -+ goto failed; -+ } -+ -+ ei = EXT3_I(alive_inode); -+ alive_block = ei->i_data[0]; -+ iput(alive_inode); -+ -+ pr_debug("Alive (device %s): read in alive block #%u\n", -+ sb->s_id, alive_block); -+ -+ /* first read */ -+ bh = sb_bread(sb, alive_block); -+ if (!bh) { -+ printk(KERN_ERR "Alive (device %s): " -+ "can't read alive block #%u\n", sb->s_id, alive_block); -+ goto failed; -+ } -+ -+ alive = (struct alive_struct *)(bh->b_data); -+ if (le32_to_cpu(alive->al_magic) != ALIVE_MAGIC) { -+ printk(KERN_ERR "Alive (device %s): " -+ "magic mismatch\n", sb->s_id); -+ brelse(bh); -+ goto failed; -+ } -+ -+ seq = le32_to_cpu(alive->al_seq); -+ pr_debug("Alive (device %s): seq=%u\n", sb->s_id, seq); -+ pr_info ("Alive (device %s): last touched by node: %s, " -+ "%li seconds ago\n", sb->s_id, alive->al_nodename, -+ get_seconds() - le32_to_cpu(alive->al_time)); -+ -+ if (seq == 0) -+ goto skip; -+ -+ /* wait 8s */ -+ pr_info("Alive (device %s): wait for 8 seconds...\n", sb->s_id); -+ schedule_timeout_uninterruptible(HZ * 8); -+ -+ /* read again */ -+ if (read_alive_again(bh)) { -+ printk(KERN_ERR "Alive (device %s): " -+ "can't read alive block #%u\n", -+ sb->s_id, alive_block); -+ goto failed; -+ } -+ -+ alive = (struct alive_struct *)(bh->b_data); -+ pr_debug("Alive (device %s): seq=%u\n", -+ sb->s_id, le32_to_cpu(alive->al_seq)); -+ -+ if (seq != le32_to_cpu(alive->al_seq)) { -+ printk(KERN_WARNING "Alive (device %s): " -+ "still active on node %s\n", -+ sb->s_id, alive->al_nodename); -+ brelse(bh); -+ goto failed; -+ } -+skip: -+ /* write a new random seq */ -+ get_random_bytes(&seq, sizeof(u32)); -+ alive->al_seq = cpu_to_le32(seq); -+ if (unlikely(write_alive(bh))) { -+ printk(KERN_ERR "Alive 
(device %s): " -+ "can't write alive block\n", sb->s_id); -+ goto failed; -+ } -+ pr_debug("Alive (device %s): write random seq=%u\n", sb->s_id, seq); -+ -+ /* wait 6s */ -+ pr_info("Alive (device %s): wait for 6 seconds...\n", sb->s_id); -+ schedule_timeout_uninterruptible(HZ * 6); -+ -+ /* read again */ -+ if (read_alive_again(bh)) { -+ printk(KERN_ERR "Alive (device %s): " -+ "can't read alive block #%u\n", -+ sb->s_id, alive_block); -+ goto failed; -+ } -+ -+ alive = (struct alive_struct *)(bh->b_data); -+ pr_debug("Alive (device %s): seq=%u\n", -+ sb->s_id, le32_to_cpu(alive->al_seq)); -+ -+ if (seq != le32_to_cpu(alive->al_seq)) { -+ printk(KERN_WARNING "Alive (device %s): " -+ "still active on node %s\n", -+ sb->s_id, alive->al_nodename); -+ brelse(bh); -+ goto failed; -+ } -+ -+ /* succeed */ -+ pr_info("Alive (device %s): alive check passed!\n", sb->s_id); -+ sbi->s_alive_tsk = kthread_run(kalived, bh, "kalived"); -+ return 0; -+ -+failed: -+ printk(KERN_WARNING "Alive (device %s): alive check failed!\n", -+ sb->s_id); -+ return 1; -+} -+ - - static int ext3_fill_super (struct super_block *sb, void *data, int silent) - { -@@ -1688,6 +1950,10 @@ static int ext3_fill_super (struct super - EXT3_HAS_INCOMPAT_FEATURE(sb, - EXT3_FEATURE_INCOMPAT_RECOVER)); - -+ if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_ALIVE)) -+ if (check_alive(sb, sbi)) -+ goto failed_mount2; -+ - /* - * The first inode we look at is the journal inode. Don't try - * root first: it may be modified in the journal! 
-@@ -1796,6 +2062,8 @@ failed_mount3: - percpu_counter_destroy(&sbi->s_freeblocks_counter); - percpu_counter_destroy(&sbi->s_freeinodes_counter); - percpu_counter_destroy(&sbi->s_dirs_counter); -+ if (sbi->s_alive_tsk) -+ kthread_stop(sbi->s_alive_tsk); - failed_mount2: - for (i = 0; i < db_count; i++) - brelse(sbi->s_group_desc[i]); -Index: mmp/include/linux/ext3_fs.h -=================================================================== ---- mmp.orig/include/linux/ext3_fs.h 2006-07-18 20:43:51.000000000 +0800 -+++ mmp/include/linux/ext3_fs.h 2006-07-18 20:43:52.000000000 +0800 -@@ -579,12 +579,14 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 - #define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ -+#define EXT3_FEATURE_INCOMPAT_ALIVE 0x0080 - - #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER| \ - EXT3_FEATURE_INCOMPAT_META_BG| \ -- EXT3_FEATURE_INCOMPAT_EXTENTS) -+ EXT3_FEATURE_INCOMPAT_EXTENTS| \ -+ EXT3_FEATURE_INCOMPAT_ALIVE) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -Index: mmp/include/linux/ext3_fs_sb.h -=================================================================== ---- mmp.orig/include/linux/ext3_fs_sb.h 2006-07-18 20:43:51.000000000 +0800 -+++ mmp/include/linux/ext3_fs_sb.h 2006-07-18 20:43:52.000000000 +0800 -@@ -86,6 +86,7 @@ struct ext3_sb_info { - char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ - int s_jquota_fmt; /* Format of quota to use */ - #endif -+ struct task_struct * s_alive_tsk; - - /* for buddy allocator */ - struct ext3_group_info **s_group_info; diff --git a/lustre/kernel_patches/patches/ext3-statfs-2.6.12.patch 
b/lustre/kernel_patches/patches/ext3-statfs-2.6.12.patch deleted file mode 100644 index ad7d79b1cd..0000000000 --- a/lustre/kernel_patches/patches/ext3-statfs-2.6.12.patch +++ /dev/null @@ -1,177 +0,0 @@ -Index: linux-2.6.12/fs/ext3/super.c -=================================================================== ---- linux-2.6.12.orig/fs/ext3/super.c 2005-06-17 13:48:29.000000000 -0600 -+++ linux-2.6.12/fs/ext3/super.c 2005-11-25 05:59:47.000000000 -0700 -@@ -2165,13 +2165,13 @@ - { - struct ext3_super_block *es = EXT3_SB(sb)->s_es; - unsigned long overhead; -- int i; - - if (test_opt (sb, MINIX_DF)) - overhead = 0; - else { -- unsigned long ngroups; -- ngroups = EXT3_SB(sb)->s_groups_count; -+ unsigned long ngroups = EXT3_SB(sb)->s_groups_count, group; -+ unsigned long three = 1, five = 5, seven = 7; -+ unsigned long metabg = -1UL; - smp_rmb(); - - /* -@@ -2189,11 +2188,14 @@ - * block group descriptors. If the sparse superblocks - * feature is turned on, then not all groups have this. - */ -- for (i = 0; i < ngroups; i++) { -- overhead += ext3_bg_has_super(sb, i) + -- ext3_bg_num_gdb(sb, i); -- cond_resched(); -- } -+ overhead += 1 + EXT3_SB(sb)->s_gdb_count; /* group 0 */ -+ if (EXT3_HAS_INCOMPAT_FEATURE(sb,EXT3_FEATURE_INCOMPAT_META_BG)) -+ metabg =le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg); -+ -+ while ((group = ext3_list_backups(sb, &three, &five, &seven)) < -+ ngroups) /* sb + group descriptors backups */ -+ overhead += 1 + (group >= metabg ? 
1 : -+ EXT3_SB(sb)->s_gdb_count); - - /* - * Every block group has an inode bitmap, a block -@@ -2205,12 +2204,16 @@ - buf->f_type = EXT3_SUPER_MAGIC; - buf->f_bsize = sb->s_blocksize; - buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; -- buf->f_bfree = ext3_count_free_blocks (sb); -+ buf->f_bfree = percpu_counter_read(&EXT3_SB(sb)->s_freeblocks_counter); -+ if (buf->f_bfree < 0) -+ buf->f_bfree = 0; - buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); - if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) - buf->f_bavail = 0; - buf->f_files = le32_to_cpu(es->s_inodes_count); -- buf->f_ffree = ext3_count_free_inodes (sb); -+ buf->f_ffree = percpu_counter_read(&EXT3_SB(sb)->s_freeinodes_counter); -+ if (buf->f_ffree < 0) -+ buf->f_ffree = 0; - buf->f_namelen = EXT3_NAME_LEN; - return 0; - } -Index: linux-2.6.12/fs/ext3/resize.c -=================================================================== ---- linux-2.6.12.orig/fs/ext3/resize.c 2005-11-24 15:17:06.000000000 -0700 -+++ linux-2.6.12/fs/ext3/resize.c 2005-11-25 06:01:01.000000000 -0700 -@@ -285,17 +285,17 @@ - * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... - * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... 
- */ --static unsigned ext3_list_backups(struct super_block *sb, unsigned *three, -- unsigned *five, unsigned *seven) -+unsigned long ext3_list_backups(struct super_block *sb, unsigned long *three, -+ unsigned long *five, unsigned long *seven) - { -- unsigned *min = three; -+ unsigned long metabg = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg); -+ unsigned long *min = three, ret; - int mult = 3; -- unsigned ret; - - if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { -- ret = *min; -- *min += 1; -+ ret = *three; -+ *three += 1; - return ret; - } - -@@ -308,8 +307,26 @@ - mult = 7; - } - -- ret = *min; -- *min *= mult; -+ if (EXT3_HAS_INCOMPAT_FEATURE(sb,EXT3_FEATURE_INCOMPAT_META_BG) && -+ *min >= metabg * EXT3_DESC_PER_BLOCK(sb)) { -+ ret = *min; -+ switch (ret & (EXT3_DESC_PER_BLOCK(sb) - 1)) { -+ case 0: -+ *three = ret + 1; -+ break; -+ case 1: -+ *three = ret + EXT3_DESC_PER_BLOCK(sb) - 2; -+ break; -+ default: -+ *three = (ret | (EXT3_DESC_PER_BLOCK(sb) - 1)) + 1; -+ break; -+ } -+ *five = -1UL; -+ *seven = -1UL; -+ } else { -+ ret = *min; -+ *min *= mult; -+ } - - return ret; - } -@@ -324,17 +337,17 @@ - { - const unsigned long blk = primary->b_blocknr; - const unsigned long end = EXT3_SB(sb)->s_groups_count; -- unsigned three = 1; -- unsigned five = 5; -- unsigned seven = 7; -- unsigned grp; -+ unsigned long three = 1; -+ unsigned long five = 5; -+ unsigned long seven = 7; -+ unsigned long grp; - __u32 *p = (__u32 *)primary->b_data; - int gdbackups = 0; - - while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { - if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){ - ext3_warning(sb, __FUNCTION__, -- "reserved GDT %ld missing grp %d (%ld)\n", -+ "reserved GDT %ld missing grp %ld (%ld)\n", - blk, grp, - grp * EXT3_BLOCKS_PER_GROUP(sb) + blk); - return -EINVAL; -@@ -618,10 +631,8 @@ - struct ext3_sb_info *sbi = EXT3_SB(sb); - const unsigned long last = sbi->s_groups_count; - const int bpg = 
EXT3_BLOCKS_PER_GROUP(sb); -- unsigned three = 1; -- unsigned five = 5; -- unsigned seven = 7; -- unsigned group; -+ unsigned long three = 1, five = 5, seven = 7; -+ unsigned long group; - int rest = sb->s_blocksize - size; - handle_t *handle; - int err = 0, err2; -@@ -672,7 +683,7 @@ - exit_err: - if (err) { - ext3_warning(sb, __FUNCTION__, -- "can't update backup for group %d (err %d), " -+ "can't update backup for group %ld (err %d), " - "forcing fsck on next reboot\n", group, err); - sbi->s_mount_state &= ~EXT3_VALID_FS; - sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS); -Index: linux-2.6.12/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.12.orig/include/linux/ext3_fs.h 2005-06-17 13:48:29.000000000 -0600 -+++ linux-2.6.12/include/linux/ext3_fs.h 2005-11-25 05:59:47.000000000 -0700 -@@ -788,6 +788,10 @@ - extern int ext3_group_extend(struct super_block *sb, - struct ext3_super_block *es, - unsigned long n_blocks_count); -+extern unsigned long ext3_list_backups(struct super_block *sb, -+ unsigned long *three, -+ unsigned long *five, -+ unsigned long *seven); - - /* super.c */ - extern void ext3_error (struct super_block *, const char *, const char *, ...) diff --git a/lustre/kernel_patches/patches/ext3-uninit-2.6-sles10.patch b/lustre/kernel_patches/patches/ext3-uninit-2.6-sles10.patch deleted file mode 100644 index 62b1f50a90..0000000000 --- a/lustre/kernel_patches/patches/ext3-uninit-2.6-sles10.patch +++ /dev/null @@ -1,674 +0,0 @@ -Add support for the uninit_groups feature to the kernel. - -Keep a high water mark of used inodes for each group to improve e2fsck time. -Block and inode bitmaps can be uninitialized on disk via a flag in the -group descriptor to avoid reading or scanning them at e2fsck time. -A checksum of each group descriptor is used to ensure that corruption in -the group descriptor's bit flags does not cause incorrect operation. 
- -Index: linux-2.6.16.27-0.9-full/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/include/linux/ext3_fs.h 2007-03-28 18:20:16.000000000 +0400 -+++ linux-2.6.16.27-0.9-full/include/linux/ext3_fs.h 2007-03-28 18:30:06.000000000 +0400 -@@ -153,16 +153,22 @@ struct ext3_allocation_request { - */ - struct ext3_group_desc - { -- __le32 bg_block_bitmap; /* Blocks bitmap block */ -- __le32 bg_inode_bitmap; /* Inodes bitmap block */ -+ __le32 bg_block_bitmap; /* Blocks bitmap block */ -+ __le32 bg_inode_bitmap; /* Inodes bitmap block */ - __le32 bg_inode_table; /* Inodes table block */ - __le16 bg_free_blocks_count; /* Free blocks count */ - __le16 bg_free_inodes_count; /* Free inodes count */ - __le16 bg_used_dirs_count; /* Directories count */ -- __u16 bg_pad; -- __le32 bg_reserved[3]; -+ __le16 bg_flags; /* EXT3_BG_flags (UNINIT, etc) */ -+ __le32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ -+ __le16 bg_itable_unused; /* Unused inodes count */ -+ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ - }; - -+#define EXT3_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ -+#define EXT3_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ -+#define EXT3_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ -+ - /* - * Macro-instructions used to manage group descriptors - */ -@@ -590,6 +596,7 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 - #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 - #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 -+#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 - #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 - - #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 -@@ -606,6 +613,7 @@ static inline struct ext3_inode_info *EX - EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ -+ 
EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ - EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) - -Index: linux-2.6.16.27-0.9-full/fs/ext3/resize.c -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/fs/ext3/resize.c 2007-03-13 02:56:52.000000000 +0300 -+++ linux-2.6.16.27-0.9-full/fs/ext3/resize.c 2007-03-28 18:30:06.000000000 +0400 -@@ -19,6 +19,7 @@ - #include <linux/errno.h> - #include <linux/slab.h> - -+#include "group.h" - - #define outside(b, first, last) ((b) < (first) || (b) >= (last)) - #define inside(b, first, last) ((b) >= (first) && (b) < (last)) -@@ -818,6 +819,7 @@ int ext3_group_add(struct super_block *s - gdp->bg_inode_table = cpu_to_le32(input->inode_table); - gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); - gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb)); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, input->group, gdp); - - /* - * Make the new blocks and inodes valid next. We do this before -Index: linux-2.6.16.27-0.9-full/fs/ext3/super.c -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/fs/ext3/super.c 2007-03-28 18:25:51.000000000 +0400 -+++ linux-2.6.16.27-0.9-full/fs/ext3/super.c 2007-03-28 18:30:06.000000000 +0400 -@@ -42,6 +42,7 @@ - #include "xattr.h" - #include "acl.h" - #include "namei.h" -+#include "group.h" - - static int ext3_load_journal(struct super_block *, struct ext3_super_block *, - unsigned long journal_devnum); -@@ -1221,6 +1222,90 @@ static int ext3_setup_super(struct super - return res; - } - -+#if !defined(CONFIG_CRC16) && !defined(CONFIG_CRC16_MODULE) -+/** CRC table for the CRC-16. 
The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */ -+__u16 const crc16_table[256] = { -+ 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, -+ 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, -+ 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, -+ 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, -+ 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, -+ 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, -+ 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, -+ 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, -+ 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, -+ 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, -+ 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, -+ 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, -+ 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, -+ 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, -+ 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, -+ 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, -+ 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, -+ 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, -+ 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, -+ 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, -+ 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, -+ 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, -+ 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, -+ 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, -+ 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, -+ 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, -+ 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, -+ 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, -+ 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 
0x8BC1, 0x8A81, 0x4A40, -+ 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, -+ 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, -+ 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 -+}; -+ -+static inline __u16 crc16_byte(__u16 crc, const __u8 data) -+{ -+ return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff]; -+} -+ -+__u16 crc16(__u16 crc, __u8 const *buffer, size_t len) -+{ -+ while (len--) -+ crc = crc16_byte(crc, *buffer++); -+ return crc; -+} -+#endif -+ -+__le16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 block_group, -+ struct ext3_group_desc *gdp) -+{ -+ __u16 crc = 0; -+ -+ if (sbi->s_es->s_feature_ro_compat & -+ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { -+ int offset = offsetof(struct ext3_group_desc, bg_checksum); -+ __le32 le_group = cpu_to_le32(block_group); -+ -+ crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); -+ crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); -+ crc = crc16(crc, (__u8 *)gdp, offset); -+ offset += sizeof(gdp->bg_checksum); /* skip checksum */ -+ BUG_ON(offset != sizeof(*gdp)); /* XXX handle s_desc_size */ -+ /* for checksum of struct ext4_group_desc do the rest... 
-+ if (offset < sbi->s_es->s_desc_size) { -+ crc = crc16(crc, (__u8 *)gdp + offset, -+ sbi->s_es->s_desc_size - offset); -+ */ -+ } -+ -+ return cpu_to_le16(crc); -+} -+ -+int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 block_group, -+ struct ext3_group_desc *gdp) -+{ -+ if (gdp->bg_checksum != ext3_group_desc_csum(sbi, block_group, gdp)) -+ return 0; -+ -+ return 1; -+} -+ - /* Called at mount-time, super-block is locked */ - static int ext3_check_descriptors (struct super_block * sb) - { -@@ -1270,6 +1355,13 @@ static int ext3_check_descriptors (struc - le32_to_cpu(gdp->bg_inode_table)); - return 0; - } -+ if (!ext3_group_desc_csum_verify(sbi, i, gdp)) { -+ ext3_error(sb, __FUNCTION__, -+ "Checksum for group %d failed (%u!=%u)\n", i, -+ le16_to_cpu(ext3_group_desc_csum(sbi,i,gdp)), -+ le16_to_cpu(gdp->bg_checksum)); -+ return 0; -+ } - block += EXT3_BLOCKS_PER_GROUP(sb); - gdp++; - } -Index: linux-2.6.16.27-0.9-full/fs/ext3/group.h -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/fs/ext3/group.h 2007-02-13 18:39:59.640066087 +0300 -+++ linux-2.6.16.27-0.9-full/fs/ext3/group.h 2007-03-28 18:30:06.000000000 +0400 -@@ -0,0 +1,29 @@ -+/* -+ * linux/fs/ext3/group.h -+ * -+ * Copyright (C) 2007 Cluster File Systems, Inc -+ * -+ * Author: Andreas Dilger <adilger@clusterfs.com> -+ */ -+ -+#ifndef _LINUX_EXT3_GROUP_H -+#define _LINUX_EXT3_GROUP_H -+#if defined(CONFIG_CRC16) || defined(CONFIG_CRC16_MODULE) -+#include <linux/crc16.h> -+#endif -+ -+extern __le16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 group, -+ struct ext3_group_desc *gdp); -+extern int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 group, -+ struct ext3_group_desc *gdp); -+struct buffer_head *read_block_bitmap(struct super_block *sb, -+ unsigned int block_group); -+extern unsigned ext3_init_block_bitmap(struct super_block *sb, -+ struct buffer_head *bh, int group, -+ struct ext3_group_desc *desc); -+#define 
ext3_free_blocks_after_init(sb, group, desc) \ -+ ext3_init_block_bitmap(sb, NULL, group, desc) -+extern unsigned ext3_init_inode_bitmap(struct super_block *sb, -+ struct buffer_head *bh, int group, -+ struct ext3_group_desc *desc); -+#endif /* _LINUX_EXT3_GROUP_H */ -Index: linux-2.6.16.27-0.9-full/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/fs/ext3/ialloc.c 2007-03-28 18:20:17.000000000 +0400 -+++ linux-2.6.16.27-0.9-full/fs/ext3/ialloc.c 2007-03-28 18:30:06.000000000 +0400 -@@ -28,6 +28,7 @@ - - #include "xattr.h" - #include "acl.h" -+#include "group.h" - - /* - * ialloc.c contains the inodes allocation and deallocation routines -@@ -43,6 +44,52 @@ - * the free blocks count in the block. - */ - -+/* -+ * To avoid calling the atomic setbit hundreds or thousands of times, we only -+ * need to use it within a single byte (to ensure we get endianness right). -+ * We can use memset for the rest of the bitmap as there are no other users. -+ */ -+static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) -+{ -+ int i; -+ -+ if (start_bit >= end_bit) -+ return; -+ -+ ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); -+ for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) -+ ext3_set_bit(i, bitmap); -+ if (i < end_bit) -+ memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); -+} -+ -+/* Initializes an uninitialized inode bitmap */ -+unsigned ext3_init_inode_bitmap(struct super_block *sb, -+ struct buffer_head *bh, int block_group, -+ struct ext3_group_desc *gdp) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ J_ASSERT_BH(bh, buffer_locked(bh)); -+ -+ /* If checksum is bad mark all blocks and inodes use to prevent -+ * allocation, essentially implementing a per-group read-only flag. 
*/ -+ if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) { -+ ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n", -+ block_group); -+ gdp->bg_free_blocks_count = 0; -+ gdp->bg_free_inodes_count = 0; -+ gdp->bg_itable_unused = 0; -+ memset(bh->b_data, 0xff, sb->s_blocksize); -+ return 0; -+ } -+ -+ memset(bh->b_data, 0, (EXT3_INODES_PER_GROUP(sb) + 7) / 8); -+ mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb), -+ bh->b_data); -+ -+ return EXT3_INODES_PER_GROUP(sb); -+} - - /* - * Read the inode allocation bitmap for a given block_group, reading -@@ -59,8 +106,19 @@ read_inode_bitmap(struct super_block * s - desc = ext3_get_group_desc(sb, block_group, NULL); - if (!desc) - goto error_out; -- -- bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); -+ if (desc->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) { -+ bh = sb_getblk(sb, le32_to_cpu(desc->bg_inode_bitmap)); -+ if (!buffer_uptodate(bh)) { -+ lock_buffer(bh); -+ if (!buffer_uptodate(bh)) { -+ ext3_init_inode_bitmap(sb, bh,block_group,desc); -+ set_buffer_uptodate(bh); -+ } -+ unlock_buffer(bh); -+ } -+ } else { -+ bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); -+ } - if (!bh) - ext3_error(sb, "read_inode_bitmap", - "Cannot read inode bitmap - " -@@ -169,6 +227,8 @@ void ext3_free_inode (handle_t *handle, - if (is_directory) - gdp->bg_used_dirs_count = cpu_to_le16( - le16_to_cpu(gdp->bg_used_dirs_count) - 1); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi,block_group, -+ gdp); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_inc(&sbi->s_freeinodes_counter); - if (is_directory) -@@ -453,7 +513,7 @@ struct inode *ext3_new_inode(handle_t *h - struct ext3_sb_info *sbi; - int err = 0; - struct inode *ret; -- int i; -+ int i, free = 0; - - /* Cannot create files in a deleted directory */ - if (!dir || !dir->i_nlink) -@@ -570,11 +630,13 @@ repeat_in_this_group: - goto out; - - got: -- ino += group * EXT3_INODES_PER_GROUP(sb) + 1; -- if (ino < EXT3_FIRST_INO(sb) || 
ino > le32_to_cpu(es->s_inodes_count)) { -- ext3_error (sb, "ext3_new_inode", -- "reserved inode or inode > inodes count - " -- "block_group = %d, inode=%lu", group, ino); -+ ino++; -+ if ((group == 0 && ino < EXT3_FIRST_INO(sb)) || -+ ino > EXT3_INODES_PER_GROUP(sb)) { -+ ext3_error(sb, __FUNCTION__, -+ "reserved inode or inode > inodes count - " -+ "block_group = %d, inode=%lu", group, -+ ino + group * EXT3_INODES_PER_GROUP(sb)); - err = -EIO; - goto fail; - } -@@ -582,13 +644,65 @@ got: - BUFFER_TRACE(bh2, "get_write_access"); - err = ext3_journal_get_write_access(handle, bh2); - if (err) goto fail; -+ -+ /* We may have to initialize the block bitmap if it isn't already */ -+ if (EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && -+ gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ struct buffer_head *block_bh = read_block_bitmap(sb, group); -+ -+ BUFFER_TRACE(block_bh, "get block bitmap access"); -+ err = ext3_journal_get_write_access(handle, block_bh); -+ if (err) { -+ brelse(block_bh); -+ goto fail; -+ } -+ -+ free = 0; -+ spin_lock(sb_bgl_lock(sbi, group)); -+ /* recheck and clear flag under lock if we still need to */ -+ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); -+ free = ext3_free_blocks_after_init(sb, group, gdp); -+ gdp->bg_free_blocks_count = cpu_to_le16(free); -+ } -+ spin_unlock(sb_bgl_lock(sbi, group)); -+ -+ /* Don't need to dirty bitmap block if we didn't change it */ -+ if (free) { -+ BUFFER_TRACE(block_bh, "dirty block bitmap"); -+ err = ext3_journal_dirty_metadata(handle, block_bh); -+ } -+ -+ brelse(block_bh); -+ if (err) -+ goto fail; -+ } -+ - spin_lock(sb_bgl_lock(sbi, group)); -+ /* If we didn't allocate from within the initialized part of the inode -+ * table then we need to initialize up to this inode. 
*/ -+ if (EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { -+ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) { -+ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_INODE_UNINIT); -+ free = EXT3_INODES_PER_GROUP(sb); -+ } else { -+ free = EXT3_INODES_PER_GROUP(sb) - -+ le16_to_cpu(gdp->bg_itable_unused); -+ } -+ -+ if (ino > free) { -+ gdp->bg_itable_unused = -+ cpu_to_le16(EXT3_INODES_PER_GROUP(sb) - ino); -+ } -+ } -+ - gdp->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); - if (S_ISDIR(mode)) { - gdp->bg_used_dirs_count = - cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); - } -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, group, gdp); - spin_unlock(sb_bgl_lock(sbi, group)); - BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh2); -@@ -610,7 +724,7 @@ got: - inode->i_gid = current->fsgid; - inode->i_mode = mode; - -- inode->i_ino = ino; -+ inode->i_ino = ino + group * EXT3_INODES_PER_GROUP(sb); - /* This is the optimal IO size (for stat), not the fs block size */ - inode->i_blksize = PAGE_SIZE; - inode->i_blocks = 0; -Index: linux-2.6.16.27-0.9-full/fs/ext3/mballoc.c -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/fs/ext3/mballoc.c 2007-03-28 16:03:19.000000000 +0400 -+++ linux-2.6.16.27-0.9-full/fs/ext3/mballoc.c 2007-03-28 18:30:36.000000000 +0400 -@@ -36,6 +36,8 @@ - #include <linux/seq_file.h> - #include <linux/version.h> - -+#include "group.h" -+ - /* - * MUSTDO: - * - test ext3_ext_search_left() and ext3_ext_search_right() -@@ -323,6 +325,7 @@ struct ext3_group_info { - unsigned long bb_state; - unsigned long bb_tid; - struct ext3_free_metadata *bb_md_cur; -+ struct ext3_group_desc *bb_gdp; - unsigned short bb_first_free; - unsigned short bb_free; - unsigned short bb_fragments; -@@ -928,10 +931,7 @@ static int ext3_mb_init_cache(struct pag - if (first_group + i >= EXT3_SB(sb)->s_groups_count) - break; 
- -- err = -EIO; -- desc = ext3_get_group_desc(sb, first_group + i, NULL); -- if (desc == NULL) -- goto out; -+ desc = EXT3_GROUP_INFO(sb, first_group + i)->bb_gdp; - - err = -ENOMEM; - bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); -@@ -946,7 +946,12 @@ static int ext3_mb_init_cache(struct pag - unlock_buffer(bh[i]); - continue; - } -- -+ if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ ext3_init_block_bitmap(sb, bh[i], first_group + i,desc); -+ set_buffer_uptodate(bh[i]); -+ unlock_buffer(bh[i]); -+ continue; -+ } - get_bh(bh[i]); - bh[i]->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh[i]); -@@ -1703,6 +1708,10 @@ static int ext3_mb_good_group(struct ext - switch (cr) { - case 0: - BUG_ON(ac->ac_2order == 0); -+ /* If this group is uninitialized, skip it initially */ -+ if (grp->bb_gdp->bg_flags & -+ cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) -+ return 0; - bits = ac->ac_sb->s_blocksize_bits + 1; - for (i = ac->ac_2order; i <= bits; i++) - if (grp->bb_counters[i] > 0) -@@ -1796,7 +1805,9 @@ repeat: - } - - ac->ac_groups_scanned++; -- if (cr == 0) -+ if (cr == 0 || (e3b.bd_info->bb_gdp->bg_flags & -+ cpu_to_le16(EXT3_BG_BLOCK_UNINIT) && -+ ac->ac_2order != 0)) - ext3_mb_simple_scan_group(ac, &e3b); - else if (cr == 1 && ac->ac_g_ex.fe_len == sbi->s_stripe) - ext3_mb_scan_aligned(ac, &e3b); -@@ -2267,12 +2278,13 @@ int ext3_mb_init_backend(struct super_bl - i--; - goto err_freebuddy; - } -+ memset(meta_group_info[j], 0, len); - desc = ext3_get_group_desc(sb, i, NULL); -+ meta_group_info[j]->bb_gdp = desc; - if (desc == NULL) { - printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i); - goto err_freebuddy; - } -- memset(meta_group_info[j], 0, len); - set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, - &meta_group_info[j]->bb_state); - -@@ -2936,9 +2948,17 @@ int ext3_mb_mark_diskspace_used(struct e - mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); - - spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); -+ if (gdp->bg_flags & 
cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(ext3_free_blocks_after_init(sb, -+ ac->ac_b_ex.fe_group, -+ gdp)); -+ } - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - - ac->ac_b_ex.fe_len); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); - spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); - percpu_counter_mod(&sbi->s_freeblocks_counter, - ac->ac_b_ex.fe_len); - -@@ -4303,6 +4323,7 @@ do_more: - spin_lock(sb_bgl_lock(sbi, block_group)); - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_mod(&sbi->s_freeblocks_counter, count); - -Index: linux-2.6.16.27-0.9-full/fs/ext3/balloc.c -=================================================================== ---- linux-2.6.16.27-0.9-full.orig/fs/ext3/balloc.c 2007-03-28 16:03:20.000000000 +0400 -+++ linux-2.6.16.27-0.9-full/fs/ext3/balloc.c 2007-03-28 18:30:06.000000000 +0400 -@@ -21,6 +21,7 @@ - #include <linux/quotaops.h> - #include <linux/buffer_head.h> - -+#include "group.h" - /* - * balloc.c contains the blocks allocation and deallocation routines - */ -@@ -74,6 +75,75 @@ struct ext3_group_desc * ext3_get_group_ - return desc + offset; - } - -+/* Initializes an uninitialized block bitmap if given, and returns the -+ * number of blocks free in the group. */ -+unsigned ext3_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, -+ int block_group, struct ext3_group_desc *gdp) -+{ -+ unsigned long start; -+ int bit, bit_max; -+ unsigned free_blocks; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ if (bh) { -+ J_ASSERT_BH(bh, buffer_locked(bh)); -+ -+ /* If checksum is bad mark all blocks use to prevent allocation, -+ * essentially implementing a per-group read-only flag. 
*/ -+ if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) { -+ ext3_error(sb, __FUNCTION__, -+ "Checksum bad for group %u\n", block_group); -+ gdp->bg_free_blocks_count = 0; -+ gdp->bg_free_inodes_count = 0; -+ gdp->bg_itable_unused = 0; -+ memset(bh->b_data, 0xff, sb->s_blocksize); -+ return 0; -+ } -+ memset(bh->b_data, 0, sb->s_blocksize); -+ } -+ -+ /* Check for superblock and gdt backups in this group */ -+ bit_max = ext3_bg_has_super(sb, block_group); -+ -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || -+ block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) * -+ sbi->s_desc_per_block) { -+ if (bit_max) { -+ bit_max += ext3_bg_num_gdb(sb, block_group); -+ bit_max +=le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); -+ } -+ } else { /* For META_BG_BLOCK_GROUPS */ -+ int group_rel = (block_group - -+ le32_to_cpu(sbi->s_es->s_first_meta_bg)) % -+ EXT3_DESC_PER_BLOCK(sb); -+ if (group_rel == 0 || group_rel == 1 || -+ (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1)) -+ bit_max += 1; -+ } -+ -+ /* Last and first groups are always initialized */ -+ free_blocks = EXT3_BLOCKS_PER_GROUP(sb) - bit_max; -+ -+ if (bh) { -+ for (bit = 0; bit < bit_max; bit++) -+ ext3_set_bit(bit, bh->b_data); -+ -+ start = block_group * EXT3_BLOCKS_PER_GROUP(sb) + -+ le32_to_cpu(sbi->s_es->s_first_data_block); -+ -+ /* Set bits for block and inode bitmaps, and inode table */ -+ ext3_set_bit(le32_to_cpu(gdp->bg_block_bitmap) - start, -+ bh->b_data); -+ ext3_set_bit(le32_to_cpu(gdp->bg_inode_bitmap) - start, -+ bh->b_data); -+ for (bit = le32_to_cpu(gdp->bg_inode_table) - start, -+ bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++) -+ ext3_set_bit(bit, bh->b_data); -+ } -+ -+ return free_blocks - sbi->s_itb_per_group - 2; -+} -+ - /* - * Read the bitmap for a given block_group, reading into the specified - * slot in the superblock's bitmap cache. 
-@@ -89,7 +159,19 @@ read_block_bitmap(struct super_block *sb - desc = ext3_get_group_desc (sb, block_group, NULL); - if (!desc) - goto error_out; -- bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ bh = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ if (!buffer_uptodate(bh)) { -+ lock_buffer(bh); -+ if (!buffer_uptodate(bh)) { -+ ext3_init_block_bitmap(sb, bh,block_group,desc); -+ set_buffer_uptodate(bh); -+ } -+ unlock_buffer(bh); -+ } -+ } else { -+ bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ } - if (!bh) - ext3_error (sb, "read_block_bitmap", - "Cannot read block bitmap - " -@@ -468,6 +550,7 @@ do_more: - desc->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) + - group_freed); -+ desc->bg_checksum = ext3_group_desc_csum(sbi, block_group, desc); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_mod(&sbi->s_freeblocks_counter, count); - -@@ -1378,8 +1461,11 @@ allocated: - ret_block, goal_hits, goal_attempts); - - spin_lock(sb_bgl_lock(sbi, group_no)); -+ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) -+ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, group_no, gdp); - spin_unlock(sb_bgl_lock(sbi, group_no)); - percpu_counter_mod(&sbi->s_freeblocks_counter, -1); - - -%diffstat - fs/ext3/balloc.c | 88 +++++++++++++++++++++++++++++ - fs/ext3/group.h | 38 ++++++++++++ - fs/ext3/ialloc.c | 144 +++++++++++++++++++++++++++++++++++++++++++----- - fs/ext3/mballoc.c | 35 +++++++++-- - fs/ext3/resize.c | 2 - fs/ext3/super.c | 92 ++++++++++++++++++++++++++++++ - include/linux/ext3_fs.h | 16 ++++- - 7 files changed, 388 insertions(+), 27 deletions(-) diff --git a/lustre/kernel_patches/patches/ext3-uninit-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-uninit-2.6-suse.patch deleted file mode 
100644 index 8a34ea5afa..0000000000 --- a/lustre/kernel_patches/patches/ext3-uninit-2.6-suse.patch +++ /dev/null @@ -1,653 +0,0 @@ -Add support for the uninit_groups feature to the kernel. - -Keep a high water mark of used inodes for each group to improve e2fsck time. -Block and inode bitmaps can be uninitialized on disk via a flag in the -group descriptor to avoid reading or scanning them at e2fsck time. -A checksum of each group descriptor is used to ensure that corruption in -the group descriptor's bit flags does not cause incorrect operation. - -Index: linux-2.6.5-7.283-full/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.5-7.283-full.orig/include/linux/ext3_fs.h 2007-03-28 17:33:05.000000000 +0400 -+++ linux-2.6.5-7.283-full/include/linux/ext3_fs.h 2007-03-28 18:33:35.000000000 +0400 -@@ -153,16 +153,22 @@ struct ext3_allocation_request { - */ - struct ext3_group_desc - { -- __u32 bg_block_bitmap; /* Blocks bitmap block */ -- __u32 bg_inode_bitmap; /* Inodes bitmap block */ -+ __u32 bg_block_bitmap; /* Blocks bitmap block */ -+ __u32 bg_inode_bitmap; /* Inodes bitmap block */ - __u32 bg_inode_table; /* Inodes table block */ - __u16 bg_free_blocks_count; /* Free blocks count */ - __u16 bg_free_inodes_count; /* Free inodes count */ - __u16 bg_used_dirs_count; /* Directories count */ -- __u16 bg_pad; -- __u32 bg_reserved[3]; -+ __u16 bg_flags; /* EXT3_BG_flags (UNINIT, etc) */ -+ __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ -+ __u16 bg_itable_unused; /* Unused inodes count */ -+ __u16 bg_checksum; /* crc16(sb_uuid+group+desc) */ - }; - -+#define EXT3_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ -+#define EXT3_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ -+#define EXT3_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ -+ - /* - * Macro-instructions used to manage group descriptors - */ -@@ -458,7 +464,7 @@ struct ext3_super_block { - */ - __u8 
s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ - __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ -- __u16 s_padding1; -+ __u16 s_reserved_gdt_blocks; /* Per group desc for online growth */ - /* - * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set. - */ -@@ -546,6 +552,7 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 - #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 - #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 -+#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 - #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 - - #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 -@@ -562,6 +569,7 @@ static inline struct ext3_inode_info *EX - EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ -+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ - EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) - -Index: linux-2.6.5-7.283-full/fs/ext3/super.c -=================================================================== ---- linux-2.6.5-7.283-full.orig/fs/ext3/super.c 2007-03-28 17:33:05.000000000 +0400 -+++ linux-2.6.5-7.283-full/fs/ext3/super.c 2007-03-28 18:33:35.000000000 +0400 -@@ -36,6 +36,7 @@ - #include <linux/quotaops.h> - #include "xattr.h" - #include "acl.h" -+#include "group.h" - - static int ext3_load_journal(struct super_block *, struct ext3_super_block *); - static int ext3_create_journal(struct super_block *, struct ext3_super_block *, -@@ -996,6 +997,90 @@ static int ext3_setup_super(struct super - return res; - } - -+#if !defined(CONFIG_CRC16) && !defined(CONFIG_CRC16_MODULE) -+/** CRC table for the CRC-16. 
The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */ -+__u16 const crc16_table[256] = { -+ 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, -+ 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, -+ 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, -+ 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, -+ 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, -+ 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, -+ 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, -+ 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, -+ 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, -+ 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, -+ 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, -+ 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, -+ 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, -+ 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, -+ 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, -+ 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, -+ 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, -+ 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, -+ 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, -+ 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, -+ 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, -+ 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, -+ 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, -+ 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, -+ 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, -+ 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, -+ 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, -+ 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, -+ 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 
0x8BC1, 0x8A81, 0x4A40, -+ 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, -+ 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, -+ 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 -+}; -+ -+static inline __u16 crc16_byte(__u16 crc, const __u8 data) -+{ -+ return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff]; -+} -+ -+__u16 crc16(__u16 crc, __u8 const *buffer, size_t len) -+{ -+ while (len--) -+ crc = crc16_byte(crc, *buffer++); -+ return crc; -+} -+#endif -+ -+__le16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 block_group, -+ struct ext3_group_desc *gdp) -+{ -+ __u16 crc = 0; -+ -+ if (sbi->s_es->s_feature_ro_compat & -+ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { -+ int offset = offsetof(struct ext3_group_desc, bg_checksum); -+ __le32 le_group = cpu_to_le32(block_group); -+ -+ crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); -+ crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); -+ crc = crc16(crc, (__u8 *)gdp, offset); -+ offset += sizeof(gdp->bg_checksum); /* skip checksum */ -+ BUG_ON(offset != sizeof(*gdp)); /* XXX handle s_desc_size */ -+ /* for checksum of struct ext4_group_desc do the rest... 
-+ if (offset < sbi->s_es->s_desc_size) { -+ crc = crc16(crc, (__u8 *)gdp + offset, -+ sbi->s_es->s_desc_size - offset); -+ */ -+ } -+ -+ return cpu_to_le16(crc); -+} -+ -+int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 block_group, -+ struct ext3_group_desc *gdp) -+{ -+ if (gdp->bg_checksum != ext3_group_desc_csum(sbi, block_group, gdp)) -+ return 0; -+ -+ return 1; -+} -+ - static int ext3_check_descriptors (struct super_block * sb) - { - struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -1044,6 +1129,13 @@ static int ext3_check_descriptors (struc - le32_to_cpu(gdp->bg_inode_table)); - return 0; - } -+ if (!ext3_group_desc_csum_verify(sbi, i, gdp)) { -+ ext3_error(sb, __FUNCTION__, -+ "Checksum for group %d failed (%u!=%u)\n", i, -+ le16_to_cpu(ext3_group_desc_csum(sbi,i,gdp)), -+ le16_to_cpu(gdp->bg_checksum)); -+ return 0; -+ } - block += EXT3_BLOCKS_PER_GROUP(sb); - gdp++; - } -Index: linux-2.6.5-7.283-full/fs/ext3/group.h -=================================================================== ---- linux-2.6.5-7.283-full.orig/fs/ext3/group.h 2007-02-13 18:39:59.640066087 +0300 -+++ linux-2.6.5-7.283-full/fs/ext3/group.h 2007-03-28 18:33:35.000000000 +0400 -@@ -0,0 +1,29 @@ -+/* -+ * linux/fs/ext3/group.h -+ * -+ * Copyright (C) 2007 Cluster File Systems, Inc -+ * -+ * Author: Andreas Dilger <adilger@clusterfs.com> -+ */ -+ -+#ifndef _LINUX_EXT3_GROUP_H -+#define _LINUX_EXT3_GROUP_H -+#if defined(CONFIG_CRC16) || defined(CONFIG_CRC16_MODULE) -+#include <linux/crc16.h> -+#endif -+ -+extern __le16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 group, -+ struct ext3_group_desc *gdp); -+extern int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 group, -+ struct ext3_group_desc *gdp); -+struct buffer_head *read_block_bitmap(struct super_block *sb, -+ unsigned int block_group); -+extern unsigned ext3_init_block_bitmap(struct super_block *sb, -+ struct buffer_head *bh, int group, -+ struct ext3_group_desc *desc); -+#define 
ext3_free_blocks_after_init(sb, group, desc) \ -+ ext3_init_block_bitmap(sb, NULL, group, desc) -+extern unsigned ext3_init_inode_bitmap(struct super_block *sb, -+ struct buffer_head *bh, int group, -+ struct ext3_group_desc *desc); -+#endif /* _LINUX_EXT3_GROUP_H */ -Index: linux-2.6.5-7.283-full/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.5-7.283-full.orig/fs/ext3/ialloc.c 2007-03-28 17:33:03.000000000 +0400 -+++ linux-2.6.5-7.283-full/fs/ext3/ialloc.c 2007-03-28 18:33:35.000000000 +0400 -@@ -28,6 +28,7 @@ - - #include "xattr.h" - #include "acl.h" -+#include "group.h" - - /* - * ialloc.c contains the inodes allocation and deallocation routines -@@ -43,6 +44,52 @@ - * the free blocks count in the block. - */ - -+/* -+ * To avoid calling the atomic setbit hundreds or thousands of times, we only -+ * need to use it within a single byte (to ensure we get endianness right). -+ * We can use memset for the rest of the bitmap as there are no other users. -+ */ -+static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) -+{ -+ int i; -+ -+ if (start_bit >= end_bit) -+ return; -+ -+ ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); -+ for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) -+ ext3_set_bit(i, bitmap); -+ if (i < end_bit) -+ memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); -+} -+ -+/* Initializes an uninitialized inode bitmap */ -+unsigned ext3_init_inode_bitmap(struct super_block *sb, -+ struct buffer_head *bh, int block_group, -+ struct ext3_group_desc *gdp) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ J_ASSERT_BH(bh, buffer_locked(bh)); -+ -+ /* If checksum is bad mark all blocks and inodes use to prevent -+ * allocation, essentially implementing a per-group read-only flag. 
*/ -+ if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) { -+ ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n", -+ block_group); -+ gdp->bg_free_blocks_count = 0; -+ gdp->bg_free_inodes_count = 0; -+ gdp->bg_itable_unused = 0; -+ memset(bh->b_data, 0xff, sb->s_blocksize); -+ return 0; -+ } -+ -+ memset(bh->b_data, 0, (EXT3_INODES_PER_GROUP(sb) + 7) / 8); -+ mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb), -+ bh->b_data); -+ -+ return EXT3_INODES_PER_GROUP(sb); -+} - - /* - * Read the inode allocation bitmap for a given block_group, reading -@@ -59,8 +106,19 @@ read_inode_bitmap(struct super_block * s - desc = ext3_get_group_desc(sb, block_group, NULL); - if (!desc) - goto error_out; -- -- bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); -+ if (desc->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) { -+ bh = sb_getblk(sb, le32_to_cpu(desc->bg_inode_bitmap)); -+ if (!buffer_uptodate(bh)) { -+ lock_buffer(bh); -+ if (!buffer_uptodate(bh)) { -+ ext3_init_inode_bitmap(sb, bh,block_group,desc); -+ set_buffer_uptodate(bh); -+ } -+ unlock_buffer(bh); -+ } -+ } else { -+ bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); -+ } - if (!bh) - ext3_error(sb, "read_inode_bitmap", - "Cannot read inode bitmap - " -@@ -168,6 +226,8 @@ void ext3_free_inode (handle_t *handle, - if (is_directory) - gdp->bg_used_dirs_count = cpu_to_le16( - le16_to_cpu(gdp->bg_used_dirs_count) - 1); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi,block_group, -+ gdp); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_inc(&sbi->s_freeinodes_counter); - if (is_directory) -@@ -454,7 +514,7 @@ struct inode *ext3_new_inode(handle_t *h - struct ext3_sb_info *sbi; - int err = 0; - struct inode *ret; -- int i; -+ int i, free = 0; - - /* Cannot create files in a deleted directory */ - if (!dir || !dir->i_nlink) -@@ -570,11 +630,13 @@ repeat_in_this_group: - goto out; - - got: -- ino += group * EXT3_INODES_PER_GROUP(sb) + 1; -- if (ino < EXT3_FIRST_INO(sb) || 
ino > le32_to_cpu(es->s_inodes_count)) { -- ext3_error (sb, "ext3_new_inode", -- "reserved inode or inode > inodes count - " -- "block_group = %d, inode=%lu", group, ino); -+ ino++; -+ if ((group == 0 && ino < EXT3_FIRST_INO(sb)) || -+ ino > EXT3_INODES_PER_GROUP(sb)) { -+ ext3_error(sb, __FUNCTION__, -+ "reserved inode or inode > inodes count - " -+ "block_group = %d, inode=%lu", group, -+ ino + group * EXT3_INODES_PER_GROUP(sb)); - err = -EIO; - goto fail; - } -@@ -582,13 +644,65 @@ got: - BUFFER_TRACE(bh2, "get_write_access"); - err = ext3_journal_get_write_access(handle, bh2); - if (err) goto fail; -+ -+ /* We may have to initialize the block bitmap if it isn't already */ -+ if (EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && -+ gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ struct buffer_head *block_bh = read_block_bitmap(sb, group); -+ -+ BUFFER_TRACE(block_bh, "get block bitmap access"); -+ err = ext3_journal_get_write_access(handle, block_bh); -+ if (err) { -+ brelse(block_bh); -+ goto fail; -+ } -+ -+ free = 0; -+ spin_lock(sb_bgl_lock(sbi, group)); -+ /* recheck and clear flag under lock if we still need to */ -+ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); -+ free = ext3_free_blocks_after_init(sb, group, gdp); -+ gdp->bg_free_blocks_count = cpu_to_le16(free); -+ } -+ spin_unlock(sb_bgl_lock(sbi, group)); -+ -+ /* Don't need to dirty bitmap block if we didn't change it */ -+ if (free) { -+ BUFFER_TRACE(block_bh, "dirty block bitmap"); -+ err = ext3_journal_dirty_metadata(handle, block_bh); -+ } -+ -+ brelse(block_bh); -+ if (err) -+ goto fail; -+ } -+ - spin_lock(sb_bgl_lock(sbi, group)); -+ /* If we didn't allocate from within the initialized part of the inode -+ * table then we need to initialize up to this inode. 
*/ -+ if (EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { -+ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) { -+ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_INODE_UNINIT); -+ free = EXT3_INODES_PER_GROUP(sb); -+ } else { -+ free = EXT3_INODES_PER_GROUP(sb) - -+ le16_to_cpu(gdp->bg_itable_unused); -+ } -+ -+ if (ino > free) { -+ gdp->bg_itable_unused = -+ cpu_to_le16(EXT3_INODES_PER_GROUP(sb) - ino); -+ } -+ } -+ - gdp->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); - if (S_ISDIR(mode)) { - gdp->bg_used_dirs_count = - cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); - } -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, group, gdp); - spin_unlock(sb_bgl_lock(sbi, group)); - BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh2); -@@ -610,7 +724,7 @@ got: - inode->i_gid = current->fsgid; - inode->i_mode = mode; - -- inode->i_ino = ino; -+ inode->i_ino = ino + group * EXT3_INODES_PER_GROUP(sb); - /* This is the optimal IO size (for stat), not the fs block size */ - inode->i_blksize = PAGE_SIZE; - inode->i_blocks = 0; -Index: linux-2.6.5-7.283-full/fs/ext3/mballoc.c -=================================================================== ---- linux-2.6.5-7.283-full.orig/fs/ext3/mballoc.c 2007-03-28 15:46:00.000000000 +0400 -+++ linux-2.6.5-7.283-full/fs/ext3/mballoc.c 2007-03-28 18:33:35.000000000 +0400 -@@ -36,6 +36,8 @@ - #include <linux/seq_file.h> - #include <linux/version.h> - -+#include "group.h" -+ - /* - * MUSTDO: - * - test ext3_ext_search_left() and ext3_ext_search_right() -@@ -323,6 +325,7 @@ struct ext3_group_info { - unsigned long bb_state; - unsigned long bb_tid; - struct ext3_free_metadata *bb_md_cur; -+ struct ext3_group_desc *bb_gdp; - unsigned short bb_first_free; - unsigned short bb_free; - unsigned short bb_fragments; -@@ -928,10 +931,7 @@ static int ext3_mb_init_cache(struct pag - if (first_group + i >= EXT3_SB(sb)->s_groups_count) - break; - -- 
err = -EIO; -- desc = ext3_get_group_desc(sb, first_group + i, NULL); -- if (desc == NULL) -- goto out; -+ desc = EXT3_GROUP_INFO(sb, first_group + i)->bb_gdp; - - err = -ENOMEM; - bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); -@@ -946,7 +946,12 @@ static int ext3_mb_init_cache(struct pag - unlock_buffer(bh[i]); - continue; - } -- -+ if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ ext3_init_block_bitmap(sb, bh[i], first_group + i,desc); -+ set_buffer_uptodate(bh[i]); -+ unlock_buffer(bh[i]); -+ continue; -+ } - get_bh(bh[i]); - bh[i]->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh[i]); -@@ -1703,6 +1708,10 @@ static int ext3_mb_good_group(struct ext - switch (cr) { - case 0: - BUG_ON(ac->ac_2order == 0); -+ /* If this group is uninitialized, skip it initially */ -+ if (grp->bb_gdp->bg_flags & -+ cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) -+ return 0; - bits = ac->ac_sb->s_blocksize_bits + 1; - for (i = ac->ac_2order; i <= bits; i++) - if (grp->bb_counters[i] > 0) -@@ -1796,7 +1805,9 @@ repeat: - } - - ac->ac_groups_scanned++; -- if (cr == 0) -+ if (cr == 0 || (e3b.bd_info->bb_gdp->bg_flags & -+ cpu_to_le16(EXT3_BG_BLOCK_UNINIT) && -+ ac->ac_2order != 0)) - ext3_mb_simple_scan_group(ac, &e3b); - else if (cr == 1 && ac->ac_g_ex.fe_len == sbi->s_stripe) - ext3_mb_scan_aligned(ac, &e3b); -@@ -2267,12 +2278,13 @@ int ext3_mb_init_backend(struct super_bl - i--; - goto err_freebuddy; - } -+ memset(meta_group_info[j], 0, len); - desc = ext3_get_group_desc(sb, i, NULL); -+ meta_group_info[j]->bb_gdp = desc; - if (desc == NULL) { - printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i); - goto err_freebuddy; - } -- memset(meta_group_info[j], 0, len); - set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, - &meta_group_info[j]->bb_state); - -@@ -2936,9 +2948,17 @@ int ext3_mb_mark_diskspace_used(struct e - mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); - - spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); -+ if (gdp->bg_flags & 
cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(ext3_free_blocks_after_init(sb, -+ ac->ac_b_ex.fe_group, -+ gdp)); -+ } - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - - ac->ac_b_ex.fe_len); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); - spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); - percpu_counter_mod(&sbi->s_freeblocks_counter, - ac->ac_b_ex.fe_len); - -@@ -4303,6 +4323,7 @@ do_more: - spin_lock(sb_bgl_lock(sbi, block_group)); - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_mod(&sbi->s_freeblocks_counter, count); - -Index: linux-2.6.5-7.283-full/fs/ext3/balloc.c -=================================================================== ---- linux-2.6.5-7.283-full.orig/fs/ext3/balloc.c 2007-03-28 17:33:02.000000000 +0400 -+++ linux-2.6.5-7.283-full/fs/ext3/balloc.c 2007-03-28 18:33:35.000000000 +0400 -@@ -20,6 +20,7 @@ - #include <linux/quotaops.h> - #include <linux/buffer_head.h> - -+#include "group.h" - /* - * balloc.c contains the blocks allocation and deallocation routines - */ -@@ -72,6 +73,75 @@ struct ext3_group_desc * ext3_get_group_ - return gdp + desc; - } - -+/* Initializes an uninitialized block bitmap if given, and returns the -+ * number of blocks free in the group. */ -+unsigned ext3_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, -+ int block_group, struct ext3_group_desc *gdp) -+{ -+ unsigned long start; -+ int bit, bit_max; -+ unsigned free_blocks; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ if (bh) { -+ J_ASSERT_BH(bh, buffer_locked(bh)); -+ -+ /* If checksum is bad mark all blocks use to prevent allocation, -+ * essentially implementing a per-group read-only flag. 
*/ -+ if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) { -+ ext3_error(sb, __FUNCTION__, -+ "Checksum bad for group %u\n", block_group); -+ gdp->bg_free_blocks_count = 0; -+ gdp->bg_free_inodes_count = 0; -+ gdp->bg_itable_unused = 0; -+ memset(bh->b_data, 0xff, sb->s_blocksize); -+ return 0; -+ } -+ memset(bh->b_data, 0, sb->s_blocksize); -+ } -+ -+ /* Check for superblock and gdt backups in this group */ -+ bit_max = ext3_bg_has_super(sb, block_group); -+ -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || -+ block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) * -+ sbi->s_desc_per_block) { -+ if (bit_max) { -+ bit_max += ext3_bg_num_gdb(sb, block_group); -+ bit_max +=le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); -+ } -+ } else { /* For META_BG_BLOCK_GROUPS */ -+ int group_rel = (block_group - -+ le32_to_cpu(sbi->s_es->s_first_meta_bg)) % -+ EXT3_DESC_PER_BLOCK(sb); -+ if (group_rel == 0 || group_rel == 1 || -+ (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1)) -+ bit_max += 1; -+ } -+ -+ /* Last and first groups are always initialized */ -+ free_blocks = EXT3_BLOCKS_PER_GROUP(sb) - bit_max; -+ -+ if (bh) { -+ for (bit = 0; bit < bit_max; bit++) -+ ext3_set_bit(bit, bh->b_data); -+ -+ start = block_group * EXT3_BLOCKS_PER_GROUP(sb) + -+ le32_to_cpu(sbi->s_es->s_first_data_block); -+ -+ /* Set bits for block and inode bitmaps, and inode table */ -+ ext3_set_bit(le32_to_cpu(gdp->bg_block_bitmap) - start, -+ bh->b_data); -+ ext3_set_bit(le32_to_cpu(gdp->bg_inode_bitmap) - start, -+ bh->b_data); -+ for (bit = le32_to_cpu(gdp->bg_inode_table) - start, -+ bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++) -+ ext3_set_bit(bit, bh->b_data); -+ } -+ -+ return free_blocks - sbi->s_itb_per_group - 2; -+} -+ - /* - * Read the bitmap for a given block_group, reading into the specified - * slot in the superblock's bitmap cache. 
-@@ -87,7 +157,19 @@ read_block_bitmap(struct super_block *sb - desc = ext3_get_group_desc (sb, block_group, NULL); - if (!desc) - goto error_out; -- bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ bh = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ if (!buffer_uptodate(bh)) { -+ lock_buffer(bh); -+ if (!buffer_uptodate(bh)) { -+ ext3_init_block_bitmap(sb, bh,block_group,desc); -+ set_buffer_uptodate(bh); -+ } -+ unlock_buffer(bh); -+ } -+ } else { -+ bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ } - if (!bh) - ext3_error (sb, "read_block_bitmap", - "Cannot read block bitmap - " -@@ -432,6 +514,7 @@ do_more: - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + - dquot_freed_blocks); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_mod(&sbi->s_freeblocks_counter, count); - -@@ -1372,8 +1455,11 @@ allocated: - ret_block, goal_hits, goal_attempts); - - spin_lock(sb_bgl_lock(sbi, group_no)); -+ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) -+ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, group_no, gdp); - spin_unlock(sb_bgl_lock(sbi, group_no)); - percpu_counter_mod(&sbi->s_freeblocks_counter, -1); - diff --git a/lustre/kernel_patches/patches/ext3-uninit-2.6.9.patch b/lustre/kernel_patches/patches/ext3-uninit-2.6.9.patch deleted file mode 100644 index 2dbeb80a2b..0000000000 --- a/lustre/kernel_patches/patches/ext3-uninit-2.6.9.patch +++ /dev/null @@ -1,664 +0,0 @@ -Add support for the uninit_groups feature to the kernel. - -Keep a high water mark of used inodes for each group to improve e2fsck time. 
-Block and inode bitmaps can be uninitialized on disk via a flag in the -group descriptor to avoid reading or scanning them at e2fsck time. -A checksum of each group descriptor is used to ensure that corruption in -the group descriptor's bit flags does not cause incorrect operation. - -Index: linux-2.6.9-full/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.9-full.orig/include/linux/ext3_fs.h 2007-03-28 18:35:41.000000000 +0400 -+++ linux-2.6.9-full/include/linux/ext3_fs.h 2007-03-28 18:36:16.000000000 +0400 -@@ -153,16 +153,22 @@ struct ext3_allocation_request { - */ - struct ext3_group_desc - { -- __le32 bg_block_bitmap; /* Blocks bitmap block */ -- __le32 bg_inode_bitmap; /* Inodes bitmap block */ -+ __le32 bg_block_bitmap; /* Blocks bitmap block */ -+ __le32 bg_inode_bitmap; /* Inodes bitmap block */ - __le32 bg_inode_table; /* Inodes table block */ - __le16 bg_free_blocks_count; /* Free blocks count */ - __le16 bg_free_inodes_count; /* Free inodes count */ - __le16 bg_used_dirs_count; /* Directories count */ -- __u16 bg_pad; -- __le32 bg_reserved[3]; -+ __le16 bg_flags; /* EXT3_BG_flags (UNINIT, etc) */ -+ __le32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ -+ __le16 bg_itable_unused; /* Unused inodes count */ -+ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ - }; - -+#define EXT3_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ -+#define EXT3_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ -+#define EXT3_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ -+ - /* - * Macro-instructions used to manage group descriptors - */ -@@ -572,6 +578,7 @@ static inline struct ext3_inode_info *EX - #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 - #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 - #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 -+#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 - #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 - - #define 
EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 -@@ -588,6 +595,7 @@ static inline struct ext3_inode_info *EX - EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ -+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ - EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) - -Index: linux-2.6.9-full/fs/ext3/resize.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/resize.c 2006-03-10 18:20:03.000000000 +0300 -+++ linux-2.6.9-full/fs/ext3/resize.c 2007-03-28 18:36:16.000000000 +0400 -@@ -19,6 +19,7 @@ - #include <linux/errno.h> - #include <linux/slab.h> - -+#include "group.h" - - #define outside(b, first, last) ((b) < (first) || (b) >= (last)) - #define inside(b, first, last) ((b) >= (first) && (b) < (last)) -@@ -807,6 +808,7 @@ int ext3_group_add(struct super_block *s - gdp->bg_inode_table = cpu_to_le32(input->inode_table); - gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); - gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb)); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, input->group, gdp); - - /* - * Make the new blocks and inodes valid next. We do this before -Index: linux-2.6.9-full/fs/ext3/super.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/super.c 2007-03-28 18:35:42.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/super.c 2007-03-28 18:36:16.000000000 +0400 -@@ -38,6 +38,7 @@ - #include <asm/uaccess.h> - #include "xattr.h" - #include "acl.h" -+#include "group.h" - - static int ext3_load_journal(struct super_block *, struct ext3_super_block *, - unsigned long journal_devnum); -@@ -1090,6 +1091,90 @@ static int ext3_setup_super(struct super - return res; - } - -+#if !defined(CONFIG_CRC16) && !defined(CONFIG_CRC16_MODULE) -+/** CRC table for the CRC-16. 
The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */ -+__u16 const crc16_table[256] = { -+ 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, -+ 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, -+ 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, -+ 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, -+ 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, -+ 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, -+ 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, -+ 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, -+ 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, -+ 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, -+ 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, -+ 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, -+ 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, -+ 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, -+ 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, -+ 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, -+ 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, -+ 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, -+ 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, -+ 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, -+ 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, -+ 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, -+ 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, -+ 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, -+ 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, -+ 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, -+ 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, -+ 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, -+ 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 
0x8BC1, 0x8A81, 0x4A40, -+ 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, -+ 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, -+ 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 -+}; -+ -+static inline __u16 crc16_byte(__u16 crc, const __u8 data) -+{ -+ return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff]; -+} -+ -+__u16 crc16(__u16 crc, __u8 const *buffer, size_t len) -+{ -+ while (len--) -+ crc = crc16_byte(crc, *buffer++); -+ return crc; -+} -+#endif -+ -+__le16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 block_group, -+ struct ext3_group_desc *gdp) -+{ -+ __u16 crc = 0; -+ -+ if (sbi->s_es->s_feature_ro_compat & -+ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { -+ int offset = offsetof(struct ext3_group_desc, bg_checksum); -+ __le32 le_group = cpu_to_le32(block_group); -+ -+ crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); -+ crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); -+ crc = crc16(crc, (__u8 *)gdp, offset); -+ offset += sizeof(gdp->bg_checksum); /* skip checksum */ -+ BUG_ON(offset != sizeof(*gdp)); /* XXX handle s_desc_size */ -+ /* for checksum of struct ext4_group_desc do the rest... 
-+ if (offset < sbi->s_es->s_desc_size) { -+ crc = crc16(crc, (__u8 *)gdp + offset, -+ sbi->s_es->s_desc_size - offset); -+ */ -+ } -+ -+ return cpu_to_le16(crc); -+} -+ -+int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 block_group, -+ struct ext3_group_desc *gdp) -+{ -+ if (gdp->bg_checksum != ext3_group_desc_csum(sbi, block_group, gdp)) -+ return 0; -+ -+ return 1; -+} -+ - /* Called at mount-time, super-block is locked */ - static int ext3_check_descriptors (struct super_block * sb) - { -@@ -1139,6 +1224,13 @@ static int ext3_check_descriptors (struc - le32_to_cpu(gdp->bg_inode_table)); - return 0; - } -+ if (!ext3_group_desc_csum_verify(sbi, i, gdp)) { -+ ext3_error(sb, __FUNCTION__, -+ "Checksum for group %d failed (%u!=%u)\n", i, -+ le16_to_cpu(ext3_group_desc_csum(sbi,i,gdp)), -+ le16_to_cpu(gdp->bg_checksum)); -+ return 0; -+ } - block += EXT3_BLOCKS_PER_GROUP(sb); - gdp++; - } -Index: linux-2.6.9-full/fs/ext3/group.h -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/group.h 2007-02-13 18:39:59.640066087 +0300 -+++ linux-2.6.9-full/fs/ext3/group.h 2007-03-28 18:36:16.000000000 +0400 -@@ -0,0 +1,29 @@ -+/* -+ * linux/fs/ext3/group.h -+ * -+ * Copyright (C) 2007 Cluster File Systems, Inc -+ * -+ * Author: Andreas Dilger <adilger@clusterfs.com> -+ */ -+ -+#ifndef _LINUX_EXT3_GROUP_H -+#define _LINUX_EXT3_GROUP_H -+#if defined(CONFIG_CRC16) || defined(CONFIG_CRC16_MODULE) -+#include <linux/crc16.h> -+#endif -+ -+extern __le16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 group, -+ struct ext3_group_desc *gdp); -+extern int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 group, -+ struct ext3_group_desc *gdp); -+struct buffer_head *read_block_bitmap(struct super_block *sb, -+ unsigned int block_group); -+extern unsigned ext3_init_block_bitmap(struct super_block *sb, -+ struct buffer_head *bh, int group, -+ struct ext3_group_desc *desc); -+#define 
ext3_free_blocks_after_init(sb, group, desc) \ -+ ext3_init_block_bitmap(sb, NULL, group, desc) -+extern unsigned ext3_init_inode_bitmap(struct super_block *sb, -+ struct buffer_head *bh, int group, -+ struct ext3_group_desc *desc); -+#endif /* _LINUX_EXT3_GROUP_H */ -Index: linux-2.6.9-full/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/ialloc.c 2007-03-28 18:35:38.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/ialloc.c 2007-03-28 18:36:16.000000000 +0400 -@@ -28,6 +28,7 @@ - - #include "xattr.h" - #include "acl.h" -+#include "group.h" - - /* - * ialloc.c contains the inodes allocation and deallocation routines -@@ -43,6 +44,52 @@ - * the free blocks count in the block. - */ - -+/* -+ * To avoid calling the atomic setbit hundreds or thousands of times, we only -+ * need to use it within a single byte (to ensure we get endianness right). -+ * We can use memset for the rest of the bitmap as there are no other users. -+ */ -+static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) -+{ -+ int i; -+ -+ if (start_bit >= end_bit) -+ return; -+ -+ ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); -+ for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) -+ ext3_set_bit(i, bitmap); -+ if (i < end_bit) -+ memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); -+} -+ -+/* Initializes an uninitialized inode bitmap */ -+unsigned ext3_init_inode_bitmap(struct super_block *sb, -+ struct buffer_head *bh, int block_group, -+ struct ext3_group_desc *gdp) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ J_ASSERT_BH(bh, buffer_locked(bh)); -+ -+ /* If checksum is bad mark all blocks and inodes use to prevent -+ * allocation, essentially implementing a per-group read-only flag. 
*/ -+ if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) { -+ ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n", -+ block_group); -+ gdp->bg_free_blocks_count = 0; -+ gdp->bg_free_inodes_count = 0; -+ gdp->bg_itable_unused = 0; -+ memset(bh->b_data, 0xff, sb->s_blocksize); -+ return 0; -+ } -+ -+ memset(bh->b_data, 0, (EXT3_INODES_PER_GROUP(sb) + 7) / 8); -+ mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb), -+ bh->b_data); -+ -+ return EXT3_INODES_PER_GROUP(sb); -+} - - /* - * Read the inode allocation bitmap for a given block_group, reading -@@ -59,8 +106,19 @@ read_inode_bitmap(struct super_block * s - desc = ext3_get_group_desc(sb, block_group, NULL); - if (!desc) - goto error_out; -- -- bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); -+ if (desc->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) { -+ bh = sb_getblk(sb, le32_to_cpu(desc->bg_inode_bitmap)); -+ if (!buffer_uptodate(bh)) { -+ lock_buffer(bh); -+ if (!buffer_uptodate(bh)) { -+ ext3_init_inode_bitmap(sb, bh,block_group,desc); -+ set_buffer_uptodate(bh); -+ } -+ unlock_buffer(bh); -+ } -+ } else { -+ bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); -+ } - if (!bh) - ext3_error(sb, "read_inode_bitmap", - "Cannot read inode bitmap - " -@@ -169,6 +227,8 @@ void ext3_free_inode (handle_t *handle, - if (is_directory) - gdp->bg_used_dirs_count = cpu_to_le16( - le16_to_cpu(gdp->bg_used_dirs_count) - 1); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi,block_group, -+ gdp); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_inc(&sbi->s_freeinodes_counter); - if (is_directory) -@@ -453,7 +513,7 @@ struct inode *ext3_new_inode(handle_t *h - struct ext3_sb_info *sbi; - int err = 0; - struct inode *ret; -- int i; -+ int i, free = 0; - - /* Cannot create files in a deleted directory */ - if (!dir || !dir->i_nlink) -@@ -566,11 +626,13 @@ repeat_in_this_group: - goto out; - - got: -- ino += group * EXT3_INODES_PER_GROUP(sb) + 1; -- if (ino < EXT3_FIRST_INO(sb) || 
ino > le32_to_cpu(es->s_inodes_count)) { -- ext3_error (sb, "ext3_new_inode", -- "reserved inode or inode > inodes count - " -- "block_group = %d, inode=%lu", group, ino); -+ ino++; -+ if ((group == 0 && ino < EXT3_FIRST_INO(sb)) || -+ ino > EXT3_INODES_PER_GROUP(sb)) { -+ ext3_error(sb, __FUNCTION__, -+ "reserved inode or inode > inodes count - " -+ "block_group = %d, inode=%lu", group, -+ ino + group * EXT3_INODES_PER_GROUP(sb)); - err = -EIO; - goto fail; - } -@@ -578,13 +640,65 @@ got: - BUFFER_TRACE(bh2, "get_write_access"); - err = ext3_journal_get_write_access(handle, bh2); - if (err) goto fail; -+ -+ /* We may have to initialize the block bitmap if it isn't already */ -+ if (EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && -+ gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ struct buffer_head *block_bh = read_block_bitmap(sb, group); -+ -+ BUFFER_TRACE(block_bh, "get block bitmap access"); -+ err = ext3_journal_get_write_access(handle, block_bh); -+ if (err) { -+ brelse(block_bh); -+ goto fail; -+ } -+ -+ free = 0; -+ spin_lock(sb_bgl_lock(sbi, group)); -+ /* recheck and clear flag under lock if we still need to */ -+ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); -+ free = ext3_free_blocks_after_init(sb, group, gdp); -+ gdp->bg_free_blocks_count = cpu_to_le16(free); -+ } -+ spin_unlock(sb_bgl_lock(sbi, group)); -+ -+ /* Don't need to dirty bitmap block if we didn't change it */ -+ if (free) { -+ BUFFER_TRACE(block_bh, "dirty block bitmap"); -+ err = ext3_journal_dirty_metadata(handle, block_bh); -+ } -+ -+ brelse(block_bh); -+ if (err) -+ goto fail; -+ } -+ - spin_lock(sb_bgl_lock(sbi, group)); -+ /* If we didn't allocate from within the initialized part of the inode -+ * table then we need to initialize up to this inode. 
*/ -+ if (EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { -+ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) { -+ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_INODE_UNINIT); -+ free = EXT3_INODES_PER_GROUP(sb); -+ } else { -+ free = EXT3_INODES_PER_GROUP(sb) - -+ le16_to_cpu(gdp->bg_itable_unused); -+ } -+ -+ if (ino > free) { -+ gdp->bg_itable_unused = -+ cpu_to_le16(EXT3_INODES_PER_GROUP(sb) - ino); -+ } -+ } -+ - gdp->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); - if (S_ISDIR(mode)) { - gdp->bg_used_dirs_count = - cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); - } -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, group, gdp); - spin_unlock(sb_bgl_lock(sbi, group)); - BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh2); -@@ -606,7 +720,7 @@ got: - inode->i_gid = current->fsgid; - inode->i_mode = mode; - -- inode->i_ino = ino; -+ inode->i_ino = ino + group * EXT3_INODES_PER_GROUP(sb); - /* This is the optimal IO size (for stat), not the fs block size */ - inode->i_blksize = PAGE_SIZE; - inode->i_blocks = 0; -Index: linux-2.6.9-full/fs/ext3/mballoc.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/mballoc.c 2007-03-28 15:42:45.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/mballoc.c 2007-03-28 18:36:16.000000000 +0400 -@@ -36,6 +36,8 @@ - #include <linux/seq_file.h> - #include <linux/version.h> - -+#include "group.h" -+ - /* - * MUSTDO: - * - test ext3_ext_search_left() and ext3_ext_search_right() -@@ -323,6 +325,7 @@ struct ext3_group_info { - unsigned long bb_state; - unsigned long bb_tid; - struct ext3_free_metadata *bb_md_cur; -+ struct ext3_group_desc *bb_gdp; - unsigned short bb_first_free; - unsigned short bb_free; - unsigned short bb_fragments; -@@ -928,10 +931,7 @@ static int ext3_mb_init_cache(struct pag - if (first_group + i >= EXT3_SB(sb)->s_groups_count) - break; - -- err = -EIO; -- 
desc = ext3_get_group_desc(sb, first_group + i, NULL); -- if (desc == NULL) -- goto out; -+ desc = EXT3_GROUP_INFO(sb, first_group + i)->bb_gdp; - - err = -ENOMEM; - bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); -@@ -946,7 +946,12 @@ static int ext3_mb_init_cache(struct pag - unlock_buffer(bh[i]); - continue; - } -- -+ if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ ext3_init_block_bitmap(sb, bh[i], first_group + i,desc); -+ set_buffer_uptodate(bh[i]); -+ unlock_buffer(bh[i]); -+ continue; -+ } - get_bh(bh[i]); - bh[i]->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh[i]); -@@ -1703,6 +1708,10 @@ static int ext3_mb_good_group(struct ext - switch (cr) { - case 0: - BUG_ON(ac->ac_2order == 0); -+ /* If this group is uninitialized, skip it initially */ -+ if (grp->bb_gdp->bg_flags & -+ cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) -+ return 0; - bits = ac->ac_sb->s_blocksize_bits + 1; - for (i = ac->ac_2order; i <= bits; i++) - if (grp->bb_counters[i] > 0) -@@ -1796,7 +1805,9 @@ repeat: - } - - ac->ac_groups_scanned++; -- if (cr == 0) -+ if (cr == 0 || (e3b.bd_info->bb_gdp->bg_flags & -+ cpu_to_le16(EXT3_BG_BLOCK_UNINIT) && -+ ac->ac_2order != 0)) - ext3_mb_simple_scan_group(ac, &e3b); - else if (cr == 1 && ac->ac_g_ex.fe_len == sbi->s_stripe) - ext3_mb_scan_aligned(ac, &e3b); -@@ -2267,12 +2278,13 @@ int ext3_mb_init_backend(struct super_bl - i--; - goto err_freebuddy; - } -+ memset(meta_group_info[j], 0, len); - desc = ext3_get_group_desc(sb, i, NULL); -+ meta_group_info[j]->bb_gdp = desc; - if (desc == NULL) { - printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i); - goto err_freebuddy; - } -- memset(meta_group_info[j], 0, len); - set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, - &meta_group_info[j]->bb_state); - -@@ -2936,9 +2948,17 @@ int ext3_mb_mark_diskspace_used(struct e - mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); - - spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); -+ if (gdp->bg_flags & 
cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(ext3_free_blocks_after_init(sb, -+ ac->ac_b_ex.fe_group, -+ gdp)); -+ } - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - - ac->ac_b_ex.fe_len); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); - spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); - percpu_counter_mod(&sbi->s_freeblocks_counter, - ac->ac_b_ex.fe_len); - -@@ -4303,6 +4323,7 @@ do_more: - spin_lock(sb_bgl_lock(sbi, block_group)); - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_mod(&sbi->s_freeblocks_counter, count); - -Index: linux-2.6.9-full/fs/ext3/balloc.c -=================================================================== ---- linux-2.6.9-full.orig/fs/ext3/balloc.c 2007-03-28 15:45:41.000000000 +0400 -+++ linux-2.6.9-full/fs/ext3/balloc.c 2007-03-28 18:36:16.000000000 +0400 -@@ -20,6 +20,7 @@ - #include <linux/quotaops.h> - #include <linux/buffer_head.h> - -+#include "group.h" - /* - * balloc.c contains the blocks allocation and deallocation routines - */ -@@ -73,6 +74,75 @@ struct ext3_group_desc * ext3_get_group_ - return gdp + desc; - } - -+/* Initializes an uninitialized block bitmap if given, and returns the -+ * number of blocks free in the group. */ -+unsigned ext3_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, -+ int block_group, struct ext3_group_desc *gdp) -+{ -+ unsigned long start; -+ int bit, bit_max; -+ unsigned free_blocks; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ -+ if (bh) { -+ J_ASSERT_BH(bh, buffer_locked(bh)); -+ -+ /* If checksum is bad mark all blocks use to prevent allocation, -+ * essentially implementing a per-group read-only flag. 
*/ -+ if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) { -+ ext3_error(sb, __FUNCTION__, -+ "Checksum bad for group %u\n", block_group); -+ gdp->bg_free_blocks_count = 0; -+ gdp->bg_free_inodes_count = 0; -+ gdp->bg_itable_unused = 0; -+ memset(bh->b_data, 0xff, sb->s_blocksize); -+ return 0; -+ } -+ memset(bh->b_data, 0, sb->s_blocksize); -+ } -+ -+ /* Check for superblock and gdt backups in this group */ -+ bit_max = ext3_bg_has_super(sb, block_group); -+ -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || -+ block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) * -+ sbi->s_desc_per_block) { -+ if (bit_max) { -+ bit_max += ext3_bg_num_gdb(sb, block_group); -+ bit_max +=le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); -+ } -+ } else { /* For META_BG_BLOCK_GROUPS */ -+ int group_rel = (block_group - -+ le32_to_cpu(sbi->s_es->s_first_meta_bg)) % -+ EXT3_DESC_PER_BLOCK(sb); -+ if (group_rel == 0 || group_rel == 1 || -+ (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1)) -+ bit_max += 1; -+ } -+ -+ /* Last and first groups are always initialized */ -+ free_blocks = EXT3_BLOCKS_PER_GROUP(sb) - bit_max; -+ -+ if (bh) { -+ for (bit = 0; bit < bit_max; bit++) -+ ext3_set_bit(bit, bh->b_data); -+ -+ start = block_group * EXT3_BLOCKS_PER_GROUP(sb) + -+ le32_to_cpu(sbi->s_es->s_first_data_block); -+ -+ /* Set bits for block and inode bitmaps, and inode table */ -+ ext3_set_bit(le32_to_cpu(gdp->bg_block_bitmap) - start, -+ bh->b_data); -+ ext3_set_bit(le32_to_cpu(gdp->bg_inode_bitmap) - start, -+ bh->b_data); -+ for (bit = le32_to_cpu(gdp->bg_inode_table) - start, -+ bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++) -+ ext3_set_bit(bit, bh->b_data); -+ } -+ -+ return free_blocks - sbi->s_itb_per_group - 2; -+} -+ - /* - * Read the bitmap for a given block_group, reading into the specified - * slot in the superblock's bitmap cache. 
-@@ -88,7 +158,19 @@ read_block_bitmap(struct super_block *sb - desc = ext3_get_group_desc (sb, block_group, NULL); - if (!desc) - goto error_out; -- bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { -+ bh = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ if (!buffer_uptodate(bh)) { -+ lock_buffer(bh); -+ if (!buffer_uptodate(bh)) { -+ ext3_init_block_bitmap(sb, bh,block_group,desc); -+ set_buffer_uptodate(bh); -+ } -+ unlock_buffer(bh); -+ } -+ } else { -+ bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); -+ } - if (!bh) - ext3_error (sb, "read_block_bitmap", - "Cannot read block bitmap - " -@@ -429,6 +511,7 @@ do_more: - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + - *pdquot_freed_blocks); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_mod(&sbi->s_freeblocks_counter, count); - -@@ -1330,8 +1413,11 @@ allocated: - ret_block, goal_hits, goal_attempts); - - spin_lock(sb_bgl_lock(sbi, group_no)); -+ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) -+ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); - gdp->bg_free_blocks_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1); -+ gdp->bg_checksum = ext3_group_desc_csum(sbi, group_no, gdp); - spin_unlock(sb_bgl_lock(sbi, group_no)); - percpu_counter_mod(&sbi->s_freeblocks_counter, -1); - diff --git a/lustre/kernel_patches/patches/ext3-wantedi-2.6.15.patch b/lustre/kernel_patches/patches/ext3-wantedi-2.6.15.patch deleted file mode 100644 index 9ed150ea6a..0000000000 --- a/lustre/kernel_patches/patches/ext3-wantedi-2.6.15.patch +++ /dev/null @@ -1,174 +0,0 @@ - fs/ext3/ialloc.c | 35 ++++++++++++++++++++++++++++++++++- - fs/ext3/ioctl.c | 25 +++++++++++++++++++++++++ - fs/ext3/namei.c | 21 +++++++++++++++++---- - include/linux/dcache.h | 5 +++++ - include/linux/ext3_fs.h | 5 ++++- - 5 files changed, 85 
insertions(+), 6 deletions(-) - -Index: linux-2.6.15/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.15.orig/fs/ext3/ialloc.c 2006-02-19 15:23:12.000000000 +0300 -+++ linux-2.6.15/fs/ext3/ialloc.c 2006-02-21 00:26:52.000000000 +0300 -@@ -420,7 +420,8 @@ static int find_group_other(struct super - * For other inodes, search forward from the parent directory's block - * group to find a free inode. - */ --struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) -+struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode, -+ unsigned long goal) - { - struct super_block *sb; - struct buffer_head *bitmap_bh = NULL; -@@ -448,6 +449,38 @@ struct inode *ext3_new_inode(handle_t *h - - sbi = EXT3_SB(sb); - es = sbi->s_es; -+ if (goal) { -+ group = (goal - 1) / EXT3_INODES_PER_GROUP(sb); -+ ino = (goal - 1) % EXT3_INODES_PER_GROUP(sb); -+ gdp = ext3_get_group_desc(sb, group, &bh2); -+ -+ err = -EIO; -+ bitmap_bh = read_inode_bitmap (sb, group); -+ if (!bitmap_bh) -+ goto fail; -+ -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bitmap_bh); -+ if (err) goto fail; -+ -+ if (ext3_set_bit_atomic(sb_bgl_lock(sbi, group), -+ ino, bitmap_bh->b_data)) { -+ printk(KERN_ERR "goal inode %lu unavailable\n", goal); -+ /* Oh well, we tried. */ -+ goto continue_allocation; -+ } -+ -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ if (err) goto fail; -+ -+ /* We've shortcircuited the allocation system successfully, -+ * now finish filling in the inode. 
-+ */ -+ goto got; -+ } -+ -+continue_allocation: - if (S_ISDIR(mode)) { - if (test_opt (sb, OLDALLOC)) - group = find_group_dir(sb, dir); -Index: linux-2.6.15/fs/ext3/ioctl.c -=================================================================== ---- linux-2.6.15.orig/fs/ext3/ioctl.c 2005-11-11 08:33:12.000000000 +0300 -+++ linux-2.6.15/fs/ext3/ioctl.c 2006-02-21 00:26:52.000000000 +0300 -@@ -25,6 +25,31 @@ int ext3_ioctl (struct inode * inode, st - ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); - - switch (cmd) { -+ case EXT3_IOC_CREATE_INUM: { -+ char name[32]; -+ struct dentry *dchild, *dparent; -+ int rc = 0; -+ -+ dparent = list_entry(inode->i_dentry.next, struct dentry, -+ d_alias); -+ snprintf(name, sizeof name, "%lu", arg); -+ dchild = lookup_one_len(name, dparent, strlen(name)); -+ if (dchild->d_inode) { -+ printk(KERN_ERR "%*s/%lu already exists (ino %lu)\n", -+ dparent->d_name.len, dparent->d_name.name, arg, -+ dchild->d_inode->i_ino); -+ rc = -EEXIST; -+ } else { -+ dchild->d_fsdata = (void *)arg; -+ rc = vfs_create(inode, dchild, 0644, NULL); -+ if (rc) -+ printk(KERN_ERR "vfs_create: %d\n", rc); -+ else if (dchild->d_inode->i_ino != arg) -+ rc = -EEXIST; -+ } -+ dput(dchild); -+ return rc; -+ } - case EXT3_IOC_GETFLAGS: - flags = ei->i_flags & EXT3_FL_USER_VISIBLE; - return put_user(flags, (int __user *) arg); -Index: linux-2.6.15/fs/ext3/namei.c -=================================================================== ---- linux-2.6.15.orig/fs/ext3/namei.c 2006-02-19 15:23:12.000000000 +0300 -+++ linux-2.6.15/fs/ext3/namei.c 2006-02-21 00:28:17.000000000 +0300 -@@ -1631,6 +1631,16 @@ static int ext3_add_nondir(handle_t *han - return err; - } - -+static struct inode * ext3_new_inode_wantedi(handle_t *handle, struct inode *dir, -+ int mode, struct dentry *dentry) -+{ -+ unsigned long inum = 0; -+ -+ if (dentry->d_fsdata != NULL) -+ inum = (unsigned long) dentry->d_fsdata; -+ return ext3_new_inode(handle, dir, mode, inum); -+} -+ - /* - * By the time this is 
called, we already have created - * the directory cache entry for the new file, but it -@@ -1656,7 +1666,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, mode); -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ext3_file_inode_operations; -@@ -1690,7 +1700,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, mode); -+ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - init_special_inode(inode, inode->i_mode, rdev); -@@ -1726,7 +1736,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR | mode); -+ inode = ext3_new_inode_wantedi (handle, dir, S_IFDIR | mode, dentry); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -2131,7 +2141,7 @@ retry: - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); -+ inode = ext3_new_inode_wantedi (handle, dir, S_IFLNK|S_IRWXUGO, dentry); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -Index: linux-2.6.15/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.15.orig/include/linux/ext3_fs.h 2005-11-11 08:33:12.000000000 +0300 -+++ linux-2.6.15/include/linux/ext3_fs.h 2006-02-21 00:26:52.000000000 +0300 -@@ -762,7 +762,8 @@ extern int ext3fs_dirhash(const char *na - dx_hash_info *hinfo); - - /* ialloc.c */ --extern struct inode * ext3_new_inode (handle_t *, struct inode *, int); -+extern struct inode * ext3_new_inode (handle_t *, struct inode *, int, -+ unsigned long); - extern void ext3_free_inode (handle_t *, struct inode *); - extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); - extern unsigned long ext3_count_free_inodes (struct super_block *); -@@ -844,4 +845,6 @@ extern struct 
inode_operations ext3_fast - - #endif /* __KERNEL__ */ - -+/* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */ -+#define EXT3_IOC_CREATE_INUM _IOW('f', 5, long) - #endif /* _LINUX_EXT3_FS_H */ diff --git a/lustre/kernel_patches/patches/grab_cache_page_nowait_gfp-2.6-suse.patch b/lustre/kernel_patches/patches/grab_cache_page_nowait_gfp-2.6-suse.patch deleted file mode 100644 index 6f781b46e7..0000000000 --- a/lustre/kernel_patches/patches/grab_cache_page_nowait_gfp-2.6-suse.patch +++ /dev/null @@ -1,57 +0,0 @@ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/mm/filemap.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/mm/filemap.c 2004-11-11 10:28:45.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/mm/filemap.c 2005-02-01 01:36:08.000000000 -0500 -@@ -773,8 +773,19 @@ - struct page * - grab_cache_page_nowait(struct address_space *mapping, unsigned long index) - { -+ return grab_cache_page_nowait_gfp(mapping, index, -+ mapping_gfp_mask(mapping) & -+ ~__GFP_FS); -+} -+ -+EXPORT_SYMBOL(grab_cache_page_nowait); -+ -+struct page * -+grab_cache_page_nowait_gfp(struct address_space *mapping, -+ unsigned long index, -+ unsigned int gfp_mask) -+{ - struct page *page = find_get_page(mapping, index); -- int gfp_mask; - - if (page) { - if (!TestSetPageLocked(page)) -@@ -782,7 +793,7 @@ - page_cache_release(page); - return NULL; - } -- gfp_mask = mapping_gfp_mask(mapping) & ~__GFP_FS; -+ - page = alloc_pages(gfp_mask, 0); - if (page && add_to_page_cache_lru(page, mapping, index, gfp_mask)) { - page_cache_release(page); -@@ -791,7 +802,7 @@ - return page; - } - --EXPORT_SYMBOL(grab_cache_page_nowait); -+EXPORT_SYMBOL(grab_cache_page_nowait_gfp); - - /* - * This is a generic file read routine, and uses the -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/pagemap.h -=================================================================== ---- 
linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/linux/pagemap.h 2004-11-11 10:28:43.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/pagemap.h 2005-02-01 01:29:06.000000000 -0500 -@@ -92,6 +92,9 @@ - - extern struct page * grab_cache_page_nowait(struct address_space *mapping, - unsigned long index); -+extern struct page * grab_cache_page_nowait_gfp(struct address_space *mapping, -+ unsigned long index, -+ unsigned int gfp_mask); - extern struct page * read_cache_page(struct address_space *mapping, - unsigned long index, filler_t *filler, - void *data); diff --git a/lustre/kernel_patches/patches/jbd-stats-2.6.13.4.patch b/lustre/kernel_patches/patches/jbd-stats-2.6.13.4.patch deleted file mode 100644 index 4db8dd3879..0000000000 --- a/lustre/kernel_patches/patches/jbd-stats-2.6.13.4.patch +++ /dev/null @@ -1,735 +0,0 @@ -Index: linux-2.6.13.4/include/linux/jbd.h -=================================================================== ---- linux-2.6.13.4.orig/include/linux/jbd.h 2005-10-10 22:54:29.000000000 +0400 -+++ linux-2.6.13.4/include/linux/jbd.h 2005-11-20 01:35:08.000000000 +0300 -@@ -394,6 +394,16 @@ - }; - - -+/* -+ * Some stats for checkpoint phase -+ */ -+struct transaction_chp_stats_s { -+ unsigned long cs_chp_time; -+ unsigned long cs_forced_to_close; -+ unsigned long cs_written; -+ unsigned long cs_dropped; -+}; -+ - /* The transaction_t type is the guts of the journaling mechanism. 
It - * tracks a compound transaction through its various states: - * -@@ -523,6 +533,21 @@ - spinlock_t t_handle_lock; - - /* -+ * Longest time some handle had to wait for running transaction -+ */ -+ unsigned long t_max_wait; -+ -+ /* -+ * When transaction started -+ */ -+ unsigned long t_start; -+ -+ /* -+ * Checkpointing stats [j_checkpoint_sem] -+ */ -+ struct transaction_chp_stats_s t_chp_stats; -+ -+ /* - * Number of outstanding updates running on this transaction - * [t_handle_lock] - */ -@@ -553,6 +578,57 @@ - - }; - -+struct transaction_run_stats_s { -+ unsigned long rs_wait; -+ unsigned long rs_running; -+ unsigned long rs_locked; -+ unsigned long rs_flushing; -+ unsigned long rs_logging; -+ -+ unsigned long rs_handle_count; -+ unsigned long rs_blocks; -+ unsigned long rs_blocks_logged; -+}; -+ -+struct transaction_stats_s -+{ -+ int ts_type; -+ unsigned long ts_tid; -+ union { -+ struct transaction_run_stats_s run; -+ struct transaction_chp_stats_s chp; -+ } u; -+}; -+ -+#define JBD_STATS_RUN 1 -+#define JBD_STATS_CHECKPOINT 2 -+ -+#define ts_wait u.run.rs_wait -+#define ts_running u.run.rs_running -+#define ts_locked u.run.rs_locked -+#define ts_flushing u.run.rs_flushing -+#define ts_logging u.run.rs_logging -+#define ts_handle_count u.run.rs_handle_count -+#define ts_blocks u.run.rs_blocks -+#define ts_blocks_logged u.run.rs_blocks_logged -+ -+#define ts_chp_time u.chp.cs_chp_time -+#define ts_forced_to_close u.chp.cs_forced_to_close -+#define ts_written u.chp.cs_written -+#define ts_dropped u.chp.cs_dropped -+ -+#define CURRENT_MSECS (jiffies_to_msecs(jiffies)) -+ -+static inline unsigned int -+jbd_time_diff(unsigned int start, unsigned int end) -+{ -+ if (unlikely(start > end)) -+ end = end + (~0UL - start); -+ else -+ end -= start; -+ return end; -+} -+ - /** - * struct journal_s - The journal_s type is the concrete type associated with - * journal_t. 
-@@ -800,6 +876,16 @@ - int j_wbufsize; - - /* -+ * -+ */ -+ struct transaction_stats_s *j_history; -+ int j_history_max; -+ int j_history_cur; -+ spinlock_t j_history_lock; -+ struct proc_dir_entry *j_proc_entry; -+ struct transaction_stats_s j_stats; -+ -+ /* - * An opaque pointer to fs-private information. ext3 puts its - * superblock pointer here - */ -Index: linux-2.6.13.4/fs/jbd/transaction.c -=================================================================== ---- linux-2.6.13.4.orig/fs/jbd/transaction.c 2005-10-10 22:54:29.000000000 +0400 -+++ linux-2.6.13.4/fs/jbd/transaction.c 2005-11-20 01:31:23.000000000 +0300 -@@ -58,6 +58,8 @@ - - J_ASSERT(journal->j_running_transaction == NULL); - journal->j_running_transaction = transaction; -+ transaction->t_max_wait = 0; -+ transaction->t_start = CURRENT_MSECS; - - return transaction; - } -@@ -84,6 +86,7 @@ - int nblocks = handle->h_buffer_credits; - transaction_t *new_transaction = NULL; - int ret = 0; -+ unsigned long ts = CURRENT_MSECS; - - if (nblocks > journal->j_max_transaction_buffers) { - printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", -@@ -217,6 +220,12 @@ - /* OK, account for the buffers that this operation expects to - * use and add the handle to the running transaction. 
*/ - -+ if (time_after(transaction->t_start, ts)) { -+ ts = jbd_time_diff(ts, transaction->t_start); -+ if (ts > transaction->t_max_wait) -+ transaction->t_max_wait= ts; -+ } -+ - handle->h_transaction = transaction; - transaction->t_outstanding_credits += nblocks; - transaction->t_updates++; -Index: linux-2.6.13.4/fs/jbd/journal.c -=================================================================== ---- linux-2.6.13.4.orig/fs/jbd/journal.c 2005-10-10 22:54:29.000000000 +0400 -+++ linux-2.6.13.4/fs/jbd/journal.c 2005-11-20 02:07:44.000000000 +0300 -@@ -36,6 +36,7 @@ - #include <asm/uaccess.h> - #include <asm/page.h> - #include <linux/proc_fs.h> -+#include <linux/seq_file.h> - - EXPORT_SYMBOL(journal_start); - EXPORT_SYMBOL(journal_restart); -@@ -646,6 +647,300 @@ - return journal_add_journal_head(bh); - } - -+struct jbd_stats_proc_session { -+ journal_t *journal; -+ struct transaction_stats_s *stats; -+ int start; -+ int max; -+}; -+ -+static void *jbd_history_skip_empty(struct jbd_stats_proc_session *s, -+ struct transaction_stats_s *ts, -+ int first) -+{ -+ if (ts == s->stats + s->max) -+ ts = s->stats; -+ if (!first && ts == s->stats + s->start) -+ return NULL; -+ while (ts->ts_type == 0) { -+ ts++; -+ if (ts == s->stats + s->max) -+ ts = s->stats; -+ if (ts == s->stats + s->start) -+ return NULL; -+ } -+ return ts; -+ -+} -+ -+static void *jbd_seq_history_start(struct seq_file *seq, loff_t *pos) -+{ -+ struct jbd_stats_proc_session *s = seq->private; -+ struct transaction_stats_s *ts; -+ int l = *pos; -+ -+ if (l == 0) -+ return SEQ_START_TOKEN; -+ ts = jbd_history_skip_empty(s, s->stats + s->start, 1); -+ if (!ts) -+ return NULL; -+ while (--l && (ts = jbd_history_skip_empty(s, ++ts, 0)) != NULL); -+ return ts; -+} -+ -+static void *jbd_seq_history_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ struct jbd_stats_proc_session *s = seq->private; -+ struct transaction_stats_s *ts = v; -+ -+ ++*pos; -+ if (v == SEQ_START_TOKEN) -+ return 
jbd_history_skip_empty(s, s->stats + s->start, 1); -+ else -+ return jbd_history_skip_empty(s, ++ts, 0); -+} -+ -+static int jbd_seq_history_show(struct seq_file *seq, void *v) -+{ -+ struct transaction_stats_s *ts = v; -+ if (v == SEQ_START_TOKEN) { -+ seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s " -+ "%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid", -+ "wait", "run", "lock", "flush", "log", "hndls", -+ "block", "inlog", "ctime", "write", "drop", -+ "close"); -+ return 0; -+ } -+ if (ts->ts_type == JBD_STATS_RUN) -+ seq_printf(seq, "%-4s %-5lu %-5lu %-5lu %-5lu %-5lu %-5lu " -+ "%-6lu %-5lu %-5lu\n", "R", ts->ts_tid, -+ ts->ts_wait, ts->ts_running, ts->ts_locked, -+ ts->ts_flushing, ts->ts_logging, -+ ts->ts_handle_count, ts->ts_blocks, -+ ts->ts_blocks_logged); -+ else if (ts->ts_type == JBD_STATS_CHECKPOINT) -+ seq_printf(seq, "%-4s %-5lu %48s %-5lu %-5lu %-5lu %-5lu\n", -+ "C", ts->ts_tid, " ", ts->ts_chp_time, -+ ts->ts_written, ts->ts_dropped, -+ ts->ts_forced_to_close); -+ else -+ J_ASSERT(0); -+ return 0; -+} -+ -+static void jbd_seq_history_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static struct seq_operations jbd_seq_history_ops = { -+ .start = jbd_seq_history_start, -+ .next = jbd_seq_history_next, -+ .stop = jbd_seq_history_stop, -+ .show = jbd_seq_history_show, -+}; -+ -+static int jbd_seq_history_open(struct inode *inode, struct file *file) -+{ -+ journal_t *journal = PDE(inode)->data; -+ struct jbd_stats_proc_session *s; -+ int rc, size; -+ -+ s = kmalloc(sizeof(*s), GFP_KERNEL); -+ if (s == NULL) -+ return -EIO; -+ size = sizeof(struct transaction_stats_s) * journal->j_history_max; -+ s->stats = kmalloc(size, GFP_KERNEL); -+ if (s == NULL) { -+ kfree(s); -+ return -EIO; -+ } -+ spin_lock(&journal->j_history_lock); -+ memcpy(s->stats, journal->j_history, size); -+ s->max = journal->j_history_max; -+ s->start = journal->j_history_cur % s->max; -+ spin_unlock(&journal->j_history_lock); -+ -+ rc = seq_open(file, &jbd_seq_history_ops); 
-+ if (rc == 0) { -+ struct seq_file *m = (struct seq_file *)file->private_data; -+ m->private = s; -+ } else { -+ kfree(s->stats); -+ kfree(s); -+ } -+ return rc; -+ -+} -+ -+static int jbd_seq_history_release(struct inode *inode, struct file *file) -+{ -+ struct seq_file *seq = (struct seq_file *)file->private_data; -+ struct jbd_stats_proc_session *s = seq->private; -+ kfree(s->stats); -+ kfree(s); -+ return seq_release(inode, file); -+} -+ -+static struct file_operations jbd_seq_history_fops = { -+ .owner = THIS_MODULE, -+ .open = jbd_seq_history_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = jbd_seq_history_release, -+}; -+ -+static void *jbd_seq_info_start(struct seq_file *seq, loff_t *pos) -+{ -+ return *pos ? NULL : SEQ_START_TOKEN; -+} -+ -+static void *jbd_seq_info_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ return NULL; -+} -+ -+static int jbd_seq_info_show(struct seq_file *seq, void *v) -+{ -+ struct jbd_stats_proc_session *s = seq->private; -+ if (v != SEQ_START_TOKEN) -+ return 0; -+ seq_printf(seq, "%lu transaction, each upto %u blocks\n", -+ s->stats->ts_tid, -+ s->journal->j_max_transaction_buffers); -+ if (s->stats->ts_tid == 0) -+ return 0; -+ seq_printf(seq, "average: \n %lums waiting for transaction\n", -+ s->stats->ts_wait / s->stats->ts_tid); -+ seq_printf(seq, " %lums running transaction\n", -+ s->stats->ts_running / s->stats->ts_tid); -+ seq_printf(seq, " %lums transaction was being locked\n", -+ s->stats->ts_locked / s->stats->ts_tid); -+ seq_printf(seq, " %lums flushing data (in ordered mode)\n", -+ s->stats->ts_flushing / s->stats->ts_tid); -+ seq_printf(seq, " %lums logging transaction\n", -+ s->stats->ts_logging / s->stats->ts_tid); -+ seq_printf(seq, " %lu handles per transaction\n", -+ s->stats->ts_handle_count / s->stats->ts_tid); -+ seq_printf(seq, " %lu blocks per transaction\n", -+ s->stats->ts_blocks / s->stats->ts_tid); -+ seq_printf(seq, " %lu logged blocks per transaction\n", -+ 
s->stats->ts_blocks_logged / s->stats->ts_tid); -+ return 0; -+} -+ -+static void jbd_seq_info_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static struct seq_operations jbd_seq_info_ops = { -+ .start = jbd_seq_info_start, -+ .next = jbd_seq_info_next, -+ .stop = jbd_seq_info_stop, -+ .show = jbd_seq_info_show, -+}; -+ -+static int jbd_seq_info_open(struct inode *inode, struct file *file) -+{ -+ journal_t *journal = PDE(inode)->data; -+ struct jbd_stats_proc_session *s; -+ int rc, size; -+ -+ s = kmalloc(sizeof(*s), GFP_KERNEL); -+ if (s == NULL) -+ return -EIO; -+ size = sizeof(struct transaction_stats_s); -+ s->stats = kmalloc(size, GFP_KERNEL); -+ if (s == NULL) { -+ kfree(s); -+ return -EIO; -+ } -+ spin_lock(&journal->j_history_lock); -+ memcpy(s->stats, &journal->j_stats, size); -+ s->journal = journal; -+ spin_unlock(&journal->j_history_lock); -+ -+ rc = seq_open(file, &jbd_seq_info_ops); -+ if (rc == 0) { -+ struct seq_file *m = (struct seq_file *)file->private_data; -+ m->private = s; -+ } else { -+ kfree(s->stats); -+ kfree(s); -+ } -+ return rc; -+ -+} -+ -+static int jbd_seq_info_release(struct inode *inode, struct file *file) -+{ -+ struct seq_file *seq = (struct seq_file *)file->private_data; -+ struct jbd_stats_proc_session *s = seq->private; -+ kfree(s->stats); -+ kfree(s); -+ return seq_release(inode, file); -+} -+ -+static struct file_operations jbd_seq_info_fops = { -+ .owner = THIS_MODULE, -+ .open = jbd_seq_info_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = jbd_seq_info_release, -+}; -+ -+static struct proc_dir_entry *proc_jbd_stats = NULL; -+ -+static void jbd_stats_proc_init(journal_t *journal) -+{ -+ char name[64]; -+ -+ snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); -+ journal->j_proc_entry = proc_mkdir(name, proc_jbd_stats); -+ if (journal->j_proc_entry) { -+ struct proc_dir_entry *p; -+ p = create_proc_entry("history", S_IRUGO, -+ journal->j_proc_entry); -+ if (p) { -+ p->proc_fops = 
&jbd_seq_history_fops; -+ p->data = journal; -+ p = create_proc_entry("info", S_IRUGO, -+ journal->j_proc_entry); -+ if (p) { -+ p->proc_fops = &jbd_seq_info_fops; -+ p->data = journal; -+ } -+ } -+ } -+} -+ -+static void jbd_stats_proc_exit(journal_t *journal) -+{ -+ char name[64]; -+ -+ snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); -+ remove_proc_entry("info", journal->j_proc_entry); -+ remove_proc_entry("history", journal->j_proc_entry); -+ remove_proc_entry(name, proc_jbd_stats); -+} -+ -+static void journal_init_stats(journal_t *journal) -+{ -+ int size; -+ -+ if (proc_jbd_stats == NULL) -+ return; -+ -+ journal->j_history_max = 100; -+ size = sizeof(struct transaction_stats_s) * journal->j_history_max; -+ journal->j_history = kmalloc(size, GFP_KERNEL); -+ if (journal->j_history == NULL) { -+ journal->j_history_max = 0; -+ return; -+ } -+ memset(journal->j_history, 0, size); -+ spin_lock_init(&journal->j_history_lock); -+} -+ - /* - * Management for journal control blocks: functions to create and - * destroy journal_t structures, and to initialise and read existing -@@ -688,6 +983,9 @@ - kfree(journal); - goto fail; - } -+ -+ journal_init_stats(journal); -+ - return journal; - fail: - return NULL; -@@ -731,6 +1029,7 @@ - journal->j_blk_offset = start; - journal->j_maxlen = len; - journal->j_blocksize = blocksize; -+ jbd_stats_proc_init(journal); - - bh = __getblk(journal->j_dev, start, journal->j_blocksize); - J_ASSERT(bh != NULL); -@@ -780,6 +1079,7 @@ - - journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; - journal->j_blocksize = inode->i_sb->s_blocksize; -+ jbd_stats_proc_init(journal); - - /* journal descriptor can store up to n blocks -bzzz */ - n = journal->j_blocksize / sizeof(journal_block_tag_t); -@@ -1161,6 +1461,8 @@ - brelse(journal->j_sb_buffer); - } - -+ if (journal->j_proc_entry) -+ jbd_stats_proc_exit(journal); - if (journal->j_inode) - iput(journal->j_inode); - if (journal->j_revoke) -@@ -1929,6 
+2231,28 @@ - - #endif - -+#if defined(CONFIG_PROC_FS) -+ -+#define JBD_STATS_PROC_NAME "fs/jbd" -+ -+static void __init create_jbd_stats_proc_entry(void) -+{ -+ proc_jbd_stats = proc_mkdir(JBD_STATS_PROC_NAME, NULL); -+} -+ -+static void __exit remove_jbd_stats_proc_entry(void) -+{ -+ if (proc_jbd_stats) -+ remove_proc_entry(JBD_STATS_PROC_NAME, NULL); -+} -+ -+#else -+ -+#define create_jbd_stats_proc_entry() do {} while (0) -+#define remove_jbd_stats_proc_entry() do {} while (0) -+ -+#endif -+ - kmem_cache_t *jbd_handle_cache; - - static int __init journal_init_handle_cache(void) -@@ -1983,6 +2307,7 @@ - if (ret != 0) - journal_destroy_caches(); - create_jbd_proc_entry(); -+ create_jbd_stats_proc_entry(); - return ret; - } - -@@ -1994,6 +2319,7 @@ - printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); - #endif - remove_jbd_proc_entry(); -+ remove_jbd_stats_proc_entry(); - journal_destroy_caches(); - } - -Index: linux-2.6.13.4/fs/jbd/checkpoint.c -=================================================================== ---- linux-2.6.13.4.orig/fs/jbd/checkpoint.c 2005-11-19 22:46:03.000000000 +0300 -+++ linux-2.6.13.4/fs/jbd/checkpoint.c 2005-11-20 02:24:09.000000000 +0300 -@@ -166,6 +166,7 @@ - transaction_t *t = jh->b_transaction; - tid_t tid = t->t_tid; - -+ transaction->t_chp_stats.cs_forced_to_close++; - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - log_start_commit(journal, tid); -@@ -226,7 +227,7 @@ - */ - static int __flush_buffer(journal_t *journal, struct journal_head *jh, - struct buffer_head **bhs, int *batch_count, -- int *drop_count) -+ int *drop_count, transaction_t *transaction) - { - struct buffer_head *bh = jh2bh(jh); - int ret = 0; -@@ -247,6 +248,7 @@ - set_buffer_jwrite(bh); - bhs[*batch_count] = bh; - jbd_unlock_bh_state(bh); -+ transaction->t_chp_stats.cs_written++; - (*batch_count)++; - if (*batch_count == NR_BATCH) { - __flush_batch(journal, bhs, batch_count); -@@ -315,6 +317,8 @@ - tid_t this_tid; - - transaction = 
journal->j_checkpoint_transactions; -+ if (transaction->t_chp_stats.cs_chp_time == 0) -+ transaction->t_chp_stats.cs_chp_time = CURRENT_MSECS; - this_tid = transaction->t_tid; - jh = transaction->t_checkpoint_list; - last_jh = jh->b_cpprev; -@@ -331,7 +335,8 @@ - retry = 1; - break; - } -- retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count); -+ retry = __flush_buffer(journal, jh, bhs, &batch_count, -+ &drop_count, transaction); - if (cond_resched_lock(&journal->j_list_lock)) { - retry = 1; - break; -@@ -609,6 +614,8 @@ - - void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) - { -+ struct transaction_stats_s stats; -+ - assert_spin_locked(&journal->j_list_lock); - if (transaction->t_cpnext) { - transaction->t_cpnext->t_cpprev = transaction->t_cpprev; -@@ -633,5 +640,25 @@ - J_ASSERT(journal->j_running_transaction != transaction); - - jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); -+ -+ /* -+ * File the transaction for history -+ */ -+ if (transaction->t_chp_stats.cs_written != 0 || -+ transaction->t_chp_stats.cs_chp_time != 0) { -+ stats.ts_type = JBD_STATS_CHECKPOINT; -+ stats.ts_tid = transaction->t_tid; -+ stats.u.chp = transaction->t_chp_stats; -+ if (stats.ts_chp_time) -+ stats.ts_chp_time = -+ jbd_time_diff(stats.ts_chp_time, CURRENT_MSECS); -+ spin_lock(&journal->j_history_lock); -+ memcpy(journal->j_history + journal->j_history_cur, &stats, -+ sizeof(stats)); -+ if (++journal->j_history_cur == journal->j_history_max) -+ journal->j_history_cur = 0; -+ spin_unlock(&journal->j_history_lock); -+ } -+ - kfree(transaction); - } -Index: linux-2.6.13.4/fs/jbd/commit.c -=================================================================== ---- linux-2.6.13.4.orig/fs/jbd/commit.c 2005-10-10 22:54:29.000000000 +0400 -+++ linux-2.6.13.4/fs/jbd/commit.c 2005-11-20 00:54:10.000000000 +0300 -@@ -21,6 +21,7 @@ - #include <linux/mm.h> - #include <linux/pagemap.h> - #include <linux/smp_lock.h> -+#include 
<linux/jiffies.h> - - /* - * Default IO end handler for temporary BJ_IO buffer_heads. -@@ -168,6 +169,7 @@ - */ - void journal_commit_transaction(journal_t *journal) - { -+ struct transaction_stats_s stats; - transaction_t *commit_transaction; - struct journal_head *jh, *new_jh, *descriptor; - struct buffer_head **wbuf = journal->j_wbuf; -@@ -214,6 +216,11 @@ - spin_lock(&journal->j_state_lock); - commit_transaction->t_state = T_LOCKED; - -+ stats.ts_wait = commit_transaction->t_max_wait; -+ stats.ts_locked = CURRENT_MSECS; -+ stats.ts_running = jbd_time_diff(commit_transaction->t_start, -+ stats.ts_locked); -+ - spin_lock(&commit_transaction->t_handle_lock); - while (commit_transaction->t_updates) { - DEFINE_WAIT(wait); -@@ -286,6 +293,9 @@ - */ - journal_switch_revoke_table(journal); - -+ stats.ts_flushing = CURRENT_MSECS; -+ stats.ts_locked = jbd_time_diff(stats.ts_locked, stats.ts_flushing); -+ - commit_transaction->t_state = T_FLUSH; - journal->j_committing_transaction = commit_transaction; - journal->j_running_transaction = NULL; -@@ -444,6 +454,11 @@ - */ - commit_transaction->t_state = T_COMMIT; - -+ stats.ts_logging = CURRENT_MSECS; -+ stats.ts_flushing = jbd_time_diff(stats.ts_flushing, stats.ts_logging); -+ stats.ts_blocks = commit_transaction->t_outstanding_credits; -+ stats.ts_blocks_logged = 0; -+ - descriptor = NULL; - bufs = 0; - while (commit_transaction->t_buffers) { -@@ -592,6 +607,7 @@ - submit_bh(WRITE, bh); - } - cond_resched(); -+ stats.ts_blocks_logged += bufs; - - /* Force a new descriptor to be generated next - time round the loop. 
*/ -@@ -756,6 +772,7 @@ - cp_transaction = jh->b_cp_transaction; - if (cp_transaction) { - JBUFFER_TRACE(jh, "remove from old cp transaction"); -+ cp_transaction->t_chp_stats.cs_dropped++; - __journal_remove_checkpoint(jh); - } - -@@ -803,6 +820,36 @@ - - J_ASSERT(commit_transaction->t_state == T_COMMIT); - -+ commit_transaction->t_start = CURRENT_MSECS; -+ stats.ts_logging = jbd_time_diff(stats.ts_logging, -+ commit_transaction->t_start); -+ -+ /* -+ * File the transaction for history -+ */ -+ stats.ts_type = JBD_STATS_RUN; -+ stats.ts_tid = commit_transaction->t_tid; -+ stats.ts_handle_count = commit_transaction->t_handle_count; -+ spin_lock(&journal->j_history_lock); -+ memcpy(journal->j_history + journal->j_history_cur, &stats, -+ sizeof(stats)); -+ if (++journal->j_history_cur == journal->j_history_max) -+ journal->j_history_cur = 0; -+ -+ /* -+ * Calculate overall stats -+ */ -+ journal->j_stats.ts_tid++; -+ journal->j_stats.ts_wait += stats.ts_wait; -+ journal->j_stats.ts_running += stats.ts_running; -+ journal->j_stats.ts_locked += stats.ts_locked; -+ journal->j_stats.ts_flushing += stats.ts_flushing; -+ journal->j_stats.ts_logging += stats.ts_logging; -+ journal->j_stats.ts_handle_count += stats.ts_handle_count; -+ journal->j_stats.ts_blocks += stats.ts_blocks; -+ journal->j_stats.ts_blocks_logged += stats.ts_blocks_logged; -+ spin_unlock(&journal->j_history_lock); -+ - /* - * This is a bit sleazy. We borrow j_list_lock to protect - * journal->j_committing_transaction in __journal_remove_checkpoint. 
diff --git a/lustre/kernel_patches/patches/kexec-2.6-suse-lnxi.patch b/lustre/kernel_patches/patches/kexec-2.6-suse-lnxi.patch deleted file mode 100644 index a84f43dbd5..0000000000 --- a/lustre/kernel_patches/patches/kexec-2.6-suse-lnxi.patch +++ /dev/null @@ -1,1603 +0,0 @@ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/MAINTAINERS -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/MAINTAINERS 2004-11-18 20:59:11.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/MAINTAINERS 2004-11-18 23:25:15.000000000 -0500 -@@ -1199,6 +1199,17 @@ - W: http://www.cse.unsw.edu.au/~neilb/patches/linux-devel/ - S: Maintained - -+KEXEC -+P: Eric Biederman -+P: Randy Dunlap -+M: ebiederm@xmission.com -+M: rddunlap@osdl.org -+W: http://www.xmission.com/~ebiederm/files/kexec/ -+W: http://developer.osdl.org/rddunlap/kexec/ -+L: linux-kernel@vger.kernel.org -+L: fastboot@osdl.org -+S: Maintained -+ - LANMEDIA WAN CARD DRIVER - P: Andrew Stanley-Jones - M: asj@lanmedia.com -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/Kconfig -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/Kconfig 2004-11-18 20:59:11.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/Kconfig 2004-11-18 23:25:15.000000000 -0500 -@@ -411,6 +411,23 @@ - depends on IA32_EMULATION - default y - -+config KEXEC -+ bool "kexec system call (EXPERIMENTAL)" -+ depends on EXPERIMENTAL -+ help -+ kexec is a system call that implements the ability to shutdown your -+ current kernel, and to start another kernel. It is like a reboot -+ but it is indepedent of the system firmware. And like a reboot -+ you can start any kernel with it, not just Linux. -+ -+ The name comes from the similiarity to the exec system call. 
-+ -+ It is an ongoing process to be certain the hardware in a machine -+ is properly shutdown, so do not be surprised if this code does not -+ initially work for you. It may help to enable device hotplugging -+ support. As of this writing the exact hardware interface is -+ strongly in flux, so no good recommendation can be made. -+ - endmenu - - source drivers/Kconfig -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/Makefile -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/Makefile 2004-11-11 10:28:46.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/Makefile 2004-11-18 23:26:29.000000000 -0500 -@@ -19,6 +19,7 @@ - obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o - obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o \ - genapic.o genapic_cluster.o genapic_flat.o -+obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o - obj-$(CONFIG_PM) += suspend.o - obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o - obj-$(CONFIG_CPU_FREQ) += cpufreq/ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/apic.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/apic.c 2004-11-11 10:28:46.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/apic.c 2004-11-18 23:25:15.000000000 -0500 -@@ -143,6 +143,36 @@ - outb(0x70, 0x22); - outb(0x00, 0x23); - } -+ else { -+ /* Go back to Virtual Wire compatibility mode */ -+ unsigned long value; -+ -+ /* For the spurious interrupt use vector F, and enable it */ -+ value = apic_read(APIC_SPIV); -+ value &= ~APIC_VECTOR_MASK; -+ value |= APIC_SPIV_APIC_ENABLED; -+ value |= 0xf; -+ apic_write_around(APIC_SPIV, value); -+ -+ /* For LVT0 make it edge triggered, active high, external and enabled */ -+ value = apic_read(APIC_LVT0); -+ value &= ~(APIC_MODE_MASK | 
APIC_SEND_PENDING | -+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | -+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); -+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; -+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXINT); -+ apic_write_around(APIC_LVT0, value); -+ -+ /* For LVT1 make it edge triggered, active high, nmi and enabled */ -+ value = apic_read(APIC_LVT1); -+ value &= ~( -+ APIC_MODE_MASK | APIC_SEND_PENDING | -+ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | -+ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); -+ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; -+ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); -+ apic_write_around(APIC_LVT1, value); -+ } - } - - void disable_local_APIC(void) -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/e820.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/e820.c 2004-04-03 22:36:53.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/e820.c 2004-11-18 23:25:15.000000000 -0500 -@@ -185,8 +185,6 @@ - int i; - for (i = 0; i < e820.nr_map; i++) { - struct resource *res; -- if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) -- continue; - res = alloc_bootmem_low(sizeof(struct resource)); - switch (e820.map[i].type) { - case E820_RAM: res->name = "System RAM"; break; -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/i8259.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/i8259.c 2004-11-18 20:59:11.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/i8259.c 2004-11-18 23:25:15.000000000 -0500 -@@ -318,6 +318,44 @@ - } - } - -+static int i8259A_resume(struct sys_device *dev) -+{ -+ init_8259A(0); -+ return 0; -+} -+ -+static int i8259A_shutdown(struct sys_device *dev) -+{ -+ /* Put the i8259A into a quiescent 
state that -+ * the kernel initialization code can get it -+ * out of. -+ */ -+ outb(0xff, 0x21); /* mask all of 8259A-1 */ -+ outb(0xff, 0xA1); /* mask all of 8259A-1 */ -+ return 0; -+} -+ -+static struct sysdev_class i8259_sysdev_class = { -+ set_kset_name("i8259"), -+ .resume = i8259A_resume, -+ .shutdown = i8259A_shutdown, -+}; -+ -+static struct sys_device device_i8259A = { -+ .id = 0, -+ .cls = &i8259_sysdev_class, -+}; -+ -+static int __init i8259A_init_sysfs(void) -+{ -+ int error = sysdev_class_register(&i8259_sysdev_class); -+ if (!error) -+ error = sysdev_register(&device_i8259A); -+ return error; -+} -+ -+device_initcall(i8259A_init_sysfs); -+ - void __init init_8259A(int auto_eoi) - { - unsigned long flags; -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/io_apic.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/io_apic.c 2004-11-11 10:28:46.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/io_apic.c 2004-11-18 23:25:15.000000000 -0500 -@@ -328,7 +328,7 @@ - /* - * Find the pin to which IRQ[irq] (ISA) is connected - */ --static int __init find_isa_irq_pin(int irq, int type) -+static int find_isa_irq_pin(int irq, int type) - { - int i; - -@@ -1112,11 +1112,43 @@ - */ - void disable_IO_APIC(void) - { -+ int pin; - /* - * Clear the IO-APIC before rebooting: - */ - clear_IO_APIC(); - -+ /* -+ * If the i82559 is routed through an IOAPIC -+ * Put that IOAPIC in virtual wire mode -+ * so legacy interrups can be delivered. 
-+ */ -+ pin = find_isa_irq_pin(0, mp_ExtINT); -+ if (pin != -1) { -+ struct IO_APIC_route_entry entry; -+ unsigned long flags; -+ -+ memset(&entry, 0, sizeof(entry)); -+ entry.mask = 0; /* Enabled */ -+ entry.trigger = 0; /* Edge */ -+ entry.irr = 0; -+ entry.polarity = 0; /* High */ -+ entry.delivery_status = 0; -+ entry.dest_mode = 0; /* Physical */ -+ entry.delivery_mode = 7; /* ExtInt */ -+ entry.vector = 0; -+ entry.dest.physical.physical_dest = 0; -+ -+ -+ /* -+ * Add it to the IO-APIC irq-routing table: -+ */ -+ spin_lock_irqsave(&ioapic_lock, flags); -+ io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); -+ io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); -+ spin_unlock_irqrestore(&ioapic_lock, flags); -+ } -+ - disconnect_bsp_APIC(); - } - -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/machine_kexec.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/machine_kexec.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/machine_kexec.c 2004-11-18 23:25:15.000000000 -0500 -@@ -0,0 +1,246 @@ -+/* -+ * machine_kexec.c - handle transition of Linux booting another kernel -+ * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> -+ * -+ * This source code is licensed under the GNU General Public License, -+ * Version 2. See the file COPYING for more details. 
-+ */ -+ -+#include <linux/mm.h> -+#include <linux/kexec.h> -+#include <linux/delay.h> -+#include <linux/string.h> -+#include <linux/reboot.h> -+#include <asm/pda.h> -+#include <asm/pgtable.h> -+#include <asm/pgalloc.h> -+#include <asm/tlbflush.h> -+#include <asm/mmu_context.h> -+#include <asm/io.h> -+#include <asm/apic.h> -+#include <asm/cpufeature.h> -+#include <asm/hw_irq.h> -+ -+#define LEVEL0_SIZE (1UL << 12UL) -+#define LEVEL1_SIZE (1UL << 21UL) -+#define LEVEL2_SIZE (1UL << 30UL) -+#define LEVEL3_SIZE (1UL << 39UL) -+#define LEVEL4_SIZE (1UL << 48UL) -+ -+#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -+#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE) -+#define L2_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -+#define L3_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -+ -+static void init_level2_page( -+ uint64_t *level2p, unsigned long addr) -+{ -+ unsigned long end_addr; -+ addr &= PAGE_MASK; -+ end_addr = addr + LEVEL2_SIZE; -+ while(addr < end_addr) { -+ *(level2p++) = addr | L1_ATTR; -+ addr += LEVEL1_SIZE; -+ } -+} -+ -+static int init_level3_page(struct kimage *image, -+ uint64_t *level3p, unsigned long addr, unsigned long last_addr) -+{ -+ unsigned long end_addr; -+ int result; -+ result = 0; -+ addr &= PAGE_MASK; -+ end_addr = addr + LEVEL3_SIZE; -+ while((addr < last_addr) && (addr < end_addr)) { -+ struct page *page; -+ uint64_t *level2p; -+ page = kimage_alloc_control_pages(image, 0); -+ if (!page) { -+ result = -ENOMEM; -+ goto out; -+ } -+ level2p = (uint64_t *)page_address(page); -+ init_level2_page(level2p, addr); -+ *(level3p++) = __pa(level2p) | L2_ATTR; -+ addr += LEVEL2_SIZE; -+ } -+ /* clear the unused entries */ -+ while(addr < end_addr) { -+ *(level3p++) = 0; -+ addr += LEVEL2_SIZE; -+ } -+out: -+ return result; -+} -+ -+ -+static int init_level4_page(struct kimage *image, -+ uint64_t *level4p, unsigned long addr, unsigned long last_addr) 
-+{ -+ unsigned long end_addr; -+ int result; -+ result = 0; -+ addr &= PAGE_MASK; -+ end_addr = addr + LEVEL4_SIZE; -+ while((addr < last_addr) && (addr < end_addr)) { -+ struct page *page; -+ uint64_t *level3p; -+ page = kimage_alloc_control_pages(image, 0); -+ if (!page) { -+ result = -ENOMEM; -+ goto out; -+ } -+ level3p = (uint64_t *)page_address(page); -+ result = init_level3_page(image, level3p, addr, last_addr); -+ if (result) { -+ goto out; -+ } -+ *(level4p++) = __pa(level3p) | L3_ATTR; -+ addr += LEVEL3_SIZE; -+ } -+ /* clear the unused entries */ -+ while(addr < end_addr) { -+ *(level4p++) = 0; -+ addr += LEVEL3_SIZE; -+ } -+ out: -+ return result; -+} -+ -+ -+static int init_pgtable(struct kimage *image, unsigned long start_pgtable) -+{ -+ uint64_t *level4p; -+ level4p = (uint64_t *)__va(start_pgtable); -+ return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT); -+} -+ -+static void set_idt(void *newidt, __u16 limit) -+{ -+ unsigned char curidt[10]; -+ -+ /* x86-64 supports unaliged loads & stores */ -+ (*(__u16 *)(curidt)) = limit; -+ (*(__u64 *)(curidt +2)) = (unsigned long)(newidt); -+ -+ __asm__ __volatile__ ( -+ "lidt %0\n" -+ : "=m" (curidt) -+ ); -+}; -+ -+ -+static void set_gdt(void *newgdt, __u16 limit) -+{ -+ unsigned char curgdt[10]; -+ -+ /* x86-64 supports unaligned loads & stores */ -+ (*(__u16 *)(curgdt)) = limit; -+ (*(__u64 *)(curgdt +2)) = (unsigned long)(newgdt); -+ -+ __asm__ __volatile__ ( -+ "lgdt %0\n" -+ : "=m" (curgdt) -+ ); -+}; -+ -+static void load_segments(void) -+{ -+ __asm__ __volatile__ ( -+ "\tmovl $"STR(__KERNEL_DS)",%eax\n" -+ "\tmovl %eax,%ds\n" -+ "\tmovl %eax,%es\n" -+ "\tmovl %eax,%ss\n" -+ "\tmovl %eax,%fs\n" -+ "\tmovl %eax,%gs\n" -+ ); -+#undef STR -+#undef __STR -+} -+ -+typedef void (*relocate_new_kernel_t)( -+ unsigned long indirection_page, unsigned long control_code_buffer, -+ unsigned long start_address, unsigned long pgtable); -+ -+const extern unsigned char relocate_new_kernel[]; -+extern void 
relocate_new_kernel_end(void); -+const extern unsigned long relocate_new_kernel_size; -+ -+int machine_kexec_prepare(struct kimage *image) -+{ -+ unsigned long start_pgtable, control_code_buffer; -+ int result; -+ -+ /* Calculate the offsets */ -+ start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; -+ control_code_buffer = start_pgtable + 4096UL; -+ -+ /* Setup the identity mapped 64bit page table */ -+ result = init_pgtable(image, start_pgtable); -+ if (result) { -+ return result; -+ } -+ -+ /* Place the code in the reboot code buffer */ -+ memcpy(__va(control_code_buffer), relocate_new_kernel, relocate_new_kernel_size); -+ -+ return 0; -+} -+ -+void machine_kexec_cleanup(struct kimage *image) -+{ -+ return; -+} -+ -+/* -+ * Do not allocate memory (or fail in any way) in machine_kexec(). -+ * We are past the point of no return, committed to rebooting now. -+ */ -+void machine_kexec(struct kimage *image) -+{ -+ unsigned long indirection_page; -+ unsigned long control_code_buffer; -+ unsigned long start_pgtable; -+ relocate_new_kernel_t rnk; -+ -+ /* Interrupts aren't acceptable while we reboot */ -+ local_irq_disable(); -+ -+ /* Calculate the offsets */ -+ indirection_page = image->head & PAGE_MASK; -+ start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; -+ control_code_buffer = start_pgtable + 4096UL; -+ -+ /* Set the low half of the page table to my identity mapped -+ * page table for kexec. Leave the high half pointing at the -+ * kernel pages. Don't bother to flush the global pages -+ * as that will happen when I fully switch to my identity mapped -+ * page table anyway. -+ */ -+ memcpy((void *)read_pda(level4_pgt), __va(start_pgtable), PAGE_SIZE/2); -+ __flush_tlb(); -+ -+ -+ /* The segment registers are funny things, they are -+ * automatically loaded from a table, in memory wherever you -+ * set them to a specific selector, but this table is never -+ * accessed again unless you set the segment to a different selector. 
-+ * -+ * The more common model are caches where the behide -+ * the scenes work is done, but is also dropped at arbitrary -+ * times. -+ * -+ * I take advantage of this here by force loading the -+ * segments, before I zap the gdt with an invalid value. -+ */ -+ load_segments(); -+ /* The gdt & idt are now invalid. -+ * If you want to load them you must set up your own idt & gdt. -+ */ -+ set_gdt(phys_to_virt(0),0); -+ set_idt(phys_to_virt(0),0); -+ /* now call it */ -+ rnk = (relocate_new_kernel_t) control_code_buffer; -+ (*rnk)(indirection_page, control_code_buffer, image->start, start_pgtable); -+} -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/reboot.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/reboot.c 2004-04-03 22:37:59.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/reboot.c 2004-11-18 23:25:15.000000000 -0500 -@@ -91,31 +91,6 @@ - [target] "b" (WARMBOOT_TRAMP)); - } - --#ifdef CONFIG_SMP --static void smp_halt(void) --{ -- int cpuid = safe_smp_processor_id(); -- static int first_entry = 1; -- -- if (first_entry) { -- first_entry = 0; -- smp_call_function((void *)machine_restart, NULL, 1, 0); -- } -- -- smp_stop_cpu(); -- -- /* AP calling this. 
Just halt */ -- if (cpuid != boot_cpu_id) { -- for (;;) -- asm("hlt"); -- } -- -- /* Wait for all other CPUs to have run smp_stop_cpu */ -- while (!cpus_empty(cpu_online_map)) -- rep_nop(); --} --#endif -- - static inline void kb_wait(void) - { - int i; -@@ -125,23 +100,45 @@ - break; - } - --void machine_restart(char * __unused) -+void machine_shutdown(void) - { -- int i; -- -+ /* Stop the cpus and apics */ - #ifdef CONFIG_SMP -- smp_halt(); --#endif -+ int reboot_cpu_id; -+ -+ /* The boot cpu is always logical cpu 0 */ -+ reboot_cpu_id = 0; -+ -+ /* Make certain the cpu I'm about to reboot on is online */ -+ if (!cpu_isset(reboot_cpu_id, cpu_online_map)) { -+ reboot_cpu_id = smp_processor_id(); -+ } -+ -+ /* Make certain I only run on the appropriate processor */ -+ set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id)); - -+ /* O.K Now that I'm on the appropriate processor, -+ * stop all of the others. -+ */ -+ smp_send_stop(); -+#endif -+ - local_irq_disable(); -- -+ - #ifndef CONFIG_SMP - disable_local_APIC(); - #endif - - disable_IO_APIC(); -- -+ - local_irq_enable(); -+} -+ -+void machine_restart(char * __unused) -+{ -+ int i; -+ -+ machine_shutdown(); - - /* Tell the BIOS if we want cold or warm reboot */ - *((unsigned short *)__va(0x472)) = reboot_mode; -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/relocate_kernel.S -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/relocate_kernel.S 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/relocate_kernel.S 2004-11-18 23:25:15.000000000 -0500 -@@ -0,0 +1,141 @@ -+/* -+ * relocate_kernel.S - put the kernel image in place to boot -+ * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> -+ * -+ * This source code is licensed under the GNU General Public License, -+ * Version 2. See the file COPYING for more details. 
-+ */ -+ -+#include <linux/linkage.h> -+ -+ /* -+ * Must be relocatable PIC code callable as a C function, that once -+ * it starts can not use the previous processes stack. -+ */ -+ .globl relocate_new_kernel -+ .code64 -+relocate_new_kernel: -+ /* %rdi indirection_page -+ * %rsi reboot_code_buffer -+ * %rdx start address -+ * %rcx page_table -+ * %r8 arg5 -+ * %r9 arg6 -+ */ -+ -+ /* zero out flags, and disable interrupts */ -+ pushq $0 -+ popfq -+ -+ /* set a new stack at the bottom of our page... */ -+ lea 4096(%rsi), %rsp -+ -+ /* store the parameters back on the stack */ -+ pushq %rdx /* store the start address */ -+ -+ /* Set cr0 to a known state: -+ * 31 1 == Paging enabled -+ * 18 0 == Alignment check disabled -+ * 16 0 == Write protect disabled -+ * 3 0 == No task switch -+ * 2 0 == Don't do FP software emulation. -+ * 0 1 == Proctected mode enabled -+ */ -+ movq %cr0, %rax -+ andq $~((1<<18)|(1<<16)|(1<<3)|(1<<2)), %rax -+ orl $((1<<31)|(1<<0)), %eax -+ movq %rax, %cr0 -+ -+ /* Set cr4 to a known state: -+ * 10 0 == xmm exceptions disabled -+ * 9 0 == xmm registers instructions disabled -+ * 8 0 == performance monitoring counter disabled -+ * 7 0 == page global disabled -+ * 6 0 == machine check exceptions disabled -+ * 5 1 == physical address extension enabled -+ * 4 0 == page size extensions disabled -+ * 3 0 == Debug extensions disabled -+ * 2 0 == Time stamp disable (disabled) -+ * 1 0 == Protected mode virtual interrupts disabled -+ * 0 0 == VME disabled -+ */ -+ -+ movq $((1<<5)), %rax -+ movq %rax, %cr4 -+ -+ jmp 1f -+1: -+ -+ /* Switch to the identity mapped page tables, -+ * and flush the TLB. -+ */ -+ movq %rcx, %cr3 -+ -+ /* Do the copies */ -+ movq %rdi, %rbx /* Put the indirection page in %rbx */ -+ xorq %rdi, %rdi -+ xorq %rsi, %rsi -+ -+0: /* top, read another word for the indirection page */ -+ -+ movq (%rbx), %rcx -+ addq $8, %rbx -+ testq $0x1, %rcx /* is it a destination page? 
*/ -+ jz 1f -+ movq %rcx, %rdi -+ andq $0xfffffffffffff000, %rdi -+ jmp 0b -+1: -+ testq $0x2, %rcx /* is it an indirection page? */ -+ jz 1f -+ movq %rcx, %rbx -+ andq $0xfffffffffffff000, %rbx -+ jmp 0b -+1: -+ testq $0x4, %rcx /* is it the done indicator? */ -+ jz 1f -+ jmp 2f -+1: -+ testq $0x8, %rcx /* is it the source indicator? */ -+ jz 0b /* Ignore it otherwise */ -+ movq %rcx, %rsi /* For ever source page do a copy */ -+ andq $0xfffffffffffff000, %rsi -+ -+ movq $512, %rcx -+ rep ; movsq -+ jmp 0b -+2: -+ -+ /* To be certain of avoiding problems with self-modifying code -+ * I need to execute a serializing instruction here. -+ * So I flush the TLB by reloading %cr3 here, it's handy, -+ * and not processor dependent. -+ */ -+ movq %cr3, %rax -+ movq %rax, %cr3 -+ -+ /* set all of the registers to known values */ -+ /* leave %rsp alone */ -+ -+ xorq %rax, %rax -+ xorq %rbx, %rbx -+ xorq %rcx, %rcx -+ xorq %rdx, %rdx -+ xorq %rsi, %rsi -+ xorq %rdi, %rdi -+ xorq %rbp, %rbp -+ xorq %r8, %r8 -+ xorq %r9, %r9 -+ xorq %r10, %r9 -+ xorq %r11, %r11 -+ xorq %r12, %r12 -+ xorq %r13, %r13 -+ xorq %r14, %r14 -+ xorq %r15, %r15 -+ -+ ret -+relocate_new_kernel_end: -+ -+ .globl relocate_new_kernel_size -+relocate_new_kernel_size: -+ .quad relocate_new_kernel_end - relocate_new_kernel -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/apicdef.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-x86_64/apicdef.h 2004-11-11 10:28:46.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/apicdef.h 2004-11-18 23:26:05.000000000 -0500 -@@ -32,8 +32,8 @@ - #define SET_APIC_LOGICAL_ID(x) (((x)<<24)) - #define APIC_ALL_CPUS 0xFFu - #define APIC_DFR 0xE0 --#define APIC_DFR_CLUSTER 0x0FFFFFFFu --#define APIC_DFR_FLAT 0xFFFFFFFFu -+#define APIC_DFR_CLUSTER 0x0FFFFFFFul -+#define APIC_DFR_FLAT 0xFFFFFFFFul - #define APIC_SPIV 0xF0 - #define 
APIC_SPIV_FOCUS_DISABLED (1<<9) - #define APIC_SPIV_APIC_ENABLED (1<<8) -@@ -89,6 +89,7 @@ - #define APIC_LVT_REMOTE_IRR (1<<14) - #define APIC_INPUT_POLARITY (1<<13) - #define APIC_SEND_PENDING (1<<12) -+#define APIC_MODE_MASK 0x700 - #define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7) - #define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8)) - #define APIC_MODE_FIXED 0x0 -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/kexec.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-x86_64/kexec.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/kexec.h 2004-11-18 23:25:15.000000000 -0500 -@@ -0,0 +1,25 @@ -+#ifndef _X86_64_KEXEC_H -+#define _X86_64_KEXEC_H -+ -+#include <asm/page.h> -+#include <asm/proto.h> -+ -+/* -+ * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. -+ * I.e. Maximum page that is mapped directly into kernel memory, -+ * and kmap is not required. -+ * -+ * So far x86_64 is limited to 40 physical address bits. 
-+ */ -+ -+/* Maximum physical address we can use pages from */ -+#define KEXEC_SOURCE_MEMORY_LIMIT (0xFFFFFFFFFFUL) -+/* Maximum address we can reach in physical address mode */ -+#define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL) -+/* Maximum address we can use for the control pages */ -+#define KEXEC_CONTROL_MEMORY_LIMIT (0xFFFFFFFFFFUL) -+ -+/* Allocate one page for the pdp and the second for the code */ -+#define KEXEC_CONTROL_CODE_SIZE (4096UL + 4096UL) -+ -+#endif /* _X86_64_KEXEC_H */ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/unistd.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-x86_64/unistd.h 2004-11-11 10:28:49.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/unistd.h 2004-11-18 23:27:18.000000000 -0500 -@@ -551,7 +551,22 @@ - #define __NR_mq_getsetattr 245 - __SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr) - --#define __NR_syscall_max __NR_mq_getsetattr -+#define __NR_mq_open 240 -+__SYSCALL(__NR_mq_open, sys_ni_syscall) -+#define __NR_mq_unlink 241 -+__SYSCALL(__NR_mq_unlink, sys_ni_syscall) -+#define __NR_mq_timedsend 242 -+__SYSCALL(__NR_mq_timedsend, sys_ni_syscall) -+#define __NR_mq_timedreceive 243 -+__SYSCALL(__NR_mq_timedreceive, sys_ni_syscall) -+#define __NR_mq_notify 244 -+__SYSCALL(__NR_mq_notify, sys_ni_syscall) -+#define __NR_mq_getsetattr 245 -+__SYSCALL(__NR_mq_getsetattr, sys_ni_syscall) -+#define __NR_kexec_load 246 -+__SYSCALL(__NR_kexec_load, sys_kexec_load) -+ -+#define __NR_syscall_max __NR_kexec_load - #ifndef __NO_STUBS - - /* user-visible error numbers are in the range -1 - -4095 */ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/kexec.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/linux/kexec.h 1969-12-31 19:00:00.000000000 -0500 -+++ 
linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/kexec.h 2004-11-18 23:25:15.000000000 -0500 -@@ -0,0 +1,56 @@ -+#ifndef LINUX_KEXEC_H -+#define LINUX_KEXEC_H -+ -+#if CONFIG_KEXEC -+#include <linux/types.h> -+#include <linux/list.h> -+#include <asm/kexec.h> -+ -+/* -+ * This structure is used to hold the arguments that are used when loading -+ * kernel binaries. -+ */ -+ -+typedef unsigned long kimage_entry_t; -+#define IND_DESTINATION 0x1 -+#define IND_INDIRECTION 0x2 -+#define IND_DONE 0x4 -+#define IND_SOURCE 0x8 -+ -+#define KEXEC_SEGMENT_MAX 8 -+struct kexec_segment { -+ void *buf; -+ size_t bufsz; -+ void *mem; -+ size_t memsz; -+}; -+ -+struct kimage { -+ kimage_entry_t head; -+ kimage_entry_t *entry; -+ kimage_entry_t *last_entry; -+ -+ unsigned long destination; -+ -+ unsigned long start; -+ struct page *control_code_page; -+ -+ unsigned long nr_segments; -+ struct kexec_segment segment[KEXEC_SEGMENT_MAX]; -+ -+ struct list_head control_pages; -+ struct list_head dest_pages; -+ struct list_head unuseable_pages; -+}; -+ -+ -+/* kexec interface functions */ -+extern void machine_kexec(struct kimage *image); -+extern int machine_kexec_prepare(struct kimage *image); -+extern void machine_kexec_cleanup(struct kimage *image); -+extern asmlinkage long sys_kexec(unsigned long entry, long nr_segments, -+ struct kexec_segment *segments); -+extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); -+extern struct kimage *kexec_image; -+#endif -+#endif /* LINUX_KEXEC_H */ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/reboot.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/linux/reboot.h 2004-04-03 22:38:27.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/reboot.h 2004-11-18 23:25:15.000000000 -0500 -@@ -22,6 +22,7 @@ - * POWER_OFF Stop OS and remove all power from system, if possible. 
- * RESTART2 Restart system using given command string. - * SW_SUSPEND Suspend system using software suspend if compiled in. -+ * KEXEC Restart system using a previously loaded Linux kernel - */ - - #define LINUX_REBOOT_CMD_RESTART 0x01234567 -@@ -31,6 +32,7 @@ - #define LINUX_REBOOT_CMD_POWER_OFF 0x4321FEDC - #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4 - #define LINUX_REBOOT_CMD_SW_SUSPEND 0xD000FCE2 -+#define LINUX_REBOOT_CMD_KEXEC 0x45584543 - - - #ifdef __KERNEL__ -@@ -49,6 +51,8 @@ - extern void machine_halt(void); - extern void machine_power_off(void); - -+extern void machine_shutdown(void); -+ - #endif - - #endif /* _LINUX_REBOOT_H */ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/syscalls.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/linux/syscalls.h 2004-11-11 10:28:49.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/syscalls.h 2004-11-18 23:25:15.000000000 -0500 -@@ -19,6 +19,7 @@ - struct iovec; - struct itimerspec; - struct itimerval; -+struct kexec_segment; - struct linux_dirent; - struct linux_dirent64; - struct list_head; -@@ -154,6 +155,8 @@ - asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, - void __user *arg); - asmlinkage long sys_restart_syscall(void); -+asmlinkage long sys_kexec_load(void *entry, unsigned long nr_segments, -+ struct kexec_segment *segments, unsigned long flags); - - asmlinkage long sys_exit(int error_code); - asmlinkage void sys_exit_group(int error_code); -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/Makefile -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/kernel/Makefile 2004-11-11 10:28:43.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/Makefile 2004-11-18 23:25:15.000000000 -0500 -@@ -17,6 +17,7 @@ - obj-$(CONFIG_KALLSYMS) += kallsyms.o - 
obj-$(CONFIG_PM) += power/ - obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o -+obj-$(CONFIG_KEXEC) += kexec.o - obj-$(CONFIG_COMPAT) += compat.o - obj-$(CONFIG_PAGG) += pagg.o - obj-$(CONFIG_IKCONFIG) += configs.o -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/kexec.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/kernel/kexec.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/kexec.c 2004-11-18 23:25:15.000000000 -0500 -@@ -0,0 +1,640 @@ -+/* -+ * kexec.c - kexec system call -+ * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> -+ * -+ * This source code is licensed under the GNU General Public License, -+ * Version 2. See the file COPYING for more details. -+ */ -+ -+#include <linux/mm.h> -+#include <linux/file.h> -+#include <linux/slab.h> -+#include <linux/fs.h> -+#include <linux/kexec.h> -+#include <linux/spinlock.h> -+#include <linux/list.h> -+#include <linux/highmem.h> -+#include <net/checksum.h> -+#include <asm/page.h> -+#include <asm/uaccess.h> -+#include <asm/io.h> -+#include <asm/system.h> -+ -+/* -+ * When kexec transitions to the new kernel there is a one-to-one -+ * mapping between physical and virtual addresses. On processors -+ * where you can disable the MMU this is trivial, and easy. For -+ * others it is still a simple predictable page table to setup. -+ * -+ * In that environment kexec copies the new kernel to its final -+ * resting place. This means I can only support memory whose -+ * physical address can fit in an unsigned long. In particular -+ * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled. -+ * If the assembly stub has more restrictive requirements -+ * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be -+ * defined more restrictively in <asm/kexec.h>. 
-+ * -+ * The code for the transition from the current kernel to the -+ * the new kernel is placed in the control_code_buffer, whose size -+ * is given by KEXEC_CONTROL_CODE_SIZE. In the best case only a single -+ * page of memory is necessary, but some architectures require more. -+ * Because this memory must be identity mapped in the transition from -+ * virtual to physical addresses it must live in the range -+ * 0 - TASK_SIZE, as only the user space mappings are arbitrarily -+ * modifiable. -+ * -+ * The assembly stub in the control code buffer is passed a linked list -+ * of descriptor pages detailing the source pages of the new kernel, -+ * and the destination addresses of those source pages. As this data -+ * structure is not used in the context of the current OS, it must -+ * be self-contained. -+ * -+ * The code has been made to work with highmem pages and will use a -+ * destination page in its final resting place (if it happens -+ * to allocate it). The end product of this is that most of the -+ * physical address space, and most of RAM can be used. -+ * -+ * Future directions include: -+ * - allocating a page table with the control code buffer identity -+ * mapped, to simplify machine_kexec and make kexec_on_panic more -+ * reliable. -+ */ -+ -+/* -+ * KIMAGE_NO_DEST is an impossible destination address..., for -+ * allocating pages whose destination address we do not care about. 
-+ */ -+#define KIMAGE_NO_DEST (-1UL) -+ -+static int kimage_is_destination_range( -+ struct kimage *image, unsigned long start, unsigned long end); -+static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long dest); -+ -+ -+static int kimage_alloc(struct kimage **rimage, -+ unsigned long nr_segments, struct kexec_segment *segments) -+{ -+ int result; -+ struct kimage *image; -+ size_t segment_bytes; -+ unsigned long i; -+ -+ /* Allocate a controlling structure */ -+ result = -ENOMEM; -+ image = kmalloc(sizeof(*image), GFP_KERNEL); -+ if (!image) { -+ goto out; -+ } -+ memset(image, 0, sizeof(*image)); -+ image->head = 0; -+ image->entry = &image->head; -+ image->last_entry = &image->head; -+ -+ /* Initialize the list of control pages */ -+ INIT_LIST_HEAD(&image->control_pages); -+ -+ /* Initialize the list of destination pages */ -+ INIT_LIST_HEAD(&image->dest_pages); -+ -+ /* Initialize the list of unuseable pages */ -+ INIT_LIST_HEAD(&image->unuseable_pages); -+ -+ /* Read in the segments */ -+ image->nr_segments = nr_segments; -+ segment_bytes = nr_segments * sizeof*segments; -+ result = copy_from_user(image->segment, segments, segment_bytes); -+ if (result) -+ goto out; -+ -+ /* -+ * Verify we have good destination addresses. The caller is -+ * responsible for making certain we don't attempt to load -+ * the new image into invalid or reserved areas of RAM. This -+ * just verifies it is an address we can use. -+ */ -+ result = -EADDRNOTAVAIL; -+ for (i = 0; i < nr_segments; i++) { -+ unsigned long mend; -+ mend = ((unsigned long)(image->segment[i].mem)) + -+ image->segment[i].memsz; -+ if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) -+ goto out; -+ } -+ -+ /* -+ * Find a location for the control code buffer, and add it -+ * the vector of segments so that it's pages will also be -+ * counted as destination pages. 
-+ */ -+ result = -ENOMEM; -+ image->control_code_page = kimage_alloc_control_pages(image, -+ get_order(KEXEC_CONTROL_CODE_SIZE)); -+ if (!image->control_code_page) { -+ printk(KERN_ERR "Could not allocate control_code_buffer\n"); -+ goto out; -+ } -+ -+ result = 0; -+ out: -+ if (result == 0) { -+ *rimage = image; -+ } else { -+ kfree(image); -+ } -+ return result; -+} -+ -+static int kimage_is_destination_range( -+ struct kimage *image, unsigned long start, unsigned long end) -+{ -+ unsigned long i; -+ -+ for (i = 0; i < image->nr_segments; i++) { -+ unsigned long mstart, mend; -+ mstart = (unsigned long)image->segment[i].mem; -+ mend = mstart + image->segment[i].memsz; -+ if ((end > mstart) && (start < mend)) { -+ return 1; -+ } -+ } -+ return 0; -+} -+ -+static struct page *kimage_alloc_pages(unsigned int gfp_mask, unsigned int order) -+{ -+ struct page *pages; -+ pages = alloc_pages(gfp_mask, order); -+ if (pages) { -+ unsigned int count, i; -+ pages->mapping = NULL; -+ pages->private = order; -+ count = 1 << order; -+ for(i = 0; i < count; i++) { -+ SetPageReserved(pages + i); -+ } -+ } -+ return pages; -+} -+ -+static void kimage_free_pages(struct page *page) -+{ -+ unsigned int order, count, i; -+ order = page->private; -+ count = 1 << order; -+ for(i = 0; i < count; i++) { -+ ClearPageReserved(page + i); -+ } -+ __free_pages(page, order); -+} -+ -+static void kimage_free_page_list(struct list_head *list) -+{ -+ struct list_head *pos, *next; -+ list_for_each_safe(pos, next, list) { -+ struct page *page; -+ -+ page = list_entry(pos, struct page, lru); -+ list_del(&page->lru); -+ -+ kimage_free_pages(page); -+ } -+} -+ -+struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order) -+{ -+ /* Control pages are special, they are the intermediaries -+ * that are needed while we copy the rest of the pages -+ * to their final resting place. 
As such they must -+ * not conflict with either the destination addresses -+ * or memory the kernel is already using. -+ * -+ * The only case where we really need more than one of -+ * these are for architectures where we cannot disable -+ * the MMU and must instead generate an identity mapped -+ * page table for all of the memory. -+ * -+ * At worst this runs in O(N) of the image size. -+ */ -+ struct list_head extra_pages; -+ struct page *pages; -+ unsigned int count; -+ -+ count = 1 << order; -+ INIT_LIST_HEAD(&extra_pages); -+ -+ /* Loop while I can allocate a page and the page allocated -+ * is a destination page. -+ */ -+ do { -+ unsigned long pfn, epfn, addr, eaddr; -+ pages = kimage_alloc_pages(GFP_KERNEL, order); -+ if (!pages) -+ break; -+ pfn = page_to_pfn(pages); -+ epfn = pfn + count; -+ addr = pfn << PAGE_SHIFT; -+ eaddr = epfn << PAGE_SHIFT; -+ if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) || -+ kimage_is_destination_range(image, addr, eaddr)) -+ { -+ list_add(&pages->lru, &extra_pages); -+ pages = NULL; -+ } -+ } while(!pages); -+ if (pages) { -+ /* Remember the allocated page... */ -+ list_add(&pages->lru, &image->control_pages); -+ -+ /* Because the page is already in it's destination -+ * location we will never allocate another page at -+ * that address. Therefore kimage_alloc_pages -+ * will not return it (again) and we don't need -+ * to give it an entry in image->segment[]. -+ */ -+ } -+ /* Deal with the destination pages I have inadvertently allocated. -+ * -+ * Ideally I would convert multi-page allocations into single -+ * page allocations, and add everyting to image->dest_pages. -+ * -+ * For now it is simpler to just free the pages. 
-+ */ -+ kimage_free_page_list(&extra_pages); -+ return pages; -+ -+} -+ -+static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) -+{ -+ if (*image->entry != 0) { -+ image->entry++; -+ } -+ if (image->entry == image->last_entry) { -+ kimage_entry_t *ind_page; -+ struct page *page; -+ page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST); -+ if (!page) { -+ return -ENOMEM; -+ } -+ ind_page = page_address(page); -+ *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; -+ image->entry = ind_page; -+ image->last_entry = -+ ind_page + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); -+ } -+ *image->entry = entry; -+ image->entry++; -+ *image->entry = 0; -+ return 0; -+} -+ -+static int kimage_set_destination( -+ struct kimage *image, unsigned long destination) -+{ -+ int result; -+ -+ destination &= PAGE_MASK; -+ result = kimage_add_entry(image, destination | IND_DESTINATION); -+ if (result == 0) { -+ image->destination = destination; -+ } -+ return result; -+} -+ -+ -+static int kimage_add_page(struct kimage *image, unsigned long page) -+{ -+ int result; -+ -+ page &= PAGE_MASK; -+ result = kimage_add_entry(image, page | IND_SOURCE); -+ if (result == 0) { -+ image->destination += PAGE_SIZE; -+ } -+ return result; -+} -+ -+ -+static void kimage_free_extra_pages(struct kimage *image) -+{ -+ /* Walk through and free any extra destination pages I may have */ -+ kimage_free_page_list(&image->dest_pages); -+ -+ /* Walk through and free any unuseable pages I have cached */ -+ kimage_free_page_list(&image->unuseable_pages); -+ -+} -+static int kimage_terminate(struct kimage *image) -+{ -+ int result; -+ -+ result = kimage_add_entry(image, IND_DONE); -+ if (result == 0) { -+ /* Point at the terminating element */ -+ image->entry--; -+ kimage_free_extra_pages(image); -+ } -+ return result; -+} -+ -+#define for_each_kimage_entry(image, ptr, entry) \ -+ for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ -+ ptr = (entry & IND_INDIRECTION)? 
\ -+ phys_to_virt((entry & PAGE_MASK)): ptr +1) -+ -+static void kimage_free_entry(kimage_entry_t entry) -+{ -+ struct page *page; -+ -+ page = pfn_to_page(entry >> PAGE_SHIFT); -+ kimage_free_pages(page); -+} -+ -+static void kimage_free(struct kimage *image) -+{ -+ kimage_entry_t *ptr, entry; -+ kimage_entry_t ind = 0; -+ -+ if (!image) -+ return; -+ kimage_free_extra_pages(image); -+ for_each_kimage_entry(image, ptr, entry) { -+ if (entry & IND_INDIRECTION) { -+ /* Free the previous indirection page */ -+ if (ind & IND_INDIRECTION) { -+ kimage_free_entry(ind); -+ } -+ /* Save this indirection page until we are -+ * done with it. -+ */ -+ ind = entry; -+ } -+ else if (entry & IND_SOURCE) { -+ kimage_free_entry(entry); -+ } -+ } -+ /* Free the final indirection page */ -+ if (ind & IND_INDIRECTION) { -+ kimage_free_entry(ind); -+ } -+ -+ /* Handle any machine specific cleanup */ -+ machine_kexec_cleanup(image); -+ -+ /* Free the kexec control pages... */ -+ kimage_free_page_list(&image->control_pages); -+ kfree(image); -+} -+ -+static kimage_entry_t *kimage_dst_used(struct kimage *image, unsigned long page) -+{ -+ kimage_entry_t *ptr, entry; -+ unsigned long destination = 0; -+ -+ for_each_kimage_entry(image, ptr, entry) { -+ if (entry & IND_DESTINATION) { -+ destination = entry & PAGE_MASK; -+ } -+ else if (entry & IND_SOURCE) { -+ if (page == destination) { -+ return ptr; -+ } -+ destination += PAGE_SIZE; -+ } -+ } -+ return 0; -+} -+ -+static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long destination) -+{ -+ /* -+ * Here we implement safeguards to ensure that a source page -+ * is not copied to its destination page before the data on -+ * the destination page is no longer useful. -+ * -+ * To do this we maintain the invariant that a source page is -+ * either its own destination page, or it is not a -+ * destination page at all. 
-+ * -+ * That is slightly stronger than required, but the proof -+ * that no problems will not occur is trivial, and the -+ * implementation is simply to verify. -+ * -+ * When allocating all pages normally this algorithm will run -+ * in O(N) time, but in the worst case it will run in O(N^2) -+ * time. If the runtime is a problem the data structures can -+ * be fixed. -+ */ -+ struct page *page; -+ unsigned long addr; -+ -+ /* -+ * Walk through the list of destination pages, and see if I -+ * have a match. -+ */ -+ list_for_each_entry(page, &image->dest_pages, lru) { -+ addr = page_to_pfn(page) << PAGE_SHIFT; -+ if (addr == destination) { -+ list_del(&page->lru); -+ return page; -+ } -+ } -+ page = NULL; -+ while (1) { -+ kimage_entry_t *old; -+ -+ /* Allocate a page, if we run out of memory give up */ -+ page = kimage_alloc_pages(gfp_mask, 0); -+ if (!page) { -+ return 0; -+ } -+ /* If the page cannot be used file it away */ -+ if (page_to_pfn(page) > (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { -+ list_add(&page->lru, &image->unuseable_pages); -+ continue; -+ } -+ addr = page_to_pfn(page) << PAGE_SHIFT; -+ -+ /* If it is the destination page we want use it */ -+ if (addr == destination) -+ break; -+ -+ /* If the page is not a destination page use it */ -+ if (!kimage_is_destination_range(image, addr, addr + PAGE_SIZE)) -+ break; -+ -+ /* -+ * I know that the page is someones destination page. -+ * See if there is already a source page for this -+ * destination page. And if so swap the source pages. -+ */ -+ old = kimage_dst_used(image, addr); -+ if (old) { -+ /* If so move it */ -+ unsigned long old_addr; -+ struct page *old_page; -+ -+ old_addr = *old & PAGE_MASK; -+ old_page = pfn_to_page(old_addr >> PAGE_SHIFT); -+ copy_highpage(page, old_page); -+ *old = addr | (*old & ~PAGE_MASK); -+ -+ /* The old page I have found cannot be a -+ * destination page, so return it. 
-+ */ -+ addr = old_addr; -+ page = old_page; -+ break; -+ } -+ else { -+ /* Place the page on the destination list I -+ * will use it later. -+ */ -+ list_add(&page->lru, &image->dest_pages); -+ } -+ } -+ return page; -+} -+ -+static int kimage_load_segment(struct kimage *image, -+ struct kexec_segment *segment) -+{ -+ unsigned long mstart; -+ int result; -+ unsigned long offset; -+ unsigned long offset_end; -+ unsigned char *buf; -+ -+ result = 0; -+ buf = segment->buf; -+ mstart = (unsigned long)segment->mem; -+ -+ offset_end = segment->memsz; -+ -+ result = kimage_set_destination(image, mstart); -+ if (result < 0) { -+ goto out; -+ } -+ for (offset = 0; offset < segment->memsz; offset += PAGE_SIZE) { -+ struct page *page; -+ char *ptr; -+ size_t size, leader; -+ page = kimage_alloc_page(image, GFP_HIGHUSER, mstart + offset); -+ if (page == 0) { -+ result = -ENOMEM; -+ goto out; -+ } -+ result = kimage_add_page(image, page_to_pfn(page) << PAGE_SHIFT); -+ if (result < 0) { -+ goto out; -+ } -+ ptr = kmap(page); -+ if (segment->bufsz < offset) { -+ /* We are past the end zero the whole page */ -+ memset(ptr, 0, PAGE_SIZE); -+ kunmap(page); -+ continue; -+ } -+ size = PAGE_SIZE; -+ leader = 0; -+ if ((offset == 0)) { -+ leader = mstart & ~PAGE_MASK; -+ } -+ if (leader) { -+ /* We are on the first page zero the unused portion */ -+ memset(ptr, 0, leader); -+ size -= leader; -+ ptr += leader; -+ } -+ if (size > (segment->bufsz - offset)) { -+ size = segment->bufsz - offset; -+ } -+ if (size < (PAGE_SIZE - leader)) { -+ /* zero the trailing part of the page */ -+ memset(ptr + size, 0, (PAGE_SIZE - leader) - size); -+ } -+ result = copy_from_user(ptr, buf + offset, size); -+ kunmap(page); -+ if (result) { -+ result = (result < 0) ? result : -EIO; -+ goto out; -+ } -+ } -+ out: -+ return result; -+} -+ -+/* -+ * Exec Kernel system call: for obvious reasons only root may call it. -+ * -+ * This call breaks up into three pieces. 
-+ * - A generic part which loads the new kernel from the current -+ * address space, and very carefully places the data in the -+ * allocated pages. -+ * -+ * - A generic part that interacts with the kernel and tells all of -+ * the devices to shut down. Preventing on-going dmas, and placing -+ * the devices in a consistent state so a later kernel can -+ * reinitialize them. -+ * -+ * - A machine specific part that includes the syscall number -+ * and the copies the image to it's final destination. And -+ * jumps into the image at entry. -+ * -+ * kexec does not sync, or unmount filesystems so if you need -+ * that to happen you need to do that yourself. -+ */ -+struct kimage *kexec_image = NULL; -+ -+asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, -+ struct kexec_segment *segments, unsigned long flags) -+{ -+ struct kimage *image; -+ int result; -+ -+ /* We only trust the superuser with rebooting the system. */ -+ if (!capable(CAP_SYS_BOOT)) -+ return -EPERM; -+ -+ /* -+ * In case we need just a little bit of special behavior for -+ * reboot on panic. 
-+ */ -+ if (flags != 0) -+ return -EINVAL; -+ -+ if (nr_segments > KEXEC_SEGMENT_MAX) -+ return -EINVAL; -+ -+ image = NULL; -+ result = 0; -+ -+ if (nr_segments > 0) { -+ unsigned long i; -+ result = kimage_alloc(&image, nr_segments, segments); -+ if (result) { -+ goto out; -+ } -+ result = machine_kexec_prepare(image); -+ if (result) { -+ goto out; -+ } -+ image->start = entry; -+ for (i = 0; i < nr_segments; i++) { -+ result = kimage_load_segment(image, &image->segment[i]); -+ if (result) { -+ goto out; -+ } -+ } -+ result = kimage_terminate(image); -+ if (result) { -+ goto out; -+ } -+ } -+ -+ image = xchg(&kexec_image, image); -+ -+ out: -+ kimage_free(image); -+ return result; -+} -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/sys.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/kernel/sys.c 2004-11-11 10:28:49.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/sys.c 2004-11-18 23:25:15.000000000 -0500 -@@ -17,6 +17,8 @@ - #include <linux/init.h> - #include <linux/highuid.h> - #include <linux/fs.h> -+#include <linux/kernel.h> -+#include <linux/kexec.h> - #include <linux/workqueue.h> - #include <linux/device.h> - #include <linux/times.h> -@@ -226,6 +228,7 @@ - cond_syscall(sys_lookup_dcookie) - cond_syscall(sys_swapon) - cond_syscall(sys_swapoff) -+cond_syscall(sys_kexec_load) - cond_syscall(sys_init_module) - cond_syscall(sys_delete_module) - cond_syscall(sys_socketpair) -@@ -505,6 +508,24 @@ - machine_restart(buffer); - break; - -+#ifdef CONFIG_KEXEC -+ case LINUX_REBOOT_CMD_KEXEC: -+ { -+ struct kimage *image; -+ image = xchg(&kexec_image, 0); -+ if (!image) { -+ unlock_kernel(); -+ return -EINVAL; -+ } -+ notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); -+ system_state = SYSTEM_BOOTING; -+ device_shutdown(); -+ printk(KERN_EMERG "Starting new kernel\n"); -+ machine_shutdown(); -+ machine_kexec(image); -+ break; -+ } -+#endif - 
#ifdef CONFIG_SOFTWARE_SUSPEND - case LINUX_REBOOT_CMD_SW_SUSPEND: - { diff --git a/lustre/kernel_patches/patches/kjournald_affinity.patch b/lustre/kernel_patches/patches/kjournald_affinity.patch deleted file mode 100644 index 977676b8a0..0000000000 --- a/lustre/kernel_patches/patches/kjournald_affinity.patch +++ /dev/null @@ -1,52 +0,0 @@ ---- linux-2.6.10.orig/fs/ext3/super.c 2004-12-24 22:35:28.000000000 +0100 -+++ linux-2.6.10/fs/ext3/super.c 2005-01-18 12:27:51.896529310 +0100 -@@ -1787,6 +1787,33 @@ - return NULL; - } - -+#ifdef CONFIG_NUMA -+#include <linux/nodemask.h> -+#include <asm/topology.h> -+ -+static int journal_node_affinity = 0; -+spinlock_t journal_node_affinity_lock = SPIN_LOCK_UNLOCKED; -+ -+static void ext3_bind_journal_thread(journal_t *journal) { -+ int i, node; -+ -+ spin_lock(&journal_node_affinity_lock); -+ for (i = 0; i < MAX_NUMNODES; i++) { -+ node = (journal_node_affinity + i) % MAX_NUMNODES; -+ -+ if (!nr_cpus_node(node) || !node_online(node)) -+ continue; -+ -+ set_cpus_allowed(journal->j_task, node_to_cpumask(node)); -+ journal_node_affinity = (node + 1) % MAX_NUMNODES; -+ break; -+ } -+ spin_unlock(&journal_node_affinity_lock); -+} -+#else -+#define ext3_bind_journal_thread(journal) do {} while (0) -+#endif -+ - static int ext3_load_journal(struct super_block * sb, - struct ext3_super_block * es) - { -@@ -1852,6 +1875,7 @@ - return err; - } - -+ ext3_bind_journal_thread(journal); - EXT3_SB(sb)->s_journal = journal; - ext3_clear_journal_err(sb, es); - return 0; -@@ -1881,6 +1908,7 @@ - return -EIO; - } - -+ ext3_bind_journal_thread(journal); - EXT3_SB(sb)->s_journal = journal; - - ext3_update_dynamic_rev(sb); diff --git a/lustre/kernel_patches/patches/link_notlast-susefix.patch b/lustre/kernel_patches/patches/link_notlast-susefix.patch deleted file mode 100644 index e3efbb465b..0000000000 --- a/lustre/kernel_patches/patches/link_notlast-susefix.patch +++ /dev/null @@ -1,16 +0,0 @@ ---- linux-2.6.5-7.141/fs/namei.c.orig 2005-04-01 
18:03:37.788262784 +0300 -+++ linux-2.6.5-7.141/fs/namei.c 2005-04-01 18:05:43.058218856 +0300 -@@ -719,10 +719,12 @@ - goto out_dput; - - if (inode->i_op->follow_link) { -+ int save_flags = nd->flags; - mntget(next.mnt); - nd->flags |= LOOKUP_LINK_NOTLAST; - err = do_follow_link(next.dentry, nd); -- nd->flags &= ~LOOKUP_LINK_NOTLAST; -+ if (!(save_flags & LOOKUP_LINK_NOTLAST)) -+ nd->flags &= ~LOOKUP_LINK_NOTLAST; - dput(next.dentry); - mntput(next.mnt); - if (err) diff --git a/lustre/kernel_patches/patches/lustre_build.patch b/lustre/kernel_patches/patches/lustre_build.patch deleted file mode 100644 index 70f6a37d4b..0000000000 --- a/lustre/kernel_patches/patches/lustre_build.patch +++ /dev/null @@ -1,33 +0,0 @@ - fs/Kconfig | 8 ++++++++ - fs/Makefile | 1 + - 2 files changed, 9 insertions(+) - ---- linux-2.5.72/fs/Makefile~lustre_build 2003-06-16 22:20:05.000000000 -0600 -+++ linux-2.5.72-braam/fs/Makefile 2003-06-22 10:11:57.000000000 -0600 -@@ -57,6 +57,7 @@ obj-$(CONFIG_RAMFS) += ramfs/ - obj-$(CONFIG_HUGETLBFS) += hugetlbfs/ - obj-$(CONFIG_CODA_FS) += coda/ - obj-$(CONFIG_INTERMEZZO_FS) += intermezzo/ -+obj-$(CONFIG_LUSTRE_FS) += lustre/ - obj-$(CONFIG_MINIX_FS) += minix/ - obj-$(CONFIG_FAT_FS) += fat/ - obj-$(CONFIG_UMSDOS_FS) += umsdos/ ---- linux-2.5.72/fs/Kconfig~lustre_build 2003-06-16 22:20:05.000000000 -0600 -+++ linux-2.5.72-braam/fs/Kconfig 2003-06-22 10:47:15.000000000 -0600 -@@ -1561,6 +1561,14 @@ config CODA_FS - whenever you want), say M here and read - <file:Documentation/modules.txt>. The module will be called coda. - -+config LUSTRE_FS -+ bool "Lustre: next generation clustering file system (EXPERIMENTAL)" -+ depends on INET && EXPERIMENTAL -+ help -+ Lustre is a next generation storage architecture which includes a -+ POSIX compliant cluster file system. For details see -+ <http://www.lustre.org/>. 
-+ - config INTERMEZZO_FS - tristate "InterMezzo file system support (replicating fs) (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL - -_ diff --git a/lustre/kernel_patches/patches/mtd-2.6-suse-lnxi.patch b/lustre/kernel_patches/patches/mtd-2.6-suse-lnxi.patch deleted file mode 100644 index 85b2ca2553..0000000000 --- a/lustre/kernel_patches/patches/mtd-2.6-suse-lnxi.patch +++ /dev/null @@ -1,35414 +0,0 @@ -Index: linux-2.6.5/drivers/mtd/Kconfig -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/Kconfig 2004-04-03 22:36:26.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/Kconfig 2005-02-01 17:11:17.000000000 -0500 -@@ -1,4 +1,4 @@ --# $Id: Kconfig,v 1.3 2003/05/28 11:02:23 dwmw2 Exp $ -+# $Id: Kconfig,v 1.6 2004/08/09 13:19:42 dwmw2 Exp $ - - menu "Memory Technology Devices (MTD)" - -@@ -28,7 +28,7 @@ - Determines the verbosity level of the MTD debugging messages. - - config MTD_PARTITIONS -- tristate "MTD partitioning support" -+ bool "MTD partitioning support" - depends on MTD - help - If you have a device which needs to divide its flash chip(s) up -@@ -68,9 +68,23 @@ - SA1100 map driver (CONFIG_MTD_SA1100) has an option for this, for - example. - -+config MTD_REDBOOT_PARTS_UNALLOCATED -+ bool " Include unallocated flash regions" -+ depends on MTD_REDBOOT_PARTS -+ help -+ If you need to register each unallocated flash region as a MTD -+ 'partition', enable this option. -+ -+config MTD_REDBOOT_PARTS_READONLY -+ bool " Force read-only for RedBoot system images" -+ depends on MTD_REDBOOT_PARTS -+ help -+ If you need to force read-only for 'RedBoot', 'RedBoot Config' and -+ 'FIS directory' images, enable this option. -+ - config MTD_CMDLINE_PARTS -- tristate "Command line partition table parsing" -- depends on MTD_PARTITIONS -+ bool "Command line partition table parsing" -+ depends on MTD_PARTITIONS = "y" - ---help--- - Allow generic configuration of the MTD paritition tables via the kernel - command line. 
Multiple flash resources are supported for hardware where -Index: linux-2.6.5/drivers/mtd/Makefile -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/Makefile 2004-04-03 22:36:57.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/Makefile 2005-02-01 17:11:17.000000000 -0500 -@@ -1,28 +1,14 @@ - # - # Makefile for the memory technology device drivers. - # --# $Id: Makefile.common,v 1.2 2003/05/23 11:38:29 dwmw2 Exp $ -- --# *** BIG UGLY NOTE *** --# --# The shiny new inter_module_xxx has introduced yet another ugly link --# order dependency, which I'd previously taken great care to avoid. --# We now have to ensure that the chip drivers are initialised before the --# map drivers, and that the doc200[01] drivers are initialised before --# docprobe. --# --# We'll hopefully merge the doc200[01] drivers and docprobe back into --# a single driver some time soon, but the CFI drivers are going to have --# to stay like that. --# --# Urgh. --# --# dwmw2 21/11/0 -+# $Id: Makefile.common,v 1.5 2004/08/10 20:51:49 dwmw2 Exp $ - - # Core functionality. --obj-$(CONFIG_MTD) += mtdcore.o -+mtd-y := mtdcore.o -+mtd-$(CONFIG_MTD_PARTITIONS) += mtdpart.o -+obj-$(CONFIG_MTD) += $(mtd-y) -+ - obj-$(CONFIG_MTD_CONCAT) += mtdconcat.o --obj-$(CONFIG_MTD_PARTITIONS) += mtdpart.o - obj-$(CONFIG_MTD_REDBOOT_PARTS) += redboot.o - obj-$(CONFIG_MTD_CMDLINE_PARTS) += cmdlinepart.o - obj-$(CONFIG_MTD_AFS_PARTS) += afs.o -Index: linux-2.6.5/drivers/mtd/afs.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/afs.c 2004-04-03 22:38:15.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/afs.c 2005-02-01 17:11:17.000000000 -0500 -@@ -21,7 +21,7 @@ - This is access code for flashes using ARM's flash partitioning - standards. 
- -- $Id: afs.c,v 1.12 2003/06/13 15:31:06 rmk Exp $ -+ $Id: afs.c,v 1.13 2004/02/27 22:09:59 rmk Exp $ - - ======================================================================*/ - -Index: linux-2.6.5/drivers/mtd/chips/Kconfig -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/chips/Kconfig 2004-04-03 22:36:54.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/chips/Kconfig 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - # drivers/mtd/chips/Kconfig --# $Id: Kconfig,v 1.3 2003/05/28 15:13:24 dwmw2 Exp $ -+# $Id: Kconfig,v 1.9 2004/07/16 15:32:14 dwmw2 Exp $ - - menu "RAM/ROM/Flash chip drivers" - depends on MTD!=n -@@ -85,59 +85,72 @@ - arrangements of CFI chips. If unsure, say 'N' and all options - which are supported by the current code will be enabled. - --config MTD_CFI_B1 -- bool "Support 8-bit buswidth" -- depends on MTD_CFI_GEOMETRY -+config MTD_MAP_BANK_WIDTH_1 -+ bool "Support 8-bit buswidth" if MTD_CFI_GEOMETRY -+ default y - help - If you wish to support CFI devices on a physical bus which is - 8 bits wide, say 'Y'. - --config MTD_CFI_B2 -- bool "Support 16-bit buswidth" -- depends on MTD_CFI_GEOMETRY -+config MTD_MAP_BANK_WIDTH_2 -+ bool "Support 16-bit buswidth" if MTD_CFI_GEOMETRY -+ default y - help - If you wish to support CFI devices on a physical bus which is - 16 bits wide, say 'Y'. - --config MTD_CFI_B4 -- bool "Support 32-bit buswidth" -- depends on MTD_CFI_GEOMETRY -+config MTD_MAP_BANK_WIDTH_4 -+ bool "Support 32-bit buswidth" if MTD_CFI_GEOMETRY -+ default y - help - If you wish to support CFI devices on a physical bus which is - 32 bits wide, say 'Y'. - --config MTD_CFI_B8 -- bool "Support 64-bit buswidth" -- depends on MTD_CFI_GEOMETRY -+config MTD_MAP_BANK_WIDTH_8 -+ bool "Support 64-bit buswidth" if MTD_CFI_GEOMETRY -+ default n - help - If you wish to support CFI devices on a physical bus which is - 64 bits wide, say 'Y'. 
- -+config MTD_MAP_BANK_WIDTH_16 -+ bool "Support 128-bit buswidth" if MTD_CFI_GEOMETRY -+ default n -+ help -+ If you wish to support CFI devices on a physical bus which is -+ 128 bits wide, say 'Y'. -+ -+config MTD_MAP_BANK_WIDTH_32 -+ bool "Support 256-bit buswidth" if MTD_CFI_GEOMETRY -+ default n -+ help -+ If you wish to support CFI devices on a physical bus which is -+ 256 bits wide, say 'Y'. -+ - config MTD_CFI_I1 -- bool "Support 1-chip flash interleave" if !MTD_CFI_B1 -- depends on MTD_CFI_GEOMETRY -- default y if MTD_CFI_B1 -+ bool "Support 1-chip flash interleave" if MTD_CFI_GEOMETRY -+ default y - help - If your flash chips are not interleaved - i.e. you only have one - flash chip addressed by each bus cycle, then say 'Y'. - - config MTD_CFI_I2 -- bool "Support 2-chip flash interleave" -- depends on MTD_CFI_GEOMETRY -+ bool "Support 2-chip flash interleave" if MTD_CFI_GEOMETRY -+ default y - help - If your flash chips are interleaved in pairs - i.e. you have two - flash chips addressed by each bus cycle, then say 'Y'. - - config MTD_CFI_I4 -- bool "Support 4-chip flash interleave" -- depends on MTD_CFI_GEOMETRY -+ bool "Support 4-chip flash interleave" if MTD_CFI_GEOMETRY -+ default n - help - If your flash chips are interleaved in fours - i.e. you have four - flash chips addressed by each bus cycle, then say 'Y'. - - config MTD_CFI_I8 -- bool "Support 8-chip flash interleave" -- depends on MTD_CFI_GEOMETRY -+ bool "Support 8-chip flash interleave" if MTD_CFI_GEOMETRY -+ default n - help - If your flash chips are interleaved in eights - i.e. you have eight - flash chips addressed by each bus cycle, then say 'Y'. -@@ -160,6 +173,27 @@ - provides support for one of those command sets, used on chips - including the AMD Am29LV320. 
- -+config MTD_CFI_AMDSTD_RETRY -+ int "Retry failed commands (erase/program)" -+ depends on MTD_CFI_AMDSTD -+ default "0" -+ help -+ Some chips, when attached to a shared bus, don't properly filter -+ bus traffic that is destined to other devices. This broken -+ behavior causes erase and program sequences to be aborted when -+ the sequences are mixed with traffic for other devices. -+ -+ SST49LF040 (and related) chips are know to be broken. -+ -+config MTD_CFI_AMDSTD_RETRY_MAX -+ int "Max retries of failed commands (erase/program)" -+ depends on MTD_CFI_AMDSTD_RETRY -+ default "0" -+ help -+ If you have an SST49LF040 (or related chip) then this value should -+ be set to at least 1. This can also be adjusted at driver load -+ time with the retry_cmd_max module parameter. -+ - config MTD_CFI_STAA - tristate "Support for ST (Advanced Architecture) flash chips" - depends on MTD_GEN_PROBE -@@ -168,6 +202,11 @@ - sets which a CFI-compliant chip may claim to implement. This code - provides support for one of those command sets. - -+config MTD_CFI_UTIL -+ tristate -+ default y if MTD_CFI_INTELEXT=y || MTD_CFI_AMDSTD=y || MTD_CFI_STAA=y -+ default m if MTD_CFI_INTELEXT=m || MTD_CFI_AMDSTD=m || MTD_CFI_STAA=m -+ - config MTD_RAM - tristate "Support for RAM chips in bus mapping" - depends on MTD -@@ -194,6 +233,7 @@ - with this driver will return -ENODEV upon access. 
- - config MTD_OBSOLETE_CHIPS -+ depends on MTD && BROKEN - bool "Older (theoretically obsoleted now) drivers for non-CFI chips" - help - This option does not enable any code directly, but will allow you to -Index: linux-2.6.5/drivers/mtd/chips/Makefile -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/chips/Makefile 2004-04-03 22:36:53.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/chips/Makefile 2005-02-01 17:11:17.000000000 -0500 -@@ -1,18 +1,19 @@ - # - # linux/drivers/chips/Makefile - # --# $Id: Makefile.common,v 1.1 2003/05/21 15:00:01 dwmw2 Exp $ -+# $Id: Makefile.common,v 1.4 2004/07/12 16:07:30 dwmw2 Exp $ - - # *** BIG UGLY NOTE *** - # - # The removal of get_module_symbol() and replacement with - # inter_module_register() et al has introduced a link order dependency - # here where previously there was none. We now have to ensure that --# the CFI command set drivers are linked before cfi_probe.o -+# the CFI command set drivers are linked before gen_probe.o - - obj-$(CONFIG_MTD) += chipreg.o - obj-$(CONFIG_MTD_AMDSTD) += amd_flash.o - obj-$(CONFIG_MTD_CFI) += cfi_probe.o -+obj-$(CONFIG_MTD_CFI_UTIL) += cfi_util.o - obj-$(CONFIG_MTD_CFI_STAA) += cfi_cmdset_0020.o - obj-$(CONFIG_MTD_CFI_AMDSTD) += cfi_cmdset_0002.o - obj-$(CONFIG_MTD_CFI_INTELEXT) += cfi_cmdset_0001.o -Index: linux-2.6.5/drivers/mtd/chips/amd_flash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/chips/amd_flash.c 2004-04-03 22:36:53.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/chips/amd_flash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -3,7 +3,7 @@ - * - * Author: Jonas Holmberg <jonas.holmberg@axis.com> - * -- * $Id: amd_flash.c,v 1.23 2003/06/12 09:24:13 dwmw2 Exp $ -+ * $Id: amd_flash.c,v 1.25 2004/08/09 13:19:43 dwmw2 Exp $ - * - * Copyright (c) 2001 Axis Communications AB - * -@@ -718,7 +718,7 @@ - "memory for MTD erase region info\n", map->name); - kfree(mtd); - map->fldrv_priv = 
NULL; -- return 0; -+ return NULL; - } - - reg_idx = 0; -@@ -780,8 +780,8 @@ - map->fldrv_priv = private; - - map->fldrv = &amd_flash_chipdrv; -- MOD_INC_USE_COUNT; - -+ __module_get(THIS_MODULE); - return mtd; - } - -@@ -1307,9 +1307,7 @@ - } - - instr->state = MTD_ERASE_DONE; -- if (instr->callback) { -- instr->callback(instr); -- } -+ mtd_erase_callback(instr); - - return 0; - } -Index: linux-2.6.5/drivers/mtd/chips/cfi_cmdset_0001.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/chips/cfi_cmdset_0001.c 2004-04-03 22:36:54.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/chips/cfi_cmdset_0001.c 2005-02-01 17:11:17.000000000 -0500 -@@ -4,7 +4,7 @@ - * - * (C) 2000 Red Hat. GPL'd - * -- * $Id: cfi_cmdset_0001.c,v 1.126 2003/06/23 07:45:48 dwmw2 Exp $ -+ * $Id: cfi_cmdset_0001.c,v 1.156 2004/09/17 11:45:05 eric Exp $ - * - * - * 10/10/2000 Nicolas Pitre <nico@cam.org> -@@ -34,12 +34,20 @@ - #include <linux/mtd/compatmac.h> - #include <linux/mtd/cfi.h> - -+/* #define CMDSET0001_DISABLE_ERASE_SUSPEND_ON_WRITE */ -+ - // debugging, turns off buffer write mode if set to 1 - #define FORCE_WORD_WRITE 0 - -+#define MANUFACTURER_INTEL 0x0089 -+#define I82802AB 0x00ad -+#define I82802AC 0x00ac -+#define MANUFACTURER_ST 0x0020 -+#define M50LPW080 0x002F -+ - static int cfi_intelext_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *); --static int cfi_intelext_read_user_prot_reg (struct mtd_info *, loff_t, size_t, size_t *, u_char *); --static int cfi_intelext_read_fact_prot_reg (struct mtd_info *, loff_t, size_t, size_t *, u_char *); -+//static int cfi_intelext_read_user_prot_reg (struct mtd_info *, loff_t, size_t, size_t *, u_char *); -+//static int cfi_intelext_read_fact_prot_reg (struct mtd_info *, loff_t, size_t, size_t *, u_char *); - static int cfi_intelext_write_words(struct mtd_info *, loff_t, size_t, size_t *, const u_char *); - static int cfi_intelext_write_buffers(struct mtd_info *, loff_t, size_t, 
size_t *, const u_char *); - static int cfi_intelext_erase_varsize(struct mtd_info *, struct erase_info *); -@@ -53,13 +61,19 @@ - - struct mtd_info *cfi_cmdset_0001(struct map_info *, int); - --static struct mtd_info *cfi_intelext_setup (struct map_info *); -+static struct mtd_info *cfi_intelext_setup (struct mtd_info *); -+static int cfi_intelext_partition_fixup(struct map_info *, struct cfi_private **); - - static int cfi_intelext_point (struct mtd_info *mtd, loff_t from, size_t len, - size_t *retlen, u_char **mtdbuf); - static void cfi_intelext_unpoint (struct mtd_info *mtd, u_char *addr, loff_t from, - size_t len); - -+static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr, int mode); -+static void put_chip(struct map_info *map, struct flchip *chip, unsigned long adr); -+#include "fwh_lock.h" -+ -+ - - /* - * *********** SETUP AND PROBE BITS *********** -@@ -79,17 +93,18 @@ - static void cfi_tell_features(struct cfi_pri_intelext *extp) - { - int i; -- printk(" Feature/Command Support: %4.4X\n", extp->FeatureSupport); -- printk(" - Chip Erase: %s\n", extp->FeatureSupport&1?"supported":"unsupported"); -- printk(" - Suspend Erase: %s\n", extp->FeatureSupport&2?"supported":"unsupported"); -- printk(" - Suspend Program: %s\n", extp->FeatureSupport&4?"supported":"unsupported"); -- printk(" - Legacy Lock/Unlock: %s\n", extp->FeatureSupport&8?"supported":"unsupported"); -- printk(" - Queued Erase: %s\n", extp->FeatureSupport&16?"supported":"unsupported"); -- printk(" - Instant block lock: %s\n", extp->FeatureSupport&32?"supported":"unsupported"); -- printk(" - Protection Bits: %s\n", extp->FeatureSupport&64?"supported":"unsupported"); -- printk(" - Page-mode read: %s\n", extp->FeatureSupport&128?"supported":"unsupported"); -- printk(" - Synchronous read: %s\n", extp->FeatureSupport&256?"supported":"unsupported"); -- for (i=9; i<32; i++) { -+ printk(" Feature/Command Support: %4.4X\n", extp->FeatureSupport); -+ printk(" - Chip Erase: %s\n", 
extp->FeatureSupport&1?"supported":"unsupported"); -+ printk(" - Suspend Erase: %s\n", extp->FeatureSupport&2?"supported":"unsupported"); -+ printk(" - Suspend Program: %s\n", extp->FeatureSupport&4?"supported":"unsupported"); -+ printk(" - Legacy Lock/Unlock: %s\n", extp->FeatureSupport&8?"supported":"unsupported"); -+ printk(" - Queued Erase: %s\n", extp->FeatureSupport&16?"supported":"unsupported"); -+ printk(" - Instant block lock: %s\n", extp->FeatureSupport&32?"supported":"unsupported"); -+ printk(" - Protection Bits: %s\n", extp->FeatureSupport&64?"supported":"unsupported"); -+ printk(" - Page-mode read: %s\n", extp->FeatureSupport&128?"supported":"unsupported"); -+ printk(" - Synchronous read: %s\n", extp->FeatureSupport&256?"supported":"unsupported"); -+ printk(" - Simultaneous operations: %s\n", extp->FeatureSupport&512?"supported":"unsupported"); -+ for (i=10; i<32; i++) { - if (extp->FeatureSupport & (1<<i)) - printk(" - Unknown Bit %X: supported\n", i); - } -@@ -110,13 +125,93 @@ - } - - printk(" Vcc Logic Supply Optimum Program/Erase Voltage: %d.%d V\n", -- extp->VccOptimal >> 8, extp->VccOptimal & 0xf); -+ extp->VccOptimal >> 4, extp->VccOptimal & 0xf); - if (extp->VppOptimal) - printk(" Vpp Programming Supply Optimum Program/Erase Voltage: %d.%d V\n", -- extp->VppOptimal >> 8, extp->VppOptimal & 0xf); -+ extp->VppOptimal >> 4, extp->VppOptimal & 0xf); -+} -+#endif -+ -+#ifdef CMDSET0001_DISABLE_ERASE_SUSPEND_ON_WRITE -+/* Some Intel Strata Flash prior to FPO revision C has bugs in this area */ -+static void fixup_intel_strataflash(struct mtd_info *mtd, void* param) -+{ -+ struct map_info *map = mtd->priv; -+ struct cfi_private *cfi = map->fldrv_priv; -+ struct cfi_pri_amdstd *extp = cfi->cmdset_priv; -+ -+ printk(KERN_WARNING "cfi_cmdset_0001: Suspend " -+ "erase on write disabled.\n"); -+ extp->SuspendCmdSupport &= ~1; - } - #endif - -+static void fixup_st_m28w320ct(struct mtd_info *mtd, void* param) -+{ -+ struct map_info *map = mtd->priv; -+ 
struct cfi_private *cfi = map->fldrv_priv; -+ -+ cfi->cfiq->BufWriteTimeoutTyp = 0; /* Not supported */ -+ cfi->cfiq->BufWriteTimeoutMax = 0; /* Not supported */ -+} -+ -+static void fixup_st_m28w320cb(struct mtd_info *mtd, void* param) -+{ -+ struct map_info *map = mtd->priv; -+ struct cfi_private *cfi = map->fldrv_priv; -+ -+ /* Note this is done after the region info is endian swapped */ -+ cfi->cfiq->EraseRegionInfo[1] = -+ (cfi->cfiq->EraseRegionInfo[1] & 0xffff0000) | 0x3e; -+}; -+ -+static void fixup_use_point(struct mtd_info *mtd, void *param) -+{ -+ struct map_info *map = mtd->priv; -+ if (!mtd->point && map_is_linear(map)) { -+ mtd->point = cfi_intelext_point; -+ mtd->unpoint = cfi_intelext_unpoint; -+ } -+} -+ -+static void fixup_use_write_buffers(struct mtd_info *mtd, void *param) -+{ -+ struct map_info *map = mtd->priv; -+ struct cfi_private *cfi = map->fldrv_priv; -+ if (cfi->cfiq->BufWriteTimeoutTyp) { -+ printk(KERN_INFO "Using buffer write method\n" ); -+ mtd->write = cfi_intelext_write_buffers; -+ } -+} -+ -+static struct cfi_fixup cfi_fixup_table[] = { -+#ifdef CMDSET0001_DISABLE_ERASE_SUSPEND_ON_WRITE -+ { CFI_MFR_ANY, CFI_ID_ANY, fixup_intel_strataflash, NULL }, -+#endif -+#if !FORCE_WORD_WRITE -+ { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers, NULL }, -+#endif -+ { CFI_MFR_ST, 0x00ba, /* M28W320CT */ fixup_st_m28w320ct, NULL }, -+ { CFI_MFR_ST, 0x00bb, /* M28W320CB */ fixup_st_m28w320cb, NULL }, -+ { 0, 0, NULL, NULL } -+}; -+ -+static struct cfi_fixup jedec_fixup_table[] = { -+ { MANUFACTURER_INTEL, I82802AB, fixup_use_fwh_lock, NULL, }, -+ { MANUFACTURER_INTEL, I82802AC, fixup_use_fwh_lock, NULL, }, -+ { MANUFACTURER_ST, M50LPW080, fixup_use_fwh_lock, NULL, }, -+ { 0, 0, NULL, NULL } -+}; -+static struct cfi_fixup fixup_table[] = { -+ /* The CFI vendor ids and the JEDEC vendor IDs appear -+ * to be common. It is like the devices id's are as -+ * well. This table is to pick all cases where -+ * we know that is the case. 
-+ */ -+ { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_point, NULL }, -+ { 0, 0, NULL, NULL } -+}; -+ - /* This routine is made available to other mtd code via - * inter_module_register. It must only be accessed through - * inter_module_get which will bump the use count of this module. The -@@ -127,9 +222,30 @@ - struct mtd_info *cfi_cmdset_0001(struct map_info *map, int primary) - { - struct cfi_private *cfi = map->fldrv_priv; -+ struct mtd_info *mtd; - int i; -- __u32 base = cfi->chips[0].start; - -+ mtd = kmalloc(sizeof(*mtd), GFP_KERNEL); -+ if (!mtd) { -+ printk(KERN_ERR "Failed to allocate memory for MTD device\n"); -+ return NULL; -+ } -+ memset(mtd, 0, sizeof(*mtd)); -+ mtd->priv = map; -+ mtd->type = MTD_NORFLASH; -+ -+ /* Fill in the default mtd operations */ -+ mtd->erase = cfi_intelext_erase_varsize; -+ mtd->read = cfi_intelext_read; -+ mtd->write = cfi_intelext_write_words; -+ mtd->sync = cfi_intelext_sync; -+ mtd->lock = cfi_intelext_lock; -+ mtd->unlock = cfi_intelext_unlock; -+ mtd->suspend = cfi_intelext_suspend; -+ mtd->resume = cfi_intelext_resume; -+ mtd->flags = MTD_CAP_NORFLASH; -+ mtd->name = map->name; -+ - if (cfi->cfi_mode == CFI_MODE_CFI) { - /* - * It's a real CFI chip, not one for which the probe -@@ -138,33 +254,10 @@ - */ - __u16 adr = primary?cfi->cfiq->P_ADR:cfi->cfiq->A_ADR; - struct cfi_pri_intelext *extp; -- int ofs_factor = cfi->interleave * cfi->device_type; -- -- //printk(" Intel/Sharp Extended Query Table at 0x%4.4X\n", adr); -- if (!adr) -- return NULL; - -- /* Switch it into Query Mode */ -- cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); -- -- extp = kmalloc(sizeof(*extp), GFP_KERNEL); -+ extp = (struct cfi_pri_intelext*)cfi_read_pri(map, adr, sizeof(*extp), "Intel/Sharp"); - if (!extp) { -- printk(KERN_ERR "Failed to allocate memory\n"); -- return NULL; -- } -- -- /* Read in the Extended Query Table */ -- for (i=0; i<sizeof(*extp); i++) { -- ((unsigned char *)extp)[i] = -- cfi_read_query(map, 
(base+((adr+i)*ofs_factor))); -- } -- -- if (extp->MajorVersion != '1' || -- (extp->MinorVersion < '0' || extp->MinorVersion > '3')) { -- printk(KERN_WARNING " Unknown IntelExt Extended Query " -- "version %c.%c.\n", extp->MajorVersion, -- extp->MinorVersion); -- kfree(extp); -+ kfree(mtd); - return NULL; - } - -@@ -172,6 +265,11 @@ - extp->FeatureSupport = le32_to_cpu(extp->FeatureSupport); - extp->BlkStatusRegMask = le16_to_cpu(extp->BlkStatusRegMask); - extp->ProtRegAddr = le16_to_cpu(extp->ProtRegAddr); -+ -+ /* Install our own private info structure */ -+ cfi->cmdset_priv = extp; -+ -+ cfi_fixup(mtd, cfi_fixup_table); - - #ifdef DEBUG_CFI_FEATURES - /* Tell the user about it in lots of lovely detail */ -@@ -179,19 +277,15 @@ - #endif - - if(extp->SuspendCmdSupport & 1) { --//#define CMDSET0001_DISABLE_ERASE_SUSPEND_ON_WRITE --#ifdef CMDSET0001_DISABLE_ERASE_SUSPEND_ON_WRITE --/* Some Intel Strata Flash prior to FPO revision C has bugs in this area */ -- printk(KERN_WARNING "cfi_cmdset_0001: Suspend " -- "erase on write disabled.\n"); -- extp->SuspendCmdSupport &= ~1; --#else - printk(KERN_NOTICE "cfi_cmdset_0001: Erase suspend on write enabled\n"); --#endif - } -- /* Install our own private info structure */ -- cfi->cmdset_priv = extp; - } -+ else if (cfi->cfi_mode == CFI_MODE_JEDEC) { -+ /* Apply jedec specific fixups */ -+ cfi_fixup(mtd, jedec_fixup_table); -+ } -+ /* Apply generic fixups */ -+ cfi_fixup(mtd, fixup_table); - - for (i=0; i< cfi->numchips; i++) { - cfi->chips[i].word_write_time = 1<<cfi->cfiq->WordWriteTimeoutTyp; -@@ -202,30 +296,19 @@ - - map->fldrv = &cfi_intelext_chipdrv; - -- /* Make sure it's in read mode */ -- cfi_send_gen_cmd(0xff, 0x55, base, map, cfi, cfi->device_type, NULL); -- return cfi_intelext_setup(map); -+ return cfi_intelext_setup(mtd); - } - --static struct mtd_info *cfi_intelext_setup(struct map_info *map) -+static struct mtd_info *cfi_intelext_setup(struct mtd_info *mtd) - { -+ struct map_info *map = mtd->priv; - struct 
cfi_private *cfi = map->fldrv_priv; -- struct mtd_info *mtd; - unsigned long offset = 0; - int i,j; - unsigned long devsize = (1<<cfi->cfiq->DevSize) * cfi->interleave; - -- mtd = kmalloc(sizeof(*mtd), GFP_KERNEL); - //printk(KERN_DEBUG "number of CFI chips: %d\n", cfi->numchips); - -- if (!mtd) { -- printk(KERN_ERR "Failed to allocate memory for MTD device\n"); -- goto setup_err; -- } -- -- memset(mtd, 0, sizeof(*mtd)); -- mtd->priv = map; -- mtd->type = MTD_NORFLASH; - mtd->size = devsize * cfi->numchips; - - mtd->numeraseregions = cfi->cfiq->NumEraseRegions * cfi->numchips; -@@ -265,32 +348,16 @@ - mtd->eraseregions[i].numblocks); - } - -- /* Also select the correct geometry setup too */ -- mtd->erase = cfi_intelext_erase_varsize; -- mtd->read = cfi_intelext_read; -- -- if (map_is_linear(map)) { -- mtd->point = cfi_intelext_point; -- mtd->unpoint = cfi_intelext_unpoint; -- } -- -- if ( cfi->cfiq->BufWriteTimeoutTyp && !FORCE_WORD_WRITE) { -- printk(KERN_INFO "Using buffer write method\n" ); -- mtd->write = cfi_intelext_write_buffers; -- } else { -- printk(KERN_INFO "Using word write method\n" ); -- mtd->write = cfi_intelext_write_words; -- } -+#if 0 - mtd->read_user_prot_reg = cfi_intelext_read_user_prot_reg; - mtd->read_fact_prot_reg = cfi_intelext_read_fact_prot_reg; -- mtd->sync = cfi_intelext_sync; -- mtd->lock = cfi_intelext_lock; -- mtd->unlock = cfi_intelext_unlock; -- mtd->suspend = cfi_intelext_suspend; -- mtd->resume = cfi_intelext_resume; -- mtd->flags = MTD_CAP_NORFLASH; -- map->fldrv = &cfi_intelext_chipdrv; -- mtd->name = map->name; -+#endif -+ -+ /* This function has the potential to distort the reality -+ a bit and therefore should be called last. 
*/ -+ if (cfi_intelext_partition_fixup(map, &cfi) != 0) -+ goto setup_err; -+ - __module_get(THIS_MODULE); - return mtd; - -@@ -301,10 +368,87 @@ - kfree(mtd); - } - kfree(cfi->cmdset_priv); -- kfree(cfi->cfiq); - return NULL; - } - -+static int cfi_intelext_partition_fixup(struct map_info *map, -+ struct cfi_private **pcfi) -+{ -+ struct cfi_private *cfi = *pcfi; -+ struct cfi_pri_intelext *extp = cfi->cmdset_priv; -+ -+ /* -+ * Probing of multi-partition flash ships. -+ * -+ * This is extremely crude at the moment and should probably be -+ * extracted entirely from the Intel extended query data instead. -+ * Right now a L18 flash is assumed if multiple operations is -+ * detected. -+ * -+ * To support multiple partitions when available, we simply arrange -+ * for each of them to have their own flchip structure even if they -+ * are on the same physical chip. This means completely recreating -+ * a new cfi_private structure right here which is a blatent code -+ * layering violation, but this is still the least intrusive -+ * arrangement at this point. This can be rearranged in the future -+ * if someone feels motivated enough. --nico -+ */ -+ if (extp && extp->FeatureSupport & (1 << 9)) { -+ struct cfi_private *newcfi; -+ struct flchip *chip; -+ struct flchip_shared *shared; -+ int numparts, partshift, numvirtchips, i, j; -+ -+ /* -+ * The L18 flash memory array is divided -+ * into multiple 8-Mbit partitions. 
-+ */ -+ numparts = 1 << (cfi->cfiq->DevSize - 20); -+ partshift = 20 + __ffs(cfi->interleave); -+ numvirtchips = cfi->numchips * numparts; -+ -+ newcfi = kmalloc(sizeof(struct cfi_private) + numvirtchips * sizeof(struct flchip), GFP_KERNEL); -+ if (!newcfi) -+ return -ENOMEM; -+ shared = kmalloc(sizeof(struct flchip_shared) * cfi->numchips, GFP_KERNEL); -+ if (!shared) { -+ kfree(newcfi); -+ return -ENOMEM; -+ } -+ memcpy(newcfi, cfi, sizeof(struct cfi_private)); -+ newcfi->numchips = numvirtchips; -+ newcfi->chipshift = partshift; -+ -+ chip = &newcfi->chips[0]; -+ for (i = 0; i < cfi->numchips; i++) { -+ shared[i].writing = shared[i].erasing = NULL; -+ spin_lock_init(&shared[i].lock); -+ for (j = 0; j < numparts; j++) { -+ *chip = cfi->chips[i]; -+ chip->start += j << partshift; -+ chip->priv = &shared[i]; -+ /* those should be reset too since -+ they create memory references. */ -+ init_waitqueue_head(&chip->wq); -+ spin_lock_init(&chip->_spinlock); -+ chip->mutex = &chip->_spinlock; -+ chip++; -+ } -+ } -+ -+ printk(KERN_DEBUG "%s: %d sets of %d interleaved chips " -+ "--> %d partitions of %#x bytes\n", -+ map->name, cfi->numchips, cfi->interleave, -+ newcfi->numchips, 1<<newcfi->chipshift); -+ -+ map->fldrv_priv = newcfi; -+ *pcfi = newcfi; -+ kfree(cfi); -+ } -+ -+ return 0; -+} -+ - /* - * *********** CHIP ACCESS FUNCTIONS *********** - */ -@@ -313,25 +457,87 @@ - { - DECLARE_WAITQUEUE(wait, current); - struct cfi_private *cfi = map->fldrv_priv; -- cfi_word status, status_OK = CMD(0x80); -+ map_word status, status_OK = CMD(0x80), status_PWS = CMD(0x01); - unsigned long timeo; -- struct cfi_pri_intelext *cfip = (struct cfi_pri_intelext *)cfi->cmdset_priv; -+ struct cfi_pri_intelext *cfip = cfi->cmdset_priv; - - resettime: - timeo = jiffies + HZ; - retry: -+ if (chip->priv && (mode == FL_WRITING || mode == FL_ERASING)) { -+ /* -+ * OK. 
We have possibility for contension on the write/erase -+ * operations which are global to the real chip and not per -+ * partition. So let's fight it over in the partition which -+ * currently has authority on the operation. -+ * -+ * The rules are as follows: -+ * -+ * - any write operation must own shared->writing. -+ * -+ * - any erase operation must own _both_ shared->writing and -+ * shared->erasing. -+ * -+ * - contension arbitration is handled in the owner's context. -+ * -+ * The 'shared' struct can be read when its lock is taken. -+ * However any writes to it can only be made when the current -+ * owner's lock is also held. -+ */ -+ struct flchip_shared *shared = chip->priv; -+ struct flchip *contender; -+ spin_lock(&shared->lock); -+ contender = shared->writing; -+ if (contender && contender != chip) { -+ /* -+ * The engine to perform desired operation on this -+ * partition is already in use by someone else. -+ * Let's fight over it in the context of the chip -+ * currently using it. If it is possible to suspend, -+ * that other partition will do just that, otherwise -+ * it'll happily send us to sleep. In any case, when -+ * get_chip returns success we're clear to go ahead. 
-+ */ -+ int ret = spin_trylock(contender->mutex); -+ spin_unlock(&shared->lock); -+ if (!ret) -+ goto retry; -+ spin_unlock(chip->mutex); -+ ret = get_chip(map, contender, contender->start, mode); -+ spin_lock(chip->mutex); -+ if (ret) { -+ spin_unlock(contender->mutex); -+ return ret; -+ } -+ timeo = jiffies + HZ; -+ spin_lock(&shared->lock); -+ } -+ -+ /* We now own it */ -+ shared->writing = chip; -+ if (mode == FL_ERASING) -+ shared->erasing = chip; -+ if (contender && contender != chip) -+ spin_unlock(contender->mutex); -+ spin_unlock(&shared->lock); -+ } -+ - switch (chip->state) { - - case FL_STATUS: - for (;;) { -- status = cfi_read(map, adr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, adr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) -+ break; -+ -+ /* At this point we're fine with write operations -+ in other partitions as they don't conflict. */ -+ if (chip->priv && map_word_andequal(map, status, status_PWS, status_PWS)) - break; - - if (time_after(jiffies, timeo)) { -- printk(KERN_ERR "Waiting for chip to be ready timed out. Status %llx\n", -- (long long)status); -- spin_unlock(chip->mutex); -+ printk(KERN_ERR "Waiting for chip to be ready timed out. Status %lx\n", -+ status.x[0]); - return -EIO; - } - spin_unlock(chip->mutex); -@@ -347,38 +553,39 @@ - return 0; - - case FL_ERASING: -- if (!(cfip->FeatureSupport & 2) || -+ if (!cfip || -+ !(cfip->FeatureSupport & 2) || - !(mode == FL_READY || mode == FL_POINT || - (mode == FL_WRITING && (cfip->SuspendCmdSupport & 1)))) - goto sleep; - - - /* Erase suspend */ -- cfi_write(map, CMD(0xB0), adr); -+ map_write(map, CMD(0xB0), adr); - - /* If the flash has finished erasing, then 'erase suspend' - * appears to make some (28F320) flash devices switch to - * 'read' mode. Make sure that we switch to 'read status' - * mode so we get the right data. 
--rmk - */ -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x70), adr); - chip->oldstate = FL_ERASING; - chip->state = FL_ERASE_SUSPENDING; - chip->erase_suspended = 1; - for (;;) { -- status = cfi_read(map, adr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, adr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) - break; - - if (time_after(jiffies, timeo)) { - /* Urgh. Resume and pretend we weren't here. */ -- cfi_write(map, CMD(0xd0), adr); -+ map_write(map, CMD(0xd0), adr); - /* Make sure we're in 'read status' mode if it had finished */ -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x70), adr); - chip->state = FL_ERASING; - chip->oldstate = FL_READY; - printk(KERN_ERR "Chip not ready after erase " -- "suspended: status = 0x%x\n", status); -+ "suspended: status = 0x%lx\n", status.x[0]); - return -EIO; - } - -@@ -412,6 +619,32 @@ - { - struct cfi_private *cfi = map->fldrv_priv; - -+ if (chip->priv) { -+ struct flchip_shared *shared = chip->priv; -+ spin_lock(&shared->lock); -+ if (shared->writing == chip) { -+ /* We own the ability to write, but we're done */ -+ shared->writing = shared->erasing; -+ if (shared->writing && shared->writing != chip) { -+ /* give back ownership to who we loaned it from */ -+ struct flchip *loaner = shared->writing; -+ spin_lock(loaner->mutex); -+ spin_unlock(&shared->lock); -+ spin_unlock(chip->mutex); -+ put_chip(map, loaner, loaner->start); -+ spin_lock(chip->mutex); -+ spin_unlock(loaner->mutex); -+ } else { -+ if (chip->oldstate != FL_ERASING) { -+ shared->erasing = NULL; -+ if (chip->oldstate != FL_WRITING) -+ shared->writing = NULL; -+ } -+ spin_unlock(&shared->lock); -+ } -+ } -+ } -+ - switch(chip->oldstate) { - case FL_ERASING: - chip->state = chip->oldstate; -@@ -424,13 +657,15 @@ - sending the 0x70 (Read Status) command to an erasing - chip and expecting it to be ignored, that's what we - do. 
*/ -- cfi_write(map, CMD(0xd0), adr); -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0xd0), adr); -+ map_write(map, CMD(0x70), adr); - chip->oldstate = FL_READY; - chip->state = FL_ERASING; - break; - - case FL_READY: -+ case FL_STATUS: -+ case FL_JEDEC_QUERY: - /* We should really make set_vpp() count, rather than doing this */ - DISABLE_VPP(map); - break; -@@ -449,7 +684,7 @@ - adr += chip->start; - - /* Ensure cmd read/writes are aligned. */ -- cmd_addr = adr & ~(CFIDEV_BUSWIDTH-1); -+ cmd_addr = adr & ~(map_bankwidth(map)-1); - - spin_lock(chip->mutex); - -@@ -457,7 +692,7 @@ - - if (!ret) { - if (chip->state != FL_POINT && chip->state != FL_READY) -- cfi_write(map, CMD(0xff), cmd_addr); -+ map_write(map, CMD(0xff), cmd_addr); - - chip->state = FL_POINT; - chip->ref_point_counter++; -@@ -475,12 +710,10 @@ - int chipnum; - int ret = 0; - -- if (from + len > mtd->size) -+ if (!map->virt || (from + len > mtd->size)) - return -EINVAL; - - *mtdbuf = (void *)map->virt + from; -- if(*mtdbuf == NULL) -- return -EINVAL; /* can not point this region */ - *retlen = 0; - - /* Now lock the chip(s) to POINT state */ -@@ -565,7 +798,7 @@ - adr += chip->start; - - /* Ensure cmd read/writes are aligned. 
*/ -- cmd_addr = adr & ~(CFIDEV_BUSWIDTH-1); -+ cmd_addr = adr & ~(map_bankwidth(map)-1); - - spin_lock(chip->mutex); - ret = get_chip(map, chip, cmd_addr, FL_READY); -@@ -575,7 +808,7 @@ - } - - if (chip->state != FL_POINT && chip->state != FL_READY) { -- cfi_write(map, CMD(0xff), cmd_addr); -+ map_write(map, CMD(0xff), cmd_addr); - - chip->state = FL_READY; - } -@@ -626,7 +859,7 @@ - } - return ret; - } -- -+#if 0 - static int cfi_intelext_read_prot_reg (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf, int base_offst, int reg_sz) - { - struct map_info *map = mtd->priv; -@@ -657,7 +890,7 @@ - } - - if (chip->state != FL_JEDEC_QUERY) { -- cfi_write(map, CMD(0x90), chip->start); -+ map_write(map, CMD(0x90), chip->start); - chip->state = FL_JEDEC_QUERY; - } - -@@ -688,7 +921,7 @@ - int base_offst,reg_sz; - - /* Check that we actually have some protection registers */ -- if(!(extp->FeatureSupport&64)){ -+ if(!extp || !(extp->FeatureSupport&64)){ - printk(KERN_WARNING "%s: This flash device has no protection data to read!\n",map->name); - return 0; - } -@@ -707,7 +940,7 @@ - int base_offst,reg_sz; - - /* Check that we actually have some protection registers */ -- if(!(extp->FeatureSupport&64)){ -+ if(!extp || !(extp->FeatureSupport&64)){ - printk(KERN_WARNING "%s: This flash device has no protection data to read!\n",map->name); - return 0; - } -@@ -717,12 +950,12 @@ - - return cfi_intelext_read_prot_reg(mtd, from, len, retlen, buf, base_offst, reg_sz); - } -+#endif - -- --static int do_write_oneword(struct map_info *map, struct flchip *chip, unsigned long adr, cfi_word datum) -+static int do_write_oneword(struct map_info *map, struct flchip *chip, unsigned long adr, map_word datum) - { - struct cfi_private *cfi = map->fldrv_priv; -- cfi_word status, status_OK; -+ map_word status, status_OK; - unsigned long timeo; - int z, ret=0; - -@@ -739,11 +972,12 @@ - } - - ENABLE_VPP(map); -- cfi_write(map, CMD(0x40), adr); -- cfi_write(map, datum, adr); 
-+ map_write(map, CMD(0x40), adr); -+ map_write(map, datum, adr); - chip->state = FL_WRITING; - - spin_unlock(chip->mutex); -+ INVALIDATE_CACHED_RANGE(map, adr, map_bankwidth(map)); - cfi_udelay(chip->word_write_time); - spin_lock(chip->mutex); - -@@ -764,8 +998,8 @@ - continue; - } - -- status = cfi_read(map, adr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, adr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) - break; - - /* OK Still waiting */ -@@ -793,11 +1027,11 @@ - /* Done and happy. */ - chip->state = FL_STATUS; - /* check for lock bit */ -- if (status & CMD(0x02)) { -+ if (map_word_bitsset(map, status, CMD(0x02))) { - /* clear status */ -- cfi_write(map, CMD(0x50), adr); -+ map_write(map, CMD(0x50), adr); - /* put back into read status register mode */ -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x70), adr); - ret = -EROFS; - } - out: -@@ -824,35 +1058,22 @@ - ofs = to - (chipnum << cfi->chipshift); - - /* If it's not bus-aligned, do the first byte write */ -- if (ofs & (CFIDEV_BUSWIDTH-1)) { -- unsigned long bus_ofs = ofs & ~(CFIDEV_BUSWIDTH-1); -+ if (ofs & (map_bankwidth(map)-1)) { -+ unsigned long bus_ofs = ofs & ~(map_bankwidth(map)-1); - int gap = ofs - bus_ofs; -- int i = 0, n = 0; -- u_char tmp_buf[8]; -- cfi_word datum; -- -- while (gap--) -- tmp_buf[i++] = 0xff; -- while (len && i < CFIDEV_BUSWIDTH) -- tmp_buf[i++] = buf[n++], len--; -- while (i < CFIDEV_BUSWIDTH) -- tmp_buf[i++] = 0xff; -- -- if (cfi_buswidth_is_2()) { -- datum = *(__u16*)tmp_buf; -- } else if (cfi_buswidth_is_4()) { -- datum = *(__u32*)tmp_buf; -- } else if (cfi_buswidth_is_8()) { -- datum = *(__u64*)tmp_buf; -- } else { -- return -EINVAL; /* should never happen, but be safe */ -- } -+ int n; -+ map_word datum; -+ -+ n = min_t(int, len, map_bankwidth(map)-gap); -+ datum = map_word_ff(map); -+ datum = map_word_load_partial(map, datum, buf, gap, n); - - ret = do_write_oneword(map, &cfi->chips[chipnum], - bus_ofs, datum); - if 
(ret) - return ret; -- -+ -+ len -= n; - ofs += n; - buf += n; - (*retlen) += n; -@@ -865,30 +1086,18 @@ - } - } - -- while(len >= CFIDEV_BUSWIDTH) { -- cfi_word datum; -- -- if (cfi_buswidth_is_1()) { -- datum = *(__u8*)buf; -- } else if (cfi_buswidth_is_2()) { -- datum = *(__u16*)buf; -- } else if (cfi_buswidth_is_4()) { -- datum = *(__u32*)buf; -- } else if (cfi_buswidth_is_8()) { -- datum = *(__u64*)buf; -- } else { -- return -EINVAL; -- } -+ while(len >= map_bankwidth(map)) { -+ map_word datum = map_word_load(map, buf); - - ret = do_write_oneword(map, &cfi->chips[chipnum], - ofs, datum); - if (ret) - return ret; - -- ofs += CFIDEV_BUSWIDTH; -- buf += CFIDEV_BUSWIDTH; -- (*retlen) += CFIDEV_BUSWIDTH; -- len -= CFIDEV_BUSWIDTH; -+ ofs += map_bankwidth(map); -+ buf += map_bankwidth(map); -+ (*retlen) += map_bankwidth(map); -+ len -= map_bankwidth(map); - - if (ofs >> cfi->chipshift) { - chipnum ++; -@@ -898,32 +1107,18 @@ - } - } - -- if (len & (CFIDEV_BUSWIDTH-1)) { -- int i = 0, n = 0; -- u_char tmp_buf[8]; -- cfi_word datum; -- -- while (len--) -- tmp_buf[i++] = buf[n++]; -- while (i < CFIDEV_BUSWIDTH) -- tmp_buf[i++] = 0xff; -- -- if (cfi_buswidth_is_2()) { -- datum = *(__u16*)tmp_buf; -- } else if (cfi_buswidth_is_4()) { -- datum = *(__u32*)tmp_buf; -- } else if (cfi_buswidth_is_8()) { -- datum = *(__u64*)tmp_buf; -- } else { -- return -EINVAL; /* should never happen, but be safe */ -- } -+ if (len & (map_bankwidth(map)-1)) { -+ map_word datum; -+ -+ datum = map_word_ff(map); -+ datum = map_word_load_partial(map, datum, buf, 0, len); - - ret = do_write_oneword(map, &cfi->chips[chipnum], - ofs, datum); - if (ret) - return ret; - -- (*retlen) += n; -+ (*retlen) += len; - } - - return 0; -@@ -934,11 +1129,11 @@ - unsigned long adr, const u_char *buf, int len) - { - struct cfi_private *cfi = map->fldrv_priv; -- cfi_word status, status_OK; -+ map_word status, status_OK; - unsigned long cmd_adr, timeo; - int wbufsize, z, ret=0, bytes, words; - -- wbufsize = 
CFIDEV_INTERLEAVE << cfi->cfiq->MaxBufWriteSize; -+ wbufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize; - adr += chip->start; - cmd_adr = adr & ~(wbufsize-1); - -@@ -952,29 +1147,28 @@ - return ret; - } - -- if (chip->state != FL_STATUS) -- cfi_write(map, CMD(0x70), cmd_adr); -- -- status = cfi_read(map, cmd_adr); -- - /* §4.8 of the 28FxxxJ3A datasheet says "Any time SR.4 and/or SR.5 is set - [...], the device will not accept any more Write to Buffer commands". - So we must check here and reset those bits if they're set. Otherwise - we're just pissing in the wind */ -- if (status & CMD(0x30)) { -- printk(KERN_WARNING "SR.4 or SR.5 bits set in buffer write (status %x). Clearing.\n", status); -- cfi_write(map, CMD(0x50), cmd_adr); -- cfi_write(map, CMD(0x70), cmd_adr); -+ if (chip->state != FL_STATUS) -+ map_write(map, CMD(0x70), cmd_adr); -+ status = map_read(map, cmd_adr); -+ if (map_word_bitsset(map, status, CMD(0x30))) { -+ printk(KERN_WARNING "SR.4 or SR.5 bits set in buffer write (status %lx). Clearing.\n", status.x[0]); -+ map_write(map, CMD(0x50), cmd_adr); -+ map_write(map, CMD(0x70), cmd_adr); - } -+ - ENABLE_VPP(map); - chip->state = FL_WRITING_TO_BUFFER; - - z = 0; - for (;;) { -- cfi_write(map, CMD(0xe8), cmd_adr); -+ map_write(map, CMD(0xe8), cmd_adr); - -- status = cfi_read(map, cmd_adr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, cmd_adr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) - break; - - spin_unlock(chip->mutex); -@@ -983,84 +1177,47 @@ - - if (++z > 20) { - /* Argh. Not ready for write to buffer */ -- cfi_write(map, CMD(0x70), cmd_adr); -+ map_write(map, CMD(0x70), cmd_adr); - chip->state = FL_STATUS; -- printk(KERN_ERR "Chip not ready for buffer write. Xstatus = %llx, status = %llx\n", (__u64)status, (__u64)cfi_read(map, cmd_adr)); -+ printk(KERN_ERR "Chip not ready for buffer write. Xstatus = %lx, status = %lx\n", -+ status.x[0], map_read(map, cmd_adr).x[0]); - /* Odd. 
Clear status bits */ -- cfi_write(map, CMD(0x50), cmd_adr); -- cfi_write(map, CMD(0x70), cmd_adr); -+ map_write(map, CMD(0x50), cmd_adr); -+ map_write(map, CMD(0x70), cmd_adr); - ret = -EIO; - goto out; - } - } - - /* Write length of data to come */ -- bytes = len & (CFIDEV_BUSWIDTH-1); -- words = len / CFIDEV_BUSWIDTH; -- cfi_write(map, CMD(words - !bytes), cmd_adr ); -+ bytes = len & (map_bankwidth(map)-1); -+ words = len / map_bankwidth(map); -+ map_write(map, CMD(words - !bytes), cmd_adr ); - - /* Write data */ - z = 0; -- while(z < words * CFIDEV_BUSWIDTH) { -- if (cfi_buswidth_is_1()) { -- u8 *b = (u8 *)buf; -- -- map_write8 (map, *b++, adr+z); -- buf = (const u_char *)b; -- } else if (cfi_buswidth_is_2()) { -- u16 *b = (u16 *)buf; -- -- map_write16 (map, *b++, adr+z); -- buf = (const u_char *)b; -- } else if (cfi_buswidth_is_4()) { -- u32 *b = (u32 *)buf; -- -- map_write32 (map, *b++, adr+z); -- buf = (const u_char *)b; -- } else if (cfi_buswidth_is_8()) { -- u64 *b = (u64 *)buf; -- -- map_write64 (map, *b++, adr+z); -- buf = (const u_char *)b; -- } else { -- ret = -EINVAL; -- goto out; -- } -- z += CFIDEV_BUSWIDTH; -+ while(z < words * map_bankwidth(map)) { -+ map_word datum = map_word_load(map, buf); -+ map_write(map, datum, adr+z); -+ -+ z += map_bankwidth(map); -+ buf += map_bankwidth(map); - } -+ - if (bytes) { -- int i = 0, n = 0; -- u_char tmp_buf[8], *tmp_p = tmp_buf; -+ map_word datum; - -- while (bytes--) -- tmp_buf[i++] = buf[n++]; -- while (i < CFIDEV_BUSWIDTH) -- tmp_buf[i++] = 0xff; -- if (cfi_buswidth_is_2()) { -- u16 *b = (u16 *)tmp_p; -- -- map_write16 (map, *b++, adr+z); -- tmp_p = (u_char *)b; -- } else if (cfi_buswidth_is_4()) { -- u32 *b = (u32 *)tmp_p; -- -- map_write32 (map, *b++, adr+z); -- tmp_p = (u_char *)b; -- } else if (cfi_buswidth_is_8()) { -- u64 *b = (u64 *)tmp_p; -- -- map_write64 (map, *b++, adr+z); -- tmp_p = (u_char *)b; -- } else { -- ret = -EINVAL; -- goto out; -- } -+ datum = map_word_ff(map); -+ datum = 
map_word_load_partial(map, datum, buf, 0, bytes); -+ map_write(map, datum, adr+z); - } -+ - /* GO GO GO */ -- cfi_write(map, CMD(0xd0), cmd_adr); -+ map_write(map, CMD(0xd0), cmd_adr); - chip->state = FL_WRITING; - - spin_unlock(chip->mutex); -+ INVALIDATE_CACHED_RANGE(map, adr, len); - cfi_udelay(chip->buffer_write_time); - spin_lock(chip->mutex); - -@@ -1080,8 +1237,8 @@ - continue; - } - -- status = cfi_read(map, cmd_adr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, cmd_adr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) - break; - - /* OK Still waiting */ -@@ -1110,11 +1267,11 @@ - chip->state = FL_STATUS; - - /* check for lock bit */ -- if (status & CMD(0x02)) { -+ if (map_word_bitsset(map, status, CMD(0x02))) { - /* clear status */ -- cfi_write(map, CMD(0x50), cmd_adr); -+ map_write(map, CMD(0x50), cmd_adr); - /* put back into read status register mode */ -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x70), adr); - ret = -EROFS; - } - -@@ -1129,7 +1286,7 @@ - { - struct map_info *map = mtd->priv; - struct cfi_private *cfi = map->fldrv_priv; -- int wbufsize = CFIDEV_INTERLEAVE << cfi->cfiq->MaxBufWriteSize; -+ int wbufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize; - int ret = 0; - int chipnum; - unsigned long ofs; -@@ -1142,8 +1299,8 @@ - ofs = to - (chipnum << cfi->chipshift); - - /* If it's not bus-aligned, do the first word write */ -- if (ofs & (CFIDEV_BUSWIDTH-1)) { -- size_t local_len = (-ofs)&(CFIDEV_BUSWIDTH-1); -+ if (ofs & (map_bankwidth(map)-1)) { -+ size_t local_len = (-ofs)&(map_bankwidth(map)-1); - if (local_len > len) - local_len = len; - ret = cfi_intelext_write_words(mtd, to, local_len, -@@ -1162,7 +1319,6 @@ - } - } - -- /* Write buffer is worth it only if more than one word to write... 
*/ - while(len) { - /* We must not cross write block boundaries */ - int size = wbufsize - (ofs & (wbufsize-1)); -@@ -1189,102 +1345,11 @@ - return 0; - } - --typedef int (*varsize_frob_t)(struct map_info *map, struct flchip *chip, -- unsigned long adr, void *thunk); -- --static int cfi_intelext_varsize_frob(struct mtd_info *mtd, varsize_frob_t frob, -- loff_t ofs, size_t len, void *thunk) --{ -- struct map_info *map = mtd->priv; -- struct cfi_private *cfi = map->fldrv_priv; -- unsigned long adr; -- int chipnum, ret = 0; -- int i, first; -- struct mtd_erase_region_info *regions = mtd->eraseregions; -- -- if (ofs > mtd->size) -- return -EINVAL; -- -- if ((len + ofs) > mtd->size) -- return -EINVAL; -- -- /* Check that both start and end of the requested erase are -- * aligned with the erasesize at the appropriate addresses. -- */ -- -- i = 0; -- -- /* Skip all erase regions which are ended before the start of -- the requested erase. Actually, to save on the calculations, -- we skip to the first erase region which starts after the -- start of the requested erase, and then go back one. -- */ -- -- while (i < mtd->numeraseregions && ofs >= regions[i].offset) -- i++; -- i--; -- -- /* OK, now i is pointing at the erase region in which this -- erase request starts. Check the start of the requested -- erase range is aligned with the erase size which is in -- effect here. -- */ -- -- if (ofs & (regions[i].erasesize-1)) -- return -EINVAL; -- -- /* Remember the erase region we start on */ -- first = i; -- -- /* Next, check that the end of the requested erase is aligned -- * with the erase region at that address. 
-- */ -- -- while (i<mtd->numeraseregions && (ofs + len) >= regions[i].offset) -- i++; -- -- /* As before, drop back one to point at the region in which -- the address actually falls -- */ -- i--; -- -- if ((ofs + len) & (regions[i].erasesize-1)) -- return -EINVAL; -- -- chipnum = ofs >> cfi->chipshift; -- adr = ofs - (chipnum << cfi->chipshift); -- -- i=first; -- -- while(len) { -- ret = (*frob)(map, &cfi->chips[chipnum], adr, thunk); -- -- if (ret) -- return ret; -- -- adr += regions[i].erasesize; -- len -= regions[i].erasesize; -- -- if (adr % (1<< cfi->chipshift) == ((regions[i].offset + (regions[i].erasesize * regions[i].numblocks)) %( 1<< cfi->chipshift))) -- i++; -- -- if (adr >> cfi->chipshift) { -- adr = 0; -- chipnum++; -- -- if (chipnum >= cfi->numchips) -- break; -- } -- } -- -- return 0; --} -- -- --static int do_erase_oneblock(struct map_info *map, struct flchip *chip, unsigned long adr, void *thunk) -+static int do_erase_oneblock(struct map_info *map, struct flchip *chip, -+ unsigned long adr, int len, void *thunk) - { - struct cfi_private *cfi = map->fldrv_priv; -- cfi_word status, status_OK; -+ map_word status, status_OK; - unsigned long timeo; - int retries = 3; - DECLARE_WAITQUEUE(wait, current); -@@ -1305,17 +1370,17 @@ - - ENABLE_VPP(map); - /* Clear the status register first */ -- cfi_write(map, CMD(0x50), adr); -+ map_write(map, CMD(0x50), adr); - - /* Now erase */ -- cfi_write(map, CMD(0x20), adr); -- cfi_write(map, CMD(0xD0), adr); -+ map_write(map, CMD(0x20), adr); -+ map_write(map, CMD(0xD0), adr); - chip->state = FL_ERASING; - chip->erase_suspended = 0; - - spin_unlock(chip->mutex); -- set_current_state(TASK_UNINTERRUPTIBLE); -- schedule_timeout((chip->erase_time*HZ)/(2*1000)); -+ INVALIDATE_CACHED_RANGE(map, adr, len); -+ msleep(chip->erase_time / 2); - spin_lock(chip->mutex); - - /* FIXME. Use a timer to check this, and return immediately. 
*/ -@@ -1340,19 +1405,19 @@ - chip->erase_suspended = 0; - } - -- status = cfi_read(map, adr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, adr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) - break; - - /* OK Still waiting */ - if (time_after(jiffies, timeo)) { -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x70), adr); - chip->state = FL_STATUS; -- printk(KERN_ERR "waiting for erase at %08lx to complete timed out. Xstatus = %llx, status = %llx.\n", -- adr, (__u64)status, (__u64)cfi_read(map, adr)); -+ printk(KERN_ERR "waiting for erase at %08lx to complete timed out. Xstatus = %lx, status = %lx.\n", -+ adr, status.x[0], map_read(map, adr).x[0]); - /* Clear status bits */ -- cfi_write(map, CMD(0x50), adr); -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x50), adr); -+ map_write(map, CMD(0x70), adr); - DISABLE_VPP(map); - spin_unlock(chip->mutex); - return -EIO; -@@ -1369,43 +1434,46 @@ - ret = 0; - - /* We've broken this before. It doesn't hurt to be safe */ -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x70), adr); - chip->state = FL_STATUS; -- status = cfi_read(map, adr); -+ status = map_read(map, adr); - - /* check for lock bit */ -- if (status & CMD(0x3a)) { -- unsigned char chipstatus = status; -- if (status != CMD(status & 0xff)) { -- int i; -- for (i = 1; i<CFIDEV_INTERLEAVE; i++) { -- chipstatus |= status >> (cfi->device_type * 8); -+ if (map_word_bitsset(map, status, CMD(0x3a))) { -+ unsigned char chipstatus = status.x[0]; -+ if (!map_word_equal(map, status, CMD(chipstatus))) { -+ int i, w; -+ for (w=0; w<map_words(map); w++) { -+ for (i = 0; i<cfi_interleave(cfi); i++) { -+ chipstatus |= status.x[w] >> (cfi->device_type * 8); -+ } - } -- printk(KERN_WARNING "Status is not identical for all chips: 0x%llx. Merging to give 0x%02x\n", (__u64)status, chipstatus); -+ printk(KERN_WARNING "Status is not identical for all chips: 0x%lx. 
Merging to give 0x%02x\n", -+ status.x[0], chipstatus); - } - /* Reset the error bits */ -- cfi_write(map, CMD(0x50), adr); -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x50), adr); -+ map_write(map, CMD(0x70), adr); - - if ((chipstatus & 0x30) == 0x30) { -- printk(KERN_NOTICE "Chip reports improper command sequence: status 0x%llx\n", (__u64)status); -+ printk(KERN_NOTICE "Chip reports improper command sequence: status 0x%x\n", chipstatus); - ret = -EIO; - } else if (chipstatus & 0x02) { - /* Protection bit set */ - ret = -EROFS; - } else if (chipstatus & 0x8) { - /* Voltage */ -- printk(KERN_WARNING "Chip reports voltage low on erase: status 0x%llx\n", (__u64)status); -+ printk(KERN_WARNING "Chip reports voltage low on erase: status 0x%x\n", chipstatus); - ret = -EIO; - } else if (chipstatus & 0x20) { - if (retries--) { -- printk(KERN_DEBUG "Chip erase failed at 0x%08lx: status 0x%llx. Retrying...\n", adr, (__u64)status); -+ printk(KERN_DEBUG "Chip erase failed at 0x%08lx: status 0x%x. 
Retrying...\n", adr, chipstatus); - timeo = jiffies + HZ; - chip->state = FL_STATUS; - spin_unlock(chip->mutex); - goto retry; - } -- printk(KERN_DEBUG "Chip erase failed at 0x%08lx: status 0x%llx\n", adr, (__u64)status); -+ printk(KERN_DEBUG "Chip erase failed at 0x%08lx: status 0x%x\n", adr, chipstatus); - ret = -EIO; - } - } -@@ -1423,13 +1491,12 @@ - ofs = instr->addr; - len = instr->len; - -- ret = cfi_intelext_varsize_frob(mtd, do_erase_oneblock, ofs, len, 0); -+ ret = cfi_varsize_frob(mtd, do_erase_oneblock, ofs, len, NULL); - if (ret) - return ret; - - instr->state = MTD_ERASE_DONE; -- if (instr->callback) -- instr->callback(instr); -+ mtd_erase_callback(instr); - - return 0; - } -@@ -1475,7 +1542,8 @@ - } - - #ifdef DEBUG_LOCK_BITS --static int do_printlockstatus_oneblock(struct map_info *map, struct flchip *chip, unsigned long adr, void *thunk) -+static int do_printlockstatus_oneblock(struct map_info *map, struct flchip *chip, -+ unsigned long adr, int len, void *thunk) - { - struct cfi_private *cfi = map->fldrv_priv; - int ofs_factor = cfi->interleave * cfi->device_type; -@@ -1483,8 +1551,7 @@ - cfi_send_gen_cmd(0x90, 0x55, 0, map, cfi, cfi->device_type, NULL); - printk(KERN_DEBUG "block status register for 0x%08lx is %x\n", - adr, cfi_read_query(map, adr+(2*ofs_factor))); -- cfi_send_gen_cmd(0xff, 0x55, 0, map, cfi, cfi->device_type, NULL); -- -+ chip->state = FL_JEDEC_QUERY; - return 0; - } - #endif -@@ -1492,10 +1559,11 @@ - #define DO_XXLOCK_ONEBLOCK_LOCK ((void *) 1) - #define DO_XXLOCK_ONEBLOCK_UNLOCK ((void *) 2) - --static int do_xxlock_oneblock(struct map_info *map, struct flchip *chip, unsigned long adr, void *thunk) -+static int do_xxlock_oneblock(struct map_info *map, struct flchip *chip, -+ unsigned long adr, int len, void *thunk) - { - struct cfi_private *cfi = map->fldrv_priv; -- cfi_word status, status_OK; -+ map_word status, status_OK; - unsigned long timeo = jiffies + HZ; - int ret; - -@@ -1512,13 +1580,13 @@ - } - - ENABLE_VPP(map); -- 
cfi_write(map, CMD(0x60), adr); -+ map_write(map, CMD(0x60), adr); - - if (thunk == DO_XXLOCK_ONEBLOCK_LOCK) { -- cfi_write(map, CMD(0x01), adr); -+ map_write(map, CMD(0x01), adr); - chip->state = FL_LOCKING; - } else if (thunk == DO_XXLOCK_ONEBLOCK_UNLOCK) { -- cfi_write(map, CMD(0xD0), adr); -+ map_write(map, CMD(0xD0), adr); - chip->state = FL_UNLOCKING; - } else - BUG(); -@@ -1533,15 +1601,16 @@ - timeo = jiffies + (HZ*20); - for (;;) { - -- status = cfi_read(map, adr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, adr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) - break; - - /* OK Still waiting */ - if (time_after(jiffies, timeo)) { -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x70), adr); - chip->state = FL_STATUS; -- printk(KERN_ERR "waiting for unlock to complete timed out. Xstatus = %llx, status = %llx.\n", (__u64)status, (__u64)cfi_read(map, adr)); -+ printk(KERN_ERR "waiting for unlock to complete timed out. Xstatus = %lx, status = %lx.\n", -+ status.x[0], map_read(map, adr).x[0]); - DISABLE_VPP(map); - spin_unlock(chip->mutex); - return -EIO; -@@ -1567,18 +1636,18 @@ - #ifdef DEBUG_LOCK_BITS - printk(KERN_DEBUG "%s: lock status before, ofs=0x%08llx, len=0x%08X\n", - __FUNCTION__, ofs, len); -- cfi_intelext_varsize_frob(mtd, do_printlockstatus_oneblock, -- ofs, len, 0); -+ cfi_varsize_frob(mtd, do_printlockstatus_oneblock, -+ ofs, len, 0); - #endif - -- ret = cfi_intelext_varsize_frob(mtd, do_xxlock_oneblock, -- ofs, len, DO_XXLOCK_ONEBLOCK_LOCK); -+ ret = cfi_varsize_frob(mtd, do_xxlock_oneblock, -+ ofs, len, DO_XXLOCK_ONEBLOCK_LOCK); - - #ifdef DEBUG_LOCK_BITS -- printk(KERN_DEBUG __FUNCTION__ -- "%s: lock status after, ret=%d\n", __FUNCTION__, ret); -- cfi_intelext_varsize_frob(mtd, do_printlockstatus_oneblock, -- ofs, len, 0); -+ printk(KERN_DEBUG "%s: lock status after, ret=%d\n", -+ __FUNCTION__, ret); -+ cfi_varsize_frob(mtd, do_printlockstatus_oneblock, -+ ofs, len, 0); - #endif - - return ret; 
-@@ -1591,17 +1660,18 @@ - #ifdef DEBUG_LOCK_BITS - printk(KERN_DEBUG "%s: lock status before, ofs=0x%08llx, len=0x%08X\n", - __FUNCTION__, ofs, len); -- cfi_intelext_varsize_frob(mtd, do_printlockstatus_oneblock, -- ofs, len, 0); -+ cfi_varsize_frob(mtd, do_printlockstatus_oneblock, -+ ofs, len, 0); - #endif - -- ret = cfi_intelext_varsize_frob(mtd, do_xxlock_oneblock, -+ ret = cfi_varsize_frob(mtd, do_xxlock_oneblock, - ofs, len, DO_XXLOCK_ONEBLOCK_UNLOCK); - - #ifdef DEBUG_LOCK_BITS -- printk(KERN_DEBUG "%s: lock status after, ret=%d\n", __FUNCTION__, ret); -- cfi_intelext_varsize_frob(mtd, do_printlockstatus_oneblock, -- ofs, len, 0); -+ printk(KERN_DEBUG "%s: lock status after, ret=%d\n", -+ __FUNCTION__, ret); -+ cfi_varsize_frob(mtd, do_printlockstatus_oneblock, -+ ofs, len, 0); - #endif - - return ret; -@@ -1679,7 +1749,7 @@ - - /* Go to known state. Chip may have been power cycled */ - if (chip->state == FL_PM_SUSPENDED) { -- cfi_write(map, CMD(0xFF), 0); -+ map_write(map, CMD(0xFF), cfi->chips[i].start); - chip->state = FL_READY; - wake_up(&chip->wq); - } -@@ -1694,6 +1764,7 @@ - struct cfi_private *cfi = map->fldrv_priv; - kfree(cfi->cmdset_priv); - kfree(cfi->cfiq); -+ kfree(cfi->chips[0].priv); - kfree(cfi); - kfree(mtd->eraseregions); - } -Index: linux-2.6.5/drivers/mtd/chips/cfi_cmdset_0002.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/chips/cfi_cmdset_0002.c 2004-04-03 22:36:57.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/chips/cfi_cmdset_0002.c 2005-02-01 17:11:17.000000000 -0500 -@@ -3,15 +3,21 @@ - * AMD & Fujitsu Standard Vendor Command Set (ID 0x0002) - * - * Copyright (C) 2000 Crossnet Co. <info@crossnet.co.jp> -+ * Copyright (C) 2004 Arcom Control Systems Ltd <linux@arcom.com> - * - * 2_by_8 routines added by Simon Munton - * -+ * 4_by_16 work by Carolyn J. 
Smith -+ * -+ * Occasionally maintained by Thayne Harbaugh tharbaugh at lnxi dot com -+ * - * This code is GPL - * -- * $Id: cfi_cmdset_0002.c,v 1.74 2003/05/28 12:51:48 dwmw2 Exp $ -+ * $Id: cfi_cmdset_0002.c,v 1.109 2004/09/15 23:48:09 thayne Exp $ - * - */ - -+#include <linux/config.h> - #include <linux/module.h> - #include <linux/types.h> - #include <linux/kernel.h> -@@ -24,17 +30,24 @@ - #include <linux/slab.h> - #include <linux/delay.h> - #include <linux/interrupt.h> -+#include <linux/mtd/compatmac.h> - #include <linux/mtd/map.h> - #include <linux/mtd/mtd.h> - #include <linux/mtd/cfi.h> --#include <linux/mtd/compatmac.h> - - #define AMD_BOOTLOC_BUG -+#define FORCE_WORD_WRITE 0 -+ -+#define MAX_WORD_RETRIES 3 -+ -+#define MANUFACTURER_AMD 0x0001 -+#define MANUFACTURER_SST 0x00BF -+#define SST49LF004B 0x0060 - - static int cfi_amdstd_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *); --static int cfi_amdstd_write(struct mtd_info *, loff_t, size_t, size_t *, const u_char *); -+static int cfi_amdstd_write_words(struct mtd_info *, loff_t, size_t, size_t *, const u_char *); -+static int cfi_amdstd_write_buffers(struct mtd_info *, loff_t, size_t, size_t *, const u_char *); - static int cfi_amdstd_erase_chip(struct mtd_info *, struct erase_info *); --static int cfi_amdstd_erase_onesize(struct mtd_info *, struct erase_info *); - static int cfi_amdstd_erase_varsize(struct mtd_info *, struct erase_info *); - static void cfi_amdstd_sync (struct mtd_info *); - static int cfi_amdstd_suspend (struct mtd_info *); -@@ -44,8 +57,11 @@ - static void cfi_amdstd_destroy(struct mtd_info *); - - struct mtd_info *cfi_cmdset_0002(struct map_info *, int); --static struct mtd_info *cfi_amdstd_setup (struct map_info *); -+static struct mtd_info *cfi_amdstd_setup (struct mtd_info *); - -+static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr, int mode); -+static void put_chip(struct map_info *map, struct flchip *chip, unsigned long adr); -+#include 
"fwh_lock.h" - - static struct mtd_chip_driver cfi_amdstd_chipdrv = { - .probe = NULL, /* Not usable directly */ -@@ -55,50 +71,199 @@ - }; - - -+/* #define DEBUG_CFI_FEATURES */ -+ -+ -+#ifdef DEBUG_CFI_FEATURES -+static void cfi_tell_features(struct cfi_pri_amdstd *extp) -+{ -+ const char* erase_suspend[3] = { -+ "Not supported", "Read only", "Read/write" -+ }; -+ const char* top_bottom[6] = { -+ "No WP", "8x8KiB sectors at top & bottom, no WP", -+ "Bottom boot", "Top boot", -+ "Uniform, Bottom WP", "Uniform, Top WP" -+ }; -+ -+ printk(" Silicon revision: %d\n", extp->SiliconRevision >> 1); -+ printk(" Address sensitive unlock: %s\n", -+ (extp->SiliconRevision & 1) ? "Not required" : "Required"); -+ -+ if (extp->EraseSuspend < ARRAY_SIZE(erase_suspend)) -+ printk(" Erase Suspend: %s\n", erase_suspend[extp->EraseSuspend]); -+ else -+ printk(" Erase Suspend: Unknown value %d\n", extp->EraseSuspend); -+ -+ if (extp->BlkProt == 0) -+ printk(" Block protection: Not supported\n"); -+ else -+ printk(" Block protection: %d sectors per group\n", extp->BlkProt); -+ -+ -+ printk(" Temporary block unprotect: %s\n", -+ extp->TmpBlkUnprotect ? "Supported" : "Not supported"); -+ printk(" Block protect/unprotect scheme: %d\n", extp->BlkProtUnprot); -+ printk(" Number of simultaneous operations: %d\n", extp->SimultaneousOps); -+ printk(" Burst mode: %s\n", -+ extp->BurstMode ? 
"Supported" : "Not supported"); -+ if (extp->PageMode == 0) -+ printk(" Page mode: Not supported\n"); -+ else -+ printk(" Page mode: %d word page\n", extp->PageMode << 2); -+ -+ printk(" Vpp Supply Minimum Program/Erase Voltage: %d.%d V\n", -+ extp->VppMin >> 4, extp->VppMin & 0xf); -+ printk(" Vpp Supply Maximum Program/Erase Voltage: %d.%d V\n", -+ extp->VppMax >> 4, extp->VppMax & 0xf); -+ -+ if (extp->TopBottom < ARRAY_SIZE(top_bottom)) -+ printk(" Top/Bottom Boot Block: %s\n", top_bottom[extp->TopBottom]); -+ else -+ printk(" Top/Bottom Boot Block: Unknown value %d\n", extp->TopBottom); -+} -+#endif -+ -+#ifdef AMD_BOOTLOC_BUG -+/* Wheee. Bring me the head of someone at AMD. */ -+static void fixup_amd_bootblock(struct mtd_info *mtd, void* param) -+{ -+ struct map_info *map = mtd->priv; -+ struct cfi_private *cfi = map->fldrv_priv; -+ struct cfi_pri_amdstd *extp = cfi->cmdset_priv; -+ __u8 major = extp->MajorVersion; -+ __u8 minor = extp->MinorVersion; -+ -+ if (((major << 8) | minor) < 0x3131) { -+ /* CFI version 1.0 => don't trust bootloc */ -+ if (cfi->id & 0x80) { -+ printk(KERN_WARNING "%s: JEDEC Device ID is 0x%02X. 
Assuming broken CFI table.\n", map->name, cfi->id); -+ extp->TopBottom = 3; /* top boot */ -+ } else { -+ extp->TopBottom = 2; /* bottom boot */ -+ } -+ } -+} -+#endif -+ -+static void fixup_use_write_buffers(struct mtd_info *mtd, void *param) -+{ -+ struct map_info *map = mtd->priv; -+ struct cfi_private *cfi = map->fldrv_priv; -+ if (cfi->cfiq->BufWriteTimeoutTyp) { -+ DEBUG(MTD_DEBUG_LEVEL1, "Using buffer write method\n" ); -+ mtd->write = cfi_amdstd_write_buffers; -+ } -+} -+ -+static void fixup_use_secsi(struct mtd_info *mtd, void *param) -+{ -+ /* Setup for chips with a secsi area */ -+ mtd->read_user_prot_reg = cfi_amdstd_secsi_read; -+ mtd->read_fact_prot_reg = cfi_amdstd_secsi_read; -+} -+ -+static void fixup_use_erase_chip(struct mtd_info *mtd, void *param) -+{ -+ struct map_info *map = mtd->priv; -+ struct cfi_private *cfi = map->fldrv_priv; -+ if ((cfi->cfiq->NumEraseRegions == 1) && -+ ((cfi->cfiq->EraseRegionInfo[0] & 0xffff) == 0)) { -+ mtd->erase = cfi_amdstd_erase_chip; -+ } -+ -+} -+ -+static struct cfi_fixup cfi_fixup_table[] = { -+#ifdef AMD_BOOTLOC_BUG -+ { CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock, NULL }, -+#endif -+ { CFI_MFR_AMD, 0x0050, fixup_use_secsi, NULL, }, -+ { CFI_MFR_AMD, 0x0053, fixup_use_secsi, NULL, }, -+ { CFI_MFR_AMD, 0x0055, fixup_use_secsi, NULL, }, -+ { CFI_MFR_AMD, 0x0056, fixup_use_secsi, NULL, }, -+ { CFI_MFR_AMD, 0x005C, fixup_use_secsi, NULL, }, -+ { CFI_MFR_AMD, 0x005F, fixup_use_secsi, NULL, }, -+#if !FORCE_WORD_WRITE -+ { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers, NULL, }, -+#endif -+ { 0, 0, NULL, NULL } -+}; -+static struct cfi_fixup jedec_fixup_table[] = { -+ { MANUFACTURER_SST, SST49LF004B, fixup_use_fwh_lock, NULL, }, -+ { 0, 0, NULL, NULL } -+}; -+ -+static struct cfi_fixup fixup_table[] = { -+ /* The CFI vendor ids and the JEDEC vendor IDs appear -+ * to be common. It is like the devices id's are as -+ * well. This table is to pick all cases where -+ * we know that is the case. 
-+ */ -+ { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_erase_chip, NULL }, -+ { 0, 0, NULL, NULL } -+}; -+ -+ - struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary) - { - struct cfi_private *cfi = map->fldrv_priv; -- unsigned char bootloc; -- int ofs_factor = cfi->interleave * cfi->device_type; -+ struct mtd_info *mtd; - int i; -- __u8 major, minor; -- __u32 base = cfi->chips[0].start; -+ -+ mtd = kmalloc(sizeof(*mtd), GFP_KERNEL); -+ if (!mtd) { -+ printk(KERN_WARNING "Failed to allocate memory for MTD device\n"); -+ return NULL; -+ } -+ memset(mtd, 0, sizeof(*mtd)); -+ mtd->priv = map; -+ mtd->type = MTD_NORFLASH; -+ -+ /* Fill in the default mtd operations */ -+ mtd->erase = cfi_amdstd_erase_varsize; -+ mtd->write = cfi_amdstd_write_words; -+ mtd->read = cfi_amdstd_read; -+ mtd->sync = cfi_amdstd_sync; -+ mtd->suspend = cfi_amdstd_suspend; -+ mtd->resume = cfi_amdstd_resume; -+ mtd->flags = MTD_CAP_NORFLASH; -+ mtd->name = map->name; - - if (cfi->cfi_mode==CFI_MODE_CFI){ -+ unsigned char bootloc; -+ /* -+ * It's a real CFI chip, not one for which the probe -+ * routine faked a CFI structure. So we read the feature -+ * table from it. 
-+ */ - __u16 adr = primary?cfi->cfiq->P_ADR:cfi->cfiq->A_ADR; -+ struct cfi_pri_amdstd *extp; - -- cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); -- -- major = cfi_read_query(map, base + (adr+3)*ofs_factor); -- minor = cfi_read_query(map, base + (adr+4)*ofs_factor); -- -- printk(KERN_NOTICE " Amd/Fujitsu Extended Query Table v%c.%c at 0x%4.4X\n", -- major, minor, adr); -- cfi_send_gen_cmd(0xf0, 0x55, base, map, cfi, cfi->device_type, NULL); -- -- cfi_send_gen_cmd(0xaa, 0x555, base, map, cfi, cfi->device_type, NULL); -- cfi_send_gen_cmd(0x55, 0x2aa, base, map, cfi, cfi->device_type, NULL); -- cfi_send_gen_cmd(0x90, 0x555, base, map, cfi, cfi->device_type, NULL); -- /* FIXME - should have a delay before continuing */ -- cfi->mfr = cfi_read_query(map, base); -- cfi->id = cfi_read_query(map, base + ofs_factor); -+ extp = (struct cfi_pri_amdstd*)cfi_read_pri(map, adr, sizeof(*extp), "Amd/Fujitsu"); -+ if (!extp) { -+ kfree(mtd); -+ return NULL; -+ } -+ -+ /* Install our own private info structure */ -+ cfi->cmdset_priv = extp; -+ -+ /* Apply cfi device specific fixups */ -+ cfi_fixup(mtd, cfi_fixup_table); -+ -+#ifdef DEBUG_CFI_FEATURES -+ /* Tell the user about it in lots of lovely detail */ -+ cfi_tell_features(extp); -+#endif -+ -+ bootloc = extp->TopBottom; -+ if ((bootloc != 2) && (bootloc != 3)) { -+ printk(KERN_WARNING "%s: CFI does not contain boot " -+ "bank location. Assuming top.\n", map->name); -+ bootloc = 2; -+ } - -- /* Wheee. Bring me the head of someone at AMD. */ --#ifdef AMD_BOOTLOC_BUG -- if (((major << 8) | minor) < 0x3131) { -- /* CFI version 1.0 => don't trust bootloc */ -- if (cfi->id & 0x80) { -- printk(KERN_WARNING "%s: JEDEC Device ID is 0x%02X. 
Assuming broken CFI table.\n", map->name, cfi->id); -- bootloc = 3; /* top boot */ -- } else { -- bootloc = 2; /* bottom boot */ -- } -- } else --#endif -- { -- cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); -- bootloc = cfi_read_query(map, base + (adr+15)*ofs_factor); -- } - if (bootloc == 3 && cfi->cfiq->NumEraseRegions > 1) { - printk(KERN_WARNING "%s: Swapping erase regions for broken CFI table.\n", map->name); - -@@ -112,32 +277,50 @@ - } - } - /* -- * FIXME - These might already be setup (more correctly) -- * buy jedec_probe.c. -+ * These might already be setup (more correctly) by -+ * jedec_probe.c - still need it for cfi_probe.c path. - */ -- switch (cfi->device_type) { -- case CFI_DEVICETYPE_X8: -- cfi->addr_unlock1 = 0x555; -- cfi->addr_unlock2 = 0x2aa; -- break; -- case CFI_DEVICETYPE_X16: -- cfi->addr_unlock1 = 0xaaa; -- if (map->buswidth == cfi->interleave) { -- /* X16 chip(s) in X8 mode */ -- cfi->addr_unlock2 = 0x555; -- } else { -- cfi->addr_unlock2 = 0x554; -+ if ( ! (cfi->addr_unlock1 && cfi->addr_unlock2) ) { -+ switch (cfi->device_type) { -+ case CFI_DEVICETYPE_X8: -+ cfi->addr_unlock1 = 0x555; -+ cfi->addr_unlock2 = 0x2aa; -+ break; -+ case CFI_DEVICETYPE_X16: -+ cfi->addr_unlock1 = 0xaaa; -+ if (map_bankwidth(map) == cfi_interleave(cfi)) { -+ /* X16 chip(s) in X8 mode */ -+ cfi->addr_unlock2 = 0x555; -+ } else { -+ cfi->addr_unlock2 = 0x554; -+ } -+ break; -+ case CFI_DEVICETYPE_X32: -+ cfi->addr_unlock1 = 0x1554; -+ if (map_bankwidth(map) == cfi_interleave(cfi)*2) { -+ /* X32 chip(s) in X16 mode */ -+ cfi->addr_unlock1 = 0xaaa; -+ } else { -+ cfi->addr_unlock2 = 0xaa8; -+ } -+ break; -+ default: -+ printk(KERN_WARNING -+ "MTD %s(): Unsupported device type %d\n", -+ __func__, cfi->device_type); -+ kfree(mtd); -+ kfree(extp); -+ return NULL; - } -- break; -- case CFI_DEVICETYPE_X32: -- cfi->addr_unlock1 = 0x1555; -- cfi->addr_unlock2 = 0xaaa; -- break; -- default: -- printk(KERN_NOTICE "Eep. 
Unknown cfi_cmdset_0002 device type %d\n", cfi->device_type); -- return NULL; - } -+ - } /* CFI mode */ -+ else if (cfi->cfi_mode == CFI_MODE_JEDEC) { -+ /* Apply jedec specific fixups */ -+ cfi_fixup(mtd, jedec_fixup_table); -+ } -+ /* Apply generic fixups */ -+ cfi_fixup(mtd, fixup_table); - - for (i=0; i< cfi->numchips; i++) { - cfi->chips[i].word_write_time = 1<<cfi->cfiq->WordWriteTimeoutTyp; -@@ -146,135 +329,66 @@ - } - - map->fldrv = &cfi_amdstd_chipdrv; -- -- cfi_send_gen_cmd(0xf0, 0x55, base, map, cfi, cfi->device_type, NULL); -- return cfi_amdstd_setup(map); -+ -+ return cfi_amdstd_setup(mtd); - } - --static struct mtd_info *cfi_amdstd_setup(struct map_info *map) -+ -+static struct mtd_info *cfi_amdstd_setup(struct mtd_info *mtd) - { -+ struct map_info *map = mtd->priv; - struct cfi_private *cfi = map->fldrv_priv; -- struct mtd_info *mtd; - unsigned long devsize = (1<<cfi->cfiq->DevSize) * cfi->interleave; -+ unsigned long offset = 0; -+ int i,j; - -- mtd = kmalloc(sizeof(*mtd), GFP_KERNEL); - printk(KERN_NOTICE "number of %s chips: %d\n", -- (cfi->cfi_mode == CFI_MODE_CFI)?"CFI":"JEDEC",cfi->numchips); -+ (cfi->cfi_mode == CFI_MODE_CFI)?"CFI":"JEDEC",cfi->numchips); -+ /* Select the correct geometry setup */ -+ mtd->size = devsize * cfi->numchips; - -- if (!mtd) { -- printk(KERN_WARNING "Failed to allocate memory for MTD device\n"); -- goto setup_err; -+ mtd->numeraseregions = cfi->cfiq->NumEraseRegions * cfi->numchips; -+ mtd->eraseregions = kmalloc(sizeof(struct mtd_erase_region_info) -+ * mtd->numeraseregions, GFP_KERNEL); -+ if (!mtd->eraseregions) { -+ printk(KERN_WARNING "Failed to allocate memory for MTD erase region info\n"); -+ goto setup_err; - } -- -- memset(mtd, 0, sizeof(*mtd)); -- mtd->priv = map; -- mtd->type = MTD_NORFLASH; -- /* Also select the correct geometry setup too */ -- mtd->size = devsize * cfi->numchips; -- -- if (cfi->cfiq->NumEraseRegions == 1) { -- /* No need to muck about with multiple erase sizes */ -- mtd->erasesize = 
((cfi->cfiq->EraseRegionInfo[0] >> 8) & ~0xff) * cfi->interleave; -- } else { -- unsigned long offset = 0; -- int i,j; -- -- mtd->numeraseregions = cfi->cfiq->NumEraseRegions * cfi->numchips; -- mtd->eraseregions = kmalloc(sizeof(struct mtd_erase_region_info) * mtd->numeraseregions, GFP_KERNEL); -- if (!mtd->eraseregions) { -- printk(KERN_WARNING "Failed to allocate memory for MTD erase region info\n"); -- goto setup_err; -- } - -- for (i=0; i<cfi->cfiq->NumEraseRegions; i++) { -- unsigned long ernum, ersize; -- ersize = ((cfi->cfiq->EraseRegionInfo[i] >> 8) & ~0xff) * cfi->interleave; -- ernum = (cfi->cfiq->EraseRegionInfo[i] & 0xffff) + 1; -+ for (i=0; i<cfi->cfiq->NumEraseRegions; i++) { -+ unsigned long ernum, ersize; -+ ersize = ((cfi->cfiq->EraseRegionInfo[i] >> 8) & ~0xff) * cfi->interleave; -+ ernum = (cfi->cfiq->EraseRegionInfo[i] & 0xffff) + 1; - -- if (mtd->erasesize < ersize) { -- mtd->erasesize = ersize; -- } -- for (j=0; j<cfi->numchips; j++) { -- mtd->eraseregions[(j*cfi->cfiq->NumEraseRegions)+i].offset = (j*devsize)+offset; -- mtd->eraseregions[(j*cfi->cfiq->NumEraseRegions)+i].erasesize = ersize; -- mtd->eraseregions[(j*cfi->cfiq->NumEraseRegions)+i].numblocks = ernum; -- } -- offset += (ersize * ernum); -- } -- if (offset != devsize) { -- /* Argh */ -- printk(KERN_WARNING "Sum of regions (%lx) != total size of set of interleaved chips (%lx)\n", offset, devsize); -- goto setup_err; -+ if (mtd->erasesize < ersize) { -+ mtd->erasesize = ersize; - } --#if 0 -- // debug -- for (i=0; i<mtd->numeraseregions;i++){ -- printk("%d: offset=0x%x,size=0x%x,blocks=%d\n", -- i,mtd->eraseregions[i].offset, -- mtd->eraseregions[i].erasesize, -- mtd->eraseregions[i].numblocks); -- } --#endif -- } -- -- switch (CFIDEV_BUSWIDTH) -- { -- case 1: -- case 2: -- case 4: --#if 1 -- if (mtd->numeraseregions > 1) -- mtd->erase = cfi_amdstd_erase_varsize; -- else --#endif -- if (((cfi->cfiq->EraseRegionInfo[0] & 0xffff) + 1) == 1) -- mtd->erase = cfi_amdstd_erase_chip; -- 
else -- mtd->erase = cfi_amdstd_erase_onesize; -- mtd->read = cfi_amdstd_read; -- mtd->write = cfi_amdstd_write; -- break; -- -- default: -- printk(KERN_WARNING "Unsupported buswidth\n"); -+ for (j=0; j<cfi->numchips; j++) { -+ mtd->eraseregions[(j*cfi->cfiq->NumEraseRegions)+i].offset = (j*devsize)+offset; -+ mtd->eraseregions[(j*cfi->cfiq->NumEraseRegions)+i].erasesize = ersize; -+ mtd->eraseregions[(j*cfi->cfiq->NumEraseRegions)+i].numblocks = ernum; -+ } -+ offset += (ersize * ernum); -+ } -+ if (offset != devsize) { -+ /* Argh */ -+ printk(KERN_WARNING "Sum of regions (%lx) != total size of set of interleaved chips (%lx)\n", offset, devsize); - goto setup_err; -- break; - } -- if (cfi->fast_prog) { -- /* In cfi_amdstd_write() we frob the protection stuff -- without paying any attention to the state machine. -- This upsets in-progress erases. So we turn this flag -- off for now till the code gets fixed. */ -- printk(KERN_NOTICE "cfi_cmdset_0002: Disabling fast programming due to code brokenness.\n"); -- cfi->fast_prog = 0; -+#if 0 -+ // debug -+ for (i=0; i<mtd->numeraseregions;i++){ -+ printk("%d: offset=0x%x,size=0x%x,blocks=%d\n", -+ i,mtd->eraseregions[i].offset, -+ mtd->eraseregions[i].erasesize, -+ mtd->eraseregions[i].numblocks); - } -+#endif - -+ /* FIXME: erase-suspend-program is broken. See -+ http://lists.infradead.org/pipermail/linux-mtd/2003-December/009001.html */ -+ printk(KERN_NOTICE "cfi_cmdset_0002: Disabling erase-suspend-program due to code brokenness.\n"); - -- /* does this chip have a secsi area? 
*/ -- if(cfi->mfr==1){ -- -- switch(cfi->id){ -- case 0x50: -- case 0x53: -- case 0x55: -- case 0x56: -- case 0x5C: -- case 0x5F: -- /* Yes */ -- mtd->read_user_prot_reg = cfi_amdstd_secsi_read; -- mtd->read_fact_prot_reg = cfi_amdstd_secsi_read; -- default: -- ; -- } -- } -- -- -- mtd->sync = cfi_amdstd_sync; -- mtd->suspend = cfi_amdstd_suspend; -- mtd->resume = cfi_amdstd_resume; -- mtd->flags = MTD_CAP_NORFLASH; -- map->fldrv = &cfi_amdstd_chipdrv; -- mtd->name = map->name; - __module_get(THIS_MODULE); - return mtd; - -@@ -289,46 +403,182 @@ - return NULL; - } - --static inline int do_read_onechip(struct map_info *map, struct flchip *chip, loff_t adr, size_t len, u_char *buf) -+/* -+ * Return true if the chip is ready. -+ * -+ * Ready is one of: read mode, query mode, erase-suspend-read mode (in any -+ * non-suspended sector) and is indicated by no toggle bits toggling. -+ * -+ * Note that anything more complicated than checking if no bits are toggling -+ * (including checking DQ5 for an error status) is tricky to get working -+ * correctly and is therefore not done (particulary with interleaved chips -+ * as each chip must be checked independantly of the others). 
-+ */ -+static int chip_ready(struct map_info *map, unsigned long addr) -+{ -+ map_word d, t; -+ -+ d = map_read(map, addr); -+ t = map_read(map, addr); -+ -+ return map_word_equal(map, d, t); -+} -+ -+static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr, int mode) - { - DECLARE_WAITQUEUE(wait, current); -- unsigned long timeo = jiffies + HZ; -+ struct cfi_private *cfi = map->fldrv_priv; -+ unsigned long timeo; -+ struct cfi_pri_amdstd *cfip = (struct cfi_pri_amdstd *)cfi->cmdset_priv; - -+ resettime: -+ timeo = jiffies + HZ; - retry: -- cfi_spin_lock(chip->mutex); -+ switch (chip->state) { - -- if (chip->state != FL_READY){ --#if 0 -- printk(KERN_DEBUG "Waiting for chip to read, status = %d\n", chip->state); --#endif -+ case FL_STATUS: -+ for (;;) { -+ if (chip_ready(map, adr)) -+ break; -+ -+ if (time_after(jiffies, timeo)) { -+ printk(KERN_ERR "Waiting for chip to be ready timed out.\n"); -+ cfi_spin_unlock(chip->mutex); -+ return -EIO; -+ } -+ cfi_spin_unlock(chip->mutex); -+ cfi_udelay(1); -+ cfi_spin_lock(chip->mutex); -+ /* Someone else might have been playing with it. */ -+ goto retry; -+ } -+ -+ case FL_READY: -+ case FL_CFI_QUERY: -+ case FL_JEDEC_QUERY: -+ return 0; -+ -+ case FL_ERASING: -+ if (mode == FL_WRITING) /* FIXME: Erase-suspend-program appears broken. */ -+ goto sleep; -+ -+ if (!(mode == FL_READY || mode == FL_POINT -+ || !cfip -+ || (mode == FL_WRITING && (cfip->EraseSuspend & 0x2)) -+ || (mode == FL_WRITING && (cfip->EraseSuspend & 0x1)))) -+ goto sleep; -+ -+ /* We could check to see if we're trying to access the sector -+ * that is currently being erased. However, no user will try -+ * anything like that so we just wait for the timeout. */ -+ -+ /* Erase suspend */ -+ /* It's harmless to issue the Erase-Suspend and Erase-Resume -+ * commands when the erase algorithm isn't in progress. 
*/ -+ map_write(map, CMD(0xB0), chip->in_progress_block_addr); -+ chip->oldstate = FL_ERASING; -+ chip->state = FL_ERASE_SUSPENDING; -+ chip->erase_suspended = 1; -+ for (;;) { -+ if (chip_ready(map, adr)) -+ break; -+ -+ if (time_after(jiffies, timeo)) { -+ /* Should have suspended the erase by now. -+ * Send an Erase-Resume command as either -+ * there was an error (so leave the erase -+ * routine to recover from it) or we trying to -+ * use the erase-in-progress sector. */ -+ map_write(map, CMD(0x30), chip->in_progress_block_addr); -+ chip->state = FL_ERASING; -+ chip->oldstate = FL_READY; -+ printk(KERN_ERR "MTD %s(): chip not ready after erase suspend\n", __func__); -+ return -EIO; -+ } -+ -+ cfi_spin_unlock(chip->mutex); -+ cfi_udelay(1); -+ cfi_spin_lock(chip->mutex); -+ /* Nobody will touch it while it's in state FL_ERASE_SUSPENDING. -+ So we can just loop here. */ -+ } -+ chip->state = FL_READY; -+ return 0; -+ -+ case FL_POINT: -+ /* Only if there's no operation suspended... */ -+ if (mode == FL_READY && chip->oldstate == FL_READY) -+ return 0; -+ -+ default: -+ sleep: - set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&chip->wq, &wait); -- - cfi_spin_unlock(chip->mutex); -- - schedule(); - remove_wait_queue(&chip->wq, &wait); --#if 0 -- if(signal_pending(current)) -- return -EINTR; --#endif -- timeo = jiffies + HZ; -+ cfi_spin_lock(chip->mutex); -+ goto resettime; -+ } -+} - -- goto retry; -- } -+ -+static void put_chip(struct map_info *map, struct flchip *chip, unsigned long adr) -+{ -+ struct cfi_private *cfi = map->fldrv_priv; -+ -+ switch(chip->oldstate) { -+ case FL_ERASING: -+ chip->state = chip->oldstate; -+ map_write(map, CMD(0x30), chip->in_progress_block_addr); -+ chip->oldstate = FL_READY; -+ chip->state = FL_ERASING; -+ break; -+ -+ case FL_READY: -+ case FL_STATUS: -+ /* We should really make set_vpp() count, rather than doing this */ -+ DISABLE_VPP(map); -+ break; -+ default: -+ printk(KERN_ERR "MTD: put_chip() called with oldstate 
%d!!\n", chip->oldstate); -+ } -+ wake_up(&chip->wq); -+} -+ -+ -+static inline int do_read_onechip(struct map_info *map, struct flchip *chip, loff_t adr, size_t len, u_char *buf) -+{ -+ unsigned long cmd_addr; -+ struct cfi_private *cfi = map->fldrv_priv; -+ int ret; - - adr += chip->start; - -- chip->state = FL_READY; -+ /* Ensure cmd read/writes are aligned. */ -+ cmd_addr = adr & ~(map_bankwidth(map)-1); -+ -+ cfi_spin_lock(chip->mutex); -+ ret = get_chip(map, chip, cmd_addr, FL_READY); -+ if (ret) { -+ cfi_spin_unlock(chip->mutex); -+ return ret; -+ } -+ -+ if (chip->state != FL_POINT && chip->state != FL_READY) { -+ map_write(map, CMD(0xf0), cmd_addr); -+ chip->state = FL_READY; -+ } - - map_copy_from(map, buf, adr, len); - -- wake_up(&chip->wq); -- cfi_spin_unlock(chip->mutex); -+ put_chip(map, chip, cmd_addr); - -+ cfi_spin_unlock(chip->mutex); - return 0; - } - -+ - static int cfi_amdstd_read (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) - { - struct map_info *map = mtd->priv; -@@ -370,6 +620,7 @@ - return ret; - } - -+ - static inline int do_read_secsi_onechip(struct map_info *map, struct flchip *chip, loff_t adr, size_t len, u_char *buf) - { - DECLARE_WAITQUEUE(wait, current); -@@ -381,11 +632,11 @@ - - if (chip->state != FL_READY){ - #if 0 -- printk(KERN_DEBUG "Waiting for chip to read, status = %d\n", chip->state); -+ printk(KERN_DEBUG "Waiting for chip to read, status = %d\n", chip->state); - #endif - set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&chip->wq, &wait); -- -+ - cfi_spin_unlock(chip->mutex); - - schedule(); -@@ -402,13 +653,15 @@ - adr += chip->start; - - chip->state = FL_READY; -- -+ -+ /* should these be CFI_DEVICETYPE_X8 instead of cfi->device_type? 
*/ - cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); - cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL); - cfi_send_gen_cmd(0x88, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); - - map_copy_from(map, buf, adr, len); - -+ /* should these be CFI_DEVICETYPE_X8 instead of cfi->device_type? */ - cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); - cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL); - cfi_send_gen_cmd(0x90, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL); -@@ -463,215 +716,388 @@ - return ret; - } - --static int do_write_oneword(struct map_info *map, struct flchip *chip, unsigned long adr, cfi_word datum, int fast) -+ -+static int do_write_oneword(struct map_info *map, struct flchip *chip, unsigned long adr, map_word datum) - { -- unsigned long timeo = jiffies + HZ; -- unsigned int oldstatus, status, prev_oldstatus, prev_status; -- unsigned int dq6; - struct cfi_private *cfi = map->fldrv_priv; -- /* We use a 1ms + 1 jiffies generic timeout for writes (most devices have -- a max write time of a few hundreds usec). However, we should use the -- maximum timeout value given by the chip at probe time instead. -- Unfortunately, struct flchip does have a field for maximum timeout, -- only for typical which can be far too short depending of the conditions. -- The ' + 1' is to avoid having a timeout of 0 jiffies if HZ is smaller -- than 1000. Using a static variable allows makes us save the costly -- divide operation at each word write.*/ -- static unsigned long uWriteTimeout = ( HZ / 1000 ) + 1; -- DECLARE_WAITQUEUE(wait, current); -+ unsigned long timeo = jiffies + HZ; -+ /* -+ * We use a 1ms + 1 jiffies generic timeout for writes (most devices -+ * have a max write time of a few hundreds usec). 
However, we should -+ * use the maximum timeout value given by the chip at probe time -+ * instead. Unfortunately, struct flchip does have a field for -+ * maximum timeout, only for typical which can be far too short -+ * depending of the conditions. The ' + 1' is to avoid having a -+ * timeout of 0 jiffies if HZ is smaller than 1000. -+ */ -+ unsigned long uWriteTimeout = ( HZ / 1000 ) + 1; - int ret = 0; -- int ta = 0; -+ map_word oldd, curd; -+ int retry_cnt = 0; - -+ adr += chip->start; -+ -+ cfi_spin_lock(chip->mutex); -+ ret = get_chip(map, chip, adr, FL_WRITING); -+ if (ret) { -+ cfi_spin_unlock(chip->mutex); -+ return ret; -+ } -+ -+ DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): WRITE 0x%.8lx(0x%.8lx)\n", -+ __func__, adr, datum.x[0] ); -+ -+ /* -+ * Check for a NOP for the case when the datum to write is already -+ * present - it saves time and works around buggy chips that corrupt -+ * data at other locations when 0xff is written to a location that -+ * already contains 0xff. -+ */ -+ oldd = map_read(map, adr); -+ if (map_word_equal(map, oldd, datum)) { -+ DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): NOP\n", -+ __func__); -+ goto op_done; -+ } -+ -+ ENABLE_VPP(map); - retry: -+ /* -+ * The CFI_DEVICETYPE_X8 argument is needed even when -+ * cfi->device_type != CFI_DEVICETYPE_X8. The addresses for -+ * command sequences don't scale even when the device is -+ * wider. This is the case for many of the cfi_send_gen_cmd() -+ * below. I'm not sure, however, why some use -+ * cfi->device_type. 
-+ */ -+ cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); -+ cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); -+ cfi_send_gen_cmd(0xA0, cfi->addr_unlock1, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); -+ map_write(map, datum, adr); -+ chip->state = FL_WRITING; -+ -+ cfi_spin_unlock(chip->mutex); -+ cfi_udelay(chip->word_write_time); - cfi_spin_lock(chip->mutex); - -- if (chip->state != FL_READY) { -+ /* See comment above for timeout value. */ -+ timeo = jiffies + uWriteTimeout; -+ for (;;) { -+ if (chip->state != FL_WRITING) { -+ /* Someone's suspended the write. Sleep */ -+ DECLARE_WAITQUEUE(wait, current); -+ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ add_wait_queue(&chip->wq, &wait); -+ cfi_spin_unlock(chip->mutex); -+ schedule(); -+ remove_wait_queue(&chip->wq, &wait); -+ timeo = jiffies + (HZ / 2); /* FIXME */ -+ cfi_spin_lock(chip->mutex); -+ continue; -+ } -+ -+ /* Test to see if toggling has stopped. */ -+ oldd = map_read(map, adr); -+ curd = map_read(map, adr); -+ if (map_word_equal(map, curd, oldd)) { -+ /* Do we have the correct value? */ -+ if (map_word_equal(map, curd, datum)) { -+ goto op_done; -+ } -+ /* Nope something has gone wrong. */ -+ break; -+ } -+ -+ if (time_after(jiffies, timeo)) { -+ printk(KERN_WARNING "MTD %s(): software timeout\n", -+ __func__ ); -+ break; -+ } -+ -+ /* Latency issues. Drop the lock, wait a while and retry */ -+ cfi_spin_unlock(chip->mutex); -+ cfi_udelay(1); -+ cfi_spin_lock(chip->mutex); -+ } -+ -+ /* reset on all failures. 
*/ -+ map_write( map, CMD(0xF0), chip->start ); -+ /* FIXME - should have reset delay before continuing */ -+ if (++retry_cnt <= MAX_WORD_RETRIES) -+ goto retry; -+ -+ ret = -EIO; -+ op_done: -+ chip->state = FL_READY; -+ put_chip(map, chip, adr); -+ cfi_spin_unlock(chip->mutex); -+ -+ return ret; -+} -+ -+ -+static int cfi_amdstd_write_words(struct mtd_info *mtd, loff_t to, size_t len, -+ size_t *retlen, const u_char *buf) -+{ -+ struct map_info *map = mtd->priv; -+ struct cfi_private *cfi = map->fldrv_priv; -+ int ret = 0; -+ int chipnum; -+ unsigned long ofs, chipstart; -+ DECLARE_WAITQUEUE(wait, current); -+ -+ *retlen = 0; -+ if (!len) -+ return 0; -+ -+ chipnum = to >> cfi->chipshift; -+ ofs = to - (chipnum << cfi->chipshift); -+ chipstart = cfi->chips[chipnum].start; -+ -+ /* If it's not bus-aligned, do the first byte write */ -+ if (ofs & (map_bankwidth(map)-1)) { -+ unsigned long bus_ofs = ofs & ~(map_bankwidth(map)-1); -+ int i = ofs - bus_ofs; -+ int n = 0; -+ map_word tmp_buf; -+ -+ retry: -+ cfi_spin_lock(cfi->chips[chipnum].mutex); -+ -+ if (cfi->chips[chipnum].state != FL_READY) { - #if 0 -- printk(KERN_DEBUG "Waiting for chip to write, status = %d\n", chip->state); -+ printk(KERN_DEBUG "Waiting for chip to write, status = %d\n", cfi->chips[chipnum].state); - #endif -- set_current_state(TASK_UNINTERRUPTIBLE); -- add_wait_queue(&chip->wq, &wait); -- -- cfi_spin_unlock(chip->mutex); -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ add_wait_queue(&cfi->chips[chipnum].wq, &wait); - -- schedule(); -- remove_wait_queue(&chip->wq, &wait); -+ cfi_spin_unlock(cfi->chips[chipnum].mutex); -+ -+ schedule(); -+ remove_wait_queue(&cfi->chips[chipnum].wq, &wait); - #if 0 -- printk(KERN_DEBUG "Wake up to write:\n"); -- if(signal_pending(current)) -- return -EINTR; -+ if(signal_pending(current)) -+ return -EINTR; - #endif -- timeo = jiffies + HZ; -+ goto retry; -+ } - -- goto retry; -- } -+ /* Load 'tmp_buf' with old contents of flash */ -+ tmp_buf = map_read(map, 
bus_ofs+chipstart); - -- chip->state = FL_WRITING; -+ cfi_spin_unlock(cfi->chips[chipnum].mutex); -+ -+ /* Number of bytes to copy from buffer */ -+ n = min_t(int, len, map_bankwidth(map)-i); -+ -+ tmp_buf = map_word_load_partial(map, tmp_buf, buf, i, n); -+ -+ ret = do_write_oneword(map, &cfi->chips[chipnum], -+ bus_ofs, tmp_buf); -+ if (ret) -+ return ret; -+ -+ ofs += n; -+ buf += n; -+ (*retlen) += n; -+ len -= n; -+ -+ if (ofs >> cfi->chipshift) { -+ chipnum ++; -+ ofs = 0; -+ if (chipnum == cfi->numchips) -+ return 0; -+ } -+ } -+ -+ /* We are now aligned, write as much as possible */ -+ while(len >= map_bankwidth(map)) { -+ map_word datum; -+ -+ datum = map_word_load(map, buf); -+ -+ ret = do_write_oneword(map, &cfi->chips[chipnum], -+ ofs, datum); -+ if (ret) -+ return ret; -+ -+ ofs += map_bankwidth(map); -+ buf += map_bankwidth(map); -+ (*retlen) += map_bankwidth(map); -+ len -= map_bankwidth(map); -+ -+ if (ofs >> cfi->chipshift) { -+ chipnum ++; -+ ofs = 0; -+ if (chipnum == cfi->numchips) -+ return 0; -+ chipstart = cfi->chips[chipnum].start; -+ } -+ } -+ -+ /* Write the trailing bytes if any */ -+ if (len & (map_bankwidth(map)-1)) { -+ map_word tmp_buf; -+ -+ retry1: -+ cfi_spin_lock(cfi->chips[chipnum].mutex); -+ -+ if (cfi->chips[chipnum].state != FL_READY) { -+#if 0 -+ printk(KERN_DEBUG "Waiting for chip to write, status = %d\n", cfi->chips[chipnum].state); -+#endif -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ add_wait_queue(&cfi->chips[chipnum].wq, &wait); -+ -+ cfi_spin_unlock(cfi->chips[chipnum].mutex); -+ -+ schedule(); -+ remove_wait_queue(&cfi->chips[chipnum].wq, &wait); -+#if 0 -+ if(signal_pending(current)) -+ return -EINTR; -+#endif -+ goto retry1; -+ } -+ -+ tmp_buf = map_read(map, ofs + chipstart); -+ -+ cfi_spin_unlock(cfi->chips[chipnum].mutex); -+ -+ tmp_buf = map_word_load_partial(map, tmp_buf, buf, 0, len); -+ -+ ret = do_write_oneword(map, &cfi->chips[chipnum], -+ ofs, tmp_buf); -+ if (ret) -+ return ret; -+ -+ (*retlen) += len; 
-+ } -+ -+ return 0; -+} -+ -+ -+/* -+ * FIXME: interleaved mode not tested, and probably not supported! -+ */ -+static inline int do_write_buffer(struct map_info *map, struct flchip *chip, -+ unsigned long adr, const u_char *buf, int len) -+{ -+ struct cfi_private *cfi = map->fldrv_priv; -+ unsigned long timeo = jiffies + HZ; -+ /* see comments in do_write_oneword() regarding uWriteTimeo. */ -+ unsigned long uWriteTimeout = ( HZ / 1000 ) + 1; -+ int ret = -EIO; -+ unsigned long cmd_adr; -+ int z, words; -+ map_word datum; - - adr += chip->start; -- DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): WRITE 0x%.8lx(0x%.8x)\n", -- __func__, adr, datum ); -+ cmd_adr = adr; - -- ENABLE_VPP(map); -- if (fast) { /* Unlock bypass */ -- cfi_send_gen_cmd(0xA0, 0, chip->start, map, cfi, cfi->device_type, NULL); -+ cfi_spin_lock(chip->mutex); -+ ret = get_chip(map, chip, adr, FL_WRITING); -+ if (ret) { -+ cfi_spin_unlock(chip->mutex); -+ return ret; - } -- else { -- cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); -- cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); -- cfi_send_gen_cmd(0xA0, cfi->addr_unlock1, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); -+ -+ datum = map_word_load(map, buf); -+ -+ DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): WRITE 0x%.8lx(0x%.8lx)\n", -+ __func__, adr, datum.x[0] ); -+ -+ ENABLE_VPP(map); -+ cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); -+ cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); -+ //cfi_send_gen_cmd(0xA0, cfi->addr_unlock1, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); -+ -+ /* Write Buffer Load */ -+ map_write(map, CMD(0x25), cmd_adr); -+ -+ chip->state = FL_WRITING_TO_BUFFER; -+ -+ /* Write length of data to come */ -+ words = len / map_bankwidth(map); -+ map_write(map, CMD(words - 1), cmd_adr); -+ /* Write data */ -+ z = 0; -+ while(z < words * map_bankwidth(map)) { -+ datum = 
map_word_load(map, buf); -+ map_write(map, datum, adr + z); -+ -+ z += map_bankwidth(map); -+ buf += map_bankwidth(map); - } -- cfi_write(map, datum, adr); -+ z -= map_bankwidth(map); -+ -+ adr += z; -+ -+ /* Write Buffer Program Confirm: GO GO GO */ -+ map_write(map, CMD(0x29), cmd_adr); -+ chip->state = FL_WRITING; - - cfi_spin_unlock(chip->mutex); -- cfi_udelay(chip->word_write_time); -+ cfi_udelay(chip->buffer_write_time); - cfi_spin_lock(chip->mutex); - -- /* -- * Polling toggle bits instead of reading back many times -- * This ensures that write operation is really completed, -- * or tells us why it failed. -- * -- * It appears tha the polling and decoding of error state might -- * be simplified. Don't do it unless you really know what you -- * are doing. You must remember that JESD21-C 3.5.3 states that -- * the status must be read back an _additional_ two times before -- * a failure is determined. This is because these devices have -- * internal state machines that are asynchronous to the external -- * data bus. During an erase or write the read-back status of the -- * polling bits might be transitioning internaly when the external -- * read-back occurs. This means that the bits aren't in the final -- * state and they might appear to report an error as they transition -- * and are in a weird state. This will produce infrequent errors -- * that will usually disappear the next time an erase or write -- * happens (Try tracking those errors down!). To ensure that -- * the bits are not in transition the location must be read-back -- * two more times and compared against what was written - BOTH reads -- * MUST match what was written - don't think this can be simplified -- * to only the last read matching. If the comparison fails, error -- * state can then be decoded. -- * -- * - Thayne Harbaugh -- */ -- dq6 = CMD(1<<6); -- /* See comment above for timeout value. 
*/ - timeo = jiffies + uWriteTimeout; -- -- oldstatus = cfi_read(map, adr); -- status = cfi_read(map, adr); -- DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): Check 0x%.8x 0x%.8x\n", -- __func__, oldstatus, status ); -- -- /* -- * This only checks if dq6 is still toggling and that our -- * timer hasn't expired. We purposefully ignore the chips -- * internal timer that will assert dq5 and leave dq6 toggling. -- * This is done for a variety of reasons: -- * 1) Not all chips support dq5. -- * 2) Dealing with asynchronous status bit and data updates -- * and reading a device two more times creates _messy_ -- * logic when trying to deal with interleaved devices - -- * some may be changing while others are still busy. -- * 3) Checking dq5 only helps to optimize an error case that -- * should at worst be infrequent and at best non-existent. -- * -- * If our timeout occurs _then_ we will check dq5 to see -- * if the device also had an internal timeout. -- */ -- while( ( ( status ^ oldstatus ) & dq6 ) -- && ! ( ta = time_after(jiffies, timeo) ) ) { -+ -+ for (;;) { -+ if (chip->state != FL_WRITING) { -+ /* Someone's suspended the write. Sleep */ -+ DECLARE_WAITQUEUE(wait, current); - -- if (need_resched()) { -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ add_wait_queue(&chip->wq, &wait); - cfi_spin_unlock(chip->mutex); -- yield(); -+ schedule(); -+ remove_wait_queue(&chip->wq, &wait); -+ timeo = jiffies + (HZ / 2); /* FIXME */ - cfi_spin_lock(chip->mutex); -- } else -- udelay(1); -+ continue; -+ } - -- oldstatus = cfi_read( map, adr ); -- status = cfi_read( map, adr ); -- DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): Check 0x%.8x 0x%.8x\n", -- __func__, oldstatus, status ); -- } -+ if (chip_ready(map, adr)) -+ goto op_done; -+ -+ if( time_after(jiffies, timeo)) -+ break; - -- /* -- * Something kicked us out of the read-back loop. We'll -- * check success befor checking failure. 
-- * Even though dq6 might be true data, it is unkown if -- * all of the other bits have changed to true data due to -- * the asynchronous nature of the internal state machine. -- * We will read two more times and use this to either -- * verify that the write completed successfully or -- * that something really went wrong. BOTH reads -- * must match what was written - this certifies that -- * bits aren't still changing and that the status -- * bits erroneously match the datum that was written. -- */ -- prev_oldstatus = oldstatus; -- prev_status = status; -- oldstatus = cfi_read(map, adr); -- status = cfi_read(map, adr); -- DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): Check 0x%.8x 0x%.8x\n", -- __func__, oldstatus, status ); -- -- if ( oldstatus == datum && status == datum ) { -- /* success - do nothing */ -- goto write_done; -- } -- -- if ( ta ) { -- int dq5mask = ( ( status ^ oldstatus ) & dq6 ) >> 1; -- if ( status & dq5mask ) { -- /* dq5 asserted - decode interleave chips */ -- printk( KERN_WARNING -- "MTD %s(): FLASH internal timeout: 0x%.8x\n", -- __func__, -- status & dq5mask ); -- } else { -- printk( KERN_WARNING -- "MTD %s(): Software timed out during write.\n", -- __func__ ); -- } -- goto write_failed; -+ /* Latency issues. Drop the lock, wait a while and retry */ -+ cfi_spin_unlock(chip->mutex); -+ cfi_udelay(1); -+ cfi_spin_lock(chip->mutex); - } - -- /* -- * If we get to here then it means that something -- * is wrong and it's not a timeout. Something -- * is seriously wacky! Dump some debug info. -- */ -- printk(KERN_WARNING -- "MTD %s(): Wacky! Unable to decode failure status\n", -+ printk(KERN_WARNING "MTD %s(): software timeout\n", - __func__ ); - -- printk(KERN_WARNING -- "MTD %s(): 0x%.8lx(0x%.8x): 0x%.8x 0x%.8x 0x%.8x 0x%.8x\n", -- __func__, adr, datum, -- prev_oldstatus, prev_status, -- oldstatus, status); -- -- write_failed: -- ret = -EIO; - /* reset on all failures. 
*/ -- cfi_write( map, CMD(0xF0), chip->start ); -+ map_write( map, CMD(0xF0), chip->start ); - /* FIXME - should have reset delay before continuing */ - -- write_done: -- DISABLE_VPP(map); -+ ret = -EIO; -+ op_done: - chip->state = FL_READY; -- wake_up(&chip->wq); -+ put_chip(map, chip, adr); - cfi_spin_unlock(chip->mutex); - - return ret; - } - --static int cfi_amdstd_write (struct mtd_info *mtd, loff_t to , size_t len, size_t *retlen, const u_char *buf) -+ -+static int cfi_amdstd_write_buffers(struct mtd_info *mtd, loff_t to, size_t len, -+ size_t *retlen, const u_char *buf) - { - struct map_info *map = mtd->priv; - struct cfi_private *cfi = map->fldrv_priv; -+ int wbufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize; - int ret = 0; - int chipnum; -- unsigned long ofs, chipstart; -+ unsigned long ofs; - - *retlen = 0; - if (!len) -@@ -679,176 +1105,94 @@ - - chipnum = to >> cfi->chipshift; - ofs = to - (chipnum << cfi->chipshift); -- chipstart = cfi->chips[chipnum].start; -- -- /* If it's not bus-aligned, do the first byte write */ -- if (ofs & (CFIDEV_BUSWIDTH-1)) { -- unsigned long bus_ofs = ofs & ~(CFIDEV_BUSWIDTH-1); -- int i = ofs - bus_ofs; -- int n = 0; -- u_char tmp_buf[8]; -- cfi_word datum; -- -- map_copy_from(map, tmp_buf, bus_ofs + cfi->chips[chipnum].start, CFIDEV_BUSWIDTH); -- while (len && i < CFIDEV_BUSWIDTH) -- tmp_buf[i++] = buf[n++], len--; -- -- if (cfi_buswidth_is_2()) { -- datum = *(__u16*)tmp_buf; -- } else if (cfi_buswidth_is_4()) { -- datum = *(__u32*)tmp_buf; -- } else { -- return -EINVAL; /* should never happen, but be safe */ -- } - -- ret = do_write_oneword(map, &cfi->chips[chipnum], -- bus_ofs, datum, 0); -- if (ret) -+ /* If it's not bus-aligned, do the first word write */ -+ if (ofs & (map_bankwidth(map)-1)) { -+ size_t local_len = (-ofs)&(map_bankwidth(map)-1); -+ if (local_len > len) -+ local_len = len; -+ ret = cfi_amdstd_write_words(mtd, ofs + (chipnum<<cfi->chipshift), -+ local_len, retlen, buf); -+ if (ret) - return 
ret; -- -- ofs += n; -- buf += n; -- (*retlen) += n; -+ ofs += local_len; -+ buf += local_len; -+ len -= local_len; - - if (ofs >> cfi->chipshift) { -- chipnum ++; -+ chipnum ++; - ofs = 0; - if (chipnum == cfi->numchips) - return 0; - } - } -- -- if (cfi->fast_prog) { -- /* Go into unlock bypass mode */ -- cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chipstart, map, cfi, CFI_DEVICETYPE_X8, NULL); -- cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chipstart, map, cfi, CFI_DEVICETYPE_X8, NULL); -- cfi_send_gen_cmd(0x20, cfi->addr_unlock1, chipstart, map, cfi, CFI_DEVICETYPE_X8, NULL); -- } - -- /* We are now aligned, write as much as possible */ -- while(len >= CFIDEV_BUSWIDTH) { -- cfi_word datum; -+ /* Write buffer is worth it only if more than one word to write... */ -+ while (len >= map_bankwidth(map) * 2) { -+ /* We must not cross write block boundaries */ -+ int size = wbufsize - (ofs & (wbufsize-1)); -+ -+ if (size > len) -+ size = len; -+ if (size % map_bankwidth(map)) -+ size -= size % map_bankwidth(map); - -- if (cfi_buswidth_is_1()) { -- datum = *(__u8*)buf; -- } else if (cfi_buswidth_is_2()) { -- datum = *(__u16*)buf; -- } else if (cfi_buswidth_is_4()) { -- datum = *(__u32*)buf; -- } else { -- return -EINVAL; -- } -- ret = do_write_oneword(map, &cfi->chips[chipnum], -- ofs, datum, cfi->fast_prog); -- if (ret) { -- if (cfi->fast_prog){ -- /* Get out of unlock bypass mode */ -- cfi_send_gen_cmd(0x90, 0, chipstart, map, cfi, cfi->device_type, NULL); -- cfi_send_gen_cmd(0x00, 0, chipstart, map, cfi, cfi->device_type, NULL); -- } -+ ret = do_write_buffer(map, &cfi->chips[chipnum], -+ ofs, buf, size); -+ if (ret) - return ret; -- } - -- ofs += CFIDEV_BUSWIDTH; -- buf += CFIDEV_BUSWIDTH; -- (*retlen) += CFIDEV_BUSWIDTH; -- len -= CFIDEV_BUSWIDTH; -+ ofs += size; -+ buf += size; -+ (*retlen) += size; -+ len -= size; - - if (ofs >> cfi->chipshift) { -- if (cfi->fast_prog){ -- /* Get out of unlock bypass mode */ -- cfi_send_gen_cmd(0x90, 0, chipstart, map, cfi, 
cfi->device_type, NULL); -- cfi_send_gen_cmd(0x00, 0, chipstart, map, cfi, cfi->device_type, NULL); -- } -- - chipnum ++; - ofs = 0; - if (chipnum == cfi->numchips) - return 0; -- chipstart = cfi->chips[chipnum].start; -- if (cfi->fast_prog){ -- /* Go into unlock bypass mode for next set of chips */ -- cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chipstart, map, cfi, CFI_DEVICETYPE_X8, NULL); -- cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chipstart, map, cfi, CFI_DEVICETYPE_X8, NULL); -- cfi_send_gen_cmd(0x20, cfi->addr_unlock1, chipstart, map, cfi, CFI_DEVICETYPE_X8, NULL); -- } - } - } - -- if (cfi->fast_prog){ -- /* Get out of unlock bypass mode */ -- cfi_send_gen_cmd(0x90, 0, chipstart, map, cfi, cfi->device_type, NULL); -- cfi_send_gen_cmd(0x00, 0, chipstart, map, cfi, cfi->device_type, NULL); -- } -+ if (len) { -+ size_t retlen_dregs = 0; - -- /* Write the trailing bytes if any */ -- if (len & (CFIDEV_BUSWIDTH-1)) { -- int i = 0, n = 0; -- u_char tmp_buf[8]; -- cfi_word datum; -- -- map_copy_from(map, tmp_buf, ofs + cfi->chips[chipnum].start, CFIDEV_BUSWIDTH); -- while (len--) -- tmp_buf[i++] = buf[n++]; -- -- if (cfi_buswidth_is_2()) { -- datum = *(__u16*)tmp_buf; -- } else if (cfi_buswidth_is_4()) { -- datum = *(__u32*)tmp_buf; -- } else { -- return -EINVAL; /* should never happen, but be safe */ -- } -+ ret = cfi_amdstd_write_words(mtd, ofs + (chipnum<<cfi->chipshift), -+ len, &retlen_dregs, buf); - -- ret = do_write_oneword(map, &cfi->chips[chipnum], -- ofs, datum, 0); -- if (ret) -- return ret; -- -- (*retlen) += n; -+ *retlen += retlen_dregs; -+ return ret; - } - - return 0; - } - -+ -+/* -+ * Handle devices with one erase region, that only implement -+ * the chip erase command. 
-+ */ - static inline int do_erase_chip(struct map_info *map, struct flchip *chip) - { -- unsigned int oldstatus, status, prev_oldstatus, prev_status; -- unsigned int dq6; -+ struct cfi_private *cfi = map->fldrv_priv; - unsigned long timeo = jiffies + HZ; - unsigned long int adr; -- struct cfi_private *cfi = map->fldrv_priv; - DECLARE_WAITQUEUE(wait, current); - int ret = 0; -- int ta = 0; -- cfi_word ones = 0; - -- retry: -- cfi_spin_lock(chip->mutex); -+ adr = cfi->addr_unlock1; - -- if (chip->state != FL_READY){ -- set_current_state(TASK_UNINTERRUPTIBLE); -- add_wait_queue(&chip->wq, &wait); -- -+ cfi_spin_lock(chip->mutex); -+ ret = get_chip(map, chip, adr, FL_WRITING); -+ if (ret) { - cfi_spin_unlock(chip->mutex); -+ return ret; -+ } - -- schedule(); -- remove_wait_queue(&chip->wq, &wait); --#if 0 -- if(signal_pending(current)) -- return -EINTR; --#endif -- timeo = jiffies + HZ; -- -- goto retry; -- } -- -- chip->state = FL_ERASING; - DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): ERASE 0x%.8lx\n", - __func__, chip->start ); -- -- /* Handle devices with one erase region, that only implement -- * the chip erase command. -- */ -+ - ENABLE_VPP(map); - cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); - cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); -@@ -856,175 +1200,82 @@ - cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); - cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); - cfi_send_gen_cmd(0x10, cfi->addr_unlock1, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); -- timeo = jiffies + (HZ*20); -- adr = cfi->addr_unlock1; - -- /* Wait for the end of programing/erasure by using the toggle method. -- * As long as there is a programming procedure going on, bit 6 -- * is toggling it's state with each consecutive read. -- * The toggling stops as soon as the procedure is completed. 
-- * -- * If the process has gone on for too long on the chip bit 5 gets. -- * After bit5 is set you can kill the operation by sending a reset -- * command to the chip. -- */ -- /* see comments in do_write_oneword */ -- dq6 = CMD(1<<6); -+ chip->state = FL_ERASING; -+ chip->erase_suspended = 0; -+ chip->in_progress_block_addr = adr; - -- oldstatus = cfi_read(map, adr); -- status = cfi_read(map, adr); -- DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): Check 0x%.8x 0x%.8x\n", -- __func__, oldstatus, status ); -- -- while( ( ( status ^ oldstatus ) & dq6 ) -- && ! ( ta = time_after(jiffies, timeo) ) ) { -- int wait_reps; -+ cfi_spin_unlock(chip->mutex); -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_timeout((chip->erase_time*HZ)/(2*1000)); -+ cfi_spin_lock(chip->mutex); - -- /* an initial short sleep */ -- cfi_spin_unlock(chip->mutex); -- schedule_timeout(HZ/100); -- cfi_spin_lock(chip->mutex); -- -+ timeo = jiffies + (HZ*20); -+ -+ for (;;) { - if (chip->state != FL_ERASING) { - /* Someone's suspended the erase. Sleep */ - set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&chip->wq, &wait); -- - cfi_spin_unlock(chip->mutex); -- printk("erase suspended. Sleeping\n"); -- - schedule(); - remove_wait_queue(&chip->wq, &wait); --#if 0 -- if (signal_pending(current)) -- return -EINTR; --#endif -- timeo = jiffies + (HZ*2); /* FIXME */ - cfi_spin_lock(chip->mutex); - continue; - } -+ if (chip->erase_suspended) { -+ /* This erase was suspended and resumed. -+ Adjust the timeout */ -+ timeo = jiffies + (HZ*20); /* FIXME */ -+ chip->erase_suspended = 0; -+ } - -- /* Busy wait for 1/10 of a milisecond */ -- for(wait_reps = 0; -- (wait_reps < 100) -- && ( ( status ^ oldstatus ) & dq6 ); -- wait_reps++) { -- -- /* Latency issues. 
Drop the lock, wait a while and retry */ -- cfi_spin_unlock(chip->mutex); -- -- cfi_udelay(1); -+ if (chip_ready(map, adr)) -+ goto op_done; - -- cfi_spin_lock(chip->mutex); -- oldstatus = cfi_read(map, adr); -- status = cfi_read(map, adr); -- DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): Check 0x%.8x 0x%.8x\n", -- __func__, oldstatus, status ); -- } -- oldstatus = cfi_read(map, adr); -- status = cfi_read(map, adr); -- DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): Check 0x%.8x 0x%.8x\n", -- __func__, oldstatus, status ); -- } -- -- prev_oldstatus = oldstatus; -- prev_status = status; -- oldstatus = cfi_read(map, adr); -- status = cfi_read(map, adr); -- DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): Check 0x%.8x 0x%.8x\n", -- __func__, oldstatus, status ); -- -- if ( cfi_buswidth_is_1() ) { -- ones = (__u8)~0; -- } else if ( cfi_buswidth_is_2() ) { -- ones = (__u16)~0; -- } else if ( cfi_buswidth_is_4() ) { -- ones = (__u32)~0; -- } else { -- printk(KERN_WARNING "Unsupported buswidth\n"); -- goto erase_failed; -- } -- -- if ( oldstatus == ones && status == ones ) { -- /* success - do nothing */ -- goto erase_done; -- } -+ if (time_after(jiffies, timeo)) -+ break; - -- if ( ta ) { -- int dq5mask = ( ( status ^ oldstatus ) & dq6 ) >> 1; -- if ( status & dq5mask ) { -- /* dq5 asserted - decode interleave chips */ -- printk( KERN_WARNING -- "MTD %s(): FLASH internal timeout: 0x%.8x\n", -- __func__, -- status & dq5mask ); -- } else { -- printk( KERN_WARNING -- "MTD %s(): Software timed out during write.\n", -- __func__ ); -- } -- goto erase_failed; -+ /* Latency issues. Drop the lock, wait a while and retry */ -+ cfi_spin_unlock(chip->mutex); -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_timeout(1); -+ cfi_spin_lock(chip->mutex); - } - -- printk(KERN_WARNING -- "MTD %s(): Wacky! 
Unable to decode failure status\n", -+ printk(KERN_WARNING "MTD %s(): software timeout\n", - __func__ ); - -- printk(KERN_WARNING -- "MTD %s(): 0x%.8lx(0x%.8x): 0x%.8x 0x%.8x 0x%.8x 0x%.8x\n", -- __func__, adr, ones, -- prev_oldstatus, prev_status, -- oldstatus, status); -- -- erase_failed: -- ret = -EIO; - /* reset on all failures. */ -- cfi_write( map, CMD(0xF0), chip->start ); -+ map_write( map, CMD(0xF0), chip->start ); - /* FIXME - should have reset delay before continuing */ - -- erase_done: -- DISABLE_VPP(map); -+ ret = -EIO; -+ op_done: - chip->state = FL_READY; -- wake_up(&chip->wq); -+ put_chip(map, chip, adr); - cfi_spin_unlock(chip->mutex); -+ - return ret; - } - - --static inline int do_erase_oneblock(struct map_info *map, struct flchip *chip, unsigned long adr) -+static inline int do_erase_oneblock(struct map_info *map, struct flchip *chip, unsigned long adr, int len, void *thunk) - { -- unsigned int oldstatus, status, prev_oldstatus, prev_status; -- unsigned int dq6; -- unsigned long timeo = jiffies + HZ; - struct cfi_private *cfi = map->fldrv_priv; -+ unsigned long timeo = jiffies + HZ; - DECLARE_WAITQUEUE(wait, current); - int ret = 0; -- int ta = 0; -- cfi_word ones = 0; - -- retry: -- cfi_spin_lock(chip->mutex); -+ adr += chip->start; - -- if (chip->state != FL_READY){ -- set_current_state(TASK_UNINTERRUPTIBLE); -- add_wait_queue(&chip->wq, &wait); -- -+ cfi_spin_lock(chip->mutex); -+ ret = get_chip(map, chip, adr, FL_ERASING); -+ if (ret) { - cfi_spin_unlock(chip->mutex); -+ return ret; -+ } - -- schedule(); -- remove_wait_queue(&chip->wq, &wait); --#if 0 -- if(signal_pending(current)) -- return -EINTR; --#endif -- timeo = jiffies + HZ; -- -- goto retry; -- } -- -- chip->state = FL_ERASING; -- -- adr += chip->start; - DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): ERASE 0x%.8lx\n", - __func__, adr ); - -@@ -1034,279 +1285,85 @@ - cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); - cfi_send_gen_cmd(0xAA, 
cfi->addr_unlock1, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); - cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL); -+ map_write(map, CMD(0x30), adr); - -- cfi_write(map, CMD(0x30), adr); -+ chip->state = FL_ERASING; -+ chip->erase_suspended = 0; -+ chip->in_progress_block_addr = adr; - -- timeo = jiffies + (HZ*20); -- -- /* Wait for the end of programing/erasure by using the toggle method. -- * As long as there is a programming procedure going on, bit 6 -- * is toggling it's state with each consecutive read. -- * The toggling stops as soon as the procedure is completed. -- * -- * If the process has gone on for too long on the chip bit 5 gets. -- * After bit5 is set you can kill the operation by sending a reset -- * command to the chip. -- */ -- /* see comments in do_write_oneword */ -- dq6 = CMD(1<<6); -+ cfi_spin_unlock(chip->mutex); -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_timeout((chip->erase_time*HZ)/(2*1000)); -+ cfi_spin_lock(chip->mutex); - -- oldstatus = cfi_read(map, adr); -- status = cfi_read(map, adr); -- DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): Check 0x%.8x 0x%.8x\n", -- __func__, oldstatus, status ); -- -- while( ( ( status ^ oldstatus ) & dq6 ) -- && ! ( ta = time_after(jiffies, timeo) ) ) { -- int wait_reps; -+ timeo = jiffies + (HZ*20); - -- /* an initial short sleep */ -- cfi_spin_unlock(chip->mutex); -- schedule_timeout(HZ/100); -- cfi_spin_lock(chip->mutex); -- -+ for (;;) { - if (chip->state != FL_ERASING) { - /* Someone's suspended the erase. Sleep */ - set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&chip->wq, &wait); -- - cfi_spin_unlock(chip->mutex); -- printk(KERN_DEBUG "erase suspended. 
Sleeping\n"); -- - schedule(); - remove_wait_queue(&chip->wq, &wait); --#if 0 -- if (signal_pending(current)) -- return -EINTR; --#endif -- timeo = jiffies + (HZ*2); /* FIXME */ - cfi_spin_lock(chip->mutex); - continue; - } -- -- /* Busy wait for 1/10 of a milisecond */ -- for(wait_reps = 0; -- (wait_reps < 100) -- && ( ( status ^ oldstatus ) & dq6 ); -- wait_reps++) { -- -- /* Latency issues. Drop the lock, wait a while and retry */ -- cfi_spin_unlock(chip->mutex); -- -- cfi_udelay(1); -- -- cfi_spin_lock(chip->mutex); -- oldstatus = cfi_read(map, adr); -- status = cfi_read(map, adr); -- DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): Check 0x%.8x 0x%.8x\n", -- __func__, oldstatus, status ); -- } -- oldstatus = cfi_read(map, adr); -- status = cfi_read(map, adr); -- DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): Check 0x%.8x 0x%.8x\n", -- __func__, oldstatus, status ); -- } -- -- prev_oldstatus = oldstatus; -- prev_status = status; -- oldstatus = cfi_read(map, adr); -- status = cfi_read(map, adr); -- DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): Check 0x%.8x 0x%.8x\n", -- __func__, oldstatus, status ); -- -- if ( cfi_buswidth_is_1() ) { -- ones = (__u8)~0; -- } else if ( cfi_buswidth_is_2() ) { -- ones = (__u16)~0; -- } else if ( cfi_buswidth_is_4() ) { -- ones = (__u32)~0; -- } else { -- printk(KERN_WARNING "Unsupported buswidth\n"); -- goto erase_failed; -- } -- -- if ( oldstatus == ones && status == ones ) { -- /* success - do nothing */ -- goto erase_done; -- } -- -- if ( ta ) { -- int dq5mask = ( ( status ^ oldstatus ) & dq6 ) >> 1; -- if ( status & dq5mask ) { -- /* dq5 asserted - decode interleave chips */ -- printk( KERN_WARNING -- "MTD %s(): FLASH internal timeout: 0x%.8x\n", -- __func__, -- status & dq5mask ); -- } else { -- printk( KERN_WARNING -- "MTD %s(): Software timed out during write.\n", -- __func__ ); -+ if (chip->erase_suspended) { -+ /* This erase was suspended and resumed. 
-+ Adjust the timeout */ -+ timeo = jiffies + (HZ*20); /* FIXME */ -+ chip->erase_suspended = 0; - } -- goto erase_failed; -- } - -- printk(KERN_WARNING -- "MTD %s(): Wacky! Unable to decode failure status\n", -- __func__ ); -+ if (chip_ready(map, adr)) -+ goto op_done; - -- printk(KERN_WARNING -- "MTD %s(): 0x%.8lx(0x%.8x): 0x%.8x 0x%.8x 0x%.8x 0x%.8x\n", -- __func__, adr, ones, -- prev_oldstatus, prev_status, -- oldstatus, status); -+ if (time_after(jiffies, timeo)) -+ break; - -- erase_failed: -- ret = -EIO; -+ /* Latency issues. Drop the lock, wait a while and retry */ -+ cfi_spin_unlock(chip->mutex); -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_timeout(1); -+ cfi_spin_lock(chip->mutex); -+ } -+ -+ printk(KERN_WARNING "MTD %s(): software timeout\n", -+ __func__ ); -+ - /* reset on all failures. */ -- cfi_write( map, CMD(0xF0), chip->start ); -+ map_write( map, CMD(0xF0), chip->start ); - /* FIXME - should have reset delay before continuing */ - -- erase_done: -- DISABLE_VPP(map); -+ ret = -EIO; -+ op_done: - chip->state = FL_READY; -- wake_up(&chip->wq); -+ put_chip(map, chip, adr); - cfi_spin_unlock(chip->mutex); - return ret; - } - --static int cfi_amdstd_erase_varsize(struct mtd_info *mtd, struct erase_info *instr) --{ -- struct map_info *map = mtd->priv; -- struct cfi_private *cfi = map->fldrv_priv; -- unsigned long adr, len; -- int chipnum, ret = 0; -- int i, first; -- struct mtd_erase_region_info *regions = mtd->eraseregions; -- -- if (instr->addr > mtd->size) -- return -EINVAL; -- -- if ((instr->len + instr->addr) > mtd->size) -- return -EINVAL; -- -- /* Check that both start and end of the requested erase are -- * aligned with the erasesize at the appropriate addresses. -- */ -- -- i = 0; -- -- /* Skip all erase regions which are ended before the start of -- the requested erase. Actually, to save on the calculations, -- we skip to the first erase region which starts after the -- start of the requested erase, and then go back one. 
-- */ -- -- while (i < mtd->numeraseregions && instr->addr >= regions[i].offset) -- i++; -- i--; -- -- /* OK, now i is pointing at the erase region in which this -- erase request starts. Check the start of the requested -- erase range is aligned with the erase size which is in -- effect here. -- */ -- -- if (instr->addr & (regions[i].erasesize-1)) -- return -EINVAL; -- -- /* Remember the erase region we start on */ -- first = i; -- -- /* Next, check that the end of the requested erase is aligned -- * with the erase region at that address. -- */ -- -- while (i<mtd->numeraseregions && (instr->addr + instr->len) >= regions[i].offset) -- i++; -- -- /* As before, drop back one to point at the region in which -- the address actually falls -- */ -- i--; -- -- if ((instr->addr + instr->len) & (regions[i].erasesize-1)) -- return -EINVAL; -- -- chipnum = instr->addr >> cfi->chipshift; -- adr = instr->addr - (chipnum << cfi->chipshift); -- len = instr->len; -- -- i=first; -- -- while(len) { -- ret = do_erase_oneblock(map, &cfi->chips[chipnum], adr); -- -- if (ret) -- return ret; -- -- adr += regions[i].erasesize; -- len -= regions[i].erasesize; -- -- if (adr % (1<< cfi->chipshift) == ((regions[i].offset + (regions[i].erasesize * regions[i].numblocks)) %( 1<< cfi->chipshift))) -- i++; -- -- if (adr >> cfi->chipshift) { -- adr = 0; -- chipnum++; -- -- if (chipnum >= cfi->numchips) -- break; -- } -- } -- -- instr->state = MTD_ERASE_DONE; -- if (instr->callback) -- instr->callback(instr); -- -- return 0; --} - --static int cfi_amdstd_erase_onesize(struct mtd_info *mtd, struct erase_info *instr) -+int cfi_amdstd_erase_varsize(struct mtd_info *mtd, struct erase_info *instr) - { -- struct map_info *map = mtd->priv; -- struct cfi_private *cfi = map->fldrv_priv; -- unsigned long adr, len; -- int chipnum, ret = 0; -- -- if (instr->addr & (mtd->erasesize - 1)) -- return -EINVAL; -- -- if (instr->len & (mtd->erasesize -1)) -- return -EINVAL; -- -- if ((instr->len + instr->addr) > 
mtd->size) -- return -EINVAL; -+ unsigned long ofs, len; -+ int ret; - -- chipnum = instr->addr >> cfi->chipshift; -- adr = instr->addr - (chipnum << cfi->chipshift); -+ ofs = instr->addr; - len = instr->len; - -- while(len) { -- ret = do_erase_oneblock(map, &cfi->chips[chipnum], adr); -- -- if (ret) -- return ret; -- -- adr += mtd->erasesize; -- len -= mtd->erasesize; -+ ret = cfi_varsize_frob(mtd, do_erase_oneblock, ofs, len, NULL); -+ if (ret) -+ return ret; - -- if (adr >> cfi->chipshift) { -- adr = 0; -- chipnum++; -- -- if (chipnum >= cfi->numchips) -- break; -- } -- } -- - instr->state = MTD_ERASE_DONE; -- if (instr->callback) -- instr->callback(instr); -+ mtd_erase_callback(instr); - - return 0; - } - -+ - static int cfi_amdstd_erase_chip(struct mtd_info *mtd, struct erase_info *instr) - { - struct map_info *map = mtd->priv; -@@ -1324,12 +1381,12 @@ - return ret; - - instr->state = MTD_ERASE_DONE; -- if (instr->callback) -- instr->callback(instr); -+ mtd_erase_callback(instr); - - return 0; - } - -+ - static void cfi_amdstd_sync (struct mtd_info *mtd) - { - struct map_info *map = mtd->priv; -@@ -1368,7 +1425,7 @@ - - schedule(); - -- remove_wait_queue(&chip->wq, &wait); -+ remove_wait_queue(&chip->wq, &wait); - - goto retry; - } -@@ -1427,7 +1484,7 @@ - /* Unlock the chips again */ - - if (ret) { -- for (i--; i >=0; i--) { -+ for (i--; i >=0; i--) { - chip = &cfi->chips[i]; - - cfi_spin_lock(chip->mutex); -@@ -1443,6 +1500,7 @@ - return ret; - } - -+ - static void cfi_amdstd_resume(struct mtd_info *mtd) - { - struct map_info *map = mtd->priv; -@@ -1458,7 +1516,7 @@ - - if (chip->state == FL_PM_SUSPENDED) { - chip->state = FL_READY; -- cfi_write(map, CMD(0xF0), chip->start); -+ map_write(map, CMD(0xF0), chip->start); - wake_up(&chip->wq); - } - else -@@ -1480,21 +1538,23 @@ - - static char im_name[]="cfi_cmdset_0002"; - -+ - int __init cfi_amdstd_init(void) - { - inter_module_register(im_name, THIS_MODULE, &cfi_cmdset_0002); - return 0; - } - -+ - static 
void __exit cfi_amdstd_exit(void) - { - inter_module_unregister(im_name); - } - -+ - module_init(cfi_amdstd_init); - module_exit(cfi_amdstd_exit); - - MODULE_LICENSE("GPL"); - MODULE_AUTHOR("Crossnet Co. <info@crossnet.co.jp> et al."); - MODULE_DESCRIPTION("MTD chip driver for AMD/Fujitsu flash chips"); -- -Index: linux-2.6.5/drivers/mtd/chips/cfi_cmdset_0020.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/chips/cfi_cmdset_0020.c 2004-04-03 22:36:55.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/chips/cfi_cmdset_0020.c 2005-02-01 17:11:17.000000000 -0500 -@@ -4,6 +4,7 @@ - * - * (C) 2000 Red Hat. GPL'd - * -+ * $Id: cfi_cmdset_0020.c,v 1.15 2004/08/09 13:19:43 dwmw2 Exp $ - * - * 10/10/2000 Nicolas Pitre <nico@cam.org> - * - completely revamped method functions so they are aware and -@@ -38,7 +39,7 @@ - - static int cfi_staa_read(struct mtd_info *, loff_t, size_t, size_t *, u_char *); - static int cfi_staa_write_buffers(struct mtd_info *, loff_t, size_t, size_t *, const u_char *); --static int cfi_staa_writev(struct mtd_info *mtd, const struct iovec *vecs, -+static int cfi_staa_writev(struct mtd_info *mtd, const struct kvec *vecs, - unsigned long count, loff_t to, size_t *retlen); - static int cfi_staa_erase_varsize(struct mtd_info *, struct erase_info *); - static void cfi_staa_sync (struct mtd_info *); -@@ -116,7 +117,6 @@ - { - struct cfi_private *cfi = map->fldrv_priv; - int i; -- __u32 base = cfi->chips[0].start; - - if (cfi->cfi_mode) { - /* -@@ -126,36 +126,11 @@ - */ - __u16 adr = primary?cfi->cfiq->P_ADR:cfi->cfiq->A_ADR; - struct cfi_pri_intelext *extp; -- int ofs_factor = cfi->interleave * cfi->device_type; - -- printk(" ST Microelectronics Extended Query Table at 0x%4.4X\n", adr); -- if (!adr) -+ extp = (struct cfi_pri_intelext*)cfi_read_pri(map, adr, sizeof(*extp), "ST Microelectronics"); -+ if (!extp) - return NULL; - -- /* Switch it into Query Mode */ -- cfi_send_gen_cmd(0x98, 0x55, base, map, 
cfi, cfi->device_type, NULL); -- -- extp = kmalloc(sizeof(*extp), GFP_KERNEL); -- if (!extp) { -- printk(KERN_ERR "Failed to allocate memory\n"); -- return NULL; -- } -- -- /* Read in the Extended Query Table */ -- for (i=0; i<sizeof(*extp); i++) { -- ((unsigned char *)extp)[i] = -- cfi_read_query(map, (base+((adr+i)*ofs_factor))); -- } -- -- if (extp->MajorVersion != '1' || -- (extp->MinorVersion < '0' || extp->MinorVersion > '2')) { -- printk(KERN_WARNING " Unknown staa Extended Query " -- "version %c.%c.\n", extp->MajorVersion, -- extp->MinorVersion); -- kfree(extp); -- return NULL; -- } -- - /* Do some byteswapping if necessary */ - extp->FeatureSupport = cfi32_to_cpu(extp->FeatureSupport); - extp->BlkStatusRegMask = cfi32_to_cpu(extp->BlkStatusRegMask); -@@ -175,8 +150,6 @@ - cfi->chips[i].erase_time = 1024; - } - -- /* Make sure it's in read mode */ -- cfi_send_gen_cmd(0xff, 0x55, base, map, cfi, cfi->device_type, NULL); - return cfi_staa_setup(map); - } - -@@ -266,7 +239,7 @@ - - static inline int do_read_onechip(struct map_info *map, struct flchip *chip, loff_t adr, size_t len, u_char *buf) - { -- __u32 status, status_OK; -+ map_word status, status_OK; - unsigned long timeo; - DECLARE_WAITQUEUE(wait, current); - int suspended = 0; -@@ -276,7 +249,7 @@ - adr += chip->start; - - /* Ensure cmd read/writes are aligned. 
*/ -- cmd_addr = adr & ~(CFIDEV_BUSWIDTH-1); -+ cmd_addr = adr & ~(map_bankwidth(map)-1); - - /* Let's determine this according to the interleave only once */ - status_OK = CMD(0x80); -@@ -290,33 +263,33 @@ - */ - switch (chip->state) { - case FL_ERASING: -- if (!((struct cfi_pri_intelext *)cfi->cmdset_priv)->FeatureSupport & 2) -+ if (!(((struct cfi_pri_intelext *)cfi->cmdset_priv)->FeatureSupport & 2)) - goto sleep; /* We don't support erase suspend */ - -- cfi_write (map, CMD(0xb0), cmd_addr); -+ map_write (map, CMD(0xb0), cmd_addr); - /* If the flash has finished erasing, then 'erase suspend' - * appears to make some (28F320) flash devices switch to - * 'read' mode. Make sure that we switch to 'read status' - * mode so we get the right data. --rmk - */ -- cfi_write(map, CMD(0x70), cmd_addr); -+ map_write(map, CMD(0x70), cmd_addr); - chip->oldstate = FL_ERASING; - chip->state = FL_ERASE_SUSPENDING; - // printk("Erase suspending at 0x%lx\n", cmd_addr); - for (;;) { -- status = cfi_read(map, cmd_addr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, cmd_addr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) - break; - - if (time_after(jiffies, timeo)) { - /* Urgh */ -- cfi_write(map, CMD(0xd0), cmd_addr); -+ map_write(map, CMD(0xd0), cmd_addr); - /* make sure we're in 'read status' mode */ -- cfi_write(map, CMD(0x70), cmd_addr); -+ map_write(map, CMD(0x70), cmd_addr); - chip->state = FL_ERASING; - spin_unlock_bh(chip->mutex); - printk(KERN_ERR "Chip not ready after erase " -- "suspended: status = 0x%x\n", status); -+ "suspended: status = 0x%lx\n", status.x[0]); - return -EIO; - } - -@@ -326,7 +299,7 @@ - } - - suspended = 1; -- cfi_write(map, CMD(0xff), cmd_addr); -+ map_write(map, CMD(0xff), cmd_addr); - chip->state = FL_READY; - break; - -@@ -340,13 +313,13 @@ - - case FL_CFI_QUERY: - case FL_JEDEC_QUERY: -- cfi_write(map, CMD(0x70), cmd_addr); -+ map_write(map, CMD(0x70), cmd_addr); - chip->state = FL_STATUS; - - case FL_STATUS: 
-- status = cfi_read(map, cmd_addr); -- if ((status & status_OK) == status_OK) { -- cfi_write(map, CMD(0xff), cmd_addr); -+ status = map_read(map, cmd_addr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) { -+ map_write(map, CMD(0xff), cmd_addr); - chip->state = FL_READY; - break; - } -@@ -354,7 +327,7 @@ - /* Urgh. Chip not yet ready to talk to us. */ - if (time_after(jiffies, timeo)) { - spin_unlock_bh(chip->mutex); -- printk(KERN_ERR "waiting for chip to be ready timed out in read. WSM status = %x\n", status); -+ printk(KERN_ERR "waiting for chip to be ready timed out in read. WSM status = %lx\n", status.x[0]); - return -EIO; - } - -@@ -389,8 +362,8 @@ - sending the 0x70 (Read Status) command to an erasing - chip and expecting it to be ignored, that's what we - do. */ -- cfi_write(map, CMD(0xd0), cmd_addr); -- cfi_write(map, CMD(0x70), cmd_addr); -+ map_write(map, CMD(0xd0), cmd_addr); -+ map_write(map, CMD(0x70), cmd_addr); - } - - wake_up(&chip->wq); -@@ -441,16 +414,16 @@ - unsigned long adr, const u_char *buf, int len) - { - struct cfi_private *cfi = map->fldrv_priv; -- __u32 status, status_OK; -+ map_word status, status_OK; - unsigned long cmd_adr, timeo; - DECLARE_WAITQUEUE(wait, current); - int wbufsize, z; - - /* M58LW064A requires bus alignment for buffer wriets -- saw */ -- if (adr & (CFIDEV_BUSWIDTH-1)) -+ if (adr & (map_bankwidth(map)-1)) - return -EINVAL; - -- wbufsize = CFIDEV_INTERLEAVE << cfi->cfiq->MaxBufWriteSize; -+ wbufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize; - adr += chip->start; - cmd_adr = adr & ~(wbufsize-1); - -@@ -476,21 +449,21 @@ - - case FL_CFI_QUERY: - case FL_JEDEC_QUERY: -- cfi_write(map, CMD(0x70), cmd_adr); -+ map_write(map, CMD(0x70), cmd_adr); - chip->state = FL_STATUS; - #ifdef DEBUG_CFI_FEATURES -- printk("%s: 1 status[%x]\n", __FUNCTION__, cfi_read(map, cmd_adr)); -+ printk("%s: 1 status[%x]\n", __FUNCTION__, map_read(map, cmd_adr)); - #endif - - case FL_STATUS: -- status = cfi_read(map, 
cmd_adr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, cmd_adr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) - break; - /* Urgh. Chip not yet ready to talk to us. */ - if (time_after(jiffies, timeo)) { - spin_unlock_bh(chip->mutex); -- printk(KERN_ERR "waiting for chip to be ready timed out in buffer write Xstatus = %x, status = %x\n", -- status, cfi_read(map, cmd_adr)); -+ printk(KERN_ERR "waiting for chip to be ready timed out in buffer write Xstatus = %lx, status = %lx\n", -+ status.x[0], map_read(map, cmd_adr).x[0]); - return -EIO; - } - -@@ -512,13 +485,13 @@ - } - - ENABLE_VPP(map); -- cfi_write(map, CMD(0xe8), cmd_adr); -+ map_write(map, CMD(0xe8), cmd_adr); - chip->state = FL_WRITING_TO_BUFFER; - - z = 0; - for (;;) { -- status = cfi_read(map, cmd_adr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, cmd_adr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) - break; - - spin_unlock_bh(chip->mutex); -@@ -528,41 +501,26 @@ - if (++z > 100) { - /* Argh. Not ready for write to buffer */ - DISABLE_VPP(map); -- cfi_write(map, CMD(0x70), cmd_adr); -+ map_write(map, CMD(0x70), cmd_adr); - chip->state = FL_STATUS; - spin_unlock_bh(chip->mutex); -- printk(KERN_ERR "Chip not ready for buffer write. Xstatus = %x\n", status); -+ printk(KERN_ERR "Chip not ready for buffer write. 
Xstatus = %lx\n", status.x[0]); - return -EIO; - } - } - - /* Write length of data to come */ -- cfi_write(map, CMD(len/CFIDEV_BUSWIDTH-1), cmd_adr ); -+ map_write(map, CMD(len/map_bankwidth(map)-1), cmd_adr ); - - /* Write data */ -- for (z = 0; z < len; z += CFIDEV_BUSWIDTH) { -- if (cfi_buswidth_is_1()) { -- u8 *b = (u8 *)buf; -- -- map_write8 (map, *b++, adr+z); -- buf = (const u_char *)b; -- } else if (cfi_buswidth_is_2()) { -- u16 *b = (u16 *)buf; -- -- map_write16 (map, *b++, adr+z); -- buf = (const u_char *)b; -- } else if (cfi_buswidth_is_4()) { -- u32 *b = (u32 *)buf; -- -- map_write32 (map, *b++, adr+z); -- buf = (const u_char *)b; -- } else { -- DISABLE_VPP(map); -- return -EINVAL; -- } -+ for (z = 0; z < len; -+ z += map_bankwidth(map), buf += map_bankwidth(map)) { -+ map_word d; -+ d = map_word_load(map, buf); -+ map_write(map, d, adr+z); - } - /* GO GO GO */ -- cfi_write(map, CMD(0xd0), cmd_adr); -+ map_write(map, CMD(0xd0), cmd_adr); - chip->state = FL_WRITING; - - spin_unlock_bh(chip->mutex); -@@ -584,16 +542,16 @@ - continue; - } - -- status = cfi_read(map, cmd_adr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, cmd_adr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) - break; - - /* OK Still waiting */ - if (time_after(jiffies, timeo)) { - /* clear status */ -- cfi_write(map, CMD(0x50), cmd_adr); -+ map_write(map, CMD(0x50), cmd_adr); - /* put back into read status register mode */ -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x70), adr); - chip->state = FL_STATUS; - DISABLE_VPP(map); - spin_unlock_bh(chip->mutex); -@@ -620,19 +578,18 @@ - chip->state = FL_STATUS; - - /* check for errors: 'lock bit', 'VPP', 'dead cell'/'unerased cell' or 'incorrect cmd' -- saw */ -- if ((status & CMD(0x02)) || (status & CMD(0x08)) || -- (status & CMD(0x10)) || (status & CMD(0x20))) { -+ if (map_word_bitsset(map, status, CMD(0x3a))) { - #ifdef DEBUG_CFI_FEATURES -- printk("%s: 2 status[%x]\n", __FUNCTION__, status); 
-+ printk("%s: 2 status[%lx]\n", __FUNCTION__, status.x[0]); - #endif -- /* clear status */ -- cfi_write(map, CMD(0x50), cmd_adr); -- /* put back into read status register mode */ -- cfi_write(map, CMD(0x70), adr); -- wake_up(&chip->wq); -- spin_unlock_bh(chip->mutex); -- return (status & CMD(0x02)) ? -EROFS : -EIO; -- } -+ /* clear status */ -+ map_write(map, CMD(0x50), cmd_adr); -+ /* put back into read status register mode */ -+ map_write(map, CMD(0x70), adr); -+ wake_up(&chip->wq); -+ spin_unlock_bh(chip->mutex); -+ return map_word_bitsset(map, status, CMD(0x02)) ? -EROFS : -EIO; -+ } - wake_up(&chip->wq); - spin_unlock_bh(chip->mutex); - -@@ -644,7 +601,7 @@ - { - struct map_info *map = mtd->priv; - struct cfi_private *cfi = map->fldrv_priv; -- int wbufsize = CFIDEV_INTERLEAVE << cfi->cfiq->MaxBufWriteSize; -+ int wbufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize; - int ret = 0; - int chipnum; - unsigned long ofs; -@@ -657,7 +614,7 @@ - ofs = to - (chipnum << cfi->chipshift); - - #ifdef DEBUG_CFI_FEATURES -- printk("%s: CFIDEV_BUSWIDTH[%x]\n", __FUNCTION__, CFIDEV_BUSWIDTH); -+ printk("%s: map_bankwidth(map)[%x]\n", __FUNCTION__, map_bankwidth(map)); - printk("%s: chipnum[%x] wbufsize[%x]\n", __FUNCTION__, chipnum, wbufsize); - printk("%s: ofs[%x] len[%x]\n", __FUNCTION__, ofs, len); - #endif -@@ -700,7 +657,7 @@ - #define ECCBUF_DIV(x) ((x) & ~(ECCBUF_SIZE - 1)) - #define ECCBUF_MOD(x) ((x) & (ECCBUF_SIZE - 1)) - static int --cfi_staa_writev(struct mtd_info *mtd, const struct iovec *vecs, -+cfi_staa_writev(struct mtd_info *mtd, const struct kvec *vecs, - unsigned long count, loff_t to, size_t *retlen) - { - unsigned long i; -@@ -769,7 +726,7 @@ - static inline int do_erase_oneblock(struct map_info *map, struct flchip *chip, unsigned long adr) - { - struct cfi_private *cfi = map->fldrv_priv; -- __u32 status, status_OK; -+ map_word status, status_OK; - unsigned long timeo; - int retries = 3; - DECLARE_WAITQUEUE(wait, current); -@@ -789,12 +746,12 @@ 
- case FL_CFI_QUERY: - case FL_JEDEC_QUERY: - case FL_READY: -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x70), adr); - chip->state = FL_STATUS; - - case FL_STATUS: -- status = cfi_read(map, adr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, adr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) - break; - - /* Urgh. Chip not yet ready to talk to us. */ -@@ -823,11 +780,11 @@ - - ENABLE_VPP(map); - /* Clear the status register first */ -- cfi_write(map, CMD(0x50), adr); -+ map_write(map, CMD(0x50), adr); - - /* Now erase */ -- cfi_write(map, CMD(0x20), adr); -- cfi_write(map, CMD(0xD0), adr); -+ map_write(map, CMD(0x20), adr); -+ map_write(map, CMD(0xD0), adr); - chip->state = FL_ERASING; - - spin_unlock_bh(chip->mutex); -@@ -851,15 +808,15 @@ - continue; - } - -- status = cfi_read(map, adr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, adr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) - break; - - /* OK Still waiting */ - if (time_after(jiffies, timeo)) { -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x70), adr); - chip->state = FL_STATUS; -- printk(KERN_ERR "waiting for erase to complete timed out. Xstatus = %x, status = %x.\n", status, cfi_read(map, adr)); -+ printk(KERN_ERR "waiting for erase to complete timed out. Xstatus = %lx, status = %lx.\n", status.x[0], map_read(map, adr).x[0]); - DISABLE_VPP(map); - spin_unlock_bh(chip->mutex); - return -EIO; -@@ -875,43 +832,46 @@ - ret = 0; - - /* We've broken this before. 
It doesn't hurt to be safe */ -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x70), adr); - chip->state = FL_STATUS; -- status = cfi_read(map, adr); -+ status = map_read(map, adr); - - /* check for lock bit */ -- if (status & CMD(0x3a)) { -- unsigned char chipstatus = status; -- if (status != CMD(status & 0xff)) { -- int i; -- for (i = 1; i<CFIDEV_INTERLEAVE; i++) { -- chipstatus |= status >> (cfi->device_type * 8); -+ if (map_word_bitsset(map, status, CMD(0x3a))) { -+ unsigned char chipstatus = status.x[0]; -+ if (!map_word_equal(map, status, CMD(chipstatus))) { -+ int i, w; -+ for (w=0; w<map_words(map); w++) { -+ for (i = 0; i<cfi_interleave(cfi); i++) { -+ chipstatus |= status.x[w] >> (cfi->device_type * 8); -+ } - } -- printk(KERN_WARNING "Status is not identical for all chips: 0x%x. Merging to give 0x%02x\n", status, chipstatus); -+ printk(KERN_WARNING "Status is not identical for all chips: 0x%lx. Merging to give 0x%02x\n", -+ status.x[0], chipstatus); - } - /* Reset the error bits */ -- cfi_write(map, CMD(0x50), adr); -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x50), adr); -+ map_write(map, CMD(0x70), adr); - - if ((chipstatus & 0x30) == 0x30) { -- printk(KERN_NOTICE "Chip reports improper command sequence: status 0x%x\n", status); -+ printk(KERN_NOTICE "Chip reports improper command sequence: status 0x%x\n", chipstatus); - ret = -EIO; - } else if (chipstatus & 0x02) { - /* Protection bit set */ - ret = -EROFS; - } else if (chipstatus & 0x8) { - /* Voltage */ -- printk(KERN_WARNING "Chip reports voltage low on erase: status 0x%x\n", status); -+ printk(KERN_WARNING "Chip reports voltage low on erase: status 0x%x\n", chipstatus); - ret = -EIO; - } else if (chipstatus & 0x20) { - if (retries--) { -- printk(KERN_DEBUG "Chip erase failed at 0x%08lx: status 0x%x. Retrying...\n", adr, status); -+ printk(KERN_DEBUG "Chip erase failed at 0x%08lx: status 0x%x. 
Retrying...\n", adr, chipstatus); - timeo = jiffies + HZ; - chip->state = FL_STATUS; - spin_unlock_bh(chip->mutex); - goto retry; - } -- printk(KERN_DEBUG "Chip erase failed at 0x%08lx: status 0x%x\n", adr, status); -+ printk(KERN_DEBUG "Chip erase failed at 0x%08lx: status 0x%x\n", adr, chipstatus); - ret = -EIO; - } - } -@@ -1006,8 +966,7 @@ - } - - instr->state = MTD_ERASE_DONE; -- if (instr->callback) -- instr->callback(instr); -+ mtd_erase_callback(instr); - - return 0; - } -@@ -1072,7 +1031,7 @@ - static inline int do_lock_oneblock(struct map_info *map, struct flchip *chip, unsigned long adr) - { - struct cfi_private *cfi = map->fldrv_priv; -- __u32 status, status_OK; -+ map_word status, status_OK; - unsigned long timeo = jiffies + HZ; - DECLARE_WAITQUEUE(wait, current); - -@@ -1090,12 +1049,12 @@ - case FL_CFI_QUERY: - case FL_JEDEC_QUERY: - case FL_READY: -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x70), adr); - chip->state = FL_STATUS; - - case FL_STATUS: -- status = cfi_read(map, adr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, adr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) - break; - - /* Urgh. Chip not yet ready to talk to us. */ -@@ -1123,8 +1082,8 @@ - } - - ENABLE_VPP(map); -- cfi_write(map, CMD(0x60), adr); -- cfi_write(map, CMD(0x01), adr); -+ map_write(map, CMD(0x60), adr); -+ map_write(map, CMD(0x01), adr); - chip->state = FL_LOCKING; - - spin_unlock_bh(chip->mutex); -@@ -1137,15 +1096,15 @@ - timeo = jiffies + (HZ*2); - for (;;) { - -- status = cfi_read(map, adr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, adr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) - break; - - /* OK Still waiting */ - if (time_after(jiffies, timeo)) { -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x70), adr); - chip->state = FL_STATUS; -- printk(KERN_ERR "waiting for lock to complete timed out. 
Xstatus = %x, status = %x.\n", status, cfi_read(map, adr)); -+ printk(KERN_ERR "waiting for lock to complete timed out. Xstatus = %lx, status = %lx.\n", status.x[0], map_read(map, adr).x[0]); - DISABLE_VPP(map); - spin_unlock_bh(chip->mutex); - return -EIO; -@@ -1221,7 +1180,7 @@ - static inline int do_unlock_oneblock(struct map_info *map, struct flchip *chip, unsigned long adr) - { - struct cfi_private *cfi = map->fldrv_priv; -- __u32 status, status_OK; -+ map_word status, status_OK; - unsigned long timeo = jiffies + HZ; - DECLARE_WAITQUEUE(wait, current); - -@@ -1239,12 +1198,12 @@ - case FL_CFI_QUERY: - case FL_JEDEC_QUERY: - case FL_READY: -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x70), adr); - chip->state = FL_STATUS; - - case FL_STATUS: -- status = cfi_read(map, adr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, adr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) - break; - - /* Urgh. Chip not yet ready to talk to us. */ -@@ -1272,8 +1231,8 @@ - } - - ENABLE_VPP(map); -- cfi_write(map, CMD(0x60), adr); -- cfi_write(map, CMD(0xD0), adr); -+ map_write(map, CMD(0x60), adr); -+ map_write(map, CMD(0xD0), adr); - chip->state = FL_UNLOCKING; - - spin_unlock_bh(chip->mutex); -@@ -1286,15 +1245,15 @@ - timeo = jiffies + (HZ*2); - for (;;) { - -- status = cfi_read(map, adr); -- if ((status & status_OK) == status_OK) -+ status = map_read(map, adr); -+ if (map_word_andequal(map, status, status_OK, status_OK)) - break; - - /* OK Still waiting */ - if (time_after(jiffies, timeo)) { -- cfi_write(map, CMD(0x70), adr); -+ map_write(map, CMD(0x70), adr); - chip->state = FL_STATUS; -- printk(KERN_ERR "waiting for unlock to complete timed out. Xstatus = %x, status = %x.\n", status, cfi_read(map, adr)); -+ printk(KERN_ERR "waiting for unlock to complete timed out. 
Xstatus = %lx, status = %lx.\n", status.x[0], map_read(map, adr).x[0]); - DISABLE_VPP(map); - spin_unlock_bh(chip->mutex); - return -EIO; -@@ -1423,7 +1382,7 @@ - - /* Go to known state. Chip may have been power cycled */ - if (chip->state == FL_PM_SUSPENDED) { -- cfi_write(map, CMD(0xFF), 0); -+ map_write(map, CMD(0xFF), 0); - chip->state = FL_READY; - wake_up(&chip->wq); - } -@@ -1440,11 +1399,6 @@ - kfree(cfi); - } - --#if LINUX_VERSION_CODE < 0x20212 && defined(MODULE) --#define cfi_staa_init init_module --#define cfi_staa_exit cleanup_module --#endif -- - static char im_name[]="cfi_cmdset_0020"; - - int __init cfi_staa_init(void) -Index: linux-2.6.5/drivers/mtd/chips/cfi_probe.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/chips/cfi_probe.c 2004-04-03 22:37:40.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/chips/cfi_probe.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,7 +1,7 @@ - /* - Common Flash Interface probe code. - (C) 2000 Red Hat. GPL'd. 
-- $Id: cfi_probe.c,v 1.71 2003/05/28 12:51:48 dwmw2 Exp $ -+ $Id: cfi_probe.c,v 1.77 2004/07/14 08:38:44 dwmw2 Exp $ - */ - - #include <linux/config.h> -@@ -26,7 +26,7 @@ - #endif - - static int cfi_probe_chip(struct map_info *map, __u32 base, -- struct flchip *chips, struct cfi_private *cfi); -+ unsigned long *chip_map, struct cfi_private *cfi); - static int cfi_chip_setup(struct map_info *map, struct cfi_private *cfi); - - struct mtd_info *cfi_probe(struct map_info *map); -@@ -35,21 +35,36 @@ - in: interleave,type,mode - ret: table index, <0 for error - */ --static inline int qry_present(struct map_info *map, __u32 base, -+static int qry_present(struct map_info *map, __u32 base, - struct cfi_private *cfi) - { - int osf = cfi->interleave * cfi->device_type; // scale factor -+ map_word val; -+ map_word qry; - -- if (cfi_read(map,base+osf*0x10)==cfi_build_cmd('Q',map,cfi) && -- cfi_read(map,base+osf*0x11)==cfi_build_cmd('R',map,cfi) && -- cfi_read(map,base+osf*0x12)==cfi_build_cmd('Y',map,cfi)) -- return 1; // ok ! 
-+ qry = cfi_build_cmd('Q', map, cfi); -+ val = map_read(map, base + osf*0x10); - -- return 0; // nothing found -+ if (!map_word_equal(map, qry, val)) -+ return 0; -+ -+ qry = cfi_build_cmd('R', map, cfi); -+ val = map_read(map, base + osf*0x11); -+ -+ if (!map_word_equal(map, qry, val)) -+ return 0; -+ -+ qry = cfi_build_cmd('Y', map, cfi); -+ val = map_read(map, base + osf*0x12); -+ -+ if (!map_word_equal(map, qry, val)) -+ return 0; -+ -+ return 1; // nothing found - } - - static int cfi_probe_chip(struct map_info *map, __u32 base, -- struct flchip *chips, struct cfi_private *cfi) -+ unsigned long *chip_map, struct cfi_private *cfi) - { - int i; - -@@ -66,6 +81,7 @@ - return 0; - } - cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); -+ cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); - cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); - - if (!qry_present(map,base,cfi)) -@@ -78,18 +94,25 @@ - } - - /* Check each previous chip to see if it's an alias */ -- for (i=0; i<cfi->numchips; i++) { -+ for (i=0; i < (base >> cfi->chipshift); i++) { -+ unsigned long start; -+ if(!test_bit(i, chip_map)) { -+ /* Skip location; no valid chip at this address */ -+ continue; -+ } -+ start = i << cfi->chipshift; - /* This chip should be in read mode if it's one - we've already touched. */ -- if (qry_present(map,chips[i].start,cfi)) { -+ if (qry_present(map, start, cfi)) { - /* Eep. This chip also had the QRY marker. - * Is it an alias for the new one? 
*/ -- cfi_send_gen_cmd(0xF0, 0, chips[i].start, map, cfi, cfi->device_type, NULL); -+ cfi_send_gen_cmd(0xF0, 0, start, map, cfi, cfi->device_type, NULL); -+ cfi_send_gen_cmd(0xFF, 0, start, map, cfi, cfi->device_type, NULL); - - /* If the QRY marker goes away, it's an alias */ -- if (!qry_present(map, chips[i].start, cfi)) { -+ if (!qry_present(map, start, cfi)) { - printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n", -- map->name, base, chips[i].start); -+ map->name, base, start); - return 0; - } - /* Yes, it's actually got QRY for data. Most -@@ -97,10 +120,11 @@ - * too and if it's the same, assume it's an alias. */ - /* FIXME: Use other modes to do a proper check */ - cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); -+ cfi_send_gen_cmd(0xFF, 0, start, map, cfi, cfi->device_type, NULL); - - if (qry_present(map, base, cfi)) { - printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n", -- map->name, base, chips[i].start); -+ map->name, base, start); - return 0; - } - } -@@ -108,21 +132,16 @@ - - /* OK, if we got to here, then none of the previous chips appear to - be aliases for the current one. */ -- if (cfi->numchips == MAX_CFI_CHIPS) { -- printk(KERN_WARNING"%s: Too many flash chips detected. 
Increase MAX_CFI_CHIPS from %d.\n", map->name, MAX_CFI_CHIPS); -- /* Doesn't matter about resetting it to Read Mode - we're not going to talk to it anyway */ -- return -1; -- } -- chips[cfi->numchips].start = base; -- chips[cfi->numchips].state = FL_READY; -+ set_bit((base >> cfi->chipshift), chip_map); /* Update chip map */ - cfi->numchips++; - - /* Put it back into Read Mode */ - cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); -+ cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); - -- printk(KERN_INFO "%s: Found %d x%d devices at 0x%x in %d-bit mode\n", -+ printk(KERN_INFO "%s: Found %d x%d devices at 0x%x in %d-bit bank\n", - map->name, cfi->interleave, cfi->device_type*8, base, -- map->buswidth*8); -+ map->bankwidth*8); - - return 1; - } -@@ -150,7 +169,6 @@ - memset(cfi->cfiq,0,sizeof(struct cfi_ident)); - - cfi->cfi_mode = CFI_MODE_CFI; -- cfi->fast_prog=1; /* CFI supports fast programming */ - - /* Read the CFI info structure */ - for (i=0; i<(sizeof(struct cfi_ident) + num_erase_regions * 4); i++) { -@@ -180,8 +198,29 @@ - (cfi->cfiq->EraseRegionInfo[i] & 0xffff) + 1); - #endif - } -+ -+ /* Note we put the device back into Read Mode BEFORE going into Auto -+ * Select Mode, as some devices support nesting of modes, others -+ * don't. This way should always work. -+ * On cmdset 0001 the writes of 0xaa and 0x55 are not needed, and -+ * so should be treated as nops or illegal (and so put the device -+ * back into Read Mode, which is a nop in this case). 
-+ */ -+ cfi_send_gen_cmd(0xf0, 0, base, map, cfi, cfi->device_type, NULL); -+ cfi_send_gen_cmd(0xaa, 0x555, base, map, cfi, cfi->device_type, NULL); -+ cfi_send_gen_cmd(0x55, 0x2aa, base, map, cfi, cfi->device_type, NULL); -+ cfi_send_gen_cmd(0x90, 0x555, base, map, cfi, cfi->device_type, NULL); -+ cfi->mfr = cfi_read_query(map, base); -+ cfi->id = cfi_read_query(map, base + ofs_factor); -+ - /* Put it back into Read Mode */ - cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); -+ /* ... even if it's an Intel chip */ -+ cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); -+ -+ printk(KERN_INFO "%s: Found %d x%d devices at 0x%x in %d-bit bank\n", -+ map->name, cfi->interleave, cfi->device_type*8, base, -+ map->bankwidth*8); - - return 1; - } -@@ -241,11 +280,11 @@ - printk("No Alternate Algorithm Table\n"); - - -- printk("Vcc Minimum: %x.%x V\n", cfip->VccMin >> 4, cfip->VccMin & 0xf); -- printk("Vcc Maximum: %x.%x V\n", cfip->VccMax >> 4, cfip->VccMax & 0xf); -+ printk("Vcc Minimum: %2d.%d V\n", cfip->VccMin >> 4, cfip->VccMin & 0xf); -+ printk("Vcc Maximum: %2d.%d V\n", cfip->VccMax >> 4, cfip->VccMax & 0xf); - if (cfip->VppMin) { -- printk("Vpp Minimum: %x.%x V\n", cfip->VppMin >> 4, cfip->VppMin & 0xf); -- printk("Vpp Maximum: %x.%x V\n", cfip->VppMax >> 4, cfip->VppMax & 0xf); -+ printk("Vpp Minimum: %2d.%d V\n", cfip->VppMin >> 4, cfip->VppMin & 0xf); -+ printk("Vpp Maximum: %2d.%d V\n", cfip->VppMax >> 4, cfip->VppMax & 0xf); - } - else - printk("No Vpp line\n"); -Index: linux-2.6.5/drivers/mtd/chips/cfi_util.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/chips/cfi_util.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/chips/cfi_util.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,188 @@ -+/* -+ * Common Flash Interface support: -+ * Generic utility functions not dependant on command set -+ * -+ * Copyright (C) 2002 Red Hat -+ * Copyright (C) 2003 
STMicroelectronics Limited -+ * -+ * This code is covered by the GPL. -+ * -+ * $Id: cfi_util.c,v 1.5 2004/08/12 06:40:23 eric Exp $ -+ * -+ */ -+ -+#include <linux/module.h> -+#include <linux/types.h> -+#include <linux/kernel.h> -+#include <linux/sched.h> -+#include <asm/io.h> -+#include <asm/byteorder.h> -+ -+#include <linux/errno.h> -+#include <linux/slab.h> -+#include <linux/delay.h> -+#include <linux/interrupt.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/map.h> -+#include <linux/mtd/cfi.h> -+#include <linux/mtd/compatmac.h> -+ -+struct cfi_extquery * -+cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* name) -+{ -+ struct cfi_private *cfi = map->fldrv_priv; -+ __u32 base = 0; // cfi->chips[0].start; -+ int ofs_factor = cfi->interleave * cfi->device_type; -+ int i; -+ struct cfi_extquery *extp = NULL; -+ -+ printk(" %s Extended Query Table at 0x%4.4X\n", name, adr); -+ if (!adr) -+ goto out; -+ -+ /* Switch it into Query Mode */ -+ cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); -+ -+ extp = kmalloc(size, GFP_KERNEL); -+ if (!extp) { -+ printk(KERN_ERR "Failed to allocate memory\n"); -+ goto out; -+ } -+ -+ /* Read in the Extended Query Table */ -+ for (i=0; i<size; i++) { -+ ((unsigned char *)extp)[i] = -+ cfi_read_query(map, base+((adr+i)*ofs_factor)); -+ } -+ -+ if (extp->MajorVersion != '1' || -+ (extp->MinorVersion < '0' || extp->MinorVersion > '3')) { -+ printk(KERN_WARNING " Unknown %s Extended Query " -+ "version %c.%c.\n", name, extp->MajorVersion, -+ extp->MinorVersion); -+ kfree(extp); -+ extp = NULL; -+ goto out; -+ } -+ -+out: -+ /* Make sure it's in read mode */ -+ cfi_send_gen_cmd(0xf0, 0, base, map, cfi, cfi->device_type, NULL); -+ cfi_send_gen_cmd(0xff, 0, base, map, cfi, cfi->device_type, NULL); -+ -+ return extp; -+} -+ -+EXPORT_SYMBOL(cfi_read_pri); -+ -+void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup *fixups) -+{ -+ struct map_info *map = mtd->priv; -+ struct cfi_private *cfi = 
map->fldrv_priv; -+ struct cfi_fixup *f; -+ -+ for (f=fixups; f->fixup; f++) { -+ if (((f->mfr == CFI_MFR_ANY) || (f->mfr == cfi->mfr)) && -+ ((f->id == CFI_ID_ANY) || (f->id == cfi->id))) { -+ f->fixup(mtd, f->param); -+ } -+ } -+} -+ -+EXPORT_SYMBOL(cfi_fixup); -+ -+int cfi_varsize_frob(struct mtd_info *mtd, varsize_frob_t frob, -+ loff_t ofs, size_t len, void *thunk) -+{ -+ struct map_info *map = mtd->priv; -+ struct cfi_private *cfi = map->fldrv_priv; -+ unsigned long adr; -+ int chipnum, ret = 0; -+ int i, first; -+ struct mtd_erase_region_info *regions = mtd->eraseregions; -+ -+ if (ofs > mtd->size) -+ return -EINVAL; -+ -+ if ((len + ofs) > mtd->size) -+ return -EINVAL; -+ -+ /* Check that both start and end of the requested erase are -+ * aligned with the erasesize at the appropriate addresses. -+ */ -+ -+ i = 0; -+ -+ /* Skip all erase regions which are ended before the start of -+ the requested erase. Actually, to save on the calculations, -+ we skip to the first erase region which starts after the -+ start of the requested erase, and then go back one. -+ */ -+ -+ while (i < mtd->numeraseregions && ofs >= regions[i].offset) -+ i++; -+ i--; -+ -+ /* OK, now i is pointing at the erase region in which this -+ erase request starts. Check the start of the requested -+ erase range is aligned with the erase size which is in -+ effect here. -+ */ -+ -+ if (ofs & (regions[i].erasesize-1)) -+ return -EINVAL; -+ -+ /* Remember the erase region we start on */ -+ first = i; -+ -+ /* Next, check that the end of the requested erase is aligned -+ * with the erase region at that address. 
-+ */ -+ -+ while (i<mtd->numeraseregions && (ofs + len) >= regions[i].offset) -+ i++; -+ -+ /* As before, drop back one to point at the region in which -+ the address actually falls -+ */ -+ i--; -+ -+ if ((ofs + len) & (regions[i].erasesize-1)) -+ return -EINVAL; -+ -+ chipnum = ofs >> cfi->chipshift; -+ adr = ofs - (chipnum << cfi->chipshift); -+ -+ i=first; -+ -+ while(len) { -+ unsigned long chipmask; -+ int size = regions[i].erasesize; -+ -+ ret = (*frob)(map, &cfi->chips[chipnum], adr, size, thunk); -+ -+ if (ret) -+ return ret; -+ -+ adr += size; -+ len -= size; -+ -+ chipmask = (1 << cfi->chipshift) - 1; -+ if ((adr & chipmask) == ((regions[i].offset + size * regions[i].numblocks) & chipmask)) -+ i++; -+ -+ if (adr >> cfi->chipshift) { -+ adr = 0; -+ chipnum++; -+ -+ if (chipnum >= cfi->numchips) -+ break; -+ } -+ } -+ -+ return 0; -+} -+ -+EXPORT_SYMBOL(cfi_varsize_frob); -+ -+MODULE_LICENSE("GPL"); -Index: linux-2.6.5/drivers/mtd/chips/chipreg.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/chips/chipreg.c 2004-04-03 22:36:56.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/chips/chipreg.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: chipreg.c,v 1.15 2003/05/21 15:15:05 dwmw2 Exp $ -+ * $Id: chipreg.c,v 1.16 2003/05/29 09:36:15 dwmw2 Exp $ - * - * Registration for chip drivers - * -Index: linux-2.6.5/drivers/mtd/chips/fwh_lock.h -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/chips/fwh_lock.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/chips/fwh_lock.h 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,107 @@ -+#ifndef FWH_LOCK_H -+#define FWH_LOCK_H -+ -+ -+enum fwh_lock_state { -+ FWH_UNLOCKED = 0, -+ FWH_DENY_WRITE = 1, -+ FWH_IMMUTABLE = 2, -+ FWH_DENY_READ = 4, -+}; -+ -+struct fwh_xxlock_thunk { -+ enum fwh_lock_state val; -+ flstate_t state; -+}; -+ -+ -+#define FWH_XXLOCK_ONEBLOCK_LOCK ((struct 
fwh_xxlock_thunk){ FWH_DENY_WRITE, FL_LOCKING}) -+#define FWH_XXLOCK_ONEBLOCK_UNLOCK ((struct fwh_xxlock_thunk){ FWH_UNLOCKED, FL_UNLOCKING}) -+ -+/* -+ * This locking/unlock is specific to firmware hub parts. Only one -+ * is known that supports the Intel command set. Firmware -+ * hub parts cannot be interleaved as they are on the LPC bus -+ * so this code has not been tested with interleaved chips, -+ * and will likely fail in that context. -+ */ -+static int fwh_xxlock_oneblock(struct map_info *map, struct flchip *chip, -+ unsigned long adr, int len, void *thunk) -+{ -+ struct cfi_private *cfi = map->fldrv_priv; -+ struct fwh_xxlock_thunk *xxlt = (struct fwh_xxlock_thunk *)thunk; -+ int ret; -+ -+ /* Refuse the operation if the we cannot look behind the chip */ -+ if (chip->start < 0x400000) { -+ DEBUG( MTD_DEBUG_LEVEL3, -+ "MTD %s(): chip->start: %lx wanted >= 0x400000\n", -+ __func__, chip->start ); -+ return -EIO; -+ } -+ /* -+ * lock block registers: -+ * - on 64k boundariesand -+ * - bit 1 set high -+ * - block lock registers are 4MiB lower - overflow subtract (danger) -+ * -+ * The address manipulation is first done on the logical address -+ * which is 0 at the start of the chip, and then the offset of -+ * the individual chip is addted to it. Any other order a weird -+ * map offset could cause problems. -+ */ -+ adr = (adr & ~0xffffUL) | 0x2; -+ adr += chip->start - 0x400000; -+ -+ /* -+ * This is easy because these are writes to registers and not writes -+ * to flash memory - that means that we don't have to check status -+ * and timeout. -+ */ -+ cfi_spin_lock(chip->mutex); -+ ret = get_chip(map, chip, adr, FL_LOCKING); -+ if (ret) { -+ cfi_spin_unlock(chip->mutex); -+ return ret; -+ } -+ -+ chip->state = xxlt->state; -+ map_write(map, CMD(xxlt->val), adr); -+ -+ /* Done and happy. 
*/ -+ chip->state = FL_READY; -+ put_chip(map, chip, adr); -+ cfi_spin_unlock(chip->mutex); -+ return 0; -+} -+ -+ -+static int fwh_lock_varsize(struct mtd_info *mtd, loff_t ofs, size_t len) -+{ -+ int ret; -+ -+ ret = cfi_varsize_frob(mtd, fwh_xxlock_oneblock, ofs, len, -+ (void *)&FWH_XXLOCK_ONEBLOCK_LOCK); -+ -+ return ret; -+} -+ -+ -+static int fwh_unlock_varsize(struct mtd_info *mtd, loff_t ofs, size_t len) -+{ -+ int ret; -+ -+ ret = cfi_varsize_frob(mtd, fwh_xxlock_oneblock, ofs, len, -+ (void *)&FWH_XXLOCK_ONEBLOCK_UNLOCK); -+ -+ return ret; -+} -+ -+static void fixup_use_fwh_lock(struct mtd_info *mtd, void *param) -+{ -+ printk(KERN_NOTICE "using fwh lock/unlock method\n"); -+ /* Setup for the chips with the fwh lock method */ -+ mtd->lock = fwh_lock_varsize; -+ mtd->unlock = fwh_unlock_varsize; -+} -+#endif /* FWH_LOCK_H */ -Index: linux-2.6.5/drivers/mtd/chips/gen_probe.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/chips/gen_probe.c 2005-02-01 16:55:45.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/chips/gen_probe.c 2005-02-01 17:11:17.000000000 -0500 -@@ -2,7 +2,7 @@ - * Routines common to all CFI-type probes. - * (C) 2001-2003 Red Hat, Inc. 
- * GPL'd -- * $Id: gen_probe.c,v 1.13 2003/06/25 11:50:37 dwmw2 Exp $ -+ * $Id: gen_probe.c,v 1.21 2004/08/14 15:14:05 dwmw2 Exp $ - */ - - #include <linux/kernel.h> -@@ -50,24 +50,22 @@ - EXPORT_SYMBOL(mtd_do_chip_probe); - - --struct cfi_private *genprobe_ident_chips(struct map_info *map, struct chip_probe *cp) -+static struct cfi_private *genprobe_ident_chips(struct map_info *map, struct chip_probe *cp) - { -- unsigned long base=0; - struct cfi_private cfi; - struct cfi_private *retcfi; -- struct flchip chip[MAX_CFI_CHIPS]; -- int i; -+ unsigned long *chip_map; -+ int i, j, mapsize; -+ int max_chips; - - memset(&cfi, 0, sizeof(cfi)); -- memset(&chip[0], 0, sizeof(chip)); - - /* Call the probetype-specific code with all permutations of - interleave and device type, etc. */ - if (!genprobe_new_chip(map, cp, &cfi)) { - /* The probe didn't like it */ -- DEBUG(MTD_DEBUG_LEVEL3, -- "MTD %s(): %s: Found no %s device at location zero\n", -- __func__, cp->name, map->name); -+ printk(KERN_DEBUG "%s: Found no %s device at location zero\n", -+ cp->name, map->name); - return NULL; - } - -@@ -81,46 +79,47 @@ - return NULL; - } - #endif -- chip[0].start = 0; -- chip[0].state = FL_READY; - cfi.chipshift = cfi.cfiq->DevSize; - -- switch(cfi.interleave) { --#ifdef CFIDEV_INTERLEAVE_1 -- case 1: -- break; --#endif --#ifdef CFIDEV_INTERLEAVE_2 -- case 2: -+ if (cfi_interleave_is_1(&cfi)) { -+ ; -+ } else if (cfi_interleave_is_2(&cfi)) { - cfi.chipshift++; -- break; --#endif --#ifdef CFIDEV_INTERLEAVE_4 -- case 4: -- cfi.chipshift+=2; -- break; --#endif -- default: -+ } else if (cfi_interleave_is_4((&cfi))) { -+ cfi.chipshift += 2; -+ } else if (cfi_interleave_is_8(&cfi)) { -+ cfi.chipshift += 3; -+ } else { - BUG(); - } - - cfi.numchips = 1; - -+ /* -+ * Allocate memory for bitmap of valid chips. -+ * Align bitmap storage size to full byte. -+ */ -+ max_chips = map->size >> cfi.chipshift; -+ mapsize = (max_chips / 8) + ((max_chips % 8) ? 
1 : 0); -+ chip_map = kmalloc(mapsize, GFP_KERNEL); -+ if (!chip_map) { -+ printk(KERN_WARNING "%s: kmalloc failed for CFI chip map\n", map->name); -+ kfree(cfi.cfiq); -+ return NULL; -+ } -+ memset (chip_map, 0, mapsize); -+ -+ set_bit(0, chip_map); /* Mark first chip valid */ -+ - /* - * Now probe for other chips, checking sensibly for aliases while - * we're at it. The new_chip probe above should have let the first - * chip in read mode. -- * -- * NOTE: Here, we're checking if there is room for another chip -- * the same size within the mapping. Therefore, -- * base + chipsize <= map->size is the correct thing to do, -- * because, base + chipsize would be the _first_ byte of the -- * next chip, not the one we're currently pondering. - */ - -- for (base = (1<<cfi.chipshift); base + (1<<cfi.chipshift) <= map->size; -- base += (1<<cfi.chipshift)) -- cp->probe_chip(map, base, &chip[0], &cfi); -+ for (i = 1; i < max_chips; i++) { -+ cp->probe_chip(map, i << cfi.chipshift, chip_map, &cfi); -+ } - - /* - * Now allocate the space for the structures we need to return to -@@ -132,19 +131,26 @@ - if (!retcfi) { - printk(KERN_WARNING "%s: kmalloc failed for CFI private structure\n", map->name); - kfree(cfi.cfiq); -+ kfree(chip_map); - return NULL; - } - - memcpy(retcfi, &cfi, sizeof(cfi)); -- memcpy(&retcfi->chips[0], chip, sizeof(struct flchip) * cfi.numchips); -+ memset(&retcfi->chips[0], 0, sizeof(struct flchip) * cfi.numchips); -+ -+ for (i = 0, j = 0; (j < cfi.numchips) && (i < max_chips); i++) { -+ if(test_bit(i, chip_map)) { -+ struct flchip *pchip = &retcfi->chips[j++]; - -- /* Fix up the stuff that breaks when you move it */ -- for (i=0; i< retcfi->numchips; i++) { -- init_waitqueue_head(&retcfi->chips[i].wq); -- spin_lock_init(&retcfi->chips[i]._spinlock); -- retcfi->chips[i].mutex = &retcfi->chips[i]._spinlock; -+ pchip->start = (i << cfi.chipshift); -+ pchip->state = FL_READY; -+ init_waitqueue_head(&pchip->wq); -+ spin_lock_init(&pchip->_spinlock); -+ 
pchip->mutex = &pchip->_spinlock; -+ } - } - -+ kfree(chip_map); - return retcfi; - } - -@@ -152,131 +158,31 @@ - static int genprobe_new_chip(struct map_info *map, struct chip_probe *cp, - struct cfi_private *cfi) - { -- switch (map->buswidth) { --#ifdef CFIDEV_BUSWIDTH_1 -- case CFIDEV_BUSWIDTH_1: -- cfi->interleave = CFIDEV_INTERLEAVE_1; -- -- cfi->device_type = CFI_DEVICETYPE_X8; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; -- -- cfi->device_type = CFI_DEVICETYPE_X16; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; -- break; --#endif /* CFIDEV_BUSWITDH_1 */ -- --#ifdef CFIDEV_BUSWIDTH_2 -- case CFIDEV_BUSWIDTH_2: --#ifdef CFIDEV_INTERLEAVE_1 -- cfi->interleave = CFIDEV_INTERLEAVE_1; -- -- cfi->device_type = CFI_DEVICETYPE_X16; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; --#endif /* CFIDEV_INTERLEAVE_1 */ --#ifdef CFIDEV_INTERLEAVE_2 -- cfi->interleave = CFIDEV_INTERLEAVE_2; -- -- cfi->device_type = CFI_DEVICETYPE_X8; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; -- -- cfi->device_type = CFI_DEVICETYPE_X16; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; --#endif /* CFIDEV_INTERLEAVE_2 */ -- break; --#endif /* CFIDEV_BUSWIDTH_2 */ -- --#ifdef CFIDEV_BUSWIDTH_4 -- case CFIDEV_BUSWIDTH_4: --#if defined(CFIDEV_INTERLEAVE_1) && defined(SOMEONE_ACTUALLY_MAKES_THESE) -- cfi->interleave = CFIDEV_INTERLEAVE_1; -- -- cfi->device_type = CFI_DEVICETYPE_X32; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; --#endif /* CFIDEV_INTERLEAVE_1 */ --#ifdef CFIDEV_INTERLEAVE_2 -- cfi->interleave = CFIDEV_INTERLEAVE_2; -- --#ifdef SOMEONE_ACTUALLY_MAKES_THESE -- cfi->device_type = CFI_DEVICETYPE_X32; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; --#endif -- cfi->device_type = CFI_DEVICETYPE_X16; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; -- -- cfi->device_type = CFI_DEVICETYPE_X8; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; --#endif /* CFIDEV_INTERLEAVE_2 */ --#ifdef CFIDEV_INTERLEAVE_4 -- 
cfi->interleave = CFIDEV_INTERLEAVE_4; -- --#ifdef SOMEONE_ACTUALLY_MAKES_THESE -- cfi->device_type = CFI_DEVICETYPE_X32; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; --#endif -- cfi->device_type = CFI_DEVICETYPE_X16; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; -- -- cfi->device_type = CFI_DEVICETYPE_X8; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; --#endif /* CFIDEV_INTERLEAVE_4 */ -- break; --#endif /* CFIDEV_BUSWIDTH_4 */ -- --#ifdef CFIDEV_BUSWIDTH_8 -- case CFIDEV_BUSWIDTH_8: --#if defined(CFIDEV_INTERLEAVE_2) && defined(SOMEONE_ACTUALLY_MAKES_THESE) -- cfi->interleave = CFIDEV_INTERLEAVE_2; -- -- cfi->device_type = CFI_DEVICETYPE_X32; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; --#endif /* CFIDEV_INTERLEAVE_2 */ --#ifdef CFIDEV_INTERLEAVE_4 -- cfi->interleave = CFIDEV_INTERLEAVE_4; -- --#ifdef SOMEONE_ACTUALLY_MAKES_THESE -- cfi->device_type = CFI_DEVICETYPE_X32; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; --#endif -- cfi->device_type = CFI_DEVICETYPE_X16; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; --#endif /* CFIDEV_INTERLEAVE_4 */ --#ifdef CFIDEV_INTERLEAVE_8 -- cfi->interleave = CFIDEV_INTERLEAVE_8; -- -- cfi->device_type = CFI_DEVICETYPE_X16; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; -- -- cfi->device_type = CFI_DEVICETYPE_X8; -- if (cp->probe_chip(map, 0, NULL, cfi)) -- return 1; --#endif /* CFIDEV_INTERLEAVE_8 */ -- break; --#endif /* CFIDEV_BUSWIDTH_8 */ -- -- default: -- printk(KERN_WARNING "genprobe_new_chip called with unsupported buswidth %d\n", map->buswidth); -- return 0; -+ int min_chips = (map_bankwidth(map)/4?:1); /* At most 4-bytes wide. */ -+ int max_chips = map_bankwidth(map); /* And minimum 1 */ -+ int nr_chips, type; -+ -+ for (nr_chips = min_chips; nr_chips <= max_chips; nr_chips <<= 1) { -+ -+ if (!cfi_interleave_supported(nr_chips)) -+ continue; -+ -+ cfi->interleave = nr_chips; -+ -+ /* Minimum device size. 
Don't look for one 8-bit device -+ in a 16-bit bus, etc. */ -+ type = map_bankwidth(map) / nr_chips; -+ -+ for (; type <= CFI_DEVICETYPE_X32; type<<=1) { -+ cfi->device_type = type; -+ -+ if (cp->probe_chip(map, 0, NULL, cfi)) -+ return 1; -+ } - } - return 0; - } - -- - typedef struct mtd_info *cfi_cmdset_fn_t(struct map_info *, int); - - extern cfi_cmdset_fn_t cfi_cmdset_0001; -Index: linux-2.6.5/drivers/mtd/chips/jedec.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/chips/jedec.c 2004-04-03 22:37:23.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/chips/jedec.c 2005-02-01 17:11:17.000000000 -0500 -@@ -11,7 +11,7 @@ - * not going to guess how to send commands to them, plus I expect they will - * all speak CFI.. - * -- * $Id: jedec.c,v 1.19 2003/05/29 09:25:23 dwmw2 Exp $ -+ * $Id: jedec.c,v 1.21 2004/08/09 13:19:43 dwmw2 Exp $ - */ - - #include <linux/init.h> -@@ -128,7 +128,7 @@ - { - printk("mtd: Increase MAX_JEDEC_CHIPS, too many banks.\n"); - kfree(MTD); -- return 0; -+ return NULL; - } - - for (Base = 0; Base < map->size; Base += my_bank_size) -@@ -141,7 +141,7 @@ - if (jedec_probe8(map,Base,priv) == 0) { - printk("did recognize jedec chip\n"); - kfree(MTD); -- return 0; -+ return NULL; - } - } - if (map->buswidth == 2) -@@ -167,7 +167,7 @@ - { - printk("mtd: Failed. Device has incompatible mixed sector sizes\n"); - kfree(MTD); -- return 0; -+ return NULL; - } - } - -@@ -193,7 +193,7 @@ - { - printk("mtd: Internal Error, JEDEC not set\n"); - kfree(MTD); -- return 0; -+ return NULL; - } - - if (Uniq != 0) -@@ -221,7 +221,7 @@ - if (!priv->size) { - printk("priv->size is zero\n"); - kfree(MTD); -- return 0; -+ return NULL; - } - if (priv->size/my_bank_size) { - if (priv->size/my_bank_size == 1) { -@@ -240,7 +240,7 @@ - { - printk("mtd: Failed. 
Cannot handle unsymmetric banking\n"); - kfree(MTD); -- return 0; -+ return NULL; - } - } - } -@@ -385,7 +385,7 @@ - for (I = 0; JEDEC_table[I].jedec != 0; I++) - if (JEDEC_table[I].jedec == Id) - return JEDEC_table + I; -- return 0; -+ return NULL; - } - - // Look for flash using an 8 bit bus interface -@@ -780,8 +780,7 @@ - - //printk("done\n"); - instr->state = MTD_ERASE_DONE; -- if (instr->callback) -- instr->callback(instr); -+ mtd_erase_callback(instr); - return 0; - - #undef flread -Index: linux-2.6.5/drivers/mtd/chips/jedec_probe.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/chips/jedec_probe.c 2005-02-01 16:55:45.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/chips/jedec_probe.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,9 +1,11 @@ - /* - Common Flash Interface probe code. - (C) 2000 Red Hat. GPL'd. -- $Id: jedec_probe.c,v 1.29 2003/05/28 13:57:46 dwmw2 Exp $ -+ $Id: jedec_probe.c,v 1.57 2004/09/17 11:45:05 eric Exp $ - See JEDEC (http://www.jedec.org/) standard JESD21C (section 3.5) - for the standard this probe goes back to. 
-+ -+ Occasionally maintained by Thayne Harbaugh tharbaugh at lnxi dot com - */ - - #include <linux/config.h> -@@ -27,6 +29,7 @@ - #define MANUFACTURER_AMD 0x0001 - #define MANUFACTURER_ATMEL 0x001f - #define MANUFACTURER_FUJITSU 0x0004 -+#define MANUFACTURER_HYUNDAI 0x00AD - #define MANUFACTURER_INTEL 0x0089 - #define MANUFACTURER_MACRONIX 0x00C2 - #define MANUFACTURER_PMC 0x009D -@@ -37,8 +40,13 @@ - - - /* AMD */ -+#define AM29DL800BB 0x22C8 -+#define AM29DL800BT 0x224A -+ - #define AM29F800BB 0x2258 - #define AM29F800BT 0x22D6 -+#define AM29LV400BB 0x22BA -+#define AM29LV400BT 0x22B9 - #define AM29LV800BB 0x225B - #define AM29LV800BT 0x22DA - #define AM29LV160DT 0x22C4 -@@ -49,6 +57,7 @@ - #define AM29F040 0x00A4 - #define AM29LV040B 0x004F - #define AM29F032B 0x0041 -+#define AM29F002T 0x00B0 - - /* Atmel */ - #define AT49BV512 0x0003 -@@ -59,6 +68,7 @@ - #define AT49BV32XT 0x00C9 - - /* Fujitsu */ -+#define MBM29F040C 0x00A4 - #define MBM29LV650UE 0x22D7 - #define MBM29LV320TE 0x22F6 - #define MBM29LV320BE 0x22F9 -@@ -66,6 +76,11 @@ - #define MBM29LV160BE 0x2249 - #define MBM29LV800BA 0x225B - #define MBM29LV800TA 0x22DA -+#define MBM29LV400TC 0x22B9 -+#define MBM29LV400BC 0x22BA -+ -+/* Hyundai */ -+#define HY29F002T 0x00B0 - - /* Intel */ - #define I28F004B3T 0x00d4 -@@ -92,9 +107,11 @@ - #define I82802AC 0x00ac - - /* Macronix */ -+#define MX29LV040C 0x004F - #define MX29LV160T 0x22C4 - #define MX29LV160B 0x2249 - #define MX29F016 0x00AD -+#define MX29F002T 0x00B0 - #define MX29F004T 0x0045 - #define MX29F004B 0x0046 - -@@ -109,8 +126,14 @@ - #define M29W160DT 0x22C4 - #define M29W160DB 0x2249 - #define M29W040B 0x00E3 -+#define M50FW040 0x002C -+#define M50FW080 0x002D -+#define M50FW016 0x002E -+#define M50LPW080 0x002F - - /* SST */ -+#define SST29EE020 0x0010 -+#define SST29LE020 0x0012 - #define SST29EE512 0x005d - #define SST29LE512 0x003d - #define SST39LF800 0x2781 -@@ -121,6 +144,8 @@ - #define SST39LF040 0x00D7 - #define SST39SF010A 0x00B5 - 
#define SST39SF020A 0x00B6 -+#define SST49LF004B 0x0060 -+#define SST49LF008A 0x005a - #define SST49LF030A 0x001C - #define SST49LF040A 0x0051 - #define SST49LF080A 0x005B -@@ -158,8 +183,8 @@ - - - struct unlock_addr { -- int addr1; -- int addr2; -+ u32 addr1; -+ u32 addr2; - }; - - -@@ -211,11 +236,10 @@ - const __u16 dev_id; - const char *name; - const int DevSize; -- const int InterfaceDesc; - const int NumEraseRegions; - const int CmdSet; -- const __u8 uaddr[3]; /* unlock addrs for 8, 16, 32 modes */ -- const ulong regions[4]; -+ const __u8 uaddr[4]; /* unlock addrs for 8, 16, 32, 64 */ -+ const ulong regions[6]; - }; - - #define ERASEINFO(size,blocks) (size<<8)|(blocks-1) -@@ -285,6 +309,40 @@ - } - }, { - .mfr_id = MANUFACTURER_AMD, -+ .dev_id = AM29LV400BB, -+ .name = "AMD AM29LV400BB", -+ .uaddr = { -+ [0] = MTD_UADDR_0x0AAA_0x0555, /* x8 */ -+ [1] = MTD_UADDR_0x0555_0x02AA, /* x16 */ -+ }, -+ .DevSize = SIZE_512KiB, -+ .CmdSet = P_ID_AMD_STD, -+ .NumEraseRegions= 4, -+ .regions = { -+ ERASEINFO(0x04000,1), -+ ERASEINFO(0x02000,2), -+ ERASEINFO(0x08000,1), -+ ERASEINFO(0x10000,7) -+ } -+ }, { -+ .mfr_id = MANUFACTURER_AMD, -+ .dev_id = AM29LV400BT, -+ .name = "AMD AM29LV400BT", -+ .uaddr = { -+ [0] = MTD_UADDR_0x0AAA_0x0555, /* x8 */ -+ [1] = MTD_UADDR_0x0555_0x02AA, /* x16 */ -+ }, -+ .DevSize = SIZE_512KiB, -+ .CmdSet = P_ID_AMD_STD, -+ .NumEraseRegions= 4, -+ .regions = { -+ ERASEINFO(0x10000,7), -+ ERASEINFO(0x08000,1), -+ ERASEINFO(0x02000,2), -+ ERASEINFO(0x04000,1) -+ } -+ }, { -+ .mfr_id = MANUFACTURER_AMD, - .dev_id = AM29LV800BB, - .name = "AMD AM29LV800BB", - .uaddr = { -@@ -301,6 +359,45 @@ - ERASEINFO(0x10000,15), - } - }, { -+/* add DL */ -+ .mfr_id = MANUFACTURER_AMD, -+ .dev_id = AM29DL800BB, -+ .name = "AMD AM29DL800BB", -+ .uaddr = { -+ [0] = MTD_UADDR_0x0AAA_0x0555, /* x8 */ -+ [1] = MTD_UADDR_0x0555_0x02AA, /* x16 */ -+ }, -+ .DevSize = SIZE_1MiB, -+ .CmdSet = P_ID_AMD_STD, -+ .NumEraseRegions= 6, -+ .regions = { -+ 
ERASEINFO(0x04000,1), -+ ERASEINFO(0x08000,1), -+ ERASEINFO(0x02000,4), -+ ERASEINFO(0x08000,1), -+ ERASEINFO(0x04000,1), -+ ERASEINFO(0x10000,14) -+ } -+ }, { -+ .mfr_id = MANUFACTURER_AMD, -+ .dev_id = AM29DL800BT, -+ .name = "AMD AM29DL800BT", -+ .uaddr = { -+ [0] = MTD_UADDR_0x0AAA_0x0555, /* x8 */ -+ [1] = MTD_UADDR_0x0555_0x02AA, /* x16 */ -+ }, -+ .DevSize = SIZE_1MiB, -+ .CmdSet = P_ID_AMD_STD, -+ .NumEraseRegions= 6, -+ .regions = { -+ ERASEINFO(0x10000,14), -+ ERASEINFO(0x04000,1), -+ ERASEINFO(0x08000,1), -+ ERASEINFO(0x02000,4), -+ ERASEINFO(0x08000,1), -+ ERASEINFO(0x04000,1) -+ } -+ }, { - .mfr_id = MANUFACTURER_AMD, - .dev_id = AM29F800BB, - .name = "AMD AM29F800BB", -@@ -417,6 +514,17 @@ - ERASEINFO(0x10000,8), - } - }, { -+ mfr_id: MANUFACTURER_AMD, -+ dev_id: AM29F002T, -+ name: "AMD AM29F002T", -+ DevSize: SIZE_256KiB, -+ NumEraseRegions: 4, -+ regions: {ERASEINFO(0x10000,3), -+ ERASEINFO(0x08000,1), -+ ERASEINFO(0x02000,2), -+ ERASEINFO(0x04000,1) -+ } -+ }, { - .mfr_id = MANUFACTURER_ATMEL, - .dev_id = AT49BV512, - .name = "Atmel AT49BV512", -@@ -505,6 +613,19 @@ - } - }, { - .mfr_id = MANUFACTURER_FUJITSU, -+ .dev_id = MBM29F040C, -+ .name = "Fujitsu MBM29F040C", -+ .uaddr = { -+ [0] = MTD_UADDR_0x0AAA_0x0555, /* x8 */ -+ }, -+ .DevSize = SIZE_512KiB, -+ .CmdSet = P_ID_AMD_STD, -+ .NumEraseRegions= 1, -+ .regions = { -+ ERASEINFO(0x10000,8) -+ } -+ }, { -+ .mfr_id = MANUFACTURER_FUJITSU, - .dev_id = MBM29LV650UE, - .name = "Fujitsu MBM29LV650UE", - .uaddr = { -@@ -615,6 +736,51 @@ - ERASEINFO(0x04000,1) - } - }, { -+ .mfr_id = MANUFACTURER_FUJITSU, -+ .dev_id = MBM29LV400BC, -+ .name = "Fujitsu MBM29LV400BC", -+ .uaddr = { -+ [0] = MTD_UADDR_0x0AAA_0x0555, /* x8 */ -+ [1] = MTD_UADDR_0x0555_0x02AA, /* x16 */ -+ }, -+ .DevSize = SIZE_512KiB, -+ .CmdSet = P_ID_AMD_STD, -+ .NumEraseRegions= 4, -+ .regions = { -+ ERASEINFO(0x04000,1), -+ ERASEINFO(0x02000,2), -+ ERASEINFO(0x08000,1), -+ ERASEINFO(0x10000,7) -+ } -+ }, { -+ .mfr_id = 
MANUFACTURER_FUJITSU, -+ .dev_id = MBM29LV400TC, -+ .name = "Fujitsu MBM29LV400TC", -+ .uaddr = { -+ [0] = MTD_UADDR_0x0AAA_0x0555, /* x8 */ -+ [1] = MTD_UADDR_0x0555_0x02AA, /* x16 */ -+ }, -+ .DevSize = SIZE_512KiB, -+ .CmdSet = P_ID_AMD_STD, -+ .NumEraseRegions= 4, -+ .regions = { -+ ERASEINFO(0x10000,7), -+ ERASEINFO(0x08000,1), -+ ERASEINFO(0x02000,2), -+ ERASEINFO(0x04000,1) -+ } -+ }, { -+ mfr_id: MANUFACTURER_HYUNDAI, -+ dev_id: HY29F002T, -+ name: "Hyundai HY29F002T", -+ DevSize: SIZE_256KiB, -+ NumEraseRegions: 4, -+ regions: {ERASEINFO(0x10000,3), -+ ERASEINFO(0x08000,1), -+ ERASEINFO(0x02000,2), -+ ERASEINFO(0x04000,1) -+ } -+ }, { - .mfr_id = MANUFACTURER_INTEL, - .dev_id = I28F004B3B, - .name = "Intel 28F004B3B", -@@ -920,6 +1086,19 @@ - } - }, { - .mfr_id = MANUFACTURER_MACRONIX, -+ .dev_id = MX29LV040C, -+ .name = "Macronix MX29LV040C", -+ .uaddr = { -+ [0] = MTD_UADDR_0x0555_0x02AA, /* x8 */ -+ }, -+ .DevSize = SIZE_512KiB, -+ .CmdSet = P_ID_AMD_STD, -+ .NumEraseRegions= 1, -+ .regions = { -+ ERASEINFO(0x10000,8), -+ } -+ }, { -+ .mfr_id = MANUFACTURER_MACRONIX, - .dev_id = MX29LV160T, - .name = "MXIC MX29LV160T", - .uaddr = { -@@ -998,6 +1177,17 @@ - ERASEINFO(0x10000,7), - } - }, { -+ mfr_id: MANUFACTURER_MACRONIX, -+ dev_id: MX29F002T, -+ name: "Macronix MX29F002T", -+ DevSize: SIZE_256KiB, -+ NumEraseRegions: 4, -+ regions: {ERASEINFO(0x10000,3), -+ ERASEINFO(0x08000,1), -+ ERASEINFO(0x02000,2), -+ ERASEINFO(0x04000,1) -+ } -+ }, { - .mfr_id = MANUFACTURER_PMC, - .dev_id = PM49FL002, - .name = "PMC Pm49FL002", -@@ -1064,6 +1254,30 @@ - } - }, { - .mfr_id = MANUFACTURER_SST, -+ .dev_id = SST29EE020, -+ .name = "SST 29EE020", -+ .uaddr = { -+ [0] = MTD_UADDR_0x5555_0x2AAA /* x8 */ -+ }, -+ .DevSize = SIZE_256KiB, -+ .CmdSet = P_ID_SST_PAGE, -+ .NumEraseRegions= 1, -+ regions: {ERASEINFO(0x01000,64), -+ } -+ }, { -+ .mfr_id = MANUFACTURER_SST, -+ .dev_id = SST29LE020, -+ .name = "SST 29LE020", -+ .uaddr = { -+ [0] = MTD_UADDR_0x5555_0x2AAA /* x8 
*/ -+ }, -+ .DevSize = SIZE_256KiB, -+ .CmdSet = P_ID_SST_PAGE, -+ .NumEraseRegions= 1, -+ regions: {ERASEINFO(0x01000,64), -+ } -+ }, { -+ .mfr_id = MANUFACTURER_SST, - .dev_id = SST39LF020, - .name = "SST 39LF020", - .uaddr = { -@@ -1116,6 +1330,32 @@ - } - }, { - .mfr_id = MANUFACTURER_SST, -+ .dev_id = SST49LF004B, -+ .name = "SST 49LF004B", -+ .uaddr = { -+ [0] = MTD_UADDR_0x5555_0x2AAA /* x8 */ -+ }, -+ .DevSize = SIZE_512KiB, -+ .CmdSet = P_ID_AMD_STD, -+ .NumEraseRegions= 1, -+ .regions = { -+ ERASEINFO(0x01000,128), -+ } -+ }, { -+ .mfr_id = MANUFACTURER_SST, -+ .dev_id = SST49LF008A, -+ .name = "SST 49LF008A", -+ .uaddr = { -+ [0] = MTD_UADDR_0x5555_0x2AAA /* x8 */ -+ }, -+ .DevSize = SIZE_1MiB, -+ .CmdSet = P_ID_AMD_STD, -+ .NumEraseRegions= 1, -+ .regions = { -+ ERASEINFO(0x01000,256), -+ } -+ }, { -+ .mfr_id = MANUFACTURER_SST, - .dev_id = SST49LF030A, - .name = "SST 49LF030A", - .uaddr = { -@@ -1154,6 +1394,22 @@ - ERASEINFO(0x01000,256), - } - }, { -+ .mfr_id = MANUFACTURER_SST, /* should be CFI */ -+ .dev_id = SST39LF160, -+ .name = "SST 39LF160", -+ .uaddr = { -+ [0] = MTD_UADDR_0x5555_0x2AAA, /* x8 */ -+ [1] = MTD_UADDR_0x5555_0x2AAA /* x16 */ -+ }, -+ .DevSize = SIZE_2MiB, -+ .CmdSet = P_ID_AMD_STD, -+ .NumEraseRegions= 2, -+ .regions = { -+ ERASEINFO(0x1000,256), -+ ERASEINFO(0x1000,256) -+ } -+ -+ }, { - .mfr_id = MANUFACTURER_ST, /* FIXME - CFI device? 
*/ - .dev_id = M29W800DT, - .name = "ST M29W800DT", -@@ -1234,6 +1490,58 @@ - .regions = { - ERASEINFO(0x10000,8), - } -+ }, { -+ .mfr_id = MANUFACTURER_ST, -+ .dev_id = M50FW040, -+ .name = "ST M50FW040", -+ .uaddr = { -+ [0] = MTD_UADDR_UNNECESSARY, /* x8 */ -+ }, -+ .DevSize = SIZE_512KiB, -+ .CmdSet = P_ID_INTEL_EXT, -+ .NumEraseRegions= 1, -+ .regions = { -+ ERASEINFO(0x10000,8), -+ } -+ }, { -+ .mfr_id = MANUFACTURER_ST, -+ .dev_id = M50FW080, -+ .name = "ST M50FW080", -+ .uaddr = { -+ [0] = MTD_UADDR_UNNECESSARY, /* x8 */ -+ }, -+ .DevSize = SIZE_1MiB, -+ .CmdSet = P_ID_INTEL_EXT, -+ .NumEraseRegions= 1, -+ .regions = { -+ ERASEINFO(0x10000,16), -+ } -+ }, { -+ .mfr_id = MANUFACTURER_ST, -+ .dev_id = M50FW016, -+ .name = "ST M50FW016", -+ .uaddr = { -+ [0] = MTD_UADDR_UNNECESSARY, /* x8 */ -+ }, -+ .DevSize = SIZE_2MiB, -+ .CmdSet = P_ID_INTEL_EXT, -+ .NumEraseRegions= 1, -+ .regions = { -+ ERASEINFO(0x10000,32), -+ } -+ }, { -+ .mfr_id = MANUFACTURER_ST, -+ .dev_id = M50LPW080, -+ .name = "ST M50LPW080", -+ .uaddr = { -+ [0] = MTD_UADDR_UNNECESSARY, /* x8 */ -+ }, -+ .DevSize = SIZE_1MiB, -+ .CmdSet = P_ID_INTEL_EXT, -+ .NumEraseRegions= 1, -+ .regions = { -+ ERASEINFO(0x10000,16), -+ } - }, { - .mfr_id = MANUFACTURER_TOSHIBA, - .dev_id = TC58FVT160, -@@ -1344,44 +1652,59 @@ - ERASEINFO(0x02000, 2), - ERASEINFO(0x04000, 1), - } -- } -+ } - }; - - - static int cfi_jedec_setup(struct cfi_private *p_cfi, int index); - - static int jedec_probe_chip(struct map_info *map, __u32 base, -- struct flchip *chips, struct cfi_private *cfi); -+ unsigned long *chip_map, struct cfi_private *cfi); - - struct mtd_info *jedec_probe(struct map_info *map); - - static inline u32 jedec_read_mfr(struct map_info *map, __u32 base, - struct cfi_private *cfi) - { -- u32 result, mask; -+ map_word result; -+ unsigned long mask; -+ u32 ofs = cfi_build_cmd_addr(0, cfi_interleave(cfi), cfi->device_type); - mask = (1 << (cfi->device_type * 8)) -1; -- result = cfi_read(map, base); -- result 
&= mask; -- return result; -+ result = map_read(map, base + ofs); -+ return result.x[0] & mask; - } - - static inline u32 jedec_read_id(struct map_info *map, __u32 base, - struct cfi_private *cfi) - { -- int osf; -- u32 result, mask; -- osf = cfi->interleave *cfi->device_type; -+ map_word result; -+ unsigned long mask; -+ u32 ofs = cfi_build_cmd_addr(1, cfi_interleave(cfi), cfi->device_type); - mask = (1 << (cfi->device_type * 8)) -1; -- result = cfi_read(map, base + osf); -- result &= mask; -- return result; -+ result = map_read(map, base + ofs); -+ return result.x[0] & mask; - } - - static inline void jedec_reset(u32 base, struct map_info *map, - struct cfi_private *cfi) - { - /* Reset */ -- cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); -+ -+ /* after checking the datasheets for SST, MACRONIX and ATMEL -+ * (oh and incidentaly the jedec spec - 3.5.3.3) the reset -+ * sequence is *supposed* to be 0xaa at 0x5555, 0x55 at -+ * 0x2aaa, 0xF0 at 0x5555 this will not affect the AMD chips -+ * as they will ignore the writes and dont care what address -+ * the F0 is written to */ -+ if(cfi->addr_unlock1) { -+ DEBUG( MTD_DEBUG_LEVEL3, -+ "reset unlock called %x %x \n", -+ cfi->addr_unlock1,cfi->addr_unlock2); -+ cfi_send_gen_cmd(0xaa, cfi->addr_unlock1, base, map, cfi, cfi->device_type, NULL); -+ cfi_send_gen_cmd(0x55, cfi->addr_unlock2, base, map, cfi, cfi->device_type, NULL); -+ } -+ -+ cfi_send_gen_cmd(0xF0, cfi->addr_unlock1, base, map, cfi, cfi->device_type, NULL); - /* Some misdesigned intel chips do not respond for 0xF0 for a reset, - * so ensure we're in read mode. Send both the Intel and the AMD command - * for this. Intel uses 0xff for this, AMD uses 0xff for NOP, so -@@ -1409,6 +1732,12 @@ - - uaddr = finfo->uaddr[uaddr_idx]; - -+ if (uaddr != MTD_UADDR_NOT_SUPPORTED ) { -+ /* ASSERT("The unlock addresses for non-8-bit mode -+ are bollocks. 
We don't really need an array."); */ -+ uaddr = finfo->uaddr[0]; -+ } -+ - uaddr_done: - return uaddr; - } -@@ -1439,17 +1768,19 @@ - for (i=0; i<num_erase_regions; i++){ - p_cfi->cfiq->EraseRegionInfo[i] = jedec_table[index].regions[i]; - } -- p_cfi->cmdset_priv = 0; -+ p_cfi->cmdset_priv = NULL; - - /* This may be redundant for some cases, but it doesn't hurt */ - p_cfi->mfr = jedec_table[index].mfr_id; - p_cfi->id = jedec_table[index].dev_id; - - uaddr = finfo_uaddr(&jedec_table[index], p_cfi->device_type); -- if ( MTD_UADDR_NOT_SUPPORTED ) { -+ if ( uaddr == MTD_UADDR_NOT_SUPPORTED ) { - kfree( p_cfi->cfiq ); - return 0; - } -+ -+ /* Mask out address bits which are smaller than the device type */ - p_cfi->addr_unlock1 = unlock_addrs[uaddr].addr1; - p_cfi->addr_unlock2 = unlock_addrs[uaddr].addr2; - -@@ -1473,8 +1804,35 @@ - u32 mfr, id; - __u8 uaddr; - -- /* The ID's must match */ -- if ( cfi->mfr != finfo->mfr_id || cfi->id != finfo->dev_id ) { -+ /* -+ * The IDs must match. For X16 and X32 devices operating in -+ * a lower width ( X8 or X16 ), the device ID's are usually just -+ * the lower byte(s) of the larger device ID for wider mode. If -+ * a part is found that doesn't fit this assumption (device id for -+ * smaller width mode is completely unrealated to full-width mode) -+ * then the jedec_table[] will have to be augmented with the IDs -+ * for different widths. 
-+ */ -+ switch (cfi->device_type) { -+ case CFI_DEVICETYPE_X8: -+ mfr = (__u8)finfo->mfr_id; -+ id = (__u8)finfo->dev_id; -+ break; -+ case CFI_DEVICETYPE_X16: -+ mfr = (__u16)finfo->mfr_id; -+ id = (__u16)finfo->dev_id; -+ break; -+ case CFI_DEVICETYPE_X32: -+ mfr = (__u16)finfo->mfr_id; -+ id = (__u32)finfo->dev_id; -+ break; -+ default: -+ printk(KERN_WARNING -+ "MTD %s(): Unsupported device type %d\n", -+ __func__, cfi->device_type); -+ goto match_done; -+ } -+ if ( cfi->mfr != mfr || cfi->id != id ) { - goto match_done; - } - -@@ -1482,7 +1840,7 @@ - DEBUG( MTD_DEBUG_LEVEL3, - "MTD %s(): Check fit 0x%.8x + 0x%.8x = 0x%.8x\n", - __func__, base, 1 << finfo->DevSize, base + (1 << finfo->DevSize) ); -- if ( base + ( 1 << finfo->DevSize ) > map->size ) { -+ if ( base + cfi_interleave(cfi) * ( 1 << finfo->DevSize ) > map->size ) { - DEBUG( MTD_DEBUG_LEVEL3, - "MTD %s(): 0x%.4x 0x%.4x %dKiB doesn't fit\n", - __func__, finfo->mfr_id, finfo->dev_id, -@@ -1491,20 +1849,20 @@ - } - - uaddr = finfo_uaddr(finfo, cfi->device_type); -- if ( MTD_UADDR_NOT_SUPPORTED ) { -+ if ( uaddr == MTD_UADDR_NOT_SUPPORTED ) { - goto match_done; - } - - DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): check unlock addrs 0x%.4x 0x%.4x\n", - __func__, cfi->addr_unlock1, cfi->addr_unlock2 ); - if ( MTD_UADDR_UNNECESSARY != uaddr && MTD_UADDR_DONT_CARE != uaddr -- && ( unlock_addrs[uaddr].addr1 != cfi->addr_unlock1 -- || unlock_addrs[uaddr].addr2 != cfi->addr_unlock2 ) ) { -+ && ( unlock_addrs[uaddr].addr1 != cfi->addr_unlock1 || -+ unlock_addrs[uaddr].addr2 != cfi->addr_unlock2 ) ) { - DEBUG( MTD_DEBUG_LEVEL3, -- "MTD %s(): 0x%.4x 0x%.4x did not match\n", -- __func__, -- unlock_addrs[uaddr].addr1, -- unlock_addrs[uaddr].addr2 ); -+ "MTD %s(): 0x%.4x 0x%.4x did not match\n", -+ __func__, -+ unlock_addrs[uaddr].addr1, -+ unlock_addrs[uaddr].addr2); - goto match_done; - } - -@@ -1540,10 +1898,10 @@ - */ - DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): return to ID mode\n", __func__ ); - if(cfi->addr_unlock1) { -- 
cfi_send_gen_cmd(0xaa, cfi->addr_unlock1, base, map, cfi, CFI_DEVICETYPE_X8, NULL); -- cfi_send_gen_cmd(0x55, cfi->addr_unlock2, base, map, cfi, CFI_DEVICETYPE_X8, NULL); -+ cfi_send_gen_cmd(0xaa, cfi->addr_unlock1, base, map, cfi, cfi->device_type, NULL); -+ cfi_send_gen_cmd(0x55, cfi->addr_unlock2, base, map, cfi, cfi->device_type, NULL); - } -- cfi_send_gen_cmd(0x90, cfi->addr_unlock1, base, map, cfi, CFI_DEVICETYPE_X8, NULL); -+ cfi_send_gen_cmd(0x90, cfi->addr_unlock1, base, map, cfi, cfi->device_type, NULL); - /* FIXME - should have a delay before continuing */ - - match_done: -@@ -1552,41 +1910,24 @@ - - - static int jedec_probe_chip(struct map_info *map, __u32 base, -- struct flchip *chips, struct cfi_private *cfi) -+ unsigned long *chip_map, struct cfi_private *cfi) - { - int i; -- int unlockpass = 0; -+ enum uaddr uaddr_idx = MTD_UADDR_NOT_SUPPORTED; -+ u32 probe_offset1, probe_offset2; - -- /* -- * FIXME - eventually replace these unlock address seeds with -- * information from unlock_addrs[]. -- */ -+ retry: - if (!cfi->numchips) { -- switch (cfi->device_type) { -- case CFI_DEVICETYPE_X8: -- cfi->addr_unlock1 = 0x555; -- cfi->addr_unlock2 = 0x2aa; -- break; -- case CFI_DEVICETYPE_X16: -- cfi->addr_unlock1 = 0xaaa; -- if (map->buswidth == cfi->interleave) { -- /* X16 chip(s) in X8 mode */ -- cfi->addr_unlock2 = 0x555; -- } else { -- cfi->addr_unlock2 = 0x554; -- } -- break; -- case CFI_DEVICETYPE_X32: -- cfi->addr_unlock1 = 0x1555; -- cfi->addr_unlock2 = 0xaaa; -- break; -- default: -- printk(KERN_NOTICE "Eep. 
Unknown jedec_probe device type %d\n", cfi->device_type); -- return 0; -- } -+ uaddr_idx++; -+ -+ if (MTD_UADDR_UNNECESSARY == uaddr_idx) -+ return 0; -+ -+ /* Mask out address bits which are smaller than the device type */ -+ cfi->addr_unlock1 = unlock_addrs[uaddr_idx].addr1; -+ cfi->addr_unlock2 = unlock_addrs[uaddr_idx].addr2; - } - -- retry: - /* Make certain we aren't probing past the end of map */ - if (base >= map->size) { - printk(KERN_NOTICE -@@ -1595,30 +1936,30 @@ - return 0; - - } -- if ((base + cfi->addr_unlock1) >= map->size) { -- printk(KERN_NOTICE -- "Probe at addr_unlock1(0x%08x + 0x%08x) past the end of the map(0x%08lx)\n", -- base, cfi->addr_unlock1, map->size -1); -- -- return 0; -+ /* Ensure the unlock addresses we try stay inside the map */ -+ probe_offset1 = cfi_build_cmd_addr( -+ cfi->addr_unlock1, -+ cfi_interleave(cfi), -+ cfi->device_type); -+ probe_offset2 = cfi_build_cmd_addr( -+ cfi->addr_unlock1, -+ cfi_interleave(cfi), -+ cfi->device_type); -+ if ( ((base + probe_offset1 + map_bankwidth(map)) >= map->size) || -+ ((base + probe_offset2 + map_bankwidth(map)) >= map->size)) -+ { -+ goto retry; - } -- if ((base + cfi->addr_unlock2) >= map->size) { -- printk(KERN_NOTICE -- "Probe at addr_unlock2(0x%08x + 0x%08x) past the end of the map(0x%08lx)\n", -- base, cfi->addr_unlock2, map->size -1); -- return 0; - -- } -- - /* Reset */ - jedec_reset(base, map, cfi); - - /* Autoselect Mode */ - if(cfi->addr_unlock1) { -- cfi_send_gen_cmd(0xaa, cfi->addr_unlock1, base, map, cfi, CFI_DEVICETYPE_X8, NULL); -- cfi_send_gen_cmd(0x55, cfi->addr_unlock2, base, map, cfi, CFI_DEVICETYPE_X8, NULL); -+ cfi_send_gen_cmd(0xaa, cfi->addr_unlock1, base, map, cfi, cfi->device_type, NULL); -+ cfi_send_gen_cmd(0x55, cfi->addr_unlock2, base, map, cfi, cfi->device_type, NULL); - } -- cfi_send_gen_cmd(0x90, cfi->addr_unlock1, base, map, cfi, CFI_DEVICETYPE_X8, NULL); -+ cfi_send_gen_cmd(0x90, cfi->addr_unlock1, base, map, cfi, cfi->device_type, NULL); - /* FIXME - 
should have a delay before continuing */ - - if (!cfi->numchips) { -@@ -1628,9 +1969,8 @@ - cfi->mfr = jedec_read_mfr(map, base, cfi); - cfi->id = jedec_read_id(map, base, cfi); - DEBUG(MTD_DEBUG_LEVEL3, -- "MTD %s(): Search for id:(%02x %02x) interleave(%d) type(%d)\n", -- __func__, cfi->mfr, cfi->id, cfi->interleave, -- cfi->device_type); -+ "Search for id:(%02x %02x) interleave(%d) type(%d)\n", -+ cfi->mfr, cfi->id, cfi_interleave(cfi), cfi->device_type); - for (i=0; i<sizeof(jedec_table)/sizeof(jedec_table[0]); i++) { - if ( jedec_match( base, map, cfi, &jedec_table[i] ) ) { - DEBUG( MTD_DEBUG_LEVEL3, -@@ -1642,16 +1982,7 @@ - goto ok_out; - } - } -- switch(unlockpass++) { -- case 0: -- cfi->addr_unlock1 |= cfi->addr_unlock1 << 4; -- cfi->addr_unlock2 |= cfi->addr_unlock2 << 4; -- goto retry; -- case 1: -- cfi->addr_unlock1 = cfi->addr_unlock2 = 0; -- goto retry; -- } -- return 0; -+ goto retry; - } else { - __u16 mfr; - __u16 id; -@@ -1668,21 +1999,24 @@ - } - } - -- /* Check each previous chip to see if it's an alias */ -- for (i=0; i<cfi->numchips; i++) { -- /* This chip should be in read mode if it's one -- we've already touched. */ -- if (jedec_read_mfr(map, chips[i].start, cfi) == cfi->mfr && -- jedec_read_id(map, chips[i].start, cfi) == cfi->id) { -+ /* Check each previous chip locations to see if it's an alias */ -+ for (i=0; i < (base >> cfi->chipshift); i++) { -+ unsigned long start; -+ if(!test_bit(i, chip_map)) { -+ continue; /* Skip location; no valid chip at this address */ -+ } -+ start = i << cfi->chipshift; -+ if (jedec_read_mfr(map, start, cfi) == cfi->mfr && -+ jedec_read_id(map, start, cfi) == cfi->id) { - /* Eep. This chip also looks like it's in autoselect mode. - Is it an alias for the new one? 
*/ -- jedec_reset(chips[i].start, map, cfi); -+ jedec_reset(start, map, cfi); - - /* If the device IDs go away, it's an alias */ - if (jedec_read_mfr(map, base, cfi) != cfi->mfr || - jedec_read_id(map, base, cfi) != cfi->id) { - printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n", -- map->name, base, chips[i].start); -+ map->name, base, start); - return 0; - } - -@@ -1694,7 +2028,7 @@ - if (jedec_read_mfr(map, base, cfi) == cfi->mfr && - jedec_read_id(map, base, cfi) == cfi->id) { - printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n", -- map->name, base, chips[i].start); -+ map->name, base, start); - return 0; - } - } -@@ -1702,22 +2036,16 @@ - - /* OK, if we got to here, then none of the previous chips appear to - be aliases for the current one. */ -- if (cfi->numchips == MAX_CFI_CHIPS) { -- printk(KERN_WARNING"%s: Too many flash chips detected. Increase MAX_CFI_CHIPS from %d.\n", map->name, MAX_CFI_CHIPS); -- /* Doesn't matter about resetting it to Read Mode - we're not going to talk to it anyway */ -- return -1; -- } -- chips[cfi->numchips].start = base; -- chips[cfi->numchips].state = FL_READY; -+ set_bit((base >> cfi->chipshift), chip_map); /* Update chip map */ - cfi->numchips++; - - ok_out: - /* Put it back into Read Mode */ - jedec_reset(base, map, cfi); - -- printk(KERN_INFO "%s: Found %d x%d devices at 0x%x in %d-bit mode\n", -- map->name, cfi->interleave, cfi->device_type*8, base, -- map->buswidth*8); -+ printk(KERN_INFO "%s: Found %d x%d devices at 0x%x in %d-bit bank\n", -+ map->name, cfi_interleave(cfi), cfi->device_type*8, base, -+ map->bankwidth*8); - - return 1; - } -Index: linux-2.6.5/drivers/mtd/chips/map_ram.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/chips/map_ram.c 2004-04-03 22:36:55.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/chips/map_ram.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,7 +1,7 @@ - /* - * Common code to handle map devices which 
are simple RAM - * (C) 2000 Red Hat. GPL'd. -- * $Id: map_ram.c,v 1.17 2003/05/28 12:51:49 dwmw2 Exp $ -+ * $Id: map_ram.c,v 1.20 2004/08/09 13:19:43 dwmw2 Exp $ - */ - - #include <linux/module.h> -@@ -104,13 +104,17 @@ - /* Yeah, it's inefficient. Who cares? It's faster than a _real_ - flash erase. */ - struct map_info *map = (struct map_info *)mtd->priv; -+ map_word allff; - unsigned long i; - -- for (i=0; i<instr->len; i++) -- map_write8(map, 0xFF, instr->addr + i); -+ allff = map_word_ff(map); - -- if (instr->callback) -- instr->callback(instr); -+ for (i=0; i<instr->len; i += map_bankwidth(map)) -+ map_write(map, allff, instr->addr + i); -+ -+ instr->state = MTD_ERASE_DONE; -+ -+ mtd_erase_callback(instr); - - return 0; - } -Index: linux-2.6.5/drivers/mtd/chips/map_rom.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/chips/map_rom.c 2004-04-03 22:37:25.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/chips/map_rom.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,7 +1,7 @@ - /* - * Common code to handle map devices which are simple ROM - * (C) 2000 Red Hat. GPL'd. -- * $Id: map_rom.c,v 1.20 2003/05/28 12:51:49 dwmw2 Exp $ -+ * $Id: map_rom.c,v 1.21 2004/07/12 14:06:01 dwmw2 Exp $ - */ - - #include <linux/module.h> -Index: linux-2.6.5/drivers/mtd/chips/sharp.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/chips/sharp.c 2004-04-03 22:37:23.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/chips/sharp.c 2005-02-01 17:11:17.000000000 -0500 -@@ -4,7 +4,7 @@ - * Copyright 2000,2001 David A. Schleef <ds@schleef.org> - * 2000,2001 Lineo, Inc. 
- * -- * $Id: sharp.c,v 1.12 2003/05/28 15:39:52 dwmw2 Exp $ -+ * $Id: sharp.c,v 1.14 2004/08/09 13:19:43 dwmw2 Exp $ - * - * Devices supported: - * LH28F016SCT Symmetrical block flash memory, 2Mx8 -@@ -30,6 +30,7 @@ - #include <linux/mtd/mtd.h> - #include <linux/mtd/cfi.h> - #include <linux/delay.h> -+#include <linux/init.h> - - #define CMD_RESET 0xffffffff - #define CMD_READ_ID 0x90909090 -@@ -154,7 +155,7 @@ - map->fldrv = &sharp_chipdrv; - map->fldrv_priv = sharp; - -- MOD_INC_USE_COUNT; -+ __module_get(THIS_MODULE); - return mtd; - } - -@@ -424,8 +425,7 @@ - } - - instr->state = MTD_ERASE_DONE; -- if(instr->callback) -- instr->callback(instr); -+ mtd_erase_callback(instr); - - return 0; - } -Index: linux-2.6.5/drivers/mtd/cmdlinepart.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/cmdlinepart.c 2004-04-03 22:37:37.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/cmdlinepart.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: cmdlinepart.c,v 1.9 2003/05/16 17:08:24 dwmw2 Exp $ -+ * $Id: cmdlinepart.c,v 1.14 2004/07/12 12:34:23 dwmw2 Exp $ - * - * Read flash partition table from command line - * -@@ -10,7 +10,7 @@ - * mtdparts=<mtddef>[;<mtddef] - * <mtddef> := <mtd-id>:<partdef>[,<partdef>] - * <partdef> := <size>[@offset][<name>][ro] -- * <mtd-id> := unique id used in mapping driver/device -+ * <mtd-id> := unique name used in mapping driver/device (mtd->name) - * <size> := standard linux memsize OR "-" to denote all remaining space - * <name> := '(' NAME ')' - * -@@ -94,7 +94,7 @@ - if (size < PAGE_SIZE) - { - printk(KERN_ERR ERRP "partition size too small (%lx)\n", size); -- return 0; -+ return NULL; - } - } - -@@ -121,7 +121,7 @@ - if ((p = strchr(name, delim)) == 0) - { - printk(KERN_ERR ERRP "no closing %c found in partition name\n", delim); -- return 0; -+ return NULL; - } - name_len = p - name; - s = p + 1; -@@ -148,12 +148,12 @@ - if (size == SIZE_REMAINING) - { - printk(KERN_ERR 
ERRP "no partitions allowed after a fill-up partition\n"); -- return 0; -+ return NULL; - } - /* more partitions follow, parse them */ - if ((parts = newpart(s + 1, &s, num_parts, - this_part + 1, &extra_mem, extra_mem_size)) == 0) -- return 0; -+ return NULL; - } - else - { /* this is the last partition: allocate space for all */ -@@ -166,7 +166,7 @@ - if (!parts) - { - printk(KERN_ERR ERRP "out of memory\n"); -- return 0; -+ return NULL; - } - memset(parts, 0, alloc_size); - extra_mem = (unsigned char *)(parts + *num_parts); -@@ -358,14 +358,7 @@ - return register_mtd_parser(&cmdline_parser); - } - --static void __exit cmdline_parser_exit(void) --{ -- deregister_mtd_parser(&cmdline_parser); --} -- - module_init(cmdline_parser_init); --module_exit(cmdline_parser_exit); -- - - MODULE_LICENSE("GPL"); - MODULE_AUTHOR("Marius Groeger <mag@sysgo.de>"); -Index: linux-2.6.5/drivers/mtd/devices/Kconfig -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/devices/Kconfig 2004-04-03 22:38:28.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/devices/Kconfig 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - # drivers/mtd/maps/Kconfig --# $Id: Kconfig,v 1.4 2003/05/28 15:18:54 dwmw2 Exp $ -+# $Id: Kconfig,v 1.12 2004/08/10 13:12:18 dwmw2 Exp $ - - menu "Self-contained MTD device drivers" - depends on MTD!=n -@@ -40,9 +40,12 @@ - - config MTD_MS02NV - tristate "DEC MS02-NV NVRAM module support" -- depends on CONFIG_DECSTATION -+ depends on MTD && MACH_DECSTATION - help -- Support for NVRAM module on DECstation. -+ This is an MTD driver for the DEC's MS02-NV (54-20948-01) battery -+ backed-up NVRAM module. The module was originally meant as an NFS -+ accelerator. Say Y here if you have a DECstation 5000/2x0 or a -+ DECsystem 5900 equipped with such a module. 
- - config MTD_SLRAM - tristate "Uncached system RAM" -@@ -52,6 +55,16 @@ - you can still use it for storage or swap by using this driver to - present it to the system as a Memory Technology Device. - -+config MTD_PHRAM -+ tristate "Physical system RAM" -+ depends on MTD -+ help -+ This is a re-implementation of the slram driver above. -+ -+ Use this driver to access physical memory that the kernel proper -+ doesn't have access to, memory beyond the mem=xxx limit, nvram, -+ memory on the video card, etc... -+ - config MTD_LART - tristate "28F160xx flash driver for LART" - depends on SA1100_LART && MTD -@@ -115,7 +128,7 @@ - comment "Disk-On-Chip Device Drivers" - - config MTD_DOC2000 -- tristate "M-Systems Disk-On-Chip 2000 and Millennium" -+ tristate "M-Systems Disk-On-Chip 2000 and Millennium (DEPRECATED)" - depends on MTD - ---help--- - This provides an MTD device driver for the M-Systems DiskOnChip -@@ -131,8 +144,12 @@ - emulate a block device by using a kind of file system on the flash - chips. - -+ NOTE: This driver is deprecated and will probably be removed soon. -+ Please try the new DiskOnChip driver under "NAND Flash Device -+ Drivers". -+ - config MTD_DOC2001 -- tristate "M-Systems Disk-On-Chip Millennium-only alternative driver (see help)" -+ tristate "M-Systems Disk-On-Chip Millennium-only alternative driver (DEPRECATED)" - depends on MTD - ---help--- - This provides an alternative MTD device driver for the M-Systems -@@ -147,6 +164,10 @@ - emulate a block device by using a kind of file system on the flash - chips. - -+ NOTE: This driver is deprecated and will probably be removed soon. -+ Please try the new DiskOnChip driver under "NAND Flash Device -+ Drivers". -+ - config MTD_DOC2001PLUS - tristate "M-Systems Disk-On-Chip Millennium Plus" - depends on MTD -@@ -159,12 +180,23 @@ - to emulate a block device by using a kind of file system on the - flash chips. 
- -+ NOTE: This driver will soon be replaced by the new DiskOnChip driver -+ under "NAND Flash Device Drivers" (currently that driver does not -+ support all Millennium Plus devices). -+ - config MTD_DOCPROBE - tristate -- default m if MTD_DOC2001!=y && MTD_DOC2000!=y && MTD_DOC2001PLUS!=y && (MTD_DOC2001=m || MTD_DOC2000=m || MOD_DOC2001PLUS=m) -+ default m if MTD_DOC2001!=y && MTD_DOC2000!=y && MTD_DOC2001PLUS!=y && (MTD_DOC2001=m || MTD_DOC2000=m || MTD_DOC2001PLUS=m) - default y if MTD_DOC2001=y || MTD_DOC2000=y || MTD_DOC2001PLUS=y - help -- This isn't a real config option, it's derived. -+ This isn't a real config option; it's derived. -+ -+config MTD_DOCECC -+ tristate -+ default m if MTD_DOCPROBE!=y && MTD_NAND_DISKONCHIP!=y && (MTD_DOCPROBE=m || MTD_NAND_DISKONCHIP=m) -+ default y if MTD_DOCPROBE=y || MTD_NAND_DISKONCHIP=y -+ help -+ This isn't a real config option; it's derived. - - config MTD_DOCPROBE_ADVANCED - bool "Advanced detection options for DiskOnChip" -Index: linux-2.6.5/drivers/mtd/devices/Makefile -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/devices/Makefile 2004-04-03 22:36:13.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/devices/Makefile 2005-02-01 17:11:17.000000000 -0500 -@@ -1,7 +1,7 @@ - # - # linux/drivers/devices/Makefile - # --# $Id: Makefile.common,v 1.3 2003/05/28 10:54:23 dwmw2 Exp $ -+# $Id: Makefile.common,v 1.6 2004/07/12 16:07:30 dwmw2 Exp $ - - # *** BIG UGLY NOTE *** - # -@@ -13,8 +13,10 @@ - obj-$(CONFIG_MTD_DOC2000) += doc2000.o - obj-$(CONFIG_MTD_DOC2001) += doc2001.o - obj-$(CONFIG_MTD_DOC2001PLUS) += doc2001plus.o --obj-$(CONFIG_MTD_DOCPROBE) += docprobe.o docecc.o -+obj-$(CONFIG_MTD_DOCPROBE) += docprobe.o -+obj-$(CONFIG_MTD_DOCECC) += docecc.o - obj-$(CONFIG_MTD_SLRAM) += slram.o -+obj-$(CONFIG_MTD_PHRAM) += phram.o - obj-$(CONFIG_MTD_PMC551) += pmc551.o - obj-$(CONFIG_MTD_MS02NV) += ms02-nv.o - obj-$(CONFIG_MTD_MTDRAM) += mtdram.o -Index: 
linux-2.6.5/drivers/mtd/devices/blkmtd-24.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/devices/blkmtd-24.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/devices/blkmtd-24.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,1056 @@ -+/* -+ * $Id: blkmtd-24.c,v 1.23 2004/08/09 18:49:42 dmarlin Exp $ -+ * -+ * blkmtd.c - use a block device as a fake MTD -+ * -+ * Author: Simon Evans <spse@secret.org.uk> -+ * -+ * Copyright (C) 2001,2002 Simon Evans -+ * -+ * Licence: GPL -+ * -+ * How it works: -+ * The driver uses raw/io to read/write the device and the page -+ * cache to cache access. Writes update the page cache with the -+ * new data and mark it dirty and add the page into a kiobuf. -+ * When the kiobuf becomes full or the next extry is to an earlier -+ * block in the kiobuf then it is flushed to disk. This allows -+ * writes to remained ordered and gives a small and simple outgoing -+ * write cache. -+ * -+ * It can be loaded Read-Only to prevent erases and writes to the -+ * medium. -+ * -+ */ -+ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/fs.h> -+#include <linux/blkdev.h> -+#include <linux/iobuf.h> -+#include <linux/slab.h> -+#include <linux/pagemap.h> -+#include <linux/list.h> -+#include <linux/mtd/mtd.h> -+ -+#ifdef CONFIG_MTD_DEBUG -+#ifdef CONFIG_PROC_FS -+# include <linux/proc_fs.h> -+# define BLKMTD_PROC_DEBUG -+ static struct proc_dir_entry *blkmtd_proc; -+#endif -+#endif -+ -+ -+#define err(format, arg...) printk(KERN_ERR "blkmtd: " format "\n" , ## arg) -+#define info(format, arg...) printk(KERN_INFO "blkmtd: " format "\n" , ## arg) -+#define warn(format, arg...) printk(KERN_WARNING "blkmtd: " format "\n" , ## arg) -+#define crit(format, arg...) 
printk(KERN_CRIT "blkmtd: " format "\n" , ## arg) -+ -+ -+/* Default erase size in KiB, always make it a multiple of PAGE_SIZE */ -+#define CONFIG_MTD_BLKDEV_ERASESIZE (128 << 10) /* 128KiB */ -+#define VERSION "1.10" -+ -+/* Info for the block device */ -+struct blkmtd_dev { -+ struct list_head list; -+ struct block_device *binding; -+ struct mtd_info mtd_info; -+ struct kiobuf *rd_buf, *wr_buf; -+ long iobuf_locks; -+ struct semaphore wrbuf_mutex; -+}; -+ -+ -+/* Static info about the MTD, used in cleanup_module */ -+static LIST_HEAD(blkmtd_device_list); -+ -+ -+static void blkmtd_sync(struct mtd_info *mtd); -+ -+#define MAX_DEVICES 4 -+ -+/* Module parameters passed by insmod/modprobe */ -+char *device[MAX_DEVICES]; /* the block device to use */ -+int erasesz[MAX_DEVICES]; /* optional default erase size */ -+int ro[MAX_DEVICES]; /* optional read only flag */ -+int sync; -+ -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Simon Evans <spse@secret.org.uk>"); -+MODULE_DESCRIPTION("Emulate an MTD using a block device"); -+MODULE_PARM(device, "1-4s"); -+MODULE_PARM_DESC(device, "block device to use"); -+MODULE_PARM(erasesz, "1-4i"); -+MODULE_PARM_DESC(erasesz, "optional erase size to use in KiB. eg 4=4KiB."); -+MODULE_PARM(ro, "1-4i"); -+MODULE_PARM_DESC(ro, "1=Read only, writes and erases cause errors"); -+MODULE_PARM(sync, "i"); -+MODULE_PARM_DESC(sync, "1=Synchronous writes"); -+ -+ -+/** -+ * read_pages - read in pages via the page cache -+ * @dev: device to read from -+ * @pagenrs: list of page numbers wanted -+ * @pagelst: storage for struce page * pointers -+ * @pages: count of pages wanted -+ * -+ * Read pages, getting them from the page cache if available -+ * else reading them in from disk if not. pagelst must be preallocated -+ * to hold the page count. 
-+ */ -+static int read_pages(struct blkmtd_dev *dev, int pagenrs[], struct page **pagelst, int pages) -+{ -+ kdev_t kdev; -+ struct page *page; -+ int cnt = 0; -+ struct kiobuf *iobuf; -+ int err = 0; -+ -+ if(!dev) { -+ err("read_pages: PANIC dev == NULL"); -+ return -EIO; -+ } -+ kdev = to_kdev_t(dev->binding->bd_dev); -+ -+ DEBUG(2, "read_pages: reading %d pages\n", pages); -+ if(test_and_set_bit(0, &dev->iobuf_locks)) { -+ err = alloc_kiovec(1, &iobuf); -+ if (err) { -+ crit("cant allocate kiobuf"); -+ return -ENOMEM; -+ } -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,4) -+ iobuf->blocks = kmalloc(KIO_MAX_SECTORS * sizeof(unsigned long), GFP_KERNEL); -+ if(iobuf->blocks == NULL) { -+ crit("cant allocate iobuf blocks"); -+ free_kiovec(1, &iobuf); -+ return -ENOMEM; -+ } -+#endif -+ } else { -+ iobuf = dev->rd_buf; -+ } -+ -+ iobuf->nr_pages = 0; -+ iobuf->length = 0; -+ iobuf->offset = 0; -+ iobuf->locked = 1; -+ -+ for(cnt = 0; cnt < pages; cnt++) { -+ page = grab_cache_page(dev->binding->bd_inode->i_mapping, pagenrs[cnt]); -+ pagelst[cnt] = page; -+ if(!Page_Uptodate(page)) { -+ iobuf->blocks[iobuf->nr_pages] = pagenrs[cnt]; -+ iobuf->maplist[iobuf->nr_pages++] = page; -+ } -+ } -+ -+ if(iobuf->nr_pages) { -+ iobuf->length = iobuf->nr_pages << PAGE_SHIFT; -+ err = brw_kiovec(READ, 1, &iobuf, kdev, iobuf->blocks, PAGE_SIZE); -+ DEBUG(3, "blkmtd: read_pages: finished, err = %d\n", err); -+ if(err < 0) { -+ while(pages--) { -+ ClearPageUptodate(pagelst[pages]); -+ unlock_page(pagelst[pages]); -+ page_cache_release(pagelst[pages]); -+ } -+ } else { -+ while(iobuf->nr_pages--) { -+ SetPageUptodate(iobuf->maplist[iobuf->nr_pages]); -+ } -+ err = 0; -+ } -+ } -+ -+ -+ if(iobuf != dev->rd_buf) { -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,4) -+ kfree(iobuf->blocks); -+#endif -+ free_kiovec(1, &iobuf); -+ } else { -+ clear_bit(0, &dev->iobuf_locks); -+ } -+ DEBUG(2, "read_pages: done, err = %d\n", err); -+ return err; -+} -+ -+ -+/** -+ * commit_pages - commit pages 
in the writeout kiobuf to disk -+ * @dev: device to write to -+ * -+ * If the current dev has pages in the dev->wr_buf kiobuf, -+ * they are written to disk using brw_kiovec() -+ */ -+static int commit_pages(struct blkmtd_dev *dev) -+{ -+ struct kiobuf *iobuf = dev->wr_buf; -+ kdev_t kdev = to_kdev_t(dev->binding->bd_dev); -+ int err = 0; -+ -+ iobuf->length = iobuf->nr_pages << PAGE_SHIFT; -+ iobuf->locked = 1; -+ if(iobuf->length) { -+ int i; -+ DEBUG(2, "blkmtd: commit_pages: nrpages = %d\n", iobuf->nr_pages); -+ /* Check all the pages are dirty and lock them */ -+ for(i = 0; i < iobuf->nr_pages; i++) { -+ struct page *page = iobuf->maplist[i]; -+ BUG_ON(!PageDirty(page)); -+ lock_page(page); -+ } -+ err = brw_kiovec(WRITE, 1, &iobuf, kdev, iobuf->blocks, PAGE_SIZE); -+ DEBUG(3, "commit_write: committed %d pages err = %d\n", iobuf->nr_pages, err); -+ while(iobuf->nr_pages) { -+ struct page *page = iobuf->maplist[--iobuf->nr_pages]; -+ ClearPageDirty(page); -+ SetPageUptodate(page); -+ unlock_page(page); -+ page_cache_release(page); -+ } -+ } -+ -+ DEBUG(2, "blkmtd: sync: end, err = %d\n", err); -+ iobuf->offset = 0; -+ iobuf->nr_pages = 0; -+ iobuf->length = 0; -+ return err; -+} -+ -+ -+/** -+ * write_pages - write block of data to device via the page cache -+ * @dev: device to write to -+ * @buf: data source or NULL if erase (output is set to 0xff) -+ * @to: offset into output device -+ * @len: amount to data to write -+ * @retlen: amount of data written -+ * -+ * Grab pages from the page cache and fill them with the source data. -+ * Non page aligned start and end result in a readin of the page and -+ * part of the page being modified. Pages are added to the wr_buf kiobuf -+ * until this becomes full or the next page written to has a lower pagenr -+ * then the current max pagenr in the kiobuf. 
-+ */ -+static int write_pages(struct blkmtd_dev *dev, const u_char *buf, loff_t to, -+ size_t len, int *retlen) -+{ -+ int pagenr, offset; -+ size_t start_len = 0, end_len; -+ int pagecnt = 0; -+ struct kiobuf *iobuf = dev->wr_buf; -+ int err = 0; -+ struct page *pagelst[2]; -+ int pagenrs[2]; -+ int readpages = 0; -+ int ignorepage = -1; -+ -+ pagenr = to >> PAGE_SHIFT; -+ offset = to & ~PAGE_MASK; -+ -+ DEBUG(2, "blkmtd: write_pages: buf = %p to = %ld len = %zd pagenr = %d offset = %d\n", -+ buf, (long)to, len, pagenr, offset); -+ -+ *retlen = 0; -+ /* see if we have to do a partial write at the start */ -+ if(offset) { -+ start_len = ((offset + len) > PAGE_SIZE) ? PAGE_SIZE - offset : len; -+ len -= start_len; -+ } -+ -+ /* calculate the length of the other two regions */ -+ end_len = len & ~PAGE_MASK; -+ len -= end_len; -+ -+ if(start_len) { -+ pagenrs[0] = pagenr; -+ readpages++; -+ pagecnt++; -+ } -+ if(len) -+ pagecnt += len >> PAGE_SHIFT; -+ if(end_len) { -+ pagenrs[readpages] = pagenr + pagecnt; -+ readpages++; -+ pagecnt++; -+ } -+ -+ DEBUG(3, "blkmtd: write: start_len = %zd len = %zd end_len = %zd pagecnt = %d\n", -+ start_len, len, end_len, pagecnt); -+ -+ down(&dev->wrbuf_mutex); -+ -+ if(iobuf->nr_pages && ((pagenr <= iobuf->blocks[iobuf->nr_pages-1]) -+ || (iobuf->nr_pages + pagecnt) >= KIO_STATIC_PAGES)) { -+ -+ if((pagenr == iobuf->blocks[iobuf->nr_pages-1]) -+ && ((iobuf->nr_pages + pagecnt) < KIO_STATIC_PAGES)) { -+ iobuf->nr_pages--; -+ ignorepage = pagenr; -+ } else { -+ DEBUG(3, "blkmtd: doing writeout pagenr = %d max_pagenr = %ld pagecnt = %d idx = %d\n", -+ pagenr, iobuf->blocks[iobuf->nr_pages-1], -+ pagecnt, iobuf->nr_pages); -+ commit_pages(dev); -+ } -+ } -+ -+ if(readpages) { -+ err = read_pages(dev, pagenrs, pagelst, readpages); -+ if(err < 0) -+ goto readin_err; -+ } -+ -+ if(start_len) { -+ /* do partial start region */ -+ struct page *page; -+ -+ DEBUG(3, "blkmtd: write: doing partial start, page = %d len = %zd offset = %d\n", -+ 
pagenr, start_len, offset); -+ page = pagelst[0]; -+ BUG_ON(!buf); -+ if(PageDirty(page) && pagenr != ignorepage) { -+ err("to = %lld start_len = %zd len = %zd end_len = %zd pagenr = %d ignorepage = %d\n", -+ to, start_len, len, end_len, pagenr, ignorepage); -+ BUG(); -+ } -+ memcpy(page_address(page)+offset, buf, start_len); -+ SetPageDirty(page); -+ SetPageUptodate(page); -+ unlock_page(page); -+ buf += start_len; -+ *retlen = start_len; -+ err = 0; -+ iobuf->blocks[iobuf->nr_pages] = pagenr++; -+ iobuf->maplist[iobuf->nr_pages] = page; -+ iobuf->nr_pages++; -+ } -+ -+ /* Now do the main loop to a page aligned, n page sized output */ -+ if(len) { -+ int pagesc = len >> PAGE_SHIFT; -+ DEBUG(3, "blkmtd: write: whole pages start = %d, count = %d\n", -+ pagenr, pagesc); -+ while(pagesc) { -+ struct page *page; -+ -+ /* see if page is in the page cache */ -+ DEBUG(3, "blkmtd: write: grabbing page %d from page cache\n", pagenr); -+ page = grab_cache_page(dev->binding->bd_inode->i_mapping, pagenr); -+ if(PageDirty(page) && pagenr != ignorepage) { -+ BUG(); -+ } -+ if(!page) { -+ warn("write: cant grab cache page %d", pagenr); -+ err = -ENOMEM; -+ goto write_err; -+ } -+ if(!buf) { -+ memset(page_address(page), 0xff, PAGE_SIZE); -+ } else { -+ memcpy(page_address(page), buf, PAGE_SIZE); -+ buf += PAGE_SIZE; -+ } -+ iobuf->blocks[iobuf->nr_pages] = pagenr++; -+ iobuf->maplist[iobuf->nr_pages] = page; -+ iobuf->nr_pages++; -+ SetPageDirty(page); -+ SetPageUptodate(page); -+ unlock_page(page); -+ pagesc--; -+ *retlen += PAGE_SIZE; -+ } -+ } -+ -+ if(end_len) { -+ /* do the third region */ -+ struct page *page; -+ DEBUG(3, "blkmtd: write: doing partial end, page = %d len = %zd\n", -+ pagenr, end_len); -+ page = pagelst[readpages-1]; -+ BUG_ON(!buf); -+ if(PageDirty(page) && pagenr != ignorepage) { -+ err("to = %lld start_len = %zd len = %zd end_len = %zd pagenr = %d ignorepage = %d\n", -+ to, start_len, len, end_len, pagenr, ignorepage); -+ BUG(); -+ } -+ 
memcpy(page_address(page), buf, end_len); -+ SetPageDirty(page); -+ SetPageUptodate(page); -+ unlock_page(page); -+ DEBUG(3, "blkmtd: write: writing out partial end\n"); -+ *retlen += end_len; -+ err = 0; -+ iobuf->blocks[iobuf->nr_pages] = pagenr; -+ iobuf->maplist[iobuf->nr_pages] = page; -+ iobuf->nr_pages++; -+ } -+ -+ DEBUG(2, "blkmtd: write: end, retlen = %zd, err = %d\n", *retlen, err); -+ -+ if(sync) { -+write_err: -+ commit_pages(dev); -+ } -+ -+readin_err: -+ up(&dev->wrbuf_mutex); -+ return err; -+} -+ -+ -+/* erase a specified part of the device */ -+static int blkmtd_erase(struct mtd_info *mtd, struct erase_info *instr) -+{ -+ struct blkmtd_dev *dev = mtd->priv; -+ struct mtd_erase_region_info *einfo = mtd->eraseregions; -+ int numregions = mtd->numeraseregions; -+ size_t from; -+ u_long len; -+ int err = -EIO; -+ size_t retlen; -+ -+ /* check readonly */ -+ if(!dev->wr_buf) { -+ err("error: mtd%d trying to erase readonly device %s", -+ mtd->index, mtd->name); -+ instr->state = MTD_ERASE_FAILED; -+ goto erase_callback; -+ } -+ -+ instr->state = MTD_ERASING; -+ from = instr->addr; -+ len = instr->len; -+ -+ /* check erase region has valid start and length */ -+ DEBUG(2, "blkmtd: erase: dev = `%s' from = 0x%zx len = 0x%lx\n", -+ bdevname(dev->binding->bd_dev), from, len); -+ while(numregions) { -+ DEBUG(3, "blkmtd: checking erase region = 0x%08X size = 0x%X num = 0x%x\n", -+ einfo->offset, einfo->erasesize, einfo->numblocks); -+ if(from >= einfo->offset -+ && from < einfo->offset + (einfo->erasesize * einfo->numblocks)) { -+ if(len == einfo->erasesize -+ && ( (from - einfo->offset) % einfo->erasesize == 0)) -+ break; -+ } -+ numregions--; -+ einfo++; -+ } -+ -+ if(!numregions) { -+ /* Not a valid erase block */ -+ err("erase: invalid erase request 0x%lX @ 0x%08zX", len, from); -+ instr->state = MTD_ERASE_FAILED; -+ err = -EIO; -+ } -+ -+ if(instr->state != MTD_ERASE_FAILED) { -+ /* do the erase */ -+ DEBUG(3, "Doing erase from = %zd len = %ld\n", from, 
len); -+ err = write_pages(dev, NULL, from, len, &retlen); -+ if(err < 0) { -+ err("erase failed err = %d", err); -+ instr->state = MTD_ERASE_FAILED; -+ } else { -+ instr->state = MTD_ERASE_DONE; -+ err = 0; -+ } -+ } -+ -+ DEBUG(3, "blkmtd: erase: checking callback\n"); -+ erase_callback: -+ mtd_erase_callback(instr); -+ DEBUG(2, "blkmtd: erase: finished (err = %d)\n", err); -+ return err; -+} -+ -+ -+/* read a range of the data via the page cache */ -+static int blkmtd_read(struct mtd_info *mtd, loff_t from, size_t len, -+ size_t *retlen, u_char *buf) -+{ -+ struct blkmtd_dev *dev = mtd->priv; -+ int err = 0; -+ int offset; -+ int pagenr, pages; -+ struct page **pagelst; -+ int *pagenrs; -+ int i; -+ -+ *retlen = 0; -+ -+ DEBUG(2, "blkmtd: read: dev = `%s' from = %lld len = %zd buf = %p\n", -+ bdevname(dev->binding->bd_dev), from, len, buf); -+ -+ pagenr = from >> PAGE_SHIFT; -+ offset = from - (pagenr << PAGE_SHIFT); -+ -+ pages = (offset+len+PAGE_SIZE-1) >> PAGE_SHIFT; -+ DEBUG(3, "blkmtd: read: pagenr = %d offset = %d, pages = %d\n", -+ pagenr, offset, pages); -+ -+ pagelst = kmalloc(sizeof(struct page *) * pages, GFP_KERNEL); -+ if(!pagelst) -+ return -ENOMEM; -+ pagenrs = kmalloc(sizeof(int) * pages, GFP_KERNEL); -+ if(!pagenrs) { -+ kfree(pagelst); -+ return -ENOMEM; -+ } -+ for(i = 0; i < pages; i++) -+ pagenrs[i] = pagenr+i; -+ -+ err = read_pages(dev, pagenrs, pagelst, pages); -+ if(err) -+ goto readerr; -+ -+ pagenr = 0; -+ while(pages) { -+ struct page *page; -+ int cpylen; -+ -+ DEBUG(3, "blkmtd: read: looking for page: %d\n", pagenr); -+ page = pagelst[pagenr]; -+ -+ cpylen = (PAGE_SIZE > len) ? 
len : PAGE_SIZE; -+ if(offset+cpylen > PAGE_SIZE) -+ cpylen = PAGE_SIZE-offset; -+ -+ memcpy(buf + *retlen, page_address(page) + offset, cpylen); -+ offset = 0; -+ len -= cpylen; -+ *retlen += cpylen; -+ pagenr++; -+ pages--; -+ unlock_page(page); -+ if(!PageDirty(page)) -+ page_cache_release(page); -+ } -+ -+ readerr: -+ kfree(pagelst); -+ kfree(pagenrs); -+ DEBUG(2, "blkmtd: end read: retlen = %zd, err = %d\n", *retlen, err); -+ return err; -+} -+ -+ -+/* write data to the underlying device */ -+static int blkmtd_write(struct mtd_info *mtd, loff_t to, size_t len, -+ size_t *retlen, const u_char *buf) -+{ -+ struct blkmtd_dev *dev = mtd->priv; -+ int err; -+ -+ *retlen = 0; -+ if(!len) -+ return 0; -+ -+ DEBUG(2, "blkmtd: write: dev = `%s' to = %lld len = %zd buf = %p\n", -+ bdevname(dev->binding->bd_dev), to, len, buf); -+ -+ /* handle readonly and out of range numbers */ -+ -+ if(!dev->wr_buf) { -+ err("error: trying to write to a readonly device %s", mtd->name); -+ return -EROFS; -+ } -+ -+ if(to >= mtd->size) { -+ return -ENOSPC; -+ } -+ -+ if(to + len > mtd->size) { -+ len = (mtd->size - to); -+ } -+ -+ err = write_pages(dev, buf, to, len, retlen); -+ if(err < 0) -+ *retlen = 0; -+ else -+ err = 0; -+ DEBUG(2, "blkmtd: write: end, err = %d\n", err); -+ return err; -+} -+ -+ -+/* sync the device - wait until the write queue is empty */ -+static void blkmtd_sync(struct mtd_info *mtd) -+{ -+ struct blkmtd_dev *dev = mtd->priv; -+ struct kiobuf *iobuf = dev->wr_buf; -+ -+ DEBUG(2, "blkmtd: sync: called\n"); -+ if(iobuf == NULL) -+ return; -+ -+ DEBUG(3, "blkmtd: kiovec: length = %d nr_pages = %d\n", -+ iobuf->length, iobuf->nr_pages); -+ down(&dev->wrbuf_mutex); -+ if(iobuf->nr_pages) -+ commit_pages(dev); -+ up(&dev->wrbuf_mutex); -+} -+ -+ -+#ifdef BLKMTD_PROC_DEBUG -+/* procfs stuff */ -+static int blkmtd_proc_read(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ struct list_head *temp1, *temp2; -+ -+ 
MOD_INC_USE_COUNT; -+ -+ /* Count the size of the page lists */ -+ -+ len = sprintf(page, "dev\twr_idx\tmax_idx\tnrpages\tclean\tdirty\tlocked\tlru\n"); -+ list_for_each_safe(temp1, temp2, &blkmtd_device_list) { -+ struct blkmtd_dev *dev = list_entry(temp1, struct blkmtd_dev, -+ list); -+ struct list_head *temp; -+ struct page *pagei; -+ -+ int clean = 0, dirty = 0, locked = 0, lru = 0; -+ /* Count the size of the page lists */ -+ list_for_each(temp, &dev->binding->bd_inode->i_mapping->clean_pages) { -+ pagei = list_entry(temp, struct page, list); -+ clean++; -+ if(PageLocked(pagei)) -+ locked++; -+ if(PageDirty(pagei)) -+ dirty++; -+ if(PageLRU(pagei)) -+ lru++; -+ } -+ list_for_each(temp, &dev->binding->bd_inode->i_mapping->dirty_pages) { -+ pagei = list_entry(temp, struct page, list); -+ if(PageLocked(pagei)) -+ locked++; -+ if(PageDirty(pagei)) -+ dirty++; -+ if(PageLRU(pagei)) -+ lru++; -+ } -+ list_for_each(temp, &dev->binding->bd_inode->i_mapping->locked_pages) { -+ pagei = list_entry(temp, struct page, list); -+ if(PageLocked(pagei)) -+ locked++; -+ if(PageDirty(pagei)) -+ dirty++; -+ if(PageLRU(pagei)) -+ lru++; -+ } -+ -+ len += sprintf(page+len, "mtd%d:\t%ld\t%d\t%ld\t%d\t%d\t%d\t%d\n", -+ dev->mtd_info.index, -+ (dev->wr_buf && dev->wr_buf->nr_pages) ? -+ dev->wr_buf->blocks[dev->wr_buf->nr_pages-1] : 0, -+ (dev->wr_buf) ? 
dev->wr_buf->nr_pages : 0, -+ dev->binding->bd_inode->i_mapping->nrpages, -+ clean, dirty, locked, lru); -+ } -+ -+ if(len <= count) -+ *eof = 1; -+ -+ MOD_DEC_USE_COUNT; -+ return len; -+} -+#endif -+ -+ -+static void free_device(struct blkmtd_dev *dev) -+{ -+ DEBUG(2, "blkmtd: free_device() dev = %p\n", dev); -+ if(dev) { -+ del_mtd_device(&dev->mtd_info); -+ info("mtd%d: [%s] removed", dev->mtd_info.index, -+ dev->mtd_info.name + strlen("blkmtd: ")); -+ if(dev->mtd_info.eraseregions) -+ kfree(dev->mtd_info.eraseregions); -+ if(dev->mtd_info.name) -+ kfree(dev->mtd_info.name); -+ -+ if(dev->rd_buf) { -+ dev->rd_buf->locked = 0; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,4) -+ if(dev->rd_buf->blocks) -+ kfree(dev->rd_buf->blocks); -+#endif -+ free_kiovec(1, &dev->rd_buf); -+ } -+ if(dev->wr_buf) { -+ dev->wr_buf->locked = 0; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,4) -+ if(dev->wr_buf->blocks) -+ kfree(dev->rw_buf->blocks); -+#endif -+ free_kiovec(1, &dev->wr_buf); -+ } -+ -+ if(dev->binding) { -+ kdev_t kdev = to_kdev_t(dev->binding->bd_dev); -+ invalidate_inode_pages(dev->binding->bd_inode); -+ set_blocksize(kdev, 1 << 10); -+ blkdev_put(dev->binding, BDEV_RAW); -+ } -+ kfree(dev); -+ } -+} -+ -+ -+/* For a given size and initial erase size, calculate the number -+ * and size of each erase region. Goes round the loop twice, -+ * once to find out how many regions, then allocates space, -+ * then round the loop again to fill it in. 
-+ */ -+static struct mtd_erase_region_info *calc_erase_regions( -+ size_t erase_size, size_t total_size, int *regions) -+{ -+ struct mtd_erase_region_info *info = NULL; -+ -+ DEBUG(2, "calc_erase_regions, es = %zd size = %zd regions = %d\n", -+ erase_size, total_size, *regions); -+ /* Make any user specified erasesize be a power of 2 -+ and at least PAGE_SIZE */ -+ if(erase_size) { -+ int es = erase_size; -+ erase_size = 1; -+ while(es != 1) { -+ es >>= 1; -+ erase_size <<= 1; -+ } -+ if(erase_size < PAGE_SIZE) -+ erase_size = PAGE_SIZE; -+ } else { -+ erase_size = CONFIG_MTD_BLKDEV_ERASESIZE; -+ } -+ -+ *regions = 0; -+ -+ do { -+ int tot_size = total_size; -+ int er_size = erase_size; -+ int count = 0, offset = 0, regcnt = 0; -+ -+ while(tot_size) { -+ count = tot_size / er_size; -+ if(count) { -+ tot_size = tot_size % er_size; -+ if(info) { -+ DEBUG(2, "adding to erase info off=%d er=%d cnt=%d\n", -+ offset, er_size, count); -+ (info+regcnt)->offset = offset; -+ (info+regcnt)->erasesize = er_size; -+ (info+regcnt)->numblocks = count; -+ (*regions)++; -+ } -+ regcnt++; -+ offset += (count * er_size); -+ } -+ while(er_size > tot_size) -+ er_size >>= 1; -+ } -+ if(info == NULL) { -+ info = kmalloc(regcnt * sizeof(struct mtd_erase_region_info), GFP_KERNEL); -+ if(!info) -+ break; -+ } -+ } while(!(*regions)); -+ DEBUG(2, "calc_erase_regions done, es = %zd size = %zd regions = %d\n", -+ erase_size, total_size, *regions); -+ return info; -+} -+ -+ -+extern kdev_t name_to_kdev_t(char *line) __init; -+ -+ -+static struct blkmtd_dev *add_device(char *devname, int readonly, int erase_size) -+{ -+ int maj, min; -+ kdev_t kdev; -+ int mode; -+ struct blkmtd_dev *dev; -+ -+#ifdef MODULE -+ struct file *file = NULL; -+ struct inode *inode; -+#endif -+ -+ if(!devname) -+ return NULL; -+ -+ /* Get a handle on the device */ -+ mode = (readonly) ? 
O_RDONLY : O_RDWR; -+ -+#ifdef MODULE -+ -+ file = filp_open(devname, mode, 0); -+ if(IS_ERR(file)) { -+ err("error: cant open device %s", devname); -+ DEBUG(2, "blkmtd: filp_open returned %ld\n", PTR_ERR(file)); -+ return NULL; -+ } -+ -+ /* determine is this is a block device and -+ * if so get its major and minor numbers -+ */ -+ inode = file->f_dentry->d_inode; -+ if(!S_ISBLK(inode->i_mode)) { -+ err("%s not a block device", devname); -+ filp_close(file, NULL); -+ return NULL; -+ } -+ kdev = inode->i_rdev; -+ filp_close(file, NULL); -+#else -+ kdev = name_to_kdev_t(devname); -+#endif /* MODULE */ -+ -+ if(!kdev) { -+ err("bad block device: `%s'", devname); -+ return NULL; -+ } -+ -+ maj = MAJOR(kdev); -+ min = MINOR(kdev); -+ DEBUG(1, "blkmtd: found a block device major = %d, minor = %d\n", -+ maj, min); -+ -+ if(maj == MTD_BLOCK_MAJOR) { -+ err("attempting to use an MTD device as a block device"); -+ return NULL; -+ } -+ -+ DEBUG(1, "blkmtd: devname = %s\n", bdevname(kdev)); -+ -+ dev = kmalloc(sizeof(struct blkmtd_dev), GFP_KERNEL); -+ if(dev == NULL) -+ return NULL; -+ -+ memset(dev, 0, sizeof(struct blkmtd_dev)); -+ if(alloc_kiovec(1, &dev->rd_buf)) { -+ err("cant allocate read iobuf"); -+ goto devinit_err; -+ } -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,4) -+ dev->rd_buf->blocks = kmalloc(KIO_MAX_SECTORS * sizeof(unsigned long), GFP_KERNEL); -+ if(dev->rd_buf->blocks == NULL) { -+ crit("cant allocate rd_buf blocks"); -+ goto devinit_err; -+ } -+#endif -+ -+ if(!readonly) { -+ if(alloc_kiovec(1, &dev->wr_buf)) { -+ err("cant allocate kiobuf - readonly enabled"); -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,4) -+ } else { -+ dev->wr_buf->blocks = kmalloc(KIO_MAX_SECTORS * sizeof(unsigned long), GFP_KERNEL); -+ if(dev->wr_buf->blocks == NULL) { -+ crit("cant allocate wr_buf blocks - readonly enabled"); -+ free_kiovec(1, &iobuf); -+ } -+#endif -+ } -+ if(dev->wr_buf) -+ init_MUTEX(&dev->wrbuf_mutex); -+ } -+ -+ /* get the block device */ -+ dev->binding 
= bdget(kdev_t_to_nr(MKDEV(maj, min))); -+ if(blkdev_get(dev->binding, mode, 0, BDEV_RAW)) -+ goto devinit_err; -+ -+ if(set_blocksize(kdev, PAGE_SIZE)) { -+ err("cant set block size to PAGE_SIZE on %s", bdevname(kdev)); -+ goto devinit_err; -+ } -+ -+ dev->mtd_info.size = dev->binding->bd_inode->i_size & PAGE_MASK; -+ -+ /* Setup the MTD structure */ -+ /* make the name contain the block device in */ -+ dev->mtd_info.name = kmalloc(sizeof("blkmtd: ") + strlen(devname), GFP_KERNEL); -+ if(dev->mtd_info.name == NULL) -+ goto devinit_err; -+ -+ sprintf(dev->mtd_info.name, "blkmtd: %s", devname); -+ dev->mtd_info.eraseregions = calc_erase_regions(erase_size, dev->mtd_info.size, -+ &dev->mtd_info.numeraseregions); -+ if(dev->mtd_info.eraseregions == NULL) -+ goto devinit_err; -+ -+ dev->mtd_info.erasesize = dev->mtd_info.eraseregions->erasesize; -+ DEBUG(1, "blkmtd: init: found %d erase regions\n", -+ dev->mtd_info.numeraseregions); -+ -+ if(readonly) { -+ dev->mtd_info.type = MTD_ROM; -+ dev->mtd_info.flags = MTD_CAP_ROM; -+ } else { -+ dev->mtd_info.type = MTD_RAM; -+ dev->mtd_info.flags = MTD_CAP_RAM; -+ } -+ dev->mtd_info.erase = blkmtd_erase; -+ dev->mtd_info.read = blkmtd_read; -+ dev->mtd_info.write = blkmtd_write; -+ dev->mtd_info.sync = blkmtd_sync; -+ dev->mtd_info.point = 0; -+ dev->mtd_info.unpoint = 0; -+ dev->mtd_info.priv = dev; -+ dev->mtd_info.owner = THIS_MODULE; -+ -+ list_add(&dev->list, &blkmtd_device_list); -+ if (add_mtd_device(&dev->mtd_info)) { -+ /* Device didnt get added, so free the entry */ -+ list_del(&dev->list); -+ free_device(dev); -+ return NULL; -+ } else { -+ info("mtd%d: [%s] erase_size = %dKiB %s", -+ dev->mtd_info.index, dev->mtd_info.name + strlen("blkmtd: "), -+ dev->mtd_info.erasesize >> 10, -+ (dev->wr_buf) ? 
"" : "(read-only)"); -+ } -+ -+ return dev; -+ -+ devinit_err: -+ free_device(dev); -+ return NULL; -+} -+ -+ -+/* Cleanup and exit - sync the device and kill of the kernel thread */ -+static void __devexit cleanup_blkmtd(void) -+{ -+ struct list_head *temp1, *temp2; -+#ifdef BLKMTD_PROC_DEBUG -+ if(blkmtd_proc) { -+ remove_proc_entry("blkmtd_debug", NULL); -+ } -+#endif -+ -+ /* Remove the MTD devices */ -+ list_for_each_safe(temp1, temp2, &blkmtd_device_list) { -+ struct blkmtd_dev *dev = list_entry(temp1, struct blkmtd_dev, -+ list); -+ blkmtd_sync(&dev->mtd_info); -+ free_device(dev); -+ } -+} -+ -+#ifndef MODULE -+ -+/* Handle kernel boot params */ -+ -+ -+static int __init param_blkmtd_device(char *str) -+{ -+ int i; -+ -+ for(i = 0; i < MAX_DEVICES; i++) { -+ device[i] = str; -+ DEBUG(2, "blkmtd: device setup: %d = %s\n", i, device[i]); -+ strsep(&str, ","); -+ } -+ return 1; -+} -+ -+ -+static int __init param_blkmtd_erasesz(char *str) -+{ -+ int i; -+ for(i = 0; i < MAX_DEVICES; i++) { -+ char *val = strsep(&str, ","); -+ if(val) -+ erasesz[i] = simple_strtoul(val, NULL, 0); -+ DEBUG(2, "blkmtd: erasesz setup: %d = %d\n", i, erasesz[i]); -+ } -+ -+ return 1; -+} -+ -+ -+static int __init param_blkmtd_ro(char *str) -+{ -+ int i; -+ for(i = 0; i < MAX_DEVICES; i++) { -+ char *val = strsep(&str, ","); -+ if(val) -+ ro[i] = simple_strtoul(val, NULL, 0); -+ DEBUG(2, "blkmtd: ro setup: %d = %d\n", i, ro[i]); -+ } -+ -+ return 1; -+} -+ -+ -+static int __init param_blkmtd_sync(char *str) -+{ -+ if(str[0] == '1') -+ sync = 1; -+ return 1; -+} -+ -+__setup("blkmtd_device=", param_blkmtd_device); -+__setup("blkmtd_erasesz=", param_blkmtd_erasesz); -+__setup("blkmtd_ro=", param_blkmtd_ro); -+__setup("blkmtd_sync=", param_blkmtd_sync); -+ -+#endif -+ -+ -+/* Startup */ -+static int __init init_blkmtd(void) -+{ -+ int i; -+ -+ /* Check args - device[0] is the bare minimum*/ -+ if(!device[0]) { -+ err("error: missing `device' name\n"); -+ return -EINVAL; -+ } -+ -+ 
for(i = 0; i < MAX_DEVICES; i++) -+ add_device(device[i], ro[i], erasesz[i] << 10); -+ -+ if(list_empty(&blkmtd_device_list)) -+ goto init_err; -+ -+ info("version " VERSION); -+ -+#ifdef BLKMTD_PROC_DEBUG -+ /* create proc entry */ -+ DEBUG(2, "Creating /proc/blkmtd_debug\n"); -+ blkmtd_proc = create_proc_read_entry("blkmtd_debug", 0444, -+ NULL, blkmtd_proc_read, NULL); -+ if(blkmtd_proc == NULL) { -+ err("Cant create /proc/blkmtd_debug"); -+ } else { -+ blkmtd_proc->owner = THIS_MODULE; -+ } -+#endif -+ -+ if(!list_empty(&blkmtd_device_list)) -+ /* Everything is ok if we got here */ -+ return 0; -+ -+ init_err: -+ return -EINVAL; -+} -+ -+module_init(init_blkmtd); -+module_exit(cleanup_blkmtd); -Index: linux-2.6.5/drivers/mtd/devices/blkmtd.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/devices/blkmtd.c 2005-02-01 16:55:31.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/devices/blkmtd.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: blkmtd-25.c,v 1.5 2003/07/16 06:48:27 spse Exp $ -+ * $Id: blkmtd.c,v 1.23 2004/08/09 14:03:19 dwmw2 Exp $ - * - * blkmtd.c - use a block device as a fake MTD - * -@@ -39,7 +39,7 @@ - - /* Default erase size in K, always make it a multiple of PAGE_SIZE */ - #define CONFIG_MTD_BLKDEV_ERASESIZE (128 << 10) /* 128KiB */ --#define VERSION "$Revision: 1.5 $" -+#define VERSION "$Revision: 1.23 $" - - /* Info for the block device */ - struct blkmtd_dev { -@@ -246,7 +246,7 @@ - pagenr = to >> PAGE_SHIFT; - offset = to & ~PAGE_MASK; - -- DEBUG(2, "blkmtd: write_pages: buf = %p to = %ld len = %d pagenr = %d offset = %d\n", -+ DEBUG(2, "blkmtd: write_pages: buf = %p to = %ld len = %zd pagenr = %d offset = %d\n", - buf, (long)to, len, pagenr, offset); - - /* see if we have to do a partial write at the start */ -@@ -270,21 +270,21 @@ - - down(&dev->wrbuf_mutex); - -- DEBUG(3, "blkmtd: write: start_len = %d len = %d end_len = %d pagecnt = %d\n", -+ DEBUG(3, "blkmtd: write: 
start_len = %zd len = %zd end_len = %zd pagecnt = %d\n", - start_len, len, end_len, pagecnt); - - if(start_len) { - /* do partial start region */ - struct page *page; - -- DEBUG(3, "blkmtd: write: doing partial start, page = %d len = %d offset = %d\n", -+ DEBUG(3, "blkmtd: write: doing partial start, page = %d len = %zd offset = %d\n", - pagenr, start_len, offset); - - BUG_ON(!buf); - page = read_cache_page(dev->blkdev->bd_inode->i_mapping, pagenr, (filler_t *)blkmtd_readpage, dev); - lock_page(page); - if(PageDirty(page)) { -- err("to = %lld start_len = %d len = %d end_len = %d pagenr = %d\n", -+ err("to = %lld start_len = %zd len = %zd end_len = %zd pagenr = %d\n", - to, start_len, len, end_len, pagenr); - BUG(); - } -@@ -346,13 +346,13 @@ - if(end_len) { - /* do the third region */ - struct page *page; -- DEBUG(3, "blkmtd: write: doing partial end, page = %d len = %d\n", -+ DEBUG(3, "blkmtd: write: doing partial end, page = %d len = %zd\n", - pagenr, end_len); - BUG_ON(!buf); - page = read_cache_page(dev->blkdev->bd_inode->i_mapping, pagenr, (filler_t *)blkmtd_readpage, dev); - lock_page(page); - if(PageDirty(page)) { -- err("to = %lld start_len = %d len = %d end_len = %d pagenr = %d\n", -+ err("to = %lld start_len = %zd len = %zd end_len = %zd pagenr = %d\n", - to, start_len, len, end_len, pagenr); - BUG(); - } -@@ -375,7 +375,7 @@ - if(bio) - blkmtd_write_out(bio); - -- DEBUG(2, "blkmtd: write: end, retlen = %d, err = %d\n", *retlen, err); -+ DEBUG(2, "blkmtd: write: end, retlen = %zd, err = %d\n", *retlen, err); - up(&dev->wrbuf_mutex); - - if(retlen) -@@ -393,14 +393,14 @@ - size_t from; - u_long len; - int err = -EIO; -- int retlen; -+ size_t retlen; - - instr->state = MTD_ERASING; - from = instr->addr; - len = instr->len; - - /* check erase region has valid start and length */ -- DEBUG(2, "blkmtd: erase: dev = `%s' from = 0x%x len = 0x%lx\n", -+ DEBUG(2, "blkmtd: erase: dev = `%s' from = 0x%zx len = 0x%lx\n", - mtd->name+9, from, len); - while(numregions) 
{ - DEBUG(3, "blkmtd: checking erase region = 0x%08X size = 0x%X num = 0x%x\n", -@@ -417,14 +417,14 @@ - - if(!numregions) { - /* Not a valid erase block */ -- err("erase: invalid erase request 0x%lX @ 0x%08X", len, from); -+ err("erase: invalid erase request 0x%lX @ 0x%08zX", len, from); - instr->state = MTD_ERASE_FAILED; - err = -EIO; - } - - if(instr->state != MTD_ERASE_FAILED) { - /* do the erase */ -- DEBUG(3, "Doing erase from = %d len = %ld\n", from, len); -+ DEBUG(3, "Doing erase from = %zd len = %ld\n", from, len); - err = write_pages(dev, NULL, from, len, &retlen); - if(err || retlen != len) { - err("erase failed err = %d", err); -@@ -435,9 +435,7 @@ - } - - DEBUG(3, "blkmtd: erase: checking callback\n"); -- if (instr->callback) { -- (*(instr->callback))(instr); -- } -+ mtd_erase_callback(instr); - DEBUG(2, "blkmtd: erase: finished (err = %d)\n", err); - return err; - } -@@ -453,8 +451,8 @@ - int pagenr, pages; - size_t thislen = 0; - -- DEBUG(2, "blkmtd: read: dev = `%s' from = %ld len = %d buf = %p\n", -- mtd->name+9, (long int)from, len, buf); -+ DEBUG(2, "blkmtd: read: dev = `%s' from = %lld len = %zd buf = %p\n", -+ mtd->name+9, from, len, buf); - - if(from > mtd->size) - return -EINVAL; -@@ -496,7 +494,7 @@ - readerr: - if(retlen) - *retlen = thislen; -- DEBUG(2, "blkmtd: end read: retlen = %d, err = %d\n", thislen, err); -+ DEBUG(2, "blkmtd: end read: retlen = %zd, err = %d\n", thislen, err); - return err; - } - -@@ -511,8 +509,8 @@ - if(!len) - return 0; - -- DEBUG(2, "blkmtd: write: dev = `%s' to = %ld len = %d buf = %p\n", -- mtd->name+9, (long int)to, len, buf); -+ DEBUG(2, "blkmtd: write: dev = `%s' to = %lld len = %zd buf = %p\n", -+ mtd->name+9, to, len, buf); - - if(to >= mtd->size) { - return -ENOSPC; -@@ -565,7 +563,7 @@ - { - struct mtd_erase_region_info *info = NULL; - -- DEBUG(2, "calc_erase_regions, es = %d size = %d regions = %d\n", -+ DEBUG(2, "calc_erase_regions, es = %zd size = %zd regions = %d\n", - erase_size, total_size, 
*regions); - /* Make any user specified erasesize be a power of 2 - and at least PAGE_SIZE */ -@@ -613,7 +611,7 @@ - break; - } - } while(!(*regions)); -- DEBUG(2, "calc_erase_regions done, es = %d size = %d regions = %d\n", -+ DEBUG(2, "calc_erase_regions done, es = %zd size = %zd regions = %d\n", - erase_size, total_size, *regions); - return info; - } -Index: linux-2.6.5/drivers/mtd/devices/doc2000.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/devices/doc2000.c 2004-04-03 22:36:12.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/devices/doc2000.c 2005-02-01 17:11:17.000000000 -0500 -@@ -4,7 +4,7 @@ - * (c) 1999 Machine Vision Holdings, Inc. - * (c) 1999, 2000 David Woodhouse <dwmw2@infradead.org> - * -- * $Id: doc2000.c,v 1.53 2003/06/11 09:45:19 dwmw2 Exp $ -+ * $Id: doc2000.c,v 1.63 2004/09/16 23:51:56 gleixner Exp $ - */ - - #include <linux/kernel.h> -@@ -19,12 +19,14 @@ - #include <linux/sched.h> - #include <linux/init.h> - #include <linux/types.h> -+#include <linux/bitops.h> - - #include <linux/mtd/mtd.h> - #include <linux/mtd/nand.h> - #include <linux/mtd/doc2000.h> - - #define DOC_SUPPORT_2000 -+#define DOC_SUPPORT_2000TSOP - #define DOC_SUPPORT_MILLENNIUM - - #ifdef DOC_SUPPORT_2000 -@@ -33,7 +35,7 @@ - #define DoC_is_2000(doc) (0) - #endif - --#ifdef DOC_SUPPORT_MILLENNIUM -+#if defined(DOC_SUPPORT_2000TSOP) || defined(DOC_SUPPORT_MILLENNIUM) - #define DoC_is_Millennium(doc) (doc->ChipID == DOC_ChipID_DocMil) - #else - #define DoC_is_Millennium(doc) (0) -@@ -53,9 +55,12 @@ - static int doc_write(struct mtd_info *mtd, loff_t to, size_t len, - size_t *retlen, const u_char *buf); - static int doc_read_ecc(struct mtd_info *mtd, loff_t from, size_t len, -- size_t *retlen, u_char *buf, u_char *eccbuf, int oobsel); -+ size_t *retlen, u_char *buf, u_char *eccbuf, struct nand_oobinfo *oobsel); - static int doc_write_ecc(struct mtd_info *mtd, loff_t to, size_t len, -- size_t *retlen, const u_char *buf, u_char 
*eccbuf, int oobsel); -+ size_t *retlen, const u_char *buf, u_char *eccbuf, struct nand_oobinfo *oobsel); -+static int doc_writev_ecc(struct mtd_info *mtd, const struct kvec *vecs, -+ unsigned long count, loff_t to, size_t *retlen, -+ u_char *eccbuf, struct nand_oobinfo *oobsel); - static int doc_read_oob(struct mtd_info *mtd, loff_t ofs, size_t len, - size_t *retlen, u_char *buf); - static int doc_write_oob(struct mtd_info *mtd, loff_t ofs, size_t len, -@@ -84,7 +89,7 @@ - /* DOC_WaitReady: Wait for RDY line to be asserted by the flash chip */ - static int _DoC_WaitReady(struct DiskOnChip *doc) - { -- unsigned long docptr = doc->virtadr; -+ void __iomem *docptr = doc->virtadr; - unsigned long timeo = jiffies + (HZ * 10); - - DEBUG(MTD_DEBUG_LEVEL3, -@@ -92,6 +97,10 @@ - - /* Out-of-line routine to wait for chip response */ - while (!(ReadDOC(docptr, CDSNControl) & CDSN_CTRL_FR_B)) { -+ /* issue 2 read from NOP register after reading from CDSNControl register -+ see Software Requirement 11.4 item 2. 
*/ -+ DoC_Delay(doc, 2); -+ - if (time_after(jiffies, timeo)) { - DEBUG(MTD_DEBUG_LEVEL2, "_DoC_WaitReady timed out.\n"); - return -EIO; -@@ -105,7 +114,8 @@ - - static inline int DoC_WaitReady(struct DiskOnChip *doc) - { -- unsigned long docptr = doc->virtadr; -+ void __iomem *docptr = doc->virtadr; -+ - /* This is inline, to optimise the common case, where it's ready instantly */ - int ret = 0; - -@@ -131,7 +141,7 @@ - static inline int DoC_Command(struct DiskOnChip *doc, unsigned char command, - unsigned char xtraflags) - { -- unsigned long docptr = doc->virtadr; -+ void __iomem *docptr = doc->virtadr; - - if (DoC_is_2000(doc)) - xtraflags |= CDSN_CTRL_FLASH_IO; -@@ -145,6 +155,8 @@ - - /* Send the command */ - WriteDOC_(command, docptr, doc->ioreg); -+ if (DoC_is_Millennium(doc)) -+ WriteDOC(command, docptr, WritePipeTerm); - - /* Lower the CLE line */ - WriteDOC(xtraflags | CDSN_CTRL_CE, docptr, CDSNControl); -@@ -161,10 +173,8 @@ - static int DoC_Address(struct DiskOnChip *doc, int numbytes, unsigned long ofs, - unsigned char xtraflags1, unsigned char xtraflags2) - { -- unsigned long docptr; - int i; -- -- docptr = doc->virtadr; -+ void __iomem *docptr = doc->virtadr; - - if (DoC_is_2000(doc)) - xtraflags1 |= CDSN_CTRL_FLASH_IO; -@@ -206,6 +216,9 @@ - } - } - -+ if (DoC_is_Millennium(doc)) -+ WriteDOC(ofs & 0xff, docptr, WritePipeTerm); -+ - DoC_Delay(doc, 2); /* Needed for some slow flash chips. mf. 
*/ - - /* FIXME: The SlowIO's for millennium could be replaced by -@@ -226,11 +239,9 @@ - { - volatile int dummy; - int modulus = 0xffff; -- unsigned long docptr; -+ void __iomem *docptr = doc->virtadr; - int i; - -- docptr = doc->virtadr; -- - if (len <= 0) - return; - -@@ -257,11 +268,9 @@ - /* Write a buffer to DoC, taking care of Millennium odditys */ - static void DoC_WriteBuf(struct DiskOnChip *doc, const u_char * buf, int len) - { -- unsigned long docptr; -+ void __iomem *docptr = doc->virtadr; - int i; - -- docptr = doc->virtadr; -- - if (len <= 0) - return; - -@@ -278,7 +287,7 @@ - - static inline int DoC_SelectChip(struct DiskOnChip *doc, int chip) - { -- unsigned long docptr = doc->virtadr; -+ void __iomem *docptr = doc->virtadr; - - /* Software requirement 11.4.4 before writing DeviceSelect */ - /* Deassert the CE line to eliminate glitches on the FCE# outputs */ -@@ -302,7 +311,7 @@ - - static inline int DoC_SelectFloor(struct DiskOnChip *doc, int floor) - { -- unsigned long docptr = doc->virtadr; -+ void __iomem *docptr = doc->virtadr; - - /* Select the floor (bank) of chips required */ - WriteDOC(floor, docptr, FloorSelect); -@@ -344,15 +353,25 @@ - - /* Read the manufacturer and device id codes from the device */ - -- /* CDSN Slow IO register see Software Requirement 11.4 item 5. */ -- dummy = ReadDOC(doc->virtadr, CDSNSlowIO); -- DoC_Delay(doc, 2); -- mfr = ReadDOC_(doc->virtadr, doc->ioreg); -- -- /* CDSN Slow IO register see Software Requirement 11.4 item 5. */ -- dummy = ReadDOC(doc->virtadr, CDSNSlowIO); -- DoC_Delay(doc, 2); -- id = ReadDOC_(doc->virtadr, doc->ioreg); -+ if (DoC_is_Millennium(doc)) { -+ DoC_Delay(doc, 2); -+ dummy = ReadDOC(doc->virtadr, ReadPipeInit); -+ mfr = ReadDOC(doc->virtadr, LastDataRead); -+ -+ DoC_Delay(doc, 2); -+ dummy = ReadDOC(doc->virtadr, ReadPipeInit); -+ id = ReadDOC(doc->virtadr, LastDataRead); -+ } else { -+ /* CDSN Slow IO register see Software Req 11.4 item 5. 
*/ -+ dummy = ReadDOC(doc->virtadr, CDSNSlowIO); -+ DoC_Delay(doc, 2); -+ mfr = ReadDOC_(doc->virtadr, doc->ioreg); -+ -+ /* CDSN Slow IO register see Software Req 11.4 item 5. */ -+ dummy = ReadDOC(doc->virtadr, CDSNSlowIO); -+ DoC_Delay(doc, 2); -+ id = ReadDOC_(doc->virtadr, doc->ioreg); -+ } - - /* No response - return failure */ - if (mfr == 0xff || mfr == 0) -@@ -386,11 +405,10 @@ - if (!doc->mfr) { - doc->mfr = mfr; - doc->id = id; -- doc->chipshift = -- nand_flash_ids[i].chipshift; -- doc->page256 = nand_flash_ids[i].page256; -- doc->pageadrlen = -- nand_flash_ids[i].chipshift > 25 ? 3 : 2; -+ doc->chipshift = -+ ffs((nand_flash_ids[i].chipsize << 20)) - 1; -+ doc->page256 = (nand_flash_ids[i].pagesize == 256) ? 1 : 0; -+ doc->pageadrlen = doc->chipshift > 25 ? 3 : 2; - doc->erasesize = - nand_flash_ids[i].erasesize; - return 1; -@@ -410,20 +428,16 @@ - - /* DoC_ScanChips: Find all NAND chips present in a DiskOnChip, and identify them */ - --static void DoC_ScanChips(struct DiskOnChip *this) -+static void DoC_ScanChips(struct DiskOnChip *this, int maxchips) - { - int floor, chip; - int numchips[MAX_FLOORS]; -- int maxchips = MAX_CHIPS; - int ret = 1; - - this->numchips = 0; - this->mfr = 0; - this->id = 0; - -- if (DoC_is_Millennium(this)) -- maxchips = MAX_CHIPS_MIL; -- - /* For each floor, find the number of valid chips it contains */ - for (floor = 0; floor < MAX_FLOORS; floor++) { - ret = 1; -@@ -515,6 +529,7 @@ - { - struct DiskOnChip *this = (struct DiskOnChip *) mtd->priv; - struct DiskOnChip *old = NULL; -+ int maxchips; - - /* We must avoid being called twice for the same device. 
*/ - -@@ -538,14 +553,28 @@ - - - switch (this->ChipID) { -+ case DOC_ChipID_Doc2kTSOP: -+ mtd->name = "DiskOnChip 2000 TSOP"; -+ this->ioreg = DoC_Mil_CDSN_IO; -+ /* Pretend it's a Millennium */ -+ this->ChipID = DOC_ChipID_DocMil; -+ maxchips = MAX_CHIPS; -+ break; - case DOC_ChipID_Doc2k: - mtd->name = "DiskOnChip 2000"; - this->ioreg = DoC_2k_CDSN_IO; -+ maxchips = MAX_CHIPS; - break; - case DOC_ChipID_DocMil: - mtd->name = "DiskOnChip Millennium"; - this->ioreg = DoC_Mil_CDSN_IO; -+ maxchips = MAX_CHIPS_MIL; - break; -+ default: -+ printk("Unknown ChipID 0x%02x\n", this->ChipID); -+ kfree(mtd); -+ iounmap((void *) this->virtadr); -+ return; - } - - printk(KERN_NOTICE "%s found at address 0x%lX\n", mtd->name, -@@ -566,6 +595,7 @@ - mtd->write = doc_write; - mtd->read_ecc = doc_read_ecc; - mtd->write_ecc = doc_write_ecc; -+ mtd->writev_ecc = doc_writev_ecc; - mtd->read_oob = doc_read_oob; - mtd->write_oob = doc_write_oob; - mtd->sync = NULL; -@@ -578,7 +608,7 @@ - init_MUTEX(&this->lock); - - /* Ident all the chips present. 
*/ -- DoC_ScanChips(this); -+ DoC_ScanChips(this, maxchips); - - if (!this->totlen) { - kfree(mtd); -@@ -597,20 +627,19 @@ - size_t * retlen, u_char * buf) - { - /* Just a special case of doc_read_ecc */ -- return doc_read_ecc(mtd, from, len, retlen, buf, NULL, 0); -+ return doc_read_ecc(mtd, from, len, retlen, buf, NULL, NULL); - } - - static int doc_read_ecc(struct mtd_info *mtd, loff_t from, size_t len, -- size_t * retlen, u_char * buf, u_char * eccbuf, int oobsel) -+ size_t * retlen, u_char * buf, u_char * eccbuf, struct nand_oobinfo *oobsel) - { - struct DiskOnChip *this = (struct DiskOnChip *) mtd->priv; -- unsigned long docptr; -+ void __iomem *docptr = this->virtadr; - struct Nand *mychip; - unsigned char syndrome[6]; - volatile char dummy; - int i, len256 = 0, ret=0; -- -- docptr = this->virtadr; -+ size_t left = len; - - /* Don't allow read past end of device */ - if (from >= this->totlen) -@@ -618,122 +647,131 @@ - - down(&this->lock); - -- /* Don't allow a single read to cross a 512-byte block boundary */ -- if (from + len > ((from | 0x1ff) + 1)) -- len = ((from | 0x1ff) + 1) - from; -- -- /* The ECC will not be calculated correctly if less than 512 is read */ -- if (len != 0x200 && eccbuf) -- printk(KERN_WARNING -- "ECC needs a full sector read (adr: %lx size %lx)\n", -- (long) from, (long) len); -+ *retlen = 0; -+ while (left) { -+ len = left; -+ -+ /* Don't allow a single read to cross a 512-byte block boundary */ -+ if (from + len > ((from | 0x1ff) + 1)) -+ len = ((from | 0x1ff) + 1) - from; - -- /* printk("DoC_Read (adr: %lx size %lx)\n", (long) from, (long) len); */ -+ /* The ECC will not be calculated correctly if less than 512 is read */ -+ if (len != 0x200 && eccbuf) -+ printk(KERN_WARNING -+ "ECC needs a full sector read (adr: %lx size %lx)\n", -+ (long) from, (long) len); - -+ /* printk("DoC_Read (adr: %lx size %lx)\n", (long) from, (long) len); */ - -- /* Find the chip which is to be used and select it */ -- mychip = &this->chips[from >> 
(this->chipshift)]; - -- if (this->curfloor != mychip->floor) { -- DoC_SelectFloor(this, mychip->floor); -- DoC_SelectChip(this, mychip->chip); -- } else if (this->curchip != mychip->chip) { -- DoC_SelectChip(this, mychip->chip); -- } -+ /* Find the chip which is to be used and select it */ -+ mychip = &this->chips[from >> (this->chipshift)]; - -- this->curfloor = mychip->floor; -- this->curchip = mychip->chip; -+ if (this->curfloor != mychip->floor) { -+ DoC_SelectFloor(this, mychip->floor); -+ DoC_SelectChip(this, mychip->chip); -+ } else if (this->curchip != mychip->chip) { -+ DoC_SelectChip(this, mychip->chip); -+ } - -- DoC_Command(this, -- (!this->page256 -- && (from & 0x100)) ? NAND_CMD_READ1 : NAND_CMD_READ0, -- CDSN_CTRL_WP); -- DoC_Address(this, ADDR_COLUMN_PAGE, from, CDSN_CTRL_WP, -- CDSN_CTRL_ECC_IO); -- -- if (eccbuf) { -- /* Prime the ECC engine */ -- WriteDOC(DOC_ECC_RESET, docptr, ECCConf); -- WriteDOC(DOC_ECC_EN, docptr, ECCConf); -- } else { -- /* disable the ECC engine */ -- WriteDOC(DOC_ECC_RESET, docptr, ECCConf); -- WriteDOC(DOC_ECC_DIS, docptr, ECCConf); -- } -+ this->curfloor = mychip->floor; -+ this->curchip = mychip->chip; - -- /* treat crossing 256-byte sector for 2M x 8bits devices */ -- if (this->page256 && from + len > (from | 0xff) + 1) { -- len256 = (from | 0xff) + 1 - from; -- DoC_ReadBuf(this, buf, len256); -+ DoC_Command(this, -+ (!this->page256 -+ && (from & 0x100)) ? 
NAND_CMD_READ1 : NAND_CMD_READ0, -+ CDSN_CTRL_WP); -+ DoC_Address(this, ADDR_COLUMN_PAGE, from, CDSN_CTRL_WP, -+ CDSN_CTRL_ECC_IO); - -- DoC_Command(this, NAND_CMD_READ0, CDSN_CTRL_WP); -- DoC_Address(this, ADDR_COLUMN_PAGE, from + len256, -- CDSN_CTRL_WP, CDSN_CTRL_ECC_IO); -- } -+ if (eccbuf) { -+ /* Prime the ECC engine */ -+ WriteDOC(DOC_ECC_RESET, docptr, ECCConf); -+ WriteDOC(DOC_ECC_EN, docptr, ECCConf); -+ } else { -+ /* disable the ECC engine */ -+ WriteDOC(DOC_ECC_RESET, docptr, ECCConf); -+ WriteDOC(DOC_ECC_DIS, docptr, ECCConf); -+ } - -- DoC_ReadBuf(this, &buf[len256], len - len256); -+ /* treat crossing 256-byte sector for 2M x 8bits devices */ -+ if (this->page256 && from + len > (from | 0xff) + 1) { -+ len256 = (from | 0xff) + 1 - from; -+ DoC_ReadBuf(this, buf, len256); -+ -+ DoC_Command(this, NAND_CMD_READ0, CDSN_CTRL_WP); -+ DoC_Address(this, ADDR_COLUMN_PAGE, from + len256, -+ CDSN_CTRL_WP, CDSN_CTRL_ECC_IO); -+ } - -- /* Let the caller know we completed it */ -- *retlen = len; -+ DoC_ReadBuf(this, &buf[len256], len - len256); - -- if (eccbuf) { -- /* Read the ECC data through the DiskOnChip ECC logic */ -- /* Note: this will work even with 2M x 8bit devices as */ -- /* they have 8 bytes of OOB per 256 page. mf. */ -- DoC_ReadBuf(this, eccbuf, 6); -+ /* Let the caller know we completed it */ -+ *retlen += len; - -- /* Flush the pipeline */ -- if (DoC_is_Millennium(this)) { -- dummy = ReadDOC(docptr, ECCConf); -- dummy = ReadDOC(docptr, ECCConf); -- i = ReadDOC(docptr, ECCConf); -- } else { -- dummy = ReadDOC(docptr, 2k_ECCStatus); -- dummy = ReadDOC(docptr, 2k_ECCStatus); -- i = ReadDOC(docptr, 2k_ECCStatus); -- } -+ if (eccbuf) { -+ /* Read the ECC data through the DiskOnChip ECC logic */ -+ /* Note: this will work even with 2M x 8bit devices as */ -+ /* they have 8 bytes of OOB per 256 page. mf. 
*/ -+ DoC_ReadBuf(this, eccbuf, 6); -+ -+ /* Flush the pipeline */ -+ if (DoC_is_Millennium(this)) { -+ dummy = ReadDOC(docptr, ECCConf); -+ dummy = ReadDOC(docptr, ECCConf); -+ i = ReadDOC(docptr, ECCConf); -+ } else { -+ dummy = ReadDOC(docptr, 2k_ECCStatus); -+ dummy = ReadDOC(docptr, 2k_ECCStatus); -+ i = ReadDOC(docptr, 2k_ECCStatus); -+ } - -- /* Check the ECC Status */ -- if (i & 0x80) { -- int nb_errors; -- /* There was an ECC error */ -+ /* Check the ECC Status */ -+ if (i & 0x80) { -+ int nb_errors; -+ /* There was an ECC error */ - #ifdef ECC_DEBUG -- printk(KERN_ERR "DiskOnChip ECC Error: Read at %lx\n", (long)from); -+ printk(KERN_ERR "DiskOnChip ECC Error: Read at %lx\n", (long)from); - #endif -- /* Read the ECC syndrom through the DiskOnChip ECC logic. -- These syndrome will be all ZERO when there is no error */ -- for (i = 0; i < 6; i++) { -- syndrome[i] = -- ReadDOC(docptr, ECCSyndrome0 + i); -- } -- nb_errors = doc_decode_ecc(buf, syndrome); -+ /* Read the ECC syndrom through the DiskOnChip ECC logic. -+ These syndrome will be all ZERO when there is no error */ -+ for (i = 0; i < 6; i++) { -+ syndrome[i] = -+ ReadDOC(docptr, ECCSyndrome0 + i); -+ } -+ nb_errors = doc_decode_ecc(buf, syndrome); - - #ifdef ECC_DEBUG -- printk(KERN_ERR "Errors corrected: %x\n", nb_errors); -+ printk(KERN_ERR "Errors corrected: %x\n", nb_errors); - #endif -- if (nb_errors < 0) { -- /* We return error, but have actually done the read. Not that -- this can be told to user-space, via sys_read(), but at least -- MTD-aware stuff can know about it by checking *retlen */ -- ret = -EIO; -- } -- } -+ if (nb_errors < 0) { -+ /* We return error, but have actually done the read. 
Not that -+ this can be told to user-space, via sys_read(), but at least -+ MTD-aware stuff can know about it by checking *retlen */ -+ ret = -EIO; -+ } -+ } - - #ifdef PSYCHO_DEBUG -- printk(KERN_DEBUG "ECC DATA at %lxB: %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n", -- (long)from, eccbuf[0], eccbuf[1], eccbuf[2], -- eccbuf[3], eccbuf[4], eccbuf[5]); -+ printk(KERN_DEBUG "ECC DATA at %lxB: %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n", -+ (long)from, eccbuf[0], eccbuf[1], eccbuf[2], -+ eccbuf[3], eccbuf[4], eccbuf[5]); - #endif - -- /* disable the ECC engine */ -- WriteDOC(DOC_ECC_DIS, docptr , ECCConf); -- } -+ /* disable the ECC engine */ -+ WriteDOC(DOC_ECC_DIS, docptr , ECCConf); -+ } - -- /* according to 11.4.1, we need to wait for the busy line -- * drop if we read to the end of the page. */ -- if(0 == ((from + *retlen) & 0x1ff)) -- { -- DoC_WaitReady(this); -+ /* according to 11.4.1, we need to wait for the busy line -+ * drop if we read to the end of the page. */ -+ if(0 == ((from + len) & 0x1ff)) -+ { -+ DoC_WaitReady(this); -+ } -+ -+ from += len; -+ left -= len; -+ buf += len; - } - - up(&this->lock); -@@ -745,21 +783,21 @@ - size_t * retlen, const u_char * buf) - { - char eccbuf[6]; -- return doc_write_ecc(mtd, to, len, retlen, buf, eccbuf, 0); -+ return doc_write_ecc(mtd, to, len, retlen, buf, eccbuf, NULL); - } - - static int doc_write_ecc(struct mtd_info *mtd, loff_t to, size_t len, - size_t * retlen, const u_char * buf, -- u_char * eccbuf, int oobsel) -+ u_char * eccbuf, struct nand_oobinfo *oobsel) - { - struct DiskOnChip *this = (struct DiskOnChip *) mtd->priv; - int di; /* Yes, DI is a hangover from when I was disassembling the binary driver */ -- unsigned long docptr; -+ void __iomem *docptr = this->virtadr; - volatile char dummy; - int len256 = 0; - struct Nand *mychip; -- -- docptr = this->virtadr; -+ size_t left = len; -+ int status; - - /* Don't allow write past end of device */ - if (to >= this->totlen) -@@ -767,65 +805,133 @@ - - down(&this->lock); - 
-- /* Don't allow a single write to cross a 512-byte block boundary */ -- if (to + len > ((to | 0x1ff) + 1)) -- len = ((to | 0x1ff) + 1) - to; -- -- /* The ECC will not be calculated correctly if less than 512 is written */ -- if (len != 0x200 && eccbuf) -- printk(KERN_WARNING -- "ECC needs a full sector write (adr: %lx size %lx)\n", -- (long) to, (long) len); -+ *retlen = 0; -+ while (left) { -+ len = left; -+ -+ /* Don't allow a single write to cross a 512-byte block boundary */ -+ if (to + len > ((to | 0x1ff) + 1)) -+ len = ((to | 0x1ff) + 1) - to; -+ -+ /* The ECC will not be calculated correctly if less than 512 is written */ -+/* DBB- -+ if (len != 0x200 && eccbuf) -+ printk(KERN_WARNING -+ "ECC needs a full sector write (adr: %lx size %lx)\n", -+ (long) to, (long) len); -+ -DBB */ - -- /* printk("DoC_Write (adr: %lx size %lx)\n", (long) to, (long) len); */ -+ /* printk("DoC_Write (adr: %lx size %lx)\n", (long) to, (long) len); */ - -- /* Find the chip which is to be used and select it */ -- mychip = &this->chips[to >> (this->chipshift)]; -+ /* Find the chip which is to be used and select it */ -+ mychip = &this->chips[to >> (this->chipshift)]; - -- if (this->curfloor != mychip->floor) { -- DoC_SelectFloor(this, mychip->floor); -- DoC_SelectChip(this, mychip->chip); -- } else if (this->curchip != mychip->chip) { -- DoC_SelectChip(this, mychip->chip); -- } -+ if (this->curfloor != mychip->floor) { -+ DoC_SelectFloor(this, mychip->floor); -+ DoC_SelectChip(this, mychip->chip); -+ } else if (this->curchip != mychip->chip) { -+ DoC_SelectChip(this, mychip->chip); -+ } - -- this->curfloor = mychip->floor; -- this->curchip = mychip->chip; -+ this->curfloor = mychip->floor; -+ this->curchip = mychip->chip; - -- /* Set device to main plane of flash */ -- DoC_Command(this, NAND_CMD_RESET, CDSN_CTRL_WP); -- DoC_Command(this, -- (!this->page256 -- && (to & 0x100)) ? 
NAND_CMD_READ1 : NAND_CMD_READ0, -- CDSN_CTRL_WP); -+ /* Set device to main plane of flash */ -+ DoC_Command(this, NAND_CMD_RESET, CDSN_CTRL_WP); -+ DoC_Command(this, -+ (!this->page256 -+ && (to & 0x100)) ? NAND_CMD_READ1 : NAND_CMD_READ0, -+ CDSN_CTRL_WP); - -- DoC_Command(this, NAND_CMD_SEQIN, 0); -- DoC_Address(this, ADDR_COLUMN_PAGE, to, 0, CDSN_CTRL_ECC_IO); -+ DoC_Command(this, NAND_CMD_SEQIN, 0); -+ DoC_Address(this, ADDR_COLUMN_PAGE, to, 0, CDSN_CTRL_ECC_IO); - -- if (eccbuf) { -- /* Prime the ECC engine */ -- WriteDOC(DOC_ECC_RESET, docptr, ECCConf); -- WriteDOC(DOC_ECC_EN | DOC_ECC_RW, docptr, ECCConf); -- } else { -- /* disable the ECC engine */ -- WriteDOC(DOC_ECC_RESET, docptr, ECCConf); -- WriteDOC(DOC_ECC_DIS, docptr, ECCConf); -- } -+ if (eccbuf) { -+ /* Prime the ECC engine */ -+ WriteDOC(DOC_ECC_RESET, docptr, ECCConf); -+ WriteDOC(DOC_ECC_EN | DOC_ECC_RW, docptr, ECCConf); -+ } else { -+ /* disable the ECC engine */ -+ WriteDOC(DOC_ECC_RESET, docptr, ECCConf); -+ WriteDOC(DOC_ECC_DIS, docptr, ECCConf); -+ } - -- /* treat crossing 256-byte sector for 2M x 8bits devices */ -- if (this->page256 && to + len > (to | 0xff) + 1) { -- len256 = (to | 0xff) + 1 - to; -- DoC_WriteBuf(this, buf, len256); -+ /* treat crossing 256-byte sector for 2M x 8bits devices */ -+ if (this->page256 && to + len > (to | 0xff) + 1) { -+ len256 = (to | 0xff) + 1 - to; -+ DoC_WriteBuf(this, buf, len256); -+ -+ DoC_Command(this, NAND_CMD_PAGEPROG, 0); -+ -+ DoC_Command(this, NAND_CMD_STATUS, CDSN_CTRL_WP); -+ /* There's an implicit DoC_WaitReady() in DoC_Command */ -+ -+ dummy = ReadDOC(docptr, CDSNSlowIO); -+ DoC_Delay(this, 2); -+ -+ if (ReadDOC_(docptr, this->ioreg) & 1) { -+ printk(KERN_ERR "Error programming flash\n"); -+ /* Error in programming */ -+ *retlen = 0; -+ up(&this->lock); -+ return -EIO; -+ } -+ -+ DoC_Command(this, NAND_CMD_SEQIN, 0); -+ DoC_Address(this, ADDR_COLUMN_PAGE, to + len256, 0, -+ CDSN_CTRL_ECC_IO); -+ } -+ -+ DoC_WriteBuf(this, &buf[len256], len 
- len256); -+ -+ if (eccbuf) { -+ WriteDOC(CDSN_CTRL_ECC_IO | CDSN_CTRL_CE, docptr, -+ CDSNControl); -+ -+ if (DoC_is_Millennium(this)) { -+ WriteDOC(0, docptr, NOP); -+ WriteDOC(0, docptr, NOP); -+ WriteDOC(0, docptr, NOP); -+ } else { -+ WriteDOC_(0, docptr, this->ioreg); -+ WriteDOC_(0, docptr, this->ioreg); -+ WriteDOC_(0, docptr, this->ioreg); -+ } -+ -+ WriteDOC(CDSN_CTRL_ECC_IO | CDSN_CTRL_FLASH_IO | CDSN_CTRL_CE, docptr, -+ CDSNControl); -+ -+ /* Read the ECC data through the DiskOnChip ECC logic */ -+ for (di = 0; di < 6; di++) { -+ eccbuf[di] = ReadDOC(docptr, ECCSyndrome0 + di); -+ } -+ -+ /* Reset the ECC engine */ -+ WriteDOC(DOC_ECC_DIS, docptr, ECCConf); -+ -+#ifdef PSYCHO_DEBUG -+ printk -+ ("OOB data at %lx is %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n", -+ (long) to, eccbuf[0], eccbuf[1], eccbuf[2], eccbuf[3], -+ eccbuf[4], eccbuf[5]); -+#endif -+ } - - DoC_Command(this, NAND_CMD_PAGEPROG, 0); - - DoC_Command(this, NAND_CMD_STATUS, CDSN_CTRL_WP); - /* There's an implicit DoC_WaitReady() in DoC_Command */ - -- dummy = ReadDOC(docptr, CDSNSlowIO); -- DoC_Delay(this, 2); -+ if (DoC_is_Millennium(this)) { -+ ReadDOC(docptr, ReadPipeInit); -+ status = ReadDOC(docptr, LastDataRead); -+ } else { -+ dummy = ReadDOC(docptr, CDSNSlowIO); -+ DoC_Delay(this, 2); -+ status = ReadDOC_(docptr, this->ioreg); -+ } - -- if (ReadDOC_(docptr, this->ioreg) & 1) { -+ if (status & 1) { - printk(KERN_ERR "Error programming flash\n"); - /* Error in programming */ - *retlen = 0; -@@ -833,94 +939,106 @@ - return -EIO; - } - -- DoC_Command(this, NAND_CMD_SEQIN, 0); -- DoC_Address(this, ADDR_COLUMN_PAGE, to + len256, 0, -- CDSN_CTRL_ECC_IO); -+ /* Let the caller know we completed it */ -+ *retlen += len; -+ -+ if (eccbuf) { -+ unsigned char x[8]; -+ size_t dummy; -+ int ret; -+ -+ /* Write the ECC data to flash */ -+ for (di=0; di<6; di++) -+ x[di] = eccbuf[di]; -+ -+ x[6]=0x55; -+ x[7]=0x55; -+ -+ ret = doc_write_oob_nolock(mtd, to, 8, &dummy, x); -+ if (ret) { -+ 
up(&this->lock); -+ return ret; -+ } -+ } -+ -+ to += len; -+ left -= len; -+ buf += len; - } - -- DoC_WriteBuf(this, &buf[len256], len - len256); -+ up(&this->lock); -+ return 0; -+} - -- if (eccbuf) { -- WriteDOC(CDSN_CTRL_ECC_IO | CDSN_CTRL_CE, docptr, -- CDSNControl); -+static int doc_writev_ecc(struct mtd_info *mtd, const struct kvec *vecs, -+ unsigned long count, loff_t to, size_t *retlen, -+ u_char *eccbuf, struct nand_oobinfo *oobsel) -+{ -+ static char static_buf[512]; -+ static DECLARE_MUTEX(writev_buf_sem); - -- if (DoC_is_Millennium(this)) { -- WriteDOC(0, docptr, NOP); -- WriteDOC(0, docptr, NOP); -- WriteDOC(0, docptr, NOP); -- } else { -- WriteDOC_(0, docptr, this->ioreg); -- WriteDOC_(0, docptr, this->ioreg); -- WriteDOC_(0, docptr, this->ioreg); -- } -+ size_t totretlen = 0; -+ size_t thisvecofs = 0; -+ int ret= 0; - -- /* Read the ECC data through the DiskOnChip ECC logic */ -- for (di = 0; di < 6; di++) { -- eccbuf[di] = ReadDOC(docptr, ECCSyndrome0 + di); -- } -+ down(&writev_buf_sem); - -- /* Reset the ECC engine */ -- WriteDOC(DOC_ECC_DIS, docptr, ECCConf); -+ while(count) { -+ size_t thislen, thisretlen; -+ unsigned char *buf; - --#ifdef PSYCHO_DEBUG -- printk -- ("OOB data at %lx is %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n", -- (long) to, eccbuf[0], eccbuf[1], eccbuf[2], eccbuf[3], -- eccbuf[4], eccbuf[5]); --#endif -- } -+ buf = vecs->iov_base + thisvecofs; -+ thislen = vecs->iov_len - thisvecofs; - -- DoC_Command(this, NAND_CMD_PAGEPROG, 0); - -- DoC_Command(this, NAND_CMD_STATUS, CDSN_CTRL_WP); -- /* There's an implicit DoC_WaitReady() in DoC_Command */ -+ if (thislen >= 512) { -+ thislen = thislen & ~(512-1); -+ thisvecofs += thislen; -+ } else { -+ /* Not enough to fill a page. 
Copy into buf */ -+ memcpy(static_buf, buf, thislen); -+ buf = &static_buf[thislen]; -+ -+ while(count && thislen < 512) { -+ vecs++; -+ count--; -+ thisvecofs = min((512-thislen), vecs->iov_len); -+ memcpy(buf, vecs->iov_base, thisvecofs); -+ thislen += thisvecofs; -+ buf += thisvecofs; -+ } -+ buf = static_buf; -+ } -+ if (count && thisvecofs == vecs->iov_len) { -+ thisvecofs = 0; -+ vecs++; -+ count--; -+ } -+ ret = doc_write_ecc(mtd, to, thislen, &thisretlen, buf, eccbuf, oobsel); - -- dummy = ReadDOC(docptr, CDSNSlowIO); -- DoC_Delay(this, 2); -+ totretlen += thisretlen; - -- if (ReadDOC_(docptr, this->ioreg) & 1) { -- printk(KERN_ERR "Error programming flash\n"); -- /* Error in programming */ -- *retlen = 0; -- up(&this->lock); -- return -EIO; -- } -+ if (ret || thisretlen != thislen) -+ break; - -- /* Let the caller know we completed it */ -- *retlen = len; -- -- if (eccbuf) { -- unsigned char x[8]; -- size_t dummy; -- int ret; -- -- /* Write the ECC data to flash */ -- for (di=0; di<6; di++) -- x[di] = eccbuf[di]; -- -- x[6]=0x55; -- x[7]=0x55; -- -- ret = doc_write_oob_nolock(mtd, to, 8, &dummy, x); -- up(&this->lock); -- return ret; -- } -- up(&this->lock); -- return 0; -+ to += thislen; -+ } -+ -+ up(&writev_buf_sem); -+ *retlen = totretlen; -+ return ret; - } - -+ - static int doc_read_oob(struct mtd_info *mtd, loff_t ofs, size_t len, - size_t * retlen, u_char * buf) - { - struct DiskOnChip *this = (struct DiskOnChip *) mtd->priv; - int len256 = 0, ret; -- unsigned long docptr; - struct Nand *mychip; - - down(&this->lock); - -- docptr = this->virtadr; -- - mychip = &this->chips[ofs >> this->chipshift]; - - if (this->curfloor != mychip->floor) { -@@ -975,9 +1093,10 @@ - { - struct DiskOnChip *this = (struct DiskOnChip *) mtd->priv; - int len256 = 0; -- unsigned long docptr = this->virtadr; -+ void __iomem *docptr = this->virtadr; - struct Nand *mychip = &this->chips[ofs >> this->chipshift]; - volatile int dummy; -+ int status; - - // 
printk("doc_write_oob(%lx, %d): %2.2X %2.2X %2.2X %2.2X ... %2.2X %2.2X .. %2.2X %2.2X\n",(long)ofs, len, - // buf[0], buf[1], buf[2], buf[3], buf[8], buf[9], buf[14],buf[15]); -@@ -1026,10 +1145,16 @@ - DoC_Command(this, NAND_CMD_STATUS, 0); - /* DoC_WaitReady() is implicit in DoC_Command */ - -- dummy = ReadDOC(docptr, CDSNSlowIO); -- DoC_Delay(this, 2); -+ if (DoC_is_Millennium(this)) { -+ ReadDOC(docptr, ReadPipeInit); -+ status = ReadDOC(docptr, LastDataRead); -+ } else { -+ dummy = ReadDOC(docptr, CDSNSlowIO); -+ DoC_Delay(this, 2); -+ status = ReadDOC_(docptr, this->ioreg); -+ } - -- if (ReadDOC_(docptr, this->ioreg) & 1) { -+ if (status & 1) { - printk(KERN_ERR "Error programming oob data\n"); - /* There was an error */ - *retlen = 0; -@@ -1045,10 +1170,16 @@ - DoC_Command(this, NAND_CMD_STATUS, 0); - /* DoC_WaitReady() is implicit in DoC_Command */ - -- dummy = ReadDOC(docptr, CDSNSlowIO); -- DoC_Delay(this, 2); -+ if (DoC_is_Millennium(this)) { -+ ReadDOC(docptr, ReadPipeInit); -+ status = ReadDOC(docptr, LastDataRead); -+ } else { -+ dummy = ReadDOC(docptr, CDSNSlowIO); -+ DoC_Delay(this, 2); -+ status = ReadDOC_(docptr, this->ioreg); -+ } - -- if (ReadDOC_(docptr, this->ioreg) & 1) { -+ if (status & 1) { - printk(KERN_ERR "Error programming oob data\n"); - /* There was an error */ - *retlen = 0; -@@ -1079,8 +1210,9 @@ - __u32 ofs = instr->addr; - __u32 len = instr->len; - volatile int dummy; -- unsigned long docptr; -+ void __iomem *docptr = this->virtadr; - struct Nand *mychip; -+ int status; - - down(&this->lock); - -@@ -1091,8 +1223,6 @@ - - instr->state = MTD_ERASING; - -- docptr = this->virtadr; -- - /* FIXME: Do this in the background. 
Use timers or schedule_task() */ - while(len) { - mychip = &this->chips[ofs >> this->chipshift]; -@@ -1112,10 +1242,16 @@ - - DoC_Command(this, NAND_CMD_STATUS, CDSN_CTRL_WP); - -- dummy = ReadDOC(docptr, CDSNSlowIO); -- DoC_Delay(this, 2); -- -- if (ReadDOC_(docptr, this->ioreg) & 1) { -+ if (DoC_is_Millennium(this)) { -+ ReadDOC(docptr, ReadPipeInit); -+ status = ReadDOC(docptr, LastDataRead); -+ } else { -+ dummy = ReadDOC(docptr, CDSNSlowIO); -+ DoC_Delay(this, 2); -+ status = ReadDOC_(docptr, this->ioreg); -+ } -+ -+ if (status & 1) { - printk(KERN_ERR "Error erasing at 0x%x\n", ofs); - /* There was an error */ - instr->state = MTD_ERASE_FAILED; -@@ -1127,8 +1263,7 @@ - instr->state = MTD_ERASE_DONE; - - callback: -- if (instr->callback) -- instr->callback(instr); -+ mtd_erase_callback(instr); - - up(&this->lock); - return 0; -Index: linux-2.6.5/drivers/mtd/devices/doc2001.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/devices/doc2001.c 2004-04-03 22:36:54.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/devices/doc2001.c 2005-02-01 17:11:17.000000000 -0500 -@@ -4,7 +4,7 @@ - * (c) 1999 Machine Vision Holdings, Inc. 
- * (c) 1999, 2000 David Woodhouse <dwmw2@infradead.org> - * -- * $Id: doc2001.c,v 1.41 2003/06/11 09:45:19 dwmw2 Exp $ -+ * $Id: doc2001.c,v 1.45 2004/09/16 23:51:57 gleixner Exp $ - */ - - #include <linux/kernel.h> -@@ -19,6 +19,7 @@ - #include <linux/sched.h> - #include <linux/init.h> - #include <linux/types.h> -+#include <linux/bitops.h> - - #include <linux/mtd/mtd.h> - #include <linux/mtd/nand.h> -@@ -37,9 +38,11 @@ - static int doc_write(struct mtd_info *mtd, loff_t to, size_t len, - size_t *retlen, const u_char *buf); - static int doc_read_ecc(struct mtd_info *mtd, loff_t from, size_t len, -- size_t *retlen, u_char *buf, u_char *eccbuf, int oobsel); -+ size_t *retlen, u_char *buf, u_char *eccbuf, -+ struct nand_oobinfo *oobsel); - static int doc_write_ecc(struct mtd_info *mtd, loff_t to, size_t len, -- size_t *retlen, const u_char *buf, u_char *eccbuf, int oobsel); -+ size_t *retlen, const u_char *buf, u_char *eccbuf, -+ struct nand_oobinfo *oobsel); - static int doc_read_oob(struct mtd_info *mtd, loff_t ofs, size_t len, - size_t *retlen, u_char *buf); - static int doc_write_oob(struct mtd_info *mtd, loff_t ofs, size_t len, -@@ -49,7 +52,7 @@ - static struct mtd_info *docmillist = NULL; - - /* Perform the required delay cycles by reading from the NOP register */ --static void DoC_Delay(unsigned long docptr, unsigned short cycles) -+static void DoC_Delay(void __iomem * docptr, unsigned short cycles) - { - volatile char dummy; - int i; -@@ -59,7 +62,7 @@ - } - - /* DOC_WaitReady: Wait for RDY line to be asserted by the flash chip */ --static int _DoC_WaitReady(unsigned long docptr) -+static int _DoC_WaitReady(void __iomem * docptr) - { - unsigned short c = 0xffff; - -@@ -76,7 +79,7 @@ - return (c == 0); - } - --static inline int DoC_WaitReady(unsigned long docptr) -+static inline int DoC_WaitReady(void __iomem * docptr) - { - /* This is inline, to optimise the common case, where it's ready instantly */ - int ret = 0; -@@ -100,7 +103,7 @@ - with the internal 
pipeline. Each of 4 delay cycles (read from the NOP register) is - required after writing to CDSN Control register, see Software Requirement 11.4 item 3. */ - --static inline void DoC_Command(unsigned long docptr, unsigned char command, -+static inline void DoC_Command(void __iomem * docptr, unsigned char command, - unsigned char xtraflags) - { - /* Assert the CLE (Command Latch Enable) line to the flash chip */ -@@ -120,7 +123,7 @@ - with the internal pipeline. Each of 4 delay cycles (read from the NOP register) is - required after writing to CDSN Control register, see Software Requirement 11.4 item 3. */ - --static inline void DoC_Address(unsigned long docptr, int numbytes, unsigned long ofs, -+static inline void DoC_Address(void __iomem * docptr, int numbytes, unsigned long ofs, - unsigned char xtraflags1, unsigned char xtraflags2) - { - /* Assert the ALE (Address Latch Enable) line to the flash chip */ -@@ -158,7 +161,7 @@ - } - - /* DoC_SelectChip: Select a given flash chip within the current floor */ --static int DoC_SelectChip(unsigned long docptr, int chip) -+static int DoC_SelectChip(void __iomem * docptr, int chip) - { - /* Select the individual flash chip requested */ - WriteDOC(chip, docptr, CDSNDeviceSelect); -@@ -169,7 +172,7 @@ - } - - /* DoC_SelectFloor: Select a given floor (bank of flash chips) */ --static int DoC_SelectFloor(unsigned long docptr, int floor) -+static int DoC_SelectFloor(void __iomem * docptr, int floor) - { - /* Select the floor (bank) of chips required */ - WriteDOC(floor, docptr, FloorSelect); -@@ -226,7 +229,7 @@ - mfr, id, nand_manuf_ids[j].name, nand_flash_ids[i].name); - doc->mfr = mfr; - doc->id = id; -- doc->chipshift = nand_flash_ids[i].chipshift; -+ doc->chipshift = ffs((nand_flash_ids[i].chipsize << 20)) - 1; - break; - } - } -@@ -403,17 +406,18 @@ - size_t *retlen, u_char *buf) - { - /* Just a special case of doc_read_ecc */ -- return doc_read_ecc(mtd, from, len, retlen, buf, NULL, 0); -+ return doc_read_ecc(mtd, from, 
len, retlen, buf, NULL, NULL); - } - - static int doc_read_ecc (struct mtd_info *mtd, loff_t from, size_t len, -- size_t *retlen, u_char *buf, u_char *eccbuf, int oobsel) -+ size_t *retlen, u_char *buf, u_char *eccbuf, -+ struct nand_oobinfo *oobsel) - { - int i, ret; - volatile char dummy; - unsigned char syndrome[6]; - struct DiskOnChip *this = (struct DiskOnChip *)mtd->priv; -- unsigned long docptr = this->virtadr; -+ void __iomem *docptr = this->virtadr; - struct Nand *mychip = &this->chips[from >> (this->chipshift)]; - - /* Don't allow read past end of device */ -@@ -529,16 +533,17 @@ - size_t *retlen, const u_char *buf) - { - char eccbuf[6]; -- return doc_write_ecc(mtd, to, len, retlen, buf, eccbuf, 0); -+ return doc_write_ecc(mtd, to, len, retlen, buf, eccbuf, NULL); - } - - static int doc_write_ecc (struct mtd_info *mtd, loff_t to, size_t len, -- size_t *retlen, const u_char *buf, u_char *eccbuf, int oobsel) -+ size_t *retlen, const u_char *buf, u_char *eccbuf, -+ struct nand_oobinfo *oobsel) - { - int i,ret = 0; - volatile char dummy; - struct DiskOnChip *this = (struct DiskOnChip *)mtd->priv; -- unsigned long docptr = this->virtadr; -+ void __iomem *docptr = this->virtadr; - struct Nand *mychip = &this->chips[to >> (this->chipshift)]; - - /* Don't allow write past end of device */ -@@ -673,7 +678,7 @@ - #endif - volatile char dummy; - struct DiskOnChip *this = (struct DiskOnChip *)mtd->priv; -- unsigned long docptr = this->virtadr; -+ void __iomem *docptr = this->virtadr; - struct Nand *mychip = &this->chips[ofs >> this->chipshift]; - - /* Find the chip which is to be used and select it */ -@@ -725,7 +730,7 @@ - volatile char dummy; - int ret = 0; - struct DiskOnChip *this = (struct DiskOnChip *)mtd->priv; -- unsigned long docptr = this->virtadr; -+ void __iomem *docptr = this->virtadr; - struct Nand *mychip = &this->chips[ofs >> this->chipshift]; - - /* Find the chip which is to be used and select it */ -@@ -794,7 +799,7 @@ - struct DiskOnChip *this = 
(struct DiskOnChip *)mtd->priv; - __u32 ofs = instr->addr; - __u32 len = instr->len; -- unsigned long docptr = this->virtadr; -+ void __iomem *docptr = this->virtadr; - struct Nand *mychip = &this->chips[ofs >> this->chipshift]; - - if (len != mtd->erasesize) -@@ -840,8 +845,7 @@ - instr->state = MTD_ERASE_DONE; - dummy = ReadDOC(docptr, LastDataRead); - -- if (instr->callback) -- instr->callback(instr); -+ mtd_erase_callback(instr); - - return 0; - } -Index: linux-2.6.5/drivers/mtd/devices/doc2001plus.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/devices/doc2001plus.c 2004-04-03 22:36:12.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/devices/doc2001plus.c 2005-02-01 17:11:17.000000000 -0500 -@@ -6,7 +6,9 @@ - * (c) 1999 Machine Vision Holdings, Inc. - * (c) 1999, 2000 David Woodhouse <dwmw2@infradead.org> - * -- * $Id: doc2001plus.c,v 1.5 2003/06/11 09:45:19 dwmw2 Exp $ -+ * $Id: doc2001plus.c,v 1.10 2004/09/16 23:51:57 gleixner Exp $ -+ * -+ * Released under GPL - */ - - #include <linux/kernel.h> -@@ -21,6 +23,7 @@ - #include <linux/sched.h> - #include <linux/init.h> - #include <linux/types.h> -+#include <linux/bitops.h> - - #include <linux/mtd/mtd.h> - #include <linux/mtd/nand.h> -@@ -54,7 +57,7 @@ - - - /* Perform the required delay cycles by writing to the NOP register */ --static void DoC_Delay(unsigned long docptr, int cycles) -+static void DoC_Delay(void __iomem * docptr, int cycles) - { - int i; - -@@ -65,7 +68,7 @@ - #define CDSN_CTRL_FR_B_MASK (CDSN_CTRL_FR_B0 | CDSN_CTRL_FR_B1) - - /* DOC_WaitReady: Wait for RDY line to be asserted by the flash chip */ --static int _DoC_WaitReady(unsigned long docptr) -+static int _DoC_WaitReady(void __iomem * docptr) - { - unsigned int c = 0xffff; - -@@ -82,7 +85,7 @@ - return (c == 0); - } - --static inline int DoC_WaitReady(unsigned long docptr) -+static inline int DoC_WaitReady(void __iomem * docptr) - { - /* This is inline, to optimise the common case, where 
it's ready instantly */ - int ret = 0; -@@ -103,7 +106,7 @@ - * can detect. M-systems suggest always check this on any block level - * operation and setting to normal mode if in reset mode. - */ --static inline void DoC_CheckASIC(unsigned long docptr) -+static inline void DoC_CheckASIC(void __iomem * docptr) - { - /* Make sure the DoC is in normal mode */ - if ((ReadDOC(docptr, Mplus_DOCControl) & DOC_MODE_NORMAL) == 0) { -@@ -115,7 +118,7 @@ - /* DoC_Command: Send a flash command to the flash chip through the Flash - * command register. Need 2 Write Pipeline Terminates to complete send. - */ --static inline void DoC_Command(unsigned long docptr, unsigned char command, -+static inline void DoC_Command(void __iomem * docptr, unsigned char command, - unsigned char xtraflags) - { - WriteDOC(command, docptr, Mplus_FlashCmd); -@@ -130,7 +133,7 @@ - unsigned long ofs, unsigned char xtraflags1, - unsigned char xtraflags2) - { -- unsigned long docptr = doc->virtadr; -+ void __iomem * docptr = doc->virtadr; - - /* Allow for possible Mill Plus internal flash interleaving */ - ofs >>= doc->interleave; -@@ -160,14 +163,14 @@ - } - - /* DoC_SelectChip: Select a given flash chip within the current floor */ --static int DoC_SelectChip(unsigned long docptr, int chip) -+static int DoC_SelectChip(void __iomem * docptr, int chip) - { - /* No choice for flash chip on Millennium Plus */ - return 0; - } - - /* DoC_SelectFloor: Select a given floor (bank of flash chips) */ --static int DoC_SelectFloor(unsigned long docptr, int floor) -+static int DoC_SelectFloor(void __iomem * docptr, int floor) - { - WriteDOC((floor & 0x3), docptr, Mplus_DeviceSelect); - return 0; -@@ -183,24 +186,35 @@ - * | Data 0 | ECC 0 |Flags0 |Flags1 | Data 1 |ECC 1 | OOB 1 + 2 | - * +-----------+-------+-------+-------+--------------+---------+-----------+ - */ -+/* FIXME: This lives in INFTL not here. 
Other users of flash devices -+ may not want it */ - static unsigned int DoC_GetDataOffset(struct mtd_info *mtd, loff_t *from) - { -- unsigned int ofs = *from & 0x3ff; -- unsigned int cmd; -+ struct DiskOnChip *this = (struct DiskOnChip *)mtd->priv; - -- if (ofs < 512) { -- cmd = NAND_CMD_READ0; -- ofs &= 0x1ff; -- } else if (ofs < 1014) { -- cmd = NAND_CMD_READ1; -- ofs = (ofs & 0x1ff) + 10; -+ if (this->interleave) { -+ unsigned int ofs = *from & 0x3ff; -+ unsigned int cmd; -+ -+ if (ofs < 512) { -+ cmd = NAND_CMD_READ0; -+ ofs &= 0x1ff; -+ } else if (ofs < 1014) { -+ cmd = NAND_CMD_READ1; -+ ofs = (ofs & 0x1ff) + 10; -+ } else { -+ cmd = NAND_CMD_READOOB; -+ ofs = ofs - 1014; -+ } -+ -+ *from = (*from & ~0x3ff) | ofs; -+ return cmd; - } else { -- cmd = NAND_CMD_READOOB; -- ofs = ofs - 1014; -+ /* No interleave */ -+ if ((*from) & 0x100) -+ return NAND_CMD_READ1; -+ return NAND_CMD_READ0; - } -- -- *from = (*from & ~0x3ff) | ofs; -- return cmd; - } - - static unsigned int DoC_GetECCOffset(struct mtd_info *mtd, loff_t *from) -@@ -239,7 +253,7 @@ - return cmd; - } - --static inline void MemReadDOC(unsigned long docptr, unsigned char *buf, int len) -+static inline void MemReadDOC(void __iomem * docptr, unsigned char *buf, int len) - { - #ifndef USE_MEMCPY - int i; -@@ -250,7 +264,7 @@ - #endif - } - --static inline void MemWriteDOC(unsigned long docptr, unsigned char *buf, int len) -+static inline void MemWriteDOC(void __iomem * docptr, unsigned char *buf, int len) - { - #ifndef USE_MEMCPY - int i; -@@ -266,7 +280,7 @@ - { - int mfr, id, i, j; - volatile char dummy; -- unsigned long docptr = doc->virtadr; -+ void __iomem * docptr = doc->virtadr; - - /* Page in the required floor/chip */ - DoC_SelectFloor(docptr, floor); -@@ -294,10 +308,12 @@ - dummy = ReadDOC(docptr, Mplus_ReadPipeInit); - - mfr = ReadDOC(docptr, Mil_CDSN_IO); -- dummy = ReadDOC(docptr, Mil_CDSN_IO); /* 2 way interleave */ -+ if (doc->interleave) -+ dummy = ReadDOC(docptr, Mil_CDSN_IO); /* 2 way 
interleave */ - - id = ReadDOC(docptr, Mil_CDSN_IO); -- dummy = ReadDOC(docptr, Mil_CDSN_IO); /* 2 way interleave */ -+ if (doc->interleave) -+ dummy = ReadDOC(docptr, Mil_CDSN_IO); /* 2 way interleave */ - - dummy = ReadDOC(docptr, Mplus_LastDataRead); - dummy = ReadDOC(docptr, Mplus_LastDataRead); -@@ -321,10 +337,7 @@ - nand_manuf_ids[j].name, nand_flash_ids[i].name); - doc->mfr = mfr; - doc->id = id; -- doc->interleave = 0; -- if (doc->ChipID == DOC_ChipID_DocMilPlus32) -- doc->interleave = 1; -- doc->chipshift = nand_flash_ids[i].chipshift; -+ doc->chipshift = ffs((nand_flash_ids[i].chipsize << 20)) - 1; - doc->erasesize = nand_flash_ids[i].erasesize << doc->interleave; - break; - } -@@ -346,6 +359,21 @@ - this->mfr = 0; - this->id = 0; - -+ /* Work out the intended interleave setting */ -+ this->interleave = 0; -+ if (this->ChipID == DOC_ChipID_DocMilPlus32) -+ this->interleave = 1; -+ -+ /* Check the ASIC agrees */ -+ if ( (this->interleave << 2) != -+ (ReadDOC(this->virtadr, Mplus_Configuration) & 4)) { -+ u_char conf = ReadDOC(this->virtadr, Mplus_Configuration); -+ printk(KERN_NOTICE "Setting DiskOnChip Millennium Plus interleave to %s\n", -+ this->interleave?"on (16-bit)":"off (8-bit)"); -+ conf ^= 4; -+ WriteDOC(conf, this->virtadr, Mplus_Configuration); -+ } -+ - /* For each floor, find the number of valid chips it contains */ - for (floor = 0,ret = 1; floor < MAX_FLOORS_MPLUS; floor++) { - numchips[floor] = 0; -@@ -503,7 +531,7 @@ - int i; - loff_t fofs; - struct DiskOnChip *this = (struct DiskOnChip *)mtd->priv; -- unsigned long docptr = this->virtadr; -+ void __iomem * docptr = this->virtadr; - struct Nand *mychip = &this->chips[from >> (this->chipshift)]; - unsigned char *bp, buf[1056]; - char c[32]; -@@ -588,7 +616,7 @@ - loff_t fofs; - unsigned char syndrome[6]; - struct DiskOnChip *this = (struct DiskOnChip *)mtd->priv; -- unsigned long docptr = this->virtadr; -+ void __iomem * docptr = this->virtadr; - struct Nand *mychip = &this->chips[from >> 
(this->chipshift)]; - - /* Don't allow read past end of device */ -@@ -727,7 +755,7 @@ - loff_t fto; - volatile char dummy; - struct DiskOnChip *this = (struct DiskOnChip *)mtd->priv; -- unsigned long docptr = this->virtadr; -+ void __iomem * docptr = this->virtadr; - struct Nand *mychip = &this->chips[to >> (this->chipshift)]; - - /* Don't allow write past end of device */ -@@ -739,7 +767,7 @@ - return -EINVAL; - - /* Determine position of OOB flags, before or after data */ -- before = to & 0x200; -+ before = (this->interleave && (to & 0x200)); - - DoC_CheckASIC(docptr); - -@@ -853,7 +881,7 @@ - { - loff_t fofs, base; - struct DiskOnChip *this = (struct DiskOnChip *)mtd->priv; -- unsigned long docptr = this->virtadr; -+ void __iomem * docptr = this->virtadr; - struct Nand *mychip = &this->chips[ofs >> this->chipshift]; - size_t i, size, got, want; - -@@ -886,7 +914,10 @@ - /* Figure out which region we are accessing... */ - fofs = ofs; - base = ofs & 0xf; -- if (base < 6) { -+ if (!this->interleave) { -+ DoC_Command(docptr, NAND_CMD_READOOB, 0); -+ size = 16 - base; -+ } else if (base < 6) { - DoC_Command(docptr, DoC_GetECCOffset(mtd, &fofs), 0); - size = 6 - base; - } else if (base < 8) { -@@ -928,7 +959,7 @@ - volatile char dummy; - loff_t fofs, base; - struct DiskOnChip *this = (struct DiskOnChip *)mtd->priv; -- unsigned long docptr = this->virtadr; -+ void __iomem * docptr = this->virtadr; - struct Nand *mychip = &this->chips[ofs >> this->chipshift]; - size_t i, size, got, want; - int ret = 0; -@@ -963,7 +994,10 @@ - /* Figure out which region we are accessing... 
*/ - fofs = ofs; - base = ofs & 0x0f; -- if (base < 6) { -+ if (!this->interleave) { -+ WriteDOC(NAND_CMD_READOOB, docptr, Mplus_FlashCmd); -+ size = 16 - base; -+ } else if (base < 6) { - WriteDOC(DoC_GetECCOffset(mtd, &fofs), docptr, Mplus_FlashCmd); - size = 6 - base; - } else if (base < 8) { -@@ -1027,7 +1061,7 @@ - struct DiskOnChip *this = (struct DiskOnChip *)mtd->priv; - __u32 ofs = instr->addr; - __u32 len = instr->len; -- unsigned long docptr = this->virtadr; -+ void __iomem * docptr = this->virtadr; - struct Nand *mychip = &this->chips[ofs >> this->chipshift]; - - DoC_CheckASIC(docptr); -@@ -1077,8 +1111,7 @@ - /* Disable flash internally */ - WriteDOC(0, docptr, Mplus_FlashSelect); - -- if (instr->callback) -- instr->callback(instr); -+ mtd_erase_callback(instr); - - return 0; - } -Index: linux-2.6.5/drivers/mtd/devices/docprobe.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/devices/docprobe.c 2004-04-03 22:37:37.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/devices/docprobe.c 2005-02-01 17:11:17.000000000 -0500 -@@ -4,7 +4,7 @@ - /* (C) 1999 Machine Vision Holdings, Inc. */ - /* (C) 1999-2003 David Woodhouse <dwmw2@infradead.org> */ - --/* $Id: docprobe.c,v 1.36 2003/05/23 11:29:34 dwmw2 Exp $ */ -+/* $Id: docprobe.c,v 1.42 2004/09/16 23:51:57 gleixner Exp $ */ - - - -@@ -135,6 +135,9 @@ - window, DOCControl); - #endif /* !DOC_PASSIVE_PROBE */ - -+ /* We need to read the ChipID register four times. For some -+ newer DiskOnChip 2000 units, the first three reads will -+ return the DiskOnChip Millennium ident. Don't ask. 
*/ - ChipID = ReadDOC(window, ChipID); - - switch (ChipID) { -@@ -148,6 +151,12 @@ - break; - - case DOC_ChipID_DocMil: -+ /* Check for the new 2000 with Millennium ASIC */ -+ ReadDOC(window, ChipID); -+ ReadDOC(window, ChipID); -+ if (ReadDOC(window, ChipID) != DOC_ChipID_DocMil) -+ ChipID = DOC_ChipID_Doc2kTSOP; -+ - /* Check the TOGGLE bit in the ECC register */ - tmp = ReadDOC(window, ECCConf) & DOC_TOGGLE_BIT; - tmpb = ReadDOC(window, ECCConf) & DOC_TOGGLE_BIT; -@@ -191,7 +200,6 @@ - tmpc = ReadDOC(window, Mplus_Toggle) & DOC_TOGGLE_BIT; - if (tmp != tmpb && tmp == tmpc) - return ChipID; -- break; - default: - break; - } -@@ -199,8 +207,8 @@ - - default: - --#ifndef CONFIG_MTD_DOCPROBE_55AA -- printk(KERN_WARNING "Possible DiskOnChip with unknown ChipID %2.2X found at 0x%lx\n", -+#ifdef CONFIG_MTD_DOCPROBE_55AA -+ printk(KERN_DEBUG "Possible DiskOnChip with unknown ChipID %2.2X found at 0x%lx\n", - ChipID, physadr); - #endif - #ifndef DOC_PASSIVE_PROBE -@@ -241,6 +249,12 @@ - return; - - if ((ChipID = doccheck(docptr, physadr))) { -+ if (ChipID == DOC_ChipID_Doc2kTSOP) { -+ /* Remove this at your own peril. 
The hardware driver works but nothing prevents you from erasing bad blocks */ -+ printk(KERN_NOTICE "Refusing to drive DiskOnChip 2000 TSOP until Bad Block Table is correctly supported by INFTL\n"); -+ iounmap((void *)docptr); -+ return; -+ } - docfound = 1; - mtd = kmalloc(sizeof(struct DiskOnChip) + sizeof(struct mtd_info), GFP_KERNEL); - -@@ -256,12 +270,18 @@ - memset((char *)this, 0, sizeof(struct DiskOnChip)); - - mtd->priv = this; -- this->virtadr = docptr; -+ this->virtadr = (void __iomem *)docptr; - this->physadr = physadr; - this->ChipID = ChipID; - sprintf(namebuf, "with ChipID %2.2X", ChipID); - - switch(ChipID) { -+ case DOC_ChipID_Doc2kTSOP: -+ name="2000 TSOP"; -+ im_funcname = "DoC2k_init"; -+ im_modname = "doc2000"; -+ break; -+ - case DOC_ChipID_Doc2k: - name="2000"; - im_funcname = "DoC2k_init"; -Index: linux-2.6.5/drivers/mtd/devices/lart.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/devices/lart.c 2004-04-03 22:36:57.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/devices/lart.c 2005-02-01 17:11:17.000000000 -0500 -@@ -2,7 +2,7 @@ - /* - * MTD driver for the 28F160F3 Flash Memory (non-CFI) on LART. - * -- * $Id: lart.c,v 1.5 2003/05/20 21:03:07 dwmw2 Exp $ -+ * $Id: lart.c,v 1.7 2004/08/09 13:19:44 dwmw2 Exp $ - * - * Author: Abraham vd Merwe <abraham@2d3d.co.za> - * -@@ -433,7 +433,7 @@ - } - - instr->state = MTD_ERASE_DONE; -- if (instr->callback) instr->callback (instr); -+ mtd_erase_callback(instr); - - return (0); - } -Index: linux-2.6.5/drivers/mtd/devices/ms02-nv.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/devices/ms02-nv.c 2004-04-03 22:36:15.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/devices/ms02-nv.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,12 +1,12 @@ - /* -- * Copyright (c) 2001 Maciej W. Rozycki -+ * Copyright (c) 2001 Maciej W. 
Rozycki - * -- * This program is free software; you can redistribute it and/or -- * modify it under the terms of the GNU General Public License -- * as published by the Free Software Foundation; either version -- * 2 of the License, or (at your option) any later version. -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. - * -- * $Id: ms02-nv.c,v 1.4 2003/05/20 21:03:07 dwmw2 Exp $ -+ * $Id: ms02-nv.c,v 1.7 2004/07/29 14:16:45 macro Exp $ - */ - - #include <linux/init.h> -@@ -29,18 +29,18 @@ - - - static char version[] __initdata = -- "ms02-nv.c: v.1.0.0 13 Aug 2001 Maciej W. Rozycki.\n"; -+ "ms02-nv.c: v.1.0.0 13 Aug 2001 Maciej W. Rozycki.\n"; - --MODULE_AUTHOR("Maciej W. Rozycki <macro@ds2.pg.gda.pl>"); -+MODULE_AUTHOR("Maciej W. Rozycki <macro@linux-mips.org>"); - MODULE_DESCRIPTION("DEC MS02-NV NVRAM module driver"); - MODULE_LICENSE("GPL"); - - - /* - * Addresses we probe for an MS02-NV at. Modules may be located -- * at any 8MB boundary within a 0MB up to 112MB range or at any 32MB -- * boundary within a 0MB up to 448MB range. We don't support a module -- * at 0MB, though. -+ * at any 8MiB boundary within a 0MiB up to 112MiB range or at any 32MiB -+ * boundary within a 0MiB up to 448MiB range. We don't support a module -+ * at 0MiB, though. - */ - static ulong ms02nv_addrs[] __initdata = { - 0x07000000, 0x06800000, 0x06000000, 0x05800000, 0x05000000, -@@ -130,7 +130,7 @@ - - int ret = -ENODEV; - -- /* The module decodes 8MB of address space. */ -+ /* The module decodes 8MiB of address space. 
*/ - mod_res = kmalloc(sizeof(*mod_res), GFP_KERNEL); - if (!mod_res) - return -ENOMEM; -@@ -233,7 +233,7 @@ - goto err_out_csr_res; - } - -- printk(KERN_INFO "mtd%d: %s at 0x%08lx, size %uMB.\n", -+ printk(KERN_INFO "mtd%d: %s at 0x%08lx, size %uMiB.\n", - mtd->index, ms02nv_name, addr, size >> 20); - - mp->next = root_ms02nv_mtd; -@@ -293,12 +293,12 @@ - - switch (mips_machtype) { - case MACH_DS5000_200: -- csr = (volatile u32 *)KN02_CSR_ADDR; -+ csr = (volatile u32 *)KN02_CSR_BASE; - if (*csr & KN02_CSR_BNK32M) - stride = 2; - break; - case MACH_DS5000_2X0: -- case MACH_DS5000: -+ case MACH_DS5900: - csr = (volatile u32 *)KN03_MCR_BASE; - if (*csr & KN03_MCR_BNK32M) - stride = 2; -Index: linux-2.6.5/drivers/mtd/devices/ms02-nv.h -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/devices/ms02-nv.h 2004-04-03 22:37:07.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/devices/ms02-nv.h 2005-02-01 17:11:17.000000000 -0500 -@@ -1,34 +1,96 @@ - /* -- * Copyright (c) 2001 Maciej W. Rozycki -+ * Copyright (c) 2001, 2003 Maciej W. Rozycki - * -- * This program is free software; you can redistribute it and/or -- * modify it under the terms of the GNU General Public License -- * as published by the Free Software Foundation; either version -- * 2 of the License, or (at your option) any later version. -+ * DEC MS02-NV (54-20948-01) battery backed-up NVRAM module for -+ * DECstation/DECsystem 5000/2x0 and DECsystem 5900 and 5900/260 -+ * systems. - * -- * $Id: ms02-nv.h,v 1.1 2002/09/13 13:46:55 dwmw2 Exp $ -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. 
-+ * -+ * $Id: ms02-nv.h,v 1.3 2003/08/19 09:25:36 dwmw2 Exp $ - */ - - #include <linux/ioport.h> - #include <linux/mtd/mtd.h> - -+/* -+ * Addresses are decoded as follows: -+ * -+ * 0x000000 - 0x3fffff SRAM -+ * 0x400000 - 0x7fffff CSR -+ * -+ * Within the SRAM area the following ranges are forced by the system -+ * firmware: -+ * -+ * 0x000000 - 0x0003ff diagnostic area, destroyed upon a reboot -+ * 0x000400 - ENDofRAM storage area, available to operating systems -+ * -+ * but we can't really use the available area right from 0x000400 as -+ * the first word is used by the firmware as a status flag passed -+ * from an operating system. If anything but the valid data magic -+ * ID value is found, the firmware considers the SRAM clean, i.e. -+ * containing no valid data, and disables the battery resulting in -+ * data being erased as soon as power is switched off. So the choice -+ * for the start address of the user-available is 0x001000 which is -+ * nicely page aligned. The area between 0x000404 and 0x000fff may -+ * be used by the driver for own needs. -+ * -+ * The diagnostic area defines two status words to be read by an -+ * operating system, a magic ID to distinguish a MS02-NV board from -+ * anything else and a status information providing results of tests -+ * as well as the size of SRAM available, which can be 1MiB or 2MiB -+ * (that's what the firmware handles; no idea if 2MiB modules ever -+ * existed). -+ * -+ * The firmware only handles the MS02-NV board if installed in the -+ * last (15th) slot, so for any other location the status information -+ * stored in the SRAM cannot be relied upon. But from the hardware -+ * point of view there is no problem using up to 14 such boards in a -+ * system -- only the 1st slot needs to be filled with a DRAM module. -+ * The MS02-NV board is ECC-protected, like other MS02 memory boards. -+ * -+ * The state of the battery as provided by the CSR is reflected on -+ * the two onboard LEDs. 
When facing the battery side of the board, -+ * with the LEDs at the top left and the battery at the bottom right -+ * (i.e. looking from the back side of the system box), their meaning -+ * is as follows (the system has to be powered on): -+ * -+ * left LED battery disable status: lit = enabled -+ * right LED battery condition status: lit = OK -+ */ -+ - /* MS02-NV iomem register offsets. */ - #define MS02NV_CSR 0x400000 /* control & status register */ - -+/* MS02-NV CSR status bits. */ -+#define MS02NV_CSR_BATT_OK 0x01 /* battery OK */ -+#define MS02NV_CSR_BATT_OFF 0x02 /* battery disabled */ -+ -+ - /* MS02-NV memory offsets. */ - #define MS02NV_DIAG 0x0003f8 /* diagnostic status */ - #define MS02NV_MAGIC 0x0003fc /* MS02-NV magic ID */ --#define MS02NV_RAM 0x000400 /* general-purpose RAM start */ -+#define MS02NV_VALID 0x000400 /* valid data magic ID */ -+#define MS02NV_RAM 0x001000 /* user-exposed RAM start */ - --/* MS02-NV diagnostic status constants. */ --#define MS02NV_DIAG_SIZE_MASK 0xf0 /* RAM size mask */ --#define MS02NV_DIAG_SIZE_SHIFT 0x10 /* RAM size shift (left) */ -+/* MS02-NV diagnostic status bits. */ -+#define MS02NV_DIAG_TEST 0x01 /* SRAM test done (?) */ -+#define MS02NV_DIAG_RO 0x02 /* SRAM r/o test done */ -+#define MS02NV_DIAG_RW 0x04 /* SRAM r/w test done */ -+#define MS02NV_DIAG_FAIL 0x08 /* SRAM test failed */ -+#define MS02NV_DIAG_SIZE_MASK 0xf0 /* SRAM size mask */ -+#define MS02NV_DIAG_SIZE_SHIFT 0x10 /* SRAM size shift (left) */ - - /* MS02-NV general constants. 
*/ - #define MS02NV_ID 0x03021966 /* MS02-NV magic ID value */ -+#define MS02NV_VALID_ID 0xbd100248 /* valid data magic ID value */ - #define MS02NV_SLOT_SIZE 0x800000 /* size of the address space - decoded by the module */ - -+ - typedef volatile u32 ms02nv_uint; - - struct ms02nv_private { -Index: linux-2.6.5/drivers/mtd/devices/mtdram.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/devices/mtdram.c 2004-04-03 22:36:15.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/devices/mtdram.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,6 +1,6 @@ - /* - * mtdram - a test mtd device -- * $Id: mtdram.c,v 1.32 2003/05/21 15:15:07 dwmw2 Exp $ -+ * $Id: mtdram.c,v 1.33 2004/08/09 13:19:44 dwmw2 Exp $ - * Author: Alexander Larsson <alex@cendio.se> - * - * Copyright (c) 1999 Alexander Larsson <alex@cendio.se> -@@ -57,9 +57,8 @@ - memset((char *)mtd->priv + instr->addr, 0xff, instr->len); - - instr->state = MTD_ERASE_DONE; -+ mtd_erase_callback(instr); - -- if (instr->callback) -- (*(instr->callback))(instr); - return 0; - } - -Index: linux-2.6.5/drivers/mtd/devices/phram.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/devices/phram.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/devices/phram.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,359 @@ -+/** -+ * -+ * $Id: phram.c,v 1.2 2004/08/09 13:19:44 dwmw2 Exp $ -+ * -+ * Copyright (c) Jochen Schaeuble <psionic@psionic.de> -+ * 07/2003 rewritten by Joern Engel <joern@wh.fh-wedel.de> -+ * -+ * DISCLAIMER: This driver makes use of Rusty's excellent module code, -+ * so it will not work for 2.4 without changes and it wont work for 2.4 -+ * as a module without major changes. Oh well! -+ * -+ * Usage: -+ * -+ * one commend line parameter per device, each in the form: -+ * phram=<name>,<start>,<len> -+ * <name> may be up to 63 characters. -+ * <start> and <len> can be octal, decimal or hexadecimal. 
If followed -+ * by "k", "M" or "G", the numbers will be interpreted as kilo, mega or -+ * gigabytes. -+ * -+ */ -+ -+#include <asm/io.h> -+#include <linux/init.h> -+#include <linux/kernel.h> -+#include <linux/list.h> -+#include <linux/module.h> -+#include <linux/moduleparam.h> -+#include <linux/mtd/mtd.h> -+ -+#define ERROR(fmt, args...) printk(KERN_ERR "phram: " fmt , ## args) -+ -+struct phram_mtd_list { -+ struct list_head list; -+ struct mtd_info *mtdinfo; -+}; -+ -+static LIST_HEAD(phram_list); -+ -+ -+ -+int phram_erase(struct mtd_info *mtd, struct erase_info *instr) -+{ -+ u_char *start = (u_char *)mtd->priv; -+ -+ if (instr->addr + instr->len > mtd->size) -+ return -EINVAL; -+ -+ memset(start + instr->addr, 0xff, instr->len); -+ -+ /* This'll catch a few races. Free the thing before returning :) -+ * I don't feel at all ashamed. This kind of thing is possible anyway -+ * with flash, but unlikely. -+ */ -+ -+ instr->state = MTD_ERASE_DONE; -+ -+ mtd_erase_callback(instr); -+ -+ return 0; -+} -+ -+int phram_point(struct mtd_info *mtd, loff_t from, size_t len, -+ size_t *retlen, u_char **mtdbuf) -+{ -+ u_char *start = (u_char *)mtd->priv; -+ -+ if (from + len > mtd->size) -+ return -EINVAL; -+ -+ *mtdbuf = start + from; -+ *retlen = len; -+ return 0; -+} -+ -+void phram_unpoint(struct mtd_info *mtd, u_char *addr, loff_t from, size_t len) -+{ -+} -+ -+int phram_read(struct mtd_info *mtd, loff_t from, size_t len, -+ size_t *retlen, u_char *buf) -+{ -+ u_char *start = (u_char *)mtd->priv; -+ -+ if (from + len > mtd->size) -+ return -EINVAL; -+ -+ memcpy(buf, start + from, len); -+ -+ *retlen = len; -+ return 0; -+} -+ -+int phram_write(struct mtd_info *mtd, loff_t to, size_t len, -+ size_t *retlen, const u_char *buf) -+{ -+ u_char *start = (u_char *)mtd->priv; -+ -+ if (to + len > mtd->size) -+ return -EINVAL; -+ -+ memcpy(start + to, buf, len); -+ -+ *retlen = len; -+ return 0; -+} -+ -+ -+ -+static void unregister_devices(void) -+{ -+ struct phram_mtd_list 
*this; -+ -+ list_for_each_entry(this, &phram_list, list) { -+ del_mtd_device(this->mtdinfo); -+ iounmap(this->mtdinfo->priv); -+ kfree(this->mtdinfo); -+ kfree(this); -+ } -+} -+ -+static int register_device(char *name, unsigned long start, unsigned long len) -+{ -+ struct phram_mtd_list *new; -+ int ret = -ENOMEM; -+ -+ new = kmalloc(sizeof(*new), GFP_KERNEL); -+ if (!new) -+ goto out0; -+ -+ new->mtdinfo = kmalloc(sizeof(struct mtd_info), GFP_KERNEL); -+ if (!new->mtdinfo) -+ goto out1; -+ -+ memset(new->mtdinfo, 0, sizeof(struct mtd_info)); -+ -+ ret = -EIO; -+ new->mtdinfo->priv = ioremap(start, len); -+ if (!new->mtdinfo->priv) { -+ ERROR("ioremap failed\n"); -+ goto out2; -+ } -+ -+ -+ new->mtdinfo->name = name; -+ new->mtdinfo->size = len; -+ new->mtdinfo->flags = MTD_CAP_RAM | MTD_ERASEABLE | MTD_VOLATILE; -+ new->mtdinfo->erase = phram_erase; -+ new->mtdinfo->point = phram_point; -+ new->mtdinfo->unpoint = phram_unpoint; -+ new->mtdinfo->read = phram_read; -+ new->mtdinfo->write = phram_write; -+ new->mtdinfo->owner = THIS_MODULE; -+ new->mtdinfo->type = MTD_RAM; -+ new->mtdinfo->erasesize = 0x0; -+ -+ ret = -EAGAIN; -+ if (add_mtd_device(new->mtdinfo)) { -+ ERROR("Failed to register new device\n"); -+ goto out3; -+ } -+ -+ list_add_tail(&new->list, &phram_list); -+ return 0; -+ -+out3: -+ iounmap(new->mtdinfo->priv); -+out2: -+ kfree(new->mtdinfo); -+out1: -+ kfree(new); -+out0: -+ return ret; -+} -+ -+static int ustrtoul(const char *cp, char **endp, unsigned int base) -+{ -+ unsigned long result = simple_strtoul(cp, endp, base); -+ -+ switch (**endp) { -+ case 'G': -+ result *= 1024; -+ case 'M': -+ result *= 1024; -+ case 'k': -+ result *= 1024; -+ endp++; -+ } -+ return result; -+} -+ -+static int parse_num32(uint32_t *num32, const char *token) -+{ -+ char *endp; -+ unsigned long n; -+ -+ n = ustrtoul(token, &endp, 0); -+ if (*endp) -+ return -EINVAL; -+ -+ *num32 = n; -+ return 0; -+} -+ -+static int parse_name(char **pname, const char *token) -+{ -+ 
size_t len; -+ char *name; -+ -+ len = strlen(token) + 1; -+ if (len > 64) -+ return -ENOSPC; -+ -+ name = kmalloc(len, GFP_KERNEL); -+ if (!name) -+ return -ENOMEM; -+ -+ strcpy(name, token); -+ -+ *pname = name; -+ return 0; -+} -+ -+#define parse_err(fmt, args...) do { \ -+ ERROR(fmt , ## args); \ -+ return 0; \ -+} while (0) -+ -+static int phram_setup(const char *val, struct kernel_param *kp) -+{ -+ char buf[64+12+12], *str = buf; -+ char *token[3]; -+ char *name; -+ uint32_t start; -+ uint32_t len; -+ int i, ret; -+ -+ if (strnlen(val, sizeof(str)) >= sizeof(str)) -+ parse_err("parameter too long\n"); -+ -+ strcpy(str, val); -+ -+ for (i=0; i<3; i++) -+ token[i] = strsep(&str, ","); -+ -+ if (str) -+ parse_err("too many arguments\n"); -+ -+ if (!token[2]) -+ parse_err("not enough arguments\n"); -+ -+ ret = parse_name(&name, token[0]); -+ if (ret == -ENOMEM) -+ parse_err("out of memory\n"); -+ if (ret == -ENOSPC) -+ parse_err("name too long\n"); -+ if (ret) -+ return 0; -+ -+ ret = parse_num32(&start, token[1]); -+ if (ret) -+ parse_err("illegal start address\n"); -+ -+ ret = parse_num32(&len, token[2]); -+ if (ret) -+ parse_err("illegal device length\n"); -+ -+ register_device(name, start, len); -+ -+ return 0; -+} -+ -+module_param_call(phram, phram_setup, NULL, NULL, 000); -+MODULE_PARM_DESC(phram, "Memory region to map. \"map=<name>,<start><length>\""); -+ -+/* -+ * Just for compatibility with slram, this is horrible and should go someday. 
-+ */ -+static int __init slram_setup(const char *val, struct kernel_param *kp) -+{ -+ char buf[256], *str = buf; -+ -+ if (!val || !val[0]) -+ parse_err("no arguments to \"slram=\"\n"); -+ -+ if (strnlen(val, sizeof(str)) >= sizeof(str)) -+ parse_err("parameter too long\n"); -+ -+ strcpy(str, val); -+ -+ while (str) { -+ char *token[3]; -+ char *name; -+ uint32_t start; -+ uint32_t len; -+ int i, ret; -+ -+ for (i=0; i<3; i++) { -+ token[i] = strsep(&str, ","); -+ if (token[i]) -+ continue; -+ parse_err("wrong number of arguments to \"slram=\"\n"); -+ } -+ -+ /* name */ -+ ret = parse_name(&name, token[0]); -+ if (ret == -ENOMEM) -+ parse_err("of memory\n"); -+ if (ret == -ENOSPC) -+ parse_err("too long\n"); -+ if (ret) -+ return 1; -+ -+ /* start */ -+ ret = parse_num32(&start, token[1]); -+ if (ret) -+ parse_err("illegal start address\n"); -+ -+ /* len */ -+ if (token[2][0] == '+') -+ ret = parse_num32(&len, token[2] + 1); -+ else -+ ret = parse_num32(&len, token[2]); -+ -+ if (ret) -+ parse_err("illegal device length\n"); -+ -+ if (token[2][0] != '+') { -+ if (len < start) -+ parse_err("end < start\n"); -+ len -= start; -+ } -+ -+ register_device(name, start, len); -+ } -+ return 1; -+} -+ -+module_param_call(slram, slram_setup, NULL, NULL, 000); -+MODULE_PARM_DESC(slram, "List of memory regions to map. 
\"map=<name>,<start><length/end>\""); -+ -+ -+int __init init_phram(void) -+{ -+ printk(KERN_ERR "phram loaded\n"); -+ return 0; -+} -+ -+static void __exit cleanup_phram(void) -+{ -+ unregister_devices(); -+} -+ -+module_init(init_phram); -+module_exit(cleanup_phram); -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Jörn Engel <joern@wh.fh-wedel.de>"); -+MODULE_DESCRIPTION("MTD driver for physical RAM"); -Index: linux-2.6.5/drivers/mtd/devices/pmc551.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/devices/pmc551.c 2004-04-03 22:37:41.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/devices/pmc551.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: pmc551.c,v 1.24 2003/05/20 21:03:08 dwmw2 Exp $ -+ * $Id: pmc551.c,v 1.28 2004/08/09 13:19:44 dwmw2 Exp $ - * - * PMC551 PCI Mezzanine Ram Device - * -@@ -109,12 +109,6 @@ - #include <linux/mtd/pmc551.h> - #include <linux/mtd/compatmac.h> - --#if LINUX_VERSION_CODE > 0x20300 --#define PCI_BASE_ADDRESS(dev) (dev->resource[0].start) --#else --#define PCI_BASE_ADDRESS(dev) (dev->base_address[0]) --#endif -- - static struct mtd_info *pmc551list; - - static int pmc551_erase (struct mtd_info *mtd, struct erase_info *instr) -@@ -175,9 +169,7 @@ - printk(KERN_DEBUG "pmc551_erase() done\n"); - #endif - -- if (instr->callback) { -- (*(instr->callback))(instr); -- } -+ mtd_erase_callback(instr); - return 0; - } - -@@ -564,7 +556,7 @@ - (size<1024)?size:(size<1048576)?size>>10:size>>20, - (size<1024)?'B':(size<1048576)?'K':'M', - size, ((dcmd&(0x1<<3)) == 0)?"non-":"", -- PCI_BASE_ADDRESS(dev)&PCI_BASE_ADDRESS_MEM_MASK ); -+ (dev->resource[0].start)&PCI_BASE_ADDRESS_MEM_MASK ); - - /* - * Check to see the state of the memory -@@ -694,7 +686,7 @@ - } - - printk(KERN_NOTICE "pmc551: Found PCI V370PDC at 0x%lX\n", -- PCI_BASE_ADDRESS(PCI_Device)); -+ PCI_Device->resource[0].start); - - /* - * The PMC551 device acts VERY weird if you don't init it -@@ -748,7 +740,7 @@ - 
printk(KERN_NOTICE "pmc551: Using specified aperture size %dM\n", asize>>20); - priv->asize = asize; - } -- priv->start = ioremap((PCI_BASE_ADDRESS(PCI_Device) -+ priv->start = ioremap(((PCI_Device->resource[0].start) - & PCI_BASE_ADDRESS_MEM_MASK), - priv->asize); - -Index: linux-2.6.5/drivers/mtd/devices/slram.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/devices/slram.c 2004-04-03 22:36:12.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/devices/slram.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,6 +1,6 @@ - /*====================================================================== - -- $Id: slram.c,v 1.30 2003/05/20 21:03:08 dwmw2 Exp $ -+ $Id: slram.c,v 1.31 2004/08/09 13:19:44 dwmw2 Exp $ - - This driver provides a method to access memory not used by the kernel - itself (i.e. if the kernel commandline mem=xxx is used). To actually -@@ -98,12 +98,7 @@ - - instr->state = MTD_ERASE_DONE; - -- if (instr->callback) { -- (*(instr->callback))(instr); -- } -- else { -- kfree(instr); -- } -+ mtd_erase_callback(instr); - - return(0); - } -Index: linux-2.6.5/drivers/mtd/ftl.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/ftl.c 2004-04-03 22:37:45.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/ftl.c 2005-02-01 17:11:29.000000000 -0500 -@@ -1,5 +1,5 @@ - /* This version ported to the Linux-MTD system by dwmw2@infradead.org -- * $Id: ftl.c,v 1.51 2003/06/23 12:00:08 dwmw2 Exp $ -+ * $Id: ftl.c,v 1.53 2004/08/09 13:55:43 dwmw2 Exp $ - * - * Fixes: Arnaldo Carvalho de Melo <acme@conectiva.com.br> - * - fixes some leaks on failure in build_maps and ftl_notify_add, cleanups -@@ -167,7 +167,8 @@ - { - erase_unit_header_t header; - loff_t offset, max_offset; -- int ret; -+ size_t ret; -+ int err; - part->header.FormattedSize = 0; - max_offset = (0x100000<part->mbd.mtd->size)?0x100000:part->mbd.mtd->size; - /* Search first megabyte for a valid FTL header */ -@@ -175,11 
+176,11 @@ - (offset + sizeof(header)) < max_offset; - offset += part->mbd.mtd->erasesize ? : 0x2000) { - -- ret = part->mbd.mtd->read(part->mbd.mtd, offset, sizeof(header), &ret, -+ err = part->mbd.mtd->read(part->mbd.mtd, offset, sizeof(header), &ret, - (unsigned char *)&header); - -- if (ret) -- return ret; -+ if (err) -+ return err; - - if (strcmp(header.DataOrgTuple+3, "FTL100") == 0) break; - } -@@ -958,7 +959,7 @@ - if (ret) { - printk(KERN_NOTICE "ftl_cs: block write failed!\n"); - printk(KERN_NOTICE "ftl_cs: log_addr = 0x%x, virt_addr" -- " = 0x%x, Offset = 0x%x\n", log_addr, virt_addr, -+ " = 0x%x, Offset = 0x%zx\n", log_addr, virt_addr, - offset); - return -EIO; - } -@@ -1093,7 +1094,7 @@ - - int init_ftl(void) - { -- DEBUG(0, "$Id: ftl.c,v 1.51 2003/06/23 12:00:08 dwmw2 Exp $\n"); -+ DEBUG(0, "$Id: ftl.c,v 1.53 2004/08/09 13:55:43 dwmw2 Exp $\n"); - - return register_mtd_blktrans(&ftl_tr); - } -Index: linux-2.6.5/drivers/mtd/inftlcore.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/inftlcore.c 2004-04-03 22:36:18.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/inftlcore.c 2005-02-01 17:11:29.000000000 -0500 -@@ -7,7 +7,7 @@ - * (c) 1999 Machine Vision Holdings, Inc. - * Author: David Woodhouse <dwmw2@infradead.org> - * -- * $Id: inftlcore.c,v 1.14 2003/06/26 08:28:26 dwmw2 Exp $ -+ * $Id: inftlcore.c,v 1.17 2004/08/09 13:56:48 dwmw2 Exp $ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by -@@ -55,9 +55,19 @@ - struct INFTLrecord *inftl; - unsigned long temp; - -- if (mtd->ecctype != MTD_ECC_RS_DiskOnChip) -+ if (mtd->type != MTD_NANDFLASH) -+ return; -+ /* OK, this is moderately ugly. But probably safe. Alternatives? 
*/ -+ if (memcmp(mtd->name, "DiskOnChip", 10)) - return; - -+ if (!mtd->block_isbad) { -+ printk(KERN_ERR -+"INFTL no longer supports the old DiskOnChip drivers loaded via docprobe.\n" -+"Please use the new diskonchip driver under the NAND subsystem.\n"); -+ return; -+ } -+ - DEBUG(MTD_DEBUG_LEVEL3, "INFTL: add_mtd for %s\n", mtd->name); - - inftl = kmalloc(sizeof(*inftl), GFP_KERNEL); -@@ -72,6 +82,8 @@ - inftl->mbd.devnum = -1; - inftl->mbd.blksize = 512; - inftl->mbd.tr = tr; -+ memcpy(&inftl->oobinfo, &mtd->oobinfo, sizeof(struct nand_oobinfo)); -+ inftl->oobinfo.useecc = MTD_NANDECC_PLACEONLY; - - if (INFTL_mount(inftl) < 0) { - printk(KERN_WARNING "INFTL: could not mount device\n"); -@@ -155,8 +167,8 @@ - u16 pot = inftl->LastFreeEUN; - int silly = inftl->nb_blocks; - -- DEBUG(MTD_DEBUG_LEVEL3, "INFTL: INFTL_findfreeblock(inftl=0x%x," -- "desperate=%d)\n", (int)inftl, desperate); -+ DEBUG(MTD_DEBUG_LEVEL3, "INFTL: INFTL_findfreeblock(inftl=%p," -+ "desperate=%d)\n", inftl, desperate); - - /* - * Normally, we force a fold to happen before we run out of free -@@ -198,8 +210,8 @@ - struct inftl_oob oob; - size_t retlen; - -- DEBUG(MTD_DEBUG_LEVEL3, "INFTL: INFTL_foldchain(inftl=0x%x,thisVUC=%d," -- "pending=%d)\n", (int)inftl, thisVUC, pendingblock); -+ DEBUG(MTD_DEBUG_LEVEL3, "INFTL: INFTL_foldchain(inftl=%p,thisVUC=%d," -+ "pending=%d)\n", inftl, thisVUC, pendingblock); - - memset(BlockMap, 0xff, sizeof(BlockMap)); - memset(BlockDeleted, 0, sizeof(BlockDeleted)); -@@ -284,21 +296,22 @@ - if (BlockMap[block] == BLOCK_NIL) - continue; - -- ret = MTD_READECC(inftl->mbd.mtd, (inftl->EraseSize * -+ ret = MTD_READ(inftl->mbd.mtd, (inftl->EraseSize * - BlockMap[block]) + (block * SECTORSIZE), SECTORSIZE, -- &retlen, movebuf, (char *)&oob, NULL); -+ &retlen, movebuf); - if (ret < 0) { -- ret = MTD_READECC(inftl->mbd.mtd, (inftl->EraseSize * -+ ret = MTD_READ(inftl->mbd.mtd, (inftl->EraseSize * - BlockMap[block]) + (block * SECTORSIZE), -- SECTORSIZE, &retlen, movebuf, 
(char *)&oob, -- NULL); -+ SECTORSIZE, &retlen, movebuf); - if (ret != -EIO) - DEBUG(MTD_DEBUG_LEVEL1, "INFTL: error went " - "away on retry?\n"); - } -+ memset(&oob, 0xff, sizeof(struct inftl_oob)); -+ oob.b.Status = oob.b.Status1 = SECTOR_USED; - MTD_WRITEECC(inftl->mbd.mtd, (inftl->EraseSize * targetEUN) + - (block * SECTORSIZE), SECTORSIZE, &retlen, -- movebuf, (char *)&oob, NULL); -+ movebuf, (char *)&oob, &inftl->oobinfo); - } - - /* -@@ -326,7 +339,6 @@ - if (INFTL_formatblock(inftl, thisEUN) < 0) { - /* - * Could not erase : mark block as reserved. -- * FixMe: Update Bad Unit Table on disk. - */ - inftl->PUtable[thisEUN] = BLOCK_RESERVED; - } else { -@@ -354,8 +366,8 @@ - u16 ChainLength = 0, thislen; - u16 chain, EUN; - -- DEBUG(MTD_DEBUG_LEVEL3, "INFTL: INFTL_makefreeblock(inftl=0x%x," -- "pending=%d)\n", (int)inftl, pendingblock); -+ DEBUG(MTD_DEBUG_LEVEL3, "INFTL: INFTL_makefreeblock(inftl=%p," -+ "pending=%d)\n", inftl, pendingblock); - - for (chain = 0; chain < inftl->nb_blocks; chain++) { - EUN = inftl->VUtable[chain]; -@@ -416,8 +428,8 @@ - size_t retlen; - int silly, silly2 = 3; - -- DEBUG(MTD_DEBUG_LEVEL3, "INFTL: INFTL_findwriteunit(inftl=0x%x," -- "block=%d)\n", (int)inftl, block); -+ DEBUG(MTD_DEBUG_LEVEL3, "INFTL: INFTL_findwriteunit(inftl=%p," -+ "block=%d)\n", inftl, block); - - do { - /* -@@ -578,8 +590,8 @@ - struct inftl_bci bci; - size_t retlen; - -- DEBUG(MTD_DEBUG_LEVEL3, "INFTL: INFTL_trydeletechain(inftl=0x%x," -- "thisVUC=%d)\n", (int)inftl, thisVUC); -+ DEBUG(MTD_DEBUG_LEVEL3, "INFTL: INFTL_trydeletechain(inftl=%p," -+ "thisVUC=%d)\n", inftl, thisVUC); - - memset(BlockUsed, 0, sizeof(BlockUsed)); - memset(BlockDeleted, 0, sizeof(BlockDeleted)); -@@ -668,7 +680,6 @@ - if (INFTL_formatblock(inftl, thisEUN) < 0) { - /* - * Could not erase : mark block as reserved. -- * FixMe: Update Bad Unit Table on medium. 
- */ - inftl->PUtable[thisEUN] = BLOCK_RESERVED; - } else { -@@ -698,8 +709,8 @@ - size_t retlen; - struct inftl_bci bci; - -- DEBUG(MTD_DEBUG_LEVEL3, "INFTL: INFTL_deleteblock(inftl=0x%x," -- "block=%d)\n", (int)inftl, block); -+ DEBUG(MTD_DEBUG_LEVEL3, "INFTL: INFTL_deleteblock(inftl=%p," -+ "block=%d)\n", inftl, block); - - while (thisEUN < inftl->nb_blocks) { - if (MTD_READOOB(inftl->mbd.mtd, (thisEUN * inftl->EraseSize) + -@@ -754,11 +765,11 @@ - unsigned int writeEUN; - unsigned long blockofs = (block * SECTORSIZE) & (inftl->EraseSize - 1); - size_t retlen; -- u8 eccbuf[6]; -+ struct inftl_oob oob; - char *p, *pend; - -- DEBUG(MTD_DEBUG_LEVEL3, "INFTL: inftl_writeblock(inftl=0x%x,block=%ld," -- "buffer=0x%x)\n", (int)inftl, block, (int)buffer); -+ DEBUG(MTD_DEBUG_LEVEL3, "INFTL: inftl_writeblock(inftl=%p,block=%ld," -+ "buffer=%p)\n", inftl, block, buffer); - - /* Is block all zero? */ - pend = buffer + SECTORSIZE; -@@ -778,11 +789,13 @@ - return 1; - } - -+ memset(&oob, 0xff, sizeof(struct inftl_oob)); -+ oob.b.Status = oob.b.Status1 = SECTOR_USED; - MTD_WRITEECC(inftl->mbd.mtd, (writeEUN * inftl->EraseSize) + - blockofs, SECTORSIZE, &retlen, (char *)buffer, -- (char *)eccbuf, NULL); -+ (char *)&oob, &inftl->oobinfo); - /* -- * No need to write SECTOR_USED flags since they are written -+ * need to write SECTOR_USED flags since they are not written - * in mtd_writeecc - */ - } else { -@@ -803,8 +816,8 @@ - struct inftl_bci bci; - size_t retlen; - -- DEBUG(MTD_DEBUG_LEVEL3, "INFTL: inftl_readblock(inftl=0x%x,block=%ld," -- "buffer=0x%x)\n", (int)inftl, block, (int)buffer); -+ DEBUG(MTD_DEBUG_LEVEL3, "INFTL: inftl_readblock(inftl=%p,block=%ld," -+ "buffer=%p)\n", inftl, block, buffer); - - while (thisEUN < inftl->nb_blocks) { - if (MTD_READOOB(inftl->mbd.mtd, (thisEUN * inftl->EraseSize) + -@@ -846,9 +859,8 @@ - } else { - size_t retlen; - loff_t ptr = (thisEUN * inftl->EraseSize) + blockofs; -- u_char eccbuf[6]; -- if (MTD_READECC(inftl->mbd.mtd, ptr, 
SECTORSIZE, &retlen, -- buffer, eccbuf, NULL)) -+ if (MTD_READ(inftl->mbd.mtd, ptr, SECTORSIZE, &retlen, -+ buffer)) - return -EIO; - } - return 0; -@@ -881,7 +893,7 @@ - - int __init init_inftl(void) - { -- printk(KERN_INFO "INFTL: inftlcore.c $Revision: 1.14 $, " -+ printk(KERN_INFO "INFTL: inftlcore.c $Revision: 1.17 $, " - "inftlmount.c %s\n", inftlmountrev); - - return register_mtd_blktrans(&inftl_tr); -Index: linux-2.6.5/drivers/mtd/inftlmount.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/inftlmount.c 2004-04-03 22:36:16.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/inftlmount.c 2005-02-01 17:11:29.000000000 -0500 -@@ -8,7 +8,7 @@ - * Author: Fabrice Bellard (fabrice.bellard@netgem.com) - * Copyright (C) 2000 Netgem S.A. - * -- * $Id: inftlmount.c,v 1.11 2003/06/23 07:39:21 dwmw2 Exp $ -+ * $Id: inftlmount.c,v 1.14 2004/08/09 13:57:42 dwmw2 Exp $ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by -@@ -41,7 +41,7 @@ - #include <linux/mtd/inftl.h> - #include <linux/mtd/compatmac.h> - --char inftlmountrev[]="$Revision: 1.11 $"; -+char inftlmountrev[]="$Revision: 1.14 $"; - - /* - * find_boot_record: Find the INFTL Media Header and its Spare copy which -@@ -54,14 +54,13 @@ - { - struct inftl_unittail h1; - //struct inftl_oob oob; -- unsigned int i, block, boot_record_count = 0; -+ unsigned int i, block; - u8 buf[SECTORSIZE]; - struct INFTLMediaHeader *mh = &inftl->MediaHdr; - struct INFTLPartition *ip; -- int retlen; -+ size_t retlen; - -- DEBUG(MTD_DEBUG_LEVEL3, "INFTL: find_boot_record(inftl=0x%x)\n", -- (int)inftl); -+ DEBUG(MTD_DEBUG_LEVEL3, "INFTL: find_boot_record(inftl=%p)\n", inftl); - - /* - * Assume logical EraseSize == physical erasesize for starting the -@@ -72,7 +71,6 @@ - inftl->nb_blocks = inftl->mbd.mtd->size / inftl->EraseSize; - - inftl->MediaUnit = BLOCK_NIL; -- inftl->SpareMediaUnit = 
BLOCK_NIL; - - /* Search for a valid boot record */ - for (block = 0; block < inftl->nb_blocks; block++) { -@@ -82,8 +80,11 @@ - * Check for BNAND header first. Then whinge if it's found - * but later checks fail. - */ -- if ((ret = MTD_READ(inftl->mbd.mtd, block * inftl->EraseSize, -- SECTORSIZE, &retlen, buf))) { -+ ret = MTD_READ(inftl->mbd.mtd, block * inftl->EraseSize, -+ SECTORSIZE, &retlen, buf); -+ /* We ignore ret in case the ECC of the MediaHeader is invalid -+ (which is apparently acceptable) */ -+ if (retlen != SECTORSIZE) { - static int warncount = 5; - - if (warncount) { -@@ -114,36 +115,28 @@ - continue; - } - -- if (boot_record_count) { -- /* -- * We've already processed one. So we just check if -- * this one is the same as the first one we found. -- */ -- if (memcmp(mh, buf, sizeof(struct INFTLMediaHeader))) { -- printk(KERN_WARNING "INFTL: Media Headers at " -- "0x%x and 0x%x disagree.\n", -- inftl->MediaUnit * inftl->EraseSize, -- block * inftl->EraseSize); -- return -1; -- } -- if (boot_record_count == 1) -- inftl->SpareMediaUnit = block; -- -- /* -- * Mark this boot record (INFTL MediaHeader) block as -- * reserved. -- */ -- inftl->PUtable[block] = BLOCK_RESERVED; -- -- boot_record_count++; -- continue; -- } - - /* - * This is the first we've seen. - * Copy the media header structure into place. - */ - memcpy(mh, buf, sizeof(struct INFTLMediaHeader)); -+ -+ /* Read the spare media header at offset 4096 */ -+ MTD_READ(inftl->mbd.mtd, block * inftl->EraseSize + 4096, -+ SECTORSIZE, &retlen, buf); -+ if (retlen != SECTORSIZE) { -+ printk(KERN_WARNING "INFTL: Unable to read spare " -+ "Media Header\n"); -+ return -1; -+ } -+ /* Check if this one is the same as the first one we found. 
*/ -+ if (memcmp(mh, buf, sizeof(struct INFTLMediaHeader))) { -+ printk(KERN_WARNING "INFTL: Primary and spare Media " -+ "Headers disagree.\n"); -+ return -1; -+ } -+ - mh->NoOfBootImageBlocks = le32_to_cpu(mh->NoOfBootImageBlocks); - mh->NoOfBinaryPartitions = le32_to_cpu(mh->NoOfBinaryPartitions); - mh->NoOfBDTLPartitions = le32_to_cpu(mh->NoOfBDTLPartitions); -@@ -197,8 +190,9 @@ - "UnitSizeFactor 0x%02x is experimental\n", - mh->BlockMultiplierBits); - inftl->EraseSize = inftl->mbd.mtd->erasesize << -- (0xff - mh->BlockMultiplierBits); -+ mh->BlockMultiplierBits; - inftl->nb_blocks = inftl->mbd.mtd->size / inftl->EraseSize; -+ block >>= mh->BlockMultiplierBits; - } - - /* Scan the partitions */ -@@ -293,7 +287,7 @@ - inftl->PUtable = kmalloc(inftl->nb_blocks * sizeof(u16), GFP_KERNEL); - if (!inftl->PUtable) { - printk(KERN_WARNING "INFTL: allocation of PUtable " -- "failed (%d bytes)\n", -+ "failed (%zd bytes)\n", - inftl->nb_blocks * sizeof(u16)); - return -ENOMEM; - } -@@ -302,7 +296,7 @@ - if (!inftl->VUtable) { - kfree(inftl->PUtable); - printk(KERN_WARNING "INFTL: allocation of VUtable " -- "failed (%d bytes)\n", -+ "failed (%zd bytes)\n", - inftl->nb_blocks * sizeof(u16)); - return -ENOMEM; - } -@@ -317,34 +311,23 @@ - /* Mark this boot record (NFTL MediaHeader) block as reserved */ - inftl->PUtable[block] = BLOCK_RESERVED; - --#if 0 - /* Read Bad Erase Unit Table and modify PUtable[] accordingly */ - for (i = 0; i < inftl->nb_blocks; i++) { -- if ((i & (SECTORSIZE - 1)) == 0) { -- /* read one sector for every SECTORSIZE of blocks */ -- if ((ret = MTD_READECC(inftl->mbd.mtd, -- block * inftl->EraseSize + i + SECTORSIZE, -- SECTORSIZE, &retlen, buf, -- (char *)&oob, NULL)) < 0) { -- printk(KERN_WARNING "INFTL: read of " -- "bad sector table failed " -- "(err %d)\n", ret); -- kfree(inftl->VUtable); -- kfree(inftl->PUtable); -- return -1; -- } -+ int physblock; -+ /* If any of the physical eraseblocks are bad, don't -+ use the unit. 
*/ -+ for (physblock = 0; physblock < inftl->EraseSize; physblock += inftl->mbd.mtd->erasesize) { -+ if (inftl->mbd.mtd->block_isbad(inftl->mbd.mtd, i * inftl->EraseSize + physblock)) -+ inftl->PUtable[i] = BLOCK_RESERVED; - } -- /* Mark the Bad Erase Unit as RESERVED in PUtable */ -- if (buf[i & (SECTORSIZE - 1)] != 0xff) -- inftl->PUtable[i] = BLOCK_RESERVED; - } --#endif - - inftl->MediaUnit = block; -- boot_record_count++; -+ return 0; - } -- -- return boot_record_count ? 0 : -1; -+ -+ /* Not found. */ -+ return -1; - } - - static int memcmpb(void *a, int c, int n) -@@ -364,28 +347,22 @@ - static int check_free_sectors(struct INFTLrecord *inftl, unsigned int address, - int len, int check_oob) - { -- int i, retlen; -- u8 buf[SECTORSIZE]; -+ u8 buf[SECTORSIZE + inftl->mbd.mtd->oobsize]; -+ size_t retlen; -+ int i; - -- DEBUG(MTD_DEBUG_LEVEL3, "INFTL: check_free_sectors(inftl=0x%x," -- "address=0x%x,len=%d,check_oob=%d)\n", (int)inftl, -+ DEBUG(MTD_DEBUG_LEVEL3, "INFTL: check_free_sectors(inftl=%p," -+ "address=0x%x,len=%d,check_oob=%d)\n", inftl, - address, len, check_oob); - - for (i = 0; i < len; i += SECTORSIZE) { -- /* -- * We want to read the sector without ECC check here since a -- * free sector does not have ECC syndrome on it yet. -- */ -- if (MTD_READ(inftl->mbd.mtd, address, SECTORSIZE, &retlen, buf) < 0) -+ if (MTD_READECC(inftl->mbd.mtd, address, SECTORSIZE, &retlen, buf, &buf[SECTORSIZE], &inftl->oobinfo) < 0) - return -1; - if (memcmpb(buf, 0xff, SECTORSIZE) != 0) - return -1; - - if (check_oob) { -- if (MTD_READOOB(inftl->mbd.mtd, address, -- inftl->mbd.mtd->oobsize, &retlen, buf) < 0) -- return -1; -- if (memcmpb(buf, 0xff, inftl->mbd.mtd->oobsize) != 0) -+ if (memcmpb(buf + SECTORSIZE, 0xff, inftl->mbd.mtd->oobsize) != 0) - return -1; - } - address += SECTORSIZE; -@@ -402,52 +379,62 @@ - * Return: 0 when succeed, -1 on error. - * - * ToDo: 1. Is it neceressary to check_free_sector after erasing ?? -- * 2. 
UnitSizeFactor != 0xFF - */ - int INFTL_formatblock(struct INFTLrecord *inftl, int block) - { -- int retlen; -+ size_t retlen; - struct inftl_unittail uci; - struct erase_info *instr = &inftl->instr; -+ int physblock; - -- DEBUG(MTD_DEBUG_LEVEL3, "INFTL: INFTL_formatblock(inftl=0x%x," -- "block=%d)\n", (int)inftl, block); -+ DEBUG(MTD_DEBUG_LEVEL3, "INFTL: INFTL_formatblock(inftl=%p," -+ "block=%d)\n", inftl, block); - - memset(instr, 0, sizeof(struct erase_info)); - -+ /* FIXME: Shouldn't we be setting the 'discarded' flag to zero -+ _first_? */ -+ - /* Use async erase interface, test return code */ - instr->addr = block * inftl->EraseSize; -- instr->len = inftl->EraseSize; -- MTD_ERASE(inftl->mbd.mtd, instr); -+ instr->len = inftl->mbd.mtd->erasesize; -+ /* Erase one physical eraseblock at a time, even though the NAND api -+ allows us to group them. This way we if we have a failure, we can -+ mark only the failed block in the bbt. */ -+ for (physblock = 0; physblock < inftl->EraseSize; physblock += instr->len, instr->addr += instr->len) { -+ MTD_ERASE(inftl->mbd.mtd, instr); -+ -+ if (instr->state == MTD_ERASE_FAILED) { -+ printk(KERN_WARNING "INFTL: error while formatting block %d\n", -+ block); -+ goto fail; -+ } - -- if (instr->state == MTD_ERASE_FAILED) { - /* -- * Could not format, FixMe: We should update the BadUnitTable -- * both in memory and on disk. -- */ -- printk(KERN_WARNING "INFTL: error while formatting block %d\n", -- block); -- return -1; -+ * Check the "freeness" of Erase Unit before updating metadata. -+ * FixMe: is this check really necessary? Since we have check the -+ * return code after the erase operation. -+ */ -+ if (check_free_sectors(inftl, instr->addr, instr->len, 1) != 0) -+ goto fail; - } - -- /* -- * Check the "freeness" of Erase Unit before updating metadata. -- * FixMe: is this check really necessary? Since we have check the -- * return code after the erase operation. 
-- */ -- if (check_free_sectors(inftl, instr->addr, inftl->EraseSize, 1) != 0) -- return -1; -- - uci.EraseMark = cpu_to_le16(ERASE_MARK); - uci.EraseMark1 = cpu_to_le16(ERASE_MARK); - uci.Reserved[0] = 0; - uci.Reserved[1] = 0; - uci.Reserved[2] = 0; - uci.Reserved[3] = 0; -- if (MTD_WRITEOOB(inftl->mbd.mtd, block * inftl->EraseSize + SECTORSIZE * 2 + -+ instr->addr = block * inftl->EraseSize + SECTORSIZE * 2; -+ if (MTD_WRITEOOB(inftl->mbd.mtd, instr->addr + - 8, 8, &retlen, (char *)&uci) < 0) -- return -1; -+ goto fail; - return 0; -+fail: -+ /* could not format, update the bad block table (caller is responsible -+ for setting the PUtable to BLOCK_RESERVED on failure) */ -+ inftl->mbd.mtd->block_markbad(inftl->mbd.mtd, instr->addr); -+ return -1; - } - - /* -@@ -472,7 +459,6 @@ - if (INFTL_formatblock(inftl, block) < 0) { - /* - * Cannot format !!!! Mark it as Bad Unit, -- * FixMe: update the BadUnitTable on disk. - */ - inftl->PUtable[block] = BLOCK_RESERVED; - } else { -@@ -565,10 +551,11 @@ - int chain_length, do_format_chain; - struct inftl_unithead1 h0; - struct inftl_unittail h1; -- int i, retlen; -+ size_t retlen; -+ int i; - u8 *ANACtable, ANAC; - -- DEBUG(MTD_DEBUG_LEVEL3, "INFTL: INFTL_mount(inftl=0x%x)\n", (int)s); -+ DEBUG(MTD_DEBUG_LEVEL3, "INFTL: INFTL_mount(inftl=%p)\n", s); - - /* Search for INFTL MediaHeader and Spare INFTL Media Header */ - if (find_boot_record(s) < 0) { -Index: linux-2.6.5/drivers/mtd/maps/Kconfig -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/Kconfig 2004-04-03 22:38:21.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/Kconfig 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - # drivers/mtd/maps/Kconfig --# $Id: Kconfig,v 1.12 2003/06/23 07:38:11 dwmw2 Exp $ -+# $Id: Kconfig,v 1.34 2004/09/02 01:27:07 eric Exp $ - - menu "Mapping drivers for chip access" - depends on MTD!=n -@@ -19,7 +19,8 @@ - command set driver code to communicate with flash chips which - are 
mapped physically into the CPU's memory. You will need to - configure the physical address and size of the flash chips on -- your particular board as well as the bus width. -+ your particular board as well as the bus width, either statically -+ with config options or at run-time. - - config MTD_PHYSMAP_START - hex "Physical start address of flash mapping" -@@ -30,6 +31,8 @@ - are mapped on your particular target board. Refer to the - memory map which should hopefully be in the documentation for - your board. -+ Ignore this option if you use run-time physmap configuration -+ (i.e., run-time calling physmap_configure()). - - config MTD_PHYSMAP_LEN - hex "Physical length of flash mapping" -@@ -42,9 +45,11 @@ - than the total amount of flash present. Refer to the memory - map which should hopefully be in the documentation for your - board. -+ Ignore this option if you use run-time physmap configuration -+ (i.e., run-time calling physmap_configure()). - --config MTD_PHYSMAP_BUSWIDTH -- int "Bus width in octets" -+config MTD_PHYSMAP_BANKWIDTH -+ int "Bank width in octets" - depends on MTD_PHYSMAP - default "2" - help -@@ -52,6 +57,8 @@ - in octets. For example, if you have a data bus width of 32 - bits, you would set the bus width octect value to 4. This is - used internally by the CFI drivers. -+ Ignore this option if you use run-time physmap configuration -+ (i.e., run-time calling physmap_configure()). - - config MTD_SUN_UFLASH - tristate "Sun Microsystems userflash support" -@@ -94,7 +101,7 @@ - By default the flash is split into 3 partitions which are accessed - as separate MTD devices. This board utilizes Intel StrataFlash. - More info at -- <http://www.arcomcontrols.com/products/icp/pc104/processors/>. -+ <http://www.arcomcontrols.com/products/icp/pc104/processors/SBC_GX1.htm>. - - config MTD_ELAN_104NC - tristate "CFI Flash device mapped on Arcom ELAN-104NC" -@@ -104,7 +111,7 @@ - System's ELAN-104NC development board. 
By default the flash - is split into 3 partitions which are accessed as separate MTD - devices. This board utilizes Intel StrataFlash. More info at -- <http://www.arcomcontrols.com/products/icp/pc104/processors/>. -+ <http://www.arcomcontrols.com/products/icp/pc104/processors/ELAN104NC.htm>. - - config MTD_LUBBOCK - tristate "CFI Flash device mapped on Intel Lubbock XScale eval board" -@@ -120,7 +127,7 @@ - This provides a 'mapping' driver which supports the way in which - the flash chips are connected in the Octagon-5066 Single Board - Computer. More information on the board is available at -- <http://www.octagonsystems.com/Products/5066/5066.html>. -+ <http://www.octagonsystems.com/CPUpages/5066.html>. - - config MTD_VMAX - tristate "JEDEC Flash device mapped on Tempustech VMAX SBC301" -@@ -129,7 +136,7 @@ - This provides a 'mapping' driver which supports the way in which - the flash chips are connected in the Tempustech VMAX SBC301 Single - Board Computer. More information on the board is available at -- <http://www.tempustech.com/tt301.htm>. -+ <http://www.tempustech.com/>. - - config MTD_SCx200_DOCFLASH - tristate "Flash device mapped with DOCCS on NatSemi SCx200" -@@ -151,11 +158,11 @@ - - BE VERY CAREFUL. - --config MTD_ICH2ROM -- tristate "BIOS flash chip on Intel Hub Controller 2" -- depends on X86 && MTD_JEDECPROBE && MTD_COMPLEX_MAPPINGS -+config MTD_ICHXROM -+ tristate "BIOS flash chip on Intel Controller Hub 2/3/4/5" -+ depends on X86 && MTD_JEDECPROBE - help -- Support for treating the BIOS flash chip on ICH2 motherboards -+ Support for treating the BIOS flash chip on ICHX motherboards - as an MTD device - with this you can reprogram your BIOS. - - BE VERY CAREFUL. -@@ -177,7 +184,7 @@ - - config MTD_LASAT - tristate "Flash chips on LASAT board" -- depends on LASAT && MTD_CFI -+ depends on LASAT - help - Support for the flash chips on the Lasat 100 and 200 boards. 
- -@@ -210,13 +217,59 @@ - You can say 'Y' to both this and 'MTD_PB1XXX_BOOT' above, to use - both banks. - -+config MTD_PB1550 -+ tristate "Flash devices on Alchemy PB1550 board" -+ depends on MIPS && MIPS_PB1550 -+ help -+ Flash memory access on Alchemy Pb1550 board -+ -+config MTD_PB1550_BOOT -+ bool "PB1550 boot flash device" -+ depends on MTD_PB1550 -+ help -+ Use the first of the two 64MiB flash banks on Pb1550 board. -+ You can say 'Y' to both this and 'MTD_PB1550_USER' below, to use -+ both banks. -+ -+config MTD_PB1550_USER -+ bool "PB1550 user flash device" -+ depends on MTD_PB1550 -+ default y if MTD_PB1550_BOOT = n -+ help -+ Use the second of the two 64MiB flash banks on Pb1550 board. -+ You can say 'Y' to both this and 'MTD_PB1550_BOOT' above, to use -+ both banks. -+ -+config MTD_DB1550 -+ tristate "Flash devices on Alchemy DB1550 board" -+ depends on MIPS && MIPS_DB1550 -+ help -+ Flash memory access on Alchemy Db1550 board -+ -+config MTD_DB1550_BOOT -+ bool "DB1550 boot flash device" -+ depends on MTD_DB1550 -+ help -+ Use the first of the two 64MiB flash banks on Db1550 board. -+ You can say 'Y' to both this and 'MTD_DB1550_USER' below, to use -+ both banks. -+ -+config MTD_DB1550_USER -+ bool "DB1550 user flash device" -+ depends on MTD_DB1550 -+ default y if MTD_DB1550_BOOT = n -+ help -+ Use the second of the two 64MiB flash banks on Db1550 board. -+ You can say 'Y' to both this and 'MTD_DB1550_BOOT' above, to use -+ both banks. -+ - config MTD_DILNETPC - tristate "CFI Flash device mapped on DIL/Net PC" - depends on X86 && MTD_CONCAT && MTD_PARTITIONS && MTD_CFI_INTELEXT - help - MTD map driver for SSV DIL/Net PC Boards "DNP" and "ADNP". 
-- For details, see http://www.ssv-embedded.de/ssv/pc104/p169.htm -- and http://www.ssv-embedded.de/ssv/pc104/p170.htm -+ For details, see <http://www.ssv-embedded.de/ssv/pc104/p169.htm> -+ and <http://www.ssv-embedded.de/ssv/pc104/p170.htm> - - config MTD_DILNETPC_BOOTSIZE - hex "Size of DIL/Net PC flash boot partition" -@@ -235,6 +288,13 @@ - - BE VERY CAREFUL. - -+config MTD_SBC8240 -+ tristate "Flash device on SBC8240" -+ depends on PPC32 && MTD_JEDECPROBE && 6xx && 8260 -+ help -+ Flash access on the SBC8240 board from Wind River. See -+ <http://www.windriver.com/products/sbc8240/> -+ - config MTD_TQM8XXL - tristate "CFI Flash device mapped on TQM8XXL" - depends on MTD_CFI && PPC32 && 8xx && TQM8xxL -@@ -253,7 +313,7 @@ - a strange sparse mapping. This 'mapping' driver supports that - arrangement, allowing the CFI probe and command set driver code - to communicate with the chips on the RPXLite board. More at -- <http://www.embeddedplanet.com/rpx_lite_specification_sheet.htm>. -+ <http://www.embeddedplanet.com/>. - - config MTD_MBX860 - tristate "System flash on MBX860 board" -@@ -265,7 +325,7 @@ - - config MTD_DBOX2 - tristate "CFI Flash device mapped on D-Box2" -- depends on PPC32 && 8xx && MTD_CFI_INTELSTD && MTD_CFI_INTELEXT && MTD_CFI_AMDSTD -+ depends on PPC32 && 8xx && DBOX2 && MTD_CFI_INTELSTD && MTD_CFI_INTELEXT && MTD_CFI_AMDSTD - help - This enables access routines for the flash chips on the Nokia/Sagem - D-Box 2 board. If you have one of these boards and would like to use -@@ -295,13 +355,21 @@ - use the flash chips on it, say 'Y'. - - config MTD_EBONY -- tristate "CFI Flash device mapped on IBM 440GP Ebony" -- depends on MTD_CFI && PPC32 && 440 && EBONY -+ tristate "Flash devices mapped on IBM 440GP Ebony" -+ depends on MTD_CFI && PPC32 && 44x && EBONY - help - This enables access routines for the flash chips on the IBM 440GP - Ebony board. If you have one of these boards and would like to - use the flash chips on it, say 'Y'. 
- -+config MTD_OCOTEA -+ tristate "Flash devices mapped on IBM 440GX Ocotea" -+ depends on MTD_CFI && PPC32 && 44x && OCOTEA -+ help -+ This enables access routines for the flash chips on the IBM 440GX -+ Ocotea board. If you have one of these boards and would like to -+ use the flash chips on it, say 'Y'. -+ - config MTD_REDWOOD - tristate "CFI Flash devices mapped on IBM Redwood" - depends on MTD_CFI && PPC32 && 4xx && 40x && ( REDWOOD_4 || REDWOOD_5 || REDWOOD_6 ) -@@ -388,13 +456,19 @@ - the SA1100 and SA1110, including the Assabet and the Compaq iPAQ. - If you have such a board, say 'Y'. - -+config MTD_IPAQ -+ tristate "CFI Flash device mapped on Compaq/HP iPAQ" -+ depends on ARM && IPAQ_HANDHELD && MTD_CFI -+ help -+ This provides a driver for the on-board flash of the iPAQ. -+ - config MTD_DC21285 - tristate "CFI Flash device mapped on DC21285 Footbridge" - depends on ARM && MTD_CFI && ARCH_FOOTBRIDGE && MTD_COMPLEX_MAPPINGS - help - This provides a driver for the flash accessed using Intel's - 21285 bridge used with Intel's StrongARM processors. More info at -- <http://developer.intel.com/design/bridge/quicklist/dsc-21285.htm>. -+ <http://www.intel.com/design/bridge/docs/21285_documentation.htm>. - - config MTD_IQ80310 - tristate "CFI Flash device mapped on the XScale IQ80310 board" -@@ -404,6 +478,24 @@ - IQ80310 evaluation board. If you have one of these boards and would - like to use the flash chips on it, say 'Y'. - -+config MTD_IXP4XX -+ tristate "CFI Flash device mapped on Intel IXP4xx based systems" -+ depends on ARM && MTD_CFI && MTD_COMPLEX_MAPPINGS && ARCH_IXP4XX -+ help -+ This enables MTD access to flash devices on platforms based -+ on Intel's IXP4xx family of network processors such as the -+ IXDP425 and Coyote. If you have an IXP4xx based board and -+ would like to use the flash chips on it, say 'Y'. 
-+ -+config MTD_IXP2000 -+ tristate "CFI Flash device mapped on Intel IXP2000 based systems" -+ depends on ARM && MTD_CFI && MTD_COMPLEX_MAPPINGS && ARCH_IXP2000 -+ help -+ This enables MTD access to flash devices on platforms based -+ on Intel's IXP2000 family of network processors such as the -+ IXDP425 and Coyote. If you have an IXP2000 based board and -+ would like to use the flash chips on it, say 'Y'. -+ - config MTD_EPXA10DB - tristate "CFI Flash device mapped on Epxa10db" - depends on ARM && MTD_CFI && MTD_PARTITIONS && ARCH_CAMELOT -@@ -448,6 +540,13 @@ - PhotoMax Digital Picture Frame. - If you have such a device, say 'Y'. - -+config MTD_NOR_TOTO -+ tristate "NOR Flash device on TOTO board" -+ depends on ARM && ARCH_OMAP && OMAP_TOTO -+ help -+ This enables access to the NOR flash on the Texas Instruments -+ TOTO board. -+ - config MTD_H720X - tristate "Hynix evaluation board mappings" - depends on ARM && MTD_CFI && ( ARCH_H7201 || ARCH_H7202 ) -@@ -455,6 +554,13 @@ - This enables access to the flash chips on the Hynix evaluation boards. - If you have such a board, say 'Y'. - -+config MTD_MPC1211 -+ tristate "CFI Flash device mapped on Interface MPC-1211" -+ depends on SUPERH && SH_MPC1211 && MTD_CFI -+ help -+ This enables access to the flash chips on the Interface MPC-1211(CTP/PCI/MPC-SH02). -+ If you have such a board, say 'Y'. -+ - # This needs CFI or JEDEC, depending on the cards found. - config MTD_PCI - tristate "PCI MTD driver" -@@ -480,5 +586,28 @@ - help - Map driver to support image based filesystems for uClinux. - -+config MTD_WRSBC8260 -+ tristate "Map driver for WindRiver PowerQUICC II MPC82xx board" -+ depends on (SBC82xx || SBC8560) -+ select MTD_PARTITIONS -+ select MTD_MAP_BANK_WIDTH_4 -+ select MTD_MAP_BANK_WIDTH_1 -+ select MTD_CFI_I1 -+ select MTD_CFI_I4 -+ help -+ Map driver for WindRiver PowerQUICC II MPC82xx board. Drives -+ all three flash regions on CS0, CS1 and CS6 if they are configured -+ correctly by the boot loader. 
-+ -+config MTD_DMV182 -+ tristate "Map driver for Dy-4 SVME/DMV-182 board." -+ depends on DMV182 -+ select MTD_PARTITIONS -+ select MTD_MAP_BANK_WIDTH_32 -+ select MTD_CFI_I8 -+ select MTD_CFI_AMDSTD -+ help -+ Map driver for Dy-4 SVME/DMV-182 board. -+ - endmenu - -Index: linux-2.6.5/drivers/mtd/maps/Makefile -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/Makefile 2004-04-03 22:36:12.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/Makefile 2005-02-01 17:11:17.000000000 -0500 -@@ -1,7 +1,7 @@ - # - # linux/drivers/maps/Makefile - # --# $Id: Makefile.common,v 1.2 2003/05/28 10:48:41 dwmw2 Exp $ -+# $Id: Makefile.common,v 1.17 2004/09/02 00:13:41 dsaxena Exp $ - - ifeq ($(CONFIG_MTD_COMPLEX_MAPPINGS),y) - obj-$(CONFIG_MTD) += map_funcs.o -@@ -19,7 +19,7 @@ - obj-$(CONFIG_MTD_IQ80310) += iq80310.o - obj-$(CONFIG_MTD_L440GX) += l440gx.o - obj-$(CONFIG_MTD_AMD76XROM) += amd76xrom.o --obj-$(CONFIG_MTD_ICH2ROM) += ich2rom.o -+obj-$(CONFIG_MTD_ICHXROM) += ichxrom.o - obj-$(CONFIG_MTD_TSUNAMI) += tsunami_flash.o - obj-$(CONFIG_MTD_LUBBOCK) += lubbock-flash.o - obj-$(CONFIG_MTD_MBX860) += mbx860.o -@@ -31,6 +31,7 @@ - obj-$(CONFIG_MTD_RPXLITE) += rpxlite.o - obj-$(CONFIG_MTD_TQM8XXL) += tqm8xxl.o - obj-$(CONFIG_MTD_SA1100) += sa1100-flash.o -+obj-$(CONFIG_MTD_IPAQ) += ipaq-flash.o - obj-$(CONFIG_MTD_SBC_GXX) += sbc_gxx.o - obj-$(CONFIG_MTD_SC520CDP) += sc520cdp.o - obj-$(CONFIG_MTD_NETSC520) += netsc520.o -@@ -42,6 +43,9 @@ - obj-$(CONFIG_MTD_SOLUTIONENGINE)+= solutionengine.o - obj-$(CONFIG_MTD_PCI) += pci.o - obj-$(CONFIG_MTD_PB1XXX) += pb1xxx-flash.o -+obj-$(CONFIG_MTD_DB1X00) += db1x00-flash.o -+obj-$(CONFIG_MTD_PB1550) += pb1550-flash.o -+obj-$(CONFIG_MTD_DB1550) += db1550-flash.o - obj-$(CONFIG_MTD_LASAT) += lasat.o - obj-$(CONFIG_MTD_AUTCPU12) += autcpu12-nvram.o - obj-$(CONFIG_MTD_EDB7312) += edb7312.o -@@ -52,6 +56,14 @@ - obj-$(CONFIG_MTD_NETtel) += nettel.o - obj-$(CONFIG_MTD_SCB2_FLASH) += 
scb2_flash.o - obj-$(CONFIG_MTD_EBONY) += ebony.o -+obj-$(CONFIG_MTD_OCOTEA) += ocotea.o - obj-$(CONFIG_MTD_BEECH) += beech-mtd.o - obj-$(CONFIG_MTD_ARCTIC) += arctic-mtd.o - obj-$(CONFIG_MTD_H720X) += h720x-flash.o -+obj-$(CONFIG_MTD_SBC8240) += sbc8240.o -+obj-$(CONFIG_MTD_NOR_TOTO) += omap-toto-flash.o -+obj-$(CONFIG_MTD_MPC1211) += mpc1211.o -+obj-$(CONFIG_MTD_IXP4XX) += ixp4xx.o -+obj-$(CONFIG_MTD_IXP2000) += ixp2000.o -+obj-$(CONFIG_MTD_WRSBC8260) += wr_sbc82xx_flash.o -+obj-$(CONFIG_MTD_DMV182) += dmv182.o -Index: linux-2.6.5/drivers/mtd/maps/amd76xrom.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/amd76xrom.c 2004-04-03 22:36:52.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/amd76xrom.c 2005-02-01 17:11:17.000000000 -0500 -@@ -2,7 +2,7 @@ - * amd76xrom.c - * - * Normal mappings of chips in physical memory -- * $Id: amd76xrom.c,v 1.8 2003/05/28 15:44:28 dwmw2 Exp $ -+ * $Id: amd76xrom.c,v 1.16 2004/09/17 11:45:06 eric Exp $ - */ - - #include <linux/module.h> -@@ -12,152 +12,265 @@ - #include <asm/io.h> - #include <linux/mtd/mtd.h> - #include <linux/mtd/map.h> -+#include <linux/mtd/cfi.h> -+#include <linux/mtd/flashchip.h> - #include <linux/config.h> - #include <linux/pci.h> - #include <linux/pci_ids.h> -+#include <linux/list.h> - - -+#define xstr(s) str(s) -+#define str(s) #s -+#define MOD_NAME xstr(KBUILD_BASENAME) -+ -+#define ADDRESS_NAME_LEN 18 -+ -+#define ROM_PROBE_STEP_SIZE (64*1024) /* 64KiB */ -+ -+struct amd76xrom_window { -+ void __iomem *virt; -+ unsigned long phys; -+ unsigned long size; -+ struct list_head maps; -+ struct resource rsrc; -+ struct pci_dev *pdev; -+}; -+ - struct amd76xrom_map_info { -+ struct list_head list; - struct map_info map; - struct mtd_info *mtd; -- unsigned long window_addr; -- u32 window_start, window_size; -- struct pci_dev *pdev; -+ struct resource rsrc; -+ char map_name[sizeof(MOD_NAME) + 2 + ADDRESS_NAME_LEN]; - }; - -- --static struct 
amd76xrom_map_info amd76xrom_map = { -- .map = { -- .name = "AMD76X rom", -- .size = 0, -- .buswidth = 1, -- }, -- .mtd = 0, -- .window_addr = 0, -+static struct amd76xrom_window amd76xrom_window = { -+ .maps = LIST_HEAD_INIT(amd76xrom_window.maps), - }; - -+static void amd76xrom_cleanup(struct amd76xrom_window *window) -+{ -+ struct amd76xrom_map_info *map, *scratch; -+ u8 byte; -+ -+ if (window->pdev) { -+ /* Disable writes through the rom window */ -+ pci_read_config_byte(window->pdev, 0x40, &byte); -+ pci_write_config_byte(window->pdev, 0x40, byte & ~1); -+ } -+ -+ /* Free all of the mtd devices */ -+ list_for_each_entry_safe(map, scratch, &window->maps, list) { -+ if (map->rsrc.parent) { -+ release_resource(&map->rsrc); -+ } -+ del_mtd_device(map->mtd); -+ map_destroy(map->mtd); -+ list_del(&map->list); -+ kfree(map); -+ } -+ if (window->rsrc.parent) -+ release_resource(&window->rsrc); -+ -+ if (window->virt) { -+ iounmap(window->virt); -+ window->virt = NULL; -+ window->phys = 0; -+ window->size = 0; -+ window->pdev = NULL; -+ } -+} -+ -+ - static int __devinit amd76xrom_init_one (struct pci_dev *pdev, - const struct pci_device_id *ent) - { -- struct rom_window { -- u32 start; -- u32 size; -- u8 segen_bits; -- }; -- static struct rom_window rom_window[] = { -- { 0xffb00000, 5*1024*1024, (1<<7) | (1<<6), }, -- { 0xffc00000, 4*1024*1024, (1<<7), }, -- { 0xffff0000, 64*1024, 0 }, -- { 0 , 0, 0 }, -- }; -- static const u32 rom_probe_sizes[] = { -- 5*1024*1024, 4*1024*1024, 2*1024*1024, 1024*1024, 512*1024, -- 256*1024, 128*1024, 64*1024, 0}; -- static char *rom_probe_types[] = { "cfi_probe", "jedec_probe", 0 }; -+ static char *rom_probe_types[] = { "cfi_probe", "jedec_probe", NULL }; - u8 byte; -- struct amd76xrom_map_info *info = &amd76xrom_map; -- struct rom_window *window; -- int i; -- u32 rom_size; -- -- window = &rom_window[0]; -- -- /* disabled because it fights with BIOS reserved regions */ --#define REQUEST_MEM_REGION 0 --#if REQUEST_MEM_REGION -- 
while(window->size) { -- if (request_mem_region(window->start, window->size, "amd76xrom")) { -- break; -- } -- window++; -+ struct amd76xrom_window *window = &amd76xrom_window; -+ struct amd76xrom_map_info *map = 0; -+ unsigned long map_top; -+ -+ /* Remember the pci dev I find the window in */ -+ window->pdev = pdev; -+ -+ /* Assume the rom window is properly setup, and find it's size */ -+ pci_read_config_byte(pdev, 0x43, &byte); -+ if ((byte & ((1<<7)|(1<<6))) == ((1<<7)|(1<<6))) { -+ window->phys = 0xffb00000; /* 5MiB */ -+ } -+ else if ((byte & (1<<7)) == (1<<7)) { -+ window->phys = 0xffc00000; /* 4MiB */ -+ } -+ else { -+ window->phys = 0xffff0000; /* 64KiB */ - } -- if (!window->size) { -- printk(KERN_ERR "amd76xrom: cannot reserve rom window\n"); -- goto err_out_none; -+ window->size = 0xffffffffUL - window->phys + 1UL; -+ -+ /* -+ * Try to reserve the window mem region. If this fails then -+ * it is likely due to a fragment of the window being -+ * "reseved" by the BIOS. In the case that the -+ * request_mem_region() fails then once the rom size is -+ * discovered we will try to reserve the unreserved fragment. 
-+ */ -+ window->rsrc.name = MOD_NAME; -+ window->rsrc.start = window->phys; -+ window->rsrc.end = window->phys + window->size - 1; -+ window->rsrc.flags = IORESOURCE_MEM | IORESOURCE_BUSY; -+ if (request_resource(&iomem_resource, &window->rsrc)) { -+ window->rsrc.parent = NULL; -+ printk(KERN_ERR MOD_NAME -+ " %s(): Unable to register resource" -+ " 0x%.08lx-0x%.08lx - kernel bug?\n", -+ __func__, -+ window->rsrc.start, window->rsrc.end); - } --#endif /* REQUEST_MEM_REGION */ -+ -+#if 0 - - /* Enable the selected rom window */ - pci_read_config_byte(pdev, 0x43, &byte); -- pci_write_config_byte(pdev, 0x43, byte | window->segen_bits); -+ pci_write_config_byte(pdev, 0x43, byte | rwindow->segen_bits); -+#endif - - /* Enable writes through the rom window */ - pci_read_config_byte(pdev, 0x40, &byte); - pci_write_config_byte(pdev, 0x40, byte | 1); -- -+ - /* FIXME handle registers 0x80 - 0x8C the bios region locks */ - -- printk(KERN_NOTICE "amd76xrom window : %x at %x\n", -- window->size, window->start); - /* For write accesses caches are useless */ -- info->window_addr = (unsigned long)ioremap_nocache(window->start, window->size); -+ window->virt = ioremap_nocache(window->phys, window->size); -+ if (!window->virt) { -+ printk(KERN_ERR MOD_NAME ": Failed to ioremap\n"); -+ goto out; -+ } -+ -+ /* Get the first address to look for an rom chip at */ -+ map_top = window->phys; -+#if 1 -+ /* The probe sequence run over the firmware hub lock -+ * registers sets them to 0x7 (no access). -+ * Probe at most the last 4M of the address space. 
-+ */ -+ if (map_top < 0xffc00000) { -+ map_top = 0xffc00000; -+ } -+#endif -+ /* Loop through and look for rom chips */ -+ while((map_top - 1) < 0xffffffffUL) { -+ struct cfi_private *cfi; -+ unsigned long offset; -+ int i; - -- if (!info->window_addr) { -- printk(KERN_ERR "Failed to ioremap\n"); -- goto err_out_free_mmio_region; -- } -- info->mtd = 0; -- for(i = 0; (rom_size = rom_probe_sizes[i]); i++) { -- char **chip_type; -- if (rom_size > window->size) { -- continue; -- } -- info->map.phys = window->start + window->size - rom_size; -- info->map.virt = -- info->window_addr + window->size - rom_size; -- info->map.size = rom_size; -- simple_map_init(&info->map); -- chip_type = rom_probe_types; -- for(; !info->mtd && *chip_type; chip_type++) { -- info->mtd = do_map_probe(*chip_type, &amd76xrom_map.map); -+ if (!map) { -+ map = kmalloc(sizeof(*map), GFP_KERNEL); - } -- if (info->mtd) { -- break; -+ if (!map) { -+ printk(KERN_ERR MOD_NAME ": kmalloc failed"); -+ goto out; -+ } -+ memset(map, 0, sizeof(*map)); -+ INIT_LIST_HEAD(&map->list); -+ map->map.name = map->map_name; -+ map->map.phys = map_top; -+ offset = map_top - window->phys; -+ map->map.virt = (void __iomem *) -+ (((unsigned long)(window->virt)) + offset); -+ map->map.size = 0xffffffffUL - map_top + 1UL; -+ /* Set the name of the map to the address I am trying */ -+ sprintf(map->map_name, "%s @%08lx", -+ MOD_NAME, map->map.phys); -+ -+ /* There is no generic VPP support */ -+ for(map->map.bankwidth = 32; map->map.bankwidth; -+ map->map.bankwidth >>= 1) -+ { -+ char **probe_type; -+ /* Skip bankwidths that are not supported */ -+ if (!map_bankwidth_supported(map->map.bankwidth)) -+ continue; -+ -+ /* Setup the map methods */ -+ simple_map_init(&map->map); -+ -+ /* Try all of the probe methods */ -+ probe_type = rom_probe_types; -+ for(; *probe_type; probe_type++) { -+ map->mtd = do_map_probe(*probe_type, &map->map); -+ if (map->mtd) -+ goto found; -+ } -+ } -+ map_top += ROM_PROBE_STEP_SIZE; -+ continue; 
-+ found: -+ /* Trim the size if we are larger than the map */ -+ if (map->mtd->size > map->map.size) { -+ printk(KERN_WARNING MOD_NAME -+ " rom(%u) larger than window(%lu). fixing...\n", -+ map->mtd->size, map->map.size); -+ map->mtd->size = map->map.size; -+ } -+ if (window->rsrc.parent) { -+ /* -+ * Registering the MTD device in iomem may not be possible -+ * if there is a BIOS "reserved" and BUSY range. If this -+ * fails then continue anyway. -+ */ -+ map->rsrc.name = map->map_name; -+ map->rsrc.start = map->map.phys; -+ map->rsrc.end = map->map.phys + map->mtd->size - 1; -+ map->rsrc.flags = IORESOURCE_MEM | IORESOURCE_BUSY; -+ if (request_resource(&window->rsrc, &map->rsrc)) { -+ printk(KERN_ERR MOD_NAME -+ ": cannot reserve MTD resource\n"); -+ map->rsrc.parent = NULL; -+ } -+ } -+ -+ /* Make the whole region visible in the map */ -+ map->map.virt = window->virt; -+ map->map.phys = window->phys; -+ cfi = map->map.fldrv_priv; -+ for(i = 0; i < cfi->numchips; i++) { -+ cfi->chips[i].start += offset; - } -+ -+ /* Now that the mtd devices is complete claim and export it */ -+ map->mtd->owner = THIS_MODULE; -+ add_mtd_device(map->mtd); -+ -+ -+ /* Calculate the new value of map_top */ -+ map_top += map->mtd->size; -+ -+ /* File away the map structure */ -+ list_add(&map->list, &window->maps); -+ map = 0; - } -- if (!info->mtd) { -- goto err_out_iounmap; -+ -+ out: -+ /* Free any left over map structures */ -+ if (map) { -+ kfree(map); -+ } -+ /* See if I have any map structures */ -+ if (list_empty(&window->maps)) { -+ amd76xrom_cleanup(window); -+ return -ENODEV; - } -- printk(KERN_NOTICE "amd76xrom chip at offset: 0x%x\n", -- window->size - rom_size); -- -- info->mtd->owner = THIS_MODULE; -- add_mtd_device(info->mtd); -- info->window_start = window->start; -- info->window_size = window->size; - return 0; -- --err_out_iounmap: -- iounmap((void *)(info->window_addr)); --err_out_free_mmio_region: --#if REQUEST_MEM_REGION -- release_mem_region(window->start, 
window->size); --err_out_none: --#endif /* REQUEST_MEM_REGION */ -- return -ENODEV; - } - - - static void __devexit amd76xrom_remove_one (struct pci_dev *pdev) - { -- struct amd76xrom_map_info *info = &amd76xrom_map; -- u8 byte; -- -- del_mtd_device(info->mtd); -- map_destroy(info->mtd); -- info->mtd = 0; -- info->map.virt = 0; -- -- iounmap((void *)(info->window_addr)); -- info->window_addr = 0; -- -- /* Disable writes through the rom window */ -- pci_read_config_byte(pdev, 0x40, &byte); -- pci_write_config_byte(pdev, 0x40, byte & ~1); -+ struct amd76xrom_window *window = &amd76xrom_window; - --#if REQUEST_MEM_REGION -- release_mem_region(info->window_start, info->window_size); --#endif /* REQUEST_MEM_REGION */ -+ amd76xrom_cleanup(window); - } - - static struct pci_device_id amd76xrom_pci_tbl[] = { -@@ -173,7 +286,7 @@ - - #if 0 - static struct pci_driver amd76xrom_driver = { -- .name = "amd76xrom", -+ .name = MOD_NAME, - .id_table = amd76xrom_pci_tbl, - .probe = amd76xrom_init_one, - .remove = amd76xrom_remove_one, -@@ -184,15 +297,14 @@ - { - struct pci_dev *pdev; - struct pci_device_id *id; -- pdev = 0; -+ pdev = NULL; - for(id = amd76xrom_pci_tbl; id->vendor; id++) { -- pdev = pci_find_device(id->vendor, id->device, 0); -+ pdev = pci_find_device(id->vendor, id->device, NULL); - if (pdev) { - break; - } - } - if (pdev) { -- amd76xrom_map.pdev = pdev; - return amd76xrom_init_one(pdev, &amd76xrom_pci_tbl[0]); - } - return -ENXIO; -@@ -203,7 +315,7 @@ - - static void __exit cleanup_amd76xrom(void) - { -- amd76xrom_remove_one(amd76xrom_map.pdev); -+ amd76xrom_remove_one(amd76xrom_window.pdev); - } - - module_init(init_amd76xrom); -Index: linux-2.6.5/drivers/mtd/maps/arctic-mtd.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/arctic-mtd.c 2004-04-03 22:36:55.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/arctic-mtd.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: arctic-mtd.c,v 
1.10 2003/06/02 16:37:59 trini Exp $ -+ * $Id: arctic-mtd.c,v 1.12 2004/09/16 23:27:12 gleixner Exp $ - * - * drivers/mtd/maps/arctic-mtd.c MTD mappings and partition tables for - * IBM 405LP Arctic boards. -@@ -72,7 +72,7 @@ - static struct map_info arctic_mtd_map = { - .name = NAME, - .size = SIZE, -- .buswidth = BUSWIDTH, -+ .bankwidth = BUSWIDTH, - .phys = PADDR, - }; - -@@ -98,7 +98,7 @@ - { - printk("%s: 0x%08x at 0x%08x\n", NAME, SIZE, PADDR); - -- arctic_mtd_map.virt = (unsigned long) ioremap(PADDR, SIZE); -+ arctic_mtd_map.virt = (void __iomem *) ioremap(PADDR, SIZE); - - if (!arctic_mtd_map.virt) { - printk("%s: failed to ioremap 0x%x\n", NAME, PADDR); -Index: linux-2.6.5/drivers/mtd/maps/autcpu12-nvram.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/autcpu12-nvram.c 2004-04-03 22:38:17.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/autcpu12-nvram.c 2005-02-01 17:11:17.000000000 -0500 -@@ -2,7 +2,7 @@ - * NV-RAM memory access on autcpu12 - * (C) 2002 Thomas Gleixner (gleixner@autronix.de) - * -- * $Id: autcpu12-nvram.c,v 1.5 2003/05/21 12:45:18 dwmw2 Exp $ -+ * $Id: autcpu12-nvram.c,v 1.7 2004/09/16 23:27:12 gleixner Exp $ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by -@@ -39,7 +39,7 @@ - struct map_info autcpu12_sram_map = { - .name = "SRAM", - .size = 32768, -- .buswidth = 4, -+ .bankwidth = 4, - .phys = 0x12000000, - }; - -@@ -47,7 +47,7 @@ - { - int err, save0, save1; - -- autcpu12_sram_map.virt = (unsigned long)ioremap(0x12000000, SZ_128K); -+ autcpu12_sram_map.virt = (void __iomem *)ioremap(0x12000000, SZ_128K); - if (!autcpu12_sram_map.virt) { - printk("Failed to ioremap autcpu12 NV-RAM space\n"); - err = -EIO; -@@ -76,7 +76,7 @@ - /* We have a 128K found, restore 0x10000 and set size - * to 128K - */ -- ma[_write32(&autcpu12_sram_map,save1,0x10000); -+ 
map_write32(&autcpu12_sram_map,save1,0x10000); - autcpu12_sram_map.size = SZ_128K; - - map: -Index: linux-2.6.5/drivers/mtd/maps/beech-mtd.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/beech-mtd.c 2004-04-03 22:37:36.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/beech-mtd.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: beech-mtd.c,v 1.7 2003/05/21 12:45:18 dwmw2 Exp $ -+ * $Id: beech-mtd.c,v 1.9 2004/09/16 23:27:12 gleixner Exp $ - * - * drivers/mtd/maps/beech-mtd.c MTD mappings and partition tables for - * IBM 405LP Beech boards. -@@ -51,7 +51,7 @@ - static struct map_info beech_mtd_map = { - .name = NAME, - .size = SIZE, -- .buswidth = BUSWIDTH, -+ .bankwidth = BUSWIDTH, - .phys = PADDR - }; - -@@ -74,7 +74,7 @@ - { - printk("%s: 0x%08x at 0x%08x\n", NAME, SIZE, PADDR); - -- beech_mtd_map.virt = (unsigned long) ioremap(PADDR, SIZE); -+ beech_mtd_map.virt = (void __iomem *) ioremap(PADDR, SIZE); - - if (!beech_mtd_map.virt) { - printk("%s: failed to ioremap 0x%x\n", NAME, PADDR); -Index: linux-2.6.5/drivers/mtd/maps/cdb89712.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/cdb89712.c 2004-04-03 22:38:26.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/cdb89712.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,7 +1,7 @@ - /* - * Flash on Cirrus CDB89712 - * -- * $Id: cdb89712.c,v 1.7 2003/05/21 12:45:18 dwmw2 Exp $ -+ * $Id: cdb89712.c,v 1.9 2004/09/16 23:27:12 gleixner Exp $ - */ - - #include <linux/module.h> -@@ -23,7 +23,7 @@ - struct map_info cdb89712_flash_map = { - .name = "flash", - .size = FLASH_SIZE, -- .buswidth = FLASH_WIDTH, -+ .bankwidth = FLASH_WIDTH, - .phys = FLASH_START, - }; - -@@ -44,7 +44,7 @@ - goto out; - } - -- cdb89712_flash_map.virt = (unsigned long)ioremap(FLASH_START, FLASH_SIZE); -+ cdb89712_flash_map.virt = (void __iomem *)ioremap(FLASH_START, FLASH_SIZE); - if (!cdb89712_flash_map.virt) { - 
printk(KERN_NOTICE "Failed to ioremap Cdb89712 FLASH space\n"); - err = -EIO; -@@ -93,7 +93,7 @@ - struct map_info cdb89712_sram_map = { - .name = "SRAM", - .size = SRAM_SIZE, -- .buswidth = SRAM_WIDTH, -+ .bankwidth = SRAM_WIDTH, - .phys = SRAM_START, - }; - -@@ -114,7 +114,7 @@ - goto out; - } - -- cdb89712_sram_map.virt = (unsigned long)ioremap(SRAM_START, SRAM_SIZE); -+ cdb89712_sram_map.virt = (void __iomem *)ioremap(SRAM_START, SRAM_SIZE); - if (!cdb89712_sram_map.virt) { - printk(KERN_NOTICE "Failed to ioremap Cdb89712 SRAM space\n"); - err = -EIO; -@@ -161,7 +161,7 @@ - struct map_info cdb89712_bootrom_map = { - .name = "BootROM", - .size = BOOTROM_SIZE, -- .buswidth = BOOTROM_WIDTH, -+ .bankwidth = BOOTROM_WIDTH, - .phys = BOOTROM_START, - }; - -@@ -182,7 +182,7 @@ - goto out; - } - -- cdb89712_bootrom_map.virt = (unsigned long)ioremap(BOOTROM_START, BOOTROM_SIZE); -+ cdb89712_bootrom_map.virt = (void __iomem *)ioremap(BOOTROM_START, BOOTROM_SIZE); - if (!cdb89712_bootrom_map.virt) { - printk(KERN_NOTICE "Failed to ioremap Cdb89712 BootROM space\n"); - err = -EIO; -Index: linux-2.6.5/drivers/mtd/maps/ceiva.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/ceiva.c 2004-04-03 22:36:55.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/ceiva.c 2005-02-01 17:11:17.000000000 -0500 -@@ -11,7 +11,7 @@ - * - * (C) 2000 Nicolas Pitre <nico@cam.org> - * -- * $Id: ceiva.c,v 1.8 2003/05/21 12:45:18 dwmw2 Exp $ -+ * $Id: ceiva.c,v 1.11 2004/09/16 23:27:12 gleixner Exp $ - */ - - #include <linux/config.h> -@@ -150,8 +150,8 @@ - break; - } - -- clps[i].map->virt = (unsigned long)clps[i].vbase; -- clps[i].map->buswidth = clps[i].width; -+ clps[i].map->virt = (void __iomem *)clps[i].vbase; -+ clps[i].map->bankwidth = clps[i].width; - clps[i].map->size = clps[i].size; - - simple_map_init(&clps[i].map); -Index: linux-2.6.5/drivers/mtd/maps/cfi_flagadm.c 
-=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/cfi_flagadm.c 2004-04-03 22:36:13.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/cfi_flagadm.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,7 +1,7 @@ - /* - * Copyright © 2001 Flaga hf. Medical Devices, Kári DavÃðsson <kd@flaga.is> - * -- * $Id: cfi_flagadm.c,v 1.11 2003/05/21 12:45:18 dwmw2 Exp $ -+ * $Id: cfi_flagadm.c,v 1.13 2004/09/16 23:27:12 gleixner Exp $ - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the -@@ -60,7 +60,7 @@ - struct map_info flagadm_map = { - .name = "FlagaDM flash device", - .size = FLASH_SIZE, -- .buswidth = 2, -+ .bankwidth = 2, - }; - - struct mtd_partition flagadm_parts[] = { -@@ -96,7 +96,7 @@ - FLASH_SIZE, FLASH_PHYS_ADDR); - - flagadm_map.phys = FLASH_PHYS_ADDR; -- flagadm_map.virt = (unsigned long)ioremap(FLASH_PHYS_ADDR, -+ flagadm_map.virt = (void __iomem *s)ioremap(FLASH_PHYS_ADDR, - FLASH_SIZE); - - if (!flagadm_map.virt) { -Index: linux-2.6.5/drivers/mtd/maps/cstm_mips_ixx.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/cstm_mips_ixx.c 2004-04-03 22:36:25.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/cstm_mips_ixx.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: cstm_mips_ixx.c,v 1.9 2003/05/21 12:45:18 dwmw2 Exp $ -+ * $Id: cstm_mips_ixx.c,v 1.11 2004/09/16 23:27:12 gleixner Exp $ - * - * Mapping of a custom board with both AMD CFI and JEDEC flash in partitions. - * Config with both CFI and JEDEC device support. 
-@@ -104,7 +104,7 @@ - char *name; - unsigned long window_addr; - unsigned long window_size; -- int buswidth; -+ int bankwidth; - int num_partitions; - }; - -@@ -116,7 +116,7 @@ - "big flash", // name - 0x08000000, // window_addr - 0x02000000, // window_size -- 4, // buswidth -+ 4, // bankwidth - 1, // num_partitions - } - -@@ -138,7 +138,7 @@ - "MTD flash", // name - CONFIG_MTD_CSTM_MIPS_IXX_START, // window_addr - CONFIG_MTD_CSTM_MIPS_IXX_LEN, // window_size -- CONFIG_MTD_CSTM_MIPS_IXX_BUSWIDTH, // buswidth -+ CONFIG_MTD_CSTM_MIPS_IXX_BUSWIDTH, // bankwidth - 1, // num_partitions - }, - -@@ -170,14 +170,14 @@ - - - cstm_mips_ixx_map[i].phys = cstm_mips_ixx_board_desc[i].window_addr; -- cstm_mips_ixx_map[i].virt = (unsigned long)ioremap(cstm_mips_ixx_board_desc[i].window_addr, cstm_mips_ixx_board_desc[i].window_size); -+ cstm_mips_ixx_map[i].virt = (void __iomem *)ioremap(cstm_mips_ixx_board_desc[i].window_addr, cstm_mips_ixx_board_desc[i].window_size); - if (!cstm_mips_ixx_map[i].virt) { - printk(KERN_WARNING "Failed to ioremap\n"); - return -EIO; - } - cstm_mips_ixx_map[i].name = cstm_mips_ixx_board_desc[i].name; - cstm_mips_ixx_map[i].size = cstm_mips_ixx_board_desc[i].window_size; -- cstm_mips_ixx_map[i].buswidth = cstm_mips_ixx_board_desc[i].buswidth; -+ cstm_mips_ixx_map[i].bankwidth = cstm_mips_ixx_board_desc[i].bankwidth; - #if defined(CONFIG_MIPS_ITE8172) || defined(CONFIG_MIPS_IVR) - cstm_mips_ixx_map[i].set_vpp = cstm_mips_ixx_set_vpp; - #endif -Index: linux-2.6.5/drivers/mtd/maps/db1550-flash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/db1550-flash.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/db1550-flash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,188 @@ -+/* -+ * Flash memory access on Alchemy Db1550 board -+ * -+ * $Id: db1550-flash.c,v 1.4 2004/09/16 23:27:12 gleixner Exp $ -+ * -+ * (C) 2004 Embedded Edge, LLC, based on db1550-flash.c: -+ * (C) 2003 
Pete Popov <pete_popov@yahoo.com> -+ * -+ */ -+ -+#include <linux/config.h> -+#include <linux/init.h> -+#include <linux/module.h> -+#include <linux/types.h> -+#include <linux/kernel.h> -+ -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/map.h> -+#include <linux/mtd/partitions.h> -+ -+#include <asm/io.h> -+#include <asm/au1000.h> -+ -+#ifdef DEBUG_RW -+#define DBG(x...) printk(x) -+#else -+#define DBG(x...) -+#endif -+ -+static unsigned long window_addr; -+static unsigned long window_size; -+ -+ -+static struct map_info db1550_map = { -+ .name = "Db1550 flash", -+}; -+ -+static unsigned char flash_bankwidth = 4; -+ -+/* -+ * Support only 64MB NOR Flash parts -+ */ -+ -+#if defined(CONFIG_MTD_DB1550_BOOT) && defined(CONFIG_MTD_DB1550_USER) -+#define DB1550_BOTH_BANKS -+#elif defined(CONFIG_MTD_DB1550_BOOT) && !defined(CONFIG_MTD_DB1550_USER) -+#define DB1550_BOOT_ONLY -+#elif !defined(CONFIG_MTD_DB1550_BOOT) && defined(CONFIG_MTD_DB1550_USER) -+#define DB1550_USER_ONLY -+#endif -+ -+#ifdef DB1550_BOTH_BANKS -+/* both banks will be used. Combine the first bank and the first -+ * part of the second bank together into a single jffs/jffs2 -+ * partition. 
-+ */ -+static struct mtd_partition db1550_partitions[] = { -+ /* assume boot[2:0]:swap is '0000' or '1000', which translates to: -+ * 1C00 0000 1FFF FFFF CE0 64MB Boot NOR Flash -+ * 1800 0000 1BFF FFFF CE0 64MB Param NOR Flash -+ */ -+ { -+ .name = "User FS", -+ .size = (0x1FC00000 - 0x18000000), -+ .offset = 0x0000000 -+ },{ -+ .name = "yamon", -+ .size = 0x0100000, -+ .offset = MTDPART_OFS_APPEND, -+ .mask_flags = MTD_WRITEABLE -+ },{ -+ .name = "raw kernel", -+ .size = (0x300000 - 0x40000), /* last 256KB is yamon env */ -+ .offset = MTDPART_OFS_APPEND, -+ } -+}; -+#elif defined(DB1550_BOOT_ONLY) -+static struct mtd_partition db1550_partitions[] = { -+ /* assume boot[2:0]:swap is '0000' or '1000', which translates to: -+ * 1C00 0000 1FFF FFFF CE0 64MB Boot NOR Flash -+ */ -+ { -+ .name = "User FS", -+ .size = 0x03c00000, -+ .offset = 0x0000000 -+ },{ -+ .name = "yamon", -+ .size = 0x0100000, -+ .offset = MTDPART_OFS_APPEND, -+ .mask_flags = MTD_WRITEABLE -+ },{ -+ .name = "raw kernel", -+ .size = (0x300000-0x40000), /* last 256KB is yamon env */ -+ .offset = MTDPART_OFS_APPEND, -+ } -+}; -+#elif defined(DB1550_USER_ONLY) -+static struct mtd_partition db1550_partitions[] = { -+ /* assume boot[2:0]:swap is '0000' or '1000', which translates to: -+ * 1800 0000 1BFF FFFF CE0 64MB Param NOR Flash -+ */ -+ { -+ .name = "User FS", -+ .size = (0x4000000 - 0x200000), /* reserve 2MB for raw kernel */ -+ .offset = 0x0000000 -+ },{ -+ .name = "raw kernel", -+ .size = MTDPART_SIZ_FULL, -+ .offset = MTDPART_OFS_APPEND, -+ } -+}; -+#else -+#error MTD_DB1550 define combo error /* should never happen */ -+#endif -+ -+#define NB_OF(x) (sizeof(x)/sizeof(x[0])) -+ -+static struct mtd_info *mymtd; -+ -+/* -+ * Probe the flash density and setup window address and size -+ * based on user CONFIG options. There are times when we don't -+ * want the MTD driver to be probing the boot or user flash, -+ * so having the option to enable only one bank is important. 
-+ */ -+int setup_flash_params(void) -+{ -+#if defined(DB1550_BOTH_BANKS) -+ window_addr = 0x18000000; -+ window_size = 0x8000000; -+#elif defined(DB1550_BOOT_ONLY) -+ window_addr = 0x1C000000; -+ window_size = 0x4000000; -+#else /* USER ONLY */ -+ window_addr = 0x1E000000; -+ window_size = 0x4000000; -+#endif -+ return 0; -+} -+ -+int __init db1550_mtd_init(void) -+{ -+ struct mtd_partition *parts; -+ int nb_parts = 0; -+ -+ /* Default flash bankwidth */ -+ db1550_map.bankwidth = flash_bankwidth; -+ -+ if (setup_flash_params()) -+ return -ENXIO; -+ -+ /* -+ * Static partition definition selection -+ */ -+ parts = db1550_partitions; -+ nb_parts = NB_OF(db1550_partitions); -+ db1550_map.size = window_size; -+ -+ /* -+ * Now let's probe for the actual flash. Do it here since -+ * specific machine settings might have been set above. -+ */ -+ printk(KERN_NOTICE "Pb1550 flash: probing %d-bit flash bus\n", -+ db1550_map.bankwidth*8); -+ db1550_map.virt = -+ (void __iomem *)ioremap(window_addr, window_size); -+ mymtd = do_map_probe("cfi_probe", &db1550_map); -+ if (!mymtd) return -ENXIO; -+ mymtd->owner = THIS_MODULE; -+ -+ add_mtd_partitions(mymtd, parts, nb_parts); -+ return 0; -+} -+ -+static void __exit db1550_mtd_cleanup(void) -+{ -+ if (mymtd) { -+ del_mtd_partitions(mymtd); -+ map_destroy(mymtd); -+ } -+} -+ -+module_init(db1550_mtd_init); -+module_exit(db1550_mtd_cleanup); -+ -+MODULE_AUTHOR("Embedded Edge, LLC"); -+MODULE_DESCRIPTION("Db1550 mtd map driver"); -+MODULE_LICENSE("GPL"); -Index: linux-2.6.5/drivers/mtd/maps/db1x00-flash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/db1x00-flash.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/db1x00-flash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,219 @@ -+/* -+ * Flash memory access on Alchemy Db1xxx boards -+ * -+ * $Id: db1x00-flash.c,v 1.4 2004/09/16 23:27:12 gleixner Exp $ -+ * -+ * (C) 2003 Pete Popov 
<ppopov@pacbell.net> -+ * -+ */ -+ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/types.h> -+#include <linux/init.h> -+#include <linux/kernel.h> -+ -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/map.h> -+#include <linux/mtd/partitions.h> -+ -+#include <asm/io.h> -+#include <asm/au1000.h> -+#include <asm/db1x00.h> -+ -+#ifdef DEBUG_RW -+#define DBG(x...) printk(x) -+#else -+#define DBG(x...) -+#endif -+ -+static unsigned long window_addr; -+static unsigned long window_size; -+static unsigned long flash_size; -+ -+static BCSR * const bcsr = (BCSR *)0xAE000000; -+static unsigned char flash_bankwidth = 4; -+ -+/* -+ * The Db1x boards support different flash densities. We setup -+ * the mtd_partition structures below for default of 64Mbit -+ * flash densities, and override the partitions sizes, if -+ * necessary, after we check the board status register. -+ */ -+ -+#ifdef DB1X00_BOTH_BANKS -+/* both banks will be used. Combine the first bank and the first -+ * part of the second bank together into a single jffs/jffs2 -+ * partition. 
-+ */ -+static struct mtd_partition db1x00_partitions[] = { -+ { -+ .name = "User FS", -+ .size = 0x1c00000, -+ .offset = 0x0000000 -+ },{ -+ .name = "yamon", -+ .size = 0x0100000, -+ .offset = MTDPART_OFS_APPEND, -+ .mask_flags = MTD_WRITEABLE -+ },{ -+ .name = "raw kernel", -+ .size = (0x300000-0x40000), /* last 256KB is env */ -+ .offset = MTDPART_OFS_APPEND, -+ } -+}; -+#elif defined(DB1X00_BOOT_ONLY) -+static struct mtd_partition db1x00_partitions[] = { -+ { -+ .name = "User FS", -+ .size = 0x00c00000, -+ .offset = 0x0000000 -+ },{ -+ .name = "yamon", -+ .size = 0x0100000, -+ .offset = MTDPART_OFS_APPEND, -+ .mask_flags = MTD_WRITEABLE -+ },{ -+ .name = "raw kernel", -+ .size = (0x300000-0x40000), /* last 256KB is env */ -+ .offset = MTDPART_OFS_APPEND, -+ } -+}; -+#elif defined(DB1X00_USER_ONLY) -+static struct mtd_partition db1x00_partitions[] = { -+ { -+ .name = "User FS", -+ .size = 0x0e00000, -+ .offset = 0x0000000 -+ },{ -+ .name = "raw kernel", -+ .size = MTDPART_SIZ_FULL, -+ .offset = MTDPART_OFS_APPEND, -+ } -+}; -+#else -+#error MTD_DB1X00 define combo error /* should never happen */ -+#endif -+#define NB_OF(x) (sizeof(x)/sizeof(x[0])) -+ -+#define NAME "Db1x00 Linux Flash" -+ -+static struct map_info db1xxx_mtd_map = { -+ .name = NAME, -+}; -+ -+static struct mtd_partition *parsed_parts; -+static struct mtd_info *db1xxx_mtd; -+ -+/* -+ * Probe the flash density and setup window address and size -+ * based on user CONFIG options. There are times when we don't -+ * want the MTD driver to be probing the boot or user flash, -+ * so having the option to enable only one bank is important. 
-+ */ -+int setup_flash_params(void) -+{ -+ switch ((bcsr->status >> 14) & 0x3) { -+ case 0: /* 64Mbit devices */ -+ flash_size = 0x800000; /* 8MB per part */ -+#if defined(DB1X00_BOTH_BANKS) -+ window_addr = 0x1E000000; -+ window_size = 0x2000000; -+#elif defined(DB1X00_BOOT_ONLY) -+ window_addr = 0x1F000000; -+ window_size = 0x1000000; -+#else /* USER ONLY */ -+ window_addr = 0x1E000000; -+ window_size = 0x1000000; -+#endif -+ break; -+ case 1: -+ /* 128 Mbit devices */ -+ flash_size = 0x1000000; /* 16MB per part */ -+#if defined(DB1X00_BOTH_BANKS) -+ window_addr = 0x1C000000; -+ window_size = 0x4000000; -+ /* USERFS from 0x1C00 0000 to 0x1FC0 0000 */ -+ db1x00_partitions[0].size = 0x3C00000; -+#elif defined(DB1X00_BOOT_ONLY) -+ window_addr = 0x1E000000; -+ window_size = 0x2000000; -+ /* USERFS from 0x1E00 0000 to 0x1FC0 0000 */ -+ db1x00_partitions[0].size = 0x1C00000; -+#else /* USER ONLY */ -+ window_addr = 0x1C000000; -+ window_size = 0x2000000; -+ /* USERFS from 0x1C00 0000 to 0x1DE00000 */ -+ db1x00_partitions[0].size = 0x1DE0000; -+#endif -+ break; -+ case 2: -+ /* 256 Mbit devices */ -+ flash_size = 0x4000000; /* 64MB per part */ -+#if defined(DB1X00_BOTH_BANKS) -+ return 1; -+#elif defined(DB1X00_BOOT_ONLY) -+ /* Boot ROM flash bank only; no user bank */ -+ window_addr = 0x1C000000; -+ window_size = 0x4000000; -+ /* USERFS from 0x1C00 0000 to 0x1FC00000 */ -+ db1x00_partitions[0].size = 0x3C00000; -+#else /* USER ONLY */ -+ return 1; -+#endif -+ break; -+ default: -+ return 1; -+ } -+ db1xxx_mtd_map.size = window_size; -+ db1xxx_mtd_map.bankwidth = flash_bankwidth; -+ db1xxx_mtd_map.phys = window_addr; -+ db1xxx_mtd_map.bankwidth = flash_bankwidth; -+ return 0; -+} -+ -+int __init db1x00_mtd_init(void) -+{ -+ struct mtd_partition *parts; -+ int nb_parts = 0; -+ -+ if (setup_flash_params()) -+ return -ENXIO; -+ -+ /* -+ * Static partition definition selection -+ */ -+ parts = db1x00_partitions; -+ nb_parts = NB_OF(db1x00_partitions); -+ -+ /* -+ * Now 
let's probe for the actual flash. Do it here since -+ * specific machine settings might have been set above. -+ */ -+ printk(KERN_NOTICE "Db1xxx flash: probing %d-bit flash bus\n", -+ db1xxx_mtd_map.bankwidth*8); -+ db1xxx_mtd_map.virt = (void __iomem *)ioremap(window_addr, window_size); -+ db1xxx_mtd = do_map_probe("cfi_probe", &db1xxx_mtd_map); -+ if (!db1xxx_mtd) return -ENXIO; -+ db1xxx_mtd->owner = THIS_MODULE; -+ -+ add_mtd_partitions(db1xxx_mtd, parts, nb_parts); -+ return 0; -+} -+ -+static void __exit db1x00_mtd_cleanup(void) -+{ -+ if (db1xxx_mtd) { -+ del_mtd_partitions(db1xxx_mtd); -+ map_destroy(db1xxx_mtd); -+ if (parsed_parts) -+ kfree(parsed_parts); -+ } -+} -+ -+module_init(db1x00_mtd_init); -+module_exit(db1x00_mtd_cleanup); -+ -+MODULE_AUTHOR("Pete Popov"); -+MODULE_DESCRIPTION("Db1x00 mtd map driver"); -+MODULE_LICENSE("GPL"); -Index: linux-2.6.5/drivers/mtd/maps/dbox2-flash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/dbox2-flash.c 2004-04-03 22:38:21.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/dbox2-flash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: dbox2-flash.c,v 1.9 2003/05/21 12:45:18 dwmw2 Exp $ -+ * $Id: dbox2-flash.c,v 1.12 2004/09/16 23:27:12 gleixner Exp $ - * - * D-Box 2 flash driver - */ -@@ -13,6 +13,7 @@ - #include <linux/mtd/map.h> - #include <linux/mtd/partitions.h> - #include <linux/config.h> -+#include <linux/errno.h> - - /* partition_info gives details on the logical partitions that the split the - * single flash device into. 
If the size if zero we use up to the end of the -@@ -25,31 +26,31 @@ - .mask_flags = MTD_WRITEABLE - }, - { -- .name = "flfs (ppcboot)", -+ .name = "FLFS (U-Boot)", - .size = 128 * 1024, - .offset = MTDPART_OFS_APPEND, - .mask_flags = 0 - }, - { -- .name = "root (cramfs)", -+ .name = "Root (SquashFS)", - .size = 7040 * 1024, - .offset = MTDPART_OFS_APPEND, - .mask_flags = 0 - }, - { -- .name = "var (jffs2)", -+ .name = "var (JFFS2)", - .size = 896 * 1024, - .offset = MTDPART_OFS_APPEND, - .mask_flags = 0 - }, - { -- .name = "flash without bootloader", -+ .name = "Flash without bootloader", - .size = MTDPART_SIZ_FULL, - .offset = 128 * 1024, - .mask_flags = 0 - }, - { -- .name = "complete flash", -+ .name = "Complete Flash", - .size = MTDPART_SIZ_FULL, - .offset = 0, - .mask_flags = MTD_WRITEABLE -@@ -67,14 +68,14 @@ - struct map_info dbox2_flash_map = { - .name = "D-Box 2 flash memory", - .size = WINDOW_SIZE, -- .buswidth = 4, -+ .bankwidth = 4, - .phys = WINDOW_ADDR, - }; - - int __init init_dbox2_flash(void) - { - printk(KERN_NOTICE "D-Box 2 flash driver (size->0x%X mem->0x%X)\n", WINDOW_SIZE, WINDOW_ADDR); -- dbox2_flash_map.virt = (unsigned long)ioremap(WINDOW_ADDR, WINDOW_SIZE); -+ dbox2_flash_map.virt = (void __iomem *)ioremap(WINDOW_ADDR, WINDOW_SIZE); - - if (!dbox2_flash_map.virt) { - printk("Failed to ioremap\n"); -@@ -86,7 +87,7 @@ - mymtd = do_map_probe("cfi_probe", &dbox2_flash_map); - if (!mymtd) { - // Probe for single Intel 28F640 -- dbox2_flash_map.buswidth = 2; -+ dbox2_flash_map.bankwidth = 2; - - mymtd = do_map_probe("cfi_probe", &dbox2_flash_map); - } -Index: linux-2.6.5/drivers/mtd/maps/dc21285.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/dc21285.c 2004-04-03 22:36:57.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/dc21285.c 2005-02-01 17:11:17.000000000 -0500 -@@ -5,13 +5,14 @@ - * - * This code is GPL - * -- * $Id: dc21285.c,v 1.15 2003/05/21 12:45:18 dwmw2 Exp $ -+ * $Id: 
dc21285.c,v 1.21 2004/09/16 23:27:13 gleixner Exp $ - */ - #include <linux/config.h> - #include <linux/module.h> - #include <linux/types.h> - #include <linux/kernel.h> - #include <linux/init.h> -+#include <linux/delay.h> - - #include <linux/mtd/mtd.h> - #include <linux/mtd/map.h> -@@ -19,91 +20,117 @@ - - #include <asm/io.h> - #include <asm/hardware/dec21285.h> -+#include <asm/mach-types.h> - - --static struct mtd_info *mymtd; -+static struct mtd_info *dc21285_mtd; - --__u8 dc21285_read8(struct map_info *map, unsigned long ofs) -+#ifdef CONFIG_ARCH_NETWINDER -+/* -+ * This is really ugly, but it seams to be the only -+ * realiable way to do it, as the cpld state machine -+ * is unpredictible. So we have a 25us penalty per -+ * write access. -+ */ -+static void nw_en_write(void) { -+ extern spinlock_t gpio_lock; -+ unsigned long flags; -+ -+ /* -+ * we want to write a bit pattern XXX1 to Xilinx to enable -+ * the write gate, which will be open for about the next 2ms. -+ */ -+ spin_lock_irqsave(&gpio_lock, flags); -+ cpld_modify(1, 1); -+ spin_unlock_irqrestore(&gpio_lock, flags); -+ -+ /* -+ * let the ISA bus to catch on... 
-+ */ -+ udelay(25); -+} -+#else -+#define nw_en_write() do { } while (0) -+#endif -+ -+static map_word dc21285_read8(struct map_info *map, unsigned long ofs) - { -- return *(__u8*)(map->map_priv_1 + ofs); -+ return *(uint8_t*)(map->map_priv_1 + ofs); - } - --__u16 dc21285_read16(struct map_info *map, unsigned long ofs) -+static map_word dc21285_read16(struct map_info *map, unsigned long ofs) - { -- return *(__u16*)(map->map_priv_1 + ofs); -+ return *(uint16_t*)(map->map_priv_1 + ofs); - } - --__u32 dc21285_read32(struct map_info *map, unsigned long ofs) -+static map_word dc21285_read32(struct map_info *map, unsigned long ofs) - { -- return *(__u32*)(map->map_priv_1 + ofs); -+ return *(uint32_t*)(map->map_priv_1 + ofs); - } - --void dc21285_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len) -+static void dc21285_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len) - { - memcpy(to, (void*)(map->map_priv_1 + from), len); - } - --void dc21285_write8(struct map_info *map, __u8 d, unsigned long adr) -+static void dc21285_write(struct map_info *map, map_word d, unsigned long adr) - { -+ if (machine_is_netwinder()) -+ nw_en_write(); - *CSR_ROMWRITEREG = adr & 3; - adr &= ~3; -- *(__u8*)(map->map_priv_1 + adr) = d; -+ *(uint8_t*)(map->map_priv_1 + adr) = d.x[0]; - } - --void dc21285_write16(struct map_info *map, __u16 d, unsigned long adr) -+static void dc21285_write16(struct map_info *map, map_word d, unsigned long adr) - { -+ if (machine_is_netwinder()) -+ nw_en_write(); - *CSR_ROMWRITEREG = adr & 3; - adr &= ~3; -- *(__u16*)(map->map_priv_1 + adr) = d; -+ *(uint16_t*)(map->map_priv_1 + adr) = d.x[0]; - } - --void dc21285_write32(struct map_info *map, __u32 d, unsigned long adr) -+static void dc21285_write32(struct map_info *map, map_word d, unsigned long adr) - { -- *(__u32*)(map->map_priv_1 + adr) = d; -+ if (machine_is_netwinder()) -+ nw_en_write(); -+ *(uint32_t*)(map->map_priv_1 + adr) = d.x[0]; - } - --void 
dc21285_copy_to(struct map_info *map, unsigned long to, const void *from, ssize_t len) -+static void dc21285_copy_to_32(struct map_info *map, unsigned long to, const void *from, ssize_t len) - { -- switch (map->buswidth) { -- case 4: -- while (len > 0) { -- __u32 d = *((__u32*)from)++; -- dc21285_write32(map, d, to); -- to += 4; -- len -= 4; -- } -- break; -- case 2: -- while (len > 0) { -- __u16 d = *((__u16*)from)++; -- dc21285_write16(map, d, to); -- to += 2; -- len -= 2; -- } -- break; -- case 1: -- while (len > 0) { -- __u8 d = *((__u8*)from)++; -- dc21285_write8(map, d, to); -- to++; -- len--; -- } -- break; -+ while (len > 0) { -+ uint32_t d = *((uint32_t*)from)++; -+ dc21285_write32(map, d, to); -+ to += 4; -+ len -= 4; -+ } -+} -+ -+static void dc21285_copy_to_16(struct map_info *map, unsigned long to, const void *from, ssize_t len) -+{ -+ while (len > 0) { -+ uint16_t d = *((uint16_t*)from)++; -+ dc21285_write16(map, d, to); -+ to += 2; -+ len -= 2; - } - } - --struct map_info dc21285_map = { -+static void dc21285_copy_to_8(struct map_info *map, unsigned long to, const void *from, ssize_t len) -+{ -+ uint8_t d = *((uint8_t*)from)++; -+ dc21285_write8(map, d, to); -+ to++; -+ len--; -+} -+ -+static struct map_info dc21285_map = { - .name = "DC21285 flash", - .phys = NO_XIP, - .size = 16*1024*1024, -- .read8 = dc21285_read8, -- .read16 = dc21285_read16, -- .read32 = dc21285_read32, - .copy_from = dc21285_copy_from, -- .write8 = dc21285_write8, -- .write16 = dc21285_write16, -- .write32 = dc21285_write32, -- .copy_to = dc21285_copy_to - }; - - -@@ -113,81 +140,97 @@ - static const char *probes[] = { "RedBoot", "cmdlinepart", NULL }; - #endif - --int __init init_dc21285(void) -+static int __init init_dc21285(void) - { - -- /* -- * Flash timing is determined with bits 19-16 of the -- * CSR_SA110_CNTL. The value is the number of wait cycles, or -- * 0 for 16 cycles (the default). Cycles are 20 ns. -- * Here we use 7 for 140 ns flash chips. 
-- */ -- /* access time */ -- *CSR_SA110_CNTL = ((*CSR_SA110_CNTL & ~0x000f0000) | (7 << 16)); -- /* burst time */ -- *CSR_SA110_CNTL = ((*CSR_SA110_CNTL & ~0x00f00000) | (7 << 20)); -- /* tristate time */ -- *CSR_SA110_CNTL = ((*CSR_SA110_CNTL & ~0x0f000000) | (7 << 24)); -+#ifdef CONFIG_MTD_PARTITIONS -+ int nrparts; -+#endif - -- /* Determine buswidth */ -+ /* Determine bankwidth */ - switch (*CSR_SA110_CNTL & (3<<14)) { - case SA110_CNTL_ROMWIDTH_8: -- dc21285_map.buswidth = 1; -+ dc21285_map.bankwidth = 1; -+ dc21285_map.read = dc21285_read8; -+ dc21285_map.write = dc21285_write8; -+ dc21285_map.copy_to = dc21285_copy_to_8; - break; - case SA110_CNTL_ROMWIDTH_16: -- dc21285_map.buswidth = 2; -+ dc21285_map.bankwidth = 2; -+ dc21285_map.read = dc21285_read16; -+ dc21285_map.write = dc21285_write16; -+ dc21285_map.copy_to = dc21285_copy_to_16; - break; - case SA110_CNTL_ROMWIDTH_32: -- dc21285_map.buswidth = 4; -+ dc21285_map.bankwidth = 4; - break; -+ dc21285_map.read = dc21285_read32; -+ dc21285_map.write = dc21285_write32; -+ dc21285_map.copy_to = dc21285_copy_to_32; - default: -- printk (KERN_ERR "DC21285 flash: undefined buswidth\n"); -+ printk (KERN_ERR "DC21285 flash: undefined bankwidth\n"); - return -ENXIO; - } -- printk (KERN_NOTICE "DC21285 flash support (%d-bit buswidth)\n", -- dc21285_map.buswidth*8); -+ printk (KERN_NOTICE "DC21285 flash support (%d-bit bankwidth)\n", -+ dc21285_map.bankwidth*8); - - /* Let's map the flash area */ -- dc21285_map.map_priv_1 = (unsigned long)ioremap(DC21285_FLASH, 16*1024*1024); -+ dc21285_map.map_priv_1 = (void __iomem *)ioremap(DC21285_FLASH, 16*1024*1024); - if (!dc21285_map.map_priv_1) { - printk("Failed to ioremap\n"); - return -EIO; - } - -- mymtd = do_map_probe("cfi_probe", &dc21285_map); -- if (mymtd) { -- int nrparts = 0; -+ if (machine_is_ebsa285()) { -+ dc21285_mtd = do_map_probe("cfi_probe", &dc21285_map); -+ } else { -+ dc21285_mtd = do_map_probe("jedec_probe", &dc21285_map); -+ } - -- mymtd->owner = 
THIS_MODULE; -- -- /* partition fixup */ -+ if (!dc21285_mtd) { -+ iounmap((void *)dc21285_map.map_priv_1); -+ return -ENXIO; -+ } -+ -+ dc21285_mtd->owner = THIS_MODULE; - - #ifdef CONFIG_MTD_PARTITIONS -- nrparts = parse_mtd_partitions(mymtd, probes, &dc21285_parts, (void *)0); -- if (nrparts > 0) { -- add_mtd_partitions(mymtd, dc21285_parts, nrparts); -- return 0; -- } --#endif -- add_mtd_device(mymtd); -- return 0; -+ nrparts = parse_mtd_partitions(dc21285_mtd, probes, &dc21285_parts, (void *)0); -+ if (nrparts > 0) -+ add_mtd_partitions(dc21285_mtd, dc21285_parts, nrparts); -+ else -+#endif -+ add_mtd_device(dc21285_mtd); -+ -+ if(machine_is_ebsa285()) { -+ /* -+ * Flash timing is determined with bits 19-16 of the -+ * CSR_SA110_CNTL. The value is the number of wait cycles, or -+ * 0 for 16 cycles (the default). Cycles are 20 ns. -+ * Here we use 7 for 140 ns flash chips. -+ */ -+ /* access time */ -+ *CSR_SA110_CNTL = ((*CSR_SA110_CNTL & ~0x000f0000) | (7 << 16)); -+ /* burst time */ -+ *CSR_SA110_CNTL = ((*CSR_SA110_CNTL & ~0x00f00000) | (7 << 20)); -+ /* tristate time */ -+ *CSR_SA110_CNTL = ((*CSR_SA110_CNTL & ~0x0f000000) | (7 << 24)); - } -- -- iounmap((void *)dc21285_map.map_priv_1); -- return -ENXIO; -+ -+ return 0; - } - - static void __exit cleanup_dc21285(void) - { - #ifdef CONFIG_MTD_PARTITIONS - if (dc21285_parts) { -- del_mtd_partitions(mymtd); -+ del_mtd_partitions(dc21285_mtd); - kfree(dc21285_parts); - } else - #endif -- del_mtd_device(mymtd); -+ del_mtd_device(dc21285_mtd); - -- map_destroy(mymtd); -+ map_destroy(dc21285_mtd); - iounmap((void *)dc21285_map.map_priv_1); - } - -Index: linux-2.6.5/drivers/mtd/maps/dilnetpc.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/dilnetpc.c 2004-04-03 22:36:26.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/dilnetpc.c 2005-02-01 17:11:17.000000000 -0500 -@@ -14,7 +14,7 @@ - * along with this program; if not, write to the Free Software - * 
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * -- * $Id: dilnetpc.c,v 1.12 2003/05/21 12:45:18 dwmw2 Exp $ -+ * $Id: dilnetpc.c,v 1.14 2004/09/16 23:27:13 gleixner Exp $ - * - * The DIL/Net PC is a tiny embedded PC board made by SSV Embedded Systems - * featuring the AMD Elan SC410 processor. There are two variants of this -@@ -252,7 +252,7 @@ - static struct map_info dnpc_map = { - .name = "ADNP Flash Bank", - .size = ADNP_WINDOW_SIZE, -- .buswidth = 1, -+ .bankwidth = 1, - .set_vpp = adnp_set_vpp, - .phys = WINDOW_ADDR - }; -@@ -403,7 +403,7 @@ - printk(KERN_NOTICE "DIL/Net %s flash: 0x%lx at 0x%lx\n", - is_dnp ? "DNPC" : "ADNP", dnpc_map.size, dnpc_map.phys); - -- dnpc_map.virt = (unsigned long)ioremap_nocache(dnpc_map.phys, dnpc_map.size); -+ dnpc_map.virt = (void __iomem *)ioremap_nocache(dnpc_map.phys, dnpc_map.size); - - dnpc_map_flash(dnpc_map.phys, dnpc_map.size); - -Index: linux-2.6.5/drivers/mtd/maps/dmv182.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/dmv182.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/dmv182.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,150 @@ -+ -+/* -+ * drivers/mtd/maps/svme182.c -+ * -+ * Flash map driver for the Dy4 SVME182 board -+ * -+ * $Id: dmv182.c,v 1.4 2004/09/16 23:27:13 gleixner Exp $ -+ * -+ * Copyright 2003-2004, TimeSys Corporation -+ * -+ * Based on the SVME181 flash map, by Tom Nelson, Dot4, Inc. for TimeSys Corp. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the -+ * Free Software Foundation; either version 2 of the License, or (at your -+ * option) any later version. 
-+ */ -+ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/types.h> -+#include <linux/kernel.h> -+#include <asm/io.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/map.h> -+#include <linux/mtd/partitions.h> -+#include <linux/errno.h> -+ -+/* -+ * This driver currently handles only the 16MiB user flash bank 1 on the -+ * board. It does not provide access to bank 0 (contains the Dy4 FFW), bank 2 -+ * (VxWorks boot), or the optional 48MiB expansion flash. -+ * -+ * scott.wood@timesys.com: On the newer boards with 128MiB flash, it -+ * now supports the first 96MiB (the boot flash bank containing FFW -+ * is excluded). The VxWorks loader is in partition 1. -+ */ -+ -+#define FLASH_BASE_ADDR 0xf0000000 -+#define FLASH_BANK_SIZE (128*1024*1024) -+ -+MODULE_AUTHOR("Scott Wood, TimeSys Corporation <scott.wood@timesys.com>"); -+MODULE_DESCRIPTION("User-programmable flash device on the Dy4 SVME182 board"); -+MODULE_LICENSE("GPL"); -+ -+static struct map_info svme182_map = { -+ .name = "Dy4 SVME182", -+ .bankwidth = 32, -+ .size = 128 * 1024 * 1024 -+}; -+ -+#define BOOTIMAGE_PART_SIZE ((6*1024*1024)-RESERVED_PART_SIZE) -+ -+// Allow 6MiB for the kernel -+#define NEW_BOOTIMAGE_PART_SIZE (6 * 1024 * 1024) -+// Allow 1MiB for the bootloader -+#define NEW_BOOTLOADER_PART_SIZE (1024 * 1024) -+// Use the remaining 9MiB at the end of flash for the RFS -+#define NEW_RFS_PART_SIZE (0x01000000 - NEW_BOOTLOADER_PART_SIZE - \ -+ NEW_BOOTIMAGE_PART_SIZE) -+ -+static struct mtd_partition svme182_partitions[] = { -+ // The Lower PABS is only 128KiB, but the partition code doesn't -+ // like partitions that don't end on the largest erase block -+ // size of the device, even if all of the erase blocks in the -+ // partition are small ones. The hardware should prevent -+ // writes to the actual PABS areas. 
-+ { -+ name: "Lower PABS and CPU 0 bootloader or kernel", -+ size: 6*1024*1024, -+ offset: 0, -+ }, -+ { -+ name: "Root Filesystem", -+ size: 10*1024*1024, -+ offset: MTDPART_OFS_NXTBLK -+ }, -+ { -+ name: "CPU1 Bootloader", -+ size: 1024*1024, -+ offset: MTDPART_OFS_NXTBLK, -+ }, -+ { -+ name: "Extra", -+ size: 110*1024*1024, -+ offset: MTDPART_OFS_NXTBLK -+ }, -+ { -+ name: "Foundation Firmware and Upper PABS", -+ size: 1024*1024, -+ offset: MTDPART_OFS_NXTBLK, -+ mask_flags: MTD_WRITEABLE // read-only -+ } -+}; -+ -+static struct mtd_info *this_mtd; -+ -+static int __init init_svme182(void) -+{ -+ struct mtd_partition *partitions; -+ int num_parts = sizeof(svme182_partitions) / sizeof(struct mtd_partition); -+ -+ partitions = svme182_partitions; -+ -+ svme182_map.virt = -+ (void __iomem *)ioremap(FLASH_BASE_ADDR, svme182_map.size); -+ -+ if (svme182_map.virt == 0) { -+ printk("Failed to ioremap FLASH memory area.\n"); -+ return -EIO; -+ } -+ -+ simple_map_init(&svme182_map); -+ -+ this_mtd = do_map_probe("cfi_probe", &svme182_map); -+ if (!this_mtd) -+ { -+ iounmap((void *)svme182_map.virt); -+ return -ENXIO; -+ } -+ -+ printk(KERN_NOTICE "SVME182 flash device: %dMiB at 0x%08x\n", -+ this_mtd->size >> 20, FLASH_BASE_ADDR); -+ -+ this_mtd->owner = THIS_MODULE; -+ add_mtd_partitions(this_mtd, partitions, num_parts); -+ -+ return 0; -+} -+ -+static void __exit cleanup_svme182(void) -+{ -+ if (this_mtd) -+ { -+ del_mtd_partitions(this_mtd); -+ map_destroy(this_mtd); -+ } -+ -+ if (svme182_map.virt) -+ { -+ iounmap((void *)svme182_map.virt); -+ svme182_map.virt = 0; -+ } -+ -+ return; -+} -+ -+module_init(init_svme182); -+module_exit(cleanup_svme182); -Index: linux-2.6.5/drivers/mtd/maps/ebony.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/ebony.c 2004-04-03 22:37:59.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/ebony.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,11 +1,11 @@ - /* -- * $Id: ebony.c,v 
1.8 2003/06/23 11:48:18 dwmw2 Exp $ -+ * $Id: ebony.c,v 1.12 2004/09/16 23:27:13 gleixner Exp $ - * - * Mapping for Ebony user flash - * -- * Matt Porter <mporter@mvista.com> -+ * Matt Porter <mporter@kernel.crashing.org> - * -- * Copyright 2002 MontaVista Software Inc. -+ * Copyright 2002-2004 MontaVista Software Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the -@@ -21,22 +21,23 @@ - #include <linux/mtd/map.h> - #include <linux/mtd/partitions.h> - #include <linux/config.h> -+#include <linux/version.h> - #include <asm/io.h> --#include <asm/ibm440.h> --#include <platforms/ebony.h> -+#include <asm/ibm44x.h> -+#include <platforms/4xx/ebony.h> - - static struct mtd_info *flash; - - static struct map_info ebony_small_map = { - .name = "Ebony small flash", - .size = EBONY_SMALL_FLASH_SIZE, -- .buswidth = 1, -+ .bankwidth = 1, - }; - - static struct map_info ebony_large_map = { - .name = "Ebony large flash", - .size = EBONY_LARGE_FLASH_SIZE, -- .buswidth = 1, -+ .bankwidth = 1, - }; - - static struct mtd_partition ebony_small_partitions[] = { -@@ -63,7 +64,7 @@ - int __init init_ebony(void) - { - u8 fpga0_reg; -- unsigned long fpga0_adr; -+ u8 *fpga0_adr; - unsigned long long small_flash_base, large_flash_base; - - fpga0_adr = ioremap64(EBONY_FPGA_ADDR, 16); -@@ -71,7 +72,7 @@ - return -ENOMEM; - - fpga0_reg = readb(fpga0_adr); -- iounmap64(fpga0_adr); -+ iounmap(fpga0_adr); - - if (EBONY_BOOT_SMALL_FLASH(fpga0_reg) && - !EBONY_FLASH_SEL(fpga0_reg)) -@@ -93,7 +94,7 @@ - - ebony_small_map.phys = small_flash_base; - ebony_small_map.virt = -- (unsigned long)ioremap64(small_flash_base, -+ (void __iomem *)ioremap64(small_flash_base, - ebony_small_map.size); - - if (!ebony_small_map.virt) { -@@ -160,5 +161,5 @@ - module_exit(cleanup_ebony); - - MODULE_LICENSE("GPL"); --MODULE_AUTHOR("Matt Porter <mporter@mvista.com>"); -+MODULE_AUTHOR("Matt Porter 
<mporter@kernel.crashing.org>"); - MODULE_DESCRIPTION("MTD map and partitions for IBM 440GP Ebony boards"); -Index: linux-2.6.5/drivers/mtd/maps/edb7312.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/edb7312.c 2004-04-03 22:38:27.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/edb7312.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: edb7312.c,v 1.9 2003/06/23 11:48:18 dwmw2 Exp $ -+ * $Id: edb7312.c,v 1.12 2004/09/16 23:27:13 gleixner Exp $ - * - * Handle mapping of the NOR flash on Cogent EDB7312 boards - * -@@ -28,8 +28,8 @@ - #define BUSWIDTH 2 - #define FLASH_BLOCKSIZE_MAIN 0x20000 - #define FLASH_NUMBLOCKS_MAIN 128 --/* can be "cfi_probe", "jedec_probe", "map_rom", 0 }; */ --#define PROBETYPES { "cfi_probe", 0 } -+/* can be "cfi_probe", "jedec_probe", "map_rom", NULL }; */ -+#define PROBETYPES { "cfi_probe", NULL } - - #define MSG_PREFIX "EDB7312-NOR:" /* prefix for our printk()'s */ - #define MTDID "edb7312-nor" /* for mtdparts= partitioning */ -@@ -39,7 +39,7 @@ - struct map_info edb7312nor_map = { - .name = "NOR flash on EDB7312", - .size = WINDOW_SIZE, -- .buswidth = BUSWIDTH, -+ .bankwidth = BUSWIDTH, - .phys = WINDOW_ADDR, - }; - -@@ -82,8 +82,8 @@ - - printk(KERN_NOTICE MSG_PREFIX "0x%08x at 0x%08x\n", - WINDOW_SIZE, WINDOW_ADDR); -- edb7312nor_map.virt = (unsigned long) -- ioremap(WINDOW_ADDR, WINDOW_SIZE); -+ edb7312nor_map.virt = (void __iomem *) -+ ioremap(WINDOW_ADDR, WINDOW_SIZE); - - if (!edb7312nor_map.virt) { - printk(MSG_PREFIX "failed to ioremap\n"); -Index: linux-2.6.5/drivers/mtd/maps/elan-104nc.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/elan-104nc.c 2004-04-03 22:37:06.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/elan-104nc.c 2005-02-01 17:11:17.000000000 -0500 -@@ -16,7 +16,7 @@ - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 
330, Boston, MA 02111-1307, USA - -- $Id: elan-104nc.c,v 1.18 2003/06/23 07:37:02 dwmw2 Exp $ -+ $Id: elan-104nc.c,v 1.22 2004/09/16 23:27:13 gleixner Exp $ - - The ELAN-104NC has up to 8 Mibyte of Intel StrataFlash (28F320/28F640) in x16 - mode. This drivers uses the CFI probe and Intel Extended Command Set drivers. -@@ -53,7 +53,7 @@ - #define PAGE_IO_SIZE 2 - - static volatile int page_in_window = -1; // Current page in window. --static unsigned long iomapadr; -+static void __iomem *iomapadr; - static spinlock_t elan_104nc_spin = SPIN_LOCK_UNLOCKED; - - /* partition_info gives details on the logical partitions that the split the -@@ -107,39 +107,19 @@ - } - - --static __u8 elan_104nc_read8(struct map_info *map, unsigned long ofs) -+static map_word elan_104nc_read16(struct map_info *map, unsigned long ofs) - { -- __u8 ret; -+ map_word ret; - spin_lock(&elan_104nc_spin); - elan_104nc_page(map, ofs); -- ret = readb(iomapadr + (ofs & WINDOW_MASK)); -- spin_unlock(&elan_104nc_spin); -- return ret; --} -- --static __u16 elan_104nc_read16(struct map_info *map, unsigned long ofs) --{ -- __u16 ret; -- spin_lock(&elan_104nc_spin); -- elan_104nc_page(map, ofs); -- ret = readw(iomapadr + (ofs & WINDOW_MASK)); -- spin_unlock(&elan_104nc_spin); -- return ret; --} -- --static __u32 elan_104nc_read32(struct map_info *map, unsigned long ofs) --{ -- __u32 ret; -- spin_lock(&elan_104nc_spin); -- elan_104nc_page(map, ofs); -- ret = readl(iomapadr + (ofs & WINDOW_MASK)); -+ ret.x[0] = readw(iomapadr + (ofs & WINDOW_MASK)); - spin_unlock(&elan_104nc_spin); - return ret; - } - - static void elan_104nc_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len) - { -- while(len) { -+ while (len) { - unsigned long thislen = len; - if (len > (WINDOW_LENGTH - (from & WINDOW_MASK))) - thislen = WINDOW_LENGTH-(from & WINDOW_MASK); -@@ -154,27 +134,11 @@ - } - } - --static void elan_104nc_write8(struct map_info *map, __u8 d, unsigned long adr) --{ -- 
spin_lock(&elan_104nc_spin); -- elan_104nc_page(map, adr); -- writeb(d, iomapadr + (adr & WINDOW_MASK)); -- spin_unlock(&elan_104nc_spin); --} -- --static void elan_104nc_write16(struct map_info *map, __u16 d, unsigned long adr) --{ -- spin_lock(&elan_104nc_spin); -- elan_104nc_page(map, adr); -- writew(d, iomapadr + (adr & WINDOW_MASK)); -- spin_unlock(&elan_104nc_spin); --} -- --static void elan_104nc_write32(struct map_info *map, __u32 d, unsigned long adr) -+static void elan_104nc_write16(struct map_info *map, map_word d, unsigned long adr) - { - spin_lock(&elan_104nc_spin); - elan_104nc_page(map, adr); -- writel(d, iomapadr + (adr & WINDOW_MASK)); -+ writew(d.x[0], iomapadr + (adr & WINDOW_MASK)); - spin_unlock(&elan_104nc_spin); - } - -@@ -201,14 +165,10 @@ - .size = 8*1024*1024, /* this must be set to a maximum possible amount - of flash so the cfi probe routines find all - the chips */ -- .buswidth = 2, -- .read8 = elan_104nc_read8, -- .read16 = elan_104nc_read16, -- .read32 = elan_104nc_read32, -+ .bankwidth = 2, -+ .read = elan_104nc_read16, - .copy_from = elan_104nc_copy_from, -- .write8 = elan_104nc_write8, -- .write16 = elan_104nc_write16, -- .write32 = elan_104nc_write32, -+ .write = elan_104nc_write16, - .copy_to = elan_104nc_copy_to - }; - -@@ -230,7 +190,7 @@ - /* Urg! We use I/O port 0x22 without request_region()ing it, - because it's already allocated to the PIC. 
*/ - -- iomapadr = (unsigned long)ioremap(WINDOW_START, WINDOW_LENGTH); -+ iomapadr = (void __iomem *)ioremap(WINDOW_START, WINDOW_LENGTH); - if (!iomapadr) { - printk( KERN_ERR"%s: failed to ioremap memory region\n", - elan_104nc_map.name ); -Index: linux-2.6.5/drivers/mtd/maps/epxa10db-flash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/epxa10db-flash.c 2004-04-03 22:37:39.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/epxa10db-flash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -5,7 +5,7 @@ - * Copyright (C) 2001 Altera Corporation - * Copyright (C) 2001 Red Hat, Inc. - * -- * $Id: epxa10db-flash.c,v 1.10 2003/05/21 12:45:18 dwmw2 Exp $ -+ * $Id: epxa10db-flash.c,v 1.12 2004/09/16 23:27:13 gleixner Exp $ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by -@@ -50,7 +50,7 @@ - static struct map_info epxa_map = { - .name = "EPXA flash", - .size = FLASH_SIZE, -- .buswidth = 2, -+ .bankwidth = 2, - .phys = FLASH_START, - }; - -@@ -62,7 +62,7 @@ - - printk(KERN_NOTICE "%s flash device: 0x%x at 0x%x\n", BOARD_NAME, FLASH_SIZE, FLASH_START); - -- epxa_map.virt = (unsigned long)ioremap(FLASH_START, FLASH_SIZE); -+ epxa_map.virt = (void __iomem *)ioremap(FLASH_START, FLASH_SIZE); - if (!epxa_map.virt) { - printk("Failed to ioremap %s flash\n",BOARD_NAME); - return -EIO; -Index: linux-2.6.5/drivers/mtd/maps/fortunet.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/fortunet.c 2004-04-03 22:36:27.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/fortunet.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,6 +1,6 @@ - /* fortunet.c memory map - * -- * $Id: fortunet.c,v 1.6 2003/05/21 12:45:18 dwmw2 Exp $ -+ * $Id: fortunet.c,v 1.8 2004/09/16 23:27:13 gleixner Exp $ - */ - - #include <linux/module.h> -@@ -25,7 +25,7 @@ - struct map_region - { - int 
window_addr_physical; -- int altbuswidth; -+ int altbankwidth; - struct map_info map_info; - struct mtd_info *mymtd; - struct mtd_partition parts[MAX_NUM_PARTITIONS]; -@@ -41,7 +41,7 @@ - - struct map_info default_map = { - .size = DEF_WINDOW_SIZE, -- .buswidth = 4, -+ .bankwidth = 4, - }; - - static char * __init get_string_option(char *dest,int dest_size,char *sor) -@@ -102,7 +102,7 @@ - if(params[0]<1) - { - printk(MTD_FORTUNET_PK "Bad parameters for MTD Region " -- " name,region-number[,base,size,buswidth,altbuswidth]\n"); -+ " name,region-number[,base,size,bankwidth,altbankwidth]\n"); - return 1; - } - if((params[1]<0)||(params[1]>=MAX_NUM_REGIONS)) -@@ -116,7 +116,7 @@ - &default_map,sizeof(map_regions[params[1]].map_info)); - map_regions_set[params[1]] = 1; - map_regions[params[1]].window_addr_physical = DEF_WINDOW_ADDR_PHY; -- map_regions[params[1]].altbuswidth = 2; -+ map_regions[params[1]].altbankwidth = 2; - map_regions[params[1]].mymtd = NULL; - map_regions[params[1]].map_info.name = map_regions[params[1]].map_name; - strcpy(map_regions[params[1]].map_info.name,string); -@@ -130,11 +130,11 @@ - } - if(params[0]>3) - { -- map_regions[params[1]].map_info.buswidth = params[4]; -+ map_regions[params[1]].map_info.bankwidth = params[4]; - } - if(params[0]>4) - { -- map_regions[params[1]].altbuswidth = params[5]; -+ map_regions[params[1]].altbankwidth = params[5]; - } - return 1; - } -@@ -193,7 +193,7 @@ - sizeof(map_regions[ix].map_info)); - map_regions_set[ix] = 1; - map_regions[ix].window_addr_physical = DEF_WINDOW_ADDR_PHY; -- map_regions[ix].altbuswidth = 2; -+ map_regions[ix].altbankwidth = 2; - map_regions[ix].mymtd = NULL; - map_regions[ix].map_info.name = map_regions[ix].map_name; - strcpy(map_regions[ix].map_info.name,"FORTUNET"); -@@ -210,7 +210,7 @@ - map_regions[ix].map_info.phys = map_regions[ix].window_addr_physical, - - map_regions[ix].map_info.virt = -- (int)ioremap_nocache( -+ (void __iomem *)ioremap_nocache( - 
map_regions[ix].window_addr_physical, - map_regions[ix].map_info.size); - if(!map_regions[ix].map_info.virt) -@@ -227,13 +227,13 @@ - map_regions[ix].mymtd = do_map_probe("cfi_probe", - &map_regions[ix].map_info); - if((!map_regions[ix].mymtd)&&( -- map_regions[ix].altbuswidth!=map_regions[ix].map_info.buswidth)) -+ map_regions[ix].altbankwidth!=map_regions[ix].map_info.bankwidth)) - { -- printk(KERN_NOTICE MTD_FORTUNET_PK "Trying alternate buswidth " -+ printk(KERN_NOTICE MTD_FORTUNET_PK "Trying alternate bankwidth " - "for %s flash.\n", - map_regions[ix].map_info.name); -- map_regions[ix].map_info.buswidth = -- map_regions[ix].altbuswidth; -+ map_regions[ix].map_info.bankwidth = -+ map_regions[ix].altbankwidth; - map_regions[ix].mymtd = do_map_probe("cfi_probe", - &map_regions[ix].map_info); - } -Index: linux-2.6.5/drivers/mtd/maps/h720x-flash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/h720x-flash.c 2004-04-03 22:36:26.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/h720x-flash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -2,9 +2,11 @@ - * Flash memory access on Hynix GMS30C7201/HMS30C7202 based - * evaluation boards - * -+ * $Id: h720x-flash.c,v 1.10 2004/09/16 23:27:13 gleixner Exp $ -+ * - * (C) 2002 Jungjun Kim <jungjun.kim@hynix.com> - * 2003 Thomas Gleixner <tglx@linutronix.de> --*/ -+ */ - - #include <linux/config.h> - #include <linux/module.h> -@@ -24,7 +26,7 @@ - - static struct map_info h720x_map = { - .name = "H720X", -- .buswidth = 4, -+ .bankwidth = 4, - .size = FLASH_SIZE, - .phys = FLASH_PHYS, - }; -@@ -71,7 +73,7 @@ - - char *part_type = NULL; - -- h720x_map.virt = (unsigned long)ioremap(FLASH_PHYS, FLASH_SIZE); -+ h720x_map.virt = (void __iomem *)ioremap(FLASH_PHYS, FLASH_SIZE); - - if (!h720x_map.virt) { - printk(KERN_ERR "H720x-MTD: ioremap failed\n"); -@@ -80,13 +82,13 @@ - - simple_map_init(&h720x_map); - -- // Probe for flash buswidth 4 -+ // Probe for flash bankwidth 4 - 
printk (KERN_INFO "H720x-MTD probing 32bit FLASH\n"); - mymtd = do_map_probe("cfi_probe", &h720x_map); - if (!mymtd) { - printk (KERN_INFO "H720x-MTD probing 16bit FLASH\n"); -- // Probe for buswidth 2 -- h720x_map.buswidth = 2; -+ // Probe for bankwidth 2 -+ h720x_map.bankwidth = 2; - mymtd = do_map_probe("cfi_probe", &h720x_map); - } - -Index: linux-2.6.5/drivers/mtd/maps/ichxrom.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/ichxrom.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/ichxrom.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,380 @@ -+/* -+ * ichxrom.c -+ * -+ * Normal mappings of chips in physical memory -+ * $Id: ichxrom.c,v 1.13 2004/09/17 11:45:06 eric Exp $ -+ */ -+ -+#include <linux/module.h> -+#include <linux/types.h> -+#include <linux/kernel.h> -+#include <linux/init.h> -+#include <asm/io.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/map.h> -+#include <linux/mtd/cfi.h> -+#include <linux/mtd/flashchip.h> -+#include <linux/config.h> -+#include <linux/pci.h> -+#include <linux/pci_ids.h> -+#include <linux/list.h> -+ -+#define xstr(s) str(s) -+#define str(s) #s -+#define MOD_NAME xstr(KBUILD_BASENAME) -+ -+#define ADDRESS_NAME_LEN 18 -+ -+#define ROM_PROBE_STEP_SIZE (64*1024) /* 64KiB */ -+ -+#define BIOS_CNTL 0x4e -+#define FWH_DEC_EN1 0xE3 -+#define FWH_DEC_EN2 0xF0 -+#define FWH_SEL1 0xE8 -+#define FWH_SEL2 0xEE -+ -+struct ichxrom_window { -+ void __iomem* virt; -+ unsigned long phys; -+ unsigned long size; -+ struct list_head maps; -+ struct resource rsrc; -+ struct pci_dev *pdev; -+}; -+ -+struct ichxrom_map_info { -+ struct list_head list; -+ struct map_info map; -+ struct mtd_info *mtd; -+ struct resource rsrc; -+ char map_name[sizeof(MOD_NAME) + 2 + ADDRESS_NAME_LEN]; -+}; -+ -+static struct ichxrom_window ichxrom_window = { -+ .maps = LIST_HEAD_INIT(ichxrom_window.maps), -+}; -+ -+static void ichxrom_cleanup(struct ichxrom_window *window) -+{ -+ 
struct ichxrom_map_info *map, *scratch; -+ u16 word; -+ -+ /* Disable writes through the rom window */ -+ pci_read_config_word(window->pdev, BIOS_CNTL, &word); -+ pci_write_config_word(window->pdev, BIOS_CNTL, word & ~1); -+ -+ /* Free all of the mtd devices */ -+ list_for_each_entry_safe(map, scratch, &window->maps, list) { -+ if (map->rsrc.parent) -+ release_resource(&map->rsrc); -+ del_mtd_device(map->mtd); -+ map_destroy(map->mtd); -+ list_del(&map->list); -+ kfree(map); -+ } -+ if (window->rsrc.parent) -+ release_resource(&window->rsrc); -+ if (window->virt) { -+ iounmap(window->virt); -+ window->virt = NULL; -+ window->phys = 0; -+ window->size = 0; -+ window->pdev = NULL; -+ } -+} -+ -+ -+static int __devinit ichxrom_init_one (struct pci_dev *pdev, -+ const struct pci_device_id *ent) -+{ -+ static char *rom_probe_types[] = { "cfi_probe", "jedec_probe", NULL }; -+ struct ichxrom_window *window = &ichxrom_window; -+ struct ichxrom_map_info *map = 0; -+ unsigned long map_top; -+ u8 byte; -+ u16 word; -+ -+ /* For now I just handle the ichx and I assume there -+ * are not a lot of resources up at the top of the address -+ * space. It is possible to handle other devices in the -+ * top 16MB but it is very painful. Also since -+ * you can only really attach a FWH to an ICHX there -+ * a number of simplifications you can make. -+ * -+ * Also you can page firmware hubs if an 8MB window isn't enough -+ * but don't currently handle that case either. 
-+ */ -+ window->pdev = pdev; -+ -+ /* Find a region continuous to the end of the ROM window */ -+ window->phys = 0; -+ pci_read_config_byte(pdev, FWH_DEC_EN1, &byte); -+ if (byte == 0xff) { -+ window->phys = 0xffc00000; -+ pci_read_config_byte(pdev, FWH_DEC_EN2, &byte); -+ if ((byte & 0x0f) == 0x0f) { -+ window->phys = 0xff400000; -+ } -+ else if ((byte & 0x0e) == 0x0e) { -+ window->phys = 0xff500000; -+ } -+ else if ((byte & 0x0c) == 0x0c) { -+ window->phys = 0xff600000; -+ } -+ else if ((byte & 0x08) == 0x08) { -+ window->phys = 0xff700000; -+ } -+ } -+ else if ((byte & 0xfe) == 0xfe) { -+ window->phys = 0xffc80000; -+ } -+ else if ((byte & 0xfc) == 0xfc) { -+ window->phys = 0xffd00000; -+ } -+ else if ((byte & 0xf8) == 0xf8) { -+ window->phys = 0xffd80000; -+ } -+ else if ((byte & 0xf0) == 0xf0) { -+ window->phys = 0xffe00000; -+ } -+ else if ((byte & 0xe0) == 0xe0) { -+ window->phys = 0xffe80000; -+ } -+ else if ((byte & 0xc0) == 0xc0) { -+ window->phys = 0xfff00000; -+ } -+ else if ((byte & 0x80) == 0x80) { -+ window->phys = 0xfff80000; -+ } -+ -+ if (window->phys == 0) { -+ printk(KERN_ERR MOD_NAME ": Rom window is closed\n"); -+ goto out; -+ } -+ window->phys -= 0x400000UL; -+ window->size = (0xffffffffUL - window->phys) + 1UL; -+ -+ /* Enable writes through the rom window */ -+ pci_read_config_word(pdev, BIOS_CNTL, &word); -+ if (!(word & 1) && (word & (1<<1))) { -+ /* The BIOS will generate an error if I enable -+ * this device, so don't even try. -+ */ -+ printk(KERN_ERR MOD_NAME ": firmware access control, I can't enable writes\n"); -+ goto out; -+ } -+ pci_write_config_word(pdev, BIOS_CNTL, word | 1); -+ -+ /* -+ * Try to reserve the window mem region. If this fails then -+ * it is likely due to the window being "reseved" by the BIOS. 
-+ */ -+ window->rsrc.name = MOD_NAME; -+ window->rsrc.start = window->phys; -+ window->rsrc.end = window->phys + window->size - 1; -+ window->rsrc.flags = IORESOURCE_MEM | IORESOURCE_BUSY; -+ if (request_resource(&iomem_resource, &window->rsrc)) { -+ window->rsrc.parent = NULL; -+ printk(KERN_DEBUG MOD_NAME -+ ": %s(): Unable to register resource" -+ " 0x%.08lx-0x%.08lx - kernel bug?\n", -+ __func__, -+ window->rsrc.start, window->rsrc.end); -+ } -+ -+ /* Map the firmware hub into my address space. */ -+ window->virt = ioremap_nocache(window->phys, window->size); -+ if (!window->virt) { -+ printk(KERN_ERR MOD_NAME ": ioremap(%08lx, %08lx) failed\n", -+ window->phys, window->size); -+ goto out; -+ } -+ -+ /* Get the first address to look for an rom chip at */ -+ map_top = window->phys; -+ if ((window->phys & 0x3fffff) != 0) { -+ map_top = window->phys + 0x400000; -+ } -+#if 1 -+ /* The probe sequence run over the firmware hub lock -+ * registers sets them to 0x7 (no access). -+ * Probe at most the last 4M of the address space. -+ */ -+ if (map_top < 0xffc00000) { -+ map_top = 0xffc00000; -+ } -+#endif -+ /* Loop through and look for rom chips */ -+ while((map_top - 1) < 0xffffffffUL) { -+ struct cfi_private *cfi; -+ unsigned long offset; -+ int i; -+ -+ if (!map) { -+ map = kmalloc(sizeof(*map), GFP_KERNEL); -+ } -+ if (!map) { -+ printk(KERN_ERR MOD_NAME ": kmalloc failed"); -+ goto out; -+ } -+ memset(map, 0, sizeof(*map)); -+ INIT_LIST_HEAD(&map->list); -+ map->map.name = map->map_name; -+ map->map.phys = map_top; -+ offset = map_top - window->phys; -+ map->map.virt = (void __iomem *) -+ (((unsigned long)(window->virt)) + offset); -+ map->map.size = 0xffffffffUL - map_top + 1UL; -+ /* Set the name of the map to the address I am trying */ -+ sprintf(map->map_name, "%s @%08lx", -+ MOD_NAME, map->map.phys); -+ -+ /* Firmware hubs only use vpp when being programmed -+ * in a factory setting. So in-place programming -+ * needs to use a different method. 
-+ */ -+ for(map->map.bankwidth = 32; map->map.bankwidth; -+ map->map.bankwidth >>= 1) -+ { -+ char **probe_type; -+ /* Skip bankwidths that are not supported */ -+ if (!map_bankwidth_supported(map->map.bankwidth)) -+ continue; -+ -+ /* Setup the map methods */ -+ simple_map_init(&map->map); -+ -+ /* Try all of the probe methods */ -+ probe_type = rom_probe_types; -+ for(; *probe_type; probe_type++) { -+ map->mtd = do_map_probe(*probe_type, &map->map); -+ if (map->mtd) -+ goto found; -+ } -+ } -+ map_top += ROM_PROBE_STEP_SIZE; -+ continue; -+ found: -+ /* Trim the size if we are larger than the map */ -+ if (map->mtd->size > map->map.size) { -+ printk(KERN_WARNING MOD_NAME -+ " rom(%u) larger than window(%lu). fixing...\n", -+ map->mtd->size, map->map.size); -+ map->mtd->size = map->map.size; -+ } -+ if (window->rsrc.parent) { -+ /* -+ * Registering the MTD device in iomem may not be possible -+ * if there is a BIOS "reserved" and BUSY range. If this -+ * fails then continue anyway. -+ */ -+ map->rsrc.name = map->map_name; -+ map->rsrc.start = map->map.phys; -+ map->rsrc.end = map->map.phys + map->mtd->size - 1; -+ map->rsrc.flags = IORESOURCE_MEM | IORESOURCE_BUSY; -+ if (request_resource(&window->rsrc, &map->rsrc)) { -+ printk(KERN_ERR MOD_NAME -+ ": cannot reserve MTD resource\n"); -+ map->rsrc.parent = NULL; -+ } -+ } -+ -+ /* Make the whole region visible in the map */ -+ map->map.virt = window->virt; -+ map->map.phys = window->phys; -+ cfi = map->map.fldrv_priv; -+ for(i = 0; i < cfi->numchips; i++) { -+ cfi->chips[i].start += offset; -+ } -+ -+ /* Now that the mtd devices is complete claim and export it */ -+ map->mtd->owner = THIS_MODULE; -+ add_mtd_device(map->mtd); -+ -+ -+ /* Calculate the new value of map_top */ -+ map_top += map->mtd->size; -+ -+ /* File away the map structure */ -+ list_add(&map->list, &window->maps); -+ map = 0; -+ } -+ -+ out: -+ /* Free any left over map structures */ -+ if (map) { -+ kfree(map); -+ } -+ /* See if I have any map 
structures */ -+ if (list_empty(&window->maps)) { -+ ichxrom_cleanup(window); -+ return -ENODEV; -+ } -+ return 0; -+ -+} -+ -+ -+static void __devexit ichxrom_remove_one (struct pci_dev *pdev) -+{ -+ struct ichxrom_window *window = &ichxrom_window; -+ ichxrom_cleanup(window); -+} -+ -+static struct pci_device_id ichxrom_pci_tbl[] __devinitdata = { -+ { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0, -+ PCI_ANY_ID, PCI_ANY_ID, }, -+ { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, -+ PCI_ANY_ID, PCI_ANY_ID, }, -+ { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0, -+ PCI_ANY_ID, PCI_ANY_ID, }, -+ { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, -+ PCI_ANY_ID, PCI_ANY_ID, }, -+ { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1, -+ PCI_ANY_ID, PCI_ANY_ID, }, -+ { 0, }, -+}; -+ -+MODULE_DEVICE_TABLE(pci, ichxrom_pci_tbl); -+ -+#if 0 -+static struct pci_driver ichxrom_driver = { -+ .name = MOD_NAME, -+ .id_table = ichxrom_pci_tbl, -+ .probe = ichxrom_init_one, -+ .remove = ichxrom_remove_one, -+}; -+#endif -+ -+int __init init_ichxrom(void) -+{ -+ struct pci_dev *pdev; -+ struct pci_device_id *id; -+ -+ pdev = NULL; -+ for (id = ichxrom_pci_tbl; id->vendor; id++) { -+ pdev = pci_find_device(id->vendor, id->device, NULL); -+ if (pdev) { -+ break; -+ } -+ } -+ if (pdev) { -+ return ichxrom_init_one(pdev, &ichxrom_pci_tbl[0]); -+ } -+ return -ENXIO; -+#if 0 -+ return pci_module_init(&ichxrom_driver); -+#endif -+} -+ -+static void __exit cleanup_ichxrom(void) -+{ -+ ichxrom_remove_one(ichxrom_window.pdev); -+} -+ -+module_init(init_ichxrom); -+module_exit(cleanup_ichxrom); -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Eric Biederman <ebiederman@lnxi.com>"); -+MODULE_DESCRIPTION("MTD map driver for BIOS chips on the ICHX southbridge"); -Index: linux-2.6.5/drivers/mtd/maps/impa7.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/impa7.c 2004-04-03 22:36:26.000000000 -0500 -+++ 
linux-2.6.5/drivers/mtd/maps/impa7.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: impa7.c,v 1.9 2003/06/23 11:47:43 dwmw2 Exp $ -+ * $Id: impa7.c,v 1.12 2004/09/16 23:27:13 gleixner Exp $ - * - * Handle mapping of the NOR flash on implementa A7 boards - * -@@ -30,25 +30,25 @@ - #define NUM_FLASHBANKS 2 - #define BUSWIDTH 4 - --/* can be { "cfi_probe", "jedec_probe", "map_rom", 0 }; */ --#define PROBETYPES { "jedec_probe", 0 } -+/* can be { "cfi_probe", "jedec_probe", "map_rom", NULL } */ -+#define PROBETYPES { "jedec_probe", NULL } - - #define MSG_PREFIX "impA7:" /* prefix for our printk()'s */ - #define MTDID "impa7-%d" /* for mtdparts= partitioning */ - --static struct mtd_info *impa7_mtd[NUM_FLASHBANKS] = { 0 }; -+static struct mtd_info *impa7_mtd[NUM_FLASHBANKS]; - - - static struct map_info impa7_map[NUM_FLASHBANKS] = { - { - .name = "impA7 NOR Flash Bank #0", - .size = WINDOW_SIZE0, -- .buswidth = BUSWIDTH, -+ .bankwidth = BUSWIDTH, - }, - { - .name = "impA7 NOR Flash Bank #1", - .size = WINDOW_SIZE1, -- .buswidth = BUSWIDTH, -+ .bankwidth = BUSWIDTH, - }, - }; - -@@ -91,7 +91,7 @@ - pt[i].size, pt[i].addr); - - impa7_map[i].phys = pt[i].addr; -- impa7_map[i].virt = (unsigned long) -+ impa7_map[i].virt = (void __iomem *) - ioremap(pt[i].addr, pt[i].size); - if (!impa7_map[i].virt) { - printk(MSG_PREFIX "failed to ioremap\n"); -Index: linux-2.6.5/drivers/mtd/maps/integrator-flash-v24.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/integrator-flash-v24.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/integrator-flash-v24.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,258 @@ -+/*====================================================================== -+ -+ drivers/mtd/maps/armflash.c: ARM Flash Layout/Partitioning -+ -+ Copyright (C) 2000 ARM Limited -+ -+ This program is free software; you can redistribute it and/or modify -+ it under the terms of the 
GNU General Public License as published by -+ the Free Software Foundation; either version 2 of the License, or -+ (at your option) any later version. -+ -+ This program is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with this program; if not, write to the Free Software -+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ -+ This is access code for flashes using ARM's flash partitioning -+ standards. -+ -+ $Id: integrator-flash-v24.c,v 1.14 2004/09/16 23:27:13 gleixner Exp $ -+ -+======================================================================*/ -+ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/types.h> -+#include <linux/kernel.h> -+#include <linux/slab.h> -+#include <linux/ioport.h> -+#include <linux/init.h> -+ -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/map.h> -+#include <linux/mtd/partitions.h> -+ -+#include <asm/hardware.h> -+#include <asm/io.h> -+#include <asm/system.h> -+ -+// board specific stuff - sorry, it should be in arch/arm/mach-*. 
-+#ifdef CONFIG_ARCH_INTEGRATOR -+ -+#define FLASH_BASE INTEGRATOR_FLASH_BASE -+#define FLASH_SIZE INTEGRATOR_FLASH_SIZE -+ -+#define FLASH_PART_SIZE 0x400000 -+ -+#define SC_CTRLC (IO_ADDRESS(INTEGRATOR_SC_BASE) + INTEGRATOR_SC_CTRLC_OFFSET) -+#define SC_CTRLS (IO_ADDRESS(INTEGRATOR_SC_BASE) + INTEGRATOR_SC_CTRLS_OFFSET) -+#define EBI_CSR1 (IO_ADDRESS(INTEGRATOR_EBI_BASE) + INTEGRATOR_EBI_CSR1_OFFSET) -+#define EBI_LOCK (IO_ADDRESS(INTEGRATOR_EBI_BASE) + INTEGRATOR_EBI_LOCK_OFFSET) -+ -+/* -+ * Initialise the flash access systems: -+ * - Disable VPP -+ * - Assert WP -+ * - Set write enable bit in EBI reg -+ */ -+static void armflash_flash_init(void) -+{ -+ unsigned int tmp; -+ -+ __raw_writel(INTEGRATOR_SC_CTRL_nFLVPPEN | INTEGRATOR_SC_CTRL_nFLWP, SC_CTRLC); -+ -+ tmp = __raw_readl(EBI_CSR1) | INTEGRATOR_EBI_WRITE_ENABLE; -+ __raw_writel(tmp, EBI_CSR1); -+ -+ if (!(__raw_readl(EBI_CSR1) & INTEGRATOR_EBI_WRITE_ENABLE)) { -+ __raw_writel(0xa05f, EBI_LOCK); -+ __raw_writel(tmp, EBI_CSR1); -+ __raw_writel(0, EBI_LOCK); -+ } -+} -+ -+/* -+ * Shutdown the flash access systems: -+ * - Disable VPP -+ * - Assert WP -+ * - Clear write enable bit in EBI reg -+ */ -+static void armflash_flash_exit(void) -+{ -+ unsigned int tmp; -+ -+ __raw_writel(INTEGRATOR_SC_CTRL_nFLVPPEN | INTEGRATOR_SC_CTRL_nFLWP, SC_CTRLC); -+ -+ /* -+ * Clear the write enable bit in system controller EBI register. 
-+ */ -+ tmp = __raw_readl(EBI_CSR1) & ~INTEGRATOR_EBI_WRITE_ENABLE; -+ __raw_writel(tmp, EBI_CSR1); -+ -+ if (__raw_readl(EBI_CSR1) & INTEGRATOR_EBI_WRITE_ENABLE) { -+ __raw_writel(0xa05f, EBI_LOCK); -+ __raw_writel(tmp, EBI_CSR1); -+ __raw_writel(0, EBI_LOCK); -+ } -+} -+ -+static void armflash_flash_wp(int on) -+{ -+ unsigned int reg; -+ -+ if (on) -+ reg = SC_CTRLC; -+ else -+ reg = SC_CTRLS; -+ -+ __raw_writel(INTEGRATOR_SC_CTRL_nFLWP, reg); -+} -+ -+static void armflash_set_vpp(struct map_info *map, int on) -+{ -+ unsigned int reg; -+ -+ if (on) -+ reg = SC_CTRLS; -+ else -+ reg = SC_CTRLC; -+ -+ __raw_writel(INTEGRATOR_SC_CTRL_nFLVPPEN, reg); -+} -+#endif -+ -+#ifdef CONFIG_ARCH_P720T -+ -+#define FLASH_BASE (0x04000000) -+#define FLASH_SIZE (64*1024*1024) -+ -+#define FLASH_PART_SIZE (4*1024*1024) -+#define FLASH_BLOCK_SIZE (128*1024) -+ -+static void armflash_flash_init(void) -+{ -+} -+ -+static void armflash_flash_exit(void) -+{ -+} -+ -+static void armflash_flash_wp(int on) -+{ -+} -+ -+static void armflash_set_vpp(struct map_info *map, int on) -+{ -+} -+#endif -+ -+ -+static struct map_info armflash_map = -+{ -+ .name = "AFS", -+ .set_vpp = armflash_set_vpp, -+ .phys = FLASH_BASE, -+}; -+ -+static struct mtd_info *mtd; -+static struct mtd_partition *parts; -+static const char *probes[] = { "RedBoot", "afs", NULL }; -+ -+static int __init armflash_cfi_init(void *base, u_int size) -+{ -+ int ret; -+ -+ armflash_flash_init(); -+ armflash_flash_wp(1); -+ -+ /* -+ * look for CFI based flash parts fitted to this board -+ */ -+ armflash_map.size = size; -+ armflash_map.bankwidth = 4; -+ armflash_map.virt = (void __iomem *) base; -+ -+ simple_map_init(&armflash_map); -+ -+ /* -+ * Also, the CFI layer automatically works out what size -+ * of chips we have, and does the necessary identification -+ * for us automatically. 
-+ */ -+ mtd = do_map_probe("cfi_probe", &armflash_map); -+ if (!mtd) -+ return -ENXIO; -+ -+ mtd->owner = THIS_MODULE; -+ -+ ret = parse_mtd_partitions(mtd, probes, &parts, (void *)0); -+ if (ret > 0) { -+ ret = add_mtd_partitions(mtd, parts, ret); -+ if (ret) -+ printk(KERN_ERR "mtd partition registration " -+ "failed: %d\n", ret); -+ } -+ -+ /* -+ * If we got an error, free all resources. -+ */ -+ if (ret < 0) { -+ del_mtd_partitions(mtd); -+ map_destroy(mtd); -+ } -+ -+ return ret; -+} -+ -+static void armflash_cfi_exit(void) -+{ -+ if (mtd) { -+ del_mtd_partitions(mtd); -+ map_destroy(mtd); -+ } -+ if (parts) -+ kfree(parts); -+} -+ -+static int __init armflash_init(void) -+{ -+ int err = -EBUSY; -+ void *base; -+ -+ if (request_mem_region(FLASH_BASE, FLASH_SIZE, "flash") == NULL) -+ goto out; -+ -+ base = ioremap(FLASH_BASE, FLASH_SIZE); -+ err = -ENOMEM; -+ if (base == NULL) -+ goto release; -+ -+ err = armflash_cfi_init(base, FLASH_SIZE); -+ if (err) { -+ iounmap(base); -+release: -+ release_mem_region(FLASH_BASE, FLASH_SIZE); -+ } -+out: -+ return err; -+} -+ -+static void __exit armflash_exit(void) -+{ -+ armflash_cfi_exit(); -+ iounmap((void *)armflash_map.virt); -+ release_mem_region(FLASH_BASE, FLASH_SIZE); -+ armflash_flash_exit(); -+} -+ -+module_init(armflash_init); -+module_exit(armflash_exit); -+ -+MODULE_AUTHOR("ARM Ltd"); -+MODULE_DESCRIPTION("ARM Integrator CFI map driver"); -+MODULE_LICENSE("GPL"); -Index: linux-2.6.5/drivers/mtd/maps/integrator-flash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/integrator-flash.c 2004-04-03 22:37:41.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/integrator-flash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -22,7 +22,7 @@ - This is access code for flashes using ARM's flash partitioning - standards. 
- -- $Id: integrator-flash.c,v 1.15 2004/02/27 22:37:39 rmk Exp $ -+ $Id: integrator-flash.c,v 1.17 2004/09/16 23:27:13 gleixner Exp $ - - ======================================================================*/ - -@@ -108,9 +108,9 @@ - * look for CFI based flash parts fitted to this board - */ - info->map.size = size; -- info->map.buswidth = plat->width; -+ info->map.bankwidth = plat->width; - info->map.phys = res->start; -- info->map.virt = (unsigned long) base; -+ info->map.virt = (void __iomem *) base; - info->map.name = dev->dev.bus_id; - info->map.set_vpp = armflash_set_vpp; - -Index: linux-2.6.5/drivers/mtd/maps/ipaq-flash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/ipaq-flash.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/ipaq-flash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,464 @@ -+/* -+ * Flash memory access on iPAQ Handhelds (either SA1100 or PXA250 based) -+ * -+ * (C) 2000 Nicolas Pitre <nico@cam.org> -+ * (C) 2002 Hewlett-Packard Company <jamey.hicks@hp.com> -+ * (C) 2003 Christian Pellegrin <chri@ascensit.com>, <chri@infis.univ.ts.it>: concatenation of multiple flashes -+ * -+ * $Id: ipaq-flash.c,v 1.2 2004/09/16 23:27:13 gleixner Exp $ -+ */ -+ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/types.h> -+#include <linux/kernel.h> -+#include <linux/spinlock.h> -+#include <linux/init.h> -+#include <linux/slab.h> -+#include <asm/page.h> -+#include <asm/mach-types.h> -+#include <asm/system.h> -+#include <asm/errno.h> -+ -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/map.h> -+#include <linux/mtd/partitions.h> -+#ifdef CONFIG_MTD_CONCAT -+#include <linux/mtd/concat.h> -+#endif -+ -+#include <asm/hardware.h> -+#include <asm/arch-sa1100/h3600.h> -+#include <asm/io.h> -+ -+ -+#ifndef CONFIG_IPAQ_HANDHELD -+#error This is for iPAQ Handhelds only -+#endif -+#ifdef CONFIG_SA1100_JORNADA56X -+ -+static void jornada56x_set_vpp(struct 
map_info *map, int vpp) -+{ -+ if (vpp) -+ GPSR = GPIO_GPIO26; -+ else -+ GPCR = GPIO_GPIO26; -+ GPDR |= GPIO_GPIO26; -+} -+ -+#endif -+ -+#ifdef CONFIG_SA1100_JORNADA720 -+ -+static void jornada720_set_vpp(struct map_info *map, int vpp) -+{ -+ if (vpp) -+ PPSR |= 0x80; -+ else -+ PPSR &= ~0x80; -+ PPDR |= 0x80; -+} -+ -+#endif -+ -+#define MAX_IPAQ_CS 2 /* Number of CS we are going to test */ -+ -+#define IPAQ_MAP_INIT(X) \ -+ { \ -+ name: "IPAQ flash " X, \ -+ } -+ -+ -+static struct map_info ipaq_map[MAX_IPAQ_CS] = { -+ IPAQ_MAP_INIT("bank 1"), -+ IPAQ_MAP_INIT("bank 2") -+}; -+ -+static struct mtd_info *my_sub_mtd[MAX_IPAQ_CS] = { -+ NULL, -+ NULL -+}; -+ -+/* -+ * Here are partition information for all known IPAQ-based devices. -+ * See include/linux/mtd/partitions.h for definition of the mtd_partition -+ * structure. -+ * -+ * The *_max_flash_size is the maximum possible mapped flash size which -+ * is not necessarily the actual flash size. It must be no more than -+ * the value specified in the "struct map_desc *_io_desc" mapping -+ * definition for the corresponding machine. -+ * -+ * Please keep these in alphabetical order, and formatted as per existing -+ * entries. Thanks. 
-+ */ -+ -+#ifdef CONFIG_IPAQ_HANDHELD -+static unsigned long h3xxx_max_flash_size = 0x04000000; -+static struct mtd_partition h3xxx_partitions[] = { -+ { -+ name: "H3XXX boot firmware", -+#ifndef CONFIG_LAB -+ size: 0x00040000, -+#else -+ size: 0x00080000, -+#endif -+ offset: 0, -+#ifndef CONFIG_LAB -+ mask_flags: MTD_WRITEABLE, /* force read-only */ -+#endif -+ }, -+ { -+ name: "H3XXX root jffs2", -+#ifndef CONFIG_LAB -+ size: 0x2000000 - 2*0x40000, /* Warning, this is fixed later */ -+ offset: 0x00040000, -+#else -+ size: 0x2000000 - 0x40000 - 0x80000, /* Warning, this is fixed later */ -+ offset: 0x00080000, -+#endif -+ }, -+ { -+ name: "asset", -+ size: 0x40000, -+ offset: 0x2000000 - 0x40000, /* Warning, this is fixed later */ -+ mask_flags: MTD_WRITEABLE, /* force read-only */ -+ } -+}; -+ -+#ifndef CONFIG_MTD_CONCAT -+static struct mtd_partition h3xxx_partitions_bank2[] = { -+ /* this is used only on 2 CS machines when concat is not present */ -+ { -+ name: "second H3XXX root jffs2", -+ size: 0x1000000 - 0x40000, /* Warning, this is fixed later */ -+ offset: 0x00000000, -+ }, -+ { -+ name: "second asset", -+ size: 0x40000, -+ offset: 0x1000000 - 0x40000, /* Warning, this is fixed later */ -+ mask_flags: MTD_WRITEABLE, /* force read-only */ -+ } -+}; -+#endif -+ -+static spinlock_t ipaq_vpp_lock = SPIN_LOCK_UNLOCKED; -+ -+static void h3xxx_set_vpp(struct map_info *map, int vpp) -+{ -+ static int nest = 0; -+ -+ spin_lock(&ipaq_vpp_lock); -+ if (vpp) -+ nest++; -+ else -+ nest--; -+ if (nest) -+ assign_h3600_egpio(IPAQ_EGPIO_VPP_ON, 1); -+ else -+ assign_h3600_egpio(IPAQ_EGPIO_VPP_ON, 0); -+ spin_unlock(&ipaq_vpp_lock); -+} -+ -+#endif -+ -+#if defined(CONFIG_SA1100_JORNADA56X) || defined(CONFIG_SA1100_JORNADA720) -+static unsigned long jornada_max_flash_size = 0x02000000; -+static struct mtd_partition jornada_partitions[] = { -+ { -+ name: "Jornada boot firmware", -+ size: 0x00040000, -+ offset: 0, -+ mask_flags: MTD_WRITEABLE, /* force read-only */ -+ }, { 
-+ name: "Jornada root jffs2", -+ size: MTDPART_SIZ_FULL, -+ offset: 0x00040000, -+ } -+}; -+#endif -+ -+ -+static struct mtd_partition *parsed_parts; -+static struct mtd_info *mymtd; -+ -+static unsigned long cs_phys[] = { -+#ifdef CONFIG_ARCH_SA1100 -+ SA1100_CS0_PHYS, -+ SA1100_CS1_PHYS, -+ SA1100_CS2_PHYS, -+ SA1100_CS3_PHYS, -+ SA1100_CS4_PHYS, -+ SA1100_CS5_PHYS, -+#else -+ PXA_CS0_PHYS, -+ PXA_CS1_PHYS, -+ PXA_CS2_PHYS, -+ PXA_CS3_PHYS, -+ PXA_CS4_PHYS, -+ PXA_CS5_PHYS, -+#endif -+}; -+ -+static const char *part_probes[] = { "cmdlinepart", "RedBoot", NULL }; -+ -+static int __init h1900_special_case(void); -+ -+int __init ipaq_mtd_init(void) -+{ -+ struct mtd_partition *parts = NULL; -+ int nb_parts = 0; -+ int parsed_nr_parts = 0; -+ const char *part_type; -+ int i; /* used when we have >1 flash chips */ -+ unsigned long tot_flashsize = 0; /* used when we have >1 flash chips */ -+ -+ /* Default flash bankwidth */ -+ // ipaq_map.bankwidth = (MSC0 & MSC_RBW) ? 2 : 4; -+ -+ if (machine_is_h1900()) -+ { -+ /* For our intents, the h1900 is not a real iPAQ, so we special-case it. 
*/ -+ return h1900_special_case(); -+ } -+ -+ if (machine_is_h3100() || machine_is_h1900()) -+ for(i=0; i<MAX_IPAQ_CS; i++) -+ ipaq_map[i].bankwidth = 2; -+ else -+ for(i=0; i<MAX_IPAQ_CS; i++) -+ ipaq_map[i].bankwidth = 4; -+ -+ /* -+ * Static partition definition selection -+ */ -+ part_type = "static"; -+ -+ simple_map_init(&ipaq_map[0]); -+ simple_map_init(&ipaq_map[1]); -+ -+#ifdef CONFIG_IPAQ_HANDHELD -+ if (machine_is_ipaq()) { -+ parts = h3xxx_partitions; -+ nb_parts = ARRAY_SIZE(h3xxx_partitions); -+ for(i=0; i<MAX_IPAQ_CS; i++) { -+ ipaq_map[i].size = h3xxx_max_flash_size; -+ ipaq_map[i].set_vpp = h3xxx_set_vpp; -+ ipaq_map[i].phys = cs_phys[i]; -+ ipaq_map[i].virt = (void __iomem *)__ioremap(cs_phys[i], 0x04000000, 0, 1); -+ if (machine_is_h3100 () || machine_is_h1900()) -+ ipaq_map[i].bankwidth = 2; -+ } -+ if (machine_is_h3600()) { -+ /* No asset partition here */ -+ h3xxx_partitions[1].size += 0x40000; -+ nb_parts--; -+ } -+ } -+#endif -+#ifdef CONFIG_ARCH_H5400 -+ if (machine_is_h5400()) { -+ ipaq_map[0].size = 0x02000000; -+ ipaq_map[1].size = 0x02000000; -+ ipaq_map[1].phys = 0x02000000; -+ ipaq_map[1].virt = ipaq_map[0].virt + 0x02000000; -+ } -+#endif -+#ifdef CONFIG_ARCH_H1900 -+ if (machine_is_h1900()) { -+ ipaq_map[0].size = 0x00400000; -+ ipaq_map[1].size = 0x02000000; -+ ipaq_map[1].phys = 0x00080000; -+ ipaq_map[1].virt = ipaq_map[0].virt + 0x00080000; -+ } -+#endif -+ -+#ifdef CONFIG_SA1100_JORNADA56X -+ if (machine_is_jornada56x()) { -+ parts = jornada_partitions; -+ nb_parts = ARRAY_SIZE(jornada_partitions); -+ ipaq_map[0].size = jornada_max_flash_size; -+ ipaq_map[0].set_vpp = jornada56x_set_vpp; -+ ipaq_map[0].virt = (__u32)__ioremap(0x0, 0x04000000, 0, 1); -+ } -+#endif -+#ifdef CONFIG_SA1100_JORNADA720 -+ if (machine_is_jornada720()) { -+ parts = jornada_partitions; -+ nb_parts = ARRAY_SIZE(jornada_partitions); -+ ipaq_map[0].size = jornada_max_flash_size; -+ ipaq_map[0].set_vpp = jornada720_set_vpp; -+ } -+#endif -+ -+ -+ if 
(machine_is_ipaq()) { /* for iPAQs only */ -+ for(i=0; i<MAX_IPAQ_CS; i++) { -+ printk(KERN_NOTICE "iPAQ flash: probing %d-bit flash bus, window=%lx with CFI.\n", ipaq_map[i].bankwidth*8, ipaq_map[i].virt); -+ my_sub_mtd[i] = do_map_probe("cfi_probe", &ipaq_map[i]); -+ if (!my_sub_mtd[i]) { -+ printk(KERN_NOTICE "iPAQ flash: probing %d-bit flash bus, window=%lx with JEDEC.\n", ipaq_map[i].bankwidth*8, ipaq_map[i].virt); -+ my_sub_mtd[i] = do_map_probe("jedec_probe", &ipaq_map[i]); -+ } -+ if (!my_sub_mtd[i]) { -+ printk(KERN_NOTICE "iPAQ flash: failed to find flash.\n"); -+ if (i) -+ break; -+ else -+ return -ENXIO; -+ } else -+ printk(KERN_NOTICE "iPAQ flash: found %d bytes\n", my_sub_mtd[i]->size); -+ -+ /* do we really need this debugging? --joshua 20030703 */ -+ // printk("my_sub_mtd[%d]=%p\n", i, my_sub_mtd[i]); -+ my_sub_mtd[i]->owner = THIS_MODULE; -+ tot_flashsize += my_sub_mtd[i]->size; -+ } -+#ifdef CONFIG_MTD_CONCAT -+ /* fix the asset location */ -+# ifdef CONFIG_LAB -+ h3xxx_partitions[1].size = tot_flashsize - 0x40000 - 0x80000 /* extra big boot block */; -+# else -+ h3xxx_partitions[1].size = tot_flashsize - 2 * 0x40000; -+# endif -+ h3xxx_partitions[2].offset = tot_flashsize - 0x40000; -+ /* and concat the devices */ -+ mymtd = mtd_concat_create(&my_sub_mtd[0], i, -+ "ipaq"); -+ if (!mymtd) { -+ printk("Cannot create iPAQ concat device\n"); -+ return -ENXIO; -+ } -+#else -+ mymtd = my_sub_mtd[0]; -+ -+ /* -+ *In the very near future, command line partition parsing -+ * will use the device name as 'mtd-id' instead of a value -+ * passed to the parse_cmdline_partitions() routine. Since -+ * the bootldr says 'ipaq', make sure it continues to work. 
-+ */ -+ mymtd->name = "ipaq"; -+ -+ if ((machine_is_h3600())) { -+# ifdef CONFIG_LAB -+ h3xxx_partitions[1].size = my_sub_mtd[0]->size - 0x80000; -+# else -+ h3xxx_partitions[1].size = my_sub_mtd[0]->size - 0x40000; -+# endif -+ nb_parts = 2; -+ } else { -+# ifdef CONFIG_LAB -+ h3xxx_partitions[1].size = my_sub_mtd[0]->size - 0x40000 - 0x80000; /* extra big boot block */ -+# else -+ h3xxx_partitions[1].size = my_sub_mtd[0]->size - 2*0x40000; -+# endif -+ h3xxx_partitions[2].offset = my_sub_mtd[0]->size - 0x40000; -+ } -+ -+ if (my_sub_mtd[1]) { -+# ifdef CONFIG_LAB -+ h3xxx_partitions_bank2[0].size = my_sub_mtd[1]->size - 0x80000; -+# else -+ h3xxx_partitions_bank2[0].size = my_sub_mtd[1]->size - 0x40000; -+# endif -+ h3xxx_partitions_bank2[1].offset = my_sub_mtd[1]->size - 0x40000; -+ } -+#endif -+ } -+ else { -+ /* -+ * Now let's probe for the actual flash. Do it here since -+ * specific machine settings might have been set above. -+ */ -+ printk(KERN_NOTICE "IPAQ flash: probing %d-bit flash bus, window=%lx\n", ipaq_map[0].bankwidth*8, ipaq_map[0].virt); -+ mymtd = do_map_probe("cfi_probe", &ipaq_map[0]); -+ if (!mymtd) -+ return -ENXIO; -+ mymtd->owner = THIS_MODULE; -+ } -+ -+ -+ /* -+ * Dynamic partition selection stuff (might override the static ones) -+ */ -+ -+ i = parse_mtd_partitions(mymtd, part_probes, &parsed_parts, 0); -+ -+ if (i > 0) { -+ nb_parts = parsed_nr_parts = i; -+ parts = parsed_parts; -+ part_type = "dynamic"; -+ } -+ -+ if (!parts) { -+ printk(KERN_NOTICE "IPAQ flash: no partition info available, registering whole flash at once\n"); -+ add_mtd_device(mymtd); -+#ifndef CONFIG_MTD_CONCAT -+ if (my_sub_mtd[1]) -+ add_mtd_device(my_sub_mtd[1]); -+#endif -+ } else { -+ printk(KERN_NOTICE "Using %s partition definition\n", part_type); -+ add_mtd_partitions(mymtd, parts, nb_parts); -+#ifndef CONFIG_MTD_CONCAT -+ if (my_sub_mtd[1]) -+ add_mtd_partitions(my_sub_mtd[1], h3xxx_partitions_bank2, ARRAY_SIZE(h3xxx_partitions_bank2)); -+#endif -+ } -+ 
-+ return 0; -+} -+ -+static void __exit ipaq_mtd_cleanup(void) -+{ -+ int i; -+ -+ if (mymtd) { -+ del_mtd_partitions(mymtd); -+#ifndef CONFIG_MTD_CONCAT -+ if (my_sub_mtd[1]) -+ del_mtd_partitions(my_sub_mtd[1]); -+#endif -+ map_destroy(mymtd); -+#ifdef CONFIG_MTD_CONCAT -+ for(i=0; i<MAX_IPAQ_CS; i++) -+#else -+ for(i=1; i<MAX_IPAQ_CS; i++) -+#endif -+ { -+ if (my_sub_mtd[i]) -+ map_destroy(my_sub_mtd[i]); -+ } -+ if (parsed_parts) -+ kfree(parsed_parts); -+ } -+} -+ -+static int __init h1900_special_case(void) -+{ -+ /* The iPAQ h1900 is a special case - it has weird ROM. */ -+ simple_map_init(&ipaq_map[0]); -+ ipaq_map[0].size = 0x80000; -+ ipaq_map[0].set_vpp = h3xxx_set_vpp; -+ ipaq_map[0].phys = 0x0; -+ ipaq_map[0].virt = (void __iomem *)__ioremap(0x0, 0x04000000, 0, 1); -+ ipaq_map[0].bankwidth = 2; -+ -+ printk(KERN_NOTICE "iPAQ flash: probing %d-bit flash bus, window=%lx with JEDEC.\n", ipaq_map[0].bankwidth*8, ipaq_map[0].virt); -+ mymtd = do_map_probe("jedec_probe", &ipaq_map[0]); -+ if (!mymtd) -+ return -ENODEV; -+ add_mtd_device(mymtd); -+ printk(KERN_NOTICE "iPAQ flash: registered h1910 flash\n"); -+ -+ return 0; -+} -+ -+module_init(ipaq_mtd_init); -+module_exit(ipaq_mtd_cleanup); -+ -+MODULE_AUTHOR("Jamey Hicks"); -+MODULE_DESCRIPTION("IPAQ CFI map driver"); -+MODULE_LICENSE("MIT"); -Index: linux-2.6.5/drivers/mtd/maps/iq80310.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/iq80310.c 2004-04-03 22:38:15.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/iq80310.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: iq80310.c,v 1.17 2003/06/23 11:48:18 dwmw2 Exp $ -+ * $Id: iq80310.c,v 1.19 2004/09/16 23:27:13 gleixner Exp $ - * - * Mapping for the Intel XScale IQ80310 evaluation board - * -@@ -31,7 +31,7 @@ - static struct map_info iq80310_map = { - .name = "IQ80310 flash", - .size = WINDOW_SIZE, -- .buswidth = BUSWIDTH, -+ .bankwidth = BUSWIDTH, - .phys = WINDOW_ADDR - 
}; - -@@ -68,7 +68,7 @@ - int parsed_nr_parts = 0; - int ret; - -- iq80310_map.virt = (unsigned long)ioremap(WINDOW_ADDR, WINDOW_SIZE); -+ iq80310_map.virt = (void __iomem *)ioremap(WINDOW_ADDR, WINDOW_SIZE); - if (!iq80310_map.virt) { - printk("Failed to ioremap\n"); - return -EIO; -Index: linux-2.6.5/drivers/mtd/maps/ixp2000.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/ixp2000.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/ixp2000.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,281 @@ -+/* -+ * $Id: ixp2000.c,v 1.3 2004/09/16 23:27:13 gleixner Exp $ -+ * -+ * drivers/mtd/maps/ixp2000.c -+ * -+ * Mapping for the Intel XScale IXP2000 based systems -+ * -+ * Copyright (C) 2002 Intel Corp. -+ * Copyright (C) 2003-2004 MontaVista Software, Inc. -+ * -+ * Original Author: Naeem M Afzal <naeem.m.afzal@intel.com> -+ * Maintainer: Deepak Saxena <dsaxena@plexity.net> -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ */ -+ -+#include <linux/module.h> -+#include <linux/types.h> -+#include <linux/init.h> -+#include <linux/kernel.h> -+#include <linux/string.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/map.h> -+#include <linux/mtd/partitions.h> -+#include <linux/ioport.h> -+#include <linux/device.h> -+ -+#include <asm/io.h> -+#include <asm/hardware.h> -+#include <asm/mach-types.h> -+#include <asm/mach/flash.h> -+ -+#include <linux/reboot.h> -+ -+struct ixp2000_flash_info { -+ struct mtd_info *mtd; -+ struct map_info map; -+ struct mtd_partition *partitions; -+ struct resource *res; -+ int nr_banks; -+}; -+ -+static inline unsigned long flash_bank_setup(struct map_info *map, unsigned long ofs) -+{ -+ unsigned long (*set_bank)(unsigned long) = -+ (unsigned long(*)(unsigned long))map->map_priv_2; -+ -+ return (set_bank ? 
set_bank(ofs) : ofs); -+} -+ -+#ifdef __ARMEB__ -+/* -+ * Rev A0 and A1 of IXP2400 silicon have a broken addressing unit which -+ * causes the lower address bits to be XORed with 0x11 on 8 bit accesses -+ * and XORed with 0x10 on 16 bit accesses. See the spec update, erratum 44. -+ */ -+static int erratum44_workaround = 0; -+ -+static inline unsigned long address_fix8_write(unsigned long addr) -+{ -+ if (erratum44_workaround) { -+ return (addr ^ 3); -+ } -+ return addr; -+} -+#else -+ -+#define address_fix8_write(x) (x) -+#endif -+ -+static map_word ixp2000_flash_read8(struct map_info *map, unsigned long ofs) -+{ -+ map_word val; -+ -+ val.x[0] = *((u8 *)(map->map_priv_1 + flash_bank_setup(map, ofs))); -+ return val; -+} -+ -+/* -+ * We can't use the standard memcpy due to the broken SlowPort -+ * address translation on rev A0 and A1 silicon and the fact that -+ * we have banked flash. -+ */ -+static void ixp2000_flash_copy_from(struct map_info *map, void *to, -+ unsigned long from, ssize_t len) -+{ -+ from = flash_bank_setup(map, from); -+ while(len--) -+ *(__u8 *) to++ = *(__u8 *)(map->map_priv_1 + from++); -+} -+ -+static void ixp2000_flash_write8(struct map_info *map, map_word d, unsigned long ofs) -+{ -+ *(__u8 *) (address_fix8_write(map->map_priv_1 + -+ flash_bank_setup(map, ofs))) = d.x[0]; -+} -+ -+static void ixp2000_flash_copy_to(struct map_info *map, unsigned long to, -+ const void *from, ssize_t len) -+{ -+ to = flash_bank_setup(map, to); -+ while(len--) { -+ unsigned long tmp = address_fix8_write(map->map_priv_1 + to++); -+ *(__u8 *)(tmp) = *(__u8 *)(from++); -+ } -+} -+ -+ -+static int ixp2000_flash_remove(struct device *_dev) -+{ -+ struct platform_device *dev = to_platform_device(_dev); -+ struct flash_platform_data *plat = dev->dev.platform_data; -+ struct ixp2000_flash_info *info = dev_get_drvdata(&dev->dev); -+ -+ dev_set_drvdata(&dev->dev, NULL); -+ -+ if(!info) -+ return 0; -+ -+ if (info->mtd) { -+ del_mtd_partitions(info->mtd); -+ 
map_destroy(info->mtd); -+ } -+ if (info->map.map_priv_1) -+ iounmap((void *) info->map.map_priv_1); -+ -+ if (info->partitions) { -+ kfree(info->partitions); } -+ -+ if (info->res) { -+ release_resource(info->res); -+ kfree(info->res); -+ } -+ -+ if (plat->exit) -+ plat->exit(); -+ -+ return 0; -+} -+ -+ -+static int ixp2000_flash_probe(struct device *_dev) -+{ -+ static const char *probes[] = { "RedBoot", "cmdlinepart", NULL }; -+ struct platform_device *dev = to_platform_device(_dev); -+ struct ixp2000_flash_data *ixp_data = dev->dev.platform_data; -+ struct flash_platform_data *plat; -+ struct ixp2000_flash_info *info; -+ unsigned long window_size; -+ int err = -1; -+ -+ if (!ixp_data) -+ return -ENODEV; -+ -+ plat = ixp_data->platform_data; -+ if (!plat) -+ return -ENODEV; -+ -+ window_size = dev->resource->end - dev->resource->start + 1; -+ dev_info(_dev, "Probe of IXP2000 flash(%d banks x %dMiB)\n", -+ ixp_data->nr_banks, ((u32)window_size >> 20)); -+ -+ if (plat->width != 1) { -+ dev_err(_dev, "IXP2000 MTD map only supports 8-bit mode, asking for %d\n", -+ plat->width * 8); -+ return -EIO; -+ } -+ -+ info = kmalloc(sizeof(struct ixp2000_flash_info), GFP_KERNEL); -+ if(!info) { -+ err = -ENOMEM; -+ goto Error; -+ } -+ memzero(info, sizeof(struct ixp2000_flash_info)); -+ -+ dev_set_drvdata(&dev->dev, info); -+ -+ /* -+ * Tell the MTD layer we're not 1:1 mapped so that it does -+ * not attempt to do a direct access on us. 
-+ */ -+ info->map.phys = NO_XIP; -+ -+ info->nr_banks = ixp_data->nr_banks; -+ info->map.size = ixp_data->nr_banks * window_size; -+ info->map.bankwidth = 1; -+ -+ /* -+ * map_priv_2 is used to store a ptr to to the bank_setup routine -+ */ -+ info->map.map_priv_2 = (void __iomem *) ixp_data->bank_setup; -+ -+ info->map.name = dev->dev.bus_id; -+ info->map.read = ixp2000_flash_read8; -+ info->map.write = ixp2000_flash_write8; -+ info->map.copy_from = ixp2000_flash_copy_from; -+ info->map.copy_to = ixp2000_flash_copy_to; -+ -+ info->res = request_mem_region(dev->resource->start, -+ dev->resource->end - dev->resource->start + 1, -+ dev->dev.bus_id); -+ if (!info->res) { -+ dev_err(_dev, "Could not reserve memory region\n"); -+ err = -ENOMEM; -+ goto Error; -+ } -+ -+ info->map.map_priv_1 = -+ (void __iomem *) ioremap(dev->resource->start, -+ dev->resource->end - dev->resource->start + 1); -+ if (!info->map.map_priv_1) { -+ dev_err(_dev, "Failed to ioremap flash region\n"); -+ err = -EIO; -+ goto Error; -+ } -+ -+ /* -+ * Setup read mode for FLASH -+ */ -+ *IXP2000_SLOWPORT_FRM = 1; -+ -+#if defined(__ARMEB__) -+ /* -+ * Enable erratum 44 workaround for NPUs with broken slowport -+ */ -+ -+ errata44_workaround = ixp2000_has_broken_slowport(); -+ dev_info(_dev, "Erratum 44 workaround %s\n", -+ erratum44_workaround ? 
"enabled" : "disabled"); -+#endif -+ -+ info->mtd = do_map_probe(plat->map_name, &info->map); -+ if (!info->mtd) { -+ dev_err(_dev, "map_probe failed\n"); -+ err = -ENXIO; -+ goto Error; -+ } -+ info->mtd->owner = THIS_MODULE; -+ -+ err = parse_mtd_partitions(info->mtd, probes, &info->partitions, 0); -+ if (err > 0) { -+ err = add_mtd_partitions(info->mtd, info->partitions, err); -+ if(err) -+ dev_err(_dev, "Could not parse partitions\n"); -+ } -+ -+ if (err) -+ goto Error; -+ -+ return 0; -+ -+Error: -+ ixp2000_flash_remove(_dev); -+ return err; -+} -+ -+static struct device_driver ixp2000_flash_driver = { -+ .name = "IXP2000-Flash", -+ .bus = &platform_bus_type, -+ .probe = &ixp2000_flash_probe, -+ .remove = &ixp2000_flash_remove -+}; -+ -+static int __init ixp2000_flash_init(void) -+{ -+ return driver_register(&ixp2000_flash_driver); -+} -+ -+static void __exit ixp2000_flash_exit(void) -+{ -+ driver_unregister(&ixp2000_flash_driver); -+} -+ -+module_init(ixp2000_flash_init); -+module_exit(ixp2000_flash_exit); -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Deepak Saxena <dsaxena@plexity.net>"); -+ -Index: linux-2.6.5/drivers/mtd/maps/ixp4xx.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/ixp4xx.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/ixp4xx.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,260 @@ -+/* -+ * $Id: ixp4xx.c,v 1.6 2004/09/17 00:25:06 gleixner Exp $ -+ * -+ * drivers/mtd/maps/ixp4xx.c -+ * -+ * MTD Map file for IXP4XX based systems. Please do not make per-board -+ * changes in here. If your board needs special setup, do it in your -+ * platform level code in arch/arm/mach-ixp4xx/board-setup.c -+ * -+ * Original Author: Intel Corporation -+ * Maintainer: Deepak Saxena <dsaxena@mvista.com> -+ * -+ * Copyright (C) 2002 Intel Corporation -+ * Copyright (C) 2003-2004 MontaVista Software, Inc. 
-+ * -+ */ -+ -+#include <linux/module.h> -+#include <linux/types.h> -+#include <linux/init.h> -+#include <linux/kernel.h> -+#include <linux/string.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/map.h> -+#include <linux/mtd/partitions.h> -+#include <linux/ioport.h> -+#include <linux/device.h> -+#include <asm/io.h> -+#include <asm/mach-types.h> -+#include <asm/mach/flash.h> -+ -+#include <linux/reboot.h> -+ -+#ifndef __ARMEB__ -+#define BYTE0(h) ((h) & 0xFF) -+#define BYTE1(h) (((h) >> 8) & 0xFF) -+#else -+#define BYTE0(h) (((h) >> 8) & 0xFF) -+#define BYTE1(h) ((h) & 0xFF) -+#endif -+ -+static map_word ixp4xx_read16(struct map_info *map, unsigned long ofs) -+{ -+ map_word val; -+ val.x[0] = *(__u16 *) (map->map_priv_1 + ofs); -+ return val; -+} -+ -+/* -+ * The IXP4xx expansion bus only allows 16-bit wide acceses -+ * when attached to a 16-bit wide device (such as the 28F128J3A), -+ * so we can't just memcpy_fromio(). -+ */ -+static void ixp4xx_copy_from(struct map_info *map, void *to, -+ unsigned long from, ssize_t len) -+{ -+ int i; -+ u8 *dest = (u8 *) to; -+ u16 *src = (u16 *) (map->map_priv_1 + from); -+ u16 data; -+ -+ for (i = 0; i < (len / 2); i++) { -+ data = src[i]; -+ dest[i * 2] = BYTE0(data); -+ dest[i * 2 + 1] = BYTE1(data); -+ } -+ -+ if (len & 1) -+ dest[len - 1] = BYTE0(src[i]); -+} -+ -+/* -+ * Unaligned writes are ignored, causing the 8-bit -+ * probe to fail and proceed to the 16-bit probe (which succeeds). 
-+ */ -+static void ixp4xx_probe_write16(struct map_info *map, map_word d, unsigned long adr) -+{ -+ if (!(adr & 1)) -+ *(__u16 *) (map->map_priv_1 + adr) = d.x[0]; -+} -+ -+/* -+ * Fast write16 function without the probing check above -+ */ -+static void ixp4xx_write16(struct map_info *map, map_word d, unsigned long adr) -+{ -+ *(__u16 *) (map->map_priv_1 + adr) = d.x[0]; -+} -+ -+struct ixp4xx_flash_info { -+ struct mtd_info *mtd; -+ struct map_info map; -+ struct mtd_partition *partitions; -+ struct resource *res; -+}; -+ -+static const char *probes[] = { "RedBoot", "cmdlinepart", NULL }; -+ -+static int ixp4xx_flash_remove(struct device *_dev) -+{ -+ struct platform_device *dev = to_platform_device(_dev); -+ struct flash_platform_data *plat = dev->dev.platform_data; -+ struct ixp4xx_flash_info *info = dev_get_drvdata(&dev->dev); -+ map_word d; -+ -+ dev_set_drvdata(&dev->dev, NULL); -+ -+ if(!info) -+ return 0; -+ -+ /* -+ * This is required for a soft reboot to work. -+ */ -+ d.x[0] = 0xff; -+ ixp4xx_write16(&info->map, d, 0x55 * 0x2); -+ -+ if (info->mtd) { -+ del_mtd_partitions(info->mtd); -+ map_destroy(info->mtd); -+ } -+ if (info->map.map_priv_1) -+ iounmap((void *) info->map.map_priv_1); -+ -+ if (info->partitions) -+ kfree(info->partitions); -+ -+ if (info->res) { -+ release_resource(info->res); -+ kfree(info->res); -+ } -+ -+ if (plat->exit) -+ plat->exit(); -+ -+ /* Disable flash write */ -+ *IXP4XX_EXP_CS0 &= ~IXP4XX_FLASH_WRITABLE; -+ -+ return 0; -+} -+ -+static int ixp4xx_flash_probe(struct device *_dev) -+{ -+ struct platform_device *dev = to_platform_device(_dev); -+ struct flash_platform_data *plat = dev->dev.platform_data; -+ struct ixp4xx_flash_info *info; -+ int err = -1; -+ -+ if (!plat) -+ return -ENODEV; -+ -+ if (plat->init) { -+ err = plat->init(); -+ if (err) -+ return err; -+ } -+ -+ info = kmalloc(sizeof(struct ixp4xx_flash_info), GFP_KERNEL); -+ if(!info) { -+ err = -ENOMEM; -+ goto Error; -+ } -+ memzero(info, sizeof(struct 
ixp4xx_flash_info)); -+ -+ dev_set_drvdata(&dev->dev, info); -+ -+ /* -+ * Enable flash write -+ * TODO: Move this out to board specific code -+ */ -+ *IXP4XX_EXP_CS0 |= IXP4XX_FLASH_WRITABLE; -+ -+ /* -+ * Tell the MTD layer we're not 1:1 mapped so that it does -+ * not attempt to do a direct access on us. -+ */ -+ info->map.phys = NO_XIP; -+ info->map.size = dev->resource->end - dev->resource->start + 1; -+ -+ /* -+ * We only support 16-bit accesses for now. If and when -+ * any board use 8-bit access, we'll fixup the driver to -+ * handle that. -+ */ -+ info->map.bankwidth = 2; -+ info->map.name = dev->dev.bus_id; -+ info->map.read = ixp4xx_read16, -+ info->map.write = ixp4xx_probe_write16, -+ info->map.copy_from = ixp4xx_copy_from, -+ -+ info->res = request_mem_region(dev->resource->start, -+ dev->resource->end - dev->resource->start + 1, -+ "IXP4XXFlash"); -+ if (!info->res) { -+ printk(KERN_ERR "IXP4XXFlash: Could not reserve memory region\n"); -+ err = -ENOMEM; -+ goto Error; -+ } -+ -+ info->map.map_priv_1 = -+ (void __iomem *) ioremap(dev->resource->start, -+ dev->resource->end - dev->resource->start + 1); -+ if (!info->map.map_priv_1) { -+ printk(KERN_ERR "IXP4XXFlash: Failed to ioremap region\n"); -+ err = -EIO; -+ goto Error; -+ } -+ -+ info->mtd = do_map_probe(plat->map_name, &info->map); -+ if (!info->mtd) { -+ printk(KERN_ERR "IXP4XXFlash: map_probe failed\n"); -+ err = -ENXIO; -+ goto Error; -+ } -+ info->mtd->owner = THIS_MODULE; -+ -+ /* Use the fast version */ -+ info->map.write = ixp4xx_write16, -+ -+ err = parse_mtd_partitions(info->mtd, probes, &info->partitions, 0); -+ if (err > 0) { -+ err = add_mtd_partitions(info->mtd, info->partitions, err); -+ if(err) -+ printk(KERN_ERR "Could not parse partitions\n"); -+ } -+ -+ if (err) -+ goto Error; -+ -+ return 0; -+ -+Error: -+ ixp4xx_flash_remove(_dev); -+ return err; -+} -+ -+static struct device_driver ixp4xx_flash_driver = { -+ .name = "IXP4XX-Flash", -+ .bus = &platform_bus_type, -+ .probe = 
ixp4xx_flash_probe, -+ .remove = ixp4xx_flash_remove, -+}; -+ -+static int __init ixp4xx_flash_init(void) -+{ -+ return driver_register(&ixp4xx_flash_driver); -+} -+ -+static void __exit ixp4xx_flash_exit(void) -+{ -+ driver_unregister(&ixp4xx_flash_driver); -+} -+ -+ -+module_init(ixp4xx_flash_init); -+module_exit(ixp4xx_flash_exit); -+ -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("MTD map driver for Intel IXP4xx systems") -+MODULE_AUTHOR("Deepak Saxena"); -+ -Index: linux-2.6.5/drivers/mtd/maps/l440gx.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/l440gx.c 2004-04-03 22:37:59.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/l440gx.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: l440gx.c,v 1.12 2003/05/21 12:45:19 dwmw2 Exp $ -+ * $Id: l440gx.c,v 1.14 2004/09/16 23:27:13 gleixner Exp $ - * - * BIOS Flash chip on Intel 440GX board. - * -@@ -46,7 +46,7 @@ - struct map_info l440gx_map = { - .name = "L440GX BIOS", - .size = WINDOW_SIZE, -- .buswidth = BUSWIDTH, -+ .bankwidth = BUSWIDTH, - .phys = WINDOW_ADDR, - #if 0 - /* FIXME verify that this is the -@@ -73,7 +73,7 @@ - return -ENODEV; - } - -- l440gx_map.virt = (unsigned long)ioremap_nocache(WINDOW_ADDR, WINDOW_SIZE); -+ l440gx_map.virt = (void __iomem *)ioremap_nocache(WINDOW_ADDR, WINDOW_SIZE); - - if (!l440gx_map.virt) { - printk(KERN_WARNING "Failed to ioremap L440GX flash region\n"); -Index: linux-2.6.5/drivers/mtd/maps/lasat.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/lasat.c 2004-04-03 22:36:25.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/lasat.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,14 +1,13 @@ - /* -- * Flash device on lasat 100 and 200 boards -+ * Flash device on Lasat 100 and 200 boards - * -- * Presumably (C) 2002 Brian Murphy <brian@murphy.dk> or whoever he -- * works for. 
-+ * (C) 2002 Brian Murphy <brian@murphy.dk> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * -- * $Id: lasat.c,v 1.5 2003/05/21 12:45:19 dwmw2 Exp $ -+ * $Id: lasat.c,v 1.8 2004/09/16 23:27:13 gleixner Exp $ - * - */ - -@@ -22,44 +21,53 @@ - #include <linux/mtd/partitions.h> - #include <linux/config.h> - #include <asm/lasat/lasat.h> --#include <asm/lasat/lasat_mtd.h> - --static struct mtd_info *mymtd; -- --static struct map_info sp_map = { -- .name = "SP flash", -- .buswidth = 4, --}; -+static struct mtd_info *lasat_mtd; - - static struct mtd_partition partition_info[LASAT_MTD_LAST]; - static char *lasat_mtd_partnames[] = {"Bootloader", "Service", "Normal", "Filesystem", "Config"}; - --static int __init init_sp(void) -+static void lasat_set_vpp(struct map_info *map, int vpp) - { -- int i; -- /* this does not play well with the old flash code which -- * protects and uprotects the flash when necessary */ -- /* FIXME: Implement set_vpp() */ -- printk(KERN_NOTICE "Unprotecting flash\n"); -- *lasat_misc->flash_wp_reg |= 1 << lasat_misc->flash_wp_bit; -- -- sp_map.virt = lasat_flash_partition_start(LASAT_MTD_BOOTLOADER); -- sp_map.phys = virt_to_phys(sp_map.virt); -- sp_map.size = lasat_board_info.li_flash_size; -+ if (vpp) -+ *lasat_misc->flash_wp_reg |= 1 << lasat_misc->flash_wp_bit; -+ else -+ *lasat_misc->flash_wp_reg &= ~(1 << lasat_misc->flash_wp_bit); -+} -+ -+static struct map_info lasat_map = { -+ .name = "LASAT flash", -+ .bankwidth = 4, -+ .set_vpp = lasat_set_vpp -+}; - -- simple_map_init(&sp_map); -+static int __init init_lasat(void) -+{ -+ int i; -+ /* since we use AMD chips and set_vpp is not implimented -+ * for these (yet) we still have to permanently enable flash write */ -+ printk(KERN_NOTICE "Unprotecting flash\n"); -+ ENABLE_VPP((&lasat_map)); -+ -+ lasat_map.phys = 
lasat_flash_partition_start(LASAT_MTD_BOOTLOADER); -+ lasat_map.virt = (void __iomem *)ioremap_nocache( -+ lasat_map.phys, lasat_board_info.li_flash_size); -+ lasat_map.size = lasat_board_info.li_flash_size; - -- printk(KERN_NOTICE "sp flash device: %lx at %lx\n", -- sp_map.size, sp_map.phys); -+ simple_map_init(&lasat_map); - - for (i=0; i < LASAT_MTD_LAST; i++) - partition_info[i].name = lasat_mtd_partnames[i]; - -- mymtd = do_map_probe("cfi_probe", &sp_map); -- if (mymtd) { -+ lasat_mtd = do_map_probe("cfi_probe", &lasat_map); -+ -+ if (!lasat_mtd) -+ lasat_mtd = do_map_probe("jedec_probe", &lasat_map); -+ -+ if (lasat_mtd) { - u32 size, offset = 0; - -- mymtd->owner = THIS_MODULE; -+ lasat_mtd->owner = THIS_MODULE; - - for (i=0; i < LASAT_MTD_LAST; i++) { - size = lasat_flash_partition_size(i); -@@ -68,26 +76,26 @@ - offset += size; - } - -- add_mtd_partitions( mymtd, partition_info, LASAT_MTD_LAST ); -+ add_mtd_partitions( lasat_mtd, partition_info, LASAT_MTD_LAST ); - return 0; - } - - return -ENXIO; - } - --static void __exit cleanup_sp(void) -+static void __exit cleanup_lasat(void) - { -- if (mymtd) { -- del_mtd_partitions(mymtd); -- map_destroy(mymtd); -+ if (lasat_mtd) { -+ del_mtd_partitions(lasat_mtd); -+ map_destroy(lasat_mtd); - } -- if (sp_map.virt) { -- sp_map.virt = 0; -+ if (lasat_map.virt) { -+ lasat_map.virt = 0; - } - } - --module_init(init_sp); --module_exit(cleanup_sp); -+module_init(init_lasat); -+module_exit(cleanup_lasat); - - MODULE_LICENSE("GPL"); - MODULE_AUTHOR("Brian Murphy <brian@murphy.dk>"); -Index: linux-2.6.5/drivers/mtd/maps/lubbock-flash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/lubbock-flash.c 2004-04-03 22:37:37.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/lubbock-flash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: lubbock-flash.c,v 1.9 2003/06/23 11:48:18 dwmw2 Exp $ -+ * $Id: lubbock-flash.c,v 1.17 2004/09/16 23:27:13 gleixner 
Exp $ - * - * Map driver for the Lubbock developer platform. - * -@@ -15,11 +15,12 @@ - #include <linux/types.h> - #include <linux/kernel.h> - #include <linux/init.h> --#include <asm/io.h> - #include <linux/mtd/mtd.h> - #include <linux/mtd/map.h> - #include <linux/mtd/partitions.h> -+#include <asm/io.h> - #include <asm/hardware.h> -+#include <asm/arch/lubbock.h> - - - #define ROM_ADDR 0x00000000 -@@ -27,12 +28,19 @@ - - #define WINDOW_SIZE 64*1024*1024 - -+static void lubbock_map_inval_cache(struct map_info *map, unsigned long from, ssize_t len) -+{ -+ consistent_sync((char *)map->cached + from, len, DMA_FROM_DEVICE); -+} -+ - static struct map_info lubbock_maps[2] = { { - .size = WINDOW_SIZE, - .phys = 0x00000000, -+ .inval_cache = lubbock_map_inval_cache, - }, { - .size = WINDOW_SIZE, - .phys = 0x04000000, -+ .inval_cache = lubbock_map_inval_cache, - } }; - - static struct mtd_partition lubbock_partitions[] = { -@@ -60,10 +68,10 @@ - - static int __init init_lubbock(void) - { -- int flashboot = (CONF_SWITCHES & 1); -+ int flashboot = (LUB_CONF_SWITCHES & 1); - int ret = 0, i; - -- lubbock_maps[0].buswidth = lubbock_maps[1].buswidth = -+ lubbock_maps[0].bankwidth = lubbock_maps[1].bankwidth = - (BOOT_DEF & 1) ? 
2 : 4; - - /* Compensate for the nROMBT switch which swaps the flash banks */ -@@ -74,23 +82,28 @@ - lubbock_maps[flashboot].name = "Lubbock Boot ROM"; - - for (i = 0; i < 2; i++) { -- lubbock_maps[i].virt = (unsigned long)ioremap(lubbock_maps[i].phys, WINDOW_SIZE); -+ lubbock_maps[i].virt = (void __iomem *)ioremap(lubbock_maps[i].phys, WINDOW_SIZE); - if (!lubbock_maps[i].virt) { - printk(KERN_WARNING "Failed to ioremap %s\n", lubbock_maps[i].name); - if (!ret) - ret = -ENOMEM; - continue; - } -+ lubbock_maps[i].cached = ioremap_cached(lubbock_maps[i].phys, WINDOW_SIZE); -+ if (!lubbock_maps[i].cached) -+ printk(KERN_WARNING "Failed to ioremap cached %s\n", lubbock_maps[i].name); - simple_map_init(&lubbock_maps[i]); - -- printk(KERN_NOTICE "Probing %s at physical address 0x%08lx (%d-bit buswidth)\n", -+ printk(KERN_NOTICE "Probing %s at physical address 0x%08lx (%d-bit bankwidth)\n", - lubbock_maps[i].name, lubbock_maps[i].phys, -- lubbock_maps[i].buswidth * 8); -+ lubbock_maps[i].bankwidth * 8); - - mymtds[i] = do_map_probe("cfi_probe", &lubbock_maps[i]); - - if (!mymtds[i]) { - iounmap((void *)lubbock_maps[i].virt); -+ if (lubbock_maps[i].cached) -+ iounmap(lubbock_maps[i].cached); - if (!ret) - ret = -EIO; - continue; -@@ -137,6 +150,8 @@ - - map_destroy(mymtds[i]); - iounmap((void *)lubbock_maps[i].virt); -+ if (lubbock_maps[i].cached) -+ iounmap(lubbock_maps[i].cached); - - if (parsed_parts[i]) - kfree(parsed_parts[i]); -Index: linux-2.6.5/drivers/mtd/maps/map_funcs.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/map_funcs.c 2005-02-01 16:55:39.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/map_funcs.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: map_funcs.c,v 1.2 2003/05/21 15:15:07 dwmw2 Exp $ -+ * $Id: map_funcs.c,v 1.9 2004/07/13 22:33:15 dwmw2 Exp $ - * - * Out-of-line map I/O functions for simple maps when CONFIG_COMPLEX_MAPPINGS - * is enabled. 
-@@ -7,87 +7,35 @@ - - #include <linux/kernel.h> - #include <linux/module.h> --#include <linux/config.h> --#include <linux/types.h> --#include <linux/string.h> --#include <asm/io.h> - - #include <linux/mtd/map.h> --#include <linux/mtd/cfi.h> - --static u8 simple_map_read8(struct map_info *map, unsigned long ofs) -+static map_word simple_map_read(struct map_info *map, unsigned long ofs) - { -- return __raw_readb(map->virt + ofs); -+ return inline_map_read(map, ofs); - } - --static u16 simple_map_read16(struct map_info *map, unsigned long ofs) -+static void simple_map_write(struct map_info *map, const map_word datum, unsigned long ofs) - { -- return __raw_readw(map->virt + ofs); --} -- --static u32 simple_map_read32(struct map_info *map, unsigned long ofs) --{ -- return __raw_readl(map->virt + ofs); --} -- --static u64 simple_map_read64(struct map_info *map, unsigned long ofs) --{ --#ifndef CONFIG_MTD_CFI_B8 /* 64-bit mappings */ -- BUG(); -- return 0; --#else -- return __raw_readll(map->virt + ofs); --#endif --} -- --static void simple_map_write8(struct map_info *map, u8 datum, unsigned long ofs) --{ -- __raw_writeb(datum, map->virt + ofs); -- mb(); --} -- --static void simple_map_write16(struct map_info *map, u16 datum, unsigned long ofs) --{ -- __raw_writew(datum, map->virt + ofs); -- mb(); --} -- --static void simple_map_write32(struct map_info *map, u32 datum, unsigned long ofs) --{ -- __raw_writel(datum, map->virt + ofs); -- mb(); --} -- --static void simple_map_write64(struct map_info *map, u64 datum, unsigned long ofs) --{ --#ifndef CONFIG_MTD_CFI_B8 /* 64-bit mappings */ -- BUG(); --#else -- __raw_writell(datum, map->virt + ofs); -- mb(); --#endif /* CFI_B8 */ -+ inline_map_write(map, datum, ofs); - } - - static void simple_map_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len) - { -- memcpy_fromio(to, (char *)map->virt + from, len); -+ inline_map_copy_from(map, to, from, len); - } - - static void simple_map_copy_to(struct map_info 
*map, unsigned long to, const void *from, ssize_t len) - { -- memcpy_toio((char *)map->virt + to, from, len); -+ inline_map_copy_to(map, to, from, len); - } - - void simple_map_init(struct map_info *map) - { -- map->read8 = simple_map_read8; -- map->read16 = simple_map_read16; -- map->read32 = simple_map_read32; -- map->read64 = simple_map_read64; -- map->write8 = simple_map_write8; -- map->write16 = simple_map_write16; -- map->write32 = simple_map_write32; -- map->write64 = simple_map_write64; -+ BUG_ON(!map_bankwidth_supported(map->bankwidth)); -+ -+ map->read = simple_map_read; -+ map->write = simple_map_write; - map->copy_from = simple_map_copy_from; - map->copy_to = simple_map_copy_to; - } -Index: linux-2.6.5/drivers/mtd/maps/mbx860.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/mbx860.c 2004-04-03 22:36:54.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/mbx860.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: mbx860.c,v 1.5 2003/05/21 12:45:19 dwmw2 Exp $ -+ * $Id: mbx860.c,v 1.7 2004/09/16 23:27:13 gleixner Exp $ - * - * Handle mapping of the flash on MBX860 boards - * -@@ -54,13 +54,13 @@ - .name = "MBX flash", - .size = WINDOW_SIZE, - .phys = WINDOW_ADDR, -- .buswidth = 4, -+ .bankwidth = 4, - }; - - int __init init_mbx(void) - { - printk(KERN_NOTICE "Motorola MBX flash device: 0x%x at 0x%x\n", WINDOW_SIZE*4, WINDOW_ADDR); -- mbx_map.virt = (unsigned long)ioremap(WINDOW_ADDR, WINDOW_SIZE * 4); -+ mbx_map.virt = (void __iomem *)ioremap(WINDOW_ADDR, WINDOW_SIZE * 4); - - if (!mbx_map.virt) { - printk("Failed to ioremap\n"); -Index: linux-2.6.5/drivers/mtd/maps/mpc1211.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/mpc1211.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/mpc1211.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,81 @@ -+/* -+ * Flash on MPC-1211 -+ * -+ * $Id: mpc1211.c,v 1.4 
2004/09/16 23:27:13 gleixner Exp $ -+ * -+ * (C) 2002 Interface, Saito.K & Jeanne -+ * -+ * GPL'd -+ */ -+ -+#include <linux/module.h> -+#include <linux/types.h> -+#include <linux/kernel.h> -+#include <asm/io.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/map.h> -+#include <linux/mtd/partitions.h> -+#include <linux/config.h> -+ -+static struct mtd_info *flash_mtd; -+static struct mtd_partition *parsed_parts; -+ -+struct map_info mpc1211_flash_map = { -+ .name = "MPC-1211 FLASH", -+ .size = 0x80000, -+ .bankwidth = 1, -+}; -+ -+static struct mtd_partition mpc1211_partitions[] = { -+ { -+ .name = "IPL & ETH-BOOT", -+ .offset = 0x00000000, -+ .size = 0x10000, -+ }, -+ { -+ .name = "Flash FS", -+ .offset = 0x00010000, -+ .size = MTDPART_SIZ_FULL, -+ } -+}; -+ -+static int __init init_mpc1211_maps(void) -+{ -+ int nr_parts; -+ -+ mpc1211_flash_map.phys = 0; -+ mpc1211_flash_map.virt = (void __iomem *)P2SEGADDR(0); -+ -+ simple_map_init(&mpc1211_flash_map); -+ -+ printk(KERN_NOTICE "Probing for flash chips at 0x00000000:\n"); -+ flash_mtd = do_map_probe("jedec_probe", &mpc1211_flash_map); -+ if (!flash_mtd) { -+ printk(KERN_NOTICE "Flash chips not detected at either possible location.\n"); -+ return -ENXIO; -+ } -+ printk(KERN_NOTICE "MPC-1211: Flash at 0x%08lx\n", mpc1211_flash_map.virt & 0x1fffffff); -+ flash_mtd->module = THIS_MODULE; -+ -+ parsed_parts = mpc1211_partitions; -+ nr_parts = ARRAY_SIZE(mpc1211_partitions); -+ -+ add_mtd_partitions(flash_mtd, parsed_parts, nr_parts); -+ return 0; -+} -+ -+static void __exit cleanup_mpc1211_maps(void) -+{ -+ if (parsed_parts) -+ del_mtd_partitions(flash_mtd); -+ else -+ del_mtd_device(flash_mtd); -+ map_destroy(flash_mtd); -+} -+ -+module_init(init_mpc1211_maps); -+module_exit(cleanup_mpc1211_maps); -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Saito.K & Jeanne <ksaito@interface.co.jp>"); -+MODULE_DESCRIPTION("MTD map driver for MPC-1211 boards. 
Interface"); -Index: linux-2.6.5/drivers/mtd/maps/netsc520.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/netsc520.c 2004-04-03 22:37:41.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/netsc520.c 2005-02-01 17:11:17.000000000 -0500 -@@ -3,7 +3,7 @@ - * Copyright (C) 2001 Mark Langsdorf (mark.langsdorf@amd.com) - * based on sc520cdp.c by Sysgo Real-Time Solutions GmbH - * -- * $Id: netsc520.c,v 1.9 2003/05/21 12:45:19 dwmw2 Exp $ -+ * $Id: netsc520.c,v 1.11 2004/09/16 23:27:13 gleixner Exp $ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by -@@ -84,7 +84,7 @@ - static struct map_info netsc520_map = { - .name = "netsc520 Flash Bank", - .size = WINDOW_SIZE, -- .buswidth = 4, -+ .bankwidth = 4, - .phys = WINDOW_ADDR, - }; - -@@ -95,7 +95,7 @@ - static int __init init_netsc520(void) - { - printk(KERN_NOTICE "NetSc520 flash device: 0x%lx at 0x%lx\n", netsc520_map.size, netsc520_map.phys); -- netsc520_map.virt = (unsigned long)ioremap_nocache(netsc520_map.phys, netsc520_map.size); -+ netsc520_map.virt = (void __iomem *)ioremap_nocache(netsc520_map.phys, netsc520_map.size); - - if (!netsc520_map.virt) { - printk("Failed to ioremap_nocache\n"); -Index: linux-2.6.5/drivers/mtd/maps/nettel.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/nettel.c 2004-04-03 22:36:52.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/nettel.c 2005-02-01 17:11:17.000000000 -0500 -@@ -6,7 +6,7 @@ - * (C) Copyright 2000-2001, Greg Ungerer (gerg@snapgear.com) - * (C) Copyright 2001-2002, SnapGear (www.snapgear.com) - * -- * $Id: nettel.c,v 1.4 2003/05/20 20:59:30 dwmw2 Exp $ -+ * $Id: nettel.c,v 1.6 2004/09/16 23:27:13 gleixner Exp $ - */ - - /****************************************************************************/ -@@ -65,7 +65,7 @@ - static struct map_info 
nettel_intel_map = { - .name = "SnapGear Intel", - .size = 0, -- .buswidth = INTEL_BUSWIDTH, -+ .bankwidth = INTEL_BUSWIDTH, - }; - - static struct mtd_partition nettel_intel_partitions[] = { -@@ -103,7 +103,7 @@ - static struct map_info nettel_amd_map = { - .name = "SnapGear AMD", - .size = AMD_WINDOW_MAXSIZE, -- .buswidth = AMD_BUSWIDTH, -+ .bankwidth = AMD_BUSWIDTH, - }; - - static struct mtd_partition nettel_amd_partitions[] = { -@@ -273,8 +273,7 @@ - __asm__ ("wbinvd"); - - nettel_amd_map.phys = amdaddr; -- nettel_amd_map.virt = (unsigned long) -- ioremap_nocache(amdaddr, maxsize); -+ nettel_amd_map.virt = (void __iomem *) ioremap_nocache(amdaddr, maxsize); - if (!nettel_amd_map.virt) { - printk("SNAPGEAR: failed to ioremap() BOOTCS\n"); - return(-EIO); -Index: linux-2.6.5/drivers/mtd/maps/ocelot.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/ocelot.c 2004-04-03 22:38:22.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/ocelot.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: ocelot.c,v 1.12 2003/05/21 12:45:19 dwmw2 Exp $ -+ * $Id: ocelot.c,v 1.14 2004/09/16 23:27:13 gleixner Exp $ - * - * Flash on Momenco Ocelot - */ -@@ -49,14 +49,14 @@ - struct map_info ocelot_flash_map = { - .name = "Ocelot boot flash", - .size = FLASH_WINDOW_SIZE, -- .buswidth = FLASH_BUSWIDTH, -+ .bankwidth = FLASH_BUSWIDTH, - .phys = FLASH_WINDOW_ADDR, - }; - - struct map_info ocelot_nvram_map = { - .name = "Ocelot NVRAM", - .size = NVRAM_WINDOW_SIZE, -- .buswidth = NVRAM_BUSWIDTH, -+ .bankwidth = NVRAM_BUSWIDTH, - .phys = NVRAM_WINDOW_ADDR, - }; - -@@ -81,7 +81,7 @@ - iounmap(pld); - - /* Now ioremap the NVRAM space */ -- ocelot_nvram_map.virt = (unsigned long)ioremap_nocache(NVRAM_WINDOW_ADDR, NVRAM_WINDOW_SIZE); -+ ocelot_nvram_map.virt = (void __iomem *)ioremap_nocache(NVRAM_WINDOW_ADDR, NVRAM_WINDOW_SIZE); - if (!ocelot_nvram_map.virt) { - printk(KERN_NOTICE "Failed to ioremap Ocelot NVRAM 
space\n"); - return -EIO; -@@ -101,7 +101,7 @@ - nvram_mtd->write = ocelot_ram_write; - - /* Now map the flash space */ -- ocelot_flash_map.virt = (unsigned long)ioremap_nocache(FLASH_WINDOW_ADDR, FLASH_WINDOW_SIZE); -+ ocelot_flash_map.virt = (void __iomem *)ioremap_nocache(FLASH_WINDOW_ADDR, FLASH_WINDOW_SIZE); - if (!ocelot_flash_map.virt) { - printk(KERN_NOTICE "Failed to ioremap Ocelot flash space\n"); - goto fail_2; -Index: linux-2.6.5/drivers/mtd/maps/ocotea.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/ocotea.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/ocotea.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,156 @@ -+/* -+ * Mapping for Ocotea user flash -+ * -+ * Matt Porter <mporter@kernel.crashing.org> -+ * -+ * Copyright 2002-2004 MontaVista Software Inc. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the -+ * Free Software Foundation; either version 2 of the License, or (at your -+ * option) any later version. 
-+ */ -+ -+#include <linux/module.h> -+#include <linux/types.h> -+#include <linux/kernel.h> -+#include <linux/init.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/map.h> -+#include <linux/mtd/partitions.h> -+#include <linux/config.h> -+#include <linux/version.h> -+#include <asm/io.h> -+#include <asm/ibm44x.h> -+#include <platforms/4xx/ocotea.h> -+ -+static struct mtd_info *flash; -+ -+static struct map_info ocotea_small_map = { -+ .name = "Ocotea small flash", -+ .size = OCOTEA_SMALL_FLASH_SIZE, -+ .buswidth = 1, -+}; -+ -+static struct map_info ocotea_large_map = { -+ .name = "Ocotea large flash", -+ .size = OCOTEA_LARGE_FLASH_SIZE, -+ .buswidth = 1, -+}; -+ -+static struct mtd_partition ocotea_small_partitions[] = { -+ { -+ .name = "pibs", -+ .offset = 0x0, -+ .size = 0x100000, -+ } -+}; -+ -+static struct mtd_partition ocotea_large_partitions[] = { -+ { -+ .name = "fs", -+ .offset = 0, -+ .size = 0x300000, -+ }, -+ { -+ .name = "firmware", -+ .offset = 0x300000, -+ .size = 0x100000, -+ } -+}; -+ -+#define NB_OF(x) (sizeof(x)/sizeof(x[0])) -+ -+int __init init_ocotea(void) -+{ -+ u8 fpga0_reg; -+ u8 *fpga0_adr; -+ unsigned long long small_flash_base, large_flash_base; -+ -+ fpga0_adr = ioremap64(OCOTEA_FPGA_ADDR, 16); -+ if (!fpga0_adr) -+ return -ENOMEM; -+ -+ fpga0_reg = readb((unsigned long)fpga0_adr); -+ iounmap(fpga0_adr); -+ -+ if (OCOTEA_BOOT_LARGE_FLASH(fpga0_reg)) { -+ small_flash_base = OCOTEA_SMALL_FLASH_HIGH; -+ large_flash_base = OCOTEA_LARGE_FLASH_LOW; -+ } -+ else { -+ small_flash_base = OCOTEA_SMALL_FLASH_LOW; -+ large_flash_base = OCOTEA_LARGE_FLASH_HIGH; -+ } -+ -+ ocotea_small_map.phys = small_flash_base; -+ ocotea_small_map.virt = -+ (void __iomem *)ioremap64(small_flash_base, -+ ocotea_small_map.size); -+ -+ if (!ocotea_small_map.virt) { -+ printk("Failed to ioremap flash\n"); -+ return -EIO; -+ } -+ -+ simple_map_init(&ocotea_small_map); -+ -+ flash = do_map_probe("map_rom", &ocotea_small_map); -+ if (flash) { -+ flash->owner = 
THIS_MODULE; -+ add_mtd_partitions(flash, ocotea_small_partitions, -+ NB_OF(ocotea_small_partitions)); -+ } else { -+ printk("map probe failed for flash\n"); -+ return -ENXIO; -+ } -+ -+ ocotea_large_map.phys = large_flash_base; -+ ocotea_large_map.virt = -+ (void __iomem *)ioremap64(large_flash_base, -+ ocotea_large_map.size); -+ -+ if (!ocotea_large_map.virt) { -+ printk("Failed to ioremap flash\n"); -+ return -EIO; -+ } -+ -+ simple_map_init(&ocotea_large_map); -+ -+ flash = do_map_probe("cfi_probe", &ocotea_large_map); -+ if (flash) { -+ flash->owner = THIS_MODULE; -+ add_mtd_partitions(flash, ocotea_large_partitions, -+ NB_OF(ocotea_large_partitions)); -+ } else { -+ printk("map probe failed for flash\n"); -+ return -ENXIO; -+ } -+ -+ return 0; -+} -+ -+static void __exit cleanup_ocotea(void) -+{ -+ if (flash) { -+ del_mtd_partitions(flash); -+ map_destroy(flash); -+ } -+ -+ if (ocotea_small_map.virt) { -+ iounmap((void *)ocotea_small_map.virt); -+ ocotea_small_map.virt = 0; -+ } -+ -+ if (ocotea_large_map.virt) { -+ iounmap((void *)ocotea_large_map.virt); -+ ocotea_large_map.virt = 0; -+ } -+} -+ -+module_init(init_ocotea); -+module_exit(cleanup_ocotea); -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Matt Porter <mporter@kernel.crashing.org>"); -+MODULE_DESCRIPTION("MTD map and partitions for IBM 440GX Ocotea boards"); -Index: linux-2.6.5/drivers/mtd/maps/octagon-5066.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/octagon-5066.c 2004-04-03 22:36:25.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/octagon-5066.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,4 +1,4 @@ --// $Id: octagon-5066.c,v 1.24 2003/05/21 15:15:07 dwmw2 Exp $ -+// $Id: octagon-5066.c,v 1.26 2004/07/12 22:38:29 dwmw2 Exp $ - /* ###################################################################### - - Octagon 5066 MTD Driver. 
-@@ -62,32 +62,12 @@ - } - - --static __u8 oct5066_read8(struct map_info *map, unsigned long ofs) -+static map_word oct5066_read8(struct map_info *map, unsigned long ofs) - { -- __u8 ret; -+ map_word ret; - spin_lock(&oct5066_spin); - oct5066_page(map, ofs); -- ret = readb(iomapadr + (ofs & WINDOW_MASK)); -- spin_unlock(&oct5066_spin); -- return ret; --} -- --static __u16 oct5066_read16(struct map_info *map, unsigned long ofs) --{ -- __u16 ret; -- spin_lock(&oct5066_spin); -- oct5066_page(map, ofs); -- ret = readw(iomapadr + (ofs & WINDOW_MASK)); -- spin_unlock(&oct5066_spin); -- return ret; --} -- --static __u32 oct5066_read32(struct map_info *map, unsigned long ofs) --{ -- __u32 ret; -- spin_lock(&oct5066_spin); -- oct5066_page(map, ofs); -- ret = readl(iomapadr + (ofs & WINDOW_MASK)); -+ ret.x[0] = readb(iomapadr + (ofs & WINDOW_MASK)); - spin_unlock(&oct5066_spin); - return ret; - } -@@ -109,27 +89,11 @@ - } - } - --static void oct5066_write8(struct map_info *map, __u8 d, unsigned long adr) --{ -- spin_lock(&oct5066_spin); -- oct5066_page(map, adr); -- writeb(d, iomapadr + (adr & WINDOW_MASK)); -- spin_unlock(&oct5066_spin); --} -- --static void oct5066_write16(struct map_info *map, __u16 d, unsigned long adr) --{ -- spin_lock(&oct5066_spin); -- oct5066_page(map, adr); -- writew(d, iomapadr + (adr & WINDOW_MASK)); -- spin_unlock(&oct5066_spin); --} -- --static void oct5066_write32(struct map_info *map, __u32 d, unsigned long adr) -+static void oct5066_write8(struct map_info *map, map_word d, unsigned long adr) - { - spin_lock(&oct5066_spin); - oct5066_page(map, adr); -- writel(d, iomapadr + (adr & WINDOW_MASK)); -+ writeb(d.x[0], iomapadr + (adr & WINDOW_MASK)); - spin_unlock(&oct5066_spin); - } - -@@ -155,14 +119,10 @@ - .name = "Octagon 5066 Socket", - .phys = NO_XIP, - .size = 512 * 1024, -- .buswidth = 1, -- .read8 = oct5066_read8, -- .read16 = oct5066_read16, -- .read32 = oct5066_read32, -+ .bankwidth = 1, -+ .read = oct5066_read8, - .copy_from = 
oct5066_copy_from, -- .write8 = oct5066_write8, -- .write16 = oct5066_write16, -- .write32 = oct5066_write32, -+ .write = oct5066_write8, - .copy_to = oct5066_copy_to, - .map_priv_1 = 1<<6 - }, -@@ -170,14 +130,10 @@ - .name = "Octagon 5066 Internal Flash", - .phys = NO_XIP, - .size = 2 * 1024 * 1024, -- .buswidth = 1, -- .read8 = oct5066_read8, -- .read16 = oct5066_read16, -- .read32 = oct5066_read32, -+ .bankwidth = 1, -+ .read = oct5066_read8, - .copy_from = oct5066_copy_from, -- .write8 = oct5066_write8, -- .write16 = oct5066_write16, -- .write32 = oct5066_write32, -+ .write = oct5066_write8, - .copy_to = oct5066_copy_to, - .map_priv_1 = 2<<6 - } -Index: linux-2.6.5/drivers/mtd/maps/omap-toto-flash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/omap-toto-flash.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/omap-toto-flash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,137 @@ -+/* -+ * NOR Flash memory access on TI Toto board -+ * -+ * jzhang@ti.com (C) 2003 Texas Instruments. -+ * -+ * (C) 2002 MontVista Software, Inc. 
-+ * -+ * $Id: omap-toto-flash.c,v 1.3 2004/09/16 23:27:13 gleixner Exp $ -+ */ -+ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/types.h> -+#include <linux/kernel.h> -+ -+#include <linux/errno.h> -+#include <linux/init.h> -+ -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/map.h> -+#include <linux/mtd/partitions.h> -+ -+#include <asm/hardware.h> -+#include <asm/io.h> -+ -+ -+#ifndef CONFIG_ARCH_OMAP -+#error This is for OMAP architecture only -+#endif -+ -+//these lines need be moved to a hardware header file -+#define OMAP_TOTO_FLASH_BASE 0xd8000000 -+#define OMAP_TOTO_FLASH_SIZE 0x80000 -+ -+static struct map_info omap_toto_map_flash = { -+ .name = "OMAP Toto flash", -+ .bankwidth = 2, -+ .virt = (void __iomem *)OMAP_TOTO_FLASH_BASE, -+}; -+ -+ -+static struct mtd_partition toto_flash_partitions[] = { -+ { -+ .name = "BootLoader", -+ .size = 0x00040000, /* hopefully u-boot will stay 128k + 128*/ -+ .offset = 0, -+ .mask_flags = MTD_WRITEABLE, /* force read-only */ -+ }, { -+ .name = "ReservedSpace", -+ .size = 0x00030000, -+ .offset = MTDPART_OFS_APPEND, -+ //mask_flags: MTD_WRITEABLE, /* force read-only */ -+ }, { -+ .name = "EnvArea", /* bottom 64KiB for env vars */ -+ .size = MTDPART_SIZ_FULL, -+ .offset = MTDPART_OFS_APPEND, -+ } -+}; -+ -+static struct mtd_partition *parsed_parts; -+ -+static struct mtd_info *flash_mtd; -+ -+static int __init init_flash (void) -+{ -+ -+ struct mtd_partition *parts; -+ int nb_parts = 0; -+ int parsed_nr_parts = 0; -+ const char *part_type; -+ -+ /* -+ * Static partition definition selection -+ */ -+ part_type = "static"; -+ -+ parts = toto_flash_partitions; -+ nb_parts = ARRAY_SIZE(toto_flash_partitions); -+ omap_toto_map_flash.size = OMAP_TOTO_FLASH_SIZE; -+ omap_toto_map_flash.phys = virt_to_phys(OMAP_TOTO_FLASH_BASE); -+ -+ simple_map_init(&omap_toto_map_flash); -+ /* -+ * Now let's probe for the actual flash. Do it here since -+ * specific machine settings might have been set above. 
-+ */ -+ printk(KERN_NOTICE "OMAP toto flash: probing %d-bit flash bus\n", -+ omap_toto_map_flash.bankwidth*8); -+ flash_mtd = do_map_probe("jedec_probe", &omap_toto_map_flash); -+ if (!flash_mtd) -+ return -ENXIO; -+ -+ if (parsed_nr_parts > 0) { -+ parts = parsed_parts; -+ nb_parts = parsed_nr_parts; -+ } -+ -+ if (nb_parts == 0) { -+ printk(KERN_NOTICE "OMAP toto flash: no partition info available," -+ "registering whole flash at once\n"); -+ if (add_mtd_device(flash_mtd)){ -+ return -ENXIO; -+ } -+ } else { -+ printk(KERN_NOTICE "Using %s partition definition\n", -+ part_type); -+ return add_mtd_partitions(flash_mtd, parts, nb_parts); -+ } -+ return 0; -+} -+ -+int __init omap_toto_mtd_init(void) -+{ -+ int status; -+ -+ if (status = init_flash()) { -+ printk(KERN_ERR "OMAP Toto Flash: unable to init map for toto flash\n"); -+ } -+ return status; -+} -+ -+static void __exit omap_toto_mtd_cleanup(void) -+{ -+ if (flash_mtd) { -+ del_mtd_partitions(flash_mtd); -+ map_destroy(flash_mtd); -+ if (parsed_parts) -+ kfree(parsed_parts); -+ } -+} -+ -+module_init(omap_toto_mtd_init); -+module_exit(omap_toto_mtd_cleanup); -+ -+MODULE_AUTHOR("Jian Zhang"); -+MODULE_DESCRIPTION("OMAP Toto board map driver"); -+MODULE_LICENSE("GPL"); -Index: linux-2.6.5/drivers/mtd/maps/pb1550-flash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/pb1550-flash.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/pb1550-flash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,204 @@ -+/* -+ * Flash memory access on Alchemy Pb1550 board -+ * -+ * $Id: pb1550-flash.c,v 1.5 2004/09/16 23:27:13 gleixner Exp $ -+ * -+ * (C) 2004 Embedded Edge, LLC, based on pb1550-flash.c: -+ * (C) 2003 Pete Popov <ppopov@pacbell.net> -+ * -+ */ -+ -+#include <linux/config.h> -+#include <linux/init.h> -+#include <linux/module.h> -+#include <linux/types.h> -+#include <linux/kernel.h> -+ -+#include <linux/mtd/mtd.h> -+#include 
<linux/mtd/map.h> -+#include <linux/mtd/partitions.h> -+ -+#include <asm/io.h> -+#include <asm/au1000.h> -+#include <asm/pb1550.h> -+ -+#ifdef DEBUG_RW -+#define DBG(x...) printk(x) -+#else -+#define DBG(x...) -+#endif -+ -+static unsigned long window_addr; -+static unsigned long window_size; -+ -+ -+static struct map_info pb1550_map = { -+ .name = "Pb1550 flash", -+}; -+ -+static unsigned char flash_bankwidth = 4; -+ -+/* -+ * Support only 64MB NOR Flash parts -+ */ -+ -+#ifdef PB1550_BOTH_BANKS -+/* both banks will be used. Combine the first bank and the first -+ * part of the second bank together into a single jffs/jffs2 -+ * partition. -+ */ -+static struct mtd_partition pb1550_partitions[] = { -+ /* assume boot[2:0]:swap is '0000' or '1000', which translates to: -+ * 1C00 0000 1FFF FFFF CE0 64MB Boot NOR Flash -+ * 1800 0000 1BFF FFFF CE0 64MB Param NOR Flash -+ */ -+ { -+ .name = "User FS", -+ .size = (0x1FC00000 - 0x18000000), -+ .offset = 0x0000000 -+ },{ -+ .name = "yamon", -+ .size = 0x0100000, -+ .offset = MTDPART_OFS_APPEND, -+ .mask_flags = MTD_WRITEABLE -+ },{ -+ .name = "raw kernel", -+ .size = (0x300000 - 0x40000), /* last 256KB is yamon env */ -+ .offset = MTDPART_OFS_APPEND, -+ } -+}; -+#elif defined(PB1550_BOOT_ONLY) -+static struct mtd_partition pb1550_partitions[] = { -+ /* assume boot[2:0]:swap is '0000' or '1000', which translates to: -+ * 1C00 0000 1FFF FFFF CE0 64MB Boot NOR Flash -+ */ -+ { -+ .name = "User FS", -+ .size = 0x03c00000, -+ .offset = 0x0000000 -+ },{ -+ .name = "yamon", -+ .size = 0x0100000, -+ .offset = MTDPART_OFS_APPEND, -+ .mask_flags = MTD_WRITEABLE -+ },{ -+ .name = "raw kernel", -+ .size = (0x300000-0x40000), /* last 256KB is yamon env */ -+ .offset = MTDPART_OFS_APPEND, -+ } -+}; -+#elif defined(PB1550_USER_ONLY) -+static struct mtd_partition pb1550_partitions[] = { -+ /* assume boot[2:0]:swap is '0000' or '1000', which translates to: -+ * 1800 0000 1BFF FFFF CE0 64MB Param NOR Flash -+ */ -+ { -+ .name = "User FS", 
-+ .size = (0x4000000 - 0x200000), /* reserve 2MB for raw kernel */ -+ .offset = 0x0000000 -+ },{ -+ .name = "raw kernel", -+ .size = MTDPART_SIZ_FULL, -+ .offset = MTDPART_OFS_APPEND, -+ } -+}; -+#else -+#error MTD_PB1550 define combo error /* should never happen */ -+#endif -+ -+#define NB_OF(x) (sizeof(x)/sizeof(x[0])) -+ -+static struct mtd_info *mymtd; -+ -+/* -+ * Probe the flash density and setup window address and size -+ * based on user CONFIG options. There are times when we don't -+ * want the MTD driver to be probing the boot or user flash, -+ * so having the option to enable only one bank is important. -+ */ -+int setup_flash_params(void) -+{ -+ u16 boot_swapboot; -+ boot_swapboot = (au_readl(MEM_STSTAT) & (0x7<<1)) | -+ ((bcsr->status >> 6) & 0x1); -+ printk("Pb1550 MTD: boot:swap %d\n", boot_swapboot); -+ -+ switch (boot_swapboot) { -+ case 0: /* 512Mbit devices, both enabled */ -+ case 1: -+ case 8: -+ case 9: -+#if defined(PB1550_BOTH_BANKS) -+ window_addr = 0x18000000; -+ window_size = 0x8000000; -+#elif defined(PB1550_BOOT_ONLY) -+ window_addr = 0x1C000000; -+ window_size = 0x4000000; -+#else /* USER ONLY */ -+ window_addr = 0x1E000000; -+ window_size = 0x4000000; -+#endif -+ break; -+ case 0xC: -+ case 0xD: -+ case 0xE: -+ case 0xF: -+ /* 64 MB Boot NOR Flash is disabled */ -+ /* and the start address is moved to 0x0C00000 */ -+ window_addr = 0x0C000000; -+ window_size = 0x4000000; -+ default: -+ printk("Pb1550 MTD: unsupported boot:swap setting\n"); -+ return 1; -+ } -+ return 0; -+} -+ -+int __init pb1550_mtd_init(void) -+{ -+ struct mtd_partition *parts; -+ int nb_parts = 0; -+ -+ /* Default flash bankwidth */ -+ pb1550_map.bankwidth = flash_bankwidth; -+ -+ if (setup_flash_params()) -+ return -ENXIO; -+ -+ /* -+ * Static partition definition selection -+ */ -+ parts = pb1550_partitions; -+ nb_parts = NB_OF(pb1550_partitions); -+ pb1550_map.size = window_size; -+ -+ /* -+ * Now let's probe for the actual flash. 
Do it here since -+ * specific machine settings might have been set above. -+ */ -+ printk(KERN_NOTICE "Pb1550 flash: probing %d-bit flash bus\n", -+ pb1550_map.bankwidth*8); -+ pb1550_map.virt = -+ (void __iomem *)ioremap(window_addr, window_size); -+ mymtd = do_map_probe("cfi_probe", &pb1550_map); -+ if (!mymtd) return -ENXIO; -+ mymtd->owner = THIS_MODULE; -+ -+ add_mtd_partitions(mymtd, parts, nb_parts); -+ return 0; -+} -+ -+static void __exit pb1550_mtd_cleanup(void) -+{ -+ if (mymtd) { -+ del_mtd_partitions(mymtd); -+ map_destroy(mymtd); -+ } -+} -+ -+module_init(pb1550_mtd_init); -+module_exit(pb1550_mtd_cleanup); -+ -+MODULE_AUTHOR("Embedded Edge, LLC"); -+MODULE_DESCRIPTION("Pb1550 mtd map driver"); -+MODULE_LICENSE("GPL"); -Index: linux-2.6.5/drivers/mtd/maps/pb1xxx-flash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/pb1xxx-flash.c 2004-04-03 22:38:24.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/pb1xxx-flash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -3,14 +3,14 @@ - * - * (C) 2001 Pete Popov <ppopov@mvista.com> - * -- * $Id: pb1xxx-flash.c,v 1.9 2003/06/23 11:48:18 dwmw2 Exp $ -+ * $Id: pb1xxx-flash.c,v 1.12 2004/09/16 23:27:13 gleixner Exp $ - */ - - #include <linux/config.h> - #include <linux/module.h> - #include <linux/types.h> --#include <linux/kernel.h> - #include <linux/init.h> -+#include <linux/kernel.h> - - #include <linux/mtd/mtd.h> - #include <linux/mtd/map.h> -@@ -26,102 +26,87 @@ - #endif - - #ifdef CONFIG_MIPS_PB1000 -+ - #define WINDOW_ADDR 0x1F800000 - #define WINDOW_SIZE 0x800000 --#endif -- -- --static struct map_info pb1xxx_map = { -- .name = "Pb1xxx flash", --}; - -- --#ifdef CONFIG_MIPS_PB1000 -- --static unsigned long flash_size = 0x00800000; --static unsigned char flash_buswidth = 4; - static struct mtd_partition pb1xxx_partitions[] = { - { -- .name = "yamon env", -- .size = 0x00020000, -- .offset = 0, -- .mask_flags = MTD_WRITEABLE -- },{ -- .name = "User FS", -- 
.size = 0x003e0000, -- .offset = 0x20000, -- },{ -- .name = "boot code", -- .size = 0x100000, -- .offset = 0x400000, -- .mask_flags = MTD_WRITEABLE -- },{ -- .name = "raw/kernel", -- .size = 0x300000, -- .offset = 0x500000 -- } -+ .name = "yamon env", -+ .size = 0x00020000, -+ .offset = 0, -+ .mask_flags = MTD_WRITEABLE}, -+ { -+ .name = "User FS", -+ .size = 0x003e0000, -+ .offset = 0x20000,}, -+ { -+ .name = "boot code", -+ .size = 0x100000, -+ .offset = 0x400000, -+ .mask_flags = MTD_WRITEABLE}, -+ { -+ .name = "raw/kernel", -+ .size = 0x300000, -+ .offset = 0x500000} - }; - - #elif defined(CONFIG_MIPS_PB1500) || defined(CONFIG_MIPS_PB1100) - --static unsigned char flash_buswidth = 4; - #if defined(CONFIG_MTD_PB1500_BOOT) && defined(CONFIG_MTD_PB1500_USER) --/* both 32MiB banks will be used. Combine the first 32MiB bank and the -- * first 28MiB of the second bank together into a single jffs/jffs2 -+/* both 32MB banks will be used. Combine the first 32MB bank and the -+ * first 28MB of the second bank together into a single jffs/jffs2 - * partition. 
- */ --static unsigned long flash_size = 0x04000000; - #define WINDOW_ADDR 0x1C000000 - #define WINDOW_SIZE 0x4000000 - static struct mtd_partition pb1xxx_partitions[] = { - { -- .name = "User FS", -- .size = 0x3c00000, -- .offset = 0x0000000 -- },{ -- .name = "yamon", -- .size = 0x0100000, -- .offset = 0x3c00000, -- .mask_flags = MTD_WRITEABLE -- },{ -- .name = "raw kernel", -- .size = 0x02c0000, -- .offset = 0x3d00000 -+ .name = "User FS", -+ .size = 0x3c00000, -+ .offset = 0x0000000 -+ },{ -+ .name = "yamon", -+ .size = 0x0100000, -+ .offset = 0x3c00000, -+ .mask_flags = MTD_WRITEABLE -+ },{ -+ .name = "raw kernel", -+ .size = 0x02c0000, -+ .offset = 0x3d00000 - } - }; - #elif defined(CONFIG_MTD_PB1500_BOOT) && !defined(CONFIG_MTD_PB1500_USER) --static unsigned long flash_size = 0x02000000; - #define WINDOW_ADDR 0x1E000000 - #define WINDOW_SIZE 0x2000000 - static struct mtd_partition pb1xxx_partitions[] = { - { -- .name = "User FS", -- .size = 0x1c00000, -- .offset = 0x0000000 -- },{ -- .name = "yamon", -- .size = 0x0100000, -- .offset = 0x1c00000, -- .mask_flags = MTD_WRITEABLE -- },{ -- .name = "raw kernel", -- .size = 0x02c0000, -- .offset = 0x1d00000 -+ .name = "User FS", -+ .size = 0x1c00000, -+ .offset = 0x0000000 -+ },{ -+ .name = "yamon", -+ .size = 0x0100000, -+ .offset = 0x1c00000, -+ .mask_flags = MTD_WRITEABLE -+ },{ -+ .name = "raw kernel", -+ .size = 0x02c0000, -+ .offset = 0x1d00000 - } - }; - #elif !defined(CONFIG_MTD_PB1500_BOOT) && defined(CONFIG_MTD_PB1500_USER) --static unsigned long flash_size = 0x02000000; - #define WINDOW_ADDR 0x1C000000 - #define WINDOW_SIZE 0x2000000 - static struct mtd_partition pb1xxx_partitions[] = { - { -- .name = "User FS", -- .size = 0x1e00000, -- .offset = 0x0000000 -- },{ -- .name = "raw kernel", -- .size = 0x0200000, -- .offset = 0x1e00000, -+ .name = "User FS", -+ .size = 0x1e00000, -+ .offset = 0x0000000 -+ },{ -+ .name = "raw kernel", -+ .size = 0x0200000, -+ .offset = 0x1e00000, - } - }; - #else -@@ -131,8 
+116,20 @@ - #error Unsupported board - #endif - --static struct mtd_partition *parsed_parts; --static struct mtd_info *mymtd; -+#define NAME "Pb1x00 Linux Flash" -+#define PADDR WINDOW_ADDR -+#define BUSWIDTH 4 -+#define SIZE WINDOW_SIZE -+#define PARTITIONS 4 -+ -+static struct map_info pb1xxx_mtd_map = { -+ .name = NAME, -+ .size = SIZE, -+ .bankwidth = BUSWIDTH, -+ .phys = PADDR, -+}; -+ -+static struct mtd_info *pb1xxx_mtd; - - int __init pb1xxx_mtd_init(void) - { -@@ -140,49 +137,38 @@ - int nb_parts = 0; - char *part_type; - -- /* Default flash buswidth */ -- pb1xxx_map.buswidth = flash_buswidth; -- - /* - * Static partition definition selection - */ - part_type = "static"; - parts = pb1xxx_partitions; - nb_parts = ARRAY_SIZE(pb1xxx_partitions); -- pb1xxx_map.size = flash_size; - - /* - * Now let's probe for the actual flash. Do it here since - * specific machine settings might have been set above. - */ - printk(KERN_NOTICE "Pb1xxx flash: probing %d-bit flash bus\n", -- pb1xxx_map.buswidth*8); -- pb1xxx_map.phys = WINDOW_ADDR; -- pb1xxx_map.virt = (unsigned long)ioremap(WINDOW_ADDR, WINDOW_SIZE); -- -- simple_map_init(&pb1xxx_map); -- -- mymtd = do_map_probe("cfi_probe", &pb1xxx_map); -- if (!mymtd) { -- iounmap(pb1xxx_map.virt); -- return -ENXIO; -- } -- mymtd->owner = THIS_MODULE; -+ BUSWIDTH*8); -+ pb1xxx_mtd_map.virt = (void __iomem *)ioremap(WINDOW_ADDR, WINDOW_SIZE); -+ -+ simple_map_init(&pb1xxx_mtd_map); -+ -+ pb1xxx_mtd = do_map_probe("cfi_probe", &pb1xxx_mtd_map); -+ if (!pb1xxx_mtd) return -ENXIO; -+ pb1xxx_mtd->owner = THIS_MODULE; - -- add_mtd_partitions(mymtd, parts, nb_parts); -+ add_mtd_partitions(pb1xxx_mtd, parts, nb_parts); - return 0; - } - - static void __exit pb1xxx_mtd_cleanup(void) - { -- if (mymtd) { -- del_mtd_partitions(mymtd); -- map_destroy(mymtd); -- if (parsed_parts) -- kfree(parsed_parts); -+ if (pb1xxx_mtd) { -+ del_mtd_partitions(pb1xxx_mtd); -+ map_destroy(pb1xxx_mtd); -+ iounmap((void *) pb1xxx_mtd_map.virt); - } -- if 
(pb1xxx_map.virt) -- iounmap(pb1xxx_map.virt); - } - - module_init(pb1xxx_mtd_init); -Index: linux-2.6.5/drivers/mtd/maps/pci.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/pci.c 2004-04-03 22:36:56.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/pci.c 2005-02-01 17:11:17.000000000 -0500 -@@ -7,7 +7,7 @@ - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * -- * $Id: pci.c,v 1.5 2003/05/20 20:59:31 dwmw2 Exp $ -+ * $Id: pci.c,v 1.8 2004/07/12 22:38:29 dwmw2 Exp $ - * - * Generic PCI memory map driver. We support the following boards: - * - Intel IQ80310 ATU. -@@ -39,6 +39,74 @@ - struct pci_dev *dev; - }; - -+static map_word mtd_pci_read8(struct map_info *_map, unsigned long ofs) -+{ -+ struct map_pci_info *map = (struct map_pci_info *)_map; -+ map_word val; -+ val.x[0]= readb(map->base + map->translate(map, ofs)); -+// printk("read8 : %08lx => %02x\n", ofs, val.x[0]); -+ return val; -+} -+ -+#if 0 -+static map_word mtd_pci_read16(struct map_info *_map, unsigned long ofs) -+{ -+ struct map_pci_info *map = (struct map_pci_info *)_map; -+ map_word val; -+ val.x[0] = readw(map->base + map->translate(map, ofs)); -+// printk("read16: %08lx => %04x\n", ofs, val.x[0]); -+ return val; -+} -+#endif -+static map_word mtd_pci_read32(struct map_info *_map, unsigned long ofs) -+{ -+ struct map_pci_info *map = (struct map_pci_info *)_map; -+ map_word val; -+ val.x[0] = readl(map->base + map->translate(map, ofs)); -+// printk("read32: %08lx => %08x\n", ofs, val.x[0]); -+ return val; -+} -+ -+static void mtd_pci_copyfrom(struct map_info *_map, void *to, unsigned long from, ssize_t len) -+{ -+ struct map_pci_info *map = (struct map_pci_info *)_map; -+ memcpy_fromio(to, map->base + map->translate(map, from), len); -+} -+ -+static void mtd_pci_write8(struct map_info *_map, map_word val, unsigned long ofs) -+{ -+ struct map_pci_info *map = (struct 
map_pci_info *)_map; -+// printk("write8 : %08lx <= %02x\n", ofs, val.x[0]); -+ writeb(val.x[0], map->base + map->translate(map, ofs)); -+} -+ -+#if 0 -+static void mtd_pci_write16(struct map_info *_map, map_word val, unsigned long ofs) -+{ -+ struct map_pci_info *map = (struct map_pci_info *)_map; -+// printk("write16: %08lx <= %04x\n", ofs, val.x[0]); -+ writew(val.x[0], map->base + map->translate(map, ofs)); -+} -+#endif -+static void mtd_pci_write32(struct map_info *_map, map_word val, unsigned long ofs) -+{ -+ struct map_pci_info *map = (struct map_pci_info *)_map; -+// printk("write32: %08lx <= %08x\n", ofs, val.x[0]); -+ writel(val.x[0], map->base + map->translate(map, ofs)); -+} -+ -+static void mtd_pci_copyto(struct map_info *_map, unsigned long to, const void *from, ssize_t len) -+{ -+ struct map_pci_info *map = (struct map_pci_info *)_map; -+ memcpy_toio(map->base + map->translate(map, to), from, len); -+} -+ -+static struct map_info mtd_pci_map = { -+ .phys = NO_XIP, -+ .copy_from = mtd_pci_copyfrom, -+ .copy_to = mtd_pci_copyto, -+}; -+ - /* - * Intel IOP80310 Flash driver - */ -@@ -48,7 +116,10 @@ - { - u32 win_base; - -- map->map.buswidth = 1; -+ map->map.bankwidth = 1; -+ map->map.read = mtd_pci_read8, -+ map->map.write = mtd_pci_write8, -+ - map->map.size = 0x00800000; - map->base = ioremap_nocache(pci_resource_start(dev, 0), - pci_resource_len(dev, 0)); -@@ -147,7 +218,9 @@ - if (!len || !base) - return -ENXIO; - -- map->map.buswidth = 4; -+ map->map.bankwidth = 4; -+ map->map.read = mtd_pci_read32, -+ map->map.write = mtd_pci_write32, - map->map.size = len; - map->base = ioremap_nocache(base, len); - -@@ -215,75 +288,6 @@ - * Generic code follows. 
- */ - --static u8 mtd_pci_read8(struct map_info *_map, unsigned long ofs) --{ -- struct map_pci_info *map = (struct map_pci_info *)_map; -- u8 val = readb(map->base + map->translate(map, ofs)); --// printk("read8 : %08lx => %02x\n", ofs, val); -- return val; --} -- --static u16 mtd_pci_read16(struct map_info *_map, unsigned long ofs) --{ -- struct map_pci_info *map = (struct map_pci_info *)_map; -- u16 val = readw(map->base + map->translate(map, ofs)); --// printk("read16: %08lx => %04x\n", ofs, val); -- return val; --} -- --static u32 mtd_pci_read32(struct map_info *_map, unsigned long ofs) --{ -- struct map_pci_info *map = (struct map_pci_info *)_map; -- u32 val = readl(map->base + map->translate(map, ofs)); --// printk("read32: %08lx => %08x\n", ofs, val); -- return val; --} -- --static void mtd_pci_copyfrom(struct map_info *_map, void *to, unsigned long from, ssize_t len) --{ -- struct map_pci_info *map = (struct map_pci_info *)_map; -- memcpy_fromio(to, map->base + map->translate(map, from), len); --} -- --static void mtd_pci_write8(struct map_info *_map, u8 val, unsigned long ofs) --{ -- struct map_pci_info *map = (struct map_pci_info *)_map; --// printk("write8 : %08lx <= %02x\n", ofs, val); -- writeb(val, map->base + map->translate(map, ofs)); --} -- --static void mtd_pci_write16(struct map_info *_map, u16 val, unsigned long ofs) --{ -- struct map_pci_info *map = (struct map_pci_info *)_map; --// printk("write16: %08lx <= %04x\n", ofs, val); -- writew(val, map->base + map->translate(map, ofs)); --} -- --static void mtd_pci_write32(struct map_info *_map, u32 val, unsigned long ofs) --{ -- struct map_pci_info *map = (struct map_pci_info *)_map; --// printk("write32: %08lx <= %08x\n", ofs, val); -- writel(val, map->base + map->translate(map, ofs)); --} -- --static void mtd_pci_copyto(struct map_info *_map, unsigned long to, const void *from, ssize_t len) --{ -- struct map_pci_info *map = (struct map_pci_info *)_map; -- memcpy_toio(map->base + 
map->translate(map, to), from, len); --} -- --static struct map_info mtd_pci_map = { -- .phys = NO_XIP, -- .read8 = mtd_pci_read8, -- .read16 = mtd_pci_read16, -- .read32 = mtd_pci_read32, -- .copy_from = mtd_pci_copyfrom, -- .write8 = mtd_pci_write8, -- .write16 = mtd_pci_write16, -- .write32 = mtd_pci_write32, -- .copy_to = mtd_pci_copyto, --}; -- - static int __devinit - mtd_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) - { -Index: linux-2.6.5/drivers/mtd/maps/pcmciamtd.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/pcmciamtd.c 2004-04-03 22:36:19.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/pcmciamtd.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: pcmciamtd.c,v 1.48 2003/06/24 07:14:38 spse Exp $ -+ * $Id: pcmciamtd.c,v 1.51 2004/07/12 22:38:29 dwmw2 Exp $ - * - * pcmciamtd.c - MTD driver for PCMCIA flash memory cards - * -@@ -49,7 +49,7 @@ - - - #define DRIVER_DESC "PCMCIA Flash memory card driver" --#define DRIVER_VERSION "$Revision: 1.48 $" -+#define DRIVER_VERSION "$Revision: 1.51 $" - - /* Size of the PCMCIA address space: 26 bits = 64 MB */ - #define MAX_PCMCIA_ADDR 0x4000000 -@@ -73,7 +73,7 @@ - /* Module parameters */ - - /* 2 = do 16-bit transfers, 1 = do 8-bit transfers */ --static int buswidth = 2; -+static int bankwidth = 2; - - /* Speed of memory accesses, in ns */ - static int mem_speed; -@@ -93,8 +93,8 @@ - MODULE_LICENSE("GPL"); - MODULE_AUTHOR("Simon Evans <spse@secret.org.uk>"); - MODULE_DESCRIPTION(DRIVER_DESC); --MODULE_PARM(buswidth, "i"); --MODULE_PARM_DESC(buswidth, "Set buswidth (1=8 bit, 2=16 bit, default=2)"); -+MODULE_PARM(bankwidth, "i"); -+MODULE_PARM_DESC(bankwidth, "Set bankwidth (1=8 bit, 2=16 bit, default=2)"); - MODULE_PARM(mem_speed, "i"); - MODULE_PARM_DESC(mem_speed, "Set memory access speed in ns"); - MODULE_PARM(force_size, "i"); -@@ -135,32 +135,32 @@ - } - - --static u8 pcmcia_read8_remap(struct map_info *map, 
unsigned long ofs) -+static map_word pcmcia_read8_remap(struct map_info *map, unsigned long ofs) - { - caddr_t addr; -- u8 d; -+ map_word d = {{0}}; - - addr = remap_window(map, ofs); - if(!addr) -- return 0; -+ return d; - -- d = readb(addr); -- DEBUG(3, "ofs = 0x%08lx (%p) data = 0x%02x", ofs, addr, d); -+ d.x[0] = readb(addr); -+ DEBUG(3, "ofs = 0x%08lx (%p) data = 0x%02x", ofs, addr, d.x[0]); - return d; - } - - --static u16 pcmcia_read16_remap(struct map_info *map, unsigned long ofs) -+static map_word pcmcia_read16_remap(struct map_info *map, unsigned long ofs) - { - caddr_t addr; -- u16 d; -+ map_word d = {{0}}; - - addr = remap_window(map, ofs); - if(!addr) -- return 0; -+ return d; - -- d = readw(addr); -- DEBUG(3, "ofs = 0x%08lx (%p) data = 0x%04x", ofs, addr, d); -+ d.x[0] = readw(addr); -+ DEBUG(3, "ofs = 0x%08lx (%p) data = 0x%04x", ofs, addr, d.x[0]); - return d; - } - -@@ -191,26 +191,26 @@ - } - - --static void pcmcia_write8_remap(struct map_info *map, u8 d, unsigned long adr) -+static void pcmcia_write8_remap(struct map_info *map, map_word d, unsigned long adr) - { - caddr_t addr = remap_window(map, adr); - - if(!addr) - return; - -- DEBUG(3, "adr = 0x%08lx (%p) data = 0x%02x", adr, addr, d); -- writeb(d, addr); -+ DEBUG(3, "adr = 0x%08lx (%p) data = 0x%02x", adr, addr, d.x[0]); -+ writeb(d.x[0], addr); - } - - --static void pcmcia_write16_remap(struct map_info *map, u16 d, unsigned long adr) -+static void pcmcia_write16_remap(struct map_info *map, map_word d, unsigned long adr) - { - caddr_t addr = remap_window(map, adr); - if(!addr) - return; - -- DEBUG(3, "adr = 0x%08lx (%p) data = 0x%04x", adr, addr, d); -- writew(d, addr); -+ DEBUG(3, "adr = 0x%08lx (%p) data = 0x%04x", adr, addr, d.x[0]); -+ writew(d.x[0], addr); - } - - -@@ -244,30 +244,30 @@ - - #define DEV_REMOVED(x) (!(*(u_int *)x->map_priv_1 & DEV_PRESENT)) - --static u8 pcmcia_read8(struct map_info *map, unsigned long ofs) -+static map_word pcmcia_read8(struct map_info *map, unsigned 
long ofs) - { - caddr_t win_base = (caddr_t)map->map_priv_2; -- u8 d; -+ map_word d = {{0}}; - - if(DEV_REMOVED(map)) -- return 0; -+ return d; - -- d = readb(win_base + ofs); -- DEBUG(3, "ofs = 0x%08lx (%p) data = 0x%02x", ofs, win_base + ofs, d); -+ d.x[0] = readb(win_base + ofs); -+ DEBUG(3, "ofs = 0x%08lx (%p) data = 0x%02x", ofs, win_base + ofs, d.x[0]); - return d; - } - - --static u16 pcmcia_read16(struct map_info *map, unsigned long ofs) -+static map_word pcmcia_read16(struct map_info *map, unsigned long ofs) - { - caddr_t win_base = (caddr_t)map->map_priv_2; -- u16 d; -+ map_word d = {{0}}; - - if(DEV_REMOVED(map)) -- return 0; -+ return d; - -- d = readw(win_base + ofs); -- DEBUG(3, "ofs = 0x%08lx (%p) data = 0x%04x", ofs, win_base + ofs, d); -+ d.x[0] = readw(win_base + ofs); -+ DEBUG(3, "ofs = 0x%08lx (%p) data = 0x%04x", ofs, win_base + ofs, d.x[0]); - return d; - } - -@@ -439,9 +439,9 @@ - case CISTPL_DEVICE_GEO: { - cistpl_device_geo_t *t = &parse.device_geo; - int i; -- dev->pcmcia_map.buswidth = t->geo[0].buswidth; -+ dev->pcmcia_map.bankwidth = t->geo[0].buswidth; - for(i = 0; i < t->ngeo; i++) { -- DEBUG(2, "region: %d buswidth = %u", i, t->geo[i].buswidth); -+ DEBUG(2, "region: %d bankwidth = %u", i, t->geo[i].buswidth); - DEBUG(2, "region: %d erase_block = %u", i, t->geo[i].erase_block); - DEBUG(2, "region: %d read_block = %u", i, t->geo[i].read_block); - DEBUG(2, "region: %d write_block = %u", i, t->geo[i].write_block); -@@ -460,17 +460,17 @@ - if(!dev->pcmcia_map.size) - dev->pcmcia_map.size = MAX_PCMCIA_ADDR; - -- if(!dev->pcmcia_map.buswidth) -- dev->pcmcia_map.buswidth = 2; -+ if(!dev->pcmcia_map.bankwidth) -+ dev->pcmcia_map.bankwidth = 2; - - if(force_size) { - dev->pcmcia_map.size = force_size << 20; - DEBUG(2, "size forced to %dM", force_size); - } - -- if(buswidth) { -- dev->pcmcia_map.buswidth = buswidth; -- DEBUG(2, "buswidth forced to %d", buswidth); -+ if(bankwidth) { -+ dev->pcmcia_map.bankwidth = bankwidth; -+ DEBUG(2, 
"bankwidth forced to %d", bankwidth); - } - - dev->pcmcia_map.name = dev->mtd_name; -@@ -480,7 +480,7 @@ - } - - DEBUG(1, "Device: Size: %lu Width:%d Name: %s", -- dev->pcmcia_map.size, dev->pcmcia_map.buswidth << 3, dev->mtd_name); -+ dev->pcmcia_map.size, dev->pcmcia_map.bankwidth << 3, dev->mtd_name); - } - - -@@ -522,12 +522,15 @@ - card_settings(dev, link, &new_name); - - dev->pcmcia_map.phys = NO_XIP; -- dev->pcmcia_map.read8 = pcmcia_read8_remap; -- dev->pcmcia_map.read16 = pcmcia_read16_remap; - dev->pcmcia_map.copy_from = pcmcia_copy_from_remap; -- dev->pcmcia_map.write8 = pcmcia_write8_remap; -- dev->pcmcia_map.write16 = pcmcia_write16_remap; - dev->pcmcia_map.copy_to = pcmcia_copy_to_remap; -+ if (dev->pcmcia_map.bankwidth == 1) { -+ dev->pcmcia_map.read = pcmcia_read8_remap; -+ dev->pcmcia_map.write = pcmcia_write8_remap; -+ } else { -+ dev->pcmcia_map.read = pcmcia_read16_remap; -+ dev->pcmcia_map.write = pcmcia_write16_remap; -+ } - if(setvpp == 1) - dev->pcmcia_map.set_vpp = pcmciamtd_set_vpp; - -@@ -536,7 +539,7 @@ - whole card - otherwise we try smaller windows until we succeed */ - - req.Attributes = WIN_MEMORY_TYPE_CM | WIN_ENABLE; -- req.Attributes |= (dev->pcmcia_map.buswidth == 1) ? WIN_DATA_WIDTH_8 : WIN_DATA_WIDTH_16; -+ req.Attributes |= (dev->pcmcia_map.bankwidth == 1) ? 
WIN_DATA_WIDTH_8 : WIN_DATA_WIDTH_16; - req.Base = 0; - req.AccessSpeed = mem_speed; - link->win = (window_handle_t)link->handle; -@@ -657,11 +660,14 @@ - DEBUG(1, "Using non remapping memory functions"); - dev->pcmcia_map.map_priv_1 = (unsigned long)&(dev->link.state); - dev->pcmcia_map.map_priv_2 = (unsigned long)dev->win_base; -- dev->pcmcia_map.read8 = pcmcia_read8; -- dev->pcmcia_map.read16 = pcmcia_read16; -+ if (dev->pcmcia_map.bankwidth == 1) { -+ dev->pcmcia_map.read = pcmcia_read8; -+ dev->pcmcia_map.write = pcmcia_write8; -+ } else { -+ dev->pcmcia_map.read = pcmcia_read16; -+ dev->pcmcia_map.write = pcmcia_write16; -+ } - dev->pcmcia_map.copy_from = pcmcia_copy_from; -- dev->pcmcia_map.write8 = pcmcia_write8; -- dev->pcmcia_map.write16 = pcmcia_write16; - dev->pcmcia_map.copy_to = pcmcia_copy_to; - } - -@@ -828,9 +834,9 @@ - { - info(DRIVER_DESC " " DRIVER_VERSION); - -- if(buswidth && buswidth != 1 && buswidth != 2) { -- info("bad buswidth (%d), using default", buswidth); -- buswidth = 2; -+ if(bankwidth && bankwidth != 1 && bankwidth != 2) { -+ info("bad bankwidth (%d), using default", bankwidth); -+ bankwidth = 2; - } - if(force_size && (force_size < 1 || force_size > 64)) { - info("bad force_size (%d), using default", force_size); -Index: linux-2.6.5/drivers/mtd/maps/physmap.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/physmap.c 2004-04-03 22:37:25.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/physmap.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,7 +1,12 @@ - /* -- * $Id: physmap.c,v 1.29 2003/05/29 09:24:10 dwmw2 Exp $ -+ * $Id: physmap.c,v 1.35 2004/09/16 23:27:13 gleixner Exp $ - * - * Normal mappings of chips in physical memory -+ * -+ * Copyright (C) 2003 MontaVista Software Inc. 
-+ * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net -+ * -+ * 031022 - [jsun] add run-time configure and partition setup - */ - - #include <linux/module.h> -@@ -15,62 +20,38 @@ - #include <linux/config.h> - #include <linux/mtd/partitions.h> - --#define WINDOW_ADDR CONFIG_MTD_PHYSMAP_START --#define WINDOW_SIZE CONFIG_MTD_PHYSMAP_LEN --#define BUSWIDTH CONFIG_MTD_PHYSMAP_BUSWIDTH -- - static struct mtd_info *mymtd; - -- - struct map_info physmap_map = { -- .name = "Physically mapped flash", -- .size = WINDOW_SIZE, -- .buswidth = BUSWIDTH, -- .phys = WINDOW_ADDR, -+ .name = "phys_mapped_flash", -+ .phys = CONFIG_MTD_PHYSMAP_START, -+ .size = CONFIG_MTD_PHYSMAP_LEN, -+ .bankwidth = CONFIG_MTD_PHYSMAP_BANKWIDTH, - }; - - #ifdef CONFIG_MTD_PARTITIONS - static struct mtd_partition *mtd_parts; - static int mtd_parts_nb; - --static struct mtd_partition physmap_partitions[] = { --#if 0 --/* Put your own partition definitions here */ -- { -- .name = "bootROM", -- .size = 0x80000, -- .offset = 0, -- .mask_flags = MTD_WRITEABLE, /* force read-only */ -- }, { -- .name = "zImage", -- .size = 0x100000, -- .offset = MTDPART_OFS_APPEND, -- .mask_flags = MTD_WRITEABLE, /* force read-only */ -- }, { -- .name = "ramdisk.gz", -- .size = 0x300000, -- .offset = MTDPART_OFS_APPEND, -- .mask_flags = MTD_WRITEABLE, /* force read-only */ -- }, { -- .name = "User FS", -- .size = MTDPART_SIZ_FULL, -- .offset = MTDPART_OFS_APPEND, -- } --#endif --}; -+static int num_physmap_partitions; -+static struct mtd_partition *physmap_partitions; - --#define NUM_PARTITIONS (sizeof(physmap_partitions)/sizeof(struct mtd_partition)) --const char *part_probes[] = {"cmdlinepart", "RedBoot", NULL}; -+static const char *part_probes[] __initdata = {"cmdlinepart", "RedBoot", NULL}; - -+void physmap_set_partitions(struct mtd_partition *parts, int num_parts) -+{ -+ physmap_partitions=parts; -+ num_physmap_partitions=num_parts; -+} - #endif /* CONFIG_MTD_PARTITIONS */ - --int __init init_physmap(void) -+static 
int __init init_physmap(void) - { -- static const char *rom_probe_types[] = { "cfi_probe", "jedec_probe", "map_rom", 0 }; -+ static const char *rom_probe_types[] = { "cfi_probe", "jedec_probe", "map_rom", NULL }; - const char **type; - -- printk(KERN_NOTICE "physmap flash device: %x at %x\n", WINDOW_SIZE, WINDOW_ADDR); -- physmap_map.virt = (unsigned long)ioremap(WINDOW_ADDR, WINDOW_SIZE); -+ printk(KERN_NOTICE "physmap flash device: %lx at %lx\n", physmap_map.size, physmap_map.phys); -+ physmap_map.virt = (void __iomem *)ioremap(physmap_map.phys, physmap_map.size); - - if (!physmap_map.virt) { - printk("Failed to ioremap\n"); -@@ -79,7 +60,7 @@ - - simple_map_init(&physmap_map); - -- mymtd = 0; -+ mymtd = NULL; - type = rom_probe_types; - for(; !mymtd && *type; type++) { - mymtd = do_map_probe(*type, &physmap_map); -@@ -97,11 +78,11 @@ - return 0; - } - -- if (NUM_PARTITIONS != 0) -+ if (num_physmap_partitions != 0) - { - printk(KERN_NOTICE - "Using physmap partition definition\n"); -- add_mtd_partitions (mymtd, physmap_partitions, NUM_PARTITIONS); -+ add_mtd_partitions (mymtd, physmap_partitions, num_physmap_partitions); - return 0; - } - -@@ -121,7 +102,7 @@ - if (mtd_parts_nb) { - del_mtd_partitions(mymtd); - kfree(mtd_parts); -- } else if (NUM_PARTITIONS) { -+ } else if (num_physmap_partitions) { - del_mtd_partitions(mymtd); - } else { - del_mtd_device(mymtd); -Index: linux-2.6.5/drivers/mtd/maps/pnc2000.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/pnc2000.c 2004-04-03 22:37:06.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/pnc2000.c 2005-02-01 17:11:17.000000000 -0500 -@@ -5,7 +5,7 @@ - * - * This code is GPL - * -- * $Id: pnc2000.c,v 1.14 2003/05/21 12:45:19 dwmw2 Exp $ -+ * $Id: pnc2000.c,v 1.16 2004/09/16 23:27:13 gleixner Exp $ - */ - - #include <linux/module.h> -@@ -29,9 +29,9 @@ - struct map_info pnc_map = { - .name = "PNC-2000", - .size = WINDOW_SIZE, -- .buswidth = 4, -+ .bankwidth = 
4, - .phys = 0xFFFFFFFF, -- .virt = WINDOW_ADDR, -+ .virt = (void __iomem *)WINDOW_ADDR, - }; - - -Index: linux-2.6.5/drivers/mtd/maps/redwood.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/redwood.c 2004-04-03 22:37:37.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/redwood.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,14 +1,13 @@ - /* -- * $Id: redwood.c,v 1.6 2003/05/21 12:45:19 dwmw2 Exp $ -+ * $Id: redwood.c,v 1.9 2004/09/16 23:27:13 gleixner Exp $ - * - * drivers/mtd/maps/redwood.c - * - * FLASH map for the IBM Redwood 4/5/6 boards. - * -+ * Author: MontaVista Software, Inc. <source@mvista.com> - * -- * Author: Armin Kuster <akuster@mvista.com> -- * -- * 2001-2002 (c) MontaVista, Software, Inc. This file is licensed under -+ * 2001-2003 (c) MontaVista, Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. 
-@@ -89,7 +88,7 @@ - - static struct mtd_partition redwood_flash_partitions[] = { - { -- .name = "Redwood kernel", -+ .name = "Redwood filesystem", - .offset = RW_PART0_OF, - .size = RW_PART0_SZ - }, -@@ -100,7 +99,7 @@ - .mask_flags = MTD_WRITEABLE /* force read-only */ - }, - { -- .name = "Redwood filesystem", -+ .name = "Redwood kernel", - .offset = RW_PART2_OF, - .size = RW_PART2_SZ - }, -@@ -117,7 +116,7 @@ - struct map_info redwood_flash_map = { - .name = "IBM Redwood", - .size = WINDOW_SIZE, -- .buswidth = 2, -+ .bankwidth = 2, - .phys = WINDOW_ADDR, - }; - -@@ -133,7 +132,7 @@ - WINDOW_SIZE, WINDOW_ADDR); - - redwood_flash_map.virt = -- (unsigned long)ioremap(WINDOW_ADDR, WINDOW_SIZE); -+ (void __iomem *)ioremap(WINDOW_ADDR, WINDOW_SIZE); - - if (!redwood_flash_map.virt) { - printk("init_redwood_flash: failed to ioremap\n"); -@@ -167,5 +166,5 @@ - module_exit(cleanup_redwood_flash); - - MODULE_LICENSE("GPL"); --MODULE_AUTHOR("Armin Kuster <akuster@mvista.com>"); -+MODULE_AUTHOR("MontaVista Software <source@mvista.com>"); - MODULE_DESCRIPTION("MTD map driver for the IBM Redwood reference boards"); -Index: linux-2.6.5/drivers/mtd/maps/rpxlite.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/rpxlite.c 2004-04-03 22:38:23.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/rpxlite.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: rpxlite.c,v 1.19 2003/05/21 12:45:19 dwmw2 Exp $ -+ * $Id: rpxlite.c,v 1.21 2004/09/16 23:27:13 gleixner Exp $ - * - * Handle mapping of the flash on the RPX Lite and CLLF boards - */ -@@ -21,14 +21,14 @@ - static struct map_info rpxlite_map = { - .name = "RPX", - .size = WINDOW_SIZE, -- .buswidth = 4, -+ .bankwidth = 4, - .phys = WINDOW_ADDR, - }; - - int __init init_rpxlite(void) - { - printk(KERN_NOTICE "RPX Lite or CLLF flash device: %x at %x\n", WINDOW_SIZE*4, WINDOW_ADDR); -- rpxlite_map.virt = (unsigned long)ioremap(WINDOW_ADDR, WINDOW_SIZE * 4); -+ 
rpxlite_map.virt = (void __iomem *)ioremap(WINDOW_ADDR, WINDOW_SIZE * 4); - - if (!rpxlite_map.virt) { - printk("Failed to ioremap\n"); -Index: linux-2.6.5/drivers/mtd/maps/sa1100-flash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/sa1100-flash.c 2004-04-03 22:36:51.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/sa1100-flash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -3,7 +3,7 @@ - * - * (C) 2000 Nicolas Pitre <nico@cam.org> - * -- * $Id: sa1100-flash.c,v 1.36 2003/05/29 08:59:35 dwmw2 Exp $ -+ * $Id: sa1100-flash.c,v 1.41 2004/09/16 23:27:13 gleixner Exp $ - */ - - #include <linux/config.h> -@@ -496,6 +496,32 @@ - }; - #endif - -+#ifdef CONFIG_SA1100_JORNADA56X -+static struct mtd_partition jornada56x_partitions[] = { -+ { -+ .name = "bootldr", -+ .size = 0x00040000, -+ .offset = 0, -+ .mask_flags = MTD_WRITEABLE, -+ }, { -+ .name = "rootfs", -+ .size = MTDPART_SIZ_FULL, -+ .offset = MTDPART_OFS_APPEND, -+ } -+}; -+ -+static void jornada56x_set_vpp(struct map_info *map, int vpp) -+{ -+ if (vpp) -+ GPSR = GPIO_GPIO26; -+ else -+ GPCR = GPIO_GPIO26; -+ GPDR |= GPIO_GPIO26; -+} -+#else -+#define jornada56x_set_vpp NULL -+#endif -+ - #ifdef CONFIG_SA1100_JORNADA720 - static struct mtd_partition jornada720_partitions[] = { - { -@@ -822,6 +848,12 @@ - nb_parts = ARRAY_SIZE(huw_webpanel_partitions); - } - #endif -+#ifdef CONFIG_SA1100_JORNADA56X -+ if (machine_is_jornada56x()) { -+ *parts = jornada56x_partitions; -+ nb_parts = ARRAY_SIZE(jornada56x_partitions); -+ } -+#endif - #ifdef CONFIG_SA1100_JORNADA720 - if (machine_is_jornada720()) { - *parts = jornada720_partitions; -@@ -932,10 +964,10 @@ - break; - } - -- sa[i].map->virt = (unsigned long)sa[i].vbase; -+ sa[i].map->virt = (void __iomem *)sa[i].vbase; - sa[i].map->phys = sa[i].base; - sa[i].map->set_vpp = sa[i].set_vpp; -- sa[i].map->buswidth = sa[i].width; -+ sa[i].map->bankwidth = sa[i].width; - sa[i].map->size = sa[i].size; - - 
simple_map_init(sa[i].map); -@@ -1066,10 +1098,10 @@ - return; - } - -- sa1100_probe_map.buswidth = msc & MSC_RBW ? 2 : 4; -+ sa1100_probe_map.bankwidth = msc & MSC_RBW ? 2 : 4; - sa1100_probe_map.size = SZ_1M; - sa1100_probe_map.phys = phys; -- sa1100_probe_map.virt = (unsigned long)ioremap(phys, SZ_1M); -+ sa1100_probe_map.virt = (void __iomem *)ioremap(phys, SZ_1M); - if (sa1100_probe_map.virt == 0) - goto fail; - simple_map_init(&sa1100_probe_map); -@@ -1160,7 +1192,7 @@ - info[0].size = SZ_16M; - nr = 1; - } -- if (machine_is_h3xxx()) { -+ if (machine_is_ipaq()) { - info[0].set_vpp = h3xxx_set_vpp; - info[0].base = SA1100_CS0_PHYS; - info[0].size = SZ_32M; -@@ -1176,6 +1208,12 @@ - info[0].size = SZ_32M; - nr = 1; - } -+ if (machine_is_jornada56x()) { -+ info[0].set_vpp = jornada56x_set_vpp; -+ info[0].base = SA1100_CS0_PHYS; -+ info[0].size = SZ_32M; -+ nr = 1; -+ } - if (machine_is_jornada720()) { - info[0].set_vpp = jornada720_set_vpp; - info[0].base = SA1100_CS0_PHYS; -@@ -1253,7 +1291,7 @@ - return nr; - - /* -- * Retrieve the buswidth from the MSC registers. -+ * Retrieve the bankwidth from the MSC registers. - * We currently only implement CS0 and CS1 here. - */ - for (i = 0; i < nr; i++) { -Index: linux-2.6.5/drivers/mtd/maps/sbc8240.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/sbc8240.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/sbc8240.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,247 @@ -+/* -+ * Handle mapping of the flash memory access routines on the SBC8240 board. -+ * -+ * Carolyn Smith, Tektronix, Inc. -+ * -+ * This code is GPLed -+ * -+ * $Id: sbc8240.c,v 1.4 2004/07/12 22:38:29 dwmw2 Exp $ -+ * -+ */ -+ -+/* -+ * The SBC8240 has 2 flash banks. -+ * Bank 0 is a 512 KiB AMD AM29F040B; 8 x 64 KiB sectors. -+ * It contains the U-Boot code (7 sectors) and the environment (1 sector). 
-+ * Bank 1 is 4 x 1 MiB AMD AM29LV800BT; 15 x 64 KiB sectors, 1 x 32 KiB sector, -+ * 2 x 8 KiB sectors, 1 x 16 KiB sectors. -+ * Both parts are JEDEC compatible. -+ */ -+ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/types.h> -+#include <linux/kernel.h> -+#include <asm/io.h> -+ -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/map.h> -+#include <linux/mtd/cfi.h> -+ -+#ifdef CONFIG_MTD_PARTITIONS -+#include <linux/mtd/partitions.h> -+#endif -+ -+#define DEBUG -+ -+#ifdef DEBUG -+# define debugk(fmt,args...) printk(fmt ,##args) -+#else -+# define debugk(fmt,args...) -+#endif -+ -+ -+#define WINDOW_ADDR0 0xFFF00000 /* 512 KiB */ -+#define WINDOW_SIZE0 0x00080000 -+#define BUSWIDTH0 1 -+ -+#define WINDOW_ADDR1 0xFF000000 /* 4 MiB */ -+#define WINDOW_SIZE1 0x00400000 -+#define BUSWIDTH1 8 -+ -+#define MSG_PREFIX "sbc8240:" /* prefix for our printk()'s */ -+#define MTDID "sbc8240-%d" /* for mtdparts= partitioning */ -+ -+ -+static struct map_info sbc8240_map[2] = { -+ { -+ .name = "sbc8240 Flash Bank #0", -+ .size = WINDOW_SIZE0, -+ .bankwidth = BUSWIDTH0, -+ }, -+ { -+ .name = "sbc8240 Flash Bank #1", -+ .size = WINDOW_SIZE1, -+ .bankwidth = BUSWIDTH1, -+ } -+}; -+ -+#define NUM_FLASH_BANKS (sizeof(sbc8240_map) / sizeof(struct map_info)) -+ -+/* -+ * The following defines the partition layout of SBC8240 boards. -+ * -+ * See include/linux/mtd/partitions.h for definition of the -+ * mtd_partition structure. -+ * -+ * The *_max_flash_size is the maximum possible mapped flash size -+ * which is not necessarily the actual flash size. It must correspond -+ * to the value specified in the mapping definition defined by the -+ * "struct map_desc *_io_desc" for the corresponding machine. 
-+ */ -+ -+#ifdef CONFIG_MTD_PARTITIONS -+ -+static struct mtd_partition sbc8240_uboot_partitions [] = { -+ /* Bank 0 */ -+ { -+ .name = "U-boot", /* U-Boot Firmware */ -+ .offset = 0, -+ .size = 0x00070000, /* 7 x 64 KiB sectors */ -+ .mask_flags = MTD_WRITEABLE, /* force read-only */ -+ }, -+ { -+ .name = "environment", /* U-Boot environment */ -+ .offset = 0x00070000, -+ .size = 0x00010000, /* 1 x 64 KiB sector */ -+ }, -+}; -+ -+static struct mtd_partition sbc8240_fs_partitions [] = { -+ { -+ .name = "jffs", /* JFFS filesystem */ -+ .offset = 0, -+ .size = 0x003C0000, /* 4 * 15 * 64KiB */ -+ }, -+ { -+ .name = "tmp32", -+ .offset = 0x003C0000, -+ .size = 0x00020000, /* 4 * 32KiB */ -+ }, -+ { -+ .name = "tmp8a", -+ .offset = 0x003E0000, -+ .size = 0x00008000, /* 4 * 8KiB */ -+ }, -+ { -+ .name = "tmp8b", -+ .offset = 0x003E8000, -+ .size = 0x00008000, /* 4 * 8KiB */ -+ }, -+ { -+ .name = "tmp16", -+ .offset = 0x003F0000, -+ .size = 0x00010000, /* 4 * 16KiB */ -+ } -+}; -+ -+#define NB_OF(x) (sizeof (x) / sizeof (x[0])) -+ -+/* trivial struct to describe partition information */ -+struct mtd_part_def -+{ -+ int nums; -+ unsigned char *type; -+ struct mtd_partition* mtd_part; -+}; -+ -+static struct mtd_info *sbc8240_mtd[NUM_FLASH_BANKS]; -+static struct mtd_part_def sbc8240_part_banks[NUM_FLASH_BANKS]; -+ -+ -+#endif /* CONFIG_MTD_PARTITIONS */ -+ -+ -+int __init init_sbc8240_mtd (void) -+{ -+ static struct _cjs { -+ u_long addr; -+ u_long size; -+ } pt[NUM_FLASH_BANKS] = { -+ { -+ .addr = WINDOW_ADDR0, -+ .size = WINDOW_SIZE0 -+ }, -+ { -+ .addr = WINDOW_ADDR1, -+ .size = WINDOW_SIZE1 -+ }, -+ }; -+ -+ int devicesfound = 0; -+ int i; -+ -+ for (i = 0; i < NUM_FLASH_BANKS; i++) { -+ printk (KERN_NOTICE MSG_PREFIX -+ "Probing 0x%08lx at 0x%08lx\n", pt[i].size, pt[i].addr); -+ -+ sbc8240_map[i].map_priv_1 = -+ (unsigned long) ioremap (pt[i].addr, pt[i].size); -+ if (!sbc8240_map[i].map_priv_1) { -+ printk (MSG_PREFIX "failed to ioremap\n"); -+ return -EIO; -+ } -+ 
simple_map_init(&sbc8240_mtd[i]); -+ -+ sbc8240_mtd[i] = do_map_probe("jedec_probe", &sbc8240_map[i]); -+ -+ if (sbc8240_mtd[i]) { -+ sbc8240_mtd[i]->module = THIS_MODULE; -+ devicesfound++; -+ } -+ } -+ -+ if (!devicesfound) { -+ printk(KERN_NOTICE MSG_PREFIX -+ "No suppported flash chips found!\n"); -+ return -ENXIO; -+ } -+ -+#ifdef CONFIG_MTD_PARTITIONS -+ sbc8240_part_banks[0].mtd_part = sbc8240_uboot_partitions; -+ sbc8240_part_banks[0].type = "static image"; -+ sbc8240_part_banks[0].nums = NB_OF(sbc8240_uboot_partitions); -+ sbc8240_part_banks[1].mtd_part = sbc8240_fs_partitions; -+ sbc8240_part_banks[1].type = "static file system"; -+ sbc8240_part_banks[1].nums = NB_OF(sbc8240_fs_partitions); -+ -+ for (i = 0; i < NUM_FLASH_BANKS; i++) { -+ -+ if (!sbc8240_mtd[i]) continue; -+ if (sbc8240_part_banks[i].nums == 0) { -+ printk (KERN_NOTICE MSG_PREFIX -+ "No partition info available, registering whole device\n"); -+ add_mtd_device(sbc8240_mtd[i]); -+ } else { -+ printk (KERN_NOTICE MSG_PREFIX -+ "Using %s partition definition\n", sbc8240_part_banks[i].mtd_part->name); -+ add_mtd_partitions (sbc8240_mtd[i], -+ sbc8240_part_banks[i].mtd_part, -+ sbc8240_part_banks[i].nums); -+ } -+ } -+#else -+ printk(KERN_NOTICE MSG_PREFIX -+ "Registering %d flash banks at once\n", devicesfound); -+ -+ for (i = 0; i < devicesfound; i++) { -+ add_mtd_device(sbc8240_mtd[i]); -+ } -+#endif /* CONFIG_MTD_PARTITIONS */ -+ -+ return devicesfound == 0 ? 
-ENXIO : 0; -+} -+ -+static void __exit cleanup_sbc8240_mtd (void) -+{ -+ int i; -+ -+ for (i = 0; i < NUM_FLASH_BANKS; i++) { -+ if (sbc8240_mtd[i]) { -+ del_mtd_device (sbc8240_mtd[i]); -+ map_destroy (sbc8240_mtd[i]); -+ } -+ if (sbc8240_map[i].map_priv_1) { -+ iounmap ((void *) sbc8240_map[i].map_priv_1); -+ sbc8240_map[i].map_priv_1 = 0; -+ } -+ } -+} -+ -+module_init (init_sbc8240_mtd); -+module_exit (cleanup_sbc8240_mtd); -+ -+MODULE_LICENSE ("GPL"); -+MODULE_AUTHOR ("Carolyn Smith <carolyn.smith@tektronix.com>"); -+MODULE_DESCRIPTION ("MTD map driver for SBC8240 boards"); -+ -Index: linux-2.6.5/drivers/mtd/maps/sbc_gxx.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/sbc_gxx.c 2004-04-03 22:37:23.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/sbc_gxx.c 2005-02-01 17:11:17.000000000 -0500 -@@ -17,7 +17,7 @@ - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - -- $Id: sbc_gxx.c,v 1.26 2003/05/26 08:50:36 dwmw2 Exp $ -+ $Id: sbc_gxx.c,v 1.30 2004/09/16 23:27:14 gleixner Exp $ - - The SBC-MediaGX / SBC-GXx has up to 16 MiB of - Intel StrataFlash (28F320/28F640) in x8 mode. -@@ -84,7 +84,7 @@ - // Globals - - static volatile int page_in_window = -1; // Current page in window. 
--static unsigned long iomapadr; -+static void __iomem *iomapadr; - static spinlock_t sbc_gxx_spin = SPIN_LOCK_UNLOCKED; - - /* partition_info gives details on the logical partitions that the split the -@@ -114,32 +114,12 @@ - } - - --static __u8 sbc_gxx_read8(struct map_info *map, unsigned long ofs) -+static map_word sbc_gxx_read8(struct map_info *map, unsigned long ofs) - { -- __u8 ret; -+ map_word ret; - spin_lock(&sbc_gxx_spin); - sbc_gxx_page(map, ofs); -- ret = readb(iomapadr + (ofs & WINDOW_MASK)); -- spin_unlock(&sbc_gxx_spin); -- return ret; --} -- --static __u16 sbc_gxx_read16(struct map_info *map, unsigned long ofs) --{ -- __u16 ret; -- spin_lock(&sbc_gxx_spin); -- sbc_gxx_page(map, ofs); -- ret = readw(iomapadr + (ofs & WINDOW_MASK)); -- spin_unlock(&sbc_gxx_spin); -- return ret; --} -- --static __u32 sbc_gxx_read32(struct map_info *map, unsigned long ofs) --{ -- __u32 ret; -- spin_lock(&sbc_gxx_spin); -- sbc_gxx_page(map, ofs); -- ret = readl(iomapadr + (ofs & WINDOW_MASK)); -+ ret.x[0] = readb(iomapadr + (ofs & WINDOW_MASK)); - spin_unlock(&sbc_gxx_spin); - return ret; - } -@@ -161,27 +141,11 @@ - } - } - --static void sbc_gxx_write8(struct map_info *map, __u8 d, unsigned long adr) --{ -- spin_lock(&sbc_gxx_spin); -- sbc_gxx_page(map, adr); -- writeb(d, iomapadr + (adr & WINDOW_MASK)); -- spin_unlock(&sbc_gxx_spin); --} -- --static void sbc_gxx_write16(struct map_info *map, __u16 d, unsigned long adr) --{ -- spin_lock(&sbc_gxx_spin); -- sbc_gxx_page(map, adr); -- writew(d, iomapadr + (adr & WINDOW_MASK)); -- spin_unlock(&sbc_gxx_spin); --} -- --static void sbc_gxx_write32(struct map_info *map, __u32 d, unsigned long adr) -+static void sbc_gxx_write8(struct map_info *map, map_word d, unsigned long adr) - { - spin_lock(&sbc_gxx_spin); - sbc_gxx_page(map, adr); -- writel(d, iomapadr + (adr & WINDOW_MASK)); -+ writeb(d.x[0], iomapadr + (adr & WINDOW_MASK)); - spin_unlock(&sbc_gxx_spin); - } - -@@ -208,14 +172,10 @@ - .size = MAX_SIZE_KiB*1024, /* this 
must be set to a maximum possible amount - of flash so the cfi probe routines find all - the chips */ -- .buswidth = 1, -- .read8 = sbc_gxx_read8, -- .read16 = sbc_gxx_read16, -- .read32 = sbc_gxx_read32, -+ .bankwidth = 1, -+ .read = sbc_gxx_read8, - .copy_from = sbc_gxx_copy_from, -- .write8 = sbc_gxx_write8, -- .write16 = sbc_gxx_write16, -- .write32 = sbc_gxx_write32, -+ .write = sbc_gxx_write8, - .copy_to = sbc_gxx_copy_to - }; - -@@ -235,7 +195,7 @@ - - int __init init_sbc_gxx(void) - { -- iomapadr = (unsigned long)ioremap(WINDOW_START, WINDOW_LENGTH); -+ iomapadr = (void __iomem *)ioremap(WINDOW_START, WINDOW_LENGTH); - if (!iomapadr) { - printk( KERN_ERR"%s: failed to ioremap memory region\n", - sbc_gxx_map.name ); -Index: linux-2.6.5/drivers/mtd/maps/sc520cdp.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/sc520cdp.c 2004-04-03 22:38:17.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/sc520cdp.c 2005-02-01 17:11:17.000000000 -0500 -@@ -16,7 +16,7 @@ - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * -- * $Id: sc520cdp.c,v 1.15 2003/05/21 12:45:20 dwmw2 Exp $ -+ * $Id: sc520cdp.c,v 1.17 2004/09/16 23:27:14 gleixner Exp $ - * - * - * The SC520CDP is an evaluation board for the Elan SC520 processor available -@@ -90,19 +90,19 @@ - { - .name = "SC520CDP Flash Bank #0", - .size = WINDOW_SIZE_0, -- .buswidth = 4, -+ .bankwidth = 4, - .phys = WINDOW_ADDR_0 - }, - { - .name = "SC520CDP Flash Bank #1", - .size = WINDOW_SIZE_1, -- .buswidth = 4, -+ .bankwidth = 4, - .phys = WINDOW_ADDR_1 - }, - { - .name = "SC520CDP DIL Flash", - .size = WINDOW_SIZE_2, -- .buswidth = 1, -+ .bankwidth = 1, - .phys = WINDOW_ADDR_2 - }, - }; -@@ -241,7 +241,7 @@ - printk(KERN_NOTICE "SC520 CDP flash device: 0x%lx at 0x%lx\n", - sc520cdp_map[i].size, sc520cdp_map[i].phys); - -- sc520cdp_map[i].virt = (unsigned 
long)ioremap_nocache(sc520cdp_map[i].phys, sc520cdp_map[i].size); -+ sc520cdp_map[i].virt = (void __iomem *)ioremap_nocache(sc520cdp_map[i].phys, sc520cdp_map[i].size); - - if (!sc520cdp_map[i].virt) { - printk("Failed to ioremap_nocache\n"); -Index: linux-2.6.5/drivers/mtd/maps/scb2_flash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/scb2_flash.c 2004-04-03 22:37:23.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/scb2_flash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,6 +1,6 @@ - /* - * MTD map driver for BIOS Flash on Intel SCB2 boards -- * $Id: scb2_flash.c,v 1.6 2003/05/21 12:45:20 dwmw2 Exp $ -+ * $Id: scb2_flash.c,v 1.9 2004/09/16 23:27:14 gleixner Exp $ - * Copyright (C) 2002 Sun Microsystems, Inc. - * Tim Hockin <thockin@sun.com> - * -@@ -67,7 +67,7 @@ - struct map_info scb2_map = { - .name = "SCB2 BIOS Flash", - .size = 0, -- .buswidth = 1, -+ .bankwidth = 1, - }; - static int region_fail; - -@@ -163,7 +163,7 @@ - } - - scb2_map.phys = SCB2_ADDR; -- scb2_map.virt = (unsigned long)scb2_ioaddr; -+ scb2_map.virt = (void __iomem *)scb2_ioaddr; - scb2_map.size = SCB2_WINDOW; - - simple_map_init(&scb2_map); -Index: linux-2.6.5/drivers/mtd/maps/scx200_docflash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/scx200_docflash.c 2004-04-03 22:37:37.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/scx200_docflash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -2,7 +2,7 @@ - - Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com> - -- $Id: scx200_docflash.c,v 1.5 2003/05/21 12:45:20 dwmw2 Exp $ -+ $Id: scx200_docflash.c,v 1.7 2004/09/16 23:27:14 gleixner Exp $ - - National Semiconductor SCx200 flash mapped with DOCCS - */ -@@ -173,14 +173,14 @@ - - scx200_docflash_map.size = size; - if (width == 8) -- scx200_docflash_map.buswidth = 1; -+ scx200_docflash_map.bankwidth = 1; - else -- scx200_docflash_map.buswidth = 2; -+ 
scx200_docflash_map.bankwidth = 2; - - simple_map_init(&scx200_docflash_map); - - scx200_docflash_map.phys = docmem.start; -- scx200_docflash_map.virt = (unsigned long)ioremap(docmem.start, scx200_docflash_map.size); -+ scx200_docflash_map.virt = (void __iomem *)ioremap(docmem.start, scx200_docflash_map.size); - if (!scx200_docflash_map.virt) { - printk(KERN_ERR NAME ": failed to ioremap the flash\n"); - release_resource(&docmem); -Index: linux-2.6.5/drivers/mtd/maps/solutionengine.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/solutionengine.c 2004-04-03 22:38:18.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/solutionengine.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: solutionengine.c,v 1.10 2003/05/21 12:45:20 dwmw2 Exp $ -+ * $Id: solutionengine.c,v 1.14 2004/09/16 23:27:14 gleixner Exp $ - * - * Flash and EPROM on Hitachi Solution Engine and similar boards. - * -@@ -17,7 +17,7 @@ - #include <linux/mtd/map.h> - #include <linux/mtd/partitions.h> - #include <linux/config.h> -- -+#include <linux/errno.h> - - static struct mtd_info *flash_mtd; - static struct mtd_info *eprom_mtd; -@@ -27,13 +27,13 @@ - struct map_info soleng_eprom_map = { - .name = "Solution Engine EPROM", - .size = 0x400000, -- .buswidth = 4, -+ .bankwidth = 4, - }; - - struct map_info soleng_flash_map = { - .name = "Solution Engine FLASH", - .size = 0x400000, -- .buswidth = 4, -+ .bankwidth = 4, - }; - - static const char *probes[] = { "RedBoot", "cmdlinepart", NULL }; -@@ -62,9 +62,9 @@ - - /* First probe at offset 0 */ - soleng_flash_map.phys = 0; -- soleng_flash_map.virt = P2SEGADDR(0); -+ soleng_flash_map.virt = (void __iomem *)P2SEGADDR(0); - soleng_eprom_map.phys = 0x01000000; -- soleng_eprom_map.virt = P1SEGADDR(0x01000000); -+ soleng_eprom_map.virt = (void __iomem *)P1SEGADDR(0x01000000); - simple_map_init(&soleng_eprom_map); - simple_map_init(&soleng_flash_map); - -Index: 
linux-2.6.5/drivers/mtd/maps/sun_uflash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/sun_uflash.c 2004-04-03 22:37:36.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/sun_uflash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,4 +1,4 @@ --/* $Id: sun_uflash.c,v 1.7 2003/05/20 20:59:32 dwmw2 Exp $ -+/* $Id: sun_uflash.c,v 1.10 2004/09/16 23:27:14 gleixner Exp $ - * - * sun_uflash - Driver implementation for user-programmable flash - * present on many Sun Microsystems SME boardsets. -@@ -51,7 +51,7 @@ - struct map_info uflash_map_templ = { - .name = "SUNW,???-????", - .size = UFLASH_WINDOW_SIZE, -- .buswidth = UFLASH_BUSWIDTH, -+ .bankwidth = UFLASH_BUSWIDTH, - }; - - int uflash_devinit(struct linux_ebus_device* edev) -@@ -97,7 +97,7 @@ - } - pdev->map.phys = edev->resource[0].start; - pdev->map.virt = -- (unsigned long)ioremap_nocache(edev->resource[0].start, pdev->map.size); -+ (void __iomem *)ioremap_nocache(edev->resource[0].start, pdev->map.size); - if(0 == pdev->map.virt) { - printk("%s: failed to map device\n", __FUNCTION__); - kfree(pdev->name); -Index: linux-2.6.5/drivers/mtd/maps/tqm8xxl.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/tqm8xxl.c 2004-04-03 22:36:26.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/tqm8xxl.c 2005-02-01 17:11:17.000000000 -0500 -@@ -2,7 +2,7 @@ - * Handle mapping of the flash memory access routines - * on TQM8xxL based devices. 
- * -- * $Id: tqm8xxl.c,v 1.9 2003/06/23 11:48:18 dwmw2 Exp $ -+ * $Id: tqm8xxl.c,v 1.12 2004/09/16 23:27:14 gleixner Exp $ - * - * based on rpxlite.c - * -@@ -105,7 +105,7 @@ - .name = "jffs", - .offset = 0x00200000, - .size = 0x00200000, -- .//size = MTDPART_SIZ_FULL, -+ //.size = MTDPART_SIZ_FULL, - } - }; - #endif -@@ -151,11 +151,11 @@ - sprintf(map_banks[idx]->name, "TQM8xxL%d", idx); - - map_banks[idx]->size = flash_size; -- map_banks[idx]->buswidth = 4; -+ map_banks[idx]->bankwidth = 4; - - simple_map_init(map_banks[idx]); - -- map_banks[idx]->virt = start_scan_addr; -+ map_banks[idx]->virt = (void __iomem *)start_scan_addr; - map_banks[idx]->phys = flash_addr; - /* FIXME: This looks utterly bogus, but I'm trying to - preserve the behaviour of the original (shown here)... -Index: linux-2.6.5/drivers/mtd/maps/tsunami_flash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/tsunami_flash.c 2004-04-03 22:38:16.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/tsunami_flash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -2,7 +2,7 @@ - * tsunami_flash.c - * - * flash chip on alpha ds10... 
-- * $Id: tsunami_flash.c,v 1.6 2003/05/21 15:15:08 dwmw2 Exp $ -+ * $Id: tsunami_flash.c,v 1.9 2004/07/14 09:52:55 dwmw2 Exp $ - */ - #include <asm/io.h> - #include <asm/core_tsunami.h> -@@ -15,14 +15,16 @@ - #define FLASH_DISABLE_BYTE 0x00 - - #define MAX_TIG_FLASH_SIZE (12*1024*1024) --static inline __u8 tsunami_flash_read8(struct map_info *map, unsigned long offset) -+static inline map_word tsunami_flash_read8(struct map_info *map, unsigned long offset) - { -- return tsunami_tig_readb(offset); -+ map_word val; -+ val.x[0] = tsunami_tig_readb(offset); -+ return val; - } - --static void tsunami_flash_write8(struct map_info *map, __u8 value, unsigned long offset) -+static void tsunami_flash_write8(struct map_info *map, map_word value, unsigned long offset) - { -- tsunami_tig_writeb(value, offset); -+ tsunami_tig_writeb(value.x[0], offset); - } - - static void tsunami_flash_copy_from( -@@ -61,10 +63,10 @@ - .name = "flash chip on the Tsunami TIG bus", - .size = MAX_TIG_FLASH_SIZE, - .phys = NO_XIP; -- .buswidth = 1, -- .read8 = tsunami_flash_read8, -+ .bankwidth = 1, -+ .read = tsunami_flash_read8, - .copy_from = tsunami_flash_copy_from, -- .write8 = tsunami_flash_write8, -+ .write = tsunami_flash_write8, - .copy_to = tsunami_flash_copy_to, - }; - -@@ -84,7 +86,7 @@ - - static int __init init_tsunami_flash(void) - { -- static const char *rom_probe_types[] = { "cfi_probe", "jedec_probe", "map_rom", 0 }; -+ static const char *rom_probe_types[] = { "cfi_probe", "jedec_probe", "map_rom", NULL }; - char **type; - - tsunami_tig_writeb(FLASH_ENABLE_BYTE, FLASH_ENABLE_PORT); -Index: linux-2.6.5/drivers/mtd/maps/uclinux.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/uclinux.c 2004-04-03 22:38:16.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/uclinux.c 2005-02-01 17:11:17.000000000 -0500 -@@ -5,7 +5,7 @@ - * - * (C) Copyright 2002, Greg Ungerer (gerg@snapgear.com) - * -- * $Id: uclinux.c,v 1.5 2003/05/20 
20:59:32 dwmw2 Exp $ -+ * $Id: uclinux.c,v 1.8 2004/09/16 23:27:14 gleixner Exp $ - */ - - /****************************************************************************/ -@@ -17,6 +17,7 @@ - #include <linux/kernel.h> - #include <linux/fs.h> - #include <linux/major.h> -+#include <linux/root_dev.h> - #include <linux/mtd/mtd.h> - #include <linux/mtd/map.h> - #include <linux/mtd/partitions.h> -@@ -63,12 +64,12 @@ - mapp = &uclinux_ram_map; - mapp->phys = (unsigned long) &_ebss; - mapp->size = PAGE_ALIGN(*((unsigned long *)((&_ebss) + 8))); -- mapp->buswidth = 4; -+ mapp->bankwidth = 4; - - printk("uclinux[mtd]: RAM probe address=0x%x size=0x%x\n", - (int) mapp->map_priv_2, (int) mapp->size); - -- mapp->virt = (unsigned long) -+ mapp->virt = (void __iomem *) - ioremap_nocache(mapp->phys, mapp->size); - - if (mapp->virt == 0) { -Index: linux-2.6.5/drivers/mtd/maps/vmax301.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/vmax301.c 2004-04-03 22:36:54.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/vmax301.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,4 +1,4 @@ --// $Id: vmax301.c,v 1.28 2003/05/21 15:15:08 dwmw2 Exp $ -+// $Id: vmax301.c,v 1.30 2004/07/12 22:38:29 dwmw2 Exp $ - /* ###################################################################### - - Tempustech VMAX SBC301 MTD Driver. 
-@@ -54,32 +54,12 @@ - __vmax301_page(map, page); - } - --static __u8 vmax301_read8(struct map_info *map, unsigned long ofs) -+static map_word vmax301_read8(struct map_info *map, unsigned long ofs) - { -- __u8 ret; -+ map_word ret; - spin_lock(&vmax301_spin); - vmax301_page(map, ofs); -- ret = readb(map->map_priv_2 + (ofs & WINDOW_MASK)); -- spin_unlock(&vmax301_spin); -- return ret; --} -- --static __u16 vmax301_read16(struct map_info *map, unsigned long ofs) --{ -- __u16 ret; -- spin_lock(&vmax301_spin); -- vmax301_page(map, ofs); -- ret = readw(map->map_priv_2 + (ofs & WINDOW_MASK)); -- spin_unlock(&vmax301_spin); -- return ret; --} -- --static __u32 vmax301_read32(struct map_info *map, unsigned long ofs) --{ -- __u32 ret; -- spin_lock(&vmax301_spin); -- vmax301_page(map, ofs); -- ret = readl(map->map_priv_2 + (ofs & WINDOW_MASK)); -+ ret.x[0] = readb(map->map_priv_2 + (ofs & WINDOW_MASK)); - spin_unlock(&vmax301_spin); - return ret; - } -@@ -100,27 +80,11 @@ - } - } - --static void vmax301_write8(struct map_info *map, __u8 d, unsigned long adr) --{ -- spin_lock(&vmax301_spin); -- vmax301_page(map, adr); -- writeb(d, map->map_priv_2 + (adr & WINDOW_MASK)); -- spin_unlock(&vmax301_spin); --} -- --static void vmax301_write16(struct map_info *map, __u16 d, unsigned long adr) --{ -- spin_lock(&vmax301_spin); -- vmax301_page(map, adr); -- writew(d, map->map_priv_2 + (adr & WINDOW_MASK)); -- spin_unlock(&vmax301_spin); --} -- --static void vmax301_write32(struct map_info *map, __u32 d, unsigned long adr) -+static void vmax301_write8(struct map_info *map, map_word d, unsigned long adr) - { - spin_lock(&vmax301_spin); - vmax301_page(map, adr); -- writel(d, map->map_priv_2 + (adr & WINDOW_MASK)); -+ writeb(d.x[0], map->map_priv_2 + (adr & WINDOW_MASK)); - spin_unlock(&vmax301_spin); - } - -@@ -146,14 +110,10 @@ - .name = "VMAX301 Internal Flash", - .phys = NO_XIP, - .size = 3*2*1024*1024, -- .buswidth = 1, -- .read8 = vmax301_read8, -- .read16 = vmax301_read16, -- 
.read32 = vmax301_read32, -+ .bankwidth = 1, -+ .read = vmax301_read8, - .copy_from = vmax301_copy_from, -- .write8 = vmax301_write8, -- .write16 = vmax301_write16, -- .write32 = vmax301_write32, -+ .write = vmax301_write8, - .copy_to = vmax301_copy_to, - .map_priv_1 = WINDOW_START + WINDOW_LENGTH, - .map_priv_2 = 0xFFFFFFFF -@@ -162,14 +122,10 @@ - .name = "VMAX301 Socket", - .phys = NO_XIP, - .size = 0, -- .buswidth = 1, -- .read8 = vmax301_read8, -- .read16 = vmax301_read16, -- .read32 = vmax301_read32, -+ .bankwidth = 1, -+ .read = vmax301_read8, - .copy_from = vmax301_copy_from, -- .write8 = vmax301_write8, -- .write16 = vmax301_write16, -- .write32 = vmax301_write32, -+ .write = vmax301_write8, - .copy_to = vmax301_copy_to, - .map_priv_1 = WINDOW_START + (3*WINDOW_LENGTH), - .map_priv_2 = 0xFFFFFFFF -Index: linux-2.6.5/drivers/mtd/maps/wr_sbc82xx_flash.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/maps/wr_sbc82xx_flash.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/maps/wr_sbc82xx_flash.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,181 @@ -+/* -+ * $Id: wr_sbc82xx_flash.c,v 1.6 2004/09/16 23:27:14 gleixner Exp $ -+ * -+ * Map for flash chips on Wind River PowerQUICC II SBC82xx board. -+ * -+ * Copyright (C) 2004 Red Hat, Inc. 
-+ * -+ * Author: David Woodhouse <dwmw2@infradead.org> -+ * -+ */ -+ -+#include <linux/module.h> -+#include <linux/types.h> -+#include <linux/kernel.h> -+#include <linux/init.h> -+#include <linux/slab.h> -+#include <asm/io.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/map.h> -+#include <linux/config.h> -+#include <linux/mtd/partitions.h> -+ -+#include <asm/immap_cpm2.h> -+ -+static struct mtd_info *sbcmtd[3]; -+static struct mtd_partition *sbcmtd_parts[3]; -+ -+struct map_info sbc82xx_flash_map[3] = { -+ {.name = "Boot flash"}, -+ {.name = "Alternate boot flash"}, -+ {.name = "User flash"} -+}; -+ -+static struct mtd_partition smallflash_parts[] = { -+ { -+ .name = "space", -+ .size = 0x100000, -+ .offset = 0, -+ }, { -+ .name = "bootloader", -+ .size = MTDPART_SIZ_FULL, -+ .offset = MTDPART_OFS_APPEND, -+ } -+}; -+ -+static struct mtd_partition bigflash_parts[] = { -+ { -+ .name = "bootloader", -+ .size = 0x00100000, -+ .offset = 0, -+ }, { -+ .name = "file system", -+ .size = 0x01f00000, -+ .offset = MTDPART_OFS_APPEND, -+ }, { -+ .name = "boot config", -+ .size = 0x00100000, -+ .offset = MTDPART_OFS_APPEND, -+ }, { -+ .name = "space", -+ .size = 0x01f00000, -+ .offset = MTDPART_OFS_APPEND, -+ } -+}; -+ -+static const char *part_probes[] __initdata = {"cmdlinepart", "RedBoot", NULL}; -+ -+#define init_sbc82xx_one_flash(map, br, or) \ -+do { \ -+ (map).phys = (br & 1) ? (br & 0xffff8000) : 0; \ -+ (map).size = (br & 1) ? 
(~(or & 0xffff8000) + 1) : 0; \ -+ switch (br & 0x00001800) { \ -+ case 0x00000000: \ -+ case 0x00000800: (map).bankwidth = 1; break; \ -+ case 0x00001000: (map).bankwidth = 2; break; \ -+ case 0x00001800: (map).bankwidth = 4; break; \ -+ } \ -+} while (0); -+ -+int __init init_sbc82xx_flash(void) -+{ -+ volatile memctl_cpm2_t *mc = &cpm2_immr->im_memctl; -+ int bigflash; -+ int i; -+ -+#ifdef CONFIG_SBC8560 -+ mc = ioremap(0xff700000 + 0x5000, sizeof(memctl_cpm2_t)); -+#else -+ mc = &cpm2_immr->im_memctl; -+#endif -+ -+ bigflash = 1; -+ if ((mc->memc_br0 & 0x00001800) == 0x00001800) -+ bigflash = 0; -+ -+ init_sbc82xx_one_flash(sbc82xx_flash_map[0], mc->memc_br0, mc->memc_or0); -+ init_sbc82xx_one_flash(sbc82xx_flash_map[1], mc->memc_br6, mc->memc_or6); -+ init_sbc82xx_one_flash(sbc82xx_flash_map[2], mc->memc_br1, mc->memc_or1); -+ -+#ifdef CONFIG_SBC8560 -+ iounmap((void *) mc); -+#endif -+ -+ for (i=0; i<3; i++) { -+ int8_t flashcs[3] = { 0, 6, 1 }; -+ int nr_parts; -+ -+ printk(KERN_NOTICE "PowerQUICC II %s (%ld MiB on CS%d", -+ sbc82xx_flash_map[i].name, -+ (sbc82xx_flash_map[i].size >> 20), -+ flashcs[i]); -+ if (!sbc82xx_flash_map[i].phys) { -+ /* We know it can't be at zero. */ -+ printk("): disabled by bootloader.\n"); -+ continue; -+ } -+ printk(" at %08lx)\n", sbc82xx_flash_map[i].phys); -+ -+ sbc82xx_flash_map[i].virt = (void __iomem *)ioremap(sbc82xx_flash_map[i].phys, sbc82xx_flash_map[i].size); -+ -+ if (!sbc82xx_flash_map[i].virt) { -+ printk("Failed to ioremap\n"); -+ continue; -+ } -+ -+ simple_map_init(&sbc82xx_flash_map[i]); -+ -+ sbcmtd[i] = do_map_probe("cfi_probe", &sbc82xx_flash_map[i]); -+ -+ if (!sbcmtd[i]) -+ continue; -+ -+ sbcmtd[i]->owner = THIS_MODULE; -+ -+ nr_parts = parse_mtd_partitions(sbcmtd[i], part_probes, -+ &sbcmtd_parts[i], 0); -+ if (nr_parts > 0) { -+ add_mtd_partitions (sbcmtd[i], sbcmtd_parts[i], nr_parts); -+ continue; -+ } -+ -+ /* No partitioning detected. 
Use default */ -+ if (i == 2) { -+ add_mtd_device(sbcmtd[i]); -+ } else if (i == bigflash) { -+ add_mtd_partitions (sbcmtd[i], bigflash_parts, ARRAY_SIZE(bigflash_parts)); -+ } else { -+ add_mtd_partitions (sbcmtd[i], smallflash_parts, ARRAY_SIZE(smallflash_parts)); -+ } -+ } -+ return 0; -+} -+ -+static void __exit cleanup_sbc82xx_flash(void) -+{ -+ int i; -+ -+ for (i=0; i<3; i++) { -+ if (!sbcmtd[i]) -+ continue; -+ -+ if (i<2 || sbcmtd_parts[i]) -+ del_mtd_partitions(sbcmtd[i]); -+ else -+ del_mtd_device(sbcmtd[i]); -+ -+ kfree(sbcmtd_parts[i]); -+ map_destroy(sbcmtd[i]); -+ -+ iounmap((void *)sbc82xx_flash_map[i].virt); -+ sbc82xx_flash_map[i].virt = 0; -+ } -+} -+ -+module_init(init_sbc82xx_flash); -+module_exit(cleanup_sbc82xx_flash); -+ -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>"); -+MODULE_DESCRIPTION("Flash map driver for WindRiver PowerQUICC II"); -Index: linux-2.6.5/drivers/mtd/mtd_blkdevs-24.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/mtd_blkdevs-24.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/mtd_blkdevs-24.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,692 @@ -+/* -+ * $Id: mtd_blkdevs-24.c,v 1.16 2004/08/11 15:29:24 dmarlin Exp $ -+ * -+ * (C) 2003 David Woodhouse <dwmw2@infradead.org> -+ * -+ * Interface to Linux 2.4 block layer for MTD 'translation layers'. 
-+ * -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/slab.h> -+#include <linux/module.h> -+#include <linux/list.h> -+#include <linux/fs.h> -+#include <linux/mtd/blktrans.h> -+#include <linux/mtd/mtd.h> -+#include <linux/blkdev.h> -+#include <linux/blk.h> -+#include <linux/blkpg.h> -+#include <linux/spinlock.h> -+#include <linux/hdreg.h> -+#include <linux/init.h> -+#include <asm/semaphore.h> -+#include <asm/uaccess.h> -+ -+static LIST_HEAD(blktrans_majors); -+ -+extern struct semaphore mtd_table_mutex; -+extern struct mtd_info *mtd_table[]; -+ -+struct mtd_blkcore_priv { -+ devfs_handle_t devfs_dir_handle; -+ int blksizes[256]; -+ int sizes[256]; -+ struct hd_struct part_table[256]; -+ struct gendisk gd; -+ spinlock_t devs_lock; /* See comment in _request function */ -+ struct completion thread_dead; -+ int exiting; -+ wait_queue_head_t thread_wq; -+}; -+ -+static inline struct mtd_blktrans_dev *tr_get_dev(struct mtd_blktrans_ops *tr, -+ int devnum) -+{ -+ struct list_head *this; -+ struct mtd_blktrans_dev *d; -+ -+ list_for_each(this, &tr->devs) { -+ d = list_entry(this, struct mtd_blktrans_dev, list); -+ -+ if (d->devnum == devnum) -+ return d; -+ } -+ return NULL; -+} -+ -+static inline struct mtd_blktrans_ops *get_tr(int major) -+{ -+ struct list_head *this; -+ struct mtd_blktrans_ops *t; -+ -+ list_for_each(this, &blktrans_majors) { -+ t = list_entry(this, struct mtd_blktrans_ops, list); -+ -+ if (t->major == major) -+ return t; -+ } -+ return NULL; -+} -+ -+static int do_blktrans_request(struct mtd_blktrans_ops *tr, -+ struct mtd_blktrans_dev *dev, -+ struct request *req) -+{ -+ unsigned long block, nsect; -+ char *buf; -+ int minor; -+ -+ minor = MINOR(req->rq_dev); -+ block = req->sector; -+ nsect = req->current_nr_sectors; -+ buf = req->buffer; -+ -+ if (block + nsect > tr->blkcore_priv->part_table[minor].nr_sects) { -+ printk(KERN_WARNING "Access beyond end of device.\n"); -+ return 0; -+ } -+ block += tr->blkcore_priv->part_table[minor].start_sect; 
-+ -+ switch(req->cmd) { -+ case READ: -+ for (; nsect > 0; nsect--, block++, buf += 512) -+ if (tr->readsect(dev, block, buf)) -+ return 0; -+ return 1; -+ -+ case WRITE: -+ if (!tr->writesect) -+ return 0; -+ -+ for (; nsect > 0; nsect--, block++, buf += 512) -+ if (tr->writesect(dev, block, buf)) -+ return 0; -+ return 1; -+ -+ default: -+ printk(KERN_NOTICE "Unknown request cmd %d\n", req->cmd); -+ return 0; -+ } -+} -+ -+static int mtd_blktrans_thread(void *arg) -+{ -+ struct mtd_blktrans_ops *tr = arg; -+ struct request_queue *rq = BLK_DEFAULT_QUEUE(tr->major); -+ -+ /* we might get involved when memory gets low, so use PF_MEMALLOC */ -+ current->flags |= PF_MEMALLOC; -+ -+ snprintf(current->comm, sizeof(current->comm), "%sd", tr->name); -+ -+ /* daemonize() doesn't do this for us since some kernel threads -+ actually want to deal with signals. We can't just call -+ exit_sighand() since that'll cause an oops when we finally -+ do exit. */ -+ spin_lock_irq(¤t->sigmask_lock); -+ sigfillset(¤t->blocked); -+ recalc_sigpending(); -+ spin_unlock_irq(¤t->sigmask_lock); -+ -+ daemonize("%sd", tr->name); -+ -+ while (!tr->blkcore_priv->exiting) { -+ struct request *req; -+ struct mtd_blktrans_dev *dev; -+ int devnum; -+ int res = 0; -+ DECLARE_WAITQUEUE(wait, current); -+ -+ spin_lock_irq(&io_request_lock); -+ -+ if (list_empty(&rq->queue_head)) { -+ -+ add_wait_queue(&tr->blkcore_priv->thread_wq, &wait); -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ spin_unlock_irq(&io_request_lock); -+ -+ schedule(); -+ remove_wait_queue(&tr->blkcore_priv->thread_wq, &wait); -+ -+ continue; -+ } -+ -+ req = blkdev_entry_next_request(&rq->queue_head); -+ -+ devnum = MINOR(req->rq_dev) >> tr->part_bits; -+ -+ /* The ll_rw_blk code knows not to touch the request -+ at the head of the queue */ -+ spin_unlock_irq(&io_request_lock); -+ -+ /* FIXME: Where can we store the dev, on which -+ we already have a refcount anyway? 
We need to -+ lock against concurrent addition/removal of devices, -+ but if we use the mtd_table_mutex we deadlock when -+ grok_partitions is called from the registration -+ callbacks. */ -+ spin_lock(&tr->blkcore_priv->devs_lock); -+ dev = tr_get_dev(tr, devnum); -+ spin_unlock(&tr->blkcore_priv->devs_lock); -+ -+ BUG_ON(!dev); -+ -+ /* Ensure serialisation of requests */ -+ down(&dev->sem); -+ -+ res = do_blktrans_request(tr, dev, req); -+ up(&dev->sem); -+ -+ if (!end_that_request_first(req, res, tr->name)) { -+ spin_lock_irq(&io_request_lock); -+ blkdev_dequeue_request(req); -+ end_that_request_last(req); -+ spin_unlock_irq(&io_request_lock); -+ } -+ } -+ complete_and_exit(&tr->blkcore_priv->thread_dead, 0); -+} -+ -+static void mtd_blktrans_request(struct request_queue *rq) -+{ -+ struct mtd_blktrans_ops *tr = rq->queuedata; -+ wake_up(&tr->blkcore_priv->thread_wq); -+} -+ -+int blktrans_open(struct inode *i, struct file *f) -+{ -+ struct mtd_blktrans_ops *tr = NULL; -+ struct mtd_blktrans_dev *dev = NULL; -+ int major_nr = MAJOR(i->i_rdev); -+ int minor_nr = MINOR(i->i_rdev); -+ int devnum; -+ int ret = -ENODEV; -+ -+ if (is_read_only(i->i_rdev) && (f->f_mode & FMODE_WRITE)) -+ return -EROFS; -+ -+ down(&mtd_table_mutex); -+ -+ tr = get_tr(major_nr); -+ -+ if (!tr) -+ goto out; -+ -+ devnum = minor_nr >> tr->part_bits; -+ -+ dev = tr_get_dev(tr, devnum); -+ -+ if (!dev) -+ goto out; -+ -+ if (!tr->blkcore_priv->part_table[minor_nr].nr_sects) { -+ ret = -ENODEV; -+ goto out; -+ } -+ -+ if (!try_inc_mod_count(dev->mtd->owner)) -+ goto out; -+ -+ if (!try_inc_mod_count(tr->owner)) -+ goto out_tr; -+ -+ dev->mtd->usecount++; -+ -+ ret = 0; -+ if (tr->open && (ret = tr->open(dev))) { -+ dev->mtd->usecount--; -+ if (dev->mtd->owner) -+ __MOD_DEC_USE_COUNT(dev->mtd->owner); -+ out_tr: -+ if (tr->owner) -+ __MOD_DEC_USE_COUNT(tr->owner); -+ } -+ out: -+ up(&mtd_table_mutex); -+ -+ return ret; -+} -+ -+int blktrans_release(struct inode *i, struct file *f) -+{ -+ 
struct mtd_blktrans_dev *dev; -+ struct mtd_blktrans_ops *tr; -+ int ret = 0; -+ int devnum; -+ -+ down(&mtd_table_mutex); -+ -+ tr = get_tr(MAJOR(i->i_rdev)); -+ if (!tr) { -+ up(&mtd_table_mutex); -+ return -ENODEV; -+ } -+ -+ devnum = MINOR(i->i_rdev) >> tr->part_bits; -+ dev = tr_get_dev(tr, devnum); -+ -+ if (!dev) { -+ up(&mtd_table_mutex); -+ return -ENODEV; -+ } -+ -+ if (tr->release) -+ ret = tr->release(dev); -+ -+ if (!ret) { -+ dev->mtd->usecount--; -+ if (dev->mtd->owner) -+ __MOD_DEC_USE_COUNT(dev->mtd->owner); -+ if (tr->owner) -+ __MOD_DEC_USE_COUNT(tr->owner); -+ } -+ -+ up(&mtd_table_mutex); -+ -+ return ret; -+} -+ -+static int mtd_blktrans_rrpart(kdev_t rdev, struct mtd_blktrans_ops *tr, -+ struct mtd_blktrans_dev *dev) -+{ -+ struct gendisk *gd = &(tr->blkcore_priv->gd); -+ int i; -+ int minor = MINOR(rdev); -+ -+ if (minor & ((1<<tr->part_bits)-1) || !tr->part_bits) { -+ /* BLKRRPART on a partition. Go away. */ -+ return -ENOTTY; -+ } -+ -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EACCES; -+ -+ /* We are required to prevent simultaneous open() ourselves. -+ The core doesn't do that for us. Did I ever mention how -+ much the Linux block layer sucks? Sledgehammer approach... 
*/ -+ down(&mtd_table_mutex); -+ -+ for (i=0; i < (1<<tr->part_bits); i++) { -+ invalidate_device(MKDEV(tr->major, minor+i), 1); -+ gd->part[minor + i].start_sect = 0; -+ gd->part[minor + i].nr_sects = 0; -+ } -+ -+ grok_partitions(gd, minor, 1 << tr->part_bits, -+ tr->blkcore_priv->sizes[minor]); -+ up(&mtd_table_mutex); -+ -+ return 0; -+} -+ -+static int blktrans_ioctl(struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct mtd_blktrans_dev *dev; -+ struct mtd_blktrans_ops *tr; -+ int devnum; -+ -+ switch(cmd) { -+ case BLKGETSIZE: -+ case BLKGETSIZE64: -+ case BLKBSZSET: -+ case BLKBSZGET: -+ case BLKROSET: -+ case BLKROGET: -+ case BLKRASET: -+ case BLKRAGET: -+ case BLKPG: -+ case BLKELVGET: -+ case BLKELVSET: -+ return blk_ioctl(inode->i_rdev, cmd, arg); -+ } -+ -+ down(&mtd_table_mutex); -+ -+ tr = get_tr(MAJOR(inode->i_rdev)); -+ if (!tr) { -+ up(&mtd_table_mutex); -+ return -ENODEV; -+ } -+ -+ devnum = MINOR(inode->i_rdev) >> tr->part_bits; -+ dev = tr_get_dev(tr, devnum); -+ -+ up(&mtd_table_mutex); -+ -+ if (!dev) -+ return -ENODEV; -+ -+ switch(cmd) { -+ case BLKRRPART: -+ return mtd_blktrans_rrpart(inode->i_rdev, tr, dev); -+ -+ case BLKFLSBUF: -+ blk_ioctl(inode->i_rdev, cmd, arg); -+ if (tr->flush) -+ return tr->flush(dev); -+ /* The core code did the work, we had nothing to do. 
*/ -+ return 0; -+ -+ case HDIO_GETGEO: -+ if (tr->getgeo) { -+ struct hd_geometry g; -+ struct gendisk *gd = &(tr->blkcore_priv->gd); -+ int ret; -+ -+ memset(&g, 0, sizeof(g)); -+ ret = tr->getgeo(dev, &g); -+ if (ret) -+ return ret; -+ -+ g.start = gd->part[MINOR(inode->i_rdev)].start_sect; -+ if (copy_to_user((void *)arg, &g, sizeof(g))) -+ return -EFAULT; -+ return 0; -+ } /* else */ -+ default: -+ return -ENOTTY; -+ } -+} -+ -+struct block_device_operations mtd_blktrans_ops = { -+ .owner = THIS_MODULE, -+ .open = blktrans_open, -+ .release = blktrans_release, -+ .ioctl = blktrans_ioctl, -+}; -+ -+int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) -+{ -+ struct mtd_blktrans_ops *tr = new->tr; -+ struct list_head *this; -+ int last_devnum = -1; -+ int i; -+ -+ if (!down_trylock(&mtd_table_mutex)) { -+ up(&mtd_table_mutex); -+ BUG(); -+ } -+ -+ spin_lock(&tr->blkcore_priv->devs_lock); -+ -+ list_for_each(this, &tr->devs) { -+ struct mtd_blktrans_dev *d = list_entry(this, struct mtd_blktrans_dev, list); -+ if (new->devnum == -1) { -+ /* Use first free number */ -+ if (d->devnum != last_devnum+1) { -+ /* Found a free devnum. 
Plug it in here */ -+ new->devnum = last_devnum+1; -+ list_add_tail(&new->list, &d->list); -+ goto added; -+ } -+ } else if (d->devnum == new->devnum) { -+ /* Required number taken */ -+ spin_unlock(&tr->blkcore_priv->devs_lock); -+ return -EBUSY; -+ } else if (d->devnum > new->devnum) { -+ /* Required number was free */ -+ list_add_tail(&new->list, &d->list); -+ goto added; -+ } -+ last_devnum = d->devnum; -+ } -+ if (new->devnum == -1) -+ new->devnum = last_devnum+1; -+ -+ if ((new->devnum << tr->part_bits) > 256) { -+ spin_unlock(&tr->blkcore_priv->devs_lock); -+ return -EBUSY; -+ } -+ -+ init_MUTEX(&new->sem); -+ list_add_tail(&new->list, &tr->devs); -+ added: -+ spin_unlock(&tr->blkcore_priv->devs_lock); -+ -+ if (!tr->writesect) -+ new->readonly = 1; -+ -+ for (i = new->devnum << tr->part_bits; -+ i < (new->devnum+1) << tr->part_bits; -+ i++) { -+ set_device_ro(MKDEV(tr->major, i), new->readonly); -+ tr->blkcore_priv->blksizes[i] = new->blksize; -+ tr->blkcore_priv->sizes[i] = 0; -+ tr->blkcore_priv->part_table[i].nr_sects = 0; -+ tr->blkcore_priv->part_table[i].start_sect = 0; -+ } -+ -+ /* -+ <viro_zzz> dwmw2: BLOCK_SIZE_BITS has nothing to do with block devices -+ <viro> dwmw2: any code which sets blk_size[][] should be -+ size >> 10 /+ 2.4 and its dumb units */ -+ -+ tr->blkcore_priv->sizes[new->devnum << tr->part_bits] = -+ (new->size * new->blksize) >> 10; /* 2.4 and its dumb units */ -+ -+ /* But this is still in device's sectors? 
$DEITY knows */ -+ tr->blkcore_priv->part_table[new->devnum << tr->part_bits].nr_sects = new->size; -+ -+ if (tr->part_bits) { -+ grok_partitions(&tr->blkcore_priv->gd, new->devnum, -+ 1 << tr->part_bits, new->size); -+ } -+#ifdef CONFIG_DEVFS_FS -+ if (!tr->part_bits) { -+ char name[2]; -+ -+ name[0] = '0' + new->devnum; -+ name[1] = 0; -+ -+ new->blkcore_priv = -+ devfs_register(tr->blkcore_priv->devfs_dir_handle, -+ name, DEVFS_FL_DEFAULT, tr->major, -+ new->devnum, S_IFBLK|S_IRUGO|S_IWUGO, -+ &mtd_blktrans_ops, NULL); -+ } -+#endif -+ return 0; -+} -+ -+int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old) -+{ -+ struct mtd_blktrans_ops *tr = old->tr; -+ int i; -+ -+ if (!down_trylock(&mtd_table_mutex)) { -+ up(&mtd_table_mutex); -+ BUG(); -+ } -+ -+#ifdef CONFIG_DEVFS_FS -+ if (!tr->part_bits) { -+ devfs_unregister(old->blkcore_priv); -+ old->blkcore_priv = NULL; -+ } else { -+ devfs_register_partitions(&tr->blkcore_priv->gd, -+ old->devnum << tr->part_bits, 1); -+ } -+#endif -+ spin_lock(&tr->blkcore_priv->devs_lock); -+ list_del(&old->list); -+ spin_unlock(&tr->blkcore_priv->devs_lock); -+ -+ for (i = (old->devnum << tr->part_bits); -+ i < ((old->devnum+1) << tr->part_bits); i++) { -+ tr->blkcore_priv->sizes[i] = 0; -+ tr->blkcore_priv->part_table[i].nr_sects = 0; -+ tr->blkcore_priv->part_table[i].start_sect = 0; -+ } -+ -+ return 0; -+} -+ -+void blktrans_notify_remove(struct mtd_info *mtd) -+{ -+ struct list_head *this, *this2, *next; -+ -+ list_for_each(this, &blktrans_majors) { -+ struct mtd_blktrans_ops *tr = list_entry(this, struct mtd_blktrans_ops, list); -+ -+ list_for_each_safe(this2, next, &tr->devs) { -+ struct mtd_blktrans_dev *dev = list_entry(this2, struct mtd_blktrans_dev, list); -+ -+ if (dev->mtd == mtd) -+ tr->remove_dev(dev); -+ } -+ } -+} -+ -+void blktrans_notify_add(struct mtd_info *mtd) -+{ -+ struct list_head *this; -+ -+ if (mtd->type == MTD_ABSENT) -+ return; -+ -+ list_for_each(this, &blktrans_majors) { -+ struct mtd_blktrans_ops 
*tr = list_entry(this, struct mtd_blktrans_ops, list); -+ -+ tr->add_mtd(tr, mtd); -+ } -+ -+} -+ -+static struct mtd_notifier blktrans_notifier = { -+ .add = blktrans_notify_add, -+ .remove = blktrans_notify_remove, -+}; -+ -+int register_mtd_blktrans(struct mtd_blktrans_ops *tr) -+{ -+ int ret, i; -+ -+ /* Register the notifier if/when the first device type is -+ registered, to prevent the link/init ordering from fucking -+ us over. */ -+ if (!blktrans_notifier.list.next) -+ register_mtd_user(&blktrans_notifier); -+ -+ tr->blkcore_priv = kmalloc(sizeof(*tr->blkcore_priv), GFP_KERNEL); -+ if (!tr->blkcore_priv) -+ return -ENOMEM; -+ -+ memset(tr->blkcore_priv, 0, sizeof(*tr->blkcore_priv)); -+ -+ down(&mtd_table_mutex); -+ -+ ret = devfs_register_blkdev(tr->major, tr->name, &mtd_blktrans_ops); -+ if (ret) { -+ printk(KERN_WARNING "Unable to register %s block device on major %d: %d\n", -+ tr->name, tr->major, ret); -+ kfree(tr->blkcore_priv); -+ up(&mtd_table_mutex); -+ return ret; -+ } -+ -+ blk_init_queue(BLK_DEFAULT_QUEUE(tr->major), &mtd_blktrans_request); -+ (BLK_DEFAULT_QUEUE(tr->major))->queuedata = tr; -+ -+ init_completion(&tr->blkcore_priv->thread_dead); -+ init_waitqueue_head(&tr->blkcore_priv->thread_wq); -+ -+ ret = kernel_thread(mtd_blktrans_thread, tr, -+ CLONE_FS|CLONE_FILES|CLONE_SIGHAND); -+ if (ret < 0) { -+ blk_cleanup_queue(BLK_DEFAULT_QUEUE(tr->major)); -+ devfs_unregister_blkdev(tr->major, tr->name); -+ kfree(tr->blkcore_priv); -+ up(&mtd_table_mutex); -+ return ret; -+ } -+ -+ tr->blkcore_priv->devfs_dir_handle = -+ devfs_mk_dir(NULL, tr->name, NULL); -+ -+ blksize_size[tr->major] = tr->blkcore_priv->blksizes; -+ blk_size[tr->major] = tr->blkcore_priv->sizes; -+ -+ tr->blkcore_priv->gd.major = tr->major; -+ tr->blkcore_priv->gd.major_name = tr->name; -+ tr->blkcore_priv->gd.minor_shift = tr->part_bits; -+ tr->blkcore_priv->gd.max_p = (1<<tr->part_bits) - 1; -+ tr->blkcore_priv->gd.part = tr->blkcore_priv->part_table; -+ 
tr->blkcore_priv->gd.sizes = tr->blkcore_priv->sizes; -+ tr->blkcore_priv->gd.nr_real = 256 >> tr->part_bits; -+ -+ spin_lock_init(&tr->blkcore_priv->devs_lock); -+ -+ add_gendisk(&tr->blkcore_priv->gd); -+ -+ INIT_LIST_HEAD(&tr->devs); -+ list_add(&tr->list, &blktrans_majors); -+ -+ for (i=0; i<MAX_MTD_DEVICES; i++) { -+ if (mtd_table[i] && mtd_table[i]->type != MTD_ABSENT) -+ tr->add_mtd(tr, mtd_table[i]); -+ } -+ up(&mtd_table_mutex); -+ -+ return 0; -+} -+ -+int deregister_mtd_blktrans(struct mtd_blktrans_ops *tr) -+{ -+ struct list_head *this, *next; -+ -+ down(&mtd_table_mutex); -+ -+ /* Clean up the kernel thread */ -+ tr->blkcore_priv->exiting = 1; -+ wake_up(&tr->blkcore_priv->thread_wq); -+ wait_for_completion(&tr->blkcore_priv->thread_dead); -+ -+ /* Remove it from the list of active majors */ -+ list_del(&tr->list); -+ -+ /* Remove each of its devices */ -+ list_for_each_safe(this, next, &tr->devs) { -+ struct mtd_blktrans_dev *dev = list_entry(this, struct mtd_blktrans_dev, list); -+ tr->remove_dev(dev); -+ } -+ -+ blksize_size[tr->major] = NULL; -+ blk_size[tr->major] = NULL; -+ -+ del_gendisk(&tr->blkcore_priv->gd); -+ -+ blk_cleanup_queue(BLK_DEFAULT_QUEUE(tr->major)); -+ devfs_unregister_blkdev(tr->major, tr->name); -+ -+ devfs_unregister(tr->blkcore_priv->devfs_dir_handle); -+ -+ up(&mtd_table_mutex); -+ -+ kfree(tr->blkcore_priv); -+ -+ if (!list_empty(&tr->devs)) -+ BUG(); -+ return 0; -+} -+ -+static void __exit mtd_blktrans_exit(void) -+{ -+ /* No race here -- if someone's currently in register_mtd_blktrans -+ we're screwed anyway. 
*/ -+ if (blktrans_notifier.list.next) -+ unregister_mtd_user(&blktrans_notifier); -+} -+ -+module_exit(mtd_blktrans_exit); -+ -+EXPORT_SYMBOL_GPL(register_mtd_blktrans); -+EXPORT_SYMBOL_GPL(deregister_mtd_blktrans); -+EXPORT_SYMBOL_GPL(add_mtd_blktrans_dev); -+EXPORT_SYMBOL_GPL(del_mtd_blktrans_dev); -+ -+MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>"); -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("Common interface to block layer for MTD 'translation layers'"); -Index: linux-2.6.5/drivers/mtd/mtd_blkdevs.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/mtd_blkdevs.c 2004-04-03 22:36:14.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/mtd_blkdevs.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: mtd_blkdevs.c,v 1.16 2003/06/23 13:34:43 dwmw2 Exp $ -+ * $Id: mtd_blkdevs.c,v 1.23 2004/08/19 01:54:36 tpoynor Exp $ - * - * (C) 2003 David Woodhouse <dwmw2@infradead.org> - * -@@ -220,7 +220,7 @@ - return ret; - - g.start = get_start_sect(inode->i_bdev); -- if (copy_to_user((void *)arg, &g, sizeof(g))) -+ if (copy_to_user((void __user *)arg, &g, sizeof(g))) - return -EFAULT; - return 0; - } /* else */ -@@ -295,7 +295,10 @@ - snprintf(gd->devfs_name, sizeof(gd->devfs_name), - "%s/%c", tr->name, (tr->part_bits?'a':'0') + new->devnum); - -- set_capacity(gd, new->size); -+ /* 2.5 has capacity in units of 512 bytes while still -+ having BLOCK_SIZE_BITS set to 10. Just to keep us amused. 
*/ -+ set_capacity(gd, (new->size * new->blksize) >> 9); -+ - gd->private_data = new; - new->blkcore_priv = gd; - gd->queue = tr->blkcore_priv->rq; -Index: linux-2.6.5/drivers/mtd/mtdblock.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/mtdblock.c 2004-04-03 22:36:56.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/mtdblock.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,7 +1,7 @@ - /* - * Direct MTD block device access - * -- * $Id: mtdblock.c,v 1.63 2003/06/23 12:00:08 dwmw2 Exp $ -+ * $Id: mtdblock.c,v 1.64 2003/10/04 17:14:14 dwmw2 Exp $ - * - * (C) 2000-2003 Nicolas Pitre <nico@cam.org> - * (C) 1999-2003 David Woodhouse <dwmw2@infradead.org> -@@ -275,7 +275,7 @@ - - /* OK, it's not open. Create cache info for it */ - mtdblk = kmalloc(sizeof(struct mtdblk_dev), GFP_KERNEL); -- if (!mtdblks) -+ if (!mtdblk) - return -ENOMEM; - - memset(mtdblk, 0, sizeof(*mtdblk)); -Index: linux-2.6.5/drivers/mtd/mtdchar.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/mtdchar.c 2005-02-01 16:55:50.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/mtdchar.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: mtdchar.c,v 1.54 2003/05/21 10:50:43 dwmw2 Exp $ -+ * $Id: mtdchar.c,v 1.64 2004/08/09 13:59:46 dwmw2 Exp $ - * - * Character-device access to raw MTD devices. 
- * -@@ -9,6 +9,7 @@ - #include <linux/kernel.h> - #include <linux/module.h> - #include <linux/mtd/mtd.h> -+#include <linux/mtd/compatmac.h> - #include <linux/slab.h> - #include <linux/init.h> - #include <linux/fs.h> -@@ -16,22 +17,52 @@ - - #ifdef CONFIG_DEVFS_FS - #include <linux/devfs_fs_kernel.h> --static void mtd_notify_add(struct mtd_info* mtd); --static void mtd_notify_remove(struct mtd_info* mtd); -+ -+static void mtd_notify_add(struct mtd_info* mtd) -+{ -+ if (!mtd) -+ return; -+ -+ devfs_mk_cdev(MKDEV(MTD_CHAR_MAJOR, mtd->index*2), -+ S_IFCHR | S_IRUGO | S_IWUGO, "mtd/%d", mtd->index); -+ -+ devfs_mk_cdev(MKDEV(MTD_CHAR_MAJOR, mtd->index*2+1), -+ S_IFCHR | S_IRUGO, "mtd/%dro", mtd->index); -+} -+ -+static void mtd_notify_remove(struct mtd_info* mtd) -+{ -+ if (!mtd) -+ return; -+ devfs_remove("mtd/%d", mtd->index); -+ devfs_remove("mtd/%dro", mtd->index); -+} - - static struct mtd_notifier notifier = { - .add = mtd_notify_add, - .remove = mtd_notify_remove, - }; - -+static inline void mtdchar_devfs_init(void) -+{ -+ devfs_mk_dir("mtd"); -+ register_mtd_user(&notifier); -+} -+ -+static inline void mtdchar_devfs_exit(void) -+{ -+ unregister_mtd_user(&notifier); -+ devfs_remove("mtd"); -+} -+#else /* !DEVFS */ -+#define mtdchar_devfs_init() do { } while(0) -+#define mtdchar_devfs_exit() do { } while(0) - #endif - - static loff_t mtd_lseek (struct file *file, loff_t offset, int orig) - { - struct mtd_info *mtd=(struct mtd_info *)file->private_data; - -- -- down(&mtd->mutex); - switch (orig) { - case 0: - /* SEEK_SET */ -@@ -46,16 +77,14 @@ - file->f_pos =mtd->size + offset; - break; - default: -- up(&mtd->mutex); - return -EINVAL; - } - -- /* XXX Should return -EINVAL surely ?? 
*/ - if (file->f_pos < 0) - file->f_pos = 0; - else if (file->f_pos >= mtd->size) - file->f_pos = mtd->size - 1; -- up(&mtd->mutex); -+ - return file->f_pos; - } - -@@ -120,7 +149,7 @@ - */ - #define MAX_KMALLOC_SIZE 0x20000 - --static ssize_t mtd_read(struct file *file, char *buf, size_t count,loff_t *ppos) -+static ssize_t mtd_read(struct file *file, char __user *buf, size_t count,loff_t *ppos) - { - struct mtd_info *mtd = (struct mtd_info *)file->private_data; - size_t retlen=0; -@@ -131,16 +160,11 @@ - - DEBUG(MTD_DEBUG_LEVEL0,"MTD_read\n"); - -- down(&mtd->mutex); -- -- if (count > mtd->size - *ppos) -+ if (*ppos + count > mtd->size) - count = mtd->size - *ppos; - - if (!count) -- { -- up(&mtd->mutex); - return 0; -- } - - /* FIXME: Use kiovec in 2.5 to lock down the user's buffers - and pass them directly to the MTD functions */ -@@ -152,18 +176,13 @@ - - kbuf=kmalloc(len,GFP_KERNEL); - if (!kbuf) -- { -- up(&mtd->mutex); -- /* API error - should return I/O done so far if > 0 */ - return -ENOMEM; -- } -+ - ret = MTD_READ(mtd, *ppos, len, &retlen, kbuf); - if (!ret) { - *ppos += retlen; - if (copy_to_user(buf, kbuf, retlen)) { - kfree(kbuf); -- up(&mtd->mutex); -- /* API error - should return I/O done so far if > 0 */ - return -EFAULT; - } - else -@@ -174,17 +193,16 @@ - } - else { - kfree(kbuf); -- up(&mtd->mutex); - return ret; - } - - kfree(kbuf); - } -- up(&mtd->mutex); -+ - return total_retlen; - } /* mtd_read */ - --static ssize_t mtd_write(struct file *file, const char *buf, size_t count,loff_t *ppos) -+static ssize_t mtd_write(struct file *file, const char __user *buf, size_t count,loff_t *ppos) - { - struct mtd_info *mtd = (struct mtd_info *)file->private_data; - char *kbuf; -@@ -194,22 +212,15 @@ - int len; - - DEBUG(MTD_DEBUG_LEVEL0,"MTD_write\n"); -- -- down(&mtd->mutex); -- if (*ppos >= mtd->size) -- { -- up(&mtd->mutex); -+ -+ if (*ppos == mtd->size) - return -ENOSPC; -- } - -- if (count > mtd->size - *ppos) -+ if (*ppos + count > mtd->size) - 
count = mtd->size - *ppos; - - if (!count) -- { -- up(&mtd->mutex); - return 0; -- } - - while (count) { - if (count > MAX_KMALLOC_SIZE) -@@ -219,14 +230,11 @@ - - kbuf=kmalloc(len,GFP_KERNEL); - if (!kbuf) { --// printk("kmalloc is null\n"); -- /* API bug should report I/O completed */ -- up(&mtd->mutex); -+ printk("kmalloc is null\n"); - return -ENOMEM; - } - - if (copy_from_user(kbuf, buf, len)) { -- up(&mtd->mutex); - kfree(kbuf); - return -EFAULT; - } -@@ -239,15 +247,13 @@ - buf += retlen; - } - else { -- up(&mtd->mutex); - kfree(kbuf); -- /* API bug ?? */ - return ret; - } - - kfree(kbuf); - } -- up(&mtd->mutex); -+ - return total_retlen; - } /* mtd_write */ - -@@ -256,7 +262,7 @@ - IOCTL calls for getting device parameters. - - ======================================================================*/ --static void mtd_erase_callback (struct erase_info *instr) -+static void mtdchar_erase_callback (struct erase_info *instr) - { - wake_up((wait_queue_head_t *)instr->priv); - } -@@ -265,6 +271,7 @@ - u_int cmd, u_long arg) - { - struct mtd_info *mtd = (struct mtd_info *)file->private_data; -+ void __user *argp = (void __user *)arg; - int ret = 0; - u_long size; - -@@ -272,17 +279,17 @@ - - size = (cmd & IOCSIZE_MASK) >> IOCSIZE_SHIFT; - if (cmd & IOC_IN) { -- ret = verify_area(VERIFY_READ, (char *)arg, size); -+ ret = verify_area(VERIFY_READ, argp, size); - if (ret) return ret; - } - if (cmd & IOC_OUT) { -- ret = verify_area(VERIFY_WRITE, (char *)arg, size); -+ ret = verify_area(VERIFY_WRITE, argp, size); - if (ret) return ret; - } - - switch (cmd) { - case MEMGETREGIONCOUNT: -- if (copy_to_user((int *) arg, &(mtd->numeraseregions), sizeof(int))) -+ if (copy_to_user(argp, &(mtd->numeraseregions), sizeof(int))) - return -EFAULT; - break; - -@@ -290,24 +297,19 @@ - { - struct region_info_user ur; - -- if (copy_from_user( &ur, -- (struct region_info_user *)arg, -- sizeof(struct region_info_user))) { -+ if (copy_from_user(&ur, argp, sizeof(struct region_info_user))) 
- return -EFAULT; -- } - - if (ur.regionindex >= mtd->numeraseregions) - return -EINVAL; -- if (copy_to_user((struct mtd_erase_region_info *) arg, -- &(mtd->eraseregions[ur.regionindex]), -+ if (copy_to_user(argp, &(mtd->eraseregions[ur.regionindex]), - sizeof(struct mtd_erase_region_info))) - return -EFAULT; - break; - } - - case MEMGETINFO: -- if (copy_to_user((struct mtd_info *)arg, mtd, -- sizeof(struct mtd_info_user))) -+ if (copy_to_user(argp, mtd, sizeof(struct mtd_info_user))) - return -EFAULT; - break; - -@@ -328,13 +330,13 @@ - init_waitqueue_head(&waitq); - - memset (erase,0,sizeof(struct erase_info)); -- if (copy_from_user(&erase->addr, (u_long *)arg, -- 2 * sizeof(u_long))) { -+ if (copy_from_user(&erase->addr, argp, -+ sizeof(struct erase_info_user))) { - kfree(erase); - return -EFAULT; - } - erase->mtd = mtd; -- erase->callback = mtd_erase_callback; -+ erase->callback = mtdchar_erase_callback; - erase->priv = (unsigned long)&waitq; - - /* -@@ -372,7 +374,7 @@ - if(!(file->f_mode & 2)) - return -EPERM; - -- if (copy_from_user(&buf, (struct mtd_oob_buf *)arg, sizeof(struct mtd_oob_buf))) -+ if (copy_from_user(&buf, argp, sizeof(struct mtd_oob_buf))) - return -EFAULT; - - if (buf.length > 0x4096) -@@ -381,7 +383,7 @@ - if (!mtd->write_oob) - ret = -EOPNOTSUPP; - else -- ret = verify_area(VERIFY_READ, (char *)buf.ptr, buf.length); -+ ret = verify_area(VERIFY_READ, buf.ptr, buf.length); - - if (ret) - return ret; -@@ -397,7 +399,7 @@ - - ret = (mtd->write_oob)(mtd, buf.start, buf.length, &retlen, databuf); - -- if (copy_to_user((void *)arg + sizeof(u_int32_t), &retlen, sizeof(u_int32_t))) -+ if (copy_to_user(argp + sizeof(uint32_t), &retlen, sizeof(uint32_t))) - ret = -EFAULT; - - kfree(databuf); -@@ -411,7 +413,7 @@ - void *databuf; - ssize_t retlen; - -- if (copy_from_user(&buf, (struct mtd_oob_buf *)arg, sizeof(struct mtd_oob_buf))) -+ if (copy_from_user(&buf, argp, sizeof(struct mtd_oob_buf))) - return -EFAULT; - - if (buf.length > 0x4096) -@@ -420,7 
+422,7 @@ - if (!mtd->read_oob) - ret = -EOPNOTSUPP; - else -- ret = verify_area(VERIFY_WRITE, (char *)buf.ptr, buf.length); -+ ret = verify_area(VERIFY_WRITE, buf.ptr, buf.length); - - if (ret) - return ret; -@@ -431,7 +433,7 @@ - - ret = (mtd->read_oob)(mtd, buf.start, buf.length, &retlen, databuf); - -- if (copy_to_user((void *)arg + sizeof(u_int32_t), &retlen, sizeof(u_int32_t))) -+ if (put_user(retlen, (uint32_t __user *)argp)) - ret = -EFAULT; - else if (retlen && copy_to_user(buf.ptr, databuf, retlen)) - ret = -EFAULT; -@@ -442,41 +444,73 @@ - - case MEMLOCK: - { -- unsigned long adrs[2]; -+ struct erase_info_user info; - -- if (copy_from_user(adrs ,(void *)arg, 2* sizeof(unsigned long))) -+ if (copy_from_user(&info, argp, sizeof(info))) - return -EFAULT; - - if (!mtd->lock) - ret = -EOPNOTSUPP; - else -- ret = mtd->lock(mtd, adrs[0], adrs[1]); -+ ret = mtd->lock(mtd, info.start, info.length); - break; - } - - case MEMUNLOCK: - { -- unsigned long adrs[2]; -+ struct erase_info_user info; - -- if (copy_from_user(adrs, (void *)arg, 2* sizeof(unsigned long))) -+ if (copy_from_user(&info, argp, sizeof(info))) - return -EFAULT; - - if (!mtd->unlock) - ret = -EOPNOTSUPP; - else -- ret = mtd->unlock(mtd, adrs[0], adrs[1]); -+ ret = mtd->unlock(mtd, info.start, info.length); - break; - } - - case MEMSETOOBSEL: - { -- if (copy_from_user(&mtd->oobinfo ,(void *)arg, sizeof(struct nand_oobinfo))) -+ if (copy_from_user(&mtd->oobinfo, argp, sizeof(struct nand_oobinfo))) -+ return -EFAULT; -+ break; -+ } -+ -+ case MEMGETOOBSEL: -+ { -+ if (copy_to_user(argp, &(mtd->oobinfo), sizeof(struct nand_oobinfo))) - return -EFAULT; - break; - } -+ -+ case MEMGETBADBLOCK: -+ { -+ loff_t offs; - -+ if (copy_from_user(&offs, argp, sizeof(loff_t))) -+ return -EFAULT; -+ if (!mtd->block_isbad) -+ ret = -EOPNOTSUPP; -+ else -+ return mtd->block_isbad(mtd, offs); -+ break; -+ } -+ -+ case MEMSETBADBLOCK: -+ { -+ loff_t offs; -+ -+ if (copy_from_user(&offs, argp, sizeof(loff_t))) -+ return 
-EFAULT; -+ if (!mtd->block_markbad) -+ ret = -EOPNOTSUPP; -+ else -+ return mtd->block_markbad(mtd, offs); -+ break; -+ } -+ - default: -- DEBUG(MTD_DEBUG_LEVEL0, "Invalid ioctl %x (MEMGETINFO = %x)\n", cmd, MEMGETINFO); - ret = -ENOTTY; - } - -@@ -493,30 +527,6 @@ - .release = mtd_close, - }; - -- --#ifdef CONFIG_DEVFS_FS --/* Notification that a new device has been added. Create the devfs entry for -- * it. */ -- --static void mtd_notify_add(struct mtd_info* mtd) --{ -- if (!mtd) -- return; -- devfs_mk_cdev(MKDEV(MTD_CHAR_MAJOR, mtd->index*2), -- S_IFCHR | S_IRUGO | S_IWUGO, "mtd/%d", mtd->index); -- devfs_mk_cdev(MKDEV(MTD_CHAR_MAJOR, mtd->index*2+1), -- S_IFCHR | S_IRUGO | S_IWUGO, "mtd/%dro", mtd->index); --} -- --static void mtd_notify_remove(struct mtd_info* mtd) --{ -- if (!mtd) -- return; -- devfs_remove("mtd/%d", mtd->index); -- devfs_remove("mtd/%dro", mtd->index); --} --#endif -- - static int __init init_mtdchar(void) - { - if (register_chrdev(MTD_CHAR_MAJOR, "mtd", &mtd_fops)) { -@@ -525,20 +535,13 @@ - return -EAGAIN; - } - --#ifdef CONFIG_DEVFS_FS -- devfs_mk_dir("mtd"); -- -- register_mtd_user(&notifier); --#endif -+ mtdchar_devfs_init(); - return 0; - } - - static void __exit cleanup_mtdchar(void) - { --#ifdef CONFIG_DEVFS_FS -- unregister_mtd_user(&notifier); -- devfs_remove("mtd"); --#endif -+ mtdchar_devfs_exit(); - unregister_chrdev(MTD_CHAR_MAJOR, "mtd"); - } - -Index: linux-2.6.5/drivers/mtd/mtdconcat.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/mtdconcat.c 2004-04-03 22:37:37.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/mtdconcat.c 2005-02-01 17:11:17.000000000 -0500 -@@ -7,7 +7,7 @@ - * - * This code is GPL - * -- * $Id: mtdconcat.c,v 1.4 2003/03/07 17:44:59 rkaiser Exp $ -+ * $Id: mtdconcat.c,v 1.9 2004/06/30 15:17:41 dbrown Exp $ - */ - - #include <linux/module.h> -@@ -26,7 +26,7 @@ - */ - struct mtd_concat { - struct mtd_info mtd; -- int num_subdev; -+ int num_subdev; - struct 
mtd_info **subdev; - }; - -@@ -37,21 +37,20 @@ - #define SIZEOF_STRUCT_MTD_CONCAT(num_subdev) \ - ((sizeof(struct mtd_concat) + (num_subdev) * sizeof(struct mtd_info *))) - -- - /* - * Given a pointer to the MTD object in the mtd_concat structure, - * we can retrieve the pointer to that structure with this macro. - */ - #define CONCAT(x) ((struct mtd_concat *)(x)) - -- - /* - * MTD methods which look up the relevant subdevice, translate the - * effective address and pass through to the subdevice. - */ - --static int concat_read (struct mtd_info *mtd, loff_t from, size_t len, -- size_t *retlen, u_char *buf) -+static int -+concat_read(struct mtd_info *mtd, loff_t from, size_t len, -+ size_t * retlen, u_char * buf) - { - struct mtd_concat *concat = CONCAT(mtd); - int err = -EINVAL; -@@ -59,43 +58,43 @@ - - *retlen = 0; - -- for(i = 0; i < concat->num_subdev; i++) -- { -+ for (i = 0; i < concat->num_subdev; i++) { - struct mtd_info *subdev = concat->subdev[i]; - size_t size, retsize; - -- if (from >= subdev->size) -- { /* Not destined for this subdev */ -- size = 0; -+ if (from >= subdev->size) { -+ /* Not destined for this subdev */ -+ size = 0; - from -= subdev->size; -+ continue; - } -+ if (from + len > subdev->size) -+ /* First part goes into this subdev */ -+ size = subdev->size - from; - else -- { -- if (from + len > subdev->size) -- size = subdev->size - from; /* First part goes into this subdev */ -- else -- size = len; /* Entire transaction goes into this subdev */ -- -- err = subdev->read(subdev, from, size, &retsize, buf); -- -- if(err) -- break; -- -- *retlen += retsize; -- len -= size; -- if(len == 0) -- break; -+ /* Entire transaction goes into this subdev */ -+ size = len; - -- err = -EINVAL; -- buf += size; -- from = 0; -- } -+ err = subdev->read(subdev, from, size, &retsize, buf); -+ -+ if (err) -+ break; -+ -+ *retlen += retsize; -+ len -= size; -+ if (len == 0) -+ break; -+ -+ err = -EINVAL; -+ buf += size; -+ from = 0; - } - return err; - } - 
--static int concat_write (struct mtd_info *mtd, loff_t to, size_t len, -- size_t *retlen, const u_char *buf) -+static int -+concat_write(struct mtd_info *mtd, loff_t to, size_t len, -+ size_t * retlen, const u_char * buf) - { - struct mtd_concat *concat = CONCAT(mtd); - int err = -EINVAL; -@@ -106,46 +105,44 @@ - - *retlen = 0; - -- for(i = 0; i < concat->num_subdev; i++) -- { -+ for (i = 0; i < concat->num_subdev; i++) { - struct mtd_info *subdev = concat->subdev[i]; - size_t size, retsize; - -- if (to >= subdev->size) -- { -- size = 0; -+ if (to >= subdev->size) { -+ size = 0; - to -= subdev->size; -+ continue; - } -+ if (to + len > subdev->size) -+ size = subdev->size - to; - else -- { -- if (to + len > subdev->size) -- size = subdev->size - to; -- else -- size = len; -- -- if (!(subdev->flags & MTD_WRITEABLE)) -- err = -EROFS; -- else -- err = subdev->write(subdev, to, size, &retsize, buf); -- -- if(err) -- break; -- -- *retlen += retsize; -- len -= size; -- if(len == 0) -- break; -+ size = len; - -- err = -EINVAL; -- buf += size; -- to = 0; -- } -+ if (!(subdev->flags & MTD_WRITEABLE)) -+ err = -EROFS; -+ else -+ err = subdev->write(subdev, to, size, &retsize, buf); -+ -+ if (err) -+ break; -+ -+ *retlen += retsize; -+ len -= size; -+ if (len == 0) -+ break; -+ -+ err = -EINVAL; -+ buf += size; -+ to = 0; - } - return err; - } - --static int concat_read_ecc (struct mtd_info *mtd, loff_t from, size_t len, -- size_t *retlen, u_char *buf, u_char *eccbuf, struct nand_oobinfo *oobsel) -+static int -+concat_read_ecc(struct mtd_info *mtd, loff_t from, size_t len, -+ size_t * retlen, u_char * buf, u_char * eccbuf, -+ struct nand_oobinfo *oobsel) - { - struct mtd_concat *concat = CONCAT(mtd); - int err = -EINVAL; -@@ -153,53 +150,56 @@ - - *retlen = 0; - -- for(i = 0; i < concat->num_subdev; i++) -- { -+ for (i = 0; i < concat->num_subdev; i++) { - struct mtd_info *subdev = concat->subdev[i]; - size_t size, retsize; -- -- if (from >= subdev->size) -- { /* Not destined 
for this subdev */ -- size = 0; -+ -+ if (from >= subdev->size) { -+ /* Not destined for this subdev */ -+ size = 0; - from -= subdev->size; -+ continue; - } -+ -+ if (from + len > subdev->size) -+ /* First part goes into this subdev */ -+ size = subdev->size - from; - else -- { -- if (from + len > subdev->size) -- size = subdev->size - from; /* First part goes into this subdev */ -- else -- size = len; /* Entire transaction goes into this subdev */ -- -- if (subdev->read_ecc) -- err = subdev->read_ecc(subdev, from, size, &retsize, buf, eccbuf, oobsel); -- else -- err = -EINVAL; -- -- if(err) -- break; -- -- *retlen += retsize; -- len -= size; -- if(len == 0) -- break; -+ /* Entire transaction goes into this subdev */ -+ size = len; - -+ if (subdev->read_ecc) -+ err = subdev->read_ecc(subdev, from, size, -+ &retsize, buf, eccbuf, oobsel); -+ else - err = -EINVAL; -- buf += size; -- if (eccbuf) -- { -- eccbuf += subdev->oobsize; -- /* in nand.c at least, eccbufs are tagged with 2 (int)eccstatus', -- we must account for these */ -- eccbuf += 2 * (sizeof(int)); -- } -- from = 0; -+ -+ if (err) -+ break; -+ -+ *retlen += retsize; -+ len -= size; -+ if (len == 0) -+ break; -+ -+ err = -EINVAL; -+ buf += size; -+ if (eccbuf) { -+ eccbuf += subdev->oobsize; -+ /* in nand.c at least, eccbufs are -+ tagged with 2 (int)eccstatus'; we -+ must account for these */ -+ eccbuf += 2 * (sizeof (int)); - } -+ from = 0; - } - return err; - } - --static int concat_write_ecc (struct mtd_info *mtd, loff_t to, size_t len, -- size_t *retlen, const u_char *buf, u_char *eccbuf, struct nand_oobinfo *oobsel) -+static int -+concat_write_ecc(struct mtd_info *mtd, loff_t to, size_t len, -+ size_t * retlen, const u_char * buf, u_char * eccbuf, -+ struct nand_oobinfo *oobsel) - { - struct mtd_concat *concat = CONCAT(mtd); - int err = -EINVAL; -@@ -210,50 +210,48 @@ - - *retlen = 0; - -- for(i = 0; i < concat->num_subdev; i++) -- { -+ for (i = 0; i < concat->num_subdev; i++) { - struct mtd_info 
*subdev = concat->subdev[i]; - size_t size, retsize; -- -- if (to >= subdev->size) -- { -- size = 0; -+ -+ if (to >= subdev->size) { -+ size = 0; - to -= subdev->size; -+ continue; - } -+ if (to + len > subdev->size) -+ size = subdev->size - to; - else -- { -- if (to + len > subdev->size) -- size = subdev->size - to; -- else -- size = len; -- -- if (!(subdev->flags & MTD_WRITEABLE)) -- err = -EROFS; -- else if (subdev->write_ecc) -- err = subdev->write_ecc(subdev, to, size, &retsize, buf, eccbuf, oobsel); -- else -- err = -EINVAL; -- -- if(err) -- break; -- -- *retlen += retsize; -- len -= size; -- if(len == 0) -- break; -+ size = len; - -+ if (!(subdev->flags & MTD_WRITEABLE)) -+ err = -EROFS; -+ else if (subdev->write_ecc) -+ err = subdev->write_ecc(subdev, to, size, -+ &retsize, buf, eccbuf, oobsel); -+ else - err = -EINVAL; -- buf += size; -- if (eccbuf) -- eccbuf += subdev->oobsize; -- to = 0; -- } -+ -+ if (err) -+ break; -+ -+ *retlen += retsize; -+ len -= size; -+ if (len == 0) -+ break; -+ -+ err = -EINVAL; -+ buf += size; -+ if (eccbuf) -+ eccbuf += subdev->oobsize; -+ to = 0; - } - return err; - } - --static int concat_read_oob (struct mtd_info *mtd, loff_t from, size_t len, -- size_t *retlen, u_char *buf) -+static int -+concat_read_oob(struct mtd_info *mtd, loff_t from, size_t len, -+ size_t * retlen, u_char * buf) - { - struct mtd_concat *concat = CONCAT(mtd); - int err = -EINVAL; -@@ -261,46 +259,47 @@ - - *retlen = 0; - -- for(i = 0; i < concat->num_subdev; i++) -- { -+ for (i = 0; i < concat->num_subdev; i++) { - struct mtd_info *subdev = concat->subdev[i]; - size_t size, retsize; -- -- if (from >= subdev->size) -- { /* Not destined for this subdev */ -- size = 0; -+ -+ if (from >= subdev->size) { -+ /* Not destined for this subdev */ -+ size = 0; - from -= subdev->size; -+ continue; - } -+ if (from + len > subdev->size) -+ /* First part goes into this subdev */ -+ size = subdev->size - from; -+ else -+ /* Entire transaction goes into this subdev */ 
-+ size = len; -+ -+ if (subdev->read_oob) -+ err = subdev->read_oob(subdev, from, size, -+ &retsize, buf); - else -- { -- if (from + len > subdev->size) -- size = subdev->size - from; /* First part goes into this subdev */ -- else -- size = len; /* Entire transaction goes into this subdev */ -- -- if (subdev->read_oob) -- err = subdev->read_oob(subdev, from, size, &retsize, buf); -- else -- err = -EINVAL; -- -- if(err) -- break; -- -- *retlen += retsize; -- len -= size; -- if(len == 0) -- break; -- - err = -EINVAL; -- buf += size; -- from = 0; -- } -+ -+ if (err) -+ break; -+ -+ *retlen += retsize; -+ len -= size; -+ if (len == 0) -+ break; -+ -+ err = -EINVAL; -+ buf += size; -+ from = 0; - } - return err; - } - --static int concat_write_oob (struct mtd_info *mtd, loff_t to, size_t len, -- size_t *retlen, const u_char *buf) -+static int -+concat_write_oob(struct mtd_info *mtd, loff_t to, size_t len, -+ size_t * retlen, const u_char * buf) - { - struct mtd_concat *concat = CONCAT(mtd); - int err = -EINVAL; -@@ -311,50 +310,46 @@ - - *retlen = 0; - -- for(i = 0; i < concat->num_subdev; i++) -- { -+ for (i = 0; i < concat->num_subdev; i++) { - struct mtd_info *subdev = concat->subdev[i]; - size_t size, retsize; -- -- if (to >= subdev->size) -- { -- size = 0; -+ -+ if (to >= subdev->size) { -+ size = 0; - to -= subdev->size; -+ continue; - } -+ if (to + len > subdev->size) -+ size = subdev->size - to; - else -- { -- if (to + len > subdev->size) -- size = subdev->size - to; -- else -- size = len; -- -- if (!(subdev->flags & MTD_WRITEABLE)) -- err = -EROFS; -- else if (subdev->write_oob) -- err = subdev->write_oob(subdev, to, size, &retsize, buf); -- else -- err = -EINVAL; -- -- if(err) -- break; -- -- *retlen += retsize; -- len -= size; -- if(len == 0) -- break; -+ size = len; - -+ if (!(subdev->flags & MTD_WRITEABLE)) -+ err = -EROFS; -+ else if (subdev->write_oob) -+ err = subdev->write_oob(subdev, to, size, &retsize, -+ buf); -+ else - err = -EINVAL; -- buf += 
size; -- to = 0; -- } -+ -+ if (err) -+ break; -+ -+ *retlen += retsize; -+ len -= size; -+ if (len == 0) -+ break; -+ -+ err = -EINVAL; -+ buf += size; -+ to = 0; - } - return err; - } - -- --static void concat_erase_callback (struct erase_info *instr) -+static void concat_erase_callback(struct erase_info *instr) - { -- wake_up((wait_queue_head_t *)instr->priv); -+ wake_up((wait_queue_head_t *) instr->priv); - } - - static int concat_dev_erase(struct mtd_info *mtd, struct erase_info *erase) -@@ -370,18 +365,18 @@ - - erase->mtd = mtd; - erase->callback = concat_erase_callback; -- erase->priv = (unsigned long)&waitq; -- -+ erase->priv = (unsigned long) &waitq; -+ - /* - * FIXME: Allow INTERRUPTIBLE. Which means - * not having the wait_queue head on the stack. - */ - err = mtd->erase(mtd, erase); -- if (!err) -- { -+ if (!err) { - set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&waitq, &wait); -- if (erase->state != MTD_ERASE_DONE && erase->state != MTD_ERASE_FAILED) -+ if (erase->state != MTD_ERASE_DONE -+ && erase->state != MTD_ERASE_FAILED) - schedule(); - remove_wait_queue(&waitq, &wait); - set_current_state(TASK_RUNNING); -@@ -391,21 +386,21 @@ - return err; - } - --static int concat_erase (struct mtd_info *mtd, struct erase_info *instr) -+static int concat_erase(struct mtd_info *mtd, struct erase_info *instr) - { - struct mtd_concat *concat = CONCAT(mtd); - struct mtd_info *subdev; - int i, err; -- u_int32_t length; -+ u_int32_t length, offset = 0; - struct erase_info *erase; - - if (!(mtd->flags & MTD_WRITEABLE)) - return -EROFS; - -- if(instr->addr > concat->mtd.size) -+ if (instr->addr > concat->mtd.size) - return -EINVAL; - -- if(instr->len + instr->addr > concat->mtd.size) -+ if (instr->len + instr->addr > concat->mtd.size) - return -EINVAL; - - /* -@@ -414,23 +409,22 @@ - * region info rather than looking at each particular sub-device - * in turn. 
- */ -- if (!concat->mtd.numeraseregions) -- { /* the easy case: device has uniform erase block size */ -- if(instr->addr & (concat->mtd.erasesize - 1)) -+ if (!concat->mtd.numeraseregions) { -+ /* the easy case: device has uniform erase block size */ -+ if (instr->addr & (concat->mtd.erasesize - 1)) - return -EINVAL; -- if(instr->len & (concat->mtd.erasesize - 1)) -+ if (instr->len & (concat->mtd.erasesize - 1)) - return -EINVAL; -- } -- else -- { /* device has variable erase size */ -- struct mtd_erase_region_info *erase_regions = concat->mtd.eraseregions; -+ } else { -+ /* device has variable erase size */ -+ struct mtd_erase_region_info *erase_regions = -+ concat->mtd.eraseregions; - - /* - * Find the erase region where the to-be-erased area begins: - */ -- for(i = 0; i < concat->mtd.numeraseregions && -- instr->addr >= erase_regions[i].offset; i++) -- ; -+ for (i = 0; i < concat->mtd.numeraseregions && -+ instr->addr >= erase_regions[i].offset; i++) ; - --i; - - /* -@@ -438,25 +432,28 @@ - * to-be-erased area begins. 
Verify that the starting - * offset is aligned to this region's erase size: - */ -- if (instr->addr & (erase_regions[i].erasesize-1)) -+ if (instr->addr & (erase_regions[i].erasesize - 1)) - return -EINVAL; - - /* - * now find the erase region where the to-be-erased area ends: - */ -- for(; i < concat->mtd.numeraseregions && -- (instr->addr + instr->len) >= erase_regions[i].offset ; ++i) -- ; -+ for (; i < concat->mtd.numeraseregions && -+ (instr->addr + instr->len) >= erase_regions[i].offset; -+ ++i) ; - --i; - /* - * check if the ending offset is aligned to this region's erase size - */ -- if ((instr->addr + instr->len) & (erase_regions[i].erasesize-1)) -+ if ((instr->addr + instr->len) & (erase_regions[i].erasesize - -+ 1)) - return -EINVAL; - } - -+ instr->fail_addr = 0xffffffff; -+ - /* make a local copy of instr to avoid modifying the caller's struct */ -- erase = kmalloc(sizeof(struct erase_info),GFP_KERNEL); -+ erase = kmalloc(sizeof (struct erase_info), GFP_KERNEL); - - if (!erase) - return -ENOMEM; -@@ -468,39 +465,44 @@ - * find the subdevice where the to-be-erased area begins, adjust - * starting offset to be relative to the subdevice start - */ -- for(i = 0; i < concat->num_subdev; i++) -- { -+ for (i = 0; i < concat->num_subdev; i++) { - subdev = concat->subdev[i]; -- if(subdev->size <= erase->addr) -+ if (subdev->size <= erase->addr) { - erase->addr -= subdev->size; -- else -+ offset += subdev->size; -+ } else { - break; -- } -- if(i >= concat->num_subdev) /* must never happen since size */ -- BUG(); /* limit has been verified above */ -+ } -+ } -+ -+ /* must never happen since size limit has been verified above */ -+ if (i >= concat->num_subdev) -+ BUG(); - - /* now do the erase: */ - err = 0; -- for(;length > 0; i++) /* loop for all subevices affected by this request */ -- { -- subdev = concat->subdev[i]; /* get current subdevice */ -+ for (; length > 0; i++) { -+ /* loop for all subdevices affected by this request */ -+ subdev = concat->subdev[i]; 
/* get current subdevice */ - - /* limit length to subdevice's size: */ -- if(erase->addr + length > subdev->size) -+ if (erase->addr + length > subdev->size) - erase->len = subdev->size - erase->addr; - else - erase->len = length; - -- if (!(subdev->flags & MTD_WRITEABLE)) -- { -+ if (!(subdev->flags & MTD_WRITEABLE)) { - err = -EROFS; - break; - } - length -= erase->len; -- if ((err = concat_dev_erase(subdev, erase))) -- { -- if(err == -EINVAL) /* sanity check: must never happen since */ -- BUG(); /* block alignment has been checked above */ -+ if ((err = concat_dev_erase(subdev, erase))) { -+ /* sanity check: should never happen since -+ * block alignment has been checked above */ -+ if (err == -EINVAL) -+ BUG(); -+ if (erase->fail_addr != 0xffffffff) -+ instr->fail_addr = erase->fail_addr + offset; - break; - } - /* -@@ -512,96 +514,91 @@ - * current subdevice, i.e. at offset zero. - */ - erase->addr = 0; -+ offset += subdev->size; - } -+ instr->state = erase->state; - kfree(erase); - if (err) - return err; - -- instr->state = MTD_ERASE_DONE; - if (instr->callback) - instr->callback(instr); - return 0; - } - --static int concat_lock (struct mtd_info *mtd, loff_t ofs, size_t len) -+static int concat_lock(struct mtd_info *mtd, loff_t ofs, size_t len) - { - struct mtd_concat *concat = CONCAT(mtd); - int i, err = -EINVAL; - -- if ((len + ofs) > mtd->size) -+ if ((len + ofs) > mtd->size) - return -EINVAL; - -- for(i = 0; i < concat->num_subdev; i++) -- { -+ for (i = 0; i < concat->num_subdev; i++) { - struct mtd_info *subdev = concat->subdev[i]; - size_t size; - -- if (ofs >= subdev->size) -- { -- size = 0; -+ if (ofs >= subdev->size) { -+ size = 0; - ofs -= subdev->size; -+ continue; - } -+ if (ofs + len > subdev->size) -+ size = subdev->size - ofs; - else -- { -- if (ofs + len > subdev->size) -- size = subdev->size - ofs; -- else -- size = len; -- -- err = subdev->lock(subdev, ofs, size); -- -- if(err) -- break; -- -- len -= size; -- if(len == 0) -- break; -+ size 
= len; - -- err = -EINVAL; -- ofs = 0; -- } -+ err = subdev->lock(subdev, ofs, size); -+ -+ if (err) -+ break; -+ -+ len -= size; -+ if (len == 0) -+ break; -+ -+ err = -EINVAL; -+ ofs = 0; - } -+ - return err; - } - --static int concat_unlock (struct mtd_info *mtd, loff_t ofs, size_t len) -+static int concat_unlock(struct mtd_info *mtd, loff_t ofs, size_t len) - { - struct mtd_concat *concat = CONCAT(mtd); - int i, err = 0; - -- if ((len + ofs) > mtd->size) -+ if ((len + ofs) > mtd->size) - return -EINVAL; - -- for(i = 0; i < concat->num_subdev; i++) -- { -+ for (i = 0; i < concat->num_subdev; i++) { - struct mtd_info *subdev = concat->subdev[i]; - size_t size; - -- if (ofs >= subdev->size) -- { -- size = 0; -+ if (ofs >= subdev->size) { -+ size = 0; - ofs -= subdev->size; -+ continue; - } -+ if (ofs + len > subdev->size) -+ size = subdev->size - ofs; - else -- { -- if (ofs + len > subdev->size) -- size = subdev->size - ofs; -- else -- size = len; -- -- err = subdev->unlock(subdev, ofs, size); -- -- if(err) -- break; -- -- len -= size; -- if(len == 0) -- break; -+ size = len; - -- err = -EINVAL; -- ofs = 0; -- } -+ err = subdev->unlock(subdev, ofs, size); -+ -+ if (err) -+ break; -+ -+ len -= size; -+ if (len == 0) -+ break; -+ -+ err = -EINVAL; -+ ofs = 0; - } -+ - return err; - } - -@@ -610,8 +607,7 @@ - struct mtd_concat *concat = CONCAT(mtd); - int i; - -- for(i = 0; i < concat->num_subdev; i++) -- { -+ for (i = 0; i < concat->num_subdev; i++) { - struct mtd_info *subdev = concat->subdev[i]; - subdev->sync(subdev); - } -@@ -622,10 +618,9 @@ - struct mtd_concat *concat = CONCAT(mtd); - int i, rc = 0; - -- for(i = 0; i < concat->num_subdev; i++) -- { -+ for (i = 0; i < concat->num_subdev; i++) { - struct mtd_info *subdev = concat->subdev[i]; -- if((rc = subdev->suspend(subdev)) < 0) -+ if ((rc = subdev->suspend(subdev)) < 0) - return rc; - } - return rc; -@@ -636,8 +631,7 @@ - struct mtd_concat *concat = CONCAT(mtd); - int i; - -- for(i = 0; i < 
concat->num_subdev; i++) -- { -+ for (i = 0; i < concat->num_subdev; i++) { - struct mtd_info *subdev = concat->subdev[i]; - subdev->resume(subdev); - } -@@ -649,11 +643,10 @@ - * stored to *new_dev upon success. This function does _not_ - * register any devices: this is the caller's responsibility. - */ --struct mtd_info *mtd_concat_create( -- struct mtd_info *subdev[], /* subdevices to concatenate */ -- int num_devs, /* number of subdevices */ -- char *name) /* name for the new device */ --{ -+struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to concatenate */ -+ int num_devs, /* number of subdevices */ -+ char *name) -+{ /* name for the new device */ - int i; - size_t size; - struct mtd_concat *concat; -@@ -661,94 +654,103 @@ - int num_erase_region; - - printk(KERN_NOTICE "Concatenating MTD devices:\n"); -- for(i = 0; i < num_devs; i++) -+ for (i = 0; i < num_devs; i++) - printk(KERN_NOTICE "(%d): \"%s\"\n", i, subdev[i]->name); - printk(KERN_NOTICE "into device \"%s\"\n", name); - - /* allocate the device structure */ - size = SIZEOF_STRUCT_MTD_CONCAT(num_devs); -- concat = kmalloc (size, GFP_KERNEL); -- if(!concat) -- { -- printk ("memory allocation error while creating concatenated device \"%s\"\n", -- name); -- return NULL; -+ concat = kmalloc(size, GFP_KERNEL); -+ if (!concat) { -+ printk -+ ("memory allocation error while creating concatenated device \"%s\"\n", -+ name); -+ return NULL; - } - memset(concat, 0, size); -- concat->subdev = (struct mtd_info **)(concat + 1); -+ concat->subdev = (struct mtd_info **) (concat + 1); - - /* - * Set up the new "super" device's MTD object structure, check for - * incompatibilites between the subdevices. 
- */ -- concat->mtd.type = subdev[0]->type; -- concat->mtd.flags = subdev[0]->flags; -- concat->mtd.size = subdev[0]->size; -+ concat->mtd.type = subdev[0]->type; -+ concat->mtd.flags = subdev[0]->flags; -+ concat->mtd.size = subdev[0]->size; - concat->mtd.erasesize = subdev[0]->erasesize; -- concat->mtd.oobblock = subdev[0]->oobblock; -- concat->mtd.oobsize = subdev[0]->oobsize; -- concat->mtd.ecctype = subdev[0]->ecctype; -- concat->mtd.eccsize = subdev[0]->eccsize; -- -- concat->subdev[0] = subdev[0]; -- -- for(i = 1; i < num_devs; i++) -- { -- if(concat->mtd.type != subdev[i]->type) -- { -+ concat->mtd.oobblock = subdev[0]->oobblock; -+ concat->mtd.oobsize = subdev[0]->oobsize; -+ concat->mtd.ecctype = subdev[0]->ecctype; -+ concat->mtd.eccsize = subdev[0]->eccsize; -+ if (subdev[0]->read_ecc) -+ concat->mtd.read_ecc = concat_read_ecc; -+ if (subdev[0]->write_ecc) -+ concat->mtd.write_ecc = concat_write_ecc; -+ if (subdev[0]->read_oob) -+ concat->mtd.read_oob = concat_read_oob; -+ if (subdev[0]->write_oob) -+ concat->mtd.write_oob = concat_write_oob; -+ -+ concat->subdev[0] = subdev[0]; -+ -+ for (i = 1; i < num_devs; i++) { -+ if (concat->mtd.type != subdev[i]->type) { - kfree(concat); -- printk ("Incompatible device type on \"%s\"\n", subdev[i]->name); -+ printk("Incompatible device type on \"%s\"\n", -+ subdev[i]->name); - return NULL; - } -- if(concat->mtd.flags != subdev[i]->flags) -- { /* -- * Expect all flags except MTD_WRITEABLE to be equal on -- * all subdevices. -+ if (concat->mtd.flags != subdev[i]->flags) { -+ /* -+ * Expect all flags except MTD_WRITEABLE to be -+ * equal on all subdevices. 
- */ -- if((concat->mtd.flags ^ subdev[i]->flags) & ~MTD_WRITEABLE) -- { -+ if ((concat->mtd.flags ^ subdev[i]-> -+ flags) & ~MTD_WRITEABLE) { - kfree(concat); -- printk ("Incompatible device flags on \"%s\"\n", subdev[i]->name); -+ printk("Incompatible device flags on \"%s\"\n", -+ subdev[i]->name); - return NULL; -- } -- else /* if writeable attribute differs, make super device writeable */ -- concat->mtd.flags |= subdev[i]->flags & MTD_WRITEABLE; -+ } else -+ /* if writeable attribute differs, -+ make super device writeable */ -+ concat->mtd.flags |= -+ subdev[i]->flags & MTD_WRITEABLE; - } - concat->mtd.size += subdev[i]->size; -- if(concat->mtd.oobblock != subdev[i]->oobblock || -- concat->mtd.oobsize != subdev[i]->oobsize || -- concat->mtd.ecctype != subdev[i]->ecctype || -- concat->mtd.eccsize != subdev[i]->eccsize) -- { -+ if (concat->mtd.oobblock != subdev[i]->oobblock || -+ concat->mtd.oobsize != subdev[i]->oobsize || -+ concat->mtd.ecctype != subdev[i]->ecctype || -+ concat->mtd.eccsize != subdev[i]->eccsize || -+ !concat->mtd.read_ecc != !subdev[i]->read_ecc || -+ !concat->mtd.write_ecc != !subdev[i]->write_ecc || -+ !concat->mtd.read_oob != !subdev[i]->read_oob || -+ !concat->mtd.write_oob != !subdev[i]->write_oob) { - kfree(concat); -- printk ("Incompatible OOB or ECC data on \"%s\"\n", subdev[i]->name); -+ printk("Incompatible OOB or ECC data on \"%s\"\n", -+ subdev[i]->name); - return NULL; - } - concat->subdev[i] = subdev[i]; -- -+ - } - -- concat->num_subdev = num_devs; -- concat->mtd.name = name; -+ concat->num_subdev = num_devs; -+ concat->mtd.name = name; - - /* - * NOTE: for now, we do not provide any readv()/writev() methods - * because they are messy to implement and they are not - * used to a great extent anyway. 
- */ -- concat->mtd.erase = concat_erase; -- concat->mtd.read = concat_read; -- concat->mtd.write = concat_write; -- concat->mtd.read_ecc = concat_read_ecc; -- concat->mtd.write_ecc = concat_write_ecc; -- concat->mtd.read_oob = concat_read_oob; -- concat->mtd.write_oob = concat_write_oob; -- concat->mtd.sync = concat_sync; -- concat->mtd.lock = concat_lock; -- concat->mtd.unlock = concat_unlock; -- concat->mtd.suspend = concat_suspend; -- concat->mtd.resume = concat_resume; -- -+ concat->mtd.erase = concat_erase; -+ concat->mtd.read = concat_read; -+ concat->mtd.write = concat_write; -+ concat->mtd.sync = concat_sync; -+ concat->mtd.lock = concat_lock; -+ concat->mtd.unlock = concat_unlock; -+ concat->mtd.suspend = concat_suspend; -+ concat->mtd.resume = concat_resume; - - /* - * Combine the erase block size info of the subdevices: -@@ -758,44 +760,44 @@ - */ - max_erasesize = curr_erasesize = subdev[0]->erasesize; - num_erase_region = 1; -- for(i = 0; i < num_devs; i++) -- { -- if(subdev[i]->numeraseregions == 0) -- { /* current subdevice has uniform erase size */ -- if(subdev[i]->erasesize != curr_erasesize) -- { /* if it differs from the last subdevice's erase size, count it */ -+ for (i = 0; i < num_devs; i++) { -+ if (subdev[i]->numeraseregions == 0) { -+ /* current subdevice has uniform erase size */ -+ if (subdev[i]->erasesize != curr_erasesize) { -+ /* if it differs from the last subdevice's erase size, count it */ - ++num_erase_region; - curr_erasesize = subdev[i]->erasesize; -- if(curr_erasesize > max_erasesize) -+ if (curr_erasesize > max_erasesize) - max_erasesize = curr_erasesize; - } -- } -- else -- { /* current subdevice has variable erase size */ -+ } else { -+ /* current subdevice has variable erase size */ - int j; -- for(j = 0; j < subdev[i]->numeraseregions; j++) -- { /* walk the list of erase regions, count any changes */ -- if(subdev[i]->eraseregions[j].erasesize != curr_erasesize) -- { -+ for (j = 0; j < subdev[i]->numeraseregions; j++) { -+ 
-+ /* walk the list of erase regions, count any changes */ -+ if (subdev[i]->eraseregions[j].erasesize != -+ curr_erasesize) { - ++num_erase_region; -- curr_erasesize = subdev[i]->eraseregions[j].erasesize; -- if(curr_erasesize > max_erasesize) -+ curr_erasesize = -+ subdev[i]->eraseregions[j]. -+ erasesize; -+ if (curr_erasesize > max_erasesize) - max_erasesize = curr_erasesize; - } - } - } - } - -- if(num_erase_region == 1) -- { /* -+ if (num_erase_region == 1) { -+ /* - * All subdevices have the same uniform erase size. - * This is easy: - */ - concat->mtd.erasesize = curr_erasesize; - concat->mtd.numeraseregions = 0; -- } -- else -- { /* -+ } else { -+ /* - * erase block size varies across the subdevices: allocate - * space to store the data describing the variable erase regions - */ -@@ -804,13 +806,14 @@ - - concat->mtd.erasesize = max_erasesize; - concat->mtd.numeraseregions = num_erase_region; -- concat->mtd.eraseregions = erase_region_p = kmalloc ( -- num_erase_region * sizeof(struct mtd_erase_region_info), GFP_KERNEL); -- if(!erase_region_p) -- { -+ concat->mtd.eraseregions = erase_region_p = -+ kmalloc(num_erase_region * -+ sizeof (struct mtd_erase_region_info), GFP_KERNEL); -+ if (!erase_region_p) { - kfree(concat); -- printk ("memory allocation error while creating erase region list" -- " for device \"%s\"\n", name); -+ printk -+ ("memory allocation error while creating erase region list" -+ " for device \"%s\"\n", name); - return NULL; - } - -@@ -820,46 +823,53 @@ - */ - curr_erasesize = subdev[0]->erasesize; - begin = position = 0; -- for(i = 0; i < num_devs; i++) -- { -- if(subdev[i]->numeraseregions == 0) -- { /* current subdevice has uniform erase size */ -- if(subdev[i]->erasesize != curr_erasesize) -- { /* -+ for (i = 0; i < num_devs; i++) { -+ if (subdev[i]->numeraseregions == 0) { -+ /* current subdevice has uniform erase size */ -+ if (subdev[i]->erasesize != curr_erasesize) { -+ /* - * fill in an mtd_erase_region_info structure for the area 
- * we have walked so far: - */ -- erase_region_p->offset = begin; -- erase_region_p->erasesize = curr_erasesize; -- erase_region_p->numblocks = (position - begin) / curr_erasesize; -+ erase_region_p->offset = begin; -+ erase_region_p->erasesize = -+ curr_erasesize; -+ erase_region_p->numblocks = -+ (position - begin) / curr_erasesize; - begin = position; - - curr_erasesize = subdev[i]->erasesize; - ++erase_region_p; - } - position += subdev[i]->size; -- } -- else -- { /* current subdevice has variable erase size */ -+ } else { -+ /* current subdevice has variable erase size */ - int j; -- for(j = 0; j < subdev[i]->numeraseregions; j++) -- { /* walk the list of erase regions, count any changes */ -- if(subdev[i]->eraseregions[j].erasesize != curr_erasesize) -- { -- erase_region_p->offset = begin; -- erase_region_p->erasesize = curr_erasesize; -- erase_region_p->numblocks = (position - begin) / curr_erasesize; -+ for (j = 0; j < subdev[i]->numeraseregions; j++) { -+ /* walk the list of erase regions, count any changes */ -+ if (subdev[i]->eraseregions[j]. -+ erasesize != curr_erasesize) { -+ erase_region_p->offset = begin; -+ erase_region_p->erasesize = -+ curr_erasesize; -+ erase_region_p->numblocks = -+ (position - -+ begin) / curr_erasesize; - begin = position; - -- curr_erasesize = subdev[i]->eraseregions[j].erasesize; -+ curr_erasesize = -+ subdev[i]->eraseregions[j]. -+ erasesize; - ++erase_region_p; - } -- position += subdev[i]->eraseregions[j].numblocks * curr_erasesize; -+ position += -+ subdev[i]->eraseregions[j]. 
-+ numblocks * curr_erasesize; - } - } - } - /* Now write the final entry */ -- erase_region_p->offset = begin; -+ erase_region_p->offset = begin; - erase_region_p->erasesize = curr_erasesize; - erase_region_p->numblocks = (position - begin) / curr_erasesize; - } -@@ -874,16 +884,14 @@ - void mtd_concat_destroy(struct mtd_info *mtd) - { - struct mtd_concat *concat = CONCAT(mtd); -- if(concat->mtd.numeraseregions) -+ if (concat->mtd.numeraseregions) - kfree(concat->mtd.eraseregions); - kfree(concat); - } - -- - EXPORT_SYMBOL(mtd_concat_create); - EXPORT_SYMBOL(mtd_concat_destroy); - -- - MODULE_LICENSE("GPL"); - MODULE_AUTHOR("Robert Kaiser <rkaiser@sysgo.de>"); - MODULE_DESCRIPTION("Generic support for concatenating of MTD devices"); -Index: linux-2.6.5/drivers/mtd/mtdcore.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/mtdcore.c 2005-02-01 16:55:50.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/mtdcore.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,12 +1,11 @@ - /* -- * $Id: mtdcore.c,v 1.39 2003/05/21 15:15:03 dwmw2 Exp $ -+ * $Id: mtdcore.c,v 1.43 2004/07/23 15:20:46 dwmw2 Exp $ - * - * Core registration and callback routines for MTD - * drivers and users. 
- * - */ - --#include <linux/version.h> - #include <linux/config.h> - #include <linux/module.h> - #include <linux/kernel.h> -@@ -59,7 +58,6 @@ - mtd_table[i] = mtd; - mtd->index = i; - mtd->usecount = 0; -- init_MUTEX(&mtd->mutex); - - DEBUG(0, "mtd: Giving out device %d to %s\n",i, mtd->name); - /* No need to get a refcount on the module containing -@@ -233,7 +231,7 @@ - * dont implement their own - */ - --int default_mtd_writev(struct mtd_info *mtd, const struct iovec *vecs, -+int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, - unsigned long count, loff_t to, size_t *retlen) - { - unsigned long i; -@@ -263,7 +261,7 @@ - * implement their own - */ - --int default_mtd_readv(struct mtd_info *mtd, struct iovec *vecs, -+int default_mtd_readv(struct mtd_info *mtd, struct kvec *vecs, - unsigned long count, loff_t from, size_t *retlen) - { - unsigned long i; -@@ -335,10 +333,7 @@ - /* Support for /proc/mtd */ - - #ifdef CONFIG_PROC_FS -- --#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,2,0) - static struct proc_dir_entry *proc_mtd; --#endif - - static inline int mtd_proc_info (char *buf, int i) - { -@@ -351,13 +346,8 @@ - this->erasesize, this->name); - } - --static int mtd_read_proc ( char *page, char **start, off_t off,int count --#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,2,0) -- ,int *eof, void *data_unused --#else -- ,int unused --#endif -- ) -+static int mtd_read_proc (char *page, char **start, off_t off, int count, -+ int *eof, void *data_unused) - { - int len, l, i; - off_t begin = 0; -@@ -377,9 +367,7 @@ - } - } - --#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,2,0) - *eof = 1; --#endif - - done: - up(&mtd_table_mutex); -@@ -389,18 +377,6 @@ - return ((count < begin+len-off) ? 
count : begin+len-off); - } - --#if LINUX_VERSION_CODE < KERNEL_VERSION(2,2,0) --struct proc_dir_entry mtd_proc_entry = { -- 0, /* low_ino: the inode -- dynamic */ -- 3, "mtd", /* len of name and name */ -- S_IFREG | S_IRUGO, /* mode */ -- 1, 0, 0, /* nlinks, owner, group */ -- 0, NULL, /* size - unused; operations -- use default */ -- &mtd_read_proc, /* function used to read data */ -- /* nothing more */ -- }; --#endif -- - #endif /* CONFIG_PROC_FS */ - - /*====================================================================*/ -@@ -409,16 +385,8 @@ - int __init init_mtd(void) - { - #ifdef CONFIG_PROC_FS --#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,2,0) -- if ((proc_mtd = create_proc_entry( "mtd", 0, 0 ))) -- proc_mtd->read_proc = mtd_read_proc; --#else -- proc_register_dynamic(&proc_root,&mtd_proc_entry); --#endif --#endif -- --#if LINUX_VERSION_CODE < 0x20212 -- init_mtd_devices(); -+ if ((proc_mtd = create_proc_entry( "mtd", 0, NULL ))) -+ proc_mtd->read_proc = mtd_read_proc; - #endif - - #ifdef CONFIG_PM -@@ -437,12 +405,8 @@ - #endif - - #ifdef CONFIG_PROC_FS --#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,2,0) - if (proc_mtd) -- remove_proc_entry( "mtd", 0); --#else -- proc_unregister(&proc_root,mtd_proc_entry.low_ino); --#endif -+ remove_proc_entry( "mtd", NULL); - #endif - } - -Index: linux-2.6.5/drivers/mtd/mtdpart.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/mtdpart.c 2004-04-03 22:37:38.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/mtdpart.c 2005-02-01 17:11:17.000000000 -0500 -@@ -5,7 +5,7 @@ - * - * This code is GPL - * -- * $Id: mtdpart.c,v 1.41 2003/06/18 14:53:02 dwmw2 Exp $ -+ * $Id: mtdpart.c,v 1.50 2004/08/10 16:18:34 dwmw2 Exp $ - * - * 02-21-2002 Thomas Gleixner <gleixner@autronix.de> - * added support for read_oob, write_oob -@@ -182,7 +182,7 @@ - len, retlen, buf); - } - --static int part_writev (struct mtd_info *mtd, const struct iovec *vecs, -+static int part_writev (struct mtd_info 
*mtd, const struct kvec *vecs, - unsigned long count, loff_t to, size_t *retlen) - { - struct mtd_part *part = PART(mtd); -@@ -197,7 +197,7 @@ - NULL, &mtd->oobinfo); - } - --static int part_readv (struct mtd_info *mtd, struct iovec *vecs, -+static int part_readv (struct mtd_info *mtd, struct kvec *vecs, - unsigned long count, loff_t from, size_t *retlen) - { - struct mtd_part *part = PART(mtd); -@@ -210,7 +210,7 @@ - NULL, &mtd->oobinfo); - } - --static int part_writev_ecc (struct mtd_info *mtd, const struct iovec *vecs, -+static int part_writev_ecc (struct mtd_info *mtd, const struct kvec *vecs, - unsigned long count, loff_t to, size_t *retlen, - u_char *eccbuf, struct nand_oobinfo *oobsel) - { -@@ -224,7 +224,7 @@ - eccbuf, oobsel); - } - --static int part_readv_ecc (struct mtd_info *mtd, struct iovec *vecs, -+static int part_readv_ecc (struct mtd_info *mtd, struct kvec *vecs, - unsigned long count, loff_t from, size_t *retlen, - u_char *eccbuf, struct nand_oobinfo *oobsel) - { -@@ -239,13 +239,29 @@ - static int part_erase (struct mtd_info *mtd, struct erase_info *instr) - { - struct mtd_part *part = PART(mtd); -+ int ret; - if (!(mtd->flags & MTD_WRITEABLE)) - return -EROFS; - if (instr->addr >= mtd->size) - return -EINVAL; - instr->addr += part->offset; -- return part->master->erase(part->master, instr); -+ ret = part->master->erase(part->master, instr); -+ return ret; -+} -+ -+void mtd_erase_callback(struct erase_info *instr) -+{ -+ if (instr->mtd->erase == part_erase) { -+ struct mtd_part *part = PART(instr->mtd); -+ -+ if (instr->fail_addr != 0xffffffff) -+ instr->fail_addr -= part->offset; -+ instr->addr -= part->offset; -+ } -+ if (instr->callback) -+ instr->callback(instr); - } -+EXPORT_SYMBOL_GPL(mtd_erase_callback); - - static int part_lock (struct mtd_info *mtd, loff_t ofs, size_t len) - { -@@ -281,6 +297,26 @@ - part->master->resume(part->master); - } - -+static int part_block_isbad (struct mtd_info *mtd, loff_t ofs) -+{ -+ struct mtd_part *part = 
PART(mtd); -+ if (ofs >= mtd->size) -+ return -EINVAL; -+ ofs += part->offset; -+ return part->master->block_isbad(part->master, ofs); -+} -+ -+static int part_block_markbad (struct mtd_info *mtd, loff_t ofs) -+{ -+ struct mtd_part *part = PART(mtd); -+ if (!(mtd->flags & MTD_WRITEABLE)) -+ return -EROFS; -+ if (ofs >= mtd->size) -+ return -EINVAL; -+ ofs += part->offset; -+ return part->master->block_markbad(part->master, ofs); -+} -+ - /* - * This function unregisters and destroy all slave MTD objects which are - * attached to the given master MTD object. -@@ -316,7 +352,7 @@ - */ - - int add_mtd_partitions(struct mtd_info *master, -- struct mtd_partition *parts, -+ const struct mtd_partition *parts, - int nbparts) - { - struct mtd_part *slave; -@@ -391,6 +427,10 @@ - slave->mtd.lock = part_lock; - if (master->unlock) - slave->mtd.unlock = part_unlock; -+ if (master->block_isbad) -+ slave->mtd.block_isbad = part_block_isbad; -+ if (master->block_markbad) -+ slave->mtd.block_markbad = part_block_markbad; - slave->mtd.erase = part_erase; - slave->master = master; - slave->offset = parts[i].offset; -@@ -461,6 +501,9 @@ - parts[i].name); - } - -+ /* copy oobinfo from master */ -+ memcpy(&slave->mtd.oobinfo, &master->oobinfo, sizeof(slave->mtd.oobinfo)); -+ - if(parts[i].mtdp) - { /* store the object pointer (caller may or may not register it */ - *parts[i].mtdp = &slave->mtd; -Index: linux-2.6.5/drivers/mtd/nand/Kconfig -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/Kconfig 2004-04-03 22:37:41.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/Kconfig 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - # drivers/mtd/nand/Kconfig --# $Id: Kconfig,v 1.4 2003/05/28 10:04:23 dwmw2 Exp $ -+# $Id: Kconfig,v 1.19 2004/09/16 23:23:42 gleixner Exp $ - - menu "NAND Flash Device Drivers" - depends on MTD!=n -@@ -9,8 +9,8 @@ - depends on MTD - help - This enables support for accessing all type of NAND flash -- 
devices with an 8-bit data bus interface. For further -- information see www.linux-mtd.infradead.org/tech/nand.html. -+ devices. For further information see -+ <http://www.linux-mtd.infradead.org/tech/nand.html>. - - config MTD_NAND_VERIFY_WRITE - bool "Verify NAND page writes" -@@ -36,16 +36,129 @@ - This enables the driver for the Cirrus Logic EBD7312 evaluation - board to access the onboard NAND Flash. - -+config MTD_NAND_H1900 -+ tristate "iPAQ H1900 flash" -+ depends on ARM && MTD_NAND && ARCH_PXA && MTD_PARTITIONS -+ help -+ This enables the driver for the iPAQ h1900 flash. -+ - config MTD_NAND_SPIA - tristate "NAND Flash device on SPIA board" - depends on ARM && ARCH_P720T && MTD_NAND - help - If you had to ask, you don't have one. Say 'N'. - -+config MTD_NAND_TOTO -+ tristate "NAND Flash device on TOTO board" -+ depends on ARM && ARCH_OMAP && MTD_NAND -+ help -+ Support for NAND flash on Texas Instruments Toto platform. -+ - config MTD_NAND_IDS - tristate - default y if MTD_NAND = y || MTD_DOC2000 = y || MTD_DOC2001 = y || MTD_DOC2001PLUS = y - default m if MTD_NAND = m || MTD_DOC2000 = m || MTD_DOC2001 = m || MTD_DOC2001PLUS = m -- --endmenu - -+config MTD_NAND_TX4925NDFMC -+ tristate "SmartMedia Card on Toshiba RBTX4925 reference board" -+ depends on TOSHIBA_RBTX4925 && MTD_NAND && TOSHIBA_RBTX4925_MPLEX_NAND -+ help -+ This enables the driver for the NAND flash device found on the -+ Toshiba RBTX4925 reference board, which is a SmartMediaCard. -+ -+config MTD_NAND_TX4938NDFMC -+ tristate "NAND Flash device on Toshiba RBTX4938 reference board" -+ depends on TOSHIBA_RBTX4938 && MTD_NAND && TOSHIBA_RBTX4938_MPLEX_NAND -+ help -+ This enables the driver for the NAND flash device found on the -+ Toshiba RBTX4938 reference board. -+ -+config MTD_NAND_AU1550 -+ tristate "Au1550 NAND support" -+ depends on SOC_AU1550 && MTD_NAND -+ help -+ This enables the driver for the NAND flash controller on the -+ AMD/Alchemy 1550 SOC. 
-+ -+config MTD_NAND_RTC_FROM4 -+ tristate "Renesas Flash ROM 4-slot interface board (FROM_BOARD4)" -+ depends on MTD_NAND && SH_SOLUTION_ENGINE -+ help -+ This enables the driver for the Renesas Technology AG-AND -+ flash interface board (FROM_BOARD4) -+ -+config MTD_NAND_PPCHAMELEONEVB -+ tristate "NAND Flash device on PPChameleonEVB board" -+ depends on PPCHAMELEONEVB && MTD_NAND -+ help -+ This enables the NAND flash driver on the PPChameleon EVB Board. -+ -+config MTD_NAND_DISKONCHIP -+ tristate "DiskOnChip 2000, Millennium and Millennium Plus (NAND reimplementation) (EXPERIMENTAL)" -+ depends on MTD_NAND && EXPERIMENTAL -+ select REED_SOLOMON -+ help -+ This is a reimplementation of M-Systems DiskOnChip 2000, -+ Millennium and Millennium Plus as a standard NAND device driver, -+ as opposed to the earlier self-contained MTD device drivers. -+ This should enable, among other things, proper JFFS2 operation on -+ these devices. -+ -+config MTD_NAND_DISKONCHIP_PROBE_ADVANCED -+ bool "Advanced detection options for DiskOnChip" -+ depends on MTD_NAND_DISKONCHIP -+ help -+ This option allows you to specify nonstandard address at which to -+ probe for a DiskOnChip, or to change the detection options. You -+ are unlikely to need any of this unless you are using LinuxBIOS. -+ Say 'N'. -+ -+config MTD_NAND_DISKONCHIP_PROBE_ADDRESS -+ hex "Physical address of DiskOnChip" if MTD_NAND_DISKONCHIP_PROBE_ADVANCED -+ depends on MTD_NAND_DISKONCHIP -+ default "0" -+ ---help--- -+ By default, the probe for DiskOnChip devices will look for a -+ DiskOnChip at every multiple of 0x2000 between 0xC8000 and 0xEE000. -+ This option allows you to specify a single address at which to probe -+ for the device, which is useful if you have other devices in that -+ range which get upset when they are probed. -+ -+ (Note that on PowerPC, the normal probe will only check at -+ 0xE4000000.) -+ -+ Normally, you should leave this set to zero, to allow the probe at -+ the normal addresses. 
-+ -+config MTD_NAND_DISKONCHIP_PROBE_HIGH -+ bool "Probe high addresses" -+ depends on MTD_NAND_DISKONCHIP_PROBE_ADVANCED -+ help -+ By default, the probe for DiskOnChip devices will look for a -+ DiskOnChip at every multiple of 0x2000 between 0xC8000 and 0xEE000. -+ This option changes to make it probe between 0xFFFC8000 and -+ 0xFFFEE000. Unless you are using LinuxBIOS, this is unlikely to be -+ useful to you. Say 'N'. -+ -+config MTD_NAND_DISKONCHIP_BBTWRITE -+ bool "Allow BBT writes on DiskOnChip Millennium and 2000TSOP" -+ depends on MTD_NAND_DISKONCHIP -+ help -+ On DiskOnChip devices shipped with the INFTL filesystem (Millennium -+ and 2000 TSOP/Alon), Linux reserves some space at the end of the -+ device for the Bad Block Table (BBT). If you have existing INFTL -+ data on your device (created by non-Linux tools such as M-Systems' -+ DOS drivers), your data might overlap the area Linux wants to use for -+ the BBT. If this is a concern for you, leave this option disabled and -+ Linux will not write BBT data into this area. -+ The downside of leaving this option disabled is that if bad blocks -+ are detected by Linux, they will not be recorded in the BBT, which -+ could cause future problems. -+ Once you enable this option, new filesystems (INFTL or others, created -+ in Linux or other operating systems) will not use the reserved area. -+ The only reason not to enable this option is to prevent damage to -+ preexisting filesystems. -+ Even if you leave this disabled, you can enable BBT writes at module -+ load time (assuming you build diskonchip as a module) with the module -+ parameter "inftl_bbt_write=1". 
-+endmenu -Index: linux-2.6.5/drivers/mtd/nand/Makefile -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/Makefile 2004-04-03 22:36:14.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/Makefile 2005-02-01 17:11:17.000000000 -0500 -@@ -1,10 +1,21 @@ - # - # linux/drivers/nand/Makefile - # --# $Id: Makefile.common,v 1.2 2003/05/28 11:38:54 dwmw2 Exp $ -+# $Id: Makefile.common,v 1.11 2004/09/16 23:23:42 gleixner Exp $ - --obj-$(CONFIG_MTD_NAND) += nand.o nand_ecc.o --obj-$(CONFIG_MTD_NAND_SPIA) += spia.o --obj-$(CONFIG_MTD_NAND_AUTCPU12) += autcpu12.o --obj-$(CONFIG_MTD_NAND_EDB7312) += edb7312.o --obj-$(CONFIG_MTD_NAND_IDS) += nand_ids.o -+obj-$(CONFIG_MTD_NAND) += nand.o nand_ecc.o -+obj-$(CONFIG_MTD_NAND_IDS) += nand_ids.o -+ -+obj-$(CONFIG_MTD_NAND_SPIA) += spia.o -+obj-$(CONFIG_MTD_NAND_TOTO) += toto.o -+obj-$(CONFIG_MTD_NAND_AUTCPU12) += autcpu12.o -+obj-$(CONFIG_MTD_NAND_EDB7312) += edb7312.o -+obj-$(CONFIG_MTD_NAND_TX4925NDFMC) += tx4925ndfmc.o -+obj-$(CONFIG_MTD_NAND_TX4938NDFMC) += tx4938ndfmc.o -+obj-$(CONFIG_MTD_NAND_AU1550) += au1550nd.o -+obj-$(CONFIG_MTD_NAND_PPCHAMELEONEVB) += ppchameleonevb.o -+obj-$(CONFIG_MTD_NAND_DISKONCHIP) += diskonchip.o -+obj-$(CONFIG_MTD_NAND_H1900) += h1910.o -+obj-$(CONFIG_MTD_NAND_FROM4) += rtc_from4.o -+ -+nand-objs = nand_base.o nand_bbt.o -Index: linux-2.6.5/drivers/mtd/nand/au1550nd.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/au1550nd.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/au1550nd.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,481 @@ -+/* -+ * drivers/mtd/nand/au1550nd.c -+ * -+ * Copyright (C) 2004 Embedded Edge, LLC -+ * -+ * $Id: au1550nd.c,v 1.8 2004/09/16 23:27:14 gleixner Exp $ -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free 
Software Foundation. -+ * -+ */ -+ -+#include <linux/slab.h> -+#include <linux/init.h> -+#include <linux/module.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/nand.h> -+#include <linux/mtd/partitions.h> -+#include <asm/io.h> -+#include <asm/au1000.h> -+#ifdef CONFIG_MIPS_PB1550 -+#include <asm/pb1550.h> -+#endif -+#ifdef CONFIG_MIPS_DB1550 -+#include <asm/db1x00.h> -+#endif -+ -+ -+/* -+ * MTD structure for NAND controller -+ */ -+static struct mtd_info *au1550_mtd = NULL; -+static void __iomem *p_nand; -+static int nand_width = 1; /* default x8*/ -+ -+/* -+ * Define partitions for flash device -+ */ -+const static struct mtd_partition partition_info[] = { -+#ifdef CONFIG_MIPS_PB1550 -+#define NUM_PARTITIONS 2 -+ { -+ .name = "Pb1550 NAND FS 0", -+ .offset = 0, -+ .size = 8*1024*1024 -+ }, -+ { -+ .name = "Pb1550 NAND FS 1", -+ .offset = MTDPART_OFS_APPEND, -+ .size = MTDPART_SIZ_FULL -+ } -+#endif -+#ifdef CONFIG_MIPS_DB1550 -+#define NUM_PARTITIONS 2 -+ { -+ .name = "Db1550 NAND FS 0", -+ .offset = 0, -+ .size = 8*1024*1024 -+ }, -+ { -+ .name = "Db1550 NAND FS 1", -+ .offset = MTDPART_OFS_APPEND, -+ .size = MTDPART_SIZ_FULL -+ } -+#endif -+}; -+ -+ -+/** -+ * au_read_byte - read one byte from the chip -+ * @mtd: MTD device structure -+ * -+ * read function for 8bit buswith -+ */ -+static u_char au_read_byte(struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ u_char ret = readb(this->IO_ADDR_R); -+ au_sync(); -+ return ret; -+} -+ -+/** -+ * au_write_byte - write one byte to the chip -+ * @mtd: MTD device structure -+ * @byte: pointer to data byte to write -+ * -+ * write function for 8it buswith -+ */ -+static void au_write_byte(struct mtd_info *mtd, u_char byte) -+{ -+ struct nand_chip *this = mtd->priv; -+ writeb(byte, this->IO_ADDR_W); -+ au_sync(); -+} -+ -+/** -+ * au_read_byte16 - read one byte endianess aware from the chip -+ * @mtd: MTD device structure -+ * -+ * read function for 16bit buswith with -+ * endianess conversion -+ */ 
-+static u_char au_read_byte16(struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ u_char ret = (u_char) cpu_to_le16(readw(this->IO_ADDR_R)); -+ au_sync(); -+ return ret; -+} -+ -+/** -+ * au_write_byte16 - write one byte endianess aware to the chip -+ * @mtd: MTD device structure -+ * @byte: pointer to data byte to write -+ * -+ * write function for 16bit buswith with -+ * endianess conversion -+ */ -+static void au_write_byte16(struct mtd_info *mtd, u_char byte) -+{ -+ struct nand_chip *this = mtd->priv; -+ writew(le16_to_cpu((u16) byte), this->IO_ADDR_W); -+ au_sync(); -+} -+ -+/** -+ * au_read_word - read one word from the chip -+ * @mtd: MTD device structure -+ * -+ * read function for 16bit buswith without -+ * endianess conversion -+ */ -+static u16 au_read_word(struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ u16 ret = readw(this->IO_ADDR_R); -+ au_sync(); -+ return ret; -+} -+ -+/** -+ * au_write_word - write one word to the chip -+ * @mtd: MTD device structure -+ * @word: data word to write -+ * -+ * write function for 16bit buswith without -+ * endianess conversion -+ */ -+static void au_write_word(struct mtd_info *mtd, u16 word) -+{ -+ struct nand_chip *this = mtd->priv; -+ writew(word, this->IO_ADDR_W); -+ au_sync(); -+} -+ -+/** -+ * au_write_buf - write buffer to chip -+ * @mtd: MTD device structure -+ * @buf: data buffer -+ * @len: number of bytes to write -+ * -+ * write function for 8bit buswith -+ */ -+static void au_write_buf(struct mtd_info *mtd, const u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ -+ for (i=0; i<len; i++) { -+ writeb(buf[i], this->IO_ADDR_W); -+ au_sync(); -+ } -+} -+ -+/** -+ * au_read_buf - read chip data into buffer -+ * @mtd: MTD device structure -+ * @buf: buffer to store date -+ * @len: number of bytes to read -+ * -+ * read function for 8bit buswith -+ */ -+static void au_read_buf(struct mtd_info *mtd, u_char *buf, int len) -+{ -+ int i; -+ struct 
nand_chip *this = mtd->priv; -+ -+ for (i=0; i<len; i++) { -+ buf[i] = readb(this->IO_ADDR_R); -+ au_sync(); -+ } -+} -+ -+/** -+ * au_verify_buf - Verify chip data against buffer -+ * @mtd: MTD device structure -+ * @buf: buffer containing the data to compare -+ * @len: number of bytes to compare -+ * -+ * verify function for 8bit buswith -+ */ -+static int au_verify_buf(struct mtd_info *mtd, const u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ -+ for (i=0; i<len; i++) { -+ if (buf[i] != readb(this->IO_ADDR_R)) -+ return -EFAULT; -+ au_sync(); -+ } -+ -+ return 0; -+} -+ -+/** -+ * au_write_buf16 - write buffer to chip -+ * @mtd: MTD device structure -+ * @buf: data buffer -+ * @len: number of bytes to write -+ * -+ * write function for 16bit buswith -+ */ -+static void au_write_buf16(struct mtd_info *mtd, const u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ u16 *p = (u16 *) buf; -+ len >>= 1; -+ -+ for (i=0; i<len; i++) { -+ writew(p[i], this->IO_ADDR_W); -+ au_sync(); -+ } -+ -+} -+ -+/** -+ * au_read_buf16 - read chip data into buffer -+ * @mtd: MTD device structure -+ * @buf: buffer to store date -+ * @len: number of bytes to read -+ * -+ * read function for 16bit buswith -+ */ -+static void au_read_buf16(struct mtd_info *mtd, u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ u16 *p = (u16 *) buf; -+ len >>= 1; -+ -+ for (i=0; i<len; i++) { -+ p[i] = readw(this->IO_ADDR_R); -+ au_sync(); -+ } -+} -+ -+/** -+ * au_verify_buf16 - Verify chip data against buffer -+ * @mtd: MTD device structure -+ * @buf: buffer containing the data to compare -+ * @len: number of bytes to compare -+ * -+ * verify function for 16bit buswith -+ */ -+static int au_verify_buf16(struct mtd_info *mtd, const u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ u16 *p = (u16 *) buf; -+ len >>= 1; -+ -+ for (i=0; i<len; i++) { -+ if (p[i] != readw(this->IO_ADDR_R)) -+ return 
-EFAULT; -+ au_sync(); -+ } -+ return 0; -+} -+ -+ -+static void au1550_hwcontrol(struct mtd_info *mtd, int cmd) -+{ -+ register struct nand_chip *this = mtd->priv; -+ -+ switch(cmd){ -+ -+ case NAND_CTL_SETCLE: this->IO_ADDR_W = p_nand + MEM_STNAND_CMD; break; -+ case NAND_CTL_CLRCLE: this->IO_ADDR_W = p_nand + MEM_STNAND_DATA; break; -+ -+ case NAND_CTL_SETALE: this->IO_ADDR_W = p_nand + MEM_STNAND_ADDR; break; -+ case NAND_CTL_CLRALE: -+ this->IO_ADDR_W = p_nand + MEM_STNAND_DATA; -+ /* FIXME: Nobody knows why this is neccecary, -+ * but it works onlythat way */ -+ udelay(1); -+ break; -+ -+ case NAND_CTL_SETNCE: -+ /* assert (force assert) chip enable */ -+ au_writel(au_readl(MEM_STNDCTL) | 0x20 , MEM_STNDCTL); -+ break; -+ -+ case NAND_CTL_CLRNCE: -+ /* deassert chip enable */ -+ au_writel(au_readl(MEM_STNDCTL) & ~0x20 , MEM_STNDCTL); -+ break; -+ } -+ -+ this->IO_ADDR_R = this->IO_ADDR_W; -+ -+ /* Drain the writebuffer */ -+ au_sync(); -+} -+ -+int au1550_device_ready(struct mtd_info *mtd) -+{ -+ int ret = (au_readl(MEM_STSTAT) & 0x1) ? 
1 : 0; -+ au_sync(); -+ return ret; -+} -+ -+/* -+ * Main initialization routine -+ */ -+int __init au1550_init (void) -+{ -+ struct nand_chip *this; -+ u16 boot_swapboot = 0; /* default value */ -+ u32 mem_time; -+ int retval; -+ -+ /* Allocate memory for MTD device structure and private data */ -+ au1550_mtd = kmalloc (sizeof(struct mtd_info) + -+ sizeof (struct nand_chip), GFP_KERNEL); -+ if (!au1550_mtd) { -+ printk ("Unable to allocate NAND MTD dev structure.\n"); -+ return -ENOMEM; -+ } -+ -+ /* Get pointer to private data */ -+ this = (struct nand_chip *) (&au1550_mtd[1]); -+ -+ /* Initialize structures */ -+ memset((char *) au1550_mtd, 0, sizeof(struct mtd_info)); -+ memset((char *) this, 0, sizeof(struct nand_chip)); -+ -+ /* Link the private data with the MTD structure */ -+ au1550_mtd->priv = this; -+ -+ /* disable interrupts */ -+ au_writel(au_readl(MEM_STNDCTL) & ~(1<<8), MEM_STNDCTL); -+ -+ /* disable NAND boot */ -+ au_writel(au_readl(MEM_STNDCTL) & ~(1<<0), MEM_STNDCTL); -+ -+#ifdef CONFIG_MIPS_PB1550 -+ /* set gpio206 high */ -+ au_writel(au_readl(GPIO2_DIR) & ~(1<<6), GPIO2_DIR); -+ -+ boot_swapboot = (au_readl(MEM_STSTAT) & (0x7<<1)) | -+ ((bcsr->status >> 6) & 0x1); -+ switch (boot_swapboot) { -+ case 0: -+ case 2: -+ case 8: -+ case 0xC: -+ case 0xD: -+ /* x16 NAND Flash */ -+ nand_width = 0; -+ break; -+ case 1: -+ case 9: -+ case 3: -+ case 0xE: -+ case 0xF: -+ /* x8 NAND Flash */ -+ nand_width = 1; -+ break; -+ default: -+ printk("Pb1550 NAND: bad boot:swap\n"); -+ retval = -EINVAL; -+ goto outmem; -+ } -+ -+ /* Configure RCE1 - should be done by YAMON */ -+ au_writel(0x5 | (nand_width << 22), MEM_STCFG1); -+ au_writel(NAND_TIMING, MEM_STTIME1); -+ mem_time = au_readl(MEM_STTIME1); -+ au_sync(); -+ -+ /* setup and enable chip select */ -+ /* we really need to decode offsets only up till 0x20 */ -+ au_writel((1<<28) | (NAND_PHYS_ADDR>>4) | -+ (((NAND_PHYS_ADDR + 0x1000)-1) & (0x3fff<<18)>>18), -+ MEM_STADDR1); -+ au_sync(); -+#endif -+ 
-+#ifdef CONFIG_MIPS_DB1550 -+ /* FIXME: should be done by the bootloader -+ * -+ * tglx: stcfg1 was set to 0x00400005. I changed -+ * this as it does not work with all chips. -+ * someone should look into the correct timing -+ * values, as bit 8 does a clock / 4 prescale -+ */ -+ au_writel(0x00400105, MEM_STCFG1); -+ au_writel(0x00007774, MEM_STTIME1); -+ au_writel(0x12003FFF, MEM_STADDR1); -+#endif -+ -+ p_nand = (void __iomem *)ioremap(NAND_PHYS_ADDR, 0x1000); -+ -+ /* Set address of hardware control function */ -+ this->hwcontrol = au1550_hwcontrol; -+ this->dev_ready = au1550_device_ready; -+ /* 30 us command delay time */ -+ this->chip_delay = 30; -+ this->eccmode = NAND_ECC_SOFT; -+ -+ this->options = NAND_NO_AUTOINCR; -+ -+ if (!nand_width) -+ this->options |= NAND_BUSWIDTH_16; -+ -+ this->read_byte = (!nand_width) ? au_read_byte16 : au_read_byte; -+ this->write_byte = (!nand_width) ? au_write_byte16 : au_write_byte; -+ this->write_word = au_write_word; -+ this->read_word = au_read_word; -+ this->write_buf = (!nand_width) ? au_write_buf16 : au_write_buf; -+ this->read_buf = (!nand_width) ? au_read_buf16 : au_read_buf; -+ this->verify_buf = (!nand_width) ? 
au_verify_buf16 : au_verify_buf; -+ -+ /* Scan to find existence of the device */ -+ if (nand_scan (au1550_mtd, 1)) { -+ retval = -ENXIO; -+ goto outio; -+ } -+ -+ /* Register the partitions */ -+ add_mtd_partitions(au1550_mtd, partition_info, NUM_PARTITIONS); -+ -+ return 0; -+ -+ outio: -+ iounmap ((void *)p_nand); -+ -+ outmem: -+ kfree (au1550_mtd); -+ return retval; -+} -+ -+module_init(au1550_init); -+ -+/* -+ * Clean up routine -+ */ -+#ifdef MODULE -+static void __exit au1550_cleanup (void) -+{ -+ struct nand_chip *this = (struct nand_chip *) &au1550_mtd[1]; -+ -+ /* Release resources, unregister device */ -+ nand_release (au1550_mtd); -+ -+ /* Free the MTD device structure */ -+ kfree (au1550_mtd); -+ -+ /* Unmap */ -+ iounmap ((void *)p_nand); -+} -+module_exit(au1550_cleanup); -+#endif -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Embedded Edge, LLC"); -+MODULE_DESCRIPTION("Board-specific glue layer for NAND flash on Pb1550 board"); -Index: linux-2.6.5/drivers/mtd/nand/autcpu12.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/autcpu12.c 2004-04-03 22:36:25.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/autcpu12.c 2005-02-01 17:11:17.000000000 -0500 -@@ -6,7 +6,7 @@ - * Derived from drivers/mtd/spia.c - * Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com) - * -- * $Id: autcpu12.c,v 1.11 2003/06/04 17:04:09 gleixner Exp $ -+ * $Id: autcpu12.c,v 1.21 2004/09/16 23:27:14 gleixner Exp $ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as -@@ -15,7 +15,7 @@ - * Overview: - * This is a device driver for the NAND flash device found on the - * autronix autcpu12 board, which is a SmartMediaCard. It supports -- * 16MB, 32MB and 64MB cards. -+ * 16MiB, 32MiB and 64MiB cards. 
- * - * - * 02-12-2002 TG Cleanup of module params -@@ -44,19 +44,11 @@ - */ - static struct mtd_info *autcpu12_mtd = NULL; - --/* -- * Module stuff -- */ --#if LINUX_VERSION_CODE < 0x20212 && defined(MODULE) --#define autcpu12_init init_module --#define autcpu12_cleanup cleanup_module --#endif -- - static int autcpu12_io_base = CS89712_VIRT_BASE; - static int autcpu12_fio_pbase = AUTCPU12_PHYS_SMC; - static int autcpu12_fio_ctrl = AUTCPU12_SMC_SELECT_OFFSET; - static int autcpu12_pedr = AUTCPU12_SMC_PORT_OFFSET; --static int autcpu12_fio_base; -+static void __iomem * autcpu12_fio_base; - - #ifdef MODULE - MODULE_PARM(autcpu12_fio_pbase, "i"); -@@ -71,42 +63,40 @@ - /* - * Define partitions for flash devices - */ --extern struct nand_oobinfo jffs2_oobinfo; -- - static struct mtd_partition partition_info16k[] = { -- { .name = "AUTCPU12 flash partition 1", -- .offset = 0, -- .size = 8 * SZ_1M }, -- { .name = "AUTCPU12 flash partition 2", -- .offset = 8 * SZ_1M, -- .size = 8 * SZ_1M }, -+ { .name = "AUTCPU12 flash partition 1", -+ .offset = 0, -+ .size = 8 * SZ_1M }, -+ { .name = "AUTCPU12 flash partition 2", -+ .offset = 8 * SZ_1M, -+ .size = 8 * SZ_1M }, - }; - - static struct mtd_partition partition_info32k[] = { -- { .name = "AUTCPU12 flash partition 1", -- .offset = 0, -- .size = 8 * SZ_1M }, -- { .name = "AUTCPU12 flash partition 2", -- .offset = 8 * SZ_1M, -- .size = 24 * SZ_1M }, -+ { .name = "AUTCPU12 flash partition 1", -+ .offset = 0, -+ .size = 8 * SZ_1M }, -+ { .name = "AUTCPU12 flash partition 2", -+ .offset = 8 * SZ_1M, -+ .size = 24 * SZ_1M }, - }; - - static struct mtd_partition partition_info64k[] = { -- { .name = "AUTCPU12 flash partition 1", -- .offset = 0, -- .size = 16 * SZ_1M }, -- { .name = "AUTCPU12 flash partition 2", -- .offset = 16 * SZ_1M, -- .size = 48 * SZ_1M }, -+ { .name = "AUTCPU12 flash partition 1", -+ .offset = 0, -+ .size = 16 * SZ_1M }, -+ { .name = "AUTCPU12 flash partition 2", -+ .offset = 16 * SZ_1M, -+ .size = 48 * SZ_1M }, - 
}; - - static struct mtd_partition partition_info128k[] = { -- { .name = "AUTCPU12 flash partition 1", -- .offset = 0, -- .size = 16 * SZ_1M }, -- { .name = "AUTCPU12 flash partition 2", -- .offset = 16 * SZ_1M, -- .size = 112 * SZ_1M }, -+ { .name = "AUTCPU12 flash partition 1", -+ .offset = 0, -+ .size = 16 * SZ_1M }, -+ { .name = "AUTCPU12 flash partition 2", -+ .offset = 16 * SZ_1M, -+ .size = 112 * SZ_1M }, - }; - - #define NUM_PARTITIONS16K 2 -@@ -116,7 +106,7 @@ - /* - * hardware specific access to control-lines - */ --void autcpu12_hwcontrol(int cmd) -+static void autcpu12_hwcontrol(struct mtd_info *mtd, int cmd) - { - - switch(cmd){ -@@ -135,12 +125,13 @@ - /* - * read device ready pin - */ --int autcpu12_device_ready(void) -+int autcpu12_device_ready(struct mtd_info *mtd) - { - - return ( (*(volatile unsigned char *) (autcpu12_io_base + autcpu12_pedr)) & AUTCPU12_SMC_RDY) ? 1 : 0; - - } -+ - /* - * Main initialization routine - */ -@@ -159,7 +150,7 @@ - } - - /* map physical adress */ -- autcpu12_fio_base=(unsigned long)ioremap(autcpu12_fio_pbase,SZ_1K); -+ autcpu12_fio_base=(void __iomem *)ioremap(autcpu12_fio_pbase,SZ_1K); - if(!autcpu12_fio_base){ - printk("Ioremap autcpu12 SmartMedia Card failed\n"); - err = -EIO; -@@ -185,20 +176,18 @@ - this->chip_delay = 20; - this->eccmode = NAND_ECC_SOFT; - -+ /* Enable the following for a flash based bad block table */ -+ /* -+ this->options = NAND_USE_FLASH_BBT; -+ */ -+ this->options = NAND_USE_FLASH_BBT; -+ - /* Scan to find existance of the device */ -- if (nand_scan (autcpu12_mtd)) { -+ if (nand_scan (autcpu12_mtd, 1)) { - err = -ENXIO; - goto out_ior; - } -- -- /* Allocate memory for internal data buffer */ -- this->data_buf = kmalloc (sizeof(u_char) * (autcpu12_mtd->oobblock + autcpu12_mtd->oobsize), GFP_KERNEL); -- if (!this->data_buf) { -- printk ("Unable to allocate NAND data buffer for AUTCPU12.\n"); -- err = -ENOMEM; -- goto out_ior; -- } -- -+ - /* Register the partitions */ - 
switch(autcpu12_mtd->size){ - case SZ_16M: add_mtd_partitions(autcpu12_mtd, partition_info16k, NUM_PARTITIONS16K); break; -@@ -208,13 +197,11 @@ - default: { - printk ("Unsupported SmartMedia device\n"); - err = -ENXIO; -- goto out_buf; -+ goto out_ior; - } - } - goto out; - --out_buf: -- kfree (this->data_buf); - out_ior: - iounmap((void *)autcpu12_fio_base); - out_mtd: -@@ -231,20 +218,12 @@ - #ifdef MODULE - static void __exit autcpu12_cleanup (void) - { -- struct nand_chip *this = (struct nand_chip *) &autcpu12_mtd[1]; -- -- /* Unregister partitions */ -- del_mtd_partitions(autcpu12_mtd); -- -- /* Unregister the device */ -- del_mtd_device (autcpu12_mtd); -- -- /* Free internal data buffers */ -- kfree (this->data_buf); -+ /* Release resources, unregister device */ -+ nand_release (autcpu12_mtd); - - /* unmap physical adress */ - iounmap((void *)autcpu12_fio_base); -- -+ - /* Free the MTD device structure */ - kfree (autcpu12_mtd); - } -Index: linux-2.6.5/drivers/mtd/nand/diskonchip.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/diskonchip.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/diskonchip.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,1637 @@ -+/* -+ * drivers/mtd/nand/diskonchip.c -+ * -+ * (C) 2003 Red Hat, Inc. 
-+ * (C) 2004 Dan Brown <dan_brown@ieee.org> -+ * (C) 2004 Kalev Lember <kalev@smartlink.ee> -+ * -+ * Author: David Woodhouse <dwmw2@infradead.org> -+ * Additional Diskonchip 2000 and Millennium support by Dan Brown <dan_brown@ieee.org> -+ * Diskonchip Millennium Plus support by Kalev Lember <kalev@smartlink.ee> -+ * -+ * Interface to generic NAND code for M-Systems DiskOnChip devices -+ * -+ * $Id: diskonchip.c,v 1.35 2004/09/16 23:27:14 gleixner Exp $ -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/init.h> -+#include <linux/sched.h> -+#include <linux/delay.h> -+#include <asm/io.h> -+ -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/nand.h> -+#include <linux/mtd/doc2000.h> -+#include <linux/mtd/compatmac.h> -+#include <linux/mtd/partitions.h> -+#include <linux/mtd/inftl.h> -+ -+/* Where to look for the devices? */ -+#ifndef CONFIG_MTD_DISKONCHIP_PROBE_ADDRESS -+#define CONFIG_MTD_DISKONCHIP_PROBE_ADDRESS 0 -+#endif -+ -+static unsigned long __initdata doc_locations[] = { -+#if defined (__alpha__) || defined(__i386__) || defined(__x86_64__) -+#ifdef CONFIG_MTD_DISKONCHIP_PROBE_HIGH -+ 0xfffc8000, 0xfffca000, 0xfffcc000, 0xfffce000, -+ 0xfffd0000, 0xfffd2000, 0xfffd4000, 0xfffd6000, -+ 0xfffd8000, 0xfffda000, 0xfffdc000, 0xfffde000, -+ 0xfffe0000, 0xfffe2000, 0xfffe4000, 0xfffe6000, -+ 0xfffe8000, 0xfffea000, 0xfffec000, 0xfffee000, -+#else /* CONFIG_MTD_DOCPROBE_HIGH */ -+ 0xc8000, 0xca000, 0xcc000, 0xce000, -+ 0xd0000, 0xd2000, 0xd4000, 0xd6000, -+ 0xd8000, 0xda000, 0xdc000, 0xde000, -+ 0xe0000, 0xe2000, 0xe4000, 0xe6000, -+ 0xe8000, 0xea000, 0xec000, 0xee000, -+#endif /* CONFIG_MTD_DOCPROBE_HIGH */ -+#elif defined(__PPC__) -+ 0xe4000000, -+#elif defined(CONFIG_MOMENCO_OCELOT) -+ 0x2f000000, -+ 0xff000000, -+#elif defined(CONFIG_MOMENCO_OCELOT_G) || defined (CONFIG_MOMENCO_OCELOT_C) -+ 0xff000000, -+##else -+#warning Unknown architecture for DiskOnChip. 
No default probe locations defined -+#endif -+ 0xffffffff }; -+ -+static struct mtd_info *doclist = NULL; -+ -+struct doc_priv { -+ void __iomem *virtadr; -+ unsigned long physadr; -+ u_char ChipID; -+ u_char CDSNControl; -+ int chips_per_floor; /* The number of chips detected on each floor */ -+ int curfloor; -+ int curchip; -+ int mh0_page; -+ int mh1_page; -+ struct mtd_info *nextdoc; -+}; -+ -+/* Max number of eraseblocks to scan (from start of device) for the (I)NFTL -+ MediaHeader. The spec says to just keep going, I think, but that's just -+ silly. */ -+#define MAX_MEDIAHEADER_SCAN 8 -+ -+/* This is the syndrome computed by the HW ecc generator upon reading an empty -+ page, one with all 0xff for data and stored ecc code. */ -+static u_char empty_read_syndrome[6] = { 0x26, 0xff, 0x6d, 0x47, 0x73, 0x7a }; -+/* This is the ecc value computed by the HW ecc generator upon writing an empty -+ page, one with all 0xff for data. */ -+static u_char empty_write_ecc[6] = { 0x4b, 0x00, 0xe2, 0x0e, 0x93, 0xf7 }; -+ -+#define INFTL_BBT_RESERVED_BLOCKS 4 -+ -+#define DoC_is_MillenniumPlus(doc) ((doc)->ChipID == DOC_ChipID_DocMilPlus16 || (doc)->ChipID == DOC_ChipID_DocMilPlus32) -+#define DoC_is_Millennium(doc) ((doc)->ChipID == DOC_ChipID_DocMil) -+#define DoC_is_2000(doc) ((doc)->ChipID == DOC_ChipID_Doc2k) -+ -+static void doc200x_hwcontrol(struct mtd_info *mtd, int cmd); -+static void doc200x_select_chip(struct mtd_info *mtd, int chip); -+ -+static int debug=0; -+MODULE_PARM(debug, "i"); -+ -+static int try_dword=1; -+MODULE_PARM(try_dword, "i"); -+ -+static int no_ecc_failures=0; -+MODULE_PARM(no_ecc_failures, "i"); -+ -+#ifdef CONFIG_MTD_PARTITIONS -+static int no_autopart=0; -+MODULE_PARM(no_autopart, "i"); -+#endif -+ -+#ifdef MTD_NAND_DISKONCHIP_BBTWRITE -+static int inftl_bbt_write=1; -+#else -+static int inftl_bbt_write=0; -+#endif -+MODULE_PARM(inftl_bbt_write, "i"); -+ -+static unsigned long doc_config_location = CONFIG_MTD_DISKONCHIP_PROBE_ADDRESS; 
-+MODULE_PARM(doc_config_location, "l"); -+MODULE_PARM_DESC(doc_config_location, "Physical memory address at which to probe for DiskOnChip"); -+ -+static void DoC_Delay(struct doc_priv *doc, unsigned short cycles) -+{ -+ volatile char dummy; -+ int i; -+ -+ for (i = 0; i < cycles; i++) { -+ if (DoC_is_Millennium(doc)) -+ dummy = ReadDOC(doc->virtadr, NOP); -+ else if (DoC_is_MillenniumPlus(doc)) -+ dummy = ReadDOC(doc->virtadr, Mplus_NOP); -+ else -+ dummy = ReadDOC(doc->virtadr, DOCStatus); -+ } -+ -+} -+ -+#define CDSN_CTRL_FR_B_MASK (CDSN_CTRL_FR_B0 | CDSN_CTRL_FR_B1) -+ -+/* DOC_WaitReady: Wait for RDY line to be asserted by the flash chip */ -+static int _DoC_WaitReady(struct doc_priv *doc) -+{ -+ void __iomem *docptr = doc->virtadr; -+ unsigned long timeo = jiffies + (HZ * 10); -+ -+ if(debug) printk("_DoC_WaitReady...\n"); -+ /* Out-of-line routine to wait for chip response */ -+ if (DoC_is_MillenniumPlus(doc)) { -+ while ((ReadDOC(docptr, Mplus_FlashControl) & CDSN_CTRL_FR_B_MASK) != CDSN_CTRL_FR_B_MASK) { -+ if (time_after(jiffies, timeo)) { -+ printk("_DoC_WaitReady timed out.\n"); -+ return -EIO; -+ } -+ udelay(1); -+ cond_resched(); -+ } -+ } else { -+ while (!(ReadDOC(docptr, CDSNControl) & CDSN_CTRL_FR_B)) { -+ if (time_after(jiffies, timeo)) { -+ printk("_DoC_WaitReady timed out.\n"); -+ return -EIO; -+ } -+ udelay(1); -+ cond_resched(); -+ } -+ } -+ -+ return 0; -+} -+ -+static inline int DoC_WaitReady(struct doc_priv *doc) -+{ -+ void __iomem *docptr = doc->virtadr; -+ int ret = 0; -+ -+ if (DoC_is_MillenniumPlus(doc)) { -+ DoC_Delay(doc, 4); -+ -+ if ((ReadDOC(docptr, Mplus_FlashControl) & CDSN_CTRL_FR_B_MASK) != CDSN_CTRL_FR_B_MASK) -+ /* Call the out-of-line routine to wait */ -+ ret = _DoC_WaitReady(doc); -+ } else { -+ DoC_Delay(doc, 4); -+ -+ if (!(ReadDOC(docptr, CDSNControl) & CDSN_CTRL_FR_B)) -+ /* Call the out-of-line routine to wait */ -+ ret = _DoC_WaitReady(doc); -+ DoC_Delay(doc, 2); -+ } -+ -+ if(debug) printk("DoC_WaitReady OK\n"); 
-+ return ret; -+} -+ -+static void doc2000_write_byte(struct mtd_info *mtd, u_char datum) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ -+ if(debug)printk("write_byte %02x\n", datum); -+ WriteDOC(datum, docptr, CDSNSlowIO); -+ WriteDOC(datum, docptr, 2k_CDSN_IO); -+} -+ -+static u_char doc2000_read_byte(struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ u_char ret; -+ -+ ReadDOC(docptr, CDSNSlowIO); -+ DoC_Delay(doc, 2); -+ ret = ReadDOC(docptr, 2k_CDSN_IO); -+ if (debug) printk("read_byte returns %02x\n", ret); -+ return ret; -+} -+ -+static void doc2000_writebuf(struct mtd_info *mtd, -+ const u_char *buf, int len) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ int i; -+ if (debug)printk("writebuf of %d bytes: ", len); -+ for (i=0; i < len; i++) { -+ WriteDOC_(buf[i], docptr, DoC_2k_CDSN_IO + i); -+ if (debug && i < 16) -+ printk("%02x ", buf[i]); -+ } -+ if (debug) printk("\n"); -+} -+ -+static void doc2000_readbuf(struct mtd_info *mtd, -+ u_char *buf, int len) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ int i; -+ -+ if (debug)printk("readbuf of %d bytes: ", len); -+ -+ for (i=0; i < len; i++) { -+ buf[i] = ReadDOC(docptr, 2k_CDSN_IO + i); -+ } -+} -+ -+static void doc2000_readbuf_dword(struct mtd_info *mtd, -+ u_char *buf, int len) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ int i; -+ -+ if (debug) printk("readbuf_dword of %d bytes: ", len); -+ -+ if (unlikely((((unsigned long)buf)|len) & 3)) { -+ for (i=0; i < len; i++) { -+ *(uint8_t *)(&buf[i]) = ReadDOC(docptr, 2k_CDSN_IO + i); -+ } -+ } else { -+ for (i=0; i < len; 
i+=4) { -+ *(uint32_t*)(&buf[i]) = readl(docptr + DoC_2k_CDSN_IO + i); -+ } -+ } -+} -+ -+static int doc2000_verifybuf(struct mtd_info *mtd, -+ const u_char *buf, int len) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ int i; -+ -+ for (i=0; i < len; i++) -+ if (buf[i] != ReadDOC(docptr, 2k_CDSN_IO)) -+ return -EFAULT; -+ return 0; -+} -+ -+static uint16_t __init doc200x_ident_chip(struct mtd_info *mtd, int nr) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ uint16_t ret; -+ -+ doc200x_select_chip(mtd, nr); -+ doc200x_hwcontrol(mtd, NAND_CTL_SETCLE); -+ this->write_byte(mtd, NAND_CMD_READID); -+ doc200x_hwcontrol(mtd, NAND_CTL_CLRCLE); -+ doc200x_hwcontrol(mtd, NAND_CTL_SETALE); -+ this->write_byte(mtd, 0); -+ doc200x_hwcontrol(mtd, NAND_CTL_CLRALE); -+ -+ ret = this->read_byte(mtd) << 8; -+ ret |= this->read_byte(mtd); -+ -+ if (doc->ChipID == DOC_ChipID_Doc2k && try_dword && !nr) { -+ /* First chip probe. 
See if we get same results by 32-bit access */ -+ union { -+ uint32_t dword; -+ uint8_t byte[4]; -+ } ident; -+ void __iomem *docptr = doc->virtadr; -+ -+ doc200x_hwcontrol(mtd, NAND_CTL_SETCLE); -+ doc2000_write_byte(mtd, NAND_CMD_READID); -+ doc200x_hwcontrol(mtd, NAND_CTL_CLRCLE); -+ doc200x_hwcontrol(mtd, NAND_CTL_SETALE); -+ doc2000_write_byte(mtd, 0); -+ doc200x_hwcontrol(mtd, NAND_CTL_CLRALE); -+ -+ ident.dword = readl(docptr + DoC_2k_CDSN_IO); -+ if (((ident.byte[0] << 8) | ident.byte[1]) == ret) { -+ printk(KERN_INFO "DiskOnChip 2000 responds to DWORD access\n"); -+ this->read_buf = &doc2000_readbuf_dword; -+ } -+ } -+ -+ return ret; -+} -+ -+static void __init doc2000_count_chips(struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ uint16_t mfrid; -+ int i; -+ -+ /* Max 4 chips per floor on DiskOnChip 2000 */ -+ doc->chips_per_floor = 4; -+ -+ /* Find out what the first chip is */ -+ mfrid = doc200x_ident_chip(mtd, 0); -+ -+ /* Find how many chips in each floor. 
*/ -+ for (i = 1; i < 4; i++) { -+ if (doc200x_ident_chip(mtd, i) != mfrid) -+ break; -+ } -+ doc->chips_per_floor = i; -+ printk(KERN_DEBUG "Detected %d chips per floor.\n", i); -+} -+ -+static int doc200x_wait(struct mtd_info *mtd, struct nand_chip *this, int state) -+{ -+ struct doc_priv *doc = (void *)this->priv; -+ -+ int status; -+ -+ DoC_WaitReady(doc); -+ this->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1); -+ DoC_WaitReady(doc); -+ status = (int)this->read_byte(mtd); -+ -+ return status; -+} -+ -+static void doc2001_write_byte(struct mtd_info *mtd, u_char datum) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ -+ WriteDOC(datum, docptr, CDSNSlowIO); -+ WriteDOC(datum, docptr, Mil_CDSN_IO); -+ WriteDOC(datum, docptr, WritePipeTerm); -+} -+ -+static u_char doc2001_read_byte(struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ -+ //ReadDOC(docptr, CDSNSlowIO); -+ /* 11.4.5 -- delay twice to allow extended length cycle */ -+ DoC_Delay(doc, 2); -+ ReadDOC(docptr, ReadPipeInit); -+ //return ReadDOC(docptr, Mil_CDSN_IO); -+ return ReadDOC(docptr, LastDataRead); -+} -+ -+static void doc2001_writebuf(struct mtd_info *mtd, -+ const u_char *buf, int len) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ int i; -+ -+ for (i=0; i < len; i++) -+ WriteDOC_(buf[i], docptr, DoC_Mil_CDSN_IO + i); -+ /* Terminate write pipeline */ -+ WriteDOC(0x00, docptr, WritePipeTerm); -+} -+ -+static void doc2001_readbuf(struct mtd_info *mtd, -+ u_char *buf, int len) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ int i; -+ -+ /* Start read pipeline */ -+ ReadDOC(docptr, ReadPipeInit); -+ -+ for (i=0; i < len-1; i++) -+ buf[i] = ReadDOC(docptr, 
Mil_CDSN_IO + (i & 0xff)); -+ -+ /* Terminate read pipeline */ -+ buf[i] = ReadDOC(docptr, LastDataRead); -+} -+ -+static int doc2001_verifybuf(struct mtd_info *mtd, -+ const u_char *buf, int len) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ int i; -+ -+ /* Start read pipeline */ -+ ReadDOC(docptr, ReadPipeInit); -+ -+ for (i=0; i < len-1; i++) -+ if (buf[i] != ReadDOC(docptr, Mil_CDSN_IO)) { -+ ReadDOC(docptr, LastDataRead); -+ return i; -+ } -+ if (buf[i] != ReadDOC(docptr, LastDataRead)) -+ return i; -+ return 0; -+} -+ -+static u_char doc2001plus_read_byte(struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ u_char ret; -+ -+ ReadDOC(docptr, Mplus_ReadPipeInit); -+ ReadDOC(docptr, Mplus_ReadPipeInit); -+ ret = ReadDOC(docptr, Mplus_LastDataRead); -+ if (debug) printk("read_byte returns %02x\n", ret); -+ return ret; -+} -+ -+static void doc2001plus_writebuf(struct mtd_info *mtd, -+ const u_char *buf, int len) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ int i; -+ -+ if (debug)printk("writebuf of %d bytes: ", len); -+ for (i=0; i < len; i++) { -+ WriteDOC_(buf[i], docptr, DoC_Mil_CDSN_IO + i); -+ if (debug && i < 16) -+ printk("%02x ", buf[i]); -+ } -+ if (debug) printk("\n"); -+} -+ -+static void doc2001plus_readbuf(struct mtd_info *mtd, -+ u_char *buf, int len) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ int i; -+ -+ if (debug)printk("readbuf of %d bytes: ", len); -+ -+ /* Start read pipeline */ -+ ReadDOC(docptr, Mplus_ReadPipeInit); -+ ReadDOC(docptr, Mplus_ReadPipeInit); -+ -+ for (i=0; i < len-2; i++) { -+ buf[i] = ReadDOC(docptr, Mil_CDSN_IO); -+ if (debug && i < 16) -+ printk("%02x ", buf[i]); -+ } 
-+ -+ /* Terminate read pipeline */ -+ buf[len-2] = ReadDOC(docptr, Mplus_LastDataRead); -+ if (debug && i < 16) -+ printk("%02x ", buf[len-2]); -+ buf[len-1] = ReadDOC(docptr, Mplus_LastDataRead); -+ if (debug && i < 16) -+ printk("%02x ", buf[len-1]); -+ if (debug) printk("\n"); -+} -+ -+static int doc2001plus_verifybuf(struct mtd_info *mtd, -+ const u_char *buf, int len) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ int i; -+ -+ if (debug)printk("verifybuf of %d bytes: ", len); -+ -+ /* Start read pipeline */ -+ ReadDOC(docptr, Mplus_ReadPipeInit); -+ ReadDOC(docptr, Mplus_ReadPipeInit); -+ -+ for (i=0; i < len-2; i++) -+ if (buf[i] != ReadDOC(docptr, Mil_CDSN_IO)) { -+ ReadDOC(docptr, Mplus_LastDataRead); -+ ReadDOC(docptr, Mplus_LastDataRead); -+ return i; -+ } -+ if (buf[len-2] != ReadDOC(docptr, Mplus_LastDataRead)) -+ return len-2; -+ if (buf[len-1] != ReadDOC(docptr, Mplus_LastDataRead)) -+ return len-1; -+ return 0; -+} -+ -+static void doc2001plus_select_chip(struct mtd_info *mtd, int chip) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ int floor = 0; -+ -+ if(debug)printk("select chip (%d)\n", chip); -+ -+ if (chip == -1) { -+ /* Disable flash internally */ -+ WriteDOC(0, docptr, Mplus_FlashSelect); -+ return; -+ } -+ -+ floor = chip / doc->chips_per_floor; -+ chip -= (floor * doc->chips_per_floor); -+ -+ /* Assert ChipEnable and deassert WriteProtect */ -+ WriteDOC((DOC_FLASH_CE), docptr, Mplus_FlashSelect); -+ this->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); -+ -+ doc->curchip = chip; -+ doc->curfloor = floor; -+} -+ -+static void doc200x_select_chip(struct mtd_info *mtd, int chip) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ int floor = 0; -+ -+ if(debug)printk("select chip (%d)\n", chip); -+ -+ if 
(chip == -1) -+ return; -+ -+ floor = chip / doc->chips_per_floor; -+ chip -= (floor * doc->chips_per_floor); -+ -+ /* 11.4.4 -- deassert CE before changing chip */ -+ doc200x_hwcontrol(mtd, NAND_CTL_CLRNCE); -+ -+ WriteDOC(floor, docptr, FloorSelect); -+ WriteDOC(chip, docptr, CDSNDeviceSelect); -+ -+ doc200x_hwcontrol(mtd, NAND_CTL_SETNCE); -+ -+ doc->curchip = chip; -+ doc->curfloor = floor; -+} -+ -+static void doc200x_hwcontrol(struct mtd_info *mtd, int cmd) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ -+ switch(cmd) { -+ case NAND_CTL_SETNCE: -+ doc->CDSNControl |= CDSN_CTRL_CE; -+ break; -+ case NAND_CTL_CLRNCE: -+ doc->CDSNControl &= ~CDSN_CTRL_CE; -+ break; -+ case NAND_CTL_SETCLE: -+ doc->CDSNControl |= CDSN_CTRL_CLE; -+ break; -+ case NAND_CTL_CLRCLE: -+ doc->CDSNControl &= ~CDSN_CTRL_CLE; -+ break; -+ case NAND_CTL_SETALE: -+ doc->CDSNControl |= CDSN_CTRL_ALE; -+ break; -+ case NAND_CTL_CLRALE: -+ doc->CDSNControl &= ~CDSN_CTRL_ALE; -+ break; -+ case NAND_CTL_SETWP: -+ doc->CDSNControl |= CDSN_CTRL_WP; -+ break; -+ case NAND_CTL_CLRWP: -+ doc->CDSNControl &= ~CDSN_CTRL_WP; -+ break; -+ } -+ if (debug)printk("hwcontrol(%d): %02x\n", cmd, doc->CDSNControl); -+ WriteDOC(doc->CDSNControl, docptr, CDSNControl); -+ /* 11.4.3 -- 4 NOPs after CSDNControl write */ -+ DoC_Delay(doc, 4); -+} -+ -+static void doc2001plus_command (struct mtd_info *mtd, unsigned command, int column, int page_addr) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ -+ /* -+ * Must terminate write pipeline before sending any commands -+ * to the device. -+ */ -+ if (command == NAND_CMD_PAGEPROG) { -+ WriteDOC(0x00, docptr, Mplus_WritePipeTerm); -+ WriteDOC(0x00, docptr, Mplus_WritePipeTerm); -+ } -+ -+ /* -+ * Write out the command to the device. 
-+ */ -+ if (command == NAND_CMD_SEQIN) { -+ int readcmd; -+ -+ if (column >= mtd->oobblock) { -+ /* OOB area */ -+ column -= mtd->oobblock; -+ readcmd = NAND_CMD_READOOB; -+ } else if (column < 256) { -+ /* First 256 bytes --> READ0 */ -+ readcmd = NAND_CMD_READ0; -+ } else { -+ column -= 256; -+ readcmd = NAND_CMD_READ1; -+ } -+ WriteDOC(readcmd, docptr, Mplus_FlashCmd); -+ } -+ WriteDOC(command, docptr, Mplus_FlashCmd); -+ WriteDOC(0, docptr, Mplus_WritePipeTerm); -+ WriteDOC(0, docptr, Mplus_WritePipeTerm); -+ -+ if (column != -1 || page_addr != -1) { -+ /* Serially input address */ -+ if (column != -1) { -+ /* Adjust columns for 16 bit buswidth */ -+ if (this->options & NAND_BUSWIDTH_16) -+ column >>= 1; -+ WriteDOC(column, docptr, Mplus_FlashAddress); -+ } -+ if (page_addr != -1) { -+ WriteDOC((unsigned char) (page_addr & 0xff), docptr, Mplus_FlashAddress); -+ WriteDOC((unsigned char) ((page_addr >> 8) & 0xff), docptr, Mplus_FlashAddress); -+ /* One more address cycle for higher density devices */ -+ if (this->chipsize & 0x0c000000) { -+ WriteDOC((unsigned char) ((page_addr >> 16) & 0x0f), docptr, Mplus_FlashAddress); -+ printk("high density\n"); -+ } -+ } -+ WriteDOC(0, docptr, Mplus_WritePipeTerm); -+ WriteDOC(0, docptr, Mplus_WritePipeTerm); -+ /* deassert ALE */ -+ if (command == NAND_CMD_READ0 || command == NAND_CMD_READ1 || command == NAND_CMD_READOOB || command == NAND_CMD_READID) -+ WriteDOC(0, docptr, Mplus_FlashControl); -+ } -+ -+ /* -+ * program and erase have their own busy handlers -+ * status and sequential in needs no delay -+ */ -+ switch (command) { -+ -+ case NAND_CMD_PAGEPROG: -+ case NAND_CMD_ERASE1: -+ case NAND_CMD_ERASE2: -+ case NAND_CMD_SEQIN: -+ case NAND_CMD_STATUS: -+ return; -+ -+ case NAND_CMD_RESET: -+ if (this->dev_ready) -+ break; -+ udelay(this->chip_delay); -+ WriteDOC(NAND_CMD_STATUS, docptr, Mplus_FlashCmd); -+ WriteDOC(0, docptr, Mplus_WritePipeTerm); -+ WriteDOC(0, docptr, Mplus_WritePipeTerm); -+ while ( 
!(this->read_byte(mtd) & 0x40)); -+ return; -+ -+ /* This applies to read commands */ -+ default: -+ /* -+ * If we don't have access to the busy pin, we apply the given -+ * command delay -+ */ -+ if (!this->dev_ready) { -+ udelay (this->chip_delay); -+ return; -+ } -+ } -+ -+ /* Apply this short delay always to ensure that we do wait tWB in -+ * any case on any machine. */ -+ ndelay (100); -+ /* wait until command is processed */ -+ while (!this->dev_ready(mtd)); -+} -+ -+static int doc200x_dev_ready(struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ -+ if (DoC_is_MillenniumPlus(doc)) { -+ /* 11.4.2 -- must NOP four times before checking FR/B# */ -+ DoC_Delay(doc, 4); -+ if ((ReadDOC(docptr, Mplus_FlashControl) & CDSN_CTRL_FR_B_MASK) != CDSN_CTRL_FR_B_MASK) { -+ if(debug) -+ printk("not ready\n"); -+ return 0; -+ } -+ if (debug)printk("was ready\n"); -+ return 1; -+ } else { -+ /* 11.4.2 -- must NOP four times before checking FR/B# */ -+ DoC_Delay(doc, 4); -+ if (!(ReadDOC(docptr, CDSNControl) & CDSN_CTRL_FR_B)) { -+ if(debug) -+ printk("not ready\n"); -+ return 0; -+ } -+ /* 11.4.2 -- Must NOP twice if it's ready */ -+ DoC_Delay(doc, 2); -+ if (debug)printk("was ready\n"); -+ return 1; -+ } -+} -+ -+static int doc200x_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip) -+{ -+ /* This is our last resort if we couldn't find or create a BBT. Just -+ pretend all blocks are good. 
*/ -+ return 0; -+} -+ -+static void doc200x_enable_hwecc(struct mtd_info *mtd, int mode) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ -+ /* Prime the ECC engine */ -+ switch(mode) { -+ case NAND_ECC_READ: -+ WriteDOC(DOC_ECC_RESET, docptr, ECCConf); -+ WriteDOC(DOC_ECC_EN, docptr, ECCConf); -+ break; -+ case NAND_ECC_WRITE: -+ WriteDOC(DOC_ECC_RESET, docptr, ECCConf); -+ WriteDOC(DOC_ECC_EN | DOC_ECC_RW, docptr, ECCConf); -+ break; -+ } -+} -+ -+static void doc2001plus_enable_hwecc(struct mtd_info *mtd, int mode) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ -+ /* Prime the ECC engine */ -+ switch(mode) { -+ case NAND_ECC_READ: -+ WriteDOC(DOC_ECC_RESET, docptr, Mplus_ECCConf); -+ WriteDOC(DOC_ECC_EN, docptr, Mplus_ECCConf); -+ break; -+ case NAND_ECC_WRITE: -+ WriteDOC(DOC_ECC_RESET, docptr, Mplus_ECCConf); -+ WriteDOC(DOC_ECC_EN | DOC_ECC_RW, docptr, Mplus_ECCConf); -+ break; -+ } -+} -+ -+/* This code is only called on write */ -+static int doc200x_calculate_ecc(struct mtd_info *mtd, const u_char *dat, -+ unsigned char *ecc_code) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ int i; -+ int emptymatch = 1; -+ -+ /* flush the pipeline */ -+ if (DoC_is_2000(doc)) { -+ WriteDOC(doc->CDSNControl & ~CDSN_CTRL_FLASH_IO, docptr, CDSNControl); -+ WriteDOC(0, docptr, 2k_CDSN_IO); -+ WriteDOC(0, docptr, 2k_CDSN_IO); -+ WriteDOC(0, docptr, 2k_CDSN_IO); -+ WriteDOC(doc->CDSNControl, docptr, CDSNControl); -+ } else if (DoC_is_MillenniumPlus(doc)) { -+ WriteDOC(0, docptr, Mplus_NOP); -+ WriteDOC(0, docptr, Mplus_NOP); -+ WriteDOC(0, docptr, Mplus_NOP); -+ } else { -+ WriteDOC(0, docptr, NOP); -+ WriteDOC(0, docptr, NOP); -+ WriteDOC(0, docptr, NOP); -+ } -+ -+ for (i = 0; i < 6; i++) { -+ if (DoC_is_MillenniumPlus(doc)) -+ 
ecc_code[i] = ReadDOC_(docptr, DoC_Mplus_ECCSyndrome0 + i); -+ else -+ ecc_code[i] = ReadDOC_(docptr, DoC_ECCSyndrome0 + i); -+ if (ecc_code[i] != empty_write_ecc[i]) -+ emptymatch = 0; -+ } -+ if (DoC_is_MillenniumPlus(doc)) -+ WriteDOC(DOC_ECC_DIS, docptr, Mplus_ECCConf); -+ else -+ WriteDOC(DOC_ECC_DIS, docptr, ECCConf); -+#if 0 -+ /* If emptymatch=1, we might have an all-0xff data buffer. Check. */ -+ if (emptymatch) { -+ /* Note: this somewhat expensive test should not be triggered -+ often. It could be optimized away by examining the data in -+ the writebuf routine, and remembering the result. */ -+ for (i = 0; i < 512; i++) { -+ if (dat[i] == 0xff) continue; -+ emptymatch = 0; -+ break; -+ } -+ } -+ /* If emptymatch still =1, we do have an all-0xff data buffer. -+ Return all-0xff ecc value instead of the computed one, so -+ it'll look just like a freshly-erased page. */ -+ if (emptymatch) memset(ecc_code, 0xff, 6); -+#endif -+ return 0; -+} -+ -+static int doc200x_correct_data(struct mtd_info *mtd, u_char *dat, u_char *read_ecc, u_char *calc_ecc) -+{ -+ int i, ret = 0; -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ void __iomem *docptr = doc->virtadr; -+ volatile u_char dummy; -+ int emptymatch = 1; -+ -+ /* flush the pipeline */ -+ if (DoC_is_2000(doc)) { -+ dummy = ReadDOC(docptr, 2k_ECCStatus); -+ dummy = ReadDOC(docptr, 2k_ECCStatus); -+ dummy = ReadDOC(docptr, 2k_ECCStatus); -+ } else if (DoC_is_MillenniumPlus(doc)) { -+ dummy = ReadDOC(docptr, Mplus_ECCConf); -+ dummy = ReadDOC(docptr, Mplus_ECCConf); -+ dummy = ReadDOC(docptr, Mplus_ECCConf); -+ } else { -+ dummy = ReadDOC(docptr, ECCConf); -+ dummy = ReadDOC(docptr, ECCConf); -+ dummy = ReadDOC(docptr, ECCConf); -+ } -+ -+ /* Error occured ? 
*/ -+ if (dummy & 0x80) { -+ for (i = 0; i < 6; i++) { -+ if (DoC_is_MillenniumPlus(doc)) -+ calc_ecc[i] = ReadDOC_(docptr, DoC_Mplus_ECCSyndrome0 + i); -+ else -+ calc_ecc[i] = ReadDOC_(docptr, DoC_ECCSyndrome0 + i); -+ if (calc_ecc[i] != empty_read_syndrome[i]) -+ emptymatch = 0; -+ } -+ /* If emptymatch=1, the read syndrome is consistent with an -+ all-0xff data and stored ecc block. Check the stored ecc. */ -+ if (emptymatch) { -+ for (i = 0; i < 6; i++) { -+ if (read_ecc[i] == 0xff) continue; -+ emptymatch = 0; -+ break; -+ } -+ } -+ /* If emptymatch still =1, check the data block. */ -+ if (emptymatch) { -+ /* Note: this somewhat expensive test should not be triggered -+ often. It could be optimized away by examining the data in -+ the readbuf routine, and remembering the result. */ -+ for (i = 0; i < 512; i++) { -+ if (dat[i] == 0xff) continue; -+ emptymatch = 0; -+ break; -+ } -+ } -+ /* If emptymatch still =1, this is almost certainly a freshly- -+ erased block, in which case the ECC will not come out right. -+ We'll suppress the error and tell the caller everything's -+ OK. Because it is. */ -+ if (!emptymatch) ret = doc_decode_ecc (dat, calc_ecc); -+ if (ret > 0) -+ printk(KERN_ERR "doc200x_correct_data corrected %d errors\n", ret); -+ } -+ if (DoC_is_MillenniumPlus(doc)) -+ WriteDOC(DOC_ECC_DIS, docptr, Mplus_ECCConf); -+ else -+ WriteDOC(DOC_ECC_DIS, docptr, ECCConf); -+ if (no_ecc_failures && (ret == -1)) { -+ printk(KERN_ERR "suppressing ECC failure\n"); -+ ret = 0; -+ } -+ return ret; -+} -+ -+//u_char mydatabuf[528]; -+ -+static struct nand_oobinfo doc200x_oobinfo = { -+ .useecc = MTD_NANDECC_AUTOPLACE, -+ .eccbytes = 6, -+ .eccpos = {0, 1, 2, 3, 4, 5}, -+ .oobfree = { {8, 8} } -+}; -+ -+/* Find the (I)NFTL Media Header, and optionally also the mirror media header. -+ On sucessful return, buf will contain a copy of the media header for -+ further processing. id is the string to scan for, and will presumably be -+ either "ANAND" or "BNAND". 
If findmirror=1, also look for the mirror media -+ header. The page #s of the found media headers are placed in mh0_page and -+ mh1_page in the DOC private structure. */ -+static int __init find_media_headers(struct mtd_info *mtd, u_char *buf, -+ const char *id, int findmirror) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ unsigned offs, end = (MAX_MEDIAHEADER_SCAN << this->phys_erase_shift); -+ int ret; -+ size_t retlen; -+ -+ end = min(end, mtd->size); // paranoia -+ for (offs = 0; offs < end; offs += mtd->erasesize) { -+ ret = mtd->read(mtd, offs, mtd->oobblock, &retlen, buf); -+ if (retlen != mtd->oobblock) continue; -+ if (ret) { -+ printk(KERN_WARNING "ECC error scanning DOC at 0x%x\n", -+ offs); -+ } -+ if (memcmp(buf, id, 6)) continue; -+ printk(KERN_INFO "Found DiskOnChip %s Media Header at 0x%x\n", id, offs); -+ if (doc->mh0_page == -1) { -+ doc->mh0_page = offs >> this->page_shift; -+ if (!findmirror) return 1; -+ continue; -+ } -+ doc->mh1_page = offs >> this->page_shift; -+ return 2; -+ } -+ if (doc->mh0_page == -1) { -+ printk(KERN_WARNING "DiskOnChip %s Media Header not found.\n", id); -+ return 0; -+ } -+ /* Only one mediaheader was found. We want buf to contain a -+ mediaheader on return, so we'll have to re-read the one we found. */ -+ offs = doc->mh0_page << this->page_shift; -+ ret = mtd->read(mtd, offs, mtd->oobblock, &retlen, buf); -+ if (retlen != mtd->oobblock) { -+ /* Insanity. Give up. 
*/ -+ printk(KERN_ERR "Read DiskOnChip Media Header once, but can't reread it???\n"); -+ return 0; -+ } -+ return 1; -+} -+ -+static inline int __init nftl_partscan(struct mtd_info *mtd, -+ struct mtd_partition *parts) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ int ret = 0; -+ u_char *buf; -+ struct NFTLMediaHeader *mh; -+ const unsigned psize = 1 << this->page_shift; -+ unsigned blocks, maxblocks; -+ int offs, numheaders; -+ -+ buf = (u_char *) kmalloc(mtd->oobblock, GFP_KERNEL); -+ if (!buf) { -+ printk(KERN_ERR "DiskOnChip mediaheader kmalloc failed!\n"); -+ return 0; -+ } -+ if (!(numheaders=find_media_headers(mtd, buf, "ANAND", 1))) goto out; -+ mh = (struct NFTLMediaHeader *) buf; -+ -+//#ifdef CONFIG_MTD_DEBUG_VERBOSE -+// if (CONFIG_MTD_DEBUG_VERBOSE >= 2) -+ printk(KERN_INFO " DataOrgID = %s\n" -+ " NumEraseUnits = %d\n" -+ " FirstPhysicalEUN = %d\n" -+ " FormattedSize = %d\n" -+ " UnitSizeFactor = %d\n", -+ mh->DataOrgID, mh->NumEraseUnits, -+ mh->FirstPhysicalEUN, mh->FormattedSize, -+ mh->UnitSizeFactor); -+//#endif -+ -+ blocks = mtd->size >> this->phys_erase_shift; -+ maxblocks = min(32768U, mtd->erasesize - psize); -+ -+ if (mh->UnitSizeFactor == 0x00) { -+ /* Auto-determine UnitSizeFactor. The constraints are: -+ - There can be at most 32768 virtual blocks. -+ - There can be at most (virtual block size - page size) -+ virtual blocks (because MediaHeader+BBT must fit in 1). -+ */ -+ mh->UnitSizeFactor = 0xff; -+ while (blocks > maxblocks) { -+ blocks >>= 1; -+ maxblocks = min(32768U, (maxblocks << 1) + psize); -+ mh->UnitSizeFactor--; -+ } -+ printk(KERN_WARNING "UnitSizeFactor=0x00 detected. Correct value is assumed to be 0x%02x.\n", mh->UnitSizeFactor); -+ } -+ -+ /* NOTE: The lines below modify internal variables of the NAND and MTD -+ layers; variables with have already been configured by nand_scan. -+ Unfortunately, we didn't know before this point what these values -+ should be. 
Thus, this code is somewhat dependant on the exact -+ implementation of the NAND layer. */ -+ if (mh->UnitSizeFactor != 0xff) { -+ this->bbt_erase_shift += (0xff - mh->UnitSizeFactor); -+ mtd->erasesize <<= (0xff - mh->UnitSizeFactor); -+ printk(KERN_INFO "Setting virtual erase size to %d\n", mtd->erasesize); -+ blocks = mtd->size >> this->bbt_erase_shift; -+ maxblocks = min(32768U, mtd->erasesize - psize); -+ } -+ -+ if (blocks > maxblocks) { -+ printk(KERN_ERR "UnitSizeFactor of 0x%02x is inconsistent with device size. Aborting.\n", mh->UnitSizeFactor); -+ goto out; -+ } -+ -+ /* Skip past the media headers. */ -+ offs = max(doc->mh0_page, doc->mh1_page); -+ offs <<= this->page_shift; -+ offs += mtd->erasesize; -+ -+ //parts[0].name = " DiskOnChip Boot / Media Header partition"; -+ //parts[0].offset = 0; -+ //parts[0].size = offs; -+ -+ parts[0].name = " DiskOnChip BDTL partition"; -+ parts[0].offset = offs; -+ parts[0].size = (mh->NumEraseUnits - numheaders) << this->bbt_erase_shift; -+ -+ offs += parts[0].size; -+ if (offs < mtd->size) { -+ parts[1].name = " DiskOnChip Remainder partition"; -+ parts[1].offset = offs; -+ parts[1].size = mtd->size - offs; -+ ret = 2; -+ goto out; -+ } -+ ret = 1; -+out: -+ kfree(buf); -+ return ret; -+} -+ -+/* This is a stripped-down copy of the code in inftlmount.c */ -+static inline int __init inftl_partscan(struct mtd_info *mtd, -+ struct mtd_partition *parts) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ int ret = 0; -+ u_char *buf; -+ struct INFTLMediaHeader *mh; -+ struct INFTLPartition *ip; -+ int numparts = 0; -+ int blocks; -+ int vshift, lastvunit = 0; -+ int i; -+ int end = mtd->size; -+ -+ if (inftl_bbt_write) -+ end -= (INFTL_BBT_RESERVED_BLOCKS << this->phys_erase_shift); -+ -+ buf = (u_char *) kmalloc(mtd->oobblock, GFP_KERNEL); -+ if (!buf) { -+ printk(KERN_ERR "DiskOnChip mediaheader kmalloc failed!\n"); -+ return 0; -+ } -+ -+ if (!find_media_headers(mtd, buf, 
"BNAND", 0)) goto out; -+ doc->mh1_page = doc->mh0_page + (4096 >> this->page_shift); -+ mh = (struct INFTLMediaHeader *) buf; -+ -+ mh->NoOfBootImageBlocks = le32_to_cpu(mh->NoOfBootImageBlocks); -+ mh->NoOfBinaryPartitions = le32_to_cpu(mh->NoOfBinaryPartitions); -+ mh->NoOfBDTLPartitions = le32_to_cpu(mh->NoOfBDTLPartitions); -+ mh->BlockMultiplierBits = le32_to_cpu(mh->BlockMultiplierBits); -+ mh->FormatFlags = le32_to_cpu(mh->FormatFlags); -+ mh->PercentUsed = le32_to_cpu(mh->PercentUsed); -+ -+//#ifdef CONFIG_MTD_DEBUG_VERBOSE -+// if (CONFIG_MTD_DEBUG_VERBOSE >= 2) -+ printk(KERN_INFO " bootRecordID = %s\n" -+ " NoOfBootImageBlocks = %d\n" -+ " NoOfBinaryPartitions = %d\n" -+ " NoOfBDTLPartitions = %d\n" -+ " BlockMultiplerBits = %d\n" -+ " FormatFlgs = %d\n" -+ " OsakVersion = %d.%d.%d.%d\n" -+ " PercentUsed = %d\n", -+ mh->bootRecordID, mh->NoOfBootImageBlocks, -+ mh->NoOfBinaryPartitions, -+ mh->NoOfBDTLPartitions, -+ mh->BlockMultiplierBits, mh->FormatFlags, -+ ((unsigned char *) &mh->OsakVersion)[0] & 0xf, -+ ((unsigned char *) &mh->OsakVersion)[1] & 0xf, -+ ((unsigned char *) &mh->OsakVersion)[2] & 0xf, -+ ((unsigned char *) &mh->OsakVersion)[3] & 0xf, -+ mh->PercentUsed); -+//#endif -+ -+ vshift = this->phys_erase_shift + mh->BlockMultiplierBits; -+ -+ blocks = mtd->size >> vshift; -+ if (blocks > 32768) { -+ printk(KERN_ERR "BlockMultiplierBits=%d is inconsistent with device size. Aborting.\n", mh->BlockMultiplierBits); -+ goto out; -+ } -+ -+ blocks = doc->chips_per_floor << (this->chip_shift - this->phys_erase_shift); -+ if (inftl_bbt_write && (blocks > mtd->erasesize)) { -+ printk(KERN_ERR "Writeable BBTs spanning more than one erase block are not yet supported. 
FIX ME!\n"); -+ goto out; -+ } -+ -+ /* Scan the partitions */ -+ for (i = 0; (i < 4); i++) { -+ ip = &(mh->Partitions[i]); -+ ip->virtualUnits = le32_to_cpu(ip->virtualUnits); -+ ip->firstUnit = le32_to_cpu(ip->firstUnit); -+ ip->lastUnit = le32_to_cpu(ip->lastUnit); -+ ip->flags = le32_to_cpu(ip->flags); -+ ip->spareUnits = le32_to_cpu(ip->spareUnits); -+ ip->Reserved0 = le32_to_cpu(ip->Reserved0); -+ -+//#ifdef CONFIG_MTD_DEBUG_VERBOSE -+// if (CONFIG_MTD_DEBUG_VERBOSE >= 2) -+ printk(KERN_INFO " PARTITION[%d] ->\n" -+ " virtualUnits = %d\n" -+ " firstUnit = %d\n" -+ " lastUnit = %d\n" -+ " flags = 0x%x\n" -+ " spareUnits = %d\n", -+ i, ip->virtualUnits, ip->firstUnit, -+ ip->lastUnit, ip->flags, -+ ip->spareUnits); -+//#endif -+ -+/* -+ if ((i == 0) && (ip->firstUnit > 0)) { -+ parts[0].name = " DiskOnChip IPL / Media Header partition"; -+ parts[0].offset = 0; -+ parts[0].size = mtd->erasesize * ip->firstUnit; -+ numparts = 1; -+ } -+*/ -+ -+ if (ip->flags & INFTL_BINARY) -+ parts[numparts].name = " DiskOnChip BDK partition"; -+ else -+ parts[numparts].name = " DiskOnChip BDTL partition"; -+ parts[numparts].offset = ip->firstUnit << vshift; -+ parts[numparts].size = (1 + ip->lastUnit - ip->firstUnit) << vshift; -+ numparts++; -+ if (ip->lastUnit > lastvunit) lastvunit = ip->lastUnit; -+ if (ip->flags & INFTL_LAST) break; -+ } -+ lastvunit++; -+ if ((lastvunit << vshift) < end) { -+ parts[numparts].name = " DiskOnChip Remainder partition"; -+ parts[numparts].offset = lastvunit << vshift; -+ parts[numparts].size = end - parts[numparts].offset; -+ numparts++; -+ } -+ ret = numparts; -+out: -+ kfree(buf); -+ return ret; -+} -+ -+static int __init nftl_scan_bbt(struct mtd_info *mtd) -+{ -+ int ret, numparts; -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ struct mtd_partition parts[2]; -+ -+ memset((char *) parts, 0, sizeof(parts)); -+ /* On NFTL, we have to find the media headers before we can read the -+ BBTs, since they're 
stored in the media header eraseblocks. */ -+ numparts = nftl_partscan(mtd, parts); -+ if (!numparts) return -EIO; -+ this->bbt_td->options = NAND_BBT_ABSPAGE | NAND_BBT_8BIT | -+ NAND_BBT_SAVECONTENT | NAND_BBT_WRITE | -+ NAND_BBT_VERSION; -+ this->bbt_td->veroffs = 7; -+ this->bbt_td->pages[0] = doc->mh0_page + 1; -+ if (doc->mh1_page != -1) { -+ this->bbt_md->options = NAND_BBT_ABSPAGE | NAND_BBT_8BIT | -+ NAND_BBT_SAVECONTENT | NAND_BBT_WRITE | -+ NAND_BBT_VERSION; -+ this->bbt_md->veroffs = 7; -+ this->bbt_md->pages[0] = doc->mh1_page + 1; -+ } else { -+ this->bbt_md = NULL; -+ } -+ -+ /* It's safe to set bd=NULL below because NAND_BBT_CREATE is not set. -+ At least as nand_bbt.c is currently written. */ -+ if ((ret = nand_scan_bbt(mtd, NULL))) -+ return ret; -+ add_mtd_device(mtd); -+#ifdef CONFIG_MTD_PARTITIONS -+ if (!no_autopart) -+ add_mtd_partitions(mtd, parts, numparts); -+#endif -+ return 0; -+} -+ -+static int __init inftl_scan_bbt(struct mtd_info *mtd) -+{ -+ int ret, numparts; -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ struct mtd_partition parts[5]; -+ -+ if (this->numchips > doc->chips_per_floor) { -+ printk(KERN_ERR "Multi-floor INFTL devices not yet supported.\n"); -+ return -EIO; -+ } -+ -+ if (DoC_is_MillenniumPlus(doc)) { -+ this->bbt_td->options = NAND_BBT_2BIT | NAND_BBT_ABSPAGE; -+ if (inftl_bbt_write) -+ this->bbt_td->options |= NAND_BBT_WRITE; -+ this->bbt_td->pages[0] = 2; -+ this->bbt_md = NULL; -+ } else { -+ this->bbt_td->options = NAND_BBT_LASTBLOCK | NAND_BBT_8BIT | -+ NAND_BBT_VERSION; -+ if (inftl_bbt_write) -+ this->bbt_td->options |= NAND_BBT_WRITE; -+ this->bbt_td->offs = 8; -+ this->bbt_td->len = 8; -+ this->bbt_td->veroffs = 7; -+ this->bbt_td->maxblocks = INFTL_BBT_RESERVED_BLOCKS; -+ this->bbt_td->reserved_block_code = 0x01; -+ this->bbt_td->pattern = "MSYS_BBT"; -+ -+ this->bbt_md->options = NAND_BBT_LASTBLOCK | NAND_BBT_8BIT | -+ NAND_BBT_VERSION; -+ if (inftl_bbt_write) -+ 
this->bbt_md->options |= NAND_BBT_WRITE; -+ this->bbt_md->offs = 8; -+ this->bbt_md->len = 8; -+ this->bbt_md->veroffs = 7; -+ this->bbt_md->maxblocks = INFTL_BBT_RESERVED_BLOCKS; -+ this->bbt_md->reserved_block_code = 0x01; -+ this->bbt_md->pattern = "TBB_SYSM"; -+ } -+ -+ /* It's safe to set bd=NULL below because NAND_BBT_CREATE is not set. -+ At least as nand_bbt.c is currently written. */ -+ if ((ret = nand_scan_bbt(mtd, NULL))) -+ return ret; -+ memset((char *) parts, 0, sizeof(parts)); -+ numparts = inftl_partscan(mtd, parts); -+ /* At least for now, require the INFTL Media Header. We could probably -+ do without it for non-INFTL use, since all it gives us is -+ autopartitioning, but I want to give it more thought. */ -+ if (!numparts) return -EIO; -+ add_mtd_device(mtd); -+#ifdef CONFIG_MTD_PARTITIONS -+ if (!no_autopart) -+ add_mtd_partitions(mtd, parts, numparts); -+#endif -+ return 0; -+} -+ -+static inline int __init doc2000_init(struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ -+ this->write_byte = doc2000_write_byte; -+ this->read_byte = doc2000_read_byte; -+ this->write_buf = doc2000_writebuf; -+ this->read_buf = doc2000_readbuf; -+ this->verify_buf = doc2000_verifybuf; -+ this->scan_bbt = nftl_scan_bbt; -+ -+ doc->CDSNControl = CDSN_CTRL_FLASH_IO | CDSN_CTRL_ECC_IO; -+ doc2000_count_chips(mtd); -+ mtd->name = "DiskOnChip 2000 (NFTL Model)"; -+ return (4 * doc->chips_per_floor); -+} -+ -+static inline int __init doc2001_init(struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ -+ this->write_byte = doc2001_write_byte; -+ this->read_byte = doc2001_read_byte; -+ this->write_buf = doc2001_writebuf; -+ this->read_buf = doc2001_readbuf; -+ this->verify_buf = doc2001_verifybuf; -+ -+ ReadDOC(doc->virtadr, ChipID); -+ ReadDOC(doc->virtadr, ChipID); -+ ReadDOC(doc->virtadr, ChipID); -+ if (ReadDOC(doc->virtadr, ChipID) != 
DOC_ChipID_DocMil) { -+ /* It's not a Millennium; it's one of the newer -+ DiskOnChip 2000 units with a similar ASIC. -+ Treat it like a Millennium, except that it -+ can have multiple chips. */ -+ doc2000_count_chips(mtd); -+ mtd->name = "DiskOnChip 2000 (INFTL Model)"; -+ this->scan_bbt = inftl_scan_bbt; -+ return (4 * doc->chips_per_floor); -+ } else { -+ /* Bog-standard Millennium */ -+ doc->chips_per_floor = 1; -+ mtd->name = "DiskOnChip Millennium"; -+ this->scan_bbt = nftl_scan_bbt; -+ return 1; -+ } -+} -+ -+static inline int __init doc2001plus_init(struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct doc_priv *doc = (void *)this->priv; -+ -+ this->write_byte = NULL; -+ this->read_byte = doc2001plus_read_byte; -+ this->write_buf = doc2001plus_writebuf; -+ this->read_buf = doc2001plus_readbuf; -+ this->verify_buf = doc2001plus_verifybuf; -+ this->scan_bbt = inftl_scan_bbt; -+ this->hwcontrol = NULL; -+ this->select_chip = doc2001plus_select_chip; -+ this->cmdfunc = doc2001plus_command; -+ this->enable_hwecc = doc2001plus_enable_hwecc; -+ -+ doc->chips_per_floor = 1; -+ mtd->name = "DiskOnChip Millennium Plus"; -+ -+ return 1; -+} -+ -+static inline int __init doc_probe(unsigned long physadr) -+{ -+ unsigned char ChipID; -+ struct mtd_info *mtd; -+ struct nand_chip *nand; -+ struct doc_priv *doc; -+ void __iomem *virtadr; -+ unsigned char save_control; -+ unsigned char tmp, tmpb, tmpc; -+ int reg, len, numchips; -+ int ret = 0; -+ -+ virtadr = (void __iomem *)ioremap(physadr, DOC_IOREMAP_LEN); -+ if (!virtadr) { -+ printk(KERN_ERR "Diskonchip ioremap failed: 0x%x bytes at 0x%lx\n", DOC_IOREMAP_LEN, physadr); -+ return -EIO; -+ } -+ -+ /* It's not possible to cleanly detect the DiskOnChip - the -+ * bootup procedure will put the device into reset mode, and -+ * it's not possible to talk to it without actually writing -+ * to the DOCControl register. 
So we store the current contents -+ * of the DOCControl register's location, in case we later decide -+ * that it's not a DiskOnChip, and want to put it back how we -+ * found it. -+ */ -+ save_control = ReadDOC(virtadr, DOCControl); -+ -+ /* Reset the DiskOnChip ASIC */ -+ WriteDOC(DOC_MODE_CLR_ERR | DOC_MODE_MDWREN | DOC_MODE_RESET, -+ virtadr, DOCControl); -+ WriteDOC(DOC_MODE_CLR_ERR | DOC_MODE_MDWREN | DOC_MODE_RESET, -+ virtadr, DOCControl); -+ -+ /* Enable the DiskOnChip ASIC */ -+ WriteDOC(DOC_MODE_CLR_ERR | DOC_MODE_MDWREN | DOC_MODE_NORMAL, -+ virtadr, DOCControl); -+ WriteDOC(DOC_MODE_CLR_ERR | DOC_MODE_MDWREN | DOC_MODE_NORMAL, -+ virtadr, DOCControl); -+ -+ ChipID = ReadDOC(virtadr, ChipID); -+ -+ switch(ChipID) { -+ case DOC_ChipID_Doc2k: -+ reg = DoC_2k_ECCStatus; -+ break; -+ case DOC_ChipID_DocMil: -+ reg = DoC_ECCConf; -+ break; -+ case DOC_ChipID_DocMilPlus16: -+ case DOC_ChipID_DocMilPlus32: -+ case 0: -+ /* Possible Millennium Plus, need to do more checks */ -+ /* Possibly release from power down mode */ -+ for (tmp = 0; (tmp < 4); tmp++) -+ ReadDOC(virtadr, Mplus_Power); -+ -+ /* Reset the Millennium Plus ASIC */ -+ tmp = DOC_MODE_RESET | DOC_MODE_MDWREN | DOC_MODE_RST_LAT | -+ DOC_MODE_BDECT; -+ WriteDOC(tmp, virtadr, Mplus_DOCControl); -+ WriteDOC(~tmp, virtadr, Mplus_CtrlConfirm); -+ -+ mdelay(1); -+ /* Enable the Millennium Plus ASIC */ -+ tmp = DOC_MODE_NORMAL | DOC_MODE_MDWREN | DOC_MODE_RST_LAT | -+ DOC_MODE_BDECT; -+ WriteDOC(tmp, virtadr, Mplus_DOCControl); -+ WriteDOC(~tmp, virtadr, Mplus_CtrlConfirm); -+ mdelay(1); -+ -+ ChipID = ReadDOC(virtadr, ChipID); -+ -+ switch (ChipID) { -+ case DOC_ChipID_DocMilPlus16: -+ reg = DoC_Mplus_Toggle; -+ break; -+ case DOC_ChipID_DocMilPlus32: -+ printk(KERN_ERR "DiskOnChip Millennium Plus 32MB is not supported, ignoring.\n"); -+ default: -+ ret = -ENODEV; -+ goto notfound; -+ } -+ break; -+ -+ default: -+ ret = -ENODEV; -+ goto notfound; -+ } -+ /* Check the TOGGLE bit in the ECC register */ -+ 
tmp = ReadDOC_(virtadr, reg) & DOC_TOGGLE_BIT; -+ tmpb = ReadDOC_(virtadr, reg) & DOC_TOGGLE_BIT; -+ tmpc = ReadDOC_(virtadr, reg) & DOC_TOGGLE_BIT; -+ if ((tmp == tmpb) || (tmp != tmpc)) { -+ printk(KERN_WARNING "Possible DiskOnChip at 0x%lx failed TOGGLE test, dropping.\n", physadr); -+ ret = -ENODEV; -+ goto notfound; -+ } -+ -+ for (mtd = doclist; mtd; mtd = doc->nextdoc) { -+ unsigned char oldval; -+ unsigned char newval; -+ nand = mtd->priv; -+ doc = (void *)nand->priv; -+ /* Use the alias resolution register to determine if this is -+ in fact the same DOC aliased to a new address. If writes -+ to one chip's alias resolution register change the value on -+ the other chip, they're the same chip. */ -+ if (ChipID == DOC_ChipID_DocMilPlus16) { -+ oldval = ReadDOC(doc->virtadr, Mplus_AliasResolution); -+ newval = ReadDOC(virtadr, Mplus_AliasResolution); -+ } else { -+ oldval = ReadDOC(doc->virtadr, AliasResolution); -+ newval = ReadDOC(virtadr, AliasResolution); -+ } -+ if (oldval != newval) -+ continue; -+ if (ChipID == DOC_ChipID_DocMilPlus16) { -+ WriteDOC(~newval, virtadr, Mplus_AliasResolution); -+ oldval = ReadDOC(doc->virtadr, Mplus_AliasResolution); -+ WriteDOC(newval, virtadr, Mplus_AliasResolution); // restore it -+ } else { -+ WriteDOC(~newval, virtadr, AliasResolution); -+ oldval = ReadDOC(doc->virtadr, AliasResolution); -+ WriteDOC(newval, virtadr, AliasResolution); // restore it -+ } -+ newval = ~newval; -+ if (oldval == newval) { -+ printk(KERN_DEBUG "Found alias of DOC at 0x%lx to 0x%lx\n", doc->physadr, physadr); -+ goto notfound; -+ } -+ } -+ -+ printk(KERN_NOTICE "DiskOnChip found at 0x%lx\n", physadr); -+ -+ len = sizeof(struct mtd_info) + -+ sizeof(struct nand_chip) + -+ sizeof(struct doc_priv) + -+ (2 * sizeof(struct nand_bbt_descr)); -+ mtd = (struct mtd_info *) kmalloc(len, GFP_KERNEL); -+ if (!mtd) { -+ printk(KERN_ERR "DiskOnChip kmalloc (%d bytes) failed!\n", len); -+ ret = -ENOMEM; -+ goto fail; -+ } -+ memset(mtd, 0, len); -+ -+ nand 
= (struct nand_chip *) (mtd + 1); -+ doc = (struct doc_priv *) (nand + 1); -+ nand->bbt_td = (struct nand_bbt_descr *) (doc + 1); -+ nand->bbt_md = nand->bbt_td + 1; -+ -+ mtd->priv = (void *) nand; -+ mtd->owner = THIS_MODULE; -+ -+ nand->priv = (void *) doc; -+ nand->select_chip = doc200x_select_chip; -+ nand->hwcontrol = doc200x_hwcontrol; -+ nand->dev_ready = doc200x_dev_ready; -+ nand->waitfunc = doc200x_wait; -+ nand->block_bad = doc200x_block_bad; -+ nand->enable_hwecc = doc200x_enable_hwecc; -+ nand->calculate_ecc = doc200x_calculate_ecc; -+ nand->correct_data = doc200x_correct_data; -+ -+ nand->autooob = &doc200x_oobinfo; -+ nand->eccmode = NAND_ECC_HW6_512; -+ nand->options = NAND_USE_FLASH_BBT | NAND_HWECC_SYNDROME; -+ -+ doc->physadr = physadr; -+ doc->virtadr = virtadr; -+ doc->ChipID = ChipID; -+ doc->curfloor = -1; -+ doc->curchip = -1; -+ doc->mh0_page = -1; -+ doc->mh1_page = -1; -+ doc->nextdoc = doclist; -+ -+ if (ChipID == DOC_ChipID_Doc2k) -+ numchips = doc2000_init(mtd); -+ else if (ChipID == DOC_ChipID_DocMilPlus16) -+ numchips = doc2001plus_init(mtd); -+ else -+ numchips = doc2001_init(mtd); -+ -+ if ((ret = nand_scan(mtd, numchips))) { -+ /* DBB note: i believe nand_release is necessary here, as -+ buffers may have been allocated in nand_base. Check with -+ Thomas. FIX ME! */ -+ /* nand_release will call del_mtd_device, but we haven't yet -+ added it. This is handled without incident by -+ del_mtd_device, as far as I can tell. */ -+ nand_release(mtd); -+ kfree(mtd); -+ goto fail; -+ } -+ -+ /* Success! */ -+ doclist = mtd; -+ return 0; -+ -+notfound: -+ /* Put back the contents of the DOCControl register, in case it's not -+ actually a DiskOnChip. 
*/ -+ WriteDOC(save_control, virtadr, DOCControl); -+fail: -+ iounmap((void *)virtadr); -+ return ret; -+} -+ -+int __init init_nanddoc(void) -+{ -+ int i; -+ -+ if (doc_config_location) { -+ printk(KERN_INFO "Using configured DiskOnChip probe address 0x%lx\n", doc_config_location); -+ return doc_probe(doc_config_location); -+ } else { -+ for (i=0; (doc_locations[i] != 0xffffffff); i++) { -+ doc_probe(doc_locations[i]); -+ } -+ } -+ /* No banner message any more. Print a message if no DiskOnChip -+ found, so the user knows we at least tried. */ -+ if (!doclist) { -+ printk(KERN_INFO "No valid DiskOnChip devices found\n"); -+ return -ENODEV; -+ } -+ return 0; -+} -+ -+void __exit cleanup_nanddoc(void) -+{ -+ struct mtd_info *mtd, *nextmtd; -+ struct nand_chip *nand; -+ struct doc_priv *doc; -+ -+ for (mtd = doclist; mtd; mtd = nextmtd) { -+ nand = mtd->priv; -+ doc = (void *)nand->priv; -+ -+ nextmtd = doc->nextdoc; -+ nand_release(mtd); -+ iounmap((void *)doc->virtadr); -+ kfree(mtd); -+ } -+} -+ -+module_init(init_nanddoc); -+module_exit(cleanup_nanddoc); -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>"); -+MODULE_DESCRIPTION("M-Systems DiskOnChip 2000, Millennium and Millennium Plus device driver\n"); -Index: linux-2.6.5/drivers/mtd/nand/edb7312.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/edb7312.c 2004-04-03 22:38:22.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/edb7312.c 2005-02-01 17:11:17.000000000 -0500 -@@ -6,7 +6,7 @@ - * Derived from drivers/mtd/nand/autcpu12.c - * Copyright (c) 2001 Thomas Gleixner (gleixner@autronix.de) - * -- * $Id: edb7312.c,v 1.5 2003/04/20 07:24:40 gleixner Exp $ -+ * $Id: edb7312.c,v 1.9 2004/09/16 23:27:14 gleixner Exp $ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as -@@ -20,6 +20,7 @@ - - #include <linux/slab.h> - #include 
<linux/module.h> -+#include <linux/init.h> - #include <linux/mtd/mtd.h> - #include <linux/mtd/nand.h> - #include <linux/mtd/partitions.h> -@@ -52,9 +53,9 @@ - * Module stuff - */ - --static int ep7312_fio_pbase = EP7312_FIO_PBASE; --static int ep7312_pxdr = EP7312_PXDR; --static int ep7312_pxddr = EP7312_PXDDR; -+static unsigned long ep7312_fio_pbase = EP7312_FIO_PBASE; -+static void __iomem * ep7312_pxdr = (void __iomem *) EP7312_PXDR; -+static void __iomem * ep7312_pxddr = (void __iomem *) EP7312_PXDDR; - - #ifdef MODULE - MODULE_PARM(ep7312_fio_pbase, "i"); -@@ -83,7 +84,7 @@ - /* - * hardware specific access to control-lines - */ --static void ep7312_hwcontrol(int cmd) -+static void ep7312_hwcontrol(struct mtd_info *mtd, int cmd) - { - switch(cmd) { - -@@ -113,10 +114,13 @@ - /* - * read device ready pin - */ --static int ep7312_device_ready(void) -+static int ep7312_device_ready(struct mtd_info *mtd) - { - return 1; - } -+#ifdef CONFIG_MTD_PARTITIONS -+const char *part_probes[] = { "cmdlinepart", NULL }; -+#endif - - /* - * Main initialization routine -@@ -127,10 +131,10 @@ - const char *part_type = 0; - int mtd_parts_nb = 0; - struct mtd_partition *mtd_parts = 0; -- int ep7312_fio_base; -+ void __iomem * ep7312_fio_base; - - /* Allocate memory for MTD device structure and private data */ -- ep7312_mtd = kmalloc(sizeof(struct mtd_info) + -+ ep7312_mtd = (struct mtd_info *) kmalloc(sizeof(struct mtd_info) + - sizeof(struct nand_chip), - GFP_KERNEL); - if (!ep7312_mtd) { -@@ -139,7 +143,7 @@ - } - - /* map physical adress */ -- ep7312_fio_base = (unsigned long)ioremap(ep7312_fio_pbase, SZ_1K); -+ ep7312_fio_base = (void __iomem *)ioremap(ep7312_fio_pbase, SZ_1K); - if(!ep7312_fio_base) { - printk("ioremap EDB7312 NAND flash failed\n"); - kfree(ep7312_mtd); -@@ -171,31 +175,22 @@ - this->chip_delay = 15; - - /* Scan to find existence of the device */ -- if (nand_scan (ep7312_mtd)) { -+ if (nand_scan (ep7312_mtd, 1)) { - iounmap((void *)ep7312_fio_base); - kfree 
(ep7312_mtd); - return -ENXIO; - } - -- /* Allocate memory for internal data buffer */ -- this->data_buf = kmalloc (sizeof(u_char) * (ep7312_mtd->oobblock + ep7312_mtd->oobsize), GFP_KERNEL); -- if (!this->data_buf) { -- printk("Unable to allocate NAND data buffer for EDB7312.\n"); -- iounmap((void *)ep7312_fio_base); -- kfree (ep7312_mtd); -- return -ENOMEM; -- } -- --#ifdef CONFIG_MTD_CMDLINE_PARTS -- mtd_parts_nb = parse_cmdline_partitions(ep7312_mtd, &mtd_parts, -- "edb7312-nand"); -+#ifdef CONFIG_MTD_PARTITIONS -+ ep7312_mtd->name = "edb7312-nand"; -+ mtd_parts_nb = parse_mtd_partitions(ep7312_mtd, part_probes, -+ &mtd_parts, 0); - if (mtd_parts_nb > 0) -- part_type = "command line"; -+ part_type = "command line"; - else -- mtd_parts_nb = 0; -+ mtd_parts_nb = 0; - #endif -- if (mtd_parts_nb == 0) -- { -+ if (mtd_parts_nb == 0) { - mtd_parts = partition_info; - mtd_parts_nb = NUM_PARTITIONS; - part_type = "static"; -@@ -217,8 +212,8 @@ - { - struct nand_chip *this = (struct nand_chip *) &ep7312_mtd[1]; - -- /* Unregister the device */ -- del_mtd_device (ep7312_mtd); -+ /* Release resources, unregister device */ -+ nand_release (ap7312_mtd); - - /* Free internal data buffer */ - kfree (this->data_buf); -Index: linux-2.6.5/drivers/mtd/nand/h1910.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/h1910.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/h1910.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,208 @@ -+/* -+ * drivers/mtd/nand/h1910.c -+ * -+ * Copyright (C) 2003 Joshua Wise (joshua@joshuawise.com) -+ * -+ * Derived from drivers/mtd/nand/edb7312.c -+ * Copyright (C) 2002 Marius Gröger (mag@sysgo.de) -+ * Copyright (c) 2001 Thomas Gleixner (gleixner@autronix.de) -+ * -+ * $Id: h1910.c,v 1.3 2004/09/16 23:27:14 gleixner Exp $ -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as 
-+ * published by the Free Software Foundation. -+ * -+ * Overview: -+ * This is a device driver for the NAND flash device found on the -+ * iPAQ h1910 board which utilizes the Samsung K9F2808 part. This is -+ * a 128Mibit (16MiB x 8 bits) NAND flash device. -+ */ -+ -+#include <linux/slab.h> -+#include <linux/init.h> -+#include <linux/module.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/nand.h> -+#include <linux/mtd/partitions.h> -+#include <asm/io.h> -+#include <asm/arch/hardware.h> /* for CLPS7111_VIRT_BASE */ -+#include <asm/sizes.h> -+#include <asm/arch/h1900-gpio.h> -+#include <asm/arch/ipaq.h> -+ -+/* -+ * MTD structure for EDB7312 board -+ */ -+static struct mtd_info *h1910_nand_mtd = NULL; -+ -+/* -+ * Module stuff -+ */ -+ -+#ifdef CONFIG_MTD_PARTITIONS -+/* -+ * Define static partitions for flash device -+ */ -+static struct mtd_partition partition_info[] = { -+ { name: "h1910 NAND Flash", -+ offset: 0, -+ size: 16*1024*1024 } -+}; -+#define NUM_PARTITIONS 1 -+ -+#endif -+ -+ -+/* -+ * hardware specific access to control-lines -+ */ -+static void h1910_hwcontrol(struct mtd_info *mtd, int cmd) -+{ -+ struct nand_chip* this = (struct nand_chip *) (mtd->priv); -+ -+ switch(cmd) { -+ -+ case NAND_CTL_SETCLE: -+ this->IO_ADDR_R |= (1 << 2); -+ this->IO_ADDR_W |= (1 << 2); -+ break; -+ case NAND_CTL_CLRCLE: -+ this->IO_ADDR_R &= ~(1 << 2); -+ this->IO_ADDR_W &= ~(1 << 2); -+ break; -+ -+ case NAND_CTL_SETALE: -+ this->IO_ADDR_R |= (1 << 3); -+ this->IO_ADDR_W |= (1 << 3); -+ break; -+ case NAND_CTL_CLRALE: -+ this->IO_ADDR_R &= ~(1 << 3); -+ this->IO_ADDR_W &= ~(1 << 3); -+ break; -+ -+ case NAND_CTL_SETNCE: -+ break; -+ case NAND_CTL_CLRNCE: -+ break; -+ } -+} -+ -+/* -+ * read device ready pin -+ */ -+#if 0 -+static int h1910_device_ready(struct mtd_info *mtd) -+{ -+ return (GPLR(55) & GPIO_bit(55)); -+} -+#endif -+ -+/* -+ * Main initialization routine -+ */ -+static int __init h1910_init (void) -+{ -+ struct nand_chip *this; -+ const char 
*part_type = 0; -+ int mtd_parts_nb = 0; -+ struct mtd_partition *mtd_parts = 0; -+ void __iomem *nandaddr; -+ -+ if (!machine_is_h1900()) -+ return -ENODEV; -+ -+ nandaddr = (void __iomem *)__ioremap(0x08000000, 0x1000, 0, 1); -+ if (!nandaddr) { -+ printk("Failed to ioremap nand flash.\n"); -+ return -ENOMEM; -+ } -+ -+ /* Allocate memory for MTD device structure and private data */ -+ h1910_nand_mtd = (struct mtd_info *) kmalloc(sizeof(struct mtd_info) + -+ sizeof(struct nand_chip), -+ GFP_KERNEL); -+ if (!h1910_nand_mtd) { -+ printk("Unable to allocate h1910 NAND MTD device structure.\n"); -+ iounmap ((void *) nandaddr); -+ return -ENOMEM; -+ } -+ -+ /* Get pointer to private data */ -+ this = (struct nand_chip *) (&h1910_nand_mtd[1]); -+ -+ /* Initialize structures */ -+ memset((char *) h1910_nand_mtd, 0, sizeof(struct mtd_info)); -+ memset((char *) this, 0, sizeof(struct nand_chip)); -+ -+ /* Link the private data with the MTD structure */ -+ h1910_nand_mtd->priv = this; -+ -+ /* -+ * Enable VPEN -+ */ -+ GPSR(37) = GPIO_bit(37); -+ -+ /* insert callbacks */ -+ this->IO_ADDR_R = nandaddr; -+ this->IO_ADDR_W = nandaddr; -+ this->hwcontrol = h1910_hwcontrol; -+ this->dev_ready = NULL; /* unknown whether that was correct or not so we will just do it like this */ -+ /* 15 us command delay time */ -+ this->chip_delay = 50; -+ this->eccmode = NAND_ECC_SOFT; -+ this->options = NAND_NO_AUTOINCR; -+ -+ /* Scan to find existence of the device */ -+ if (nand_scan (h1910_nand_mtd, 1)) { -+ printk(KERN_NOTICE "No NAND device - returning -ENXIO\n"); -+ kfree (h1910_nand_mtd); -+ iounmap ((void *) nandaddr); -+ return -ENXIO; -+ } -+ -+#ifdef CONFIG_MTD_CMDLINE_PARTS -+ mtd_parts_nb = parse_cmdline_partitions(h1910_nand_mtd, &mtd_parts, -+ "h1910-nand"); -+ if (mtd_parts_nb > 0) -+ part_type = "command line"; -+ else -+ mtd_parts_nb = 0; -+#endif -+ if (mtd_parts_nb == 0) -+ { -+ mtd_parts = partition_info; -+ mtd_parts_nb = NUM_PARTITIONS; -+ part_type = "static"; -+ } -+ 
-+ /* Register the partitions */ -+ printk(KERN_NOTICE "Using %s partition definition\n", part_type); -+ add_mtd_partitions(h1910_nand_mtd, mtd_parts, mtd_parts_nb); -+ -+ /* Return happy */ -+ return 0; -+} -+module_init(h1910_init); -+ -+/* -+ * Clean up routine -+ */ -+static void __exit h1910_cleanup (void) -+{ -+ struct nand_chip *this = (struct nand_chip *) &h1910_nand_mtd[1]; -+ -+ /* Release resources, unregister device */ -+ nand_release (h1910_nand_mtd); -+ -+ /* Release io resource */ -+ iounmap ((void *) this->IO_ADDR_W); -+ -+ /* Free the MTD device structure */ -+ kfree (h1910_nand_mtd); -+} -+module_exit(h1910_cleanup); -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Joshua Wise <joshua at joshuawise dot com>"); -+MODULE_DESCRIPTION("NAND flash driver for iPAQ h1910"); -Index: linux-2.6.5/drivers/mtd/nand/nand_base.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/nand_base.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/nand_base.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,2581 @@ -+/* -+ * drivers/mtd/nand.c -+ * -+ * Overview: -+ * This is the generic MTD driver for NAND flash devices. It should be -+ * capable of working with almost all NAND chips currently available. -+ * Basic support for AG-AND chips is provided. -+ * -+ * Additional technical information is available on -+ * http://www.linux-mtd.infradead.org/tech/nand.html -+ * -+ * Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com) -+ * 2002 Thomas Gleixner (tglx@linutronix.de) -+ * -+ * 02-08-2004 tglx: support for strange chips, which cannot auto increment -+ * pages on read / read_oob -+ * -+ * 03-17-2004 tglx: Check ready before auto increment check. Simon Bayes -+ * pointed this out, as he marked an auto increment capable chip -+ * as NOAUTOINCR in the board driver. 
-+ * Make reads over block boundaries work too -+ * -+ * 04-14-2004 tglx: first working version for 2k page size chips -+ * -+ * 05-19-2004 tglx: Basic support for Renesas AG-AND chips -+ * -+ * Credits: -+ * David Woodhouse for adding multichip support -+ * -+ * Aleph One Ltd. and Toby Churchill Ltd. for supporting the -+ * rework for 2K page size chips -+ * -+ * TODO: -+ * Enable cached programming for 2k page size chips -+ * Check, if mtd->ecctype should be set to MTD_ECC_HW -+ * if we have HW ecc support. -+ * The AG-AND chips have nice features for speed improvement, -+ * which are not supported yet. Read / program 4 pages in one go. -+ * -+ * $Id: nand_base.c,v 1.116 2004/08/30 18:00:45 gleixner Exp $ -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ */ -+ -+#include <linux/delay.h> -+#include <linux/errno.h> -+#include <linux/sched.h> -+#include <linux/slab.h> -+#include <linux/types.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/nand.h> -+#include <linux/mtd/nand_ecc.h> -+#include <linux/mtd/compatmac.h> -+#include <linux/interrupt.h> -+#include <linux/bitops.h> -+#include <asm/io.h> -+ -+#ifdef CONFIG_MTD_PARTITIONS -+#include <linux/mtd/partitions.h> -+#endif -+ -+/* Define default oob placement schemes for large and small page devices */ -+static struct nand_oobinfo nand_oob_8 = { -+ .useecc = MTD_NANDECC_AUTOPLACE, -+ .eccbytes = 3, -+ .eccpos = {0, 1, 2}, -+ .oobfree = { {3, 2}, {6, 2} } -+}; -+ -+static struct nand_oobinfo nand_oob_16 = { -+ .useecc = MTD_NANDECC_AUTOPLACE, -+ .eccbytes = 6, -+ .eccpos = {0, 1, 2, 3, 6, 7}, -+ .oobfree = { {8, 8} } -+}; -+ -+static struct nand_oobinfo nand_oob_64 = { -+ .useecc = MTD_NANDECC_AUTOPLACE, -+ .eccbytes = 24, -+ .eccpos = { -+ 40, 41, 42, 43, 44, 45, 46, 47, -+ 48, 49, 50, 51, 52, 53, 54, 55, -+ 56, 57, 58, 59, 60, 61, 62, 63}, -+ .oobfree = { 
{2, 38} } -+}; -+ -+/* This is used for padding purposes in nand_write_oob */ -+static u_char ffchars[] = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+}; -+ -+/* -+ * NAND low-level MTD interface functions -+ */ -+static void nand_write_buf(struct mtd_info *mtd, const u_char *buf, int len); -+static void nand_read_buf(struct mtd_info *mtd, u_char *buf, int len); -+static int nand_verify_buf(struct mtd_info *mtd, const u_char *buf, int len); -+ -+static int nand_read (struct mtd_info *mtd, loff_t from, size_t len, size_t * retlen, u_char * buf); -+static int nand_read_ecc (struct mtd_info *mtd, loff_t from, size_t len, -+ size_t * retlen, u_char * buf, u_char * eccbuf, struct nand_oobinfo *oobsel); -+static int nand_read_oob (struct mtd_info *mtd, loff_t from, size_t len, size_t * retlen, u_char * buf); -+static int nand_write (struct mtd_info *mtd, loff_t to, size_t len, size_t * retlen, const u_char * buf); -+static int nand_write_ecc (struct mtd_info *mtd, loff_t to, size_t len, -+ size_t * retlen, const u_char * buf, u_char * eccbuf, struct nand_oobinfo *oobsel); -+static int nand_write_oob (struct mtd_info *mtd, loff_t to, size_t len, size_t * retlen, const u_char *buf); -+static int nand_writev (struct mtd_info *mtd, const struct kvec *vecs, -+ unsigned long count, loff_t to, size_t * retlen); -+static int nand_writev_ecc (struct mtd_info *mtd, const struct kvec *vecs, -+ unsigned long count, loff_t to, size_t * retlen, u_char *eccbuf, struct nand_oobinfo *oobsel); -+static int nand_erase (struct mtd_info *mtd, struct erase_info *instr); -+static void nand_sync (struct mtd_info *mtd); -+ -+/* Some 
internal functions */ -+static int nand_write_page (struct mtd_info *mtd, struct nand_chip *this, int page, u_char *oob_buf, -+ struct nand_oobinfo *oobsel, int mode); -+#ifdef CONFIG_MTD_NAND_VERIFY_WRITE -+static int nand_verify_pages (struct mtd_info *mtd, struct nand_chip *this, int page, int numpages, -+ u_char *oob_buf, struct nand_oobinfo *oobsel, int chipnr, int oobmode); -+#else -+#define nand_verify_pages(...) (0) -+#endif -+ -+static void nand_get_chip (struct nand_chip *this, struct mtd_info *mtd, int new_state); -+ -+/** -+ * nand_release_chip - [GENERIC] release chip -+ * @mtd: MTD device structure -+ * -+ * Deselect, release chip lock and wake up anyone waiting on the device -+ */ -+static void nand_release_chip (struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ -+ /* De-select the NAND device */ -+ this->select_chip(mtd, -1); -+ /* Release the chip */ -+ spin_lock_bh (&this->chip_lock); -+ this->state = FL_READY; -+ wake_up (&this->wq); -+ spin_unlock_bh (&this->chip_lock); -+} -+ -+/** -+ * nand_read_byte - [DEFAULT] read one byte from the chip -+ * @mtd: MTD device structure -+ * -+ * Default read function for 8bit buswith -+ */ -+static u_char nand_read_byte(struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ return readb(this->IO_ADDR_R); -+} -+ -+/** -+ * nand_write_byte - [DEFAULT] write one byte to the chip -+ * @mtd: MTD device structure -+ * @byte: pointer to data byte to write -+ * -+ * Default write function for 8it buswith -+ */ -+static void nand_write_byte(struct mtd_info *mtd, u_char byte) -+{ -+ struct nand_chip *this = mtd->priv; -+ writeb(byte, this->IO_ADDR_W); -+} -+ -+/** -+ * nand_read_byte16 - [DEFAULT] read one byte endianess aware from the chip -+ * @mtd: MTD device structure -+ * -+ * Default read function for 16bit buswith with -+ * endianess conversion -+ */ -+static u_char nand_read_byte16(struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ return (u_char) 
cpu_to_le16(readw(this->IO_ADDR_R)); -+} -+ -+/** -+ * nand_write_byte16 - [DEFAULT] write one byte endianess aware to the chip -+ * @mtd: MTD device structure -+ * @byte: pointer to data byte to write -+ * -+ * Default write function for 16bit buswith with -+ * endianess conversion -+ */ -+static void nand_write_byte16(struct mtd_info *mtd, u_char byte) -+{ -+ struct nand_chip *this = mtd->priv; -+ writew(le16_to_cpu((u16) byte), this->IO_ADDR_W); -+} -+ -+/** -+ * nand_read_word - [DEFAULT] read one word from the chip -+ * @mtd: MTD device structure -+ * -+ * Default read function for 16bit buswith without -+ * endianess conversion -+ */ -+static u16 nand_read_word(struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ return readw(this->IO_ADDR_R); -+} -+ -+/** -+ * nand_write_word - [DEFAULT] write one word to the chip -+ * @mtd: MTD device structure -+ * @word: data word to write -+ * -+ * Default write function for 16bit buswith without -+ * endianess conversion -+ */ -+static void nand_write_word(struct mtd_info *mtd, u16 word) -+{ -+ struct nand_chip *this = mtd->priv; -+ writew(word, this->IO_ADDR_W); -+} -+ -+/** -+ * nand_select_chip - [DEFAULT] control CE line -+ * @mtd: MTD device structure -+ * @chip: chipnumber to select, -1 for deselect -+ * -+ * Default select function for 1 chip devices. 
-+ */ -+static void nand_select_chip(struct mtd_info *mtd, int chip) -+{ -+ struct nand_chip *this = mtd->priv; -+ switch(chip) { -+ case -1: -+ this->hwcontrol(mtd, NAND_CTL_CLRNCE); -+ break; -+ case 0: -+ this->hwcontrol(mtd, NAND_CTL_SETNCE); -+ break; -+ -+ default: -+ BUG(); -+ } -+} -+ -+/** -+ * nand_write_buf - [DEFAULT] write buffer to chip -+ * @mtd: MTD device structure -+ * @buf: data buffer -+ * @len: number of bytes to write -+ * -+ * Default write function for 8bit buswith -+ */ -+static void nand_write_buf(struct mtd_info *mtd, const u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ -+ for (i=0; i<len; i++) -+ writeb(buf[i], this->IO_ADDR_W); -+} -+ -+/** -+ * nand_read_buf - [DEFAULT] read chip data into buffer -+ * @mtd: MTD device structure -+ * @buf: buffer to store date -+ * @len: number of bytes to read -+ * -+ * Default read function for 8bit buswith -+ */ -+static void nand_read_buf(struct mtd_info *mtd, u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ -+ for (i=0; i<len; i++) -+ buf[i] = readb(this->IO_ADDR_R); -+} -+ -+/** -+ * nand_verify_buf - [DEFAULT] Verify chip data against buffer -+ * @mtd: MTD device structure -+ * @buf: buffer containing the data to compare -+ * @len: number of bytes to compare -+ * -+ * Default verify function for 8bit buswith -+ */ -+static int nand_verify_buf(struct mtd_info *mtd, const u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ -+ for (i=0; i<len; i++) -+ if (buf[i] != readb(this->IO_ADDR_R)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/** -+ * nand_write_buf16 - [DEFAULT] write buffer to chip -+ * @mtd: MTD device structure -+ * @buf: data buffer -+ * @len: number of bytes to write -+ * -+ * Default write function for 16bit buswith -+ */ -+static void nand_write_buf16(struct mtd_info *mtd, const u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ u16 *p = (u16 *) buf; -+ len >>= 1; -+ -+ for 
(i=0; i<len; i++) -+ writew(p[i], this->IO_ADDR_W); -+ -+} -+ -+/** -+ * nand_read_buf16 - [DEFAULT] read chip data into buffer -+ * @mtd: MTD device structure -+ * @buf: buffer to store date -+ * @len: number of bytes to read -+ * -+ * Default read function for 16bit buswith -+ */ -+static void nand_read_buf16(struct mtd_info *mtd, u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ u16 *p = (u16 *) buf; -+ len >>= 1; -+ -+ for (i=0; i<len; i++) -+ p[i] = readw(this->IO_ADDR_R); -+} -+ -+/** -+ * nand_verify_buf16 - [DEFAULT] Verify chip data against buffer -+ * @mtd: MTD device structure -+ * @buf: buffer containing the data to compare -+ * @len: number of bytes to compare -+ * -+ * Default verify function for 16bit buswith -+ */ -+static int nand_verify_buf16(struct mtd_info *mtd, const u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ u16 *p = (u16 *) buf; -+ len >>= 1; -+ -+ for (i=0; i<len; i++) -+ if (p[i] != readw(this->IO_ADDR_R)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/** -+ * nand_block_bad - [DEFAULT] Read bad block marker from the chip -+ * @mtd: MTD device structure -+ * @ofs: offset from device start -+ * @getchip: 0, if the chip is already selected -+ * -+ * Check, if the block is bad. 
-+ */ -+static int nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip) -+{ -+ int page, chipnr, res = 0; -+ struct nand_chip *this = mtd->priv; -+ u16 bad; -+ -+ if (getchip) { -+ page = (int)(ofs >> this->page_shift); -+ chipnr = (int)(ofs >> this->chip_shift); -+ -+ /* Grab the lock and see if the device is available */ -+ nand_get_chip (this, mtd, FL_READING); -+ -+ /* Select the NAND device */ -+ this->select_chip(mtd, chipnr); -+ } else -+ page = (int) ofs; -+ -+ if (this->options & NAND_BUSWIDTH_16) { -+ this->cmdfunc (mtd, NAND_CMD_READOOB, this->badblockpos & 0xFE, page & this->pagemask); -+ bad = cpu_to_le16(this->read_word(mtd)); -+ if (this->badblockpos & 0x1) -+ bad >>= 1; -+ if ((bad & 0xFF) != 0xff) -+ res = 1; -+ } else { -+ this->cmdfunc (mtd, NAND_CMD_READOOB, this->badblockpos, page & this->pagemask); -+ if (this->read_byte(mtd) != 0xff) -+ res = 1; -+ } -+ -+ if (getchip) { -+ /* Deselect and wake up anyone waiting on the device */ -+ nand_release_chip(mtd); -+ } -+ -+ return res; -+} -+ -+/** -+ * nand_default_block_markbad - [DEFAULT] mark a block bad -+ * @mtd: MTD device structure -+ * @ofs: offset from device start -+ * -+ * This is the default implementation, which can be overridden by -+ * a hardware specific driver. -+*/ -+static int nand_default_block_markbad(struct mtd_info *mtd, loff_t ofs) -+{ -+ struct nand_chip *this = mtd->priv; -+ u_char buf[2] = {0, 0}; -+ size_t retlen; -+ int block; -+ -+ /* Get block number */ -+ block = ((int) ofs) >> this->bbt_erase_shift; -+ this->bbt[block >> 2] |= 0x01 << ((block & 0x03) << 1); -+ -+ /* Do we have a flash based bad block table ? 
*/ -+ if (this->options & NAND_USE_FLASH_BBT) -+ return nand_update_bbt (mtd, ofs); -+ -+ /* We write two bytes, so we dont have to mess with 16 bit access */ -+ ofs += mtd->oobsize + (this->badblockpos & ~0x01); -+ return nand_write_oob (mtd, ofs , 2, &retlen, buf); -+} -+ -+/** -+ * nand_check_wp - [GENERIC] check if the chip is write protected -+ * @mtd: MTD device structure -+ * Check, if the device is write protected -+ * -+ * The function expects, that the device is already selected -+ */ -+static int nand_check_wp (struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ /* Check the WP bit */ -+ this->cmdfunc (mtd, NAND_CMD_STATUS, -1, -1); -+ return (this->read_byte(mtd) & 0x80) ? 0 : 1; -+} -+ -+/** -+ * nand_block_checkbad - [GENERIC] Check if a block is marked bad -+ * @mtd: MTD device structure -+ * @ofs: offset from device start -+ * @getchip: 0, if the chip is already selected -+ * @allowbbt: 1, if its allowed to access the bbt area -+ * -+ * Check, if the block is bad. Either by reading the bad block table or -+ * calling of the scan function. -+ */ -+static int nand_block_checkbad (struct mtd_info *mtd, loff_t ofs, int getchip, int allowbbt) -+{ -+ struct nand_chip *this = mtd->priv; -+ -+ if (!this->bbt) -+ return this->block_bad(mtd, ofs, getchip); -+ -+ /* Return info from the table */ -+ return nand_isbad_bbt (mtd, ofs, allowbbt); -+} -+ -+/** -+ * nand_command - [DEFAULT] Send command to NAND device -+ * @mtd: MTD device structure -+ * @command: the command to be sent -+ * @column: the column address for this command, -1 if none -+ * @page_addr: the page address for this command, -1 if none -+ * -+ * Send command to NAND device. 
This function is used for small page -+ * devices (256/512 Bytes per page) -+ */ -+static void nand_command (struct mtd_info *mtd, unsigned command, int column, int page_addr) -+{ -+ register struct nand_chip *this = mtd->priv; -+ -+ /* Begin command latch cycle */ -+ this->hwcontrol(mtd, NAND_CTL_SETCLE); -+ /* -+ * Write out the command to the device. -+ */ -+ if (command == NAND_CMD_SEQIN) { -+ int readcmd; -+ -+ if (column >= mtd->oobblock) { -+ /* OOB area */ -+ column -= mtd->oobblock; -+ readcmd = NAND_CMD_READOOB; -+ } else if (column < 256) { -+ /* First 256 bytes --> READ0 */ -+ readcmd = NAND_CMD_READ0; -+ } else { -+ column -= 256; -+ readcmd = NAND_CMD_READ1; -+ } -+ this->write_byte(mtd, readcmd); -+ } -+ this->write_byte(mtd, command); -+ -+ /* Set ALE and clear CLE to start address cycle */ -+ this->hwcontrol(mtd, NAND_CTL_CLRCLE); -+ -+ if (column != -1 || page_addr != -1) { -+ this->hwcontrol(mtd, NAND_CTL_SETALE); -+ -+ /* Serially input address */ -+ if (column != -1) { -+ /* Adjust columns for 16 bit buswidth */ -+ if (this->options & NAND_BUSWIDTH_16) -+ column >>= 1; -+ this->write_byte(mtd, column); -+ } -+ if (page_addr != -1) { -+ this->write_byte(mtd, (unsigned char) (page_addr & 0xff)); -+ this->write_byte(mtd, (unsigned char) ((page_addr >> 8) & 0xff)); -+ /* One more address cycle for higher density devices */ -+ if (this->chipsize & 0x0c000000) -+ this->write_byte(mtd, (unsigned char) ((page_addr >> 16) & 0x0f)); -+ } -+ /* Latch in address */ -+ this->hwcontrol(mtd, NAND_CTL_CLRALE); -+ } -+ -+ /* -+ * program and erase have their own busy handlers -+ * status and sequential in needs no delay -+ */ -+ switch (command) { -+ -+ case NAND_CMD_PAGEPROG: -+ case NAND_CMD_ERASE1: -+ case NAND_CMD_ERASE2: -+ case NAND_CMD_SEQIN: -+ case NAND_CMD_STATUS: -+ return; -+ -+ case NAND_CMD_RESET: -+ if (this->dev_ready) -+ break; -+ udelay(this->chip_delay); -+ this->hwcontrol(mtd, NAND_CTL_SETCLE); -+ this->write_byte(mtd, NAND_CMD_STATUS); -+ 
this->hwcontrol(mtd, NAND_CTL_CLRCLE); -+ while ( !(this->read_byte(mtd) & 0x40)); -+ return; -+ -+ /* This applies to read commands */ -+ default: -+ /* -+ * If we don't have access to the busy pin, we apply the given -+ * command delay -+ */ -+ if (!this->dev_ready) { -+ udelay (this->chip_delay); -+ return; -+ } -+ } -+ -+ /* Apply this short delay always to ensure that we do wait tWB in -+ * any case on any machine. */ -+ ndelay (100); -+ /* wait until command is processed */ -+ while (!this->dev_ready(mtd)); -+} -+ -+/** -+ * nand_command_lp - [DEFAULT] Send command to NAND large page device -+ * @mtd: MTD device structure -+ * @command: the command to be sent -+ * @column: the column address for this command, -1 if none -+ * @page_addr: the page address for this command, -1 if none -+ * -+ * Send command to NAND device. This is the version for the new large page devices -+ * We dont have the seperate regions as we have in the small page devices. -+ * We must emulate NAND_CMD_READOOB to keep the code compatible. -+ * -+ */ -+static void nand_command_lp (struct mtd_info *mtd, unsigned command, int column, int page_addr) -+{ -+ register struct nand_chip *this = mtd->priv; -+ -+ /* Emulate NAND_CMD_READOOB */ -+ if (command == NAND_CMD_READOOB) { -+ column += mtd->oobblock; -+ command = NAND_CMD_READ0; -+ } -+ -+ -+ /* Begin command latch cycle */ -+ this->hwcontrol(mtd, NAND_CTL_SETCLE); -+ /* Write out the command to the device. 
*/ -+ this->write_byte(mtd, command); -+ /* End command latch cycle */ -+ this->hwcontrol(mtd, NAND_CTL_CLRCLE); -+ -+ if (column != -1 || page_addr != -1) { -+ this->hwcontrol(mtd, NAND_CTL_SETALE); -+ -+ /* Serially input address */ -+ if (column != -1) { -+ /* Adjust columns for 16 bit buswidth */ -+ if (this->options & NAND_BUSWIDTH_16) -+ column >>= 1; -+ this->write_byte(mtd, column & 0xff); -+ this->write_byte(mtd, column >> 8); -+ } -+ if (page_addr != -1) { -+ this->write_byte(mtd, (unsigned char) (page_addr & 0xff)); -+ this->write_byte(mtd, (unsigned char) ((page_addr >> 8) & 0xff)); -+ /* One more address cycle for devices > 128MiB */ -+ if (this->chipsize > (128 << 20)) -+ this->write_byte(mtd, (unsigned char) ((page_addr >> 16) & 0xff)); -+ } -+ /* Latch in address */ -+ this->hwcontrol(mtd, NAND_CTL_CLRALE); -+ } -+ -+ /* -+ * program and erase have their own busy handlers -+ * status and sequential in needs no delay -+ */ -+ switch (command) { -+ -+ case NAND_CMD_CACHEDPROG: -+ case NAND_CMD_PAGEPROG: -+ case NAND_CMD_ERASE1: -+ case NAND_CMD_ERASE2: -+ case NAND_CMD_SEQIN: -+ case NAND_CMD_STATUS: -+ return; -+ -+ -+ case NAND_CMD_RESET: -+ if (this->dev_ready) -+ break; -+ udelay(this->chip_delay); -+ this->hwcontrol(mtd, NAND_CTL_SETCLE); -+ this->write_byte(mtd, NAND_CMD_STATUS); -+ this->hwcontrol(mtd, NAND_CTL_CLRCLE); -+ while ( !(this->read_byte(mtd) & 0x40)); -+ return; -+ -+ case NAND_CMD_READ0: -+ /* Begin command latch cycle */ -+ this->hwcontrol(mtd, NAND_CTL_SETCLE); -+ /* Write out the start read command */ -+ this->write_byte(mtd, NAND_CMD_READSTART); -+ /* End command latch cycle */ -+ this->hwcontrol(mtd, NAND_CTL_CLRCLE); -+ /* Fall through into ready check */ -+ -+ /* This applies to read commands */ -+ default: -+ /* -+ * If we don't have access to the busy pin, we apply the given -+ * command delay -+ */ -+ if (!this->dev_ready) { -+ udelay (this->chip_delay); -+ return; -+ } -+ } -+ -+ /* Apply this short delay always to 
ensure that we do wait tWB in -+ * any case on any machine. */ -+ ndelay (100); -+ /* wait until command is processed */ -+ while (!this->dev_ready(mtd)); -+} -+ -+/** -+ * nand_get_chip - [GENERIC] Get chip for selected access -+ * @this: the nand chip descriptor -+ * @mtd: MTD device structure -+ * @new_state: the state which is requested -+ * -+ * Get the device and lock it for exclusive access -+ */ -+static void nand_get_chip (struct nand_chip *this, struct mtd_info *mtd, int new_state) -+{ -+ -+ DECLARE_WAITQUEUE (wait, current); -+ -+ /* -+ * Grab the lock and see if the device is available -+ */ -+retry: -+ spin_lock_bh (&this->chip_lock); -+ -+ if (this->state == FL_READY) { -+ this->state = new_state; -+ spin_unlock_bh (&this->chip_lock); -+ return; -+ } -+ -+ set_current_state (TASK_UNINTERRUPTIBLE); -+ add_wait_queue (&this->wq, &wait); -+ spin_unlock_bh (&this->chip_lock); -+ schedule (); -+ remove_wait_queue (&this->wq, &wait); -+ goto retry; -+} -+ -+/** -+ * nand_wait - [DEFAULT] wait until the command is done -+ * @mtd: MTD device structure -+ * @this: NAND chip structure -+ * @state: state to select the max. timeout value -+ * -+ * Wait for command done. This applies to erase and program only -+ * Erase can take up to 400ms and program up to 20ms according to -+ * general NAND and SmartMedia specs -+ * -+*/ -+static int nand_wait(struct mtd_info *mtd, struct nand_chip *this, int state) -+{ -+ -+ unsigned long timeo = jiffies; -+ int status; -+ -+ if (state == FL_ERASING) -+ timeo += (HZ * 400) / 1000; -+ else -+ timeo += (HZ * 20) / 1000; -+ -+ /* Apply this short delay always to ensure that we do wait tWB in -+ * any case on any machine. 
*/ -+ ndelay (100); -+ -+ spin_lock_bh (&this->chip_lock); -+ if ((state == FL_ERASING) && (this->options & NAND_IS_AND)) -+ this->cmdfunc (mtd, NAND_CMD_STATUS_MULTI, -1, -1); -+ else -+ this->cmdfunc (mtd, NAND_CMD_STATUS, -1, -1); -+ -+ while (time_before(jiffies, timeo)) { -+ /* Check, if we were interrupted */ -+ if (this->state != state) { -+ spin_unlock_bh (&this->chip_lock); -+ return 0; -+ } -+ if (this->dev_ready) { -+ if (this->dev_ready(mtd)) -+ break; -+ } else { -+ if (this->read_byte(mtd) & NAND_STATUS_READY) -+ break; -+ } -+ spin_unlock_bh (&this->chip_lock); -+ yield (); -+ spin_lock_bh (&this->chip_lock); -+ } -+ status = (int) this->read_byte(mtd); -+ spin_unlock_bh (&this->chip_lock); -+ -+ return status; -+} -+ -+/** -+ * nand_write_page - [GENERIC] write one page -+ * @mtd: MTD device structure -+ * @this: NAND chip structure -+ * @page: startpage inside the chip, must be called with (page & this->pagemask) -+ * @oob_buf: out of band data buffer -+ * @oobsel: out of band selecttion structre -+ * @cached: 1 = enable cached programming if supported by chip -+ * -+ * Nand_page_program function is used for write and writev ! -+ * This function will always program a full page of data -+ * If you call it with a non page aligned buffer, you're lost :) -+ * -+ * Cached programming is not supported yet. -+ */ -+static int nand_write_page (struct mtd_info *mtd, struct nand_chip *this, int page, -+ u_char *oob_buf, struct nand_oobinfo *oobsel, int cached) -+{ -+ int i, status; -+ u_char ecc_code[8]; -+ int eccmode = oobsel->useecc ? 
this->eccmode : NAND_ECC_NONE; -+ int *oob_config = oobsel->eccpos; -+ int datidx = 0, eccidx = 0, eccsteps = this->eccsteps; -+ int eccbytes = 0; -+ -+ /* FIXME: Enable cached programming */ -+ cached = 0; -+ -+ /* Send command to begin auto page programming */ -+ this->cmdfunc (mtd, NAND_CMD_SEQIN, 0x00, page); -+ -+ /* Write out complete page of data, take care of eccmode */ -+ switch (eccmode) { -+ /* No ecc, write all */ -+ case NAND_ECC_NONE: -+ printk (KERN_WARNING "Writing data without ECC to NAND-FLASH is not recommended\n"); -+ this->write_buf(mtd, this->data_poi, mtd->oobblock); -+ break; -+ -+ /* Software ecc 3/256, write all */ -+ case NAND_ECC_SOFT: -+ for (; eccsteps; eccsteps--) { -+ this->calculate_ecc(mtd, &this->data_poi[datidx], ecc_code); -+ for (i = 0; i < 3; i++, eccidx++) -+ oob_buf[oob_config[eccidx]] = ecc_code[i]; -+ datidx += this->eccsize; -+ } -+ this->write_buf(mtd, this->data_poi, mtd->oobblock); -+ break; -+ -+ /* Hardware ecc 8 byte / 512 byte data */ -+ case NAND_ECC_HW8_512: -+ eccbytes += 2; -+ /* Hardware ecc 6 byte / 512 byte data */ -+ case NAND_ECC_HW6_512: -+ eccbytes += 3; -+ /* Hardware ecc 3 byte / 256 data */ -+ /* Hardware ecc 3 byte / 512 byte data */ -+ case NAND_ECC_HW3_256: -+ case NAND_ECC_HW3_512: -+ eccbytes += 3; -+ for (; eccsteps; eccsteps--) { -+ /* enable hardware ecc logic for write */ -+ this->enable_hwecc(mtd, NAND_ECC_WRITE); -+ this->write_buf(mtd, &this->data_poi[datidx], this->eccsize); -+ this->calculate_ecc(mtd, &this->data_poi[datidx], ecc_code); -+ for (i = 0; i < eccbytes; i++, eccidx++) -+ oob_buf[oob_config[eccidx]] = ecc_code[i]; -+ /* If the hardware ecc provides syndromes then -+ * the ecc code must be written immidiately after -+ * the data bytes (words) */ -+ if (this->options & NAND_HWECC_SYNDROME) -+ this->write_buf(mtd, ecc_code, eccbytes); -+ -+ datidx += this->eccsize; -+ } -+ break; -+ -+ default: -+ printk (KERN_WARNING "Invalid NAND_ECC_MODE %d\n", this->eccmode); -+ BUG(); -+ } 
-+ -+ /* Write out OOB data */ -+ if (this->options & NAND_HWECC_SYNDROME) -+ this->write_buf(mtd, &oob_buf[oobsel->eccbytes], mtd->oobsize - oobsel->eccbytes); -+ else -+ this->write_buf(mtd, oob_buf, mtd->oobsize); -+ -+ /* Send command to actually program the data */ -+ this->cmdfunc (mtd, cached ? NAND_CMD_CACHEDPROG : NAND_CMD_PAGEPROG, -1, -1); -+ -+ if (!cached) { -+ /* call wait ready function */ -+ status = this->waitfunc (mtd, this, FL_WRITING); -+ /* See if device thinks it succeeded */ -+ if (status & 0x01) { -+ DEBUG (MTD_DEBUG_LEVEL0, "%s: " "Failed write, page 0x%08x, ", __FUNCTION__, page); -+ return -EIO; -+ } -+ } else { -+ /* FIXME: Implement cached programming ! */ -+ /* wait until cache is ready*/ -+ // status = this->waitfunc (mtd, this, FL_CACHEDRPG); -+ } -+ return 0; -+} -+ -+#ifdef CONFIG_MTD_NAND_VERIFY_WRITE -+/** -+ * nand_verify_pages - [GENERIC] verify the chip contents after a write -+ * @mtd: MTD device structure -+ * @this: NAND chip structure -+ * @page: startpage inside the chip, must be called with (page & this->pagemask) -+ * @numpages: number of pages to verify -+ * @oob_buf: out of band data buffer -+ * @oobsel: out of band selecttion structre -+ * @chipnr: number of the current chip -+ * @oobmode: 1 = full buffer verify, 0 = ecc only -+ * -+ * The NAND device assumes that it is always writing to a cleanly erased page. -+ * Hence, it performs its internal write verification only on bits that -+ * transitioned from 1 to 0. The device does NOT verify the whole page on a -+ * byte by byte basis. It is possible that the page was not completely erased -+ * or the page is becoming unusable due to wear. The read with ECC would catch -+ * the error later when the ECC page check fails, but we would rather catch -+ * it early in the page write stage. Better to write no data than invalid data. 
-+ */ -+static int nand_verify_pages (struct mtd_info *mtd, struct nand_chip *this, int page, int numpages, -+ u_char *oob_buf, struct nand_oobinfo *oobsel, int chipnr, int oobmode) -+{ -+ int i, j, datidx = 0, oobofs = 0, res = -EIO; -+ int eccsteps = this->eccsteps; -+ int hweccbytes; -+ u_char oobdata[64]; -+ -+ hweccbytes = (this->options & NAND_HWECC_SYNDROME) ? (oobsel->eccbytes / eccsteps) : 0; -+ -+ /* Send command to read back the first page */ -+ this->cmdfunc (mtd, NAND_CMD_READ0, 0, page); -+ -+ for(;;) { -+ for (j = 0; j < eccsteps; j++) { -+ /* Loop through and verify the data */ -+ if (this->verify_buf(mtd, &this->data_poi[datidx], mtd->eccsize)) { -+ DEBUG (MTD_DEBUG_LEVEL0, "%s: " "Failed write verify, page 0x%08x ", __FUNCTION__, page); -+ goto out; -+ } -+ datidx += mtd->eccsize; -+ /* Have we a hw generator layout ? */ -+ if (!hweccbytes) -+ continue; -+ if (this->verify_buf(mtd, &this->oob_buf[oobofs], hweccbytes)) { -+ DEBUG (MTD_DEBUG_LEVEL0, "%s: " "Failed write verify, page 0x%08x ", __FUNCTION__, page); -+ goto out; -+ } -+ oobofs += hweccbytes; -+ } -+ -+ /* check, if we must compare all data or if we just have to -+ * compare the ecc bytes -+ */ -+ if (oobmode) { -+ if (this->verify_buf(mtd, &oob_buf[oobofs], mtd->oobsize - hweccbytes * eccsteps)) { -+ DEBUG (MTD_DEBUG_LEVEL0, "%s: " "Failed write verify, page 0x%08x ", __FUNCTION__, page); -+ goto out; -+ } -+ } else { -+ /* Read always, else autoincrement fails */ -+ this->read_buf(mtd, oobdata, mtd->oobsize - hweccbytes * eccsteps); -+ -+ if (oobsel->useecc != MTD_NANDECC_OFF && !hweccbytes) { -+ int ecccnt = oobsel->eccbytes; -+ -+ for (i = 0; i < ecccnt; i++) { -+ int idx = oobsel->eccpos[i]; -+ if (oobdata[idx] != oob_buf[oobofs + idx] ) { -+ DEBUG (MTD_DEBUG_LEVEL0, -+ "%s: Failed ECC write " -+ "verify, page 0x%08x, " "%6i bytes were succesful\n", __FUNCTION__, page, i); -+ goto out; -+ } -+ } -+ } -+ } -+ oobofs += mtd->oobsize - hweccbytes * eccsteps; -+ page++; -+ numpages--; 
-+ -+ /* Apply delay or wait for ready/busy pin -+ * Do this before the AUTOINCR check, so no problems -+ * arise if a chip which does auto increment -+ * is marked as NOAUTOINCR by the board driver. -+ * Do this also before returning, so the chip is -+ * ready for the next command. -+ */ -+ if (!this->dev_ready) -+ udelay (this->chip_delay); -+ else -+ while (!this->dev_ready(mtd)); -+ -+ /* All done, return happy */ -+ if (!numpages) -+ return 0; -+ -+ -+ /* Check, if the chip supports auto page increment */ -+ if (!NAND_CANAUTOINCR(this)) -+ this->cmdfunc (mtd, NAND_CMD_READ0, 0x00, page); -+ } -+ /* -+ * Terminate the read command. We come here in case of an error -+ * So we must issue a reset command. -+ */ -+out: -+ this->cmdfunc (mtd, NAND_CMD_RESET, -1, -1); -+ return res; -+} -+#endif -+ -+/** -+ * nand_read - [MTD Interface] MTD compability function for nand_read_ecc -+ * @mtd: MTD device structure -+ * @from: offset to read from -+ * @len: number of bytes to read -+ * @retlen: pointer to variable to store the number of read bytes -+ * @buf: the databuffer to put data -+ * -+ * This function simply calls nand_read_ecc with oob buffer and oobsel = NULL -+*/ -+static int nand_read (struct mtd_info *mtd, loff_t from, size_t len, size_t * retlen, u_char * buf) -+{ -+ return nand_read_ecc (mtd, from, len, retlen, buf, NULL, NULL); -+} -+ -+ -+/** -+ * nand_read_ecc - [MTD Interface] Read data with ECC -+ * @mtd: MTD device structure -+ * @from: offset to read from -+ * @len: number of bytes to read -+ * @retlen: pointer to variable to store the number of read bytes -+ * @buf: the databuffer to put data -+ * @oob_buf: filesystem supplied oob data buffer -+ * @oobsel: oob selection structure -+ * -+ * NAND read with ECC -+ */ -+static int nand_read_ecc (struct mtd_info *mtd, loff_t from, size_t len, -+ size_t * retlen, u_char * buf, u_char * oob_buf, struct nand_oobinfo *oobsel) -+{ -+ int i, j, col, realpage, page, end, ecc, chipnr, sndcmd = 1; -+ int read = 0, 
oob = 0, ecc_status = 0, ecc_failed = 0; -+ struct nand_chip *this = mtd->priv; -+ u_char *data_poi, *oob_data = oob_buf; -+ u_char ecc_calc[32]; -+ u_char ecc_code[32]; -+ int eccmode, eccsteps; -+ int *oob_config, datidx; -+ int blockcheck = (1 << (this->phys_erase_shift - this->page_shift)) - 1; -+ int eccbytes = 3; -+ int compareecc = 1; -+ int oobreadlen; -+ -+ -+ DEBUG (MTD_DEBUG_LEVEL3, "nand_read_ecc: from = 0x%08x, len = %i\n", (unsigned int) from, (int) len); -+ -+ /* Do not allow reads past end of device */ -+ if ((from + len) > mtd->size) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_read_ecc: Attempt read beyond end of device\n"); -+ *retlen = 0; -+ return -EINVAL; -+ } -+ -+ /* Grab the lock and see if the device is available */ -+ nand_get_chip (this, mtd ,FL_READING); -+ -+ /* use userspace supplied oobinfo, if zero */ -+ if (oobsel == NULL) -+ oobsel = &mtd->oobinfo; -+ -+ /* Autoplace of oob data ? Use the default placement scheme */ -+ if (oobsel->useecc == MTD_NANDECC_AUTOPLACE) -+ oobsel = this->autooob; -+ -+ eccmode = oobsel->useecc ? 
this->eccmode : NAND_ECC_NONE; -+ oob_config = oobsel->eccpos; -+ -+ /* Select the NAND device */ -+ chipnr = (int)(from >> this->chip_shift); -+ this->select_chip(mtd, chipnr); -+ -+ /* First we calculate the starting page */ -+ realpage = (int) (from >> this->page_shift); -+ page = realpage & this->pagemask; -+ -+ /* Get raw starting column */ -+ col = from & (mtd->oobblock - 1); -+ -+ end = mtd->oobblock; -+ ecc = this->eccsize; -+ switch (eccmode) { -+ case NAND_ECC_HW6_512: /* Hardware ECC 6 byte / 512 byte data */ -+ eccbytes = 6; -+ break; -+ case NAND_ECC_HW8_512: /* Hardware ECC 8 byte / 512 byte data */ -+ eccbytes = 8; -+ break; -+ case NAND_ECC_NONE: -+ compareecc = 0; -+ break; -+ } -+ -+ if (this->options & NAND_HWECC_SYNDROME) -+ compareecc = 0; -+ -+ oobreadlen = mtd->oobsize; -+ if (this->options & NAND_HWECC_SYNDROME) -+ oobreadlen -= oobsel->eccbytes; -+ -+ /* Loop until all data read */ -+ while (read < len) { -+ -+ int aligned = (!col && (len - read) >= end); -+ /* -+ * If the read is not page aligned, we have to read into data buffer -+ * due to ecc, else we read into return buffer direct -+ */ -+ if (aligned) -+ data_poi = &buf[read]; -+ else -+ data_poi = this->data_buf; -+ -+ /* Check, if we have this page in the buffer -+ * -+ * FIXME: Make it work when we must provide oob data too, -+ * check the usage of data_buf oob field -+ */ -+ if (realpage == this->pagebuf && !oob_buf) { -+ /* aligned read ? 
*/ -+ if (aligned) -+ memcpy (data_poi, this->data_buf, end); -+ goto readdata; -+ } -+ -+ /* Check, if we must send the read command */ -+ if (sndcmd) { -+ this->cmdfunc (mtd, NAND_CMD_READ0, 0x00, page); -+ sndcmd = 0; -+ } -+ -+ /* get oob area, if we have no oob buffer from fs-driver */ -+ if (!oob_buf || oobsel->useecc == MTD_NANDECC_AUTOPLACE) -+ oob_data = &this->data_buf[end]; -+ -+ eccsteps = this->eccsteps; -+ -+ switch (eccmode) { -+ case NAND_ECC_NONE: { /* No ECC, Read in a page */ -+ static unsigned long lastwhinge = 0; -+ if ((lastwhinge / HZ) != (jiffies / HZ)) { -+ printk (KERN_WARNING "Reading data from NAND FLASH without ECC is not recommended\n"); -+ lastwhinge = jiffies; -+ } -+ this->read_buf(mtd, data_poi, end); -+ break; -+ } -+ -+ case NAND_ECC_SOFT: /* Software ECC 3/256: Read in a page + oob data */ -+ this->read_buf(mtd, data_poi, end); -+ for (i = 0, datidx = 0; eccsteps; eccsteps--, i+=3, datidx += ecc) -+ this->calculate_ecc(mtd, &data_poi[datidx], &ecc_calc[i]); -+ break; -+ -+ case NAND_ECC_HW3_256: /* Hardware ECC 3 byte /256 byte data */ -+ case NAND_ECC_HW3_512: /* Hardware ECC 3 byte /512 byte data */ -+ case NAND_ECC_HW6_512: /* Hardware ECC 6 byte / 512 byte data */ -+ case NAND_ECC_HW8_512: /* Hardware ECC 8 byte / 512 byte data */ -+ for (i = 0, datidx = 0; eccsteps; eccsteps--, i+=eccbytes, datidx += ecc) { -+ this->enable_hwecc(mtd, NAND_ECC_READ); -+ this->read_buf(mtd, &data_poi[datidx], ecc); -+ -+ /* HW ecc with syndrome calculation must read the -+ * syndrome from flash immidiately after the data */ -+ if (!compareecc) { -+ /* Some hw ecc generators need to know when the -+ * syndrome is read from flash */ -+ this->enable_hwecc(mtd, NAND_ECC_READSYN); -+ this->read_buf(mtd, &oob_data[i], eccbytes); -+ /* We calc error correction directly, it checks the hw -+ * generator for an error, reads back the syndrome and -+ * does the error correction on the fly */ -+ if (this->correct_data(mtd, &data_poi[datidx], &oob_data[i], 
&ecc_code[i]) == -1) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_read_ecc: " -+ "Failed ECC read, page 0x%08x on chip %d\n", page, chipnr); -+ ecc_failed++; -+ } -+ } else { -+ this->calculate_ecc(mtd, &data_poi[datidx], &ecc_calc[i]); -+ } -+ } -+ break; -+ -+ default: -+ printk (KERN_WARNING "Invalid NAND_ECC_MODE %d\n", this->eccmode); -+ BUG(); -+ } -+ -+ /* read oobdata */ -+ this->read_buf(mtd, &oob_data[mtd->oobsize - oobreadlen], oobreadlen); -+ -+ /* Skip ECC check, if not requested (ECC_NONE or HW_ECC with syndromes) */ -+ if (!compareecc) -+ goto readoob; -+ -+ /* Pick the ECC bytes out of the oob data */ -+ for (j = 0; j < oobsel->eccbytes; j++) -+ ecc_code[j] = oob_data[oob_config[j]]; -+ -+ /* correct data, if neccecary */ -+ for (i = 0, j = 0, datidx = 0; i < this->eccsteps; i++, datidx += ecc) { -+ ecc_status = this->correct_data(mtd, &data_poi[datidx], &ecc_code[j], &ecc_calc[j]); -+ -+ /* Get next chunk of ecc bytes */ -+ j += eccbytes; -+ -+ /* Check, if we have a fs supplied oob-buffer, -+ * This is the legacy mode. Used by YAFFS1 -+ * Should go away some day -+ */ -+ if (oob_buf && oobsel->useecc == MTD_NANDECC_PLACE) { -+ int *p = (int *)(&oob_data[mtd->oobsize]); -+ p[i] = ecc_status; -+ } -+ -+ if (ecc_status == -1) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_read_ecc: " "Failed ECC read, page 0x%08x\n", page); -+ ecc_failed++; -+ } -+ } -+ -+ readoob: -+ /* check, if we have a fs supplied oob-buffer */ -+ if (oob_buf) { -+ /* without autoplace. 
Legacy mode used by YAFFS1 */ -+ switch(oobsel->useecc) { -+ case MTD_NANDECC_AUTOPLACE: -+ /* Walk through the autoplace chunks */ -+ for (i = 0, j = 0; j < mtd->oobavail; i++) { -+ int from = oobsel->oobfree[i][0]; -+ int num = oobsel->oobfree[i][1]; -+ memcpy(&oob_buf[oob], &oob_data[from], num); -+ j+= num; -+ } -+ oob += mtd->oobavail; -+ break; -+ case MTD_NANDECC_PLACE: -+ /* YAFFS1 legacy mode */ -+ oob_data += this->eccsteps * sizeof (int); -+ default: -+ oob_data += mtd->oobsize; -+ } -+ } -+ readdata: -+ /* Partial page read, transfer data into fs buffer */ -+ if (!aligned) { -+ for (j = col; j < end && read < len; j++) -+ buf[read++] = data_poi[j]; -+ this->pagebuf = realpage; -+ } else -+ read += mtd->oobblock; -+ -+ /* Apply delay or wait for ready/busy pin -+ * Do this before the AUTOINCR check, so no problems -+ * arise if a chip which does auto increment -+ * is marked as NOAUTOINCR by the board driver. -+ */ -+ if (!this->dev_ready) -+ udelay (this->chip_delay); -+ else -+ while (!this->dev_ready(mtd)); -+ -+ if (read == len) -+ break; -+ -+ /* For subsequent reads align to page boundary. */ -+ col = 0; -+ /* Increment page address */ -+ realpage++; -+ -+ page = realpage & this->pagemask; -+ /* Check, if we cross a chip boundary */ -+ if (!page) { -+ chipnr++; -+ this->select_chip(mtd, -1); -+ this->select_chip(mtd, chipnr); -+ } -+ /* Check, if the chip supports auto page increment -+ * or if we have hit a block boundary. -+ */ -+ if (!NAND_CANAUTOINCR(this) || !(page & blockcheck)) -+ sndcmd = 1; -+ } -+ -+ /* Deselect and wake up anyone waiting on the device */ -+ nand_release_chip(mtd); -+ -+ /* -+ * Return success, if no ECC failures, else -EBADMSG -+ * fs driver will take care of that, because -+ * retlen == desired len and result == -EBADMSG -+ */ -+ *retlen = read; -+ return ecc_failed ? 
-EBADMSG : 0; -+} -+ -+/** -+ * nand_read_oob - [MTD Interface] NAND read out-of-band -+ * @mtd: MTD device structure -+ * @from: offset to read from -+ * @len: number of bytes to read -+ * @retlen: pointer to variable to store the number of read bytes -+ * @buf: the databuffer to put data -+ * -+ * NAND read out-of-band data from the spare area -+ */ -+static int nand_read_oob (struct mtd_info *mtd, loff_t from, size_t len, size_t * retlen, u_char * buf) -+{ -+ int i, col, page, chipnr; -+ struct nand_chip *this = mtd->priv; -+ int blockcheck = (1 << (this->phys_erase_shift - this->page_shift)) - 1; -+ -+ DEBUG (MTD_DEBUG_LEVEL3, "nand_read_oob: from = 0x%08x, len = %i\n", (unsigned int) from, (int) len); -+ -+ /* Shift to get page */ -+ page = (int)(from >> this->page_shift); -+ chipnr = (int)(from >> this->chip_shift); -+ -+ /* Mask to get column */ -+ col = from & (mtd->oobsize - 1); -+ -+ /* Initialize return length value */ -+ *retlen = 0; -+ -+ /* Do not allow reads past end of device */ -+ if ((from + len) > mtd->size) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_read_oob: Attempt read beyond end of device\n"); -+ *retlen = 0; -+ return -EINVAL; -+ } -+ -+ /* Grab the lock and see if the device is available */ -+ nand_get_chip (this, mtd , FL_READING); -+ -+ /* Select the NAND device */ -+ this->select_chip(mtd, chipnr); -+ -+ /* Send the read command */ -+ this->cmdfunc (mtd, NAND_CMD_READOOB, col, page & this->pagemask); -+ /* -+ * Read the data, if we read more than one page -+ * oob data, let the device transfer the data ! -+ */ -+ i = 0; -+ while (i < len) { -+ int thislen = mtd->oobsize - col; -+ thislen = min_t(int, thislen, len); -+ this->read_buf(mtd, &buf[i], thislen); -+ i += thislen; -+ -+ /* Apply delay or wait for ready/busy pin -+ * Do this before the AUTOINCR check, so no problems -+ * arise if a chip which does auto increment -+ * is marked as NOAUTOINCR by the board driver. 
-+ */ -+ if (!this->dev_ready) -+ udelay (this->chip_delay); -+ else -+ while (!this->dev_ready(mtd)); -+ -+ /* Read more ? */ -+ if (i < len) { -+ page++; -+ col = 0; -+ -+ /* Check, if we cross a chip boundary */ -+ if (!(page & this->pagemask)) { -+ chipnr++; -+ this->select_chip(mtd, -1); -+ this->select_chip(mtd, chipnr); -+ } -+ -+ /* Check, if the chip supports auto page increment -+ * or if we have hit a block boundary. -+ */ -+ if (!NAND_CANAUTOINCR(this) || !(page & blockcheck)) { -+ /* For subsequent page reads set offset to 0 */ -+ this->cmdfunc (mtd, NAND_CMD_READOOB, 0x0, page & this->pagemask); -+ } -+ } -+ } -+ -+ /* Deselect and wake up anyone waiting on the device */ -+ nand_release_chip(mtd); -+ -+ /* Return happy */ -+ *retlen = len; -+ return 0; -+} -+ -+/** -+ * nand_read_raw - [GENERIC] Read raw data including oob into buffer -+ * @mtd: MTD device structure -+ * @buf: temporary buffer -+ * @from: offset to read from -+ * @len: number of bytes to read -+ * @ooblen: number of oob data bytes to read -+ * -+ * Read raw data including oob into buffer -+ */ -+int nand_read_raw (struct mtd_info *mtd, uint8_t *buf, loff_t from, size_t len, size_t ooblen) -+{ -+ struct nand_chip *this = mtd->priv; -+ int page = (int) (from >> this->page_shift); -+ int chip = (int) (from >> this->chip_shift); -+ int sndcmd = 1; -+ int cnt = 0; -+ int pagesize = mtd->oobblock + mtd->oobsize; -+ int blockcheck = (1 << (this->phys_erase_shift - this->page_shift)) - 1; -+ -+ /* Do not allow reads past end of device */ -+ if ((from + len) > mtd->size) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_read_raw: Attempt read beyond end of device\n"); -+ return -EINVAL; -+ } -+ -+ /* Grab the lock and see if the device is available */ -+ nand_get_chip (this, mtd , FL_READING); -+ -+ this->select_chip (mtd, chip); -+ -+ /* Add requested oob length */ -+ len += ooblen; -+ -+ while (len) { -+ if (sndcmd) -+ this->cmdfunc (mtd, NAND_CMD_READ0, 0, page & this->pagemask); -+ sndcmd = 0; -+ -+ 
this->read_buf (mtd, &buf[cnt], pagesize); -+ -+ len -= pagesize; -+ cnt += pagesize; -+ page++; -+ -+ if (!this->dev_ready) -+ udelay (this->chip_delay); -+ else -+ while (!this->dev_ready(mtd)); -+ -+ /* Check, if the chip supports auto page increment */ -+ if (!NAND_CANAUTOINCR(this) || !(page & blockcheck)) -+ sndcmd = 1; -+ } -+ -+ /* Deselect and wake up anyone waiting on the device */ -+ nand_release_chip(mtd); -+ return 0; -+} -+ -+ -+/** -+ * nand_prepare_oobbuf - [GENERIC] Prepare the out of band buffer -+ * @mtd: MTD device structure -+ * @fsbuf: buffer given by fs driver -+ * @oobsel: out of band selection structre -+ * @autoplace: 1 = place given buffer into the oob bytes -+ * @numpages: number of pages to prepare -+ * -+ * Return: -+ * 1. Filesystem buffer available and autoplacement is off, -+ * return filesystem buffer -+ * 2. No filesystem buffer or autoplace is off, return internal -+ * buffer -+ * 3. Filesystem buffer is given and autoplace selected -+ * put data from fs buffer into internal buffer and -+ * retrun internal buffer -+ * -+ * Note: The internal buffer is filled with 0xff. This must -+ * be done only once, when no autoplacement happens -+ * Autoplacement sets the buffer dirty flag, which -+ * forces the 0xff fill before using the buffer again. 
-+ * -+*/ -+static u_char * nand_prepare_oobbuf (struct mtd_info *mtd, u_char *fsbuf, struct nand_oobinfo *oobsel, -+ int autoplace, int numpages) -+{ -+ struct nand_chip *this = mtd->priv; -+ int i, len, ofs; -+ -+ /* Zero copy fs supplied buffer */ -+ if (fsbuf && !autoplace) -+ return fsbuf; -+ -+ /* Check, if the buffer must be filled with ff again */ -+ if (this->oobdirty) { -+ memset (this->oob_buf, 0xff, -+ mtd->oobsize << (this->phys_erase_shift - this->page_shift)); -+ this->oobdirty = 0; -+ } -+ -+ /* If we have no autoplacement or no fs buffer use the internal one */ -+ if (!autoplace || !fsbuf) -+ return this->oob_buf; -+ -+ /* Walk through the pages and place the data */ -+ this->oobdirty = 1; -+ ofs = 0; -+ while (numpages--) { -+ for (i = 0, len = 0; len < mtd->oobavail; i++) { -+ int to = ofs + oobsel->oobfree[i][0]; -+ int num = oobsel->oobfree[i][1]; -+ memcpy (&this->oob_buf[to], fsbuf, num); -+ len += num; -+ fsbuf += num; -+ } -+ ofs += mtd->oobavail; -+ } -+ return this->oob_buf; -+} -+ -+#define NOTALIGNED(x) (x & (mtd->oobblock-1)) != 0 -+ -+/** -+ * nand_write - [MTD Interface] compability function for nand_write_ecc -+ * @mtd: MTD device structure -+ * @to: offset to write to -+ * @len: number of bytes to write -+ * @retlen: pointer to variable to store the number of written bytes -+ * @buf: the data to write -+ * -+ * This function simply calls nand_write_ecc with oob buffer and oobsel = NULL -+ * -+*/ -+static int nand_write (struct mtd_info *mtd, loff_t to, size_t len, size_t * retlen, const u_char * buf) -+{ -+ return (nand_write_ecc (mtd, to, len, retlen, buf, NULL, NULL)); -+} -+ -+/** -+ * nand_write_ecc - [MTD Interface] NAND write with ECC -+ * @mtd: MTD device structure -+ * @to: offset to write to -+ * @len: number of bytes to write -+ * @retlen: pointer to variable to store the number of written bytes -+ * @buf: the data to write -+ * @eccbuf: filesystem supplied oob data buffer -+ * @oobsel: oob selection structure -+ * -+ * 
NAND write with ECC -+ */ -+static int nand_write_ecc (struct mtd_info *mtd, loff_t to, size_t len, -+ size_t * retlen, const u_char * buf, u_char * eccbuf, struct nand_oobinfo *oobsel) -+{ -+ int startpage, page, ret = -EIO, oob = 0, written = 0, chipnr; -+ int autoplace = 0, numpages, totalpages; -+ struct nand_chip *this = mtd->priv; -+ u_char *oobbuf, *bufstart; -+ int ppblock = (1 << (this->phys_erase_shift - this->page_shift)); -+ -+ DEBUG (MTD_DEBUG_LEVEL3, "nand_write_ecc: to = 0x%08x, len = %i\n", (unsigned int) to, (int) len); -+ -+ /* Initialize retlen, in case of early exit */ -+ *retlen = 0; -+ -+ /* Do not allow write past end of device */ -+ if ((to + len) > mtd->size) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_write_ecc: Attempt to write past end of page\n"); -+ return -EINVAL; -+ } -+ -+ /* reject writes, which are not page aligned */ -+ if (NOTALIGNED (to) || NOTALIGNED(len)) { -+ printk (KERN_NOTICE "nand_write_ecc: Attempt to write not page aligned data\n"); -+ return -EINVAL; -+ } -+ -+ /* Grab the lock and see if the device is available */ -+ nand_get_chip (this, mtd, FL_WRITING); -+ -+ /* Calculate chipnr */ -+ chipnr = (int)(to >> this->chip_shift); -+ /* Select the NAND device */ -+ this->select_chip(mtd, chipnr); -+ -+ /* Check, if it is write protected */ -+ if (nand_check_wp(mtd)) -+ goto out; -+ -+ /* if oobsel is NULL, use chip defaults */ -+ if (oobsel == NULL) -+ oobsel = &mtd->oobinfo; -+ -+ /* Autoplace of oob data ? 
Use the default placement scheme */ -+ if (oobsel->useecc == MTD_NANDECC_AUTOPLACE) { -+ oobsel = this->autooob; -+ autoplace = 1; -+ } -+ -+ /* Setup variables and oob buffer */ -+ totalpages = len >> this->page_shift; -+ page = (int) (to >> this->page_shift); -+ /* Invalidate the page cache, if we write to the cached page */ -+ if (page <= this->pagebuf && this->pagebuf < (page + totalpages)) -+ this->pagebuf = -1; -+ -+ /* Set it relative to chip */ -+ page &= this->pagemask; -+ startpage = page; -+ /* Calc number of pages we can write in one go */ -+ numpages = min (ppblock - (startpage & (ppblock - 1)), totalpages); -+ oobbuf = nand_prepare_oobbuf (mtd, eccbuf, oobsel, autoplace, numpages); -+ bufstart = (u_char *)buf; -+ -+ /* Loop until all data is written */ -+ while (written < len) { -+ -+ this->data_poi = (u_char*) &buf[written]; -+ /* Write one page. If this is the last page to write -+ * or the last page in this block, then use the -+ * real pageprogram command, else select cached programming -+ * if supported by the chip. -+ */ -+ ret = nand_write_page (mtd, this, page, &oobbuf[oob], oobsel, (--numpages > 0)); -+ if (ret) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_write_ecc: write_page failed %d\n", ret); -+ goto out; -+ } -+ /* Next oob page */ -+ oob += mtd->oobsize; -+ /* Update written bytes count */ -+ written += mtd->oobblock; -+ if (written == len) -+ goto cmp; -+ -+ /* Increment page address */ -+ page++; -+ -+ /* Have we hit a block boundary ? Then we have to verify and -+ * if verify is ok, we have to setup the oob buffer for -+ * the next pages. -+ */ -+ if (!(page & (ppblock - 1))){ -+ int ofs; -+ this->data_poi = bufstart; -+ ret = nand_verify_pages (mtd, this, startpage, -+ page - startpage, -+ oobbuf, oobsel, chipnr, (eccbuf != NULL)); -+ if (ret) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_write_ecc: verify_pages failed %d\n", ret); -+ goto out; -+ } -+ *retlen = written; -+ -+ ofs = autoplace ? 
mtd->oobavail : mtd->oobsize; -+ if (eccbuf) -+ eccbuf += (page - startpage) * ofs; -+ totalpages -= page - startpage; -+ numpages = min (totalpages, ppblock); -+ page &= this->pagemask; -+ startpage = page; -+ oobbuf = nand_prepare_oobbuf (mtd, eccbuf, oobsel, -+ autoplace, numpages); -+ /* Check, if we cross a chip boundary */ -+ if (!page) { -+ chipnr++; -+ this->select_chip(mtd, -1); -+ this->select_chip(mtd, chipnr); -+ } -+ } -+ } -+ /* Verify the remaining pages */ -+cmp: -+ this->data_poi = bufstart; -+ ret = nand_verify_pages (mtd, this, startpage, totalpages, -+ oobbuf, oobsel, chipnr, (eccbuf != NULL)); -+ if (!ret) -+ *retlen = written; -+ else -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_write_ecc: verify_pages failed %d\n", ret); -+ -+out: -+ /* Deselect and wake up anyone waiting on the device */ -+ nand_release_chip(mtd); -+ -+ return ret; -+} -+ -+ -+/** -+ * nand_write_oob - [MTD Interface] NAND write out-of-band -+ * @mtd: MTD device structure -+ * @to: offset to write to -+ * @len: number of bytes to write -+ * @retlen: pointer to variable to store the number of written bytes -+ * @buf: the data to write -+ * -+ * NAND write out-of-band -+ */ -+static int nand_write_oob (struct mtd_info *mtd, loff_t to, size_t len, size_t * retlen, const u_char * buf) -+{ -+ int column, page, status, ret = -EIO, chipnr; -+ struct nand_chip *this = mtd->priv; -+ -+ DEBUG (MTD_DEBUG_LEVEL3, "nand_write_oob: to = 0x%08x, len = %i\n", (unsigned int) to, (int) len); -+ -+ /* Shift to get page */ -+ page = (int) (to >> this->page_shift); -+ chipnr = (int) (to >> this->chip_shift); -+ -+ /* Mask to get column */ -+ column = to & (mtd->oobsize - 1); -+ -+ /* Initialize return length value */ -+ *retlen = 0; -+ -+ /* Do not allow write past end of page */ -+ if ((column + len) > mtd->oobsize) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_write_oob: Attempt to write past end of page\n"); -+ return -EINVAL; -+ } -+ -+ /* Grab the lock and see if the device is available */ -+ nand_get_chip 
(this, mtd, FL_WRITING); -+ -+ /* Select the NAND device */ -+ this->select_chip(mtd, chipnr); -+ -+ /* Reset the chip. Some chips (like the Toshiba TC5832DC found -+ in one of my DiskOnChip 2000 test units) will clear the whole -+ data page too if we don't do this. I have no clue why, but -+ I seem to have 'fixed' it in the doc2000 driver in -+ August 1999. dwmw2. */ -+ this->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); -+ -+ /* Check, if it is write protected */ -+ if (nand_check_wp(mtd)) -+ goto out; -+ -+ /* Invalidate the page cache, if we write to the cached page */ -+ if (page == this->pagebuf) -+ this->pagebuf = -1; -+ -+ if (NAND_MUST_PAD(this)) { -+ /* Write out desired data */ -+ this->cmdfunc (mtd, NAND_CMD_SEQIN, mtd->oobblock, page & this->pagemask); -+ /* prepad 0xff for partial programming */ -+ this->write_buf(mtd, ffchars, column); -+ /* write data */ -+ this->write_buf(mtd, buf, len); -+ /* postpad 0xff for partial programming */ -+ this->write_buf(mtd, ffchars, mtd->oobsize - (len+column)); -+ } else { -+ /* Write out desired data */ -+ this->cmdfunc (mtd, NAND_CMD_SEQIN, mtd->oobblock + column, page & this->pagemask); -+ /* write data */ -+ this->write_buf(mtd, buf, len); -+ } -+ /* Send command to program the OOB data */ -+ this->cmdfunc (mtd, NAND_CMD_PAGEPROG, -1, -1); -+ -+ status = this->waitfunc (mtd, this, FL_WRITING); -+ -+ /* See if device thinks it succeeded */ -+ if (status & 0x01) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_write_oob: " "Failed write, page 0x%08x\n", page); -+ ret = -EIO; -+ goto out; -+ } -+ /* Return happy */ -+ *retlen = len; -+ -+#ifdef CONFIG_MTD_NAND_VERIFY_WRITE -+ /* Send command to read back the data */ -+ this->cmdfunc (mtd, NAND_CMD_READOOB, column, page & this->pagemask); -+ -+ if (this->verify_buf(mtd, buf, len)) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_write_oob: " "Failed write verify, page 0x%08x\n", page); -+ ret = -EIO; -+ goto out; -+ } -+#endif -+ ret = 0; -+out: -+ /* Deselect and wake up anyone waiting on the 
device */ -+ nand_release_chip(mtd); -+ -+ return ret; -+} -+ -+ -+/** -+ * nand_writev - [MTD Interface] compabilty function for nand_writev_ecc -+ * @mtd: MTD device structure -+ * @vecs: the iovectors to write -+ * @count: number of vectors -+ * @to: offset to write to -+ * @retlen: pointer to variable to store the number of written bytes -+ * -+ * NAND write with kvec. This just calls the ecc function -+ */ -+static int nand_writev (struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, -+ loff_t to, size_t * retlen) -+{ -+ return (nand_writev_ecc (mtd, vecs, count, to, retlen, NULL, NULL)); -+} -+ -+/** -+ * nand_writev_ecc - [MTD Interface] write with iovec with ecc -+ * @mtd: MTD device structure -+ * @vecs: the iovectors to write -+ * @count: number of vectors -+ * @to: offset to write to -+ * @retlen: pointer to variable to store the number of written bytes -+ * @eccbuf: filesystem supplied oob data buffer -+ * @oobsel: oob selection structure -+ * -+ * NAND write with iovec with ecc -+ */ -+static int nand_writev_ecc (struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, -+ loff_t to, size_t * retlen, u_char *eccbuf, struct nand_oobinfo *oobsel) -+{ -+ int i, page, len, total_len, ret = -EIO, written = 0, chipnr; -+ int oob, numpages, autoplace = 0, startpage; -+ struct nand_chip *this = mtd->priv; -+ int ppblock = (1 << (this->phys_erase_shift - this->page_shift)); -+ u_char *oobbuf, *bufstart; -+ -+ /* Preset written len for early exit */ -+ *retlen = 0; -+ -+ /* Calculate total length of data */ -+ total_len = 0; -+ for (i = 0; i < count; i++) -+ total_len += (int) vecs[i].iov_len; -+ -+ DEBUG (MTD_DEBUG_LEVEL3, -+ "nand_writev: to = 0x%08x, len = %i, count = %ld\n", (unsigned int) to, (unsigned int) total_len, count); -+ -+ /* Do not allow write past end of page */ -+ if ((to + total_len) > mtd->size) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_writev: Attempted write past end of device\n"); -+ return -EINVAL; -+ } -+ -+ /* reject 
writes, which are not page aligned */ -+ if (NOTALIGNED (to) || NOTALIGNED(total_len)) { -+ printk (KERN_NOTICE "nand_write_ecc: Attempt to write not page aligned data\n"); -+ return -EINVAL; -+ } -+ -+ /* Grab the lock and see if the device is available */ -+ nand_get_chip (this, mtd, FL_WRITING); -+ -+ /* Get the current chip-nr */ -+ chipnr = (int) (to >> this->chip_shift); -+ /* Select the NAND device */ -+ this->select_chip(mtd, chipnr); -+ -+ /* Check, if it is write protected */ -+ if (nand_check_wp(mtd)) -+ goto out; -+ -+ /* if oobsel is NULL, use chip defaults */ -+ if (oobsel == NULL) -+ oobsel = &mtd->oobinfo; -+ -+ /* Autoplace of oob data ? Use the default placement scheme */ -+ if (oobsel->useecc == MTD_NANDECC_AUTOPLACE) { -+ oobsel = this->autooob; -+ autoplace = 1; -+ } -+ -+ /* Setup start page */ -+ page = (int) (to >> this->page_shift); -+ /* Invalidate the page cache, if we write to the cached page */ -+ if (page <= this->pagebuf && this->pagebuf < ((to + total_len) >> this->page_shift)) -+ this->pagebuf = -1; -+ -+ startpage = page & this->pagemask; -+ -+ /* Loop until all kvec' data has been written */ -+ len = 0; -+ while (count) { -+ /* If the given tuple is >= pagesize then -+ * write it out from the iov -+ */ -+ if ((vecs->iov_len - len) >= mtd->oobblock) { -+ /* Calc number of pages we can write -+ * out of this iov in one go */ -+ numpages = (vecs->iov_len - len) >> this->page_shift; -+ /* Do not cross block boundaries */ -+ numpages = min (ppblock - (startpage & (ppblock - 1)), numpages); -+ oobbuf = nand_prepare_oobbuf (mtd, NULL, oobsel, autoplace, numpages); -+ bufstart = (u_char *)vecs->iov_base; -+ bufstart += len; -+ this->data_poi = bufstart; -+ oob = 0; -+ for (i = 1; i <= numpages; i++) { -+ /* Write one page. If this is the last page to write -+ * then use the real pageprogram command, else select -+ * cached programming if supported by the chip. 
-+ */ -+ ret = nand_write_page (mtd, this, page & this->pagemask, -+ &oobbuf[oob], oobsel, i != numpages); -+ if (ret) -+ goto out; -+ this->data_poi += mtd->oobblock; -+ len += mtd->oobblock; -+ oob += mtd->oobsize; -+ page++; -+ } -+ /* Check, if we have to switch to the next tuple */ -+ if (len >= (int) vecs->iov_len) { -+ vecs++; -+ len = 0; -+ count--; -+ } -+ } else { -+ /* We must use the internal buffer, read data out of each -+ * tuple until we have a full page to write -+ */ -+ int cnt = 0; -+ while (cnt < mtd->oobblock) { -+ if (vecs->iov_base != NULL && vecs->iov_len) -+ this->data_buf[cnt++] = ((u_char *) vecs->iov_base)[len++]; -+ /* Check, if we have to switch to the next tuple */ -+ if (len >= (int) vecs->iov_len) { -+ vecs++; -+ len = 0; -+ count--; -+ } -+ } -+ this->pagebuf = page; -+ this->data_poi = this->data_buf; -+ bufstart = this->data_poi; -+ numpages = 1; -+ oobbuf = nand_prepare_oobbuf (mtd, NULL, oobsel, autoplace, numpages); -+ ret = nand_write_page (mtd, this, page & this->pagemask, -+ oobbuf, oobsel, 0); -+ if (ret) -+ goto out; -+ page++; -+ } -+ -+ this->data_poi = bufstart; -+ ret = nand_verify_pages (mtd, this, startpage, numpages, oobbuf, oobsel, chipnr, 0); -+ if (ret) -+ goto out; -+ -+ written += mtd->oobblock * numpages; -+ /* All done ? 
*/ -+ if (!count) -+ break; -+ -+ startpage = page & this->pagemask; -+ /* Check, if we cross a chip boundary */ -+ if (!startpage) { -+ chipnr++; -+ this->select_chip(mtd, -1); -+ this->select_chip(mtd, chipnr); -+ } -+ } -+ ret = 0; -+out: -+ /* Deselect and wake up anyone waiting on the device */ -+ nand_release_chip(mtd); -+ -+ *retlen = written; -+ return ret; -+} -+ -+/** -+ * single_erease_cmd - [GENERIC] NAND standard block erase command function -+ * @mtd: MTD device structure -+ * @page: the page address of the block which will be erased -+ * -+ * Standard erase command for NAND chips -+ */ -+static void single_erase_cmd (struct mtd_info *mtd, int page) -+{ -+ struct nand_chip *this = mtd->priv; -+ /* Send commands to erase a block */ -+ this->cmdfunc (mtd, NAND_CMD_ERASE1, -1, page); -+ this->cmdfunc (mtd, NAND_CMD_ERASE2, -1, -1); -+} -+ -+/** -+ * multi_erease_cmd - [GENERIC] AND specific block erase command function -+ * @mtd: MTD device structure -+ * @page: the page address of the block which will be erased -+ * -+ * AND multi block erase command function -+ * Erase 4 consecutive blocks -+ */ -+static void multi_erase_cmd (struct mtd_info *mtd, int page) -+{ -+ struct nand_chip *this = mtd->priv; -+ /* Send commands to erase a block */ -+ this->cmdfunc (mtd, NAND_CMD_ERASE1, -1, page++); -+ this->cmdfunc (mtd, NAND_CMD_ERASE1, -1, page++); -+ this->cmdfunc (mtd, NAND_CMD_ERASE1, -1, page++); -+ this->cmdfunc (mtd, NAND_CMD_ERASE1, -1, page); -+ this->cmdfunc (mtd, NAND_CMD_ERASE2, -1, -1); -+} -+ -+/** -+ * nand_erase - [MTD Interface] erase block(s) -+ * @mtd: MTD device structure -+ * @instr: erase instruction -+ * -+ * Erase one ore more blocks -+ */ -+static int nand_erase (struct mtd_info *mtd, struct erase_info *instr) -+{ -+ return nand_erase_nand (mtd, instr, 0); -+} -+ -+/** -+ * nand_erase_intern - [NAND Interface] erase block(s) -+ * @mtd: MTD device structure -+ * @instr: erase instruction -+ * @allowbbt: allow erasing the bbt area -+ * 
-+ * Erase one ore more blocks -+ */ -+int nand_erase_nand (struct mtd_info *mtd, struct erase_info *instr, int allowbbt) -+{ -+ int page, len, status, pages_per_block, ret, chipnr; -+ struct nand_chip *this = mtd->priv; -+ -+ DEBUG (MTD_DEBUG_LEVEL3, -+ "nand_erase: start = 0x%08x, len = %i\n", (unsigned int) instr->addr, (unsigned int) instr->len); -+ -+ /* Start address must align on block boundary */ -+ if (instr->addr & ((1 << this->phys_erase_shift) - 1)) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_erase: Unaligned address\n"); -+ return -EINVAL; -+ } -+ -+ /* Length must align on block boundary */ -+ if (instr->len & ((1 << this->phys_erase_shift) - 1)) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_erase: Length not block aligned\n"); -+ return -EINVAL; -+ } -+ -+ /* Do not allow erase past end of device */ -+ if ((instr->len + instr->addr) > mtd->size) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_erase: Erase past end of device\n"); -+ return -EINVAL; -+ } -+ -+ instr->fail_addr = 0xffffffff; -+ -+ /* Grab the lock and see if the device is available */ -+ nand_get_chip (this, mtd, FL_ERASING); -+ -+ /* Shift to get first page */ -+ page = (int) (instr->addr >> this->page_shift); -+ chipnr = (int) (instr->addr >> this->chip_shift); -+ -+ /* Calculate pages in each block */ -+ pages_per_block = 1 << (this->phys_erase_shift - this->page_shift); -+ -+ /* Select the NAND device */ -+ this->select_chip(mtd, chipnr); -+ -+ /* Check the WP bit */ -+ /* Check, if it is write protected */ -+ if (nand_check_wp(mtd)) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_erase: Device is write protected!!!\n"); -+ instr->state = MTD_ERASE_FAILED; -+ goto erase_exit; -+ } -+ -+ /* Loop through the pages */ -+ len = instr->len; -+ -+ instr->state = MTD_ERASING; -+ -+ while (len) { -+ /* Check if we have a bad block, we do not erase bad blocks ! 
*/ -+ if (nand_block_checkbad(mtd, ((loff_t) page) << this->page_shift, 0, allowbbt)) { -+ printk (KERN_WARNING "nand_erase: attempt to erase a bad block at page 0x%08x\n", page); -+ instr->state = MTD_ERASE_FAILED; -+ goto erase_exit; -+ } -+ -+ /* Invalidate the page cache, if we erase the block which contains -+ the current cached page */ -+ if (page <= this->pagebuf && this->pagebuf < (page + pages_per_block)) -+ this->pagebuf = -1; -+ -+ this->erase_cmd (mtd, page & this->pagemask); -+ -+ status = this->waitfunc (mtd, this, FL_ERASING); -+ -+ /* See if block erase succeeded */ -+ if (status & 0x01) { -+ DEBUG (MTD_DEBUG_LEVEL0, "nand_erase: " "Failed erase, page 0x%08x\n", page); -+ instr->state = MTD_ERASE_FAILED; -+ instr->fail_addr = (page << this->page_shift); -+ goto erase_exit; -+ } -+ -+ /* Increment page address and decrement length */ -+ len -= (1 << this->phys_erase_shift); -+ page += pages_per_block; -+ -+ /* Check, if we cross a chip boundary */ -+ if (len && !(page & this->pagemask)) { -+ chipnr++; -+ this->select_chip(mtd, -1); -+ this->select_chip(mtd, chipnr); -+ } -+ } -+ instr->state = MTD_ERASE_DONE; -+ -+erase_exit: -+ -+ ret = instr->state == MTD_ERASE_DONE ? 
0 : -EIO; -+ /* Do call back function */ -+ if (!ret) -+ mtd_erase_callback(instr); -+ -+ /* Deselect and wake up anyone waiting on the device */ -+ nand_release_chip(mtd); -+ -+ /* Return more or less happy */ -+ return ret; -+} -+ -+/** -+ * nand_sync - [MTD Interface] sync -+ * @mtd: MTD device structure -+ * -+ * Sync is actually a wait for chip ready function -+ */ -+static void nand_sync (struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ DECLARE_WAITQUEUE (wait, current); -+ -+ DEBUG (MTD_DEBUG_LEVEL3, "nand_sync: called\n"); -+ -+retry: -+ /* Grab the spinlock */ -+ spin_lock_bh (&this->chip_lock); -+ -+ /* See what's going on */ -+ switch (this->state) { -+ case FL_READY: -+ case FL_SYNCING: -+ this->state = FL_SYNCING; -+ spin_unlock_bh (&this->chip_lock); -+ break; -+ -+ default: -+ /* Not an idle state */ -+ add_wait_queue (&this->wq, &wait); -+ spin_unlock_bh (&this->chip_lock); -+ schedule (); -+ -+ remove_wait_queue (&this->wq, &wait); -+ goto retry; -+ } -+ -+ /* Lock the device */ -+ spin_lock_bh (&this->chip_lock); -+ -+ /* Set the device to be ready again */ -+ if (this->state == FL_SYNCING) { -+ this->state = FL_READY; -+ wake_up (&this->wq); -+ } -+ -+ /* Unlock the device */ -+ spin_unlock_bh (&this->chip_lock); -+} -+ -+ -+/** -+ * nand_block_isbad - [MTD Interface] Check whether the block at the given offset is bad -+ * @mtd: MTD device structure -+ * @ofs: offset relative to mtd start -+ */ -+static int nand_block_isbad (struct mtd_info *mtd, loff_t ofs) -+{ -+ /* Check for invalid offset */ -+ if (ofs > mtd->size) -+ return -EINVAL; -+ -+ return nand_block_checkbad (mtd, ofs, 1, 0); -+} -+ -+/** -+ * nand_block_markbad - [MTD Interface] Mark the block at the given offset as bad -+ * @mtd: MTD device structure -+ * @ofs: offset relative to mtd start -+ */ -+static int nand_block_markbad (struct mtd_info *mtd, loff_t ofs) -+{ -+ struct nand_chip *this = mtd->priv; -+ int ret; -+ -+ if ((ret = nand_block_isbad(mtd, ofs))) { 
-+ /* If it was bad already, return success and do nothing. */ -+ if (ret > 0) -+ return 0; -+ return ret; -+ } -+ -+ return this->block_markbad(mtd, ofs); -+} -+ -+/** -+ * nand_scan - [NAND Interface] Scan for the NAND device -+ * @mtd: MTD device structure -+ * @maxchips: Number of chips to scan for -+ * -+ * This fills out all the not initialized function pointers -+ * with the defaults. -+ * The flash ID is read and the mtd/chip structures are -+ * filled with the appropriate values. Buffers are allocated if -+ * they are not provided by the board driver -+ * -+ */ -+int nand_scan (struct mtd_info *mtd, int maxchips) -+{ -+ int i, j, nand_maf_id, nand_dev_id, busw; -+ struct nand_chip *this = mtd->priv; -+ -+ /* Get buswidth to select the correct functions*/ -+ busw = this->options & NAND_BUSWIDTH_16; -+ -+ /* check for proper chip_delay setup, set 20us if not */ -+ if (!this->chip_delay) -+ this->chip_delay = 20; -+ -+ /* check, if a user supplied command function given */ -+ if (this->cmdfunc == NULL) -+ this->cmdfunc = nand_command; -+ -+ /* check, if a user supplied wait function given */ -+ if (this->waitfunc == NULL) -+ this->waitfunc = nand_wait; -+ -+ if (!this->select_chip) -+ this->select_chip = nand_select_chip; -+ if (!this->write_byte) -+ this->write_byte = busw ? nand_write_byte16 : nand_write_byte; -+ if (!this->read_byte) -+ this->read_byte = busw ? nand_read_byte16 : nand_read_byte; -+ if (!this->write_word) -+ this->write_word = nand_write_word; -+ if (!this->read_word) -+ this->read_word = nand_read_word; -+ if (!this->block_bad) -+ this->block_bad = nand_block_bad; -+ if (!this->block_markbad) -+ this->block_markbad = nand_default_block_markbad; -+ if (!this->write_buf) -+ this->write_buf = busw ? nand_write_buf16 : nand_write_buf; -+ if (!this->read_buf) -+ this->read_buf = busw ? nand_read_buf16 : nand_read_buf; -+ if (!this->verify_buf) -+ this->verify_buf = busw ? 
nand_verify_buf16 : nand_verify_buf; -+ if (!this->scan_bbt) -+ this->scan_bbt = nand_default_bbt; -+ -+ /* Select the device */ -+ this->select_chip(mtd, 0); -+ -+ /* Send the command for reading device ID */ -+ this->cmdfunc (mtd, NAND_CMD_READID, 0x00, -1); -+ -+ /* Read manufacturer and device IDs */ -+ nand_maf_id = this->read_byte(mtd); -+ nand_dev_id = this->read_byte(mtd); -+ -+ /* Print and store flash device information */ -+ for (i = 0; nand_flash_ids[i].name != NULL; i++) { -+ -+ if (nand_dev_id != nand_flash_ids[i].id) -+ continue; -+ -+ if (!mtd->name) mtd->name = nand_flash_ids[i].name; -+ this->chipsize = nand_flash_ids[i].chipsize << 20; -+ -+ /* New devices have all the information in additional id bytes */ -+ if (!nand_flash_ids[i].pagesize) { -+ int extid; -+ /* The 3rd id byte contains non relevant data ATM */ -+ extid = this->read_byte(mtd); -+ /* The 4th id byte is the important one */ -+ extid = this->read_byte(mtd); -+ /* Calc pagesize */ -+ mtd->oobblock = 1024 << (extid & 0x3); -+ extid >>= 2; -+ /* Calc oobsize */ -+ mtd->oobsize = (8 << (extid & 0x03)) * (mtd->oobblock / 512); -+ extid >>= 2; -+ /* Calc blocksize. Blocksize is multiples of 64KiB */ -+ mtd->erasesize = (64 * 1024) << (extid & 0x03); -+ extid >>= 2; -+ /* Get buswidth information */ -+ busw = (extid & 0x01) ? NAND_BUSWIDTH_16 : 0; -+ -+ } else { -+ /* Old devices have this data hardcoded in the -+ * device id table */ -+ mtd->erasesize = nand_flash_ids[i].erasesize; -+ mtd->oobblock = nand_flash_ids[i].pagesize; -+ mtd->oobsize = mtd->oobblock / 32; -+ busw = nand_flash_ids[i].options & NAND_BUSWIDTH_16; -+ } -+ -+ /* Check, if buswidth is correct. Hardware drivers should set -+ * this correct ! 
*/ -+ if (busw != (this->options & NAND_BUSWIDTH_16)) { -+ printk (KERN_INFO "NAND device: Manufacturer ID:" -+ " 0x%02x, Chip ID: 0x%02x (%s %s)\n", nand_maf_id, nand_dev_id, -+ nand_manuf_ids[i].name , mtd->name); -+ printk (KERN_WARNING -+ "NAND bus width %d instead %d bit\n", -+ (this->options & NAND_BUSWIDTH_16) ? 16 : 8, -+ busw ? 16 : 8); -+ this->select_chip(mtd, -1); -+ return 1; -+ } -+ -+ /* Calculate the address shift from the page size */ -+ this->page_shift = ffs(mtd->oobblock) - 1; -+ this->bbt_erase_shift = this->phys_erase_shift = ffs(mtd->erasesize) - 1; -+ this->chip_shift = ffs(this->chipsize) - 1; -+ -+ /* Set the bad block position */ -+ this->badblockpos = mtd->oobblock > 512 ? -+ NAND_LARGE_BADBLOCK_POS : NAND_SMALL_BADBLOCK_POS; -+ -+ /* Get chip options, preserve non chip based options */ -+ this->options &= ~NAND_CHIPOPTIONS_MSK; -+ this->options |= nand_flash_ids[i].options & NAND_CHIPOPTIONS_MSK; -+ /* Set this as a default. Board drivers can override it, if neccecary */ -+ this->options |= NAND_NO_AUTOINCR; -+ /* Check if this is a not a samsung device. Do not clear the options -+ * for chips which are not having an extended id. -+ */ -+ if (nand_maf_id != NAND_MFR_SAMSUNG && !nand_flash_ids[i].pagesize) -+ this->options &= ~NAND_SAMSUNG_LP_OPTIONS; -+ -+ /* Check for AND chips with 4 page planes */ -+ if (this->options & NAND_4PAGE_ARRAY) -+ this->erase_cmd = multi_erase_cmd; -+ else -+ this->erase_cmd = single_erase_cmd; -+ -+ /* Do not replace user supplied command function ! 
*/ -+ if (mtd->oobblock > 512 && this->cmdfunc == nand_command) -+ this->cmdfunc = nand_command_lp; -+ -+ /* Try to identify manufacturer */ -+ for (j = 0; nand_manuf_ids[j].id != 0x0; j++) { -+ if (nand_manuf_ids[j].id == nand_maf_id) -+ break; -+ } -+ printk (KERN_INFO "NAND device: Manufacturer ID:" -+ " 0x%02x, Chip ID: 0x%02x (%s %s)\n", nand_maf_id, nand_dev_id, -+ nand_manuf_ids[j].name , nand_flash_ids[i].name); -+ break; -+ } -+ -+ if (!nand_flash_ids[i].name) { -+ printk (KERN_WARNING "No NAND device found!!!\n"); -+ this->select_chip(mtd, -1); -+ return 1; -+ } -+ -+ for (i=1; i < maxchips; i++) { -+ this->select_chip(mtd, i); -+ -+ /* Send the command for reading device ID */ -+ this->cmdfunc (mtd, NAND_CMD_READID, 0x00, -1); -+ -+ /* Read manufacturer and device IDs */ -+ if (nand_maf_id != this->read_byte(mtd) || -+ nand_dev_id != this->read_byte(mtd)) -+ break; -+ } -+ if (i > 1) -+ printk(KERN_INFO "%d NAND chips detected\n", i); -+ -+ /* Allocate buffers, if neccecary */ -+ if (!this->oob_buf) { -+ size_t len; -+ len = mtd->oobsize << (this->phys_erase_shift - this->page_shift); -+ this->oob_buf = kmalloc (len, GFP_KERNEL); -+ if (!this->oob_buf) { -+ printk (KERN_ERR "nand_scan(): Cannot allocate oob_buf\n"); -+ return -ENOMEM; -+ } -+ this->options |= NAND_OOBBUF_ALLOC; -+ } -+ -+ if (!this->data_buf) { -+ size_t len; -+ len = mtd->oobblock + mtd->oobsize; -+ this->data_buf = kmalloc (len, GFP_KERNEL); -+ if (!this->data_buf) { -+ if (this->options & NAND_OOBBUF_ALLOC) -+ kfree (this->oob_buf); -+ printk (KERN_ERR "nand_scan(): Cannot allocate data_buf\n"); -+ return -ENOMEM; -+ } -+ this->options |= NAND_DATABUF_ALLOC; -+ } -+ -+ /* Store the number of chips and calc total size for mtd */ -+ this->numchips = i; -+ mtd->size = i * this->chipsize; -+ /* Convert chipsize to number of pages per chip -1. 
*/ -+ this->pagemask = (this->chipsize >> this->page_shift) - 1; -+ /* Preset the internal oob buffer */ -+ memset(this->oob_buf, 0xff, mtd->oobsize << (this->phys_erase_shift - this->page_shift)); -+ -+ /* If no default placement scheme is given, select an -+ * appropriate one */ -+ if (!this->autooob) { -+ /* Select the appropriate default oob placement scheme for -+ * placement agnostic filesystems */ -+ switch (mtd->oobsize) { -+ case 8: -+ this->autooob = &nand_oob_8; -+ break; -+ case 16: -+ this->autooob = &nand_oob_16; -+ break; -+ case 64: -+ this->autooob = &nand_oob_64; -+ break; -+ default: -+ printk (KERN_WARNING "No oob scheme defined for oobsize %d\n", -+ mtd->oobsize); -+ BUG(); -+ } -+ } -+ -+ /* The number of bytes available for the filesystem to place fs dependend -+ * oob data */ -+ if (this->options & NAND_BUSWIDTH_16) { -+ mtd->oobavail = mtd->oobsize - (this->autooob->eccbytes + 2); -+ if (this->autooob->eccbytes & 0x01) -+ mtd->oobavail--; -+ } else -+ mtd->oobavail = mtd->oobsize - (this->autooob->eccbytes + 1); -+ -+ /* -+ * check ECC mode, default to software -+ * if 3byte/512byte hardware ECC is selected and we have 256 byte pagesize -+ * fallback to software ECC -+ */ -+ this->eccsize = 256; /* set default eccsize */ -+ -+ switch (this->eccmode) { -+ -+ case NAND_ECC_HW3_512: -+ case NAND_ECC_HW6_512: -+ case NAND_ECC_HW8_512: -+ if (mtd->oobblock == 256) { -+ printk (KERN_WARNING "512 byte HW ECC not possible on 256 Byte pagesize, fallback to SW ECC \n"); -+ this->eccmode = NAND_ECC_SOFT; -+ this->calculate_ecc = nand_calculate_ecc; -+ this->correct_data = nand_correct_data; -+ break; -+ } else -+ this->eccsize = 512; /* set eccsize to 512 and fall through for function check */ -+ -+ case NAND_ECC_HW3_256: -+ if (this->calculate_ecc && this->correct_data && this->enable_hwecc) -+ break; -+ printk (KERN_WARNING "No ECC functions supplied, Hardware ECC not possible\n"); -+ BUG(); -+ -+ case NAND_ECC_NONE: -+ printk (KERN_WARNING 
"NAND_ECC_NONE selected by board driver. This is not recommended !!\n"); -+ this->eccmode = NAND_ECC_NONE; -+ break; -+ -+ case NAND_ECC_SOFT: -+ this->calculate_ecc = nand_calculate_ecc; -+ this->correct_data = nand_correct_data; -+ break; -+ -+ default: -+ printk (KERN_WARNING "Invalid NAND_ECC_MODE %d\n", this->eccmode); -+ BUG(); -+ } -+ -+ mtd->eccsize = this->eccsize; -+ -+ /* Set the number of read / write steps for one page to ensure ECC generation */ -+ switch (this->eccmode) { -+ case NAND_ECC_HW3_512: -+ case NAND_ECC_HW6_512: -+ case NAND_ECC_HW8_512: -+ this->eccsteps = mtd->oobblock / 512; -+ break; -+ case NAND_ECC_HW3_256: -+ case NAND_ECC_SOFT: -+ this->eccsteps = mtd->oobblock / 256; -+ break; -+ -+ case NAND_ECC_NONE: -+ this->eccsteps = 1; -+ break; -+ } -+ -+ /* Initialize state, waitqueue and spinlock */ -+ this->state = FL_READY; -+ init_waitqueue_head (&this->wq); -+ spin_lock_init (&this->chip_lock); -+ -+ /* De-select the device */ -+ this->select_chip(mtd, -1); -+ -+ /* Invalidate the pagebuffer reference */ -+ this->pagebuf = -1; -+ -+ /* Fill in remaining MTD driver data */ -+ mtd->type = MTD_NANDFLASH; -+ mtd->flags = MTD_CAP_NANDFLASH | MTD_ECC; -+ mtd->ecctype = MTD_ECC_SW; -+ mtd->erase = nand_erase; -+ mtd->point = NULL; -+ mtd->unpoint = NULL; -+ mtd->read = nand_read; -+ mtd->write = nand_write; -+ mtd->read_ecc = nand_read_ecc; -+ mtd->write_ecc = nand_write_ecc; -+ mtd->read_oob = nand_read_oob; -+ mtd->write_oob = nand_write_oob; -+ mtd->readv = NULL; -+ mtd->writev = nand_writev; -+ mtd->writev_ecc = nand_writev_ecc; -+ mtd->sync = nand_sync; -+ mtd->lock = NULL; -+ mtd->unlock = NULL; -+ mtd->suspend = NULL; -+ mtd->resume = NULL; -+ mtd->block_isbad = nand_block_isbad; -+ mtd->block_markbad = nand_block_markbad; -+ -+ /* and make the autooob the default one */ -+ memcpy(&mtd->oobinfo, this->autooob, sizeof(mtd->oobinfo)); -+ -+ mtd->owner = THIS_MODULE; -+ -+ /* Build bad block table */ -+ return this->scan_bbt (mtd); -+} 
-+ -+/** -+ * nand_release - [NAND Interface] Free resources held by the NAND device -+ * @mtd: MTD device structure -+*/ -+void nand_release (struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ -+#ifdef CONFIG_MTD_PARTITIONS -+ /* Deregister partitions */ -+ del_mtd_partitions (mtd); -+#endif -+ /* Deregister the device */ -+ del_mtd_device (mtd); -+ -+ /* Free bad block table memory, if allocated */ -+ if (this->bbt) -+ kfree (this->bbt); -+ /* Buffer allocated by nand_scan ? */ -+ if (this->options & NAND_OOBBUF_ALLOC) -+ kfree (this->oob_buf); -+ /* Buffer allocated by nand_scan ? */ -+ if (this->options & NAND_DATABUF_ALLOC) -+ kfree (this->data_buf); -+} -+ -+EXPORT_SYMBOL (nand_scan); -+EXPORT_SYMBOL (nand_release); -+ -+MODULE_LICENSE ("GPL"); -+MODULE_AUTHOR ("Steven J. Hill <sjhill@realitydiluted.com>, Thomas Gleixner <tglx@linutronix.de>"); -+MODULE_DESCRIPTION ("Generic NAND flash driver code"); -Index: linux-2.6.5/drivers/mtd/nand/nand_bbt.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/nand_bbt.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/nand_bbt.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,1053 @@ -+/* -+ * drivers/mtd/nand_bbt.c -+ * -+ * Overview: -+ * Bad block table support for the NAND driver -+ * -+ * Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de) -+ * -+ * $Id: nand_bbt.c,v 1.24 2004/06/28 08:25:35 gleixner Exp $ -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * Description: -+ * -+ * When nand_scan_bbt is called, then it tries to find the bad block table -+ * depending on the options in the bbt descriptor(s). If a bbt is found -+ * then the contents are read and the memory based bbt is created. 
If a -+ * mirrored bbt is selected then the mirror is searched too and the -+ * versions are compared. If the mirror has a greater version number -+ * than the mirror bbt is used to build the memory based bbt. -+ * If the tables are not versioned, then we "or" the bad block information. -+ * If one of the bbt's is out of date or does not exist it is (re)created. -+ * If no bbt exists at all then the device is scanned for factory marked -+ * good / bad blocks and the bad block tables are created. -+ * -+ * For manufacturer created bbts like the one found on M-SYS DOC devices -+ * the bbt is searched and read but never created -+ * -+ * The autogenerated bad block table is located in the last good blocks -+ * of the device. The table is mirrored, so it can be updated eventually. -+ * The table is marked in the oob area with an ident pattern and a version -+ * number which indicates which of both tables is more up to date. -+ * -+ * The table uses 2 bits per block -+ * 11b: block is good -+ * 00b: block is factory marked bad -+ * 01b, 10b: block is marked bad due to wear -+ * -+ * The memory bad block table uses the following scheme: -+ * 00b: block is good -+ * 01b: block is marked bad due to wear -+ * 10b: block is reserved (to protect the bbt area) -+ * 11b: block is factory marked bad -+ * -+ * Multichip devices like DOC store the bad block info per floor. 
-+ * -+ * Following assumptions are made: -+ * - bbts start at a page boundary, if autolocated on a block boundary -+ * - the space neccecary for a bbt in FLASH does not exceed a block boundary -+ * -+ */ -+ -+#include <linux/slab.h> -+#include <linux/types.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/nand.h> -+#include <linux/mtd/nand_ecc.h> -+#include <linux/mtd/compatmac.h> -+#include <linux/bitops.h> -+#include <linux/delay.h> -+ -+ -+/** -+ * check_pattern - [GENERIC] check if a pattern is in the buffer -+ * @buf: the buffer to search -+ * @len: the length of buffer to search -+ * @paglen: the pagelength -+ * @td: search pattern descriptor -+ * -+ * Check for a pattern at the given place. Used to search bad block -+ * tables and good / bad block identifiers. -+ * If the SCAN_EMPTY option is set then check, if all bytes except the -+ * pattern area contain 0xff -+ * -+*/ -+static int check_pattern (uint8_t *buf, int len, int paglen, struct nand_bbt_descr *td) -+{ -+ int i, end; -+ uint8_t *p = buf; -+ -+ end = paglen + td->offs; -+ if (td->options & NAND_BBT_SCANEMPTY) { -+ for (i = 0; i < end; i++) { -+ if (p[i] != 0xff) -+ return -1; -+ } -+ } -+ p += end; -+ -+ /* Compare the pattern */ -+ for (i = 0; i < td->len; i++) { -+ if (p[i] != td->pattern[i]) -+ return -1; -+ } -+ -+ p += td->len; -+ end += td->len; -+ if (td->options & NAND_BBT_SCANEMPTY) { -+ for (i = end; i < len; i++) { -+ if (*p++ != 0xff) -+ return -1; -+ } -+ } -+ return 0; -+} -+ -+/** -+ * read_bbt - [GENERIC] Read the bad block table starting from page -+ * @mtd: MTD device structure -+ * @buf: temporary buffer -+ * @page: the starting page -+ * @num: the number of bbt descriptors to read -+ * @bits: number of bits per block -+ * @offs: offset in the memory table -+ * -+ * Read the bad block table starting from page. 
-+ * -+ */ -+static int read_bbt (struct mtd_info *mtd, uint8_t *buf, int page, int num, -+ int bits, int offs, int reserved_block_code) -+{ -+ int res, i, j, act = 0; -+ struct nand_chip *this = mtd->priv; -+ size_t retlen, len, totlen; -+ loff_t from; -+ uint8_t msk = (uint8_t) ((1 << bits) - 1); -+ -+ totlen = (num * bits) >> 3; -+ from = ((loff_t)page) << this->page_shift; -+ -+ while (totlen) { -+ len = min (totlen, (size_t) (1 << this->bbt_erase_shift)); -+ res = mtd->read_ecc (mtd, from, len, &retlen, buf, NULL, this->autooob); -+ if (res < 0) { -+ if (retlen != len) { -+ printk (KERN_INFO "nand_bbt: Error reading bad block table\n"); -+ return res; -+ } -+ printk (KERN_WARNING "nand_bbt: ECC error while reading bad block table\n"); -+ } -+ -+ /* Analyse data */ -+ for (i = 0; i < len; i++) { -+ uint8_t dat = buf[i]; -+ for (j = 0; j < 8; j += bits, act += 2) { -+ uint8_t tmp = (dat >> j) & msk; -+ if (tmp == msk) -+ continue; -+ if (reserved_block_code && -+ (tmp == reserved_block_code)) { -+ printk (KERN_DEBUG "nand_read_bbt: Reserved block at 0x%08x\n", -+ ((offs << 2) + (act >> 1)) << this->bbt_erase_shift); -+ this->bbt[offs + (act >> 3)] |= 0x2 << (act & 0x06); -+ continue; -+ } -+ /* Leave it for now, if its matured we can move this -+ * message to MTD_DEBUG_LEVEL0 */ -+ printk (KERN_DEBUG "nand_read_bbt: Bad block at 0x%08x\n", -+ ((offs << 2) + (act >> 1)) << this->bbt_erase_shift); -+ /* Factory marked bad or worn out ? */ -+ if (tmp == 0) -+ this->bbt[offs + (act >> 3)] |= 0x3 << (act & 0x06); -+ else -+ this->bbt[offs + (act >> 3)] |= 0x1 << (act & 0x06); -+ } -+ } -+ totlen -= len; -+ from += len; -+ } -+ return 0; -+} -+ -+/** -+ * read_abs_bbt - [GENERIC] Read the bad block table starting at a given page -+ * @mtd: MTD device structure -+ * @buf: temporary buffer -+ * @td: descriptor for the bad block table -+ * @chip: read the table for a specific chip, -1 read all chips. 
-+ * Applies only if NAND_BBT_PERCHIP option is set -+ * -+ * Read the bad block table for all chips starting at a given page -+ * We assume that the bbt bits are in consecutive order. -+*/ -+static int read_abs_bbt (struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr *td, int chip) -+{ -+ struct nand_chip *this = mtd->priv; -+ int res = 0, i; -+ int bits; -+ -+ bits = td->options & NAND_BBT_NRBITS_MSK; -+ if (td->options & NAND_BBT_PERCHIP) { -+ int offs = 0; -+ for (i = 0; i < this->numchips; i++) { -+ if (chip == -1 || chip == i) -+ res = read_bbt (mtd, buf, td->pages[i], this->chipsize >> this->bbt_erase_shift, bits, offs, td->reserved_block_code); -+ if (res) -+ return res; -+ offs += this->chipsize >> (this->bbt_erase_shift + 2); -+ } -+ } else { -+ res = read_bbt (mtd, buf, td->pages[0], mtd->size >> this->bbt_erase_shift, bits, 0, td->reserved_block_code); -+ if (res) -+ return res; -+ } -+ return 0; -+} -+ -+/** -+ * read_abs_bbts - [GENERIC] Read the bad block table(s) for all chips starting at a given page -+ * @mtd: MTD device structure -+ * @buf: temporary buffer -+ * @td: descriptor for the bad block table -+ * @md: descriptor for the bad block table mirror -+ * -+ * Read the bad block table(s) for all chips starting at a given page -+ * We assume that the bbt bits are in consecutive order. 
-+ * -+*/ -+static int read_abs_bbts (struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr *td, -+ struct nand_bbt_descr *md) -+{ -+ struct nand_chip *this = mtd->priv; -+ -+ /* Read the primary version, if available */ -+ if (td->options & NAND_BBT_VERSION) { -+ nand_read_raw (mtd, buf, td->pages[0] << this->page_shift, mtd->oobblock, mtd->oobsize); -+ td->version[0] = buf[mtd->oobblock + td->veroffs]; -+ printk (KERN_DEBUG "Bad block table at page %d, version 0x%02X\n", td->pages[0], td->version[0]); -+ } -+ -+ /* Read the mirror version, if available */ -+ if (md && (md->options & NAND_BBT_VERSION)) { -+ nand_read_raw (mtd, buf, md->pages[0] << this->page_shift, mtd->oobblock, mtd->oobsize); -+ md->version[0] = buf[mtd->oobblock + md->veroffs]; -+ printk (KERN_DEBUG "Bad block table at page %d, version 0x%02X\n", md->pages[0], md->version[0]); -+ } -+ -+ return 1; -+} -+ -+/** -+ * create_bbt - [GENERIC] Create a bad block table by scanning the device -+ * @mtd: MTD device structure -+ * @buf: temporary buffer -+ * @bd: descriptor for the good/bad block search pattern -+ * @chip: create the table for a specific chip, -1 read all chips. 
-+ * Applies only if NAND_BBT_PERCHIP option is set -+ * -+ * Create a bad block table by scanning the device -+ * for the given good/bad block identify pattern -+ */ -+static void create_bbt (struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr *bd, int chip) -+{ -+ struct nand_chip *this = mtd->priv; -+ int i, j, numblocks, len, scanlen; -+ int startblock; -+ loff_t from; -+ size_t readlen, ooblen; -+ -+ printk (KERN_INFO "Scanning device for bad blocks\n"); -+ -+ if (bd->options & NAND_BBT_SCANALLPAGES) -+ len = 1 << (this->bbt_erase_shift - this->page_shift); -+ else { -+ if (bd->options & NAND_BBT_SCAN2NDPAGE) -+ len = 2; -+ else -+ len = 1; -+ } -+ scanlen = mtd->oobblock + mtd->oobsize; -+ readlen = len * mtd->oobblock; -+ ooblen = len * mtd->oobsize; -+ -+ if (chip == -1) { -+ /* Note that numblocks is 2 * (real numblocks) here, see i+=2 below as it -+ * makes shifting and masking less painful */ -+ numblocks = mtd->size >> (this->bbt_erase_shift - 1); -+ startblock = 0; -+ from = 0; -+ } else { -+ if (chip >= this->numchips) { -+ printk (KERN_WARNING "create_bbt(): chipnr (%d) > available chips (%d)\n", -+ chip + 1, this->numchips); -+ return; -+ } -+ numblocks = this->chipsize >> (this->bbt_erase_shift - 1); -+ startblock = chip * numblocks; -+ numblocks += startblock; -+ from = startblock << (this->bbt_erase_shift - 1); -+ } -+ -+ for (i = startblock; i < numblocks;) { -+ nand_read_raw (mtd, buf, from, readlen, ooblen); -+ for (j = 0; j < len; j++) { -+ if (check_pattern (&buf[j * scanlen], scanlen, mtd->oobblock, bd)) { -+ this->bbt[i >> 3] |= 0x03 << (i & 0x6); -+ printk (KERN_WARNING "Bad eraseblock %d at 0x%08x\n", -+ i >> 1, (unsigned int) from); -+ break; -+ } -+ } -+ i += 2; -+ from += (1 << this->bbt_erase_shift); -+ } -+} -+ -+/** -+ * search_bbt - [GENERIC] scan the device for a specific bad block table -+ * @mtd: MTD device structure -+ * @buf: temporary buffer -+ * @td: descriptor for the bad block table -+ * -+ * Read the bad block 
table by searching for a given ident pattern. -+ * Search is preformed either from the beginning up or from the end of -+ * the device downwards. The search starts always at the start of a -+ * block. -+ * If the option NAND_BBT_PERCHIP is given, each chip is searched -+ * for a bbt, which contains the bad block information of this chip. -+ * This is neccecary to provide support for certain DOC devices. -+ * -+ * The bbt ident pattern resides in the oob area of the first page -+ * in a block. -+ */ -+static int search_bbt (struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr *td) -+{ -+ struct nand_chip *this = mtd->priv; -+ int i, chips; -+ int bits, startblock, block, dir; -+ int scanlen = mtd->oobblock + mtd->oobsize; -+ int bbtblocks; -+ -+ /* Search direction top -> down ? */ -+ if (td->options & NAND_BBT_LASTBLOCK) { -+ startblock = (mtd->size >> this->bbt_erase_shift) -1; -+ dir = -1; -+ } else { -+ startblock = 0; -+ dir = 1; -+ } -+ -+ /* Do we have a bbt per chip ? */ -+ if (td->options & NAND_BBT_PERCHIP) { -+ chips = this->numchips; -+ bbtblocks = this->chipsize >> this->bbt_erase_shift; -+ startblock &= bbtblocks - 1; -+ } else { -+ chips = 1; -+ bbtblocks = mtd->size >> this->bbt_erase_shift; -+ } -+ -+ /* Number of bits for each erase block in the bbt */ -+ bits = td->options & NAND_BBT_NRBITS_MSK; -+ -+ for (i = 0; i < chips; i++) { -+ /* Reset version information */ -+ td->version[i] = 0; -+ td->pages[i] = -1; -+ /* Scan the maximum number of blocks */ -+ for (block = 0; block < td->maxblocks; block++) { -+ int actblock = startblock + dir * block; -+ /* Read first page */ -+ nand_read_raw (mtd, buf, actblock << this->bbt_erase_shift, mtd->oobblock, mtd->oobsize); -+ if (!check_pattern(buf, scanlen, mtd->oobblock, td)) { -+ td->pages[i] = actblock << (this->bbt_erase_shift - this->page_shift); -+ if (td->options & NAND_BBT_VERSION) { -+ td->version[i] = buf[mtd->oobblock + td->veroffs]; -+ } -+ break; -+ } -+ } -+ startblock += this->chipsize 
>> this->bbt_erase_shift; -+ } -+ /* Check, if we found a bbt for each requested chip */ -+ for (i = 0; i < chips; i++) { -+ if (td->pages[i] == -1) -+ printk (KERN_WARNING "Bad block table not found for chip %d\n", i); -+ else -+ printk (KERN_DEBUG "Bad block table found at page %d, version 0x%02X\n", td->pages[i], td->version[i]); -+ } -+ return 0; -+} -+ -+/** -+ * search_read_bbts - [GENERIC] scan the device for bad block table(s) -+ * @mtd: MTD device structure -+ * @buf: temporary buffer -+ * @td: descriptor for the bad block table -+ * @md: descriptor for the bad block table mirror -+ * -+ * Search and read the bad block table(s) -+*/ -+static int search_read_bbts (struct mtd_info *mtd, uint8_t *buf, -+ struct nand_bbt_descr *td, struct nand_bbt_descr *md) -+{ -+ /* Search the primary table */ -+ search_bbt (mtd, buf, td); -+ -+ /* Search the mirror table */ -+ if (md) -+ search_bbt (mtd, buf, md); -+ -+ /* Force result check */ -+ return 1; -+} -+ -+ -+/** -+ * write_bbt - [GENERIC] (Re)write the bad block table -+ * -+ * @mtd: MTD device structure -+ * @buf: temporary buffer -+ * @td: descriptor for the bad block table -+ * @md: descriptor for the bad block table mirror -+ * @chipsel: selector for a specific chip, -1 for all -+ * -+ * (Re)write the bad block table -+ * -+*/ -+static int write_bbt (struct mtd_info *mtd, uint8_t *buf, -+ struct nand_bbt_descr *td, struct nand_bbt_descr *md, int chipsel) -+{ -+ struct nand_chip *this = mtd->priv; -+ struct nand_oobinfo oobinfo; -+ struct erase_info einfo; -+ int i, j, res, chip = 0; -+ int bits, startblock, dir, page, offs, numblocks, sft, sftmsk; -+ int nrchips, bbtoffs, pageoffs; -+ uint8_t msk[4]; -+ uint8_t rcode = td->reserved_block_code; -+ size_t retlen, len = 0; -+ loff_t to; -+ -+ if (!rcode) -+ rcode = 0xff; -+ /* Write bad block table per chip rather than per device ? 
*/ -+ if (td->options & NAND_BBT_PERCHIP) { -+ numblocks = (int) (this->chipsize >> this->bbt_erase_shift); -+ /* Full device write or specific chip ? */ -+ if (chipsel == -1) { -+ nrchips = this->numchips; -+ } else { -+ nrchips = chipsel + 1; -+ chip = chipsel; -+ } -+ } else { -+ numblocks = (int) (mtd->size >> this->bbt_erase_shift); -+ nrchips = 1; -+ } -+ -+ /* Loop through the chips */ -+ for (; chip < nrchips; chip++) { -+ -+ /* There was already a version of the table, reuse the page -+ * This applies for absolute placement too, as we have the -+ * page nr. in td->pages. -+ */ -+ if (td->pages[chip] != -1) { -+ page = td->pages[chip]; -+ goto write; -+ } -+ -+ /* Automatic placement of the bad block table */ -+ /* Search direction top -> down ? */ -+ if (td->options & NAND_BBT_LASTBLOCK) { -+ startblock = numblocks * (chip + 1) - 1; -+ dir = -1; -+ } else { -+ startblock = chip * numblocks; -+ dir = 1; -+ } -+ -+ for (i = 0; i < td->maxblocks; i++) { -+ int block = startblock + dir * i; -+ /* Check, if the block is bad */ -+ switch ((this->bbt[block >> 2] >> (2 * (block & 0x03))) & 0x03) { -+ case 0x01: -+ case 0x03: -+ continue; -+ } -+ page = block << (this->bbt_erase_shift - this->page_shift); -+ /* Check, if the block is used by the mirror table */ -+ if (!md || md->pages[chip] != page) -+ goto write; -+ } -+ printk (KERN_ERR "No space left to write bad block table\n"); -+ return -ENOSPC; -+write: -+ -+ /* Set up shift count and masks for the flash table */ -+ bits = td->options & NAND_BBT_NRBITS_MSK; -+ switch (bits) { -+ case 1: sft = 3; sftmsk = 0x07; msk[0] = 0x00; msk[1] = 0x01; msk[2] = ~rcode; msk[3] = 0x01; break; -+ case 2: sft = 2; sftmsk = 0x06; msk[0] = 0x00; msk[1] = 0x01; msk[2] = ~rcode; msk[3] = 0x03; break; -+ case 4: sft = 1; sftmsk = 0x04; msk[0] = 0x00; msk[1] = 0x0C; msk[2] = ~rcode; msk[3] = 0x0f; break; -+ case 8: sft = 0; sftmsk = 0x00; msk[0] = 0x00; msk[1] = 0x0F; msk[2] = ~rcode; msk[3] = 0xff; break; -+ default: return 
-EINVAL; -+ } -+ -+ bbtoffs = chip * (numblocks >> 2); -+ -+ to = ((loff_t) page) << this->page_shift; -+ -+ memcpy (&oobinfo, this->autooob, sizeof(oobinfo)); -+ oobinfo.useecc = MTD_NANDECC_PLACEONLY; -+ -+ /* Must we save the block contents ? */ -+ if (td->options & NAND_BBT_SAVECONTENT) { -+ /* Make it block aligned */ -+ to &= ~((loff_t) ((1 << this->bbt_erase_shift) - 1)); -+ len = 1 << this->bbt_erase_shift; -+ res = mtd->read_ecc (mtd, to, len, &retlen, buf, &buf[len], &oobinfo); -+ if (res < 0) { -+ if (retlen != len) { -+ printk (KERN_INFO "nand_bbt: Error reading block for writing the bad block table\n"); -+ return res; -+ } -+ printk (KERN_WARNING "nand_bbt: ECC error while reading block for writing bad block table\n"); -+ } -+ /* Calc the byte offset in the buffer */ -+ pageoffs = page - (int)(to >> this->page_shift); -+ offs = pageoffs << this->page_shift; -+ /* Preset the bbt area with 0xff */ -+ memset (&buf[offs], 0xff, (size_t)(numblocks >> sft)); -+ /* Preset the bbt's oob area with 0xff */ -+ memset (&buf[len + pageoffs * mtd->oobsize], 0xff, -+ ((len >> this->page_shift) - pageoffs) * mtd->oobsize); -+ if (td->options & NAND_BBT_VERSION) { -+ buf[len + (pageoffs * mtd->oobsize) + td->veroffs] = td->version[chip]; -+ } -+ } else { -+ /* Calc length */ -+ len = (size_t) (numblocks >> sft); -+ /* Make it page aligned ! 
*/ -+ len = (len + (mtd->oobblock-1)) & ~(mtd->oobblock-1); -+ /* Preset the buffer with 0xff */ -+ memset (buf, 0xff, len + (len >> this->page_shift) * mtd->oobsize); -+ offs = 0; -+ /* Pattern is located in oob area of first page */ -+ memcpy (&buf[len + td->offs], td->pattern, td->len); -+ if (td->options & NAND_BBT_VERSION) { -+ buf[len + td->veroffs] = td->version[chip]; -+ } -+ } -+ -+ /* walk through the memory table */ -+ for (i = 0; i < numblocks; ) { -+ uint8_t dat; -+ dat = this->bbt[bbtoffs + (i >> 2)]; -+ for (j = 0; j < 4; j++ , i++) { -+ int sftcnt = (i << (3 - sft)) & sftmsk; -+ /* Do not store the reserved bbt blocks ! */ -+ buf[offs + (i >> sft)] &= ~(msk[dat & 0x03] << sftcnt); -+ dat >>= 2; -+ } -+ } -+ -+ memset (&einfo, 0, sizeof (einfo)); -+ einfo.mtd = mtd; -+ einfo.addr = (unsigned long) to; -+ einfo.len = 1 << this->bbt_erase_shift; -+ res = nand_erase_nand (mtd, &einfo, 1); -+ if (res < 0) { -+ printk (KERN_WARNING "nand_bbt: Error during block erase: %d\n", res); -+ return res; -+ } -+ -+ res = mtd->write_ecc (mtd, to, len, &retlen, buf, &buf[len], &oobinfo); -+ if (res < 0) { -+ printk (KERN_WARNING "nand_bbt: Error while writing bad block table %d\n", res); -+ return res; -+ } -+ printk (KERN_DEBUG "Bad block table written to 0x%08x, version 0x%02X\n", -+ (unsigned int) to, td->version[chip]); -+ -+ /* Mark it as used */ -+ td->pages[chip] = page; -+ } -+ return 0; -+} -+ -+/** -+ * nand_memory_bbt - [GENERIC] create a memory based bad block table -+ * @mtd: MTD device structure -+ * @bd: descriptor for the good/bad block search pattern -+ * -+ * The function creates a memory based bbt by scanning the device -+ * for manufacturer / software marked good / bad blocks -+*/ -+static int nand_memory_bbt (struct mtd_info *mtd, struct nand_bbt_descr *bd) -+{ -+ struct nand_chip *this = mtd->priv; -+ -+ /* Ensure that we only scan for the pattern and nothing else */ -+ bd->options = 0; -+ create_bbt (mtd, this->data_buf, bd, -1); -+ return 0; 
-+} -+ -+/** -+ * check_create - [GENERIC] create and write bbt(s) if neccecary -+ * @mtd: MTD device structure -+ * @buf: temporary buffer -+ * @bd: descriptor for the good/bad block search pattern -+ * -+ * The function checks the results of the previous call to read_bbt -+ * and creates / updates the bbt(s) if neccecary -+ * Creation is neccecary if no bbt was found for the chip/device -+ * Update is neccecary if one of the tables is missing or the -+ * version nr. of one table is less than the other -+*/ -+static int check_create (struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr *bd) -+{ -+ int i, chips, writeops, chipsel, res; -+ struct nand_chip *this = mtd->priv; -+ struct nand_bbt_descr *td = this->bbt_td; -+ struct nand_bbt_descr *md = this->bbt_md; -+ struct nand_bbt_descr *rd, *rd2; -+ -+ /* Do we have a bbt per chip ? */ -+ if (td->options & NAND_BBT_PERCHIP) -+ chips = this->numchips; -+ else -+ chips = 1; -+ -+ for (i = 0; i < chips; i++) { -+ writeops = 0; -+ rd = NULL; -+ rd2 = NULL; -+ /* Per chip or per device ? */ -+ chipsel = (td->options & NAND_BBT_PERCHIP) ? i : -1; -+ /* Mirrored table avilable ? 
*/ -+ if (md) { -+ if (td->pages[i] == -1 && md->pages[i] == -1) { -+ writeops = 0x03; -+ goto create; -+ } -+ -+ if (td->pages[i] == -1) { -+ rd = md; -+ td->version[i] = md->version[i]; -+ writeops = 1; -+ goto writecheck; -+ } -+ -+ if (md->pages[i] == -1) { -+ rd = td; -+ md->version[i] = td->version[i]; -+ writeops = 2; -+ goto writecheck; -+ } -+ -+ if (td->version[i] == md->version[i]) { -+ rd = td; -+ if (!(td->options & NAND_BBT_VERSION)) -+ rd2 = md; -+ goto writecheck; -+ } -+ -+ if (((int8_t) (td->version[i] - md->version[i])) > 0) { -+ rd = td; -+ md->version[i] = td->version[i]; -+ writeops = 2; -+ } else { -+ rd = md; -+ td->version[i] = md->version[i]; -+ writeops = 1; -+ } -+ -+ goto writecheck; -+ -+ } else { -+ if (td->pages[i] == -1) { -+ writeops = 0x01; -+ goto create; -+ } -+ rd = td; -+ goto writecheck; -+ } -+create: -+ /* Create the bad block table by scanning the device ? */ -+ if (!(td->options & NAND_BBT_CREATE)) -+ continue; -+ -+ /* Create the table in memory by scanning the chip(s) */ -+ create_bbt (mtd, buf, bd, chipsel); -+ -+ td->version[i] = 1; -+ if (md) -+ md->version[i] = 1; -+writecheck: -+ /* read back first ? */ -+ if (rd) -+ read_abs_bbt (mtd, buf, rd, chipsel); -+ /* If they weren't versioned, read both. */ -+ if (rd2) -+ read_abs_bbt (mtd, buf, rd2, chipsel); -+ -+ /* Write the bad block table to the device ? */ -+ if ((writeops & 0x01) && (td->options & NAND_BBT_WRITE)) { -+ res = write_bbt (mtd, buf, td, md, chipsel); -+ if (res < 0) -+ return res; -+ } -+ -+ /* Write the mirror bad block table to the device ? 
*/ -+ if ((writeops & 0x02) && md && (md->options & NAND_BBT_WRITE)) { -+ res = write_bbt (mtd, buf, md, td, chipsel); -+ if (res < 0) -+ return res; -+ } -+ } -+ return 0; -+} -+ -+/** -+ * mark_bbt_regions - [GENERIC] mark the bad block table regions -+ * @mtd: MTD device structure -+ * @td: bad block table descriptor -+ * -+ * The bad block table regions are marked as "bad" to prevent -+ * accidental erasures / writes. The regions are identified by -+ * the mark 0x02. -+*/ -+static void mark_bbt_region (struct mtd_info *mtd, struct nand_bbt_descr *td) -+{ -+ struct nand_chip *this = mtd->priv; -+ int i, j, chips, block, nrblocks, update; -+ uint8_t oldval, newval; -+ -+ /* Do we have a bbt per chip ? */ -+ if (td->options & NAND_BBT_PERCHIP) { -+ chips = this->numchips; -+ nrblocks = (int)(this->chipsize >> this->bbt_erase_shift); -+ } else { -+ chips = 1; -+ nrblocks = (int)(mtd->size >> this->bbt_erase_shift); -+ } -+ -+ for (i = 0; i < chips; i++) { -+ if ((td->options & NAND_BBT_ABSPAGE) || -+ !(td->options & NAND_BBT_WRITE)) { -+ if (td->pages[i] == -1) continue; -+ block = td->pages[i] >> (this->bbt_erase_shift - this->page_shift); -+ block <<= 1; -+ oldval = this->bbt[(block >> 3)]; -+ newval = oldval | (0x2 << (block & 0x06)); -+ this->bbt[(block >> 3)] = newval; -+ if ((oldval != newval) && td->reserved_block_code) -+ nand_update_bbt(mtd, block << (this->bbt_erase_shift - 1)); -+ continue; -+ } -+ update = 0; -+ if (td->options & NAND_BBT_LASTBLOCK) -+ block = ((i + 1) * nrblocks) - td->maxblocks; -+ else -+ block = i * nrblocks; -+ block <<= 1; -+ for (j = 0; j < td->maxblocks; j++) { -+ oldval = this->bbt[(block >> 3)]; -+ newval = oldval | (0x2 << (block & 0x06)); -+ this->bbt[(block >> 3)] = newval; -+ if (oldval != newval) update = 1; -+ block += 2; -+ } -+ /* If we want reserved blocks to be recorded to flash, and some -+ new ones have been marked, then we need to update the stored -+ bbts. This should only happen once. 
*/ -+ if (update && td->reserved_block_code) -+ nand_update_bbt(mtd, (block - 2) << (this->bbt_erase_shift - 1)); -+ } -+} -+ -+/** -+ * nand_scan_bbt - [NAND Interface] scan, find, read and maybe create bad block table(s) -+ * @mtd: MTD device structure -+ * @bd: descriptor for the good/bad block search pattern -+ * -+ * The function checks, if a bad block table(s) is/are already -+ * available. If not it scans the device for manufacturer -+ * marked good / bad blocks and writes the bad block table(s) to -+ * the selected place. -+ * -+ * The bad block table memory is allocated here. It must be freed -+ * by calling the nand_free_bbt function. -+ * -+*/ -+int nand_scan_bbt (struct mtd_info *mtd, struct nand_bbt_descr *bd) -+{ -+ struct nand_chip *this = mtd->priv; -+ int len, res = 0; -+ uint8_t *buf; -+ struct nand_bbt_descr *td = this->bbt_td; -+ struct nand_bbt_descr *md = this->bbt_md; -+ -+ len = mtd->size >> (this->bbt_erase_shift + 2); -+ /* Allocate memory (2bit per block) */ -+ this->bbt = (uint8_t *) kmalloc (len, GFP_KERNEL); -+ if (!this->bbt) { -+ printk (KERN_ERR "nand_scan_bbt: Out of memory\n"); -+ return -ENOMEM; -+ } -+ /* Clear the memory bad block table */ -+ memset (this->bbt, 0x00, len); -+ -+ /* If no primary table decriptor is given, scan the device -+ * to build a memory based bad block table -+ */ -+ if (!td) -+ return nand_memory_bbt(mtd, bd); -+ -+ /* Allocate a temporary buffer for one eraseblock incl. oob */ -+ len = (1 << this->bbt_erase_shift); -+ len += (len >> this->page_shift) * mtd->oobsize; -+ buf = kmalloc (len, GFP_KERNEL); -+ if (!buf) { -+ printk (KERN_ERR "nand_bbt: Out of memory\n"); -+ kfree (this->bbt); -+ this->bbt = NULL; -+ return -ENOMEM; -+ } -+ -+ /* Is the bbt at a given page ? 
*/ -+ if (td->options & NAND_BBT_ABSPAGE) { -+ res = read_abs_bbts (mtd, buf, td, md); -+ } else { -+ /* Search the bad block table using a pattern in oob */ -+ res = search_read_bbts (mtd, buf, td, md); -+ } -+ -+ if (res) -+ res = check_create (mtd, buf, bd); -+ -+ /* Prevent the bbt regions from erasing / writing */ -+ mark_bbt_region (mtd, td); -+ if (md) -+ mark_bbt_region (mtd, md); -+ -+ kfree (buf); -+ return res; -+} -+ -+ -+/** -+ * nand_update_bbt - [NAND Interface] update bad block table(s) -+ * @mtd: MTD device structure -+ * @offs: the offset of the newly marked block -+ * -+ * The function updates the bad block table(s) -+*/ -+int nand_update_bbt (struct mtd_info *mtd, loff_t offs) -+{ -+ struct nand_chip *this = mtd->priv; -+ int len, res = 0, writeops = 0; -+ int chip, chipsel; -+ uint8_t *buf; -+ struct nand_bbt_descr *td = this->bbt_td; -+ struct nand_bbt_descr *md = this->bbt_md; -+ -+ if (!this->bbt || !td) -+ return -EINVAL; -+ -+ len = mtd->size >> (this->bbt_erase_shift + 2); -+ /* Allocate a temporary buffer for one eraseblock incl. oob */ -+ len = (1 << this->bbt_erase_shift); -+ len += (len >> this->page_shift) * mtd->oobsize; -+ buf = kmalloc (len, GFP_KERNEL); -+ if (!buf) { -+ printk (KERN_ERR "nand_update_bbt: Out of memory\n"); -+ return -ENOMEM; -+ } -+ -+ writeops = md != NULL ? 0x03 : 0x01; -+ -+ /* Do we have a bbt per chip ? */ -+ if (td->options & NAND_BBT_PERCHIP) { -+ chip = (int) (offs >> this->chip_shift); -+ chipsel = chip; -+ } else { -+ chip = 0; -+ chipsel = -1; -+ } -+ -+ td->version[chip]++; -+ if (md) -+ md->version[chip]++; -+ -+ /* Write the bad block table to the device ? */ -+ if ((writeops & 0x01) && (td->options & NAND_BBT_WRITE)) { -+ res = write_bbt (mtd, buf, td, md, chipsel); -+ if (res < 0) -+ goto out; -+ } -+ /* Write the mirror bad block table to the device ? 
*/ -+ if ((writeops & 0x02) && md && (md->options & NAND_BBT_WRITE)) { -+ res = write_bbt (mtd, buf, md, td, chipsel); -+ } -+ -+out: -+ kfree (buf); -+ return res; -+} -+ -+/* Define some generic bad / good block scan pattern which are used -+ * while scanning a device for factory marked good / bad blocks -+ * -+ * The memory based patterns just -+ */ -+static uint8_t scan_ff_pattern[] = { 0xff, 0xff }; -+ -+static struct nand_bbt_descr smallpage_memorybased = { -+ .options = 0, -+ .offs = 5, -+ .len = 1, -+ .pattern = scan_ff_pattern -+}; -+ -+static struct nand_bbt_descr largepage_memorybased = { -+ .options = 0, -+ .offs = 0, -+ .len = 2, -+ .pattern = scan_ff_pattern -+}; -+ -+static struct nand_bbt_descr smallpage_flashbased = { -+ .options = NAND_BBT_SCANEMPTY | NAND_BBT_SCANALLPAGES, -+ .offs = 5, -+ .len = 1, -+ .pattern = scan_ff_pattern -+}; -+ -+static struct nand_bbt_descr largepage_flashbased = { -+ .options = NAND_BBT_SCANEMPTY | NAND_BBT_SCANALLPAGES, -+ .offs = 0, -+ .len = 2, -+ .pattern = scan_ff_pattern -+}; -+ -+static uint8_t scan_agand_pattern[] = { 0x1C, 0x71, 0xC7, 0x1C, 0x71, 0xC7 }; -+ -+static struct nand_bbt_descr agand_flashbased = { -+ .options = NAND_BBT_SCANEMPTY | NAND_BBT_SCANALLPAGES, -+ .offs = 0x20, -+ .len = 6, -+ .pattern = scan_agand_pattern -+}; -+ -+/* Generic flash bbt decriptors -+*/ -+static uint8_t bbt_pattern[] = {'B', 'b', 't', '0' }; -+static uint8_t mirror_pattern[] = {'1', 't', 'b', 'B' }; -+ -+static struct nand_bbt_descr bbt_main_descr = { -+ .options = NAND_BBT_LASTBLOCK | NAND_BBT_CREATE | NAND_BBT_WRITE -+ | NAND_BBT_2BIT | NAND_BBT_VERSION | NAND_BBT_PERCHIP, -+ .offs = 8, -+ .len = 4, -+ .veroffs = 12, -+ .maxblocks = 4, -+ .pattern = bbt_pattern -+}; -+ -+static struct nand_bbt_descr bbt_mirror_descr = { -+ .options = NAND_BBT_LASTBLOCK | NAND_BBT_CREATE | NAND_BBT_WRITE -+ | NAND_BBT_2BIT | NAND_BBT_VERSION | NAND_BBT_PERCHIP, -+ .offs = 8, -+ .len = 4, -+ .veroffs = 12, -+ .maxblocks = 4, -+ .pattern = 
mirror_pattern -+}; -+ -+/** -+ * nand_default_bbt - [NAND Interface] Select a default bad block table for the device -+ * @mtd: MTD device structure -+ * -+ * This function selects the default bad block table -+ * support for the device and calls the nand_scan_bbt function -+ * -+*/ -+int nand_default_bbt (struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ -+ /* Default for AG-AND. We must use a flash based -+ * bad block table as the devices have factory marked -+ * _good_ blocks. Erasing those blocks leads to loss -+ * of the good / bad information, so we _must_ store -+ * this information in a good / bad table during -+ * startup -+ */ -+ if (this->options & NAND_IS_AND) { -+ /* Use the default pattern descriptors */ -+ if (!this->bbt_td) { -+ this->bbt_td = &bbt_main_descr; -+ this->bbt_md = &bbt_mirror_descr; -+ } -+ this->options |= NAND_USE_FLASH_BBT; -+ return nand_scan_bbt (mtd, &agand_flashbased); -+ } -+ -+ /* Is a flash based bad block table requested ? */ -+ if (this->options & NAND_USE_FLASH_BBT) { -+ /* Use the default pattern descriptors */ -+ if (!this->bbt_td) { -+ this->bbt_td = &bbt_main_descr; -+ this->bbt_md = &bbt_mirror_descr; -+ } -+ if (mtd->oobblock > 512) -+ return nand_scan_bbt (mtd, &largepage_flashbased); -+ else -+ return nand_scan_bbt (mtd, &smallpage_flashbased); -+ } else { -+ this->bbt_td = NULL; -+ this->bbt_md = NULL; -+ if (mtd->oobblock > 512) -+ return nand_scan_bbt (mtd, &largepage_memorybased); -+ else -+ return nand_scan_bbt (mtd, &smallpage_memorybased); -+ } -+} -+ -+/** -+ * nand_isbad_bbt - [NAND Interface] Check if a block is bad -+ * @mtd: MTD device structure -+ * @offs: offset in the device -+ * @allowbbt: allow access to bad block table region -+ * -+*/ -+int nand_isbad_bbt (struct mtd_info *mtd, loff_t offs, int allowbbt) -+{ -+ struct nand_chip *this = mtd->priv; -+ int block; -+ uint8_t res; -+ -+ /* Get block number * 2 */ -+ block = (int) (offs >> (this->bbt_erase_shift - 1)); -+ res = 
(this->bbt[block >> 3] >> (block & 0x06)) & 0x03; -+ -+ DEBUG (MTD_DEBUG_LEVEL2, "nand_isbad_bbt(): bbt info for offs 0x%08x: (block %d) 0x%02x\n", -+ (unsigned int)offs, res, block >> 1); -+ -+ switch ((int)res) { -+ case 0x00: return 0; -+ case 0x01: return 1; -+ case 0x02: return allowbbt ? 0 : 1; -+ } -+ return 1; -+} -+ -+EXPORT_SYMBOL (nand_scan_bbt); -+EXPORT_SYMBOL (nand_default_bbt); -Index: linux-2.6.5/drivers/mtd/nand/nand_ecc.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/nand_ecc.c 2004-04-03 22:36:54.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/nand_ecc.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,22 +1,44 @@ - /* -- * drivers/mtd/nand_ecc.c -+ * This file contains an ECC algorithm from Toshiba that detects and -+ * corrects 1 bit errors in a 256 byte block of data. - * -- * Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com) -- * Toshiba America Electronics Components, Inc. -+ * drivers/mtd/nand/nand_ecc.c - * -- * $Id: nand_ecc.c,v 1.9 2003/02/20 13:34:19 sjhill Exp $ -+ * Copyright (C) 2000-2004 Steven J. Hill (sjhill@realitydiluted.com) -+ * Toshiba America Electronics Components, Inc. - * -- * This program is free software; you can redistribute it and/or -- * modify it under the terms of the GNU Lesser General Public License -- * version 2.1 as published by the Free Software Foundation. -+ * $Id: nand_ecc.c,v 1.14 2004/06/16 15:34:37 gleixner Exp $ - * -- * This file contains an ECC algorithm from Toshiba that detects and -- * corrects 1 bit errors in a 256 byte block of data. -+ * This file is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the -+ * Free Software Foundation; either version 2 or (at your option) any -+ * later version. 
-+ * -+ * This file is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * for more details. -+ * -+ * You should have received a copy of the GNU General Public License along -+ * with this file; if not, write to the Free Software Foundation, Inc., -+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -+ * -+ * As a special exception, if other files instantiate templates or use -+ * macros or inline functions from these files, or you compile these -+ * files and link them with other works to produce a work based on these -+ * files, these files do not by themselves cause the resulting work to be -+ * covered by the GNU General Public License. However the source code for -+ * these files must still be made available in accordance with section (3) -+ * of the GNU General Public License. -+ * -+ * This exception does not invalidate any other reasons why a work based on -+ * this file might be covered by the GNU General Public License. 
- */ - - #include <linux/types.h> - #include <linux/kernel.h> - #include <linux/module.h> -+#include <linux/mtd/nand_ecc.h> - - /* - * Pre-calculated 256-way 1 byte column parity -@@ -41,7 +63,12 @@ - }; - - --/* -+/** -+ * nand_trans_result - [GENERIC] create non-inverted ECC -+ * @reg2: line parity reg 2 -+ * @reg3: line parity reg 3 -+ * @ecc_code: ecc -+ * - * Creates non-inverted ECC code from line parity - */ - static void nand_trans_result(u_char reg2, u_char reg3, -@@ -81,10 +108,13 @@ - ecc_code[1] = tmp2; - } - --/* -- * Calculate 3 byte ECC code for 256 byte block -+/** -+ * nand_calculate_ecc - [NAND Interface] Calculate 3 byte ECC code for 256 byte block -+ * @mtd: MTD block structure -+ * @dat: raw data -+ * @ecc_code: buffer for ECC - */ --void nand_calculate_ecc (const u_char *dat, u_char *ecc_code) -+int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat, u_char *ecc_code) - { - u_char idx, reg1, reg2, reg3; - int j; -@@ -114,12 +144,19 @@ - ecc_code[0] = ~ecc_code[0]; - ecc_code[1] = ~ecc_code[1]; - ecc_code[2] = ((~reg1) << 2) | 0x03; -+ return 0; - } - --/* -+/** -+ * nand_correct_data - [NAND Interface] Detect and correct bit error(s) -+ * @mtd: MTD block structure -+ * @dat: raw data read from the chip -+ * @read_ecc: ECC from the chip -+ * @calc_ecc: the ECC calculated from raw data -+ * - * Detect and correct a 1 bit error for 256 byte block - */ --int nand_correct_data (u_char *dat, u_char *read_ecc, u_char *calc_ecc) -+int nand_correct_data(struct mtd_info *mtd, u_char *dat, u_char *read_ecc, u_char *calc_ecc) - { - u_char a, b, c, d1, d2, d3, add, bit, i; - -Index: linux-2.6.5/drivers/mtd/nand/nand_ids.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/nand_ids.c 2004-04-03 22:36:56.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/nand_ids.c 2005-02-01 17:11:17.000000000 -0500 -@@ -2,9 +2,8 @@ - * drivers/mtd/nandids.c - * - * Copyright (C) 2002 Thomas Gleixner 
(tglx@linutronix.de) -- * -- * -- * $Id: nand_ids.c,v 1.4 2003/05/21 15:15:08 dwmw2 Exp $ -+ * -+ * $Id: nand_ids.c,v 1.10 2004/05/26 13:40:12 gleixner Exp $ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as -@@ -13,26 +12,99 @@ - */ - #include <linux/module.h> - #include <linux/mtd/nand.h> -- - /* - * Chip ID list -+* -+* Name. ID code, pagesize, chipsize in MegaByte, eraseblock size, -+* options -+* -+* Pagesize; 0, 256, 512 -+* 0 get this information from the extended chip ID -++ 256 256 Byte page size -+* 512 512 Byte page size - */ - struct nand_flash_dev nand_flash_ids[] = { -- {"NAND 1MiB 5V", 0x6e, 20, 0x1000, 1}, -- {"NAND 2MiB 5V", 0x64, 21, 0x1000, 1}, -- {"NAND 4MiB 5V", 0x6b, 22, 0x2000, 0}, -- {"NAND 1MiB 3,3V", 0xe8, 20, 0x1000, 1}, -- {"NAND 1MiB 3,3V", 0xec, 20, 0x1000, 1}, -- {"NAND 2MiB 3,3V", 0xea, 21, 0x1000, 1}, -- {"NAND 4MiB 3,3V", 0xd5, 22, 0x2000, 0}, -- {"NAND 4MiB 3,3V", 0xe3, 22, 0x2000, 0}, -- {"NAND 4MiB 3,3V", 0xe5, 22, 0x2000, 0}, -- {"NAND 8MiB 3,3V", 0xd6, 23, 0x2000, 0}, -- {"NAND 8MiB 3,3V", 0xe6, 23, 0x2000, 0}, -- {"NAND 16MiB 3,3V", 0x73, 24, 0x4000, 0}, -- {"NAND 32MiB 3,3V", 0x75, 25, 0x4000, 0}, -- {"NAND 64MiB 3,3V", 0x76, 26, 0x4000, 0}, -- {"NAND 128MiB 3,3V", 0x79, 27, 0x4000, 0}, -+ {"NAND 1MiB 5V 8-bit", 0x6e, 256, 1, 0x1000, 0}, -+ {"NAND 2MiB 5V 8-bit", 0x64, 256, 2, 0x1000, 0}, -+ {"NAND 4MiB 5V 8-bit", 0x6b, 512, 4, 0x2000, 0}, -+ {"NAND 1MiB 3,3V 8-bit", 0xe8, 256, 1, 0x1000, 0}, -+ {"NAND 1MiB 3,3V 8-bit", 0xec, 256, 1, 0x1000, 0}, -+ {"NAND 2MiB 3,3V 8-bit", 0xea, 256, 2, 0x1000, 0}, -+ {"NAND 4MiB 3,3V 8-bit", 0xd5, 512, 4, 0x2000, 0}, -+ {"NAND 4MiB 3,3V 8-bit", 0xe3, 512, 4, 0x2000, 0}, -+ {"NAND 4MiB 3,3V 8-bit", 0xe5, 512, 4, 0x2000, 0}, -+ {"NAND 8MiB 3,3V 8-bit", 0xd6, 512, 8, 0x2000, 0}, -+ -+ {"NAND 8MiB 1,8V 8-bit", 0x39, 512, 8, 0x2000, 0}, -+ {"NAND 8MiB 3,3V 8-bit", 0xe6, 512, 8, 0x2000, 0}, -+ {"NAND 
8MiB 1,8V 16-bit", 0x49, 512, 8, 0x2000, NAND_BUSWIDTH_16}, -+ {"NAND 8MiB 3,3V 16-bit", 0x59, 512, 8, 0x2000, NAND_BUSWIDTH_16}, -+ -+ {"NAND 16MiB 1,8V 8-bit", 0x33, 512, 16, 0x4000, 0}, -+ {"NAND 16MiB 3,3V 8-bit", 0x73, 512, 16, 0x4000, 0}, -+ {"NAND 16MiB 1,8V 16-bit", 0x43, 512, 16, 0x4000, NAND_BUSWIDTH_16}, -+ {"NAND 16MiB 3,3V 16-bit", 0x53, 512, 16, 0x4000, NAND_BUSWIDTH_16}, -+ -+ {"NAND 32MiB 1,8V 8-bit", 0x35, 512, 32, 0x4000, 0}, -+ {"NAND 32MiB 3,3V 8-bit", 0x75, 512, 32, 0x4000, 0}, -+ {"NAND 32MiB 1,8V 16-bit", 0x45, 512, 32, 0x4000, NAND_BUSWIDTH_16}, -+ {"NAND 32MiB 3,3V 16-bit", 0x55, 512, 32, 0x4000, NAND_BUSWIDTH_16}, -+ -+ {"NAND 64MiB 1,8V 8-bit", 0x36, 512, 64, 0x4000, 0}, -+ {"NAND 64MiB 3,3V 8-bit", 0x76, 512, 64, 0x4000, 0}, -+ {"NAND 64MiB 1,8V 16-bit", 0x46, 512, 64, 0x4000, NAND_BUSWIDTH_16}, -+ {"NAND 64MiB 3,3V 16-bit", 0x56, 512, 64, 0x4000, NAND_BUSWIDTH_16}, -+ -+ {"NAND 128MiB 1,8V 8-bit", 0x78, 512, 128, 0x4000, 0}, -+ {"NAND 128MiB 3,3V 8-bit", 0x79, 512, 128, 0x4000, 0}, -+ {"NAND 128MiB 1,8V 16-bit", 0x72, 512, 128, 0x4000, NAND_BUSWIDTH_16}, -+ {"NAND 128MiB 3,3V 16-bit", 0x74, 512, 128, 0x4000, NAND_BUSWIDTH_16}, -+ -+ {"NAND 256MiB 3,3V 8-bit", 0x71, 512, 256, 0x4000, 0}, -+ -+ {"NAND 512MiB 3,3V 8-bit", 0xDC, 512, 512, 0x4000, 0}, -+ -+ /* These are the new chips with large page size. 
The pagesize -+ * and the erasesize is determined from the extended id bytes -+ */ -+ /* 1 Gigabit */ -+ {"NAND 128MiB 1,8V 8-bit", 0xA1, 0, 128, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_NO_AUTOINCR}, -+ {"NAND 128MiB 3,3V 8-bit", 0xF1, 0, 128, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_NO_AUTOINCR}, -+ {"NAND 128MiB 1,8V 16-bit", 0xB1, 0, 128, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_BUSWIDTH_16 | NAND_NO_AUTOINCR}, -+ {"NAND 128MiB 3,3V 16-bit", 0xC1, 0, 128, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_BUSWIDTH_16 | NAND_NO_AUTOINCR}, -+ -+ /* 2 Gigabit */ -+ {"NAND 256MiB 1,8V 8-bit", 0xAA, 0, 256, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_NO_AUTOINCR}, -+ {"NAND 256MiB 3,3V 8-bit", 0xDA, 0, 256, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_NO_AUTOINCR}, -+ {"NAND 256MiB 1,8V 16-bit", 0xBA, 0, 256, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_BUSWIDTH_16 | NAND_NO_AUTOINCR}, -+ {"NAND 256MiB 3,3V 16-bit", 0xCA, 0, 256, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_BUSWIDTH_16 | NAND_NO_AUTOINCR}, -+ -+ /* 4 Gigabit */ -+ {"NAND 512MiB 1,8V 8-bit", 0xAC, 0, 512, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_NO_AUTOINCR}, -+ {"NAND 512MiB 3,3V 8-bit", 0xDC, 0, 512, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_NO_AUTOINCR}, -+ {"NAND 512MiB 1,8V 16-bit", 0xBC, 0, 512, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_BUSWIDTH_16 | NAND_NO_AUTOINCR}, -+ {"NAND 512MiB 3,3V 16-bit", 0xCC, 0, 512, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_BUSWIDTH_16 | NAND_NO_AUTOINCR}, -+ -+ /* 8 Gigabit */ -+ {"NAND 1GiB 1,8V 8-bit", 0xA3, 0, 1024, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_NO_AUTOINCR}, -+ {"NAND 1GiB 3,3V 8-bit", 0xD3, 0, 1024, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_NO_AUTOINCR}, -+ {"NAND 1GiB 1,8V 16-bit", 0xB3, 0, 1024, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_BUSWIDTH_16 | NAND_NO_AUTOINCR}, -+ {"NAND 1GiB 3,3V 16-bit", 0xC3, 0, 1024, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_BUSWIDTH_16 | NAND_NO_AUTOINCR}, -+ -+ /* 16 Gigabit */ -+ {"NAND 2GiB 1,8V 8-bit", 0xA5, 0, 2048, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_NO_AUTOINCR}, -+ {"NAND 2GiB 3,3V 8-bit", 0xD5, 0, 2048, 0, NAND_SAMSUNG_LP_OPTIONS | 
NAND_NO_AUTOINCR}, -+ {"NAND 2GiB 1,8V 16-bit", 0xB5, 0, 2048, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_BUSWIDTH_16 | NAND_NO_AUTOINCR}, -+ {"NAND 2GiB 3,3V 16-bit", 0xC5, 0, 2048, 0, NAND_SAMSUNG_LP_OPTIONS | NAND_BUSWIDTH_16 | NAND_NO_AUTOINCR}, -+ -+ /* Renesas AND 1 Gigabit. Those chips do not support extended id and have a strange page/block layout ! -+ * The chosen minimum erasesize is 4 * 2 * 2048 = 16384 Byte, as those chips have an array of 4 page planes -+ * 1 block = 2 pages, but due to plane arrangement the blocks 0-3 consists of page 0 + 4,1 + 5, 2 + 6, 3 + 7 -+ * Anyway JFFS2 would increase the eraseblock size so we chose a combined one which can be erased in one go -+ * There are more speed improvements for reads and writes possible, but not implemented now -+ */ -+ {"AND 128MiB 3,3V 8-bit", 0x01, 2048, 128, 0x4000, NAND_IS_AND | NAND_NO_AUTOINCR | NAND_4PAGE_ARRAY}, -+ - {NULL,} - }; - -@@ -44,10 +116,11 @@ - {NAND_MFR_SAMSUNG, "Samsung"}, - {NAND_MFR_FUJITSU, "Fujitsu"}, - {NAND_MFR_NATIONAL, "National"}, -+ {NAND_MFR_RENESAS, "Renesas"}, -+ {NAND_MFR_STMICRO, "ST Micro"}, - {0x0, "Unknown"} - }; - -- - EXPORT_SYMBOL (nand_manuf_ids); - EXPORT_SYMBOL (nand_flash_ids); - -Index: linux-2.6.5/drivers/mtd/nand/ppchameleonevb.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/ppchameleonevb.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/ppchameleonevb.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,419 @@ -+/* -+ * drivers/mtd/nand/ppchameleonevb.c -+ * -+ * Copyright (C) 2003 DAVE Srl (info@wawnet.biz) -+ * -+ * Derived from drivers/mtd/nand/edb7312.c -+ * -+ * -+ * $Id: ppchameleonevb.c,v 1.3 2004/09/16 23:27:14 gleixner Exp $ -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. 
-+ * -+ * Overview: -+ * This is a device driver for the NAND flash devices found on the -+ * PPChameleon/PPChameleonEVB system. -+ * PPChameleon options (autodetected): -+ * - BA model: no NAND -+ * - ME model: 32MB (Samsung K9F5608U0B) -+ * - HI model: 128MB (Samsung K9F1G08UOM) -+ * PPChameleonEVB options: -+ * - 32MB (Samsung K9F5608U0B) -+ */ -+ -+#include <linux/init.h> -+#include <linux/slab.h> -+#include <linux/module.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/nand.h> -+#include <linux/mtd/partitions.h> -+#include <asm/io.h> -+#include <platforms/PPChameleonEVB.h> -+ -+#undef USE_READY_BUSY_PIN -+#define USE_READY_BUSY_PIN -+/* see datasheets (tR) */ -+#define NAND_BIG_DELAY_US 25 -+#define NAND_SMALL_DELAY_US 10 -+ -+/* handy sizes */ -+#define SZ_4M 0x00400000 -+#define NAND_SMALL_SIZE 0x02000000 -+#define NAND_MTD_NAME "ppchameleon-nand" -+#define NAND_EVB_MTD_NAME "ppchameleonevb-nand" -+ -+/* GPIO pins used to drive NAND chip mounted on processor module */ -+#define NAND_nCE_GPIO_PIN (0x80000000 >> 1) -+#define NAND_CLE_GPIO_PIN (0x80000000 >> 2) -+#define NAND_ALE_GPIO_PIN (0x80000000 >> 3) -+#define NAND_RB_GPIO_PIN (0x80000000 >> 4) -+/* GPIO pins used to drive NAND chip mounted on EVB */ -+#define NAND_EVB_nCE_GPIO_PIN (0x80000000 >> 14) -+#define NAND_EVB_CLE_GPIO_PIN (0x80000000 >> 15) -+#define NAND_EVB_ALE_GPIO_PIN (0x80000000 >> 16) -+#define NAND_EVB_RB_GPIO_PIN (0x80000000 >> 31) -+ -+/* -+ * MTD structure for PPChameleonEVB board -+ */ -+static struct mtd_info *ppchameleon_mtd = NULL; -+static struct mtd_info *ppchameleonevb_mtd = NULL; -+ -+/* -+ * Module stuff -+ */ -+static int ppchameleon_fio_pbase = CFG_NAND0_PADDR; -+static int ppchameleonevb_fio_pbase = CFG_NAND1_PADDR; -+ -+#ifdef MODULE -+MODULE_PARM(ppchameleon_fio_pbase, "i"); -+__setup("ppchameleon_fio_pbase=",ppchameleon_fio_pbase); -+MODULE_PARM(ppchameleonevb_fio_pbase, "i"); -+__setup("ppchameleonevb_fio_pbase=",ppchameleonevb_fio_pbase); -+#endif -+ -+#ifdef 
CONFIG_MTD_PARTITIONS -+/* -+ * Define static partitions for flash devices -+ */ -+static struct mtd_partition partition_info_hi[] = { -+ { name: "PPChameleon HI Nand Flash", -+ offset: 0, -+ size: 128*1024*1024 } -+}; -+ -+static struct mtd_partition partition_info_me[] = { -+ { name: "PPChameleon ME Nand Flash", -+ offset: 0, -+ size: 32*1024*1024 } -+}; -+ -+static struct mtd_partition partition_info_evb[] = { -+ { name: "PPChameleonEVB Nand Flash", -+ offset: 0, -+ size: 32*1024*1024 } -+}; -+ -+#define NUM_PARTITIONS 1 -+ -+extern int parse_cmdline_partitions(struct mtd_info *master, -+ struct mtd_partition **pparts, -+ const char *mtd_id); -+#endif -+ -+ -+/* -+ * hardware specific access to control-lines -+ */ -+static void ppchameleon_hwcontrol(struct mtd_info *mtdinfo, int cmd) -+{ -+ switch(cmd) { -+ -+ case NAND_CTL_SETCLE: -+ MACRO_NAND_CTL_SETCLE((unsigned long)CFG_NAND0_PADDR); -+ break; -+ case NAND_CTL_CLRCLE: -+ MACRO_NAND_CTL_CLRCLE((unsigned long)CFG_NAND0_PADDR); -+ break; -+ case NAND_CTL_SETALE: -+ MACRO_NAND_CTL_SETALE((unsigned long)CFG_NAND0_PADDR); -+ break; -+ case NAND_CTL_CLRALE: -+ MACRO_NAND_CTL_CLRALE((unsigned long)CFG_NAND0_PADDR); -+ break; -+ case NAND_CTL_SETNCE: -+ MACRO_NAND_ENABLE_CE((unsigned long)CFG_NAND0_PADDR); -+ break; -+ case NAND_CTL_CLRNCE: -+ MACRO_NAND_DISABLE_CE((unsigned long)CFG_NAND0_PADDR); -+ break; -+ } -+} -+ -+static void ppchameleonevb_hwcontrol(struct mtd_info *mtdinfo, int cmd) -+{ -+ switch(cmd) { -+ -+ case NAND_CTL_SETCLE: -+ MACRO_NAND_CTL_SETCLE((unsigned long)CFG_NAND1_PADDR); -+ break; -+ case NAND_CTL_CLRCLE: -+ MACRO_NAND_CTL_CLRCLE((unsigned long)CFG_NAND1_PADDR); -+ break; -+ case NAND_CTL_SETALE: -+ MACRO_NAND_CTL_SETALE((unsigned long)CFG_NAND1_PADDR); -+ break; -+ case NAND_CTL_CLRALE: -+ MACRO_NAND_CTL_CLRALE((unsigned long)CFG_NAND1_PADDR); -+ break; -+ case NAND_CTL_SETNCE: -+ MACRO_NAND_ENABLE_CE((unsigned long)CFG_NAND1_PADDR); -+ break; -+ case NAND_CTL_CLRNCE: -+ 
MACRO_NAND_DISABLE_CE((unsigned long)CFG_NAND1_PADDR); -+ break; -+ } -+} -+ -+#ifdef USE_READY_BUSY_PIN -+/* -+ * read device ready pin -+ */ -+static int ppchameleon_device_ready(struct mtd_info *minfo) -+{ -+ if (in_be32((volatile unsigned*)GPIO0_IR) & NAND_RB_GPIO_PIN) -+ return 1; -+ return 0; -+} -+ -+static int ppchameleonevb_device_ready(struct mtd_info *minfo) -+{ -+ if (in_be32((volatile unsigned*)GPIO0_IR) & NAND_EVB_RB_GPIO_PIN) -+ return 1; -+ return 0; -+} -+#endif -+ -+#ifdef CONFIG_MTD_PARTITIONS -+const char *part_probes[] = { "cmdlinepart", NULL }; -+const char *part_probes_evb[] = { "cmdlinepart", NULL }; -+#endif -+ -+/* -+ * Main initialization routine -+ */ -+static int __init ppchameleonevb_init (void) -+{ -+ struct nand_chip *this; -+ const char *part_type = 0; -+ int mtd_parts_nb = 0; -+ struct mtd_partition *mtd_parts = 0; -+ void __iomem *ppchameleon_fio_base; -+ void __iomem *ppchameleonevb_fio_base; -+ -+ -+ /********************************* -+ * Processor module NAND (if any) * -+ *********************************/ -+ /* Allocate memory for MTD device structure and private data */ -+ ppchameleon_mtd = (struct mtd_info *) kmalloc(sizeof(struct mtd_info) + -+ sizeof(struct nand_chip), GFP_KERNEL); -+ if (!ppchameleon_mtd) { -+ printk("Unable to allocate PPChameleon NAND MTD device structure.\n"); -+ return -ENOMEM; -+ } -+ -+ /* map physical address */ -+ ppchameleon_fio_base = (void __iomem *) ioremap(ppchameleon_fio_pbase, SZ_4M); -+ if(!ppchameleon_fio_base) { -+ printk("ioremap PPChameleon NAND flash failed\n"); -+ kfree(ppchameleon_mtd); -+ return -EIO; -+ } -+ -+ /* Get pointer to private data */ -+ this = (struct nand_chip *) (&ppchameleon_mtd[1]); -+ -+ /* Initialize structures */ -+ memset((char *) ppchameleon_mtd, 0, sizeof(struct mtd_info)); -+ memset((char *) this, 0, sizeof(struct nand_chip)); -+ -+ /* Link the private data with the MTD structure */ -+ ppchameleon_mtd->priv = this; -+ -+ /* Initialize GPIOs */ -+ /* Pin 
mapping for NAND chip */ -+ /* -+ CE GPIO_01 -+ CLE GPIO_02 -+ ALE GPIO_03 -+ R/B GPIO_04 -+ */ -+ /* output select */ -+ out_be32((volatile unsigned*)GPIO0_OSRH, in_be32((volatile unsigned*)GPIO0_OSRH) & 0xC0FFFFFF); -+ /* three-state select */ -+ out_be32((volatile unsigned*)GPIO0_TSRH, in_be32((volatile unsigned*)GPIO0_TSRH) & 0xC0FFFFFF); -+ /* enable output driver */ -+ out_be32((volatile unsigned*)GPIO0_TCR, in_be32((volatile unsigned*)GPIO0_TCR) | NAND_nCE_GPIO_PIN | NAND_CLE_GPIO_PIN | NAND_ALE_GPIO_PIN); -+#ifdef USE_READY_BUSY_PIN -+ /* three-state select */ -+ out_be32((volatile unsigned*)GPIO0_TSRH, in_be32((volatile unsigned*)GPIO0_TSRH) & 0xFF3FFFFF); -+ /* high-impedecence */ -+ out_be32((volatile unsigned*)GPIO0_TCR, in_be32((volatile unsigned*)GPIO0_TCR) & (~NAND_RB_GPIO_PIN)); -+ /* input select */ -+ out_be32((volatile unsigned*)GPIO0_ISR1H, (in_be32((volatile unsigned*)GPIO0_ISR1H) & 0xFF3FFFFF) | 0x00400000); -+#endif -+ -+ /* insert callbacks */ -+ this->IO_ADDR_R = ppchameleon_fio_base; -+ this->IO_ADDR_W = ppchameleon_fio_base; -+ this->hwcontrol = ppchameleon_hwcontrol; -+#ifdef USE_READY_BUSY_PIN -+ this->dev_ready = ppchameleon_device_ready; -+#endif -+ this->chip_delay = NAND_BIG_DELAY_US; -+ /* ECC mode */ -+ this->eccmode = NAND_ECC_SOFT; -+ -+ /* Scan to find existence of the device (it could not be mounted) */ -+ if (nand_scan (ppchameleon_mtd, 1)) { -+ iounmap((void *)ppchameleon_fio_base); -+ kfree (ppchameleon_mtd); -+ goto nand_evb_init; -+ } -+ -+#ifndef USE_READY_BUSY_PIN -+ /* Adjust delay if necessary */ -+ if (ppchameleon_mtd->size == NAND_SMALL_SIZE) -+ this->chip_delay = NAND_SMALL_DELAY_US; -+#endif -+ -+#ifdef CONFIG_MTD_PARTITIONS -+ ppchameleon_mtd->name = "ppchameleon-nand"; -+ mtd_parts_nb = parse_mtd_partitions(ppchameleon_mtd, part_probes, &mtd_parts, 0); -+ if (mtd_parts_nb > 0) -+ part_type = "command line"; -+ else -+ mtd_parts_nb = 0; -+#endif -+ if (mtd_parts_nb == 0) -+ { -+ if (ppchameleon_mtd->size == 
NAND_SMALL_SIZE) -+ mtd_parts = partition_info_me; -+ else -+ mtd_parts = partition_info_hi; -+ mtd_parts_nb = NUM_PARTITIONS; -+ part_type = "static"; -+ } -+ -+ /* Register the partitions */ -+ printk(KERN_NOTICE "Using %s partition definition\n", part_type); -+ add_mtd_partitions(ppchameleon_mtd, mtd_parts, mtd_parts_nb); -+ -+nand_evb_init: -+ /**************************** -+ * EVB NAND (always present) * -+ ****************************/ -+ /* Allocate memory for MTD device structure and private data */ -+ ppchameleonevb_mtd = (struct mtd_info *) kmalloc(sizeof(struct mtd_info) + -+ sizeof(struct nand_chip), GFP_KERNEL); -+ if (!ppchameleonevb_mtd) { -+ printk("Unable to allocate PPChameleonEVB NAND MTD device structure.\n"); -+ return -ENOMEM; -+ } -+ -+ /* map physical address */ -+ ppchameleonevb_fio_base = (void __iomem *)ioremap(ppchameleonevb_fio_pbase, SZ_4M); -+ if(!ppchameleonevb_fio_base) { -+ printk("ioremap PPChameleonEVB NAND flash failed\n"); -+ kfree(ppchameleonevb_mtd); -+ return -EIO; -+ } -+ -+ /* Get pointer to private data */ -+ this = (struct nand_chip *) (&ppchameleonevb_mtd[1]); -+ -+ /* Initialize structures */ -+ memset((char *) ppchameleonevb_mtd, 0, sizeof(struct mtd_info)); -+ memset((char *) this, 0, sizeof(struct nand_chip)); -+ -+ /* Link the private data with the MTD structure */ -+ ppchameleonevb_mtd->priv = this; -+ -+ /* Initialize GPIOs */ -+ /* Pin mapping for NAND chip */ -+ /* -+ CE GPIO_14 -+ CLE GPIO_15 -+ ALE GPIO_16 -+ R/B GPIO_31 -+ */ -+ /* output select */ -+ out_be32((volatile unsigned*)GPIO0_OSRH, in_be32((volatile unsigned*)GPIO0_OSRH) & 0xFFFFFFF0); -+ out_be32((volatile unsigned*)GPIO0_OSRL, in_be32((volatile unsigned*)GPIO0_OSRL) & 0x3FFFFFFF); -+ /* three-state select */ -+ out_be32((volatile unsigned*)GPIO0_TSRH, in_be32((volatile unsigned*)GPIO0_TSRH) & 0xFFFFFFF0); -+ out_be32((volatile unsigned*)GPIO0_TSRL, in_be32((volatile unsigned*)GPIO0_TSRL) & 0x3FFFFFFF); -+ /* enable output driver */ -+ 
out_be32((volatile unsigned*)GPIO0_TCR, in_be32((volatile unsigned*)GPIO0_TCR) | NAND_EVB_nCE_GPIO_PIN | -+ NAND_EVB_CLE_GPIO_PIN | NAND_EVB_ALE_GPIO_PIN); -+#ifdef USE_READY_BUSY_PIN -+ /* three-state select */ -+ out_be32((volatile unsigned*)GPIO0_TSRL, in_be32((volatile unsigned*)GPIO0_TSRL) & 0xFFFFFFFC); -+ /* high-impedecence */ -+ out_be32((volatile unsigned*)GPIO0_TCR, in_be32((volatile unsigned*)GPIO0_TCR) & (~NAND_EVB_RB_GPIO_PIN)); -+ /* input select */ -+ out_be32((volatile unsigned*)GPIO0_ISR1L, (in_be32((volatile unsigned*)GPIO0_ISR1L) & 0xFFFFFFFC) | 0x00000001); -+#endif -+ -+ /* insert callbacks */ -+ this->IO_ADDR_R = ppchameleonevb_fio_base; -+ this->IO_ADDR_W = ppchameleonevb_fio_base; -+ this->hwcontrol = ppchameleonevb_hwcontrol; -+#ifdef USE_READY_BUSY_PIN -+ this->dev_ready = ppchameleonevb_device_ready; -+#endif -+ this->chip_delay = NAND_SMALL_DELAY_US; -+ -+ /* ECC mode */ -+ this->eccmode = NAND_ECC_SOFT; -+ -+ /* Scan to find existence of the device */ -+ if (nand_scan (ppchameleonevb_mtd, 1)) { -+ iounmap((void *)ppchameleonevb_fio_base); -+ kfree (ppchameleonevb_mtd); -+ return -ENXIO; -+ } -+ -+#ifdef CONFIG_MTD_PARTITIONS -+ ppchameleonevb_mtd->name = NAND_EVB_MTD_NAME; -+ mtd_parts_nb = parse_mtd_partitions(ppchameleonevb_mtd, part_probes_evb, &mtd_parts, 0); -+ if (mtd_parts_nb > 0) -+ part_type = "command line"; -+ else -+ mtd_parts_nb = 0; -+#endif -+ if (mtd_parts_nb == 0) -+ { -+ mtd_parts = partition_info_evb; -+ mtd_parts_nb = NUM_PARTITIONS; -+ part_type = "static"; -+ } -+ -+ /* Register the partitions */ -+ printk(KERN_NOTICE "Using %s partition definition\n", part_type); -+ add_mtd_partitions(ppchameleonevb_mtd, mtd_parts, mtd_parts_nb); -+ -+ /* Return happy */ -+ return 0; -+} -+module_init(ppchameleonevb_init); -+ -+/* -+ * Clean up routine -+ */ -+static void __exit ppchameleonevb_cleanup (void) -+{ -+ struct nand_chip *this; -+ -+ /* Release resources, unregister device(s) */ -+ nand_release (ppchameleon_mtd); -+ 
nand_release (ppchameleonevb_mtd); -+ -+ /* Release iomaps */ -+ this = (struct nand_chip *) &ppchameleon_mtd[1]; -+ iounmap((void *) this->IO_ADDR_R; -+ this = (struct nand_chip *) &ppchameleonevb_mtd[1]; -+ iounmap((void *) this->IO_ADDR_R; -+ -+ /* Free the MTD device structure */ -+ kfree (ppchameleon_mtd); -+ kfree (ppchameleonevb_mtd); -+} -+module_exit(ppchameleonevb_cleanup); -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("DAVE Srl <support-ppchameleon@dave-tech.it>"); -+MODULE_DESCRIPTION("MTD map driver for DAVE Srl PPChameleonEVB board"); -Index: linux-2.6.5/drivers/mtd/nand/rtc_from4.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/rtc_from4.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/rtc_from4.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,500 @@ -+/* -+ * drivers/mtd/nand/rtc_from4.c -+ * -+ * Copyright (C) 2004 Red Hat, Inc. -+ * -+ * Derived from drivers/mtd/nand/spia.c -+ * Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com) -+ * -+ * $Id: rtc_from4.c,v 1.1 2004/09/16 23:23:42 gleixner Exp $ -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * Overview: -+ * This is a device driver for the AG-AND flash device found on the -+ * Renesas Technology Corp. Flash ROM 4-slot interface board (FROM_BOARD4), -+ * which utilizes the Renesas HN29V1G91T-30 part. -+ * This chip is a 1 GBibit (128MiB x 8 bits) AG-AND flash device. 
-+ */ -+ -+#include <linux/delay.h> -+#include <linux/kernel.h> -+#include <linux/init.h> -+#include <linux/slab.h> -+#include <linux/rslib.h> -+#include <linux/module.h> -+#include <linux/mtd/compatmac.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/nand.h> -+#include <linux/mtd/partitions.h> -+#include <asm/io.h> -+ -+/* -+ * MTD structure for Renesas board -+ */ -+static struct mtd_info *rtc_from4_mtd = NULL; -+ -+#define RTC_FROM4_MAX_CHIPS 2 -+ -+/* HS77x9 processor register defines */ -+#define SH77X9_BCR1 ((volatile unsigned short *)(0xFFFFFF60)) -+#define SH77X9_BCR2 ((volatile unsigned short *)(0xFFFFFF62)) -+#define SH77X9_WCR1 ((volatile unsigned short *)(0xFFFFFF64)) -+#define SH77X9_WCR2 ((volatile unsigned short *)(0xFFFFFF66)) -+#define SH77X9_MCR ((volatile unsigned short *)(0xFFFFFF68)) -+#define SH77X9_PCR ((volatile unsigned short *)(0xFFFFFF6C)) -+#define SH77X9_FRQCR ((volatile unsigned short *)(0xFFFFFF80)) -+ -+/* -+ * Values specific to the Renesas Technology Corp. 
FROM_BOARD4 (used with HS77x9 processor) -+ */ -+/* Address where flash is mapped */ -+#define RTC_FROM4_FIO_BASE 0x14000000 -+ -+/* CLE and ALE are tied to address lines 5 & 4, respectively */ -+#define RTC_FROM4_CLE (1 << 5) -+#define RTC_FROM4_ALE (1 << 4) -+ -+/* address lines A24-A22 used for chip selection */ -+#define RTC_FROM4_NAND_ADDR_SLOT3 (0x00800000) -+#define RTC_FROM4_NAND_ADDR_SLOT4 (0x00C00000) -+#define RTC_FROM4_NAND_ADDR_FPGA (0x01000000) -+/* mask address lines A24-A22 used for chip selection */ -+#define RTC_FROM4_NAND_ADDR_MASK (RTC_FROM4_NAND_ADDR_SLOT3 | RTC_FROM4_NAND_ADDR_SLOT4 | RTC_FROM4_NAND_ADDR_FPGA) -+ -+/* FPGA status register for checking device ready (bit zero) */ -+#define RTC_FROM4_FPGA_SR (RTC_FROM4_NAND_ADDR_FPGA | 0x00000002) -+#define RTC_FROM4_DEVICE_READY 0x0001 -+ -+/* FPGA Reed-Solomon ECC Control register */ -+ -+#define RTC_FROM4_RS_ECC_CTL (RTC_FROM4_NAND_ADDR_FPGA | 0x00000050) -+#define RTC_FROM4_RS_ECC_CTL_CLR (1 << 7) -+#define RTC_FROM4_RS_ECC_CTL_GEN (1 << 6) -+#define RTC_FROM4_RS_ECC_CTL_FD_E (1 << 5) -+ -+/* FPGA Reed-Solomon ECC code base */ -+#define RTC_FROM4_RS_ECC (RTC_FROM4_NAND_ADDR_FPGA | 0x00000060) -+#define RTC_FROM4_RS_ECCN (RTC_FROM4_NAND_ADDR_FPGA | 0x00000080) -+ -+/* FPGA Reed-Solomon ECC check register */ -+#define RTC_FROM4_RS_ECC_CHK (RTC_FROM4_NAND_ADDR_FPGA | 0x00000070) -+#define RTC_FROM4_RS_ECC_CHK_ERROR (1 << 7) -+ -+/* Undefine for software ECC */ -+#define RTC_FROM4_HWECC 1 -+ -+/* -+ * Module stuff -+ */ -+static void __iomem *rtc_from4_fio_base = P2SEGADDR(RTC_FROM4_FIO_BASE); -+ -+MODULE_PARM(rtc_from4_fio_base, "i"); -+ -+const static struct mtd_partition partition_info[] = { -+ { -+ .name = "Renesas flash partition 1", -+ .offset = 0, -+ .size = MTDPART_SIZ_FULL -+ }, -+}; -+#define NUM_PARTITIONS 1 -+ -+/* -+ * hardware specific flash bbt decriptors -+ * Note: this is to allow debugging by disabling -+ * NAND_BBT_CREATE and/or NAND_BBT_WRITE -+ * -+ */ -+static uint8_t 
bbt_pattern[] = {'B', 'b', 't', '0' }; -+static uint8_t mirror_pattern[] = {'1', 't', 'b', 'B' }; -+ -+static struct nand_bbt_descr rtc_from4_bbt_main_descr = { -+ .options = NAND_BBT_LASTBLOCK | NAND_BBT_CREATE | NAND_BBT_WRITE -+ | NAND_BBT_2BIT | NAND_BBT_VERSION | NAND_BBT_PERCHIP, -+ .offs = 40, -+ .len = 4, -+ .veroffs = 44, -+ .maxblocks = 4, -+ .pattern = bbt_pattern -+}; -+ -+static struct nand_bbt_descr rtc_from4_bbt_mirror_descr = { -+ .options = NAND_BBT_LASTBLOCK | NAND_BBT_CREATE | NAND_BBT_WRITE -+ | NAND_BBT_2BIT | NAND_BBT_VERSION | NAND_BBT_PERCHIP, -+ .offs = 40, -+ .len = 4, -+ .veroffs = 44, -+ .maxblocks = 4, -+ .pattern = mirror_pattern -+}; -+ -+ -+ -+#ifdef RTC_FROM4_HWECC -+ -+/* the Reed Solomon control structure */ -+static struct rs_control *rs_decoder; -+ -+/* -+ * hardware specific Out Of Band information -+ */ -+static struct nand_oobinfo rtc_from4_nand_oobinfo = { -+ .useecc = MTD_NANDECC_AUTOPLACE, -+ .eccbytes = 32, -+ .eccpos = { -+ 0, 1, 2, 3, 4, 5, 6, 7, -+ 8, 9, 10, 11, 12, 13, 14, 15, -+ 16, 17, 18, 19, 20, 21, 22, 23, -+ 24, 25, 26, 27, 28, 29, 30, 31}, -+ .oobfree = { {32, 32} } -+}; -+#endif -+ -+ -+ -+/* -+ * rtc_from4_hwcontrol - hardware specific access to control-lines -+ * @mtd: MTD device structure -+ * @cmd: hardware control command -+ * -+ * Address lines (A5 and A4) are used to control Command and Address Latch -+ * Enable on this board, so set the read/write address appropriately. -+ * -+ * Chip Enable is also controlled by the Chip Select (CS5) and -+ * Address lines (A24-A22), so no action is required here. 
-+ * -+ */ -+static void rtc_from4_hwcontrol(struct mtd_info *mtd, int cmd) -+{ -+ struct nand_chip* this = (struct nand_chip *) (mtd->priv); -+ -+ switch(cmd) { -+ -+ case NAND_CTL_SETCLE: -+ this->IO_ADDR_W |= RTC_FROM4_CLE; -+ break; -+ case NAND_CTL_CLRCLE: -+ this->IO_ADDR_W &= ~RTC_FROM4_CLE; -+ break; -+ -+ case NAND_CTL_SETALE: -+ this->IO_ADDR_W |= RTC_FROM4_ALE; -+ break; -+ case NAND_CTL_CLRALE: -+ this->IO_ADDR_W &= ~RTC_FROM4_ALE; -+ break; -+ -+ case NAND_CTL_SETNCE: -+ break; -+ case NAND_CTL_CLRNCE: -+ break; -+ -+ } -+} -+ -+ -+/* -+ * rtc_from4_nand_select_chip - hardware specific chip select -+ * @mtd: MTD device structure -+ * @chip: Chip to select (0 == slot 3, 1 == slot 4) -+ * -+ * The chip select is based on address lines A24-A22. -+ * This driver uses flash slots 3 and 4 (A23-A22). -+ * -+ */ -+static void rtc_from4_nand_select_chip(struct mtd_info *mtd, int chip) -+{ -+ struct nand_chip *this = mtd->priv; -+ -+ this->IO_ADDR_R &= ~RTC_FROM4_NAND_ADDR_MASK; -+ this->IO_ADDR_W &= ~RTC_FROM4_NAND_ADDR_MASK; -+ -+ switch(chip) { -+ -+ case 0: /* select slot 3 chip */ -+ this->IO_ADDR_R |= RTC_FROM4_NAND_ADDR_SLOT3; -+ this->IO_ADDR_W |= RTC_FROM4_NAND_ADDR_SLOT3; -+ break; -+ case 1: /* select slot 4 chip */ -+ this->IO_ADDR_R |= RTC_FROM4_NAND_ADDR_SLOT4; -+ this->IO_ADDR_W |= RTC_FROM4_NAND_ADDR_SLOT4; -+ break; -+ -+ } -+} -+ -+ -+ -+/* -+ * rtc_from4_nand_device_ready - hardware specific ready/busy check -+ * @mtd: MTD device structure -+ * -+ * This board provides the Ready/Busy state in the status register -+ * of the FPGA. Bit zero indicates the RDY(1)/BSY(0) signal. 
-+ * -+ */ -+static int rtc_from4_nand_device_ready(struct mtd_info *mtd) -+{ -+ unsigned short status; -+ -+ status = *((volatile unsigned short *)(rtc_from4_fio_base + RTC_FROM4_FPGA_SR)); -+ -+ return (status & RTC_FROM4_DEVICE_READY); -+ -+} -+ -+#ifdef RTC_FROM4_HWECC -+/* -+ * rtc_from4_enable_hwecc - hardware specific hardware ECC enable function -+ * @mtd: MTD device structure -+ * @mode: I/O mode; read or write -+ * -+ * enable hardware ECC for data read or write -+ * -+ */ -+static void rtc_from4_enable_hwecc(struct mtd_info *mtd, int mode) -+{ -+ volatile unsigned short * rs_ecc_ctl = (volatile unsigned short *)(rtc_from4_fio_base + RTC_FROM4_RS_ECC_CTL); -+ unsigned short status; -+ -+ switch (mode) { -+ case NAND_ECC_READ : -+ status = RTC_FROM4_RS_ECC_CTL_CLR -+ | RTC_FROM4_RS_ECC_CTL_FD_E; -+ -+ *rs_ecc_ctl = status; -+ break; -+ -+ case NAND_ECC_READSYN : -+ status = 0x00; -+ -+ *rs_ecc_ctl = status; -+ break; -+ -+ case NAND_ECC_WRITE : -+ status = RTC_FROM4_RS_ECC_CTL_CLR -+ | RTC_FROM4_RS_ECC_CTL_GEN -+ | RTC_FROM4_RS_ECC_CTL_FD_E; -+ -+ *rs_ecc_ctl = status; -+ break; -+ -+ default: -+ BUG(); -+ break; -+ } -+ -+} -+ -+/* -+ * rtc_from4_calculate_ecc - hardware specific code to read ECC code -+ * @mtd: MTD device structure -+ * @dat: buffer containing the data to generate ECC codes -+ * @ecc_code ECC codes calculated -+ * -+ * The ECC code is calculated by the FPGA. All we have to do is read the values -+ * from the FPGA registers. -+ * -+ * Note: We read from the inverted registers, since data is inverted before -+ * the code is calculated. 
So all 0xff data (blank page) results in all 0xff rs code -+ * -+ */ -+static void rtc_from4_calculate_ecc(struct mtd_info *mtd, const u_char *dat, u_char *ecc_code) -+{ -+ volatile unsigned short * rs_eccn = (volatile unsigned short *)(rtc_from4_fio_base + RTC_FROM4_RS_ECCN); -+ unsigned short value; -+ int i; -+ -+ for (i = 0; i < 8; i++) { -+ value = *rs_eccn; -+ ecc_code[i] = (unsigned char)value; -+ rs_eccn++; -+ } -+ ecc_code[7] |= 0x0f; /* set the last four bits (not used) */ -+} -+ -+ -+/* -+ * rtc_from4_correct_data - hardware specific code to correct data using ECC code -+ * @mtd: MTD device structure -+ * @buf: buffer containing the data to generate ECC codes -+ * @ecc1 ECC codes read -+ * @ecc2 ECC codes calculated -+ * -+ * The FPGA tells us fast, if there's an error or not. If no, we go back happy -+ * else we read the ecc results from the fpga and call the rs library to decode -+ * and hopefully correct the error -+ * -+ * For now I use the code, which we read from the FLASH to use the RS lib, -+ * as the syndrom conversion has a unresolved issue. 
-+ */ -+static int rtc_from4_correct_data(struct mtd_info *mtd, const u_char *buf, u_char *ecc1, u_char *ecc2) -+{ -+ int i, res; -+ unsigned short status; -+ uint16_t rpar[6]; -+ -+ status = *((volatile unsigned short *)(rtc_from4_fio_base + RTC_FROM4_RS_ECC_CHK)); -+ -+ if (!(status & RTC_FROM4_RS_ECC_CHK_ERROR)) -+ return 0; -+ -+ /* convert into 6 10bit parity fields */ -+ rpar[0] = ((uint16_t)ecc1[7] >> 4) | (((uint16_t)ecc1[6] << 4) & 0x3f0); -+ rpar[1] = ((uint16_t)ecc1[6] >> 6) | (((uint16_t)ecc1[5] << 2) & 0x3f3); -+ rpar[2] = ((uint16_t)ecc1[4] >> 0) | (((uint16_t)ecc1[3] << 8) & 0x300); -+ rpar[3] = ((uint16_t)ecc1[3] >> 2) | (((uint16_t)ecc1[2] << 6) & 0x3c0); -+ rpar[4] = ((uint16_t)ecc1[2] >> 4) | (((uint16_t)ecc1[1] << 4) & 0x3f0); -+ rpar[5] = ((uint16_t)ecc1[1] >> 6) | (((uint16_t)ecc1[0] << 2) & 0x3f3); -+ -+ /* Invert the codes */ -+ for (i = 0; i < 6; i++) -+ rpar[i] ^= 0x3ff; -+ -+ /* Let the library code do its magic. -+ * Set the data inversion mask to 0xff, as the FPGA inverts data on read -+ * except for the virtual bits 9 /10 which are tied low -+ */ -+ res = decode_rs8 (rs_decoder, buf, rpar, 512, NULL, 0, NULL, 0xff); -+ if (res > 0) -+ DEBUG (MTD_DEBUG_LEVEL0, "rtc_from4_correct_data: " -+ "ECC corrected %d errors on read\n", res); -+ -+ return res; -+} -+#endif -+ -+/* -+ * Main initialization routine -+ */ -+int __init rtc_from4_init (void) -+{ -+ struct nand_chip *this; -+ unsigned short bcr1, bcr2, wcr2; -+ -+ /* Allocate memory for MTD device structure and private data */ -+ rtc_from4_mtd = kmalloc (sizeof(struct mtd_info) + sizeof (struct nand_chip), -+ GFP_KERNEL); -+ if (!rtc_from4_mtd) { -+ printk ("Unable to allocate Renesas NAND MTD device structure.\n"); -+ return -ENOMEM; -+ } -+ -+ /* Get pointer to private data */ -+ this = (struct nand_chip *) (&rtc_from4_mtd[1]); -+ -+ /* Initialize structures */ -+ memset((char *) rtc_from4_mtd, 0, sizeof(struct mtd_info)); -+ memset((char *) this, 0, sizeof(struct nand_chip)); -+ -+ 
/* Link the private data with the MTD structure */ -+ rtc_from4_mtd->priv = this; -+ -+ /* set area 5 as PCMCIA mode to clear the spec of tDH(Data hold time;9ns min) */ -+ bcr1 = *SH77X9_BCR1 & ~0x0002; -+ bcr1 |= 0x0002; -+ *SH77X9_BCR1 = bcr1; -+ -+ /* set */ -+ bcr2 = *SH77X9_BCR2 & ~0x0c00; -+ bcr2 |= 0x0800; -+ *SH77X9_BCR2 = bcr2; -+ -+ /* set area 5 wait states */ -+ wcr2 = *SH77X9_WCR2 & ~0x1c00; -+ wcr2 |= 0x1c00; -+ *SH77X9_WCR2 = wcr2; -+ -+ /* Set address of NAND IO lines */ -+ this->IO_ADDR_R = rtc_from4_fio_base; -+ this->IO_ADDR_W = rtc_from4_fio_base; -+ /* Set address of hardware control function */ -+ this->hwcontrol = rtc_from4_hwcontrol; -+ /* Set address of chip select function */ -+ this->select_chip = rtc_from4_nand_select_chip; -+ /* command delay time (in us) */ -+ this->chip_delay = 100; -+ /* return the status of the Ready/Busy line */ -+ this->dev_ready = rtc_from4_nand_device_ready; -+ -+#ifdef RTC_FROM4_HWECC -+ printk(KERN_INFO "rtc_from4_init: using hardware ECC detection.\n"); -+ -+ this->eccmode = NAND_ECC_HW8_512; -+ this->options |= NAND_HWECC_SYNDROME; -+ /* set the nand_oobinfo to support FPGA H/W error detection */ -+ this->autooob = &rtc_from4_nand_oobinfo; -+ this->enable_hwecc = rtc_from4_enable_hwecc; -+ this->calculate_ecc = rtc_from4_calculate_ecc; -+ this->correct_data = rtc_from4_correct_data; -+#else -+ printk(KERN_INFO "rtc_from4_init: using software ECC detection.\n"); -+ -+ this->eccmode = NAND_ECC_SOFT; -+#endif -+ -+ /* set the bad block tables to support debugging */ -+ this->bbt_td = &rtc_from4_bbt_main_descr; -+ this->bbt_md = &rtc_from4_bbt_mirror_descr; -+ -+ /* Scan to find existence of the device */ -+ if (nand_scan (rtc_from4_mtd, RTC_FROM4_MAX_CHIPS)) { -+ kfree (rtc_from4_mtd); -+ return -ENXIO; -+ } -+ -+ /* Register the partitions */ -+ add_mtd_partitions(rtc_from4_mtd, partition_info, NUM_PARTITIONS); -+ -+#ifdef RTC_FROM4_HWECC -+ /* We could create the decoder on demand, if memory is a concern. 
-+ * This way we have it handy, if an error happens -+ * -+ * Symbolsize is 10 (bits) -+ * Primitve polynomial is x^10+x^3+1 -+ * first consecutive root is 0 -+ * primitve element to generate roots = 1 -+ * generator polinomial degree = 6 -+ */ -+ rs_decoder = init_rs (10, 0x409, 0, 1, 6); -+ if (!rs_decoder) { -+ printk (KERN_ERROR "Could not create a RS decoder\n"); -+ nand_release(rtc_from4_mtd); -+ kfree (rtc_from4_mtd); -+ return -ENOMEM; -+ } -+#endif -+ /* Return happy */ -+ return 0; -+} -+module_init(rtc_from4_init); -+ -+ -+/* -+ * Clean up routine -+ */ -+#ifdef MODULE -+static void __exit rtc_from4_cleanup (void) -+{ -+ /* Release resource, unregister partitions */ -+ nand_release(rtc_from4_mtd); -+ -+ /* Free the MTD device structure */ -+ kfree (rtc_from4_mtd); -+ -+#ifdef RTC_FROM4_HWECC -+ /* Free the reed solomon resources */ -+ if (rs_decoder) -+ free_rs(rs_decoder); -+#endif -+} -+module_exit(rtc_from4_cleanup); -+#endif -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("d.marlin <dmarlin@redhat.com"); -+MODULE_DESCRIPTION("Board-specific glue layer for AG-AND flash on Renesas FROM_BOARD4"); -+ -Index: linux-2.6.5/drivers/mtd/nand/spia.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/spia.c 2004-04-03 22:37:23.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/spia.c 2005-02-01 17:11:17.000000000 -0500 -@@ -8,7 +8,7 @@ - * to controllines (due to change in nand.c) - * page_cache added - * -- * $Id: spia.c,v 1.19 2003/04/20 07:24:40 gleixner Exp $ -+ * $Id: spia.c,v 1.22 2004/09/16 23:27:14 gleixner Exp $ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as -@@ -20,6 +20,8 @@ - * a 64Mibit (8MiB x 8 bits) NAND flash device. 
- */ - -+#include <linux/kernel.h> -+#include <linux/init.h> - #include <linux/slab.h> - #include <linux/module.h> - #include <linux/mtd/mtd.h> -@@ -35,14 +37,14 @@ - /* - * Values specific to the SPIA board (used with EP7212 processor) - */ --#define SPIA_IO_ADDR = 0xd0000000 /* Start of EP7212 IO address space */ --#define SPIA_FIO_ADDR = 0xf0000000 /* Address where flash is mapped */ --#define SPIA_PEDR = 0x0080 /* -+#define SPIA_IO_BASE 0xd0000000 /* Start of EP7212 IO address space */ -+#define SPIA_FIO_BASE 0xf0000000 /* Address where flash is mapped */ -+#define SPIA_PEDR 0x0080 /* - * IO offset to Port E data register - * where the CLE, ALE and NCE pins - * are wired to. - */ --#define SPIA_PEDDR = 0x00c0 /* -+#define SPIA_PEDDR 0x00c0 /* - * IO offset to Port E data direction - * register so we can control the IO - * lines. -@@ -62,11 +64,6 @@ - MODULE_PARM(spia_pedr, "i"); - MODULE_PARM(spia_peddr, "i"); - --__setup("spia_io_base=",spia_io_base); --__setup("spia_fio_base=",spia_fio_base); --__setup("spia_pedr=",spia_pedr); --__setup("spia_peddr=",spia_peddr); -- - /* - * Define partitions for flash device - */ -@@ -88,7 +85,7 @@ - /* - * hardware specific access to control-lines - */ --void spia_hwcontrol(int cmd){ -+static void spia_hwcontrol(struct mtd_info *mtd, int cmd){ - - switch(cmd){ - -@@ -111,7 +108,7 @@ - struct nand_chip *this; - - /* Allocate memory for MTD device structure and private data */ -- spia_mtd = kmalloc (sizeof(struct mtd_info) + sizeof (struct nand_chip), -+ spia_mtd = (struct mtd_info *) kmalloc (sizeof(struct mtd_info) + sizeof (struct nand_chip), - GFP_KERNEL); - if (!spia_mtd) { - printk ("Unable to allocate SPIA NAND MTD device structure.\n"); -@@ -135,27 +132,19 @@ - (*(volatile unsigned char *) (spia_io_base + spia_peddr)) = 0x07; - - /* Set address of NAND IO lines */ -- this->IO_ADDR_R = spia_fio_base; -- this->IO_ADDR_W = spia_fio_base; -+ this->IO_ADDR_R = (void __iomem *) spia_fio_base; -+ this->IO_ADDR_W = (void 
__iomem *) spia_fio_base; - /* Set address of hardware control function */ - this->hwcontrol = spia_hwcontrol; - /* 15 us command delay time */ - this->chip_delay = 15; - - /* Scan to find existence of the device */ -- if (nand_scan (spia_mtd)) { -+ if (nand_scan (spia_mtd, 1)) { - kfree (spia_mtd); - return -ENXIO; - } - -- /* Allocate memory for internal data buffer */ -- this->data_buf = kmalloc (sizeof(u_char) * (spia_mtd->oobblock + spia_mtd->oobsize), GFP_KERNEL); -- if (!this->data_buf) { -- printk ("Unable to allocate NAND data buffer for SPIA.\n"); -- kfree (spia_mtd); -- return -ENOMEM; -- } -- - /* Register the partitions */ - add_mtd_partitions(spia_mtd, partition_info, NUM_PARTITIONS); - -@@ -170,13 +159,8 @@ - #ifdef MODULE - static void __exit spia_cleanup (void) - { -- struct nand_chip *this = (struct nand_chip *) &spia_mtd[1]; -- -- /* Unregister the device */ -- del_mtd_device (spia_mtd); -- -- /* Free internal data buffer */ -- kfree (this->data_buf); -+ /* Release resources, unregister device */ -+ nand_release (spia_mtd); - - /* Free the MTD device structure */ - kfree (spia_mtd); -Index: linux-2.6.5/drivers/mtd/nand/toto.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/toto.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/toto.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,205 @@ -+/* -+ * drivers/mtd/nand/toto.c -+ * -+ * Copyright (c) 2003 Texas Instruments -+ * -+ * Derived from drivers/mtd/autcpu12.c -+ * -+ * Copyright (c) 2002 Thomas Gleixner <tgxl@linutronix.de> -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * Overview: -+ * This is a device driver for the NAND flash device found on the -+ * TI fido board. 
It supports 32MiB and 64MiB cards -+ * -+ * $Id: toto.c,v 1.3 2004/09/16 23:27:15 gleixner Exp $ -+ */ -+ -+#include <linux/slab.h> -+#include <linux/init.h> -+#include <linux/module.h> -+#include <linux/delay.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/nand.h> -+#include <linux/mtd/partitions.h> -+#include <asm/io.h> -+#include <asm/arch/hardware.h> -+#include <asm/sizes.h> -+#include <asm/arch/toto.h> -+#include <asm/arch-omap1510/hardware.h> -+#include <asm/arch/gpio.h> -+ -+/* -+ * MTD structure for TOTO board -+ */ -+static struct mtd_info *toto_mtd = NULL; -+ -+static unsigned long toto_io_base = OMAP_FLASH_1_BASE; -+ -+#define CONFIG_NAND_WORKAROUND 1 -+ -+#define NAND_NCE 0x4000 -+#define NAND_CLE 0x1000 -+#define NAND_ALE 0x0002 -+#define NAND_MASK (NAND_CLE | NAND_ALE | NAND_NCE) -+ -+#define T_NAND_CTL_CLRALE(iob) gpiosetout(NAND_ALE, 0) -+#define T_NAND_CTL_SETALE(iob) gpiosetout(NAND_ALE, NAND_ALE) -+#ifdef CONFIG_NAND_WORKAROUND /* "some" dev boards busted, blue wired to rts2 :( */ -+#define T_NAND_CTL_CLRCLE(iob) gpiosetout(NAND_CLE, 0); rts2setout(2, 2) -+#define T_NAND_CTL_SETCLE(iob) gpiosetout(NAND_CLE, NAND_CLE); rts2setout(2, 0) -+#else -+#define T_NAND_CTL_CLRCLE(iob) gpiosetout(NAND_CLE, 0) -+#define T_NAND_CTL_SETCLE(iob) gpiosetout(NAND_CLE, NAND_CLE) -+#endif -+#define T_NAND_CTL_SETNCE(iob) gpiosetout(NAND_NCE, 0) -+#define T_NAND_CTL_CLRNCE(iob) gpiosetout(NAND_NCE, NAND_NCE) -+ -+/* -+ * Define partitions for flash devices -+ */ -+ -+static struct mtd_partition partition_info64M[] = { -+ { .name = "toto kernel partition 1", -+ .offset = 0, -+ .size = 2 * SZ_1M }, -+ { .name = "toto file sys partition 2", -+ .offset = 2 * SZ_1M, -+ .size = 14 * SZ_1M }, -+ { .name = "toto user partition 3", -+ .offset = 16 * SZ_1M, -+ .size = 16 * SZ_1M }, -+ { .name = "toto devboard extra partition 4", -+ .offset = 32 * SZ_1M, -+ .size = 32 * SZ_1M }, -+}; -+ -+static struct mtd_partition partition_info32M[] = { -+ { .name = "toto kernel 
partition 1", -+ .offset = 0, -+ .size = 2 * SZ_1M }, -+ { .name = "toto file sys partition 2", -+ .offset = 2 * SZ_1M, -+ .size = 14 * SZ_1M }, -+ { .name = "toto user partition 3", -+ .offset = 16 * SZ_1M, -+ .size = 16 * SZ_1M }, -+}; -+ -+#define NUM_PARTITIONS32M 3 -+#define NUM_PARTITIONS64M 4 -+/* -+ * hardware specific access to control-lines -+*/ -+ -+static void toto_hwcontrol(struct mtd_info *mtd, int cmd) -+{ -+ -+ udelay(1); /* hopefully enough time for tc make proceding write to clear */ -+ switch(cmd){ -+ -+ case NAND_CTL_SETCLE: T_NAND_CTL_SETCLE(cmd); break; -+ case NAND_CTL_CLRCLE: T_NAND_CTL_CLRCLE(cmd); break; -+ -+ case NAND_CTL_SETALE: T_NAND_CTL_SETALE(cmd); break; -+ case NAND_CTL_CLRALE: T_NAND_CTL_CLRALE(cmd); break; -+ -+ case NAND_CTL_SETNCE: T_NAND_CTL_SETNCE(cmd); break; -+ case NAND_CTL_CLRNCE: T_NAND_CTL_CLRNCE(cmd); break; -+ } -+ udelay(1); /* allow time to ensure gpio state to over take memory write */ -+} -+ -+/* -+ * Main initialization routine -+ */ -+int __init toto_init (void) -+{ -+ struct nand_chip *this; -+ int err = 0; -+ -+ /* Allocate memory for MTD device structure and private data */ -+ toto_mtd = (struct mtd_info *) kmalloc (sizeof(struct mtd_info) + sizeof (struct nand_chip), -+ GFP_KERNEL); -+ if (!toto_mtd) { -+ printk (KERN_WARNING "Unable to allocate toto NAND MTD device structure.\n"); -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ /* Get pointer to private data */ -+ this = (struct nand_chip *) (&toto_mtd[1]); -+ -+ /* Initialize structures */ -+ memset((char *) toto_mtd, 0, sizeof(struct mtd_info)); -+ memset((char *) this, 0, sizeof(struct nand_chip)); -+ -+ /* Link the private data with the MTD structure */ -+ toto_mtd->priv = this; -+ -+ /* Set address of NAND IO lines */ -+ this->IO_ADDR_R = toto_io_base; -+ this->IO_ADDR_W = toto_io_base; -+ this->hwcontrol = toto_hwcontrol; -+ this->dev_ready = NULL; -+ /* 25 us command delay time */ -+ this->chip_delay = 30; -+ this->eccmode = NAND_ECC_SOFT; -+ -+ /* Scan 
to find existance of the device */ -+ if (nand_scan (toto_mtd, 1)) { -+ err = -ENXIO; -+ goto out_mtd; -+ } -+ -+ /* Register the partitions */ -+ switch(toto_mtd->size){ -+ case SZ_64M: add_mtd_partitions(toto_mtd, partition_info64M, NUM_PARTITIONS64M); break; -+ case SZ_32M: add_mtd_partitions(toto_mtd, partition_info32M, NUM_PARTITIONS32M); break; -+ default: { -+ printk (KERN_WARNING "Unsupported Nand device\n"); -+ err = -ENXIO; -+ goto out_buf; -+ } -+ } -+ -+ gpioreserve(NAND_MASK); /* claim our gpios */ -+ archflashwp(0,0); /* open up flash for writing */ -+ -+ goto out; -+ -+out_buf: -+ kfree (this->data_buf); -+out_mtd: -+ kfree (toto_mtd); -+out: -+ return err; -+} -+ -+module_init(toto_init); -+ -+/* -+ * Clean up routine -+ */ -+static void __exit toto_cleanup (void) -+{ -+ /* Release resources, unregister device */ -+ nand_release (toto_mtd); -+ -+ /* Free the MTD device structure */ -+ kfree (toto_mtd); -+ -+ /* stop flash writes */ -+ archflashwp(0,1); -+ -+ /* release gpios to system */ -+ gpiorelease(NAND_MASK); -+} -+module_exit(toto_cleanup); -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Richard Woodruff <r-woodruff2@ti.com>"); -+MODULE_DESCRIPTION("Glue layer for NAND flash on toto board"); -Index: linux-2.6.5/drivers/mtd/nand/tx4925ndfmc.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/tx4925ndfmc.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/tx4925ndfmc.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,416 @@ -+/* -+ * drivers/mtd/tx4925ndfmc.c -+ * -+ * Overview: -+ * This is a device driver for the NAND flash device found on the -+ * Toshiba RBTX4925 reference board, which is a SmartMediaCard. It supports -+ * 16MiB, 32MiB and 64MiB cards. -+ * -+ * Author: MontaVista Software, Inc. 
source@mvista.com -+ * -+ * Derived from drivers/mtd/autcpu12.c -+ * Copyright (c) 2001 Thomas Gleixner (gleixner@autronix.de) -+ * -+ * $Id: tx4925ndfmc.c,v 1.4 2004/09/16 23:27:15 gleixner Exp $ -+ * -+ * Copyright (C) 2001 Toshiba Corporation -+ * -+ * 2003 (c) MontaVista Software, Inc. This file is licensed under -+ * the terms of the GNU General Public License version 2. This program -+ * is licensed "as is" without any warranty of any kind, whether express -+ * or implied. -+ * -+ */ -+ -+#include <linux/slab.h> -+#include <linux/init.h> -+#include <linux/module.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/nand.h> -+#include <linux/mtd/partitions.h> -+#include <linux/delay.h> -+#include <asm/io.h> -+#include <asm/tx4925/tx4925_nand.h> -+ -+extern struct nand_oobinfo jffs2_oobinfo; -+ -+/* -+ * MTD structure for RBTX4925 board -+ */ -+static struct mtd_info *tx4925ndfmc_mtd = NULL; -+ -+/* -+ * Define partitions for flash devices -+ */ -+ -+static struct mtd_partition partition_info16k[] = { -+ { .name = "RBTX4925 flash partition 1", -+ .offset = 0, -+ .size = 8 * 0x00100000 }, -+ { .name = "RBTX4925 flash partition 2", -+ .offset = 8 * 0x00100000, -+ .size = 8 * 0x00100000 }, -+}; -+ -+static struct mtd_partition partition_info32k[] = { -+ { .name = "RBTX4925 flash partition 1", -+ .offset = 0, -+ .size = 8 * 0x00100000 }, -+ { .name = "RBTX4925 flash partition 2", -+ .offset = 8 * 0x00100000, -+ .size = 24 * 0x00100000 }, -+}; -+ -+static struct mtd_partition partition_info64k[] = { -+ { .name = "User FS", -+ .offset = 0, -+ .size = 16 * 0x00100000 }, -+ { .name = "RBTX4925 flash partition 2", -+ .offset = 16 * 0x00100000, -+ .size = 48 * 0x00100000}, -+}; -+ -+static struct mtd_partition partition_info128k[] = { -+ { .name = "Skip bad section", -+ .offset = 0, -+ .size = 16 * 0x00100000 }, -+ { .name = "User FS", -+ .offset = 16 * 0x00100000, -+ .size = 112 * 0x00100000 }, -+}; -+#define NUM_PARTITIONS16K 2 -+#define NUM_PARTITIONS32K 2 -+#define 
NUM_PARTITIONS64K 2 -+#define NUM_PARTITIONS128K 2 -+ -+/* -+ * hardware specific access to control-lines -+*/ -+static void tx4925ndfmc_hwcontrol(struct mtd_info *mtd, int cmd) -+{ -+ -+ switch(cmd){ -+ -+ case NAND_CTL_SETCLE: -+ tx4925_ndfmcptr->mcr |= TX4925_NDFMCR_CLE; -+ break; -+ case NAND_CTL_CLRCLE: -+ tx4925_ndfmcptr->mcr &= ~TX4925_NDFMCR_CLE; -+ break; -+ case NAND_CTL_SETALE: -+ tx4925_ndfmcptr->mcr |= TX4925_NDFMCR_ALE; -+ break; -+ case NAND_CTL_CLRALE: -+ tx4925_ndfmcptr->mcr &= ~TX4925_NDFMCR_ALE; -+ break; -+ case NAND_CTL_SETNCE: -+ tx4925_ndfmcptr->mcr |= TX4925_NDFMCR_CE; -+ break; -+ case NAND_CTL_CLRNCE: -+ tx4925_ndfmcptr->mcr &= ~TX4925_NDFMCR_CE; -+ break; -+ case NAND_CTL_SETWP: -+ tx4925_ndfmcptr->mcr |= TX4925_NDFMCR_WE; -+ break; -+ case NAND_CTL_CLRWP: -+ tx4925_ndfmcptr->mcr &= ~TX4925_NDFMCR_WE; -+ break; -+ } -+} -+ -+/* -+* read device ready pin -+*/ -+static int tx4925ndfmc_device_ready(struct mtd_info *mtd) -+{ -+ int ready; -+ ready = (tx4925_ndfmcptr->sr & TX4925_NDSFR_BUSY) ? 
0 : 1; -+ return ready; -+} -+void tx4925ndfmc_enable_hwecc(struct mtd_info *mtd, int mode) -+{ -+ /* reset first */ -+ tx4925_ndfmcptr->mcr |= TX4925_NDFMCR_ECC_CNTL_MASK; -+ tx4925_ndfmcptr->mcr &= ~TX4925_NDFMCR_ECC_CNTL_MASK; -+ tx4925_ndfmcptr->mcr |= TX4925_NDFMCR_ECC_CNTL_ENAB; -+} -+static void tx4925ndfmc_disable_ecc(void) -+{ -+ tx4925_ndfmcptr->mcr &= ~TX4925_NDFMCR_ECC_CNTL_MASK; -+} -+static void tx4925ndfmc_enable_read_ecc(void) -+{ -+ tx4925_ndfmcptr->mcr &= ~TX4925_NDFMCR_ECC_CNTL_MASK; -+ tx4925_ndfmcptr->mcr |= TX4925_NDFMCR_ECC_CNTL_READ; -+} -+void tx4925ndfmc_readecc(struct mtd_info *mtd, const u_char *dat, u_char *ecc_code){ -+ int i; -+ u_char *ecc = ecc_code; -+ tx4925ndfmc_enable_read_ecc(); -+ for (i = 0;i < 6;i++,ecc++) -+ *ecc = tx4925_read_nfmc(&(tx4925_ndfmcptr->dtr)); -+ tx4925ndfmc_disable_ecc(); -+} -+void tx4925ndfmc_device_setup(void) -+{ -+ -+ *(unsigned char *)0xbb005000 &= ~0x08; -+ -+ /* reset NDFMC */ -+ tx4925_ndfmcptr->rstr |= TX4925_NDFRSTR_RST; -+ while (tx4925_ndfmcptr->rstr & TX4925_NDFRSTR_RST); -+ -+ /* setup BusSeparete, Hold Time, Strobe Pulse Width */ -+ tx4925_ndfmcptr->mcr = TX4925_BSPRT ? 
TX4925_NDFMCR_BSPRT : 0; -+ tx4925_ndfmcptr->spr = TX4925_HOLD << 4 | TX4925_SPW; -+} -+static u_char tx4925ndfmc_nand_read_byte(struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ return tx4925_read_nfmc(this->IO_ADDR_R); -+} -+ -+static void tx4925ndfmc_nand_write_byte(struct mtd_info *mtd, u_char byte) -+{ -+ struct nand_chip *this = mtd->priv; -+ tx4925_write_nfmc(byte, this->IO_ADDR_W); -+} -+ -+static void tx4925ndfmc_nand_write_buf(struct mtd_info *mtd, const u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ -+ for (i=0; i<len; i++) -+ tx4925_write_nfmc(buf[i], this->IO_ADDR_W); -+} -+ -+static void tx4925ndfmc_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ -+ for (i=0; i<len; i++) -+ buf[i] = tx4925_read_nfmc(this->IO_ADDR_R); -+} -+ -+static int tx4925ndfmc_nand_verify_buf(struct mtd_info *mtd, const u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ -+ for (i=0; i<len; i++) -+ if (buf[i] != tx4925_read_nfmc(this->IO_ADDR_R)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/* -+ * Send command to NAND device -+ */ -+static void tx4925ndfmc_nand_command (struct mtd_info *mtd, unsigned command, int column, int page_addr) -+{ -+ register struct nand_chip *this = mtd->priv; -+ -+ /* Begin command latch cycle */ -+ this->hwcontrol(mtd, NAND_CTL_SETCLE); -+ /* -+ * Write out the command to the device. 
-+ */ -+ if (command == NAND_CMD_SEQIN) { -+ int readcmd; -+ -+ if (column >= mtd->oobblock) { -+ /* OOB area */ -+ column -= mtd->oobblock; -+ readcmd = NAND_CMD_READOOB; -+ } else if (column < 256) { -+ /* First 256 bytes --> READ0 */ -+ readcmd = NAND_CMD_READ0; -+ } else { -+ column -= 256; -+ readcmd = NAND_CMD_READ1; -+ } -+ this->write_byte(mtd, readcmd); -+ } -+ this->write_byte(mtd, command); -+ -+ /* Set ALE and clear CLE to start address cycle */ -+ this->hwcontrol(mtd, NAND_CTL_CLRCLE); -+ -+ if (column != -1 || page_addr != -1) { -+ this->hwcontrol(mtd, NAND_CTL_SETALE); -+ -+ /* Serially input address */ -+ if (column != -1) -+ this->write_byte(mtd, column); -+ if (page_addr != -1) { -+ this->write_byte(mtd, (unsigned char) (page_addr & 0xff)); -+ this->write_byte(mtd, (unsigned char) ((page_addr >> 8) & 0xff)); -+ /* One more address cycle for higher density devices */ -+ if (mtd->size & 0x0c000000) -+ this->write_byte(mtd, (unsigned char) ((page_addr >> 16) & 0x0f)); -+ } -+ /* Latch in address */ -+ this->hwcontrol(mtd, NAND_CTL_CLRALE); -+ } -+ -+ /* -+ * program and erase have their own busy handlers -+ * status and sequential in needs no delay -+ */ -+ switch (command) { -+ -+ case NAND_CMD_PAGEPROG: -+ /* Turn off WE */ -+ this->hwcontrol (mtd, NAND_CTL_CLRWP); -+ return; -+ -+ case NAND_CMD_SEQIN: -+ /* Turn on WE */ -+ this->hwcontrol (mtd, NAND_CTL_SETWP); -+ return; -+ -+ case NAND_CMD_ERASE1: -+ case NAND_CMD_ERASE2: -+ case NAND_CMD_STATUS: -+ return; -+ -+ case NAND_CMD_RESET: -+ if (this->dev_ready) -+ break; -+ this->hwcontrol(mtd, NAND_CTL_SETCLE); -+ this->write_byte(mtd, NAND_CMD_STATUS); -+ this->hwcontrol(mtd, NAND_CTL_CLRCLE); -+ while ( !(this->read_byte(mtd) & 0x40)); -+ return; -+ -+ /* This applies to read commands */ -+ default: -+ /* -+ * If we don't have access to the busy pin, we apply the given -+ * command delay -+ */ -+ if (!this->dev_ready) { -+ udelay (this->chip_delay); -+ return; -+ } -+ } -+ -+ /* wait until 
command is processed */ -+ while (!this->dev_ready(mtd)); -+} -+ -+#ifdef CONFIG_MTD_CMDLINE_PARTS -+extern int parse_cmdline_partitions(struct mtd_info *master, struct mtd_partitio -+n **pparts, char *); -+#endif -+ -+/* -+ * Main initialization routine -+ */ -+extern int nand_correct_data(struct mtd_info *mtd, u_char *dat, u_char *read_ecc, u_char *calc_ecc); -+int __init tx4925ndfmc_init (void) -+{ -+ struct nand_chip *this; -+ int err = 0; -+ -+ /* Allocate memory for MTD device structure and private data */ -+ tx4925ndfmc_mtd = (struct mtd_info *) kmalloc (sizeof(struct mtd_info) + sizeof (struct nand_chip), -+ GFP_KERNEL); -+ if (!tx4925ndfmc_mtd) { -+ printk ("Unable to allocate RBTX4925 NAND MTD device structure.\n"); -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ tx4925ndfmc_device_setup(); -+ -+ /* io is indirect via a register so don't need to ioremap address */ -+ -+ /* Get pointer to private data */ -+ this = (struct nand_chip *) (&tx4925ndfmc_mtd[1]); -+ -+ /* Initialize structures */ -+ memset((char *) tx4925ndfmc_mtd, 0, sizeof(struct mtd_info)); -+ memset((char *) this, 0, sizeof(struct nand_chip)); -+ -+ /* Link the private data with the MTD structure */ -+ tx4925ndfmc_mtd->priv = this; -+ -+ /* Set address of NAND IO lines */ -+ this->IO_ADDR_R = (void __iomem *)&(tx4925_ndfmcptr->dtr); -+ this->IO_ADDR_W = (void __iomem *)&(tx4925_ndfmcptr->dtr); -+ this->hwcontrol = tx4925ndfmc_hwcontrol; -+ this->enable_hwecc = tx4925ndfmc_enable_hwecc; -+ this->calculate_ecc = tx4925ndfmc_readecc; -+ this->correct_data = nand_correct_data; -+ this->eccmode = NAND_ECC_HW6_512; -+ this->dev_ready = tx4925ndfmc_device_ready; -+ /* 20 us command delay time */ -+ this->chip_delay = 20; -+ this->read_byte = tx4925ndfmc_nand_read_byte; -+ this->write_byte = tx4925ndfmc_nand_write_byte; -+ this->cmdfunc = tx4925ndfmc_nand_command; -+ this->write_buf = tx4925ndfmc_nand_write_buf; -+ this->read_buf = tx4925ndfmc_nand_read_buf; -+ this->verify_buf = 
tx4925ndfmc_nand_verify_buf; -+ -+ /* Scan to find existance of the device */ -+ if (nand_scan (tx4925ndfmc_mtd, 1)) { -+ err = -ENXIO; -+ goto out_ior; -+ } -+ -+ /* Register the partitions */ -+#ifdef CONFIG_MTD_CMDLINE_PARTS -+ { -+ int mtd_parts_nb = 0; -+ struct mtd_partition *mtd_parts = 0; -+ mtd_parts_nb = parse_cmdline_partitions(tx4925ndfmc_mtd, &mtd_parts, "tx4925ndfmc"); -+ if (mtd_parts_nb > 0) -+ add_mtd_partitions(tx4925ndfmc_mtd, mtd_parts, mtd_parts_nb); -+ else -+ add_mtd_device(tx4925ndfmc_mtd); -+ } -+#else /* ifdef CONFIG_MTD_CMDLINE_PARTS */ -+ switch(tx4925ndfmc_mtd->size){ -+ case 0x01000000: add_mtd_partitions(tx4925ndfmc_mtd, partition_info16k, NUM_PARTITIONS16K); break; -+ case 0x02000000: add_mtd_partitions(tx4925ndfmc_mtd, partition_info32k, NUM_PARTITIONS32K); break; -+ case 0x04000000: add_mtd_partitions(tx4925ndfmc_mtd, partition_info64k, NUM_PARTITIONS64K); break; -+ case 0x08000000: add_mtd_partitions(tx4925ndfmc_mtd, partition_info128k, NUM_PARTITIONS128K); break; -+ default: { -+ printk ("Unsupported SmartMedia device\n"); -+ err = -ENXIO; -+ goto out_ior; -+ } -+ } -+#endif /* ifdef CONFIG_MTD_CMDLINE_PARTS */ -+ goto out; -+ -+out_ior: -+out: -+ return err; -+} -+ -+module_init(tx4925ndfmc_init); -+ -+/* -+ * Clean up routine -+ */ -+#ifdef MODULE -+static void __exit tx4925ndfmc_cleanup (void) -+{ -+ /* Release resources, unregister device */ -+ nand_release (tx4925ndfmc_mtd); -+ -+ /* Free the MTD device structure */ -+ kfree (tx4925ndfmc_mtd); -+} -+module_exit(tx4925ndfmc_cleanup); -+#endif -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Alice Hennessy <ahennessy@mvista.com>"); -+MODULE_DESCRIPTION("Glue layer for SmartMediaCard on Toshiba RBTX4925"); -Index: linux-2.6.5/drivers/mtd/nand/tx4938ndfmc.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nand/tx4938ndfmc.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nand/tx4938ndfmc.c 2005-02-01 
17:11:17.000000000 -0500 -@@ -0,0 +1,406 @@ -+/* -+ * drivers/mtd/nand/tx4938ndfmc.c -+ * -+ * Overview: -+ * This is a device driver for the NAND flash device connected to -+ * TX4938 internal NAND Memory Controller. -+ * TX4938 NDFMC is almost same as TX4925 NDFMC, but register size are 64 bit. -+ * -+ * Author: source@mvista.com -+ * -+ * Based on spia.c by Steven J. Hill -+ * -+ * $Id: tx4938ndfmc.c,v 1.3 2004/09/16 23:27:15 gleixner Exp $ -+ * -+ * Copyright (C) 2000-2001 Toshiba Corporation -+ * -+ * 2003 (c) MontaVista Software, Inc. This file is licensed under the -+ * terms of the GNU General Public License version 2. This program is -+ * licensed "as is" without any warranty of any kind, whether express -+ * or implied. -+ */ -+#include <linux/config.h> -+#include <linux/slab.h> -+#include <linux/init.h> -+#include <linux/module.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/nand.h> -+#include <linux/mtd/nand_ecc.h> -+#include <linux/mtd/partitions.h> -+#include <asm/io.h> -+#include <asm/bootinfo.h> -+#include <linux/delay.h> -+#include <asm/tx4938/rbtx4938.h> -+ -+extern struct nand_oobinfo jffs2_oobinfo; -+ -+/* -+ * MTD structure for TX4938 NDFMC -+ */ -+static struct mtd_info *tx4938ndfmc_mtd; -+ -+/* -+ * Define partitions for flash device -+ */ -+#define flush_wb() (void)tx4938_ndfmcptr->mcr; -+ -+#define NUM_PARTITIONS 3 -+#define NUMBER_OF_CIS_BLOCKS 24 -+#define SIZE_OF_BLOCK 0x00004000 -+#define NUMBER_OF_BLOCK_PER_ZONE 1024 -+#define SIZE_OF_ZONE (NUMBER_OF_BLOCK_PER_ZONE * SIZE_OF_BLOCK) -+#ifndef CONFIG_MTD_CMDLINE_PARTS -+/* -+ * You can use the following sample of MTD partitions -+ * on the NAND Flash Memory 32MB or more. -+ * -+ * The following figure shows the image of the sample partition on -+ * the 32MB NAND Flash Memory. -+ * -+ * Block No. 
-+ * 0 +-----------------------------+ ------ -+ * | CIS | ^ -+ * 24 +-----------------------------+ | -+ * | kernel image | | Zone 0 -+ * | | | -+ * +-----------------------------+ | -+ * 1023 | unused area | v -+ * +-----------------------------+ ------ -+ * 1024 | JFFS2 | ^ -+ * | | | -+ * | | | Zone 1 -+ * | | | -+ * | | | -+ * | | v -+ * 2047 +-----------------------------+ ------ -+ * -+ */ -+static struct mtd_partition partition_info[NUM_PARTITIONS] = { -+ { -+ .name = "RBTX4938 CIS Area", -+ .offset = 0, -+ .size = (NUMBER_OF_CIS_BLOCKS * SIZE_OF_BLOCK), -+ .mask_flags = MTD_WRITEABLE /* This partition is NOT writable */ -+ }, -+ { -+ .name = "RBTX4938 kernel image", -+ .offset = MTDPART_OFS_APPEND, -+ .size = 8 * 0x00100000, /* 8MB (Depends on size of kernel image) */ -+ .mask_flags = MTD_WRITEABLE /* This partition is NOT writable */ -+ }, -+ { -+ .name = "Root FS (JFFS2)", -+ .offset = (0 + SIZE_OF_ZONE), /* start address of next zone */ -+ .size = MTDPART_SIZ_FULL -+ }, -+}; -+#endif -+ -+static void tx4938ndfmc_hwcontrol(struct mtd_info *mtd, int cmd) -+{ -+ switch (cmd) { -+ case NAND_CTL_SETCLE: -+ tx4938_ndfmcptr->mcr |= TX4938_NDFMCR_CLE; -+ break; -+ case NAND_CTL_CLRCLE: -+ tx4938_ndfmcptr->mcr &= ~TX4938_NDFMCR_CLE; -+ break; -+ case NAND_CTL_SETALE: -+ tx4938_ndfmcptr->mcr |= TX4938_NDFMCR_ALE; -+ break; -+ case NAND_CTL_CLRALE: -+ tx4938_ndfmcptr->mcr &= ~TX4938_NDFMCR_ALE; -+ break; -+ /* TX4938_NDFMCR_CE bit is 0:high 1:low */ -+ case NAND_CTL_SETNCE: -+ tx4938_ndfmcptr->mcr |= TX4938_NDFMCR_CE; -+ break; -+ case NAND_CTL_CLRNCE: -+ tx4938_ndfmcptr->mcr &= ~TX4938_NDFMCR_CE; -+ break; -+ case NAND_CTL_SETWP: -+ tx4938_ndfmcptr->mcr |= TX4938_NDFMCR_WE; -+ break; -+ case NAND_CTL_CLRWP: -+ tx4938_ndfmcptr->mcr &= ~TX4938_NDFMCR_WE; -+ break; -+ } -+} -+static int tx4938ndfmc_dev_ready(struct mtd_info *mtd) -+{ -+ flush_wb(); -+ return !(tx4938_ndfmcptr->sr & TX4938_NDFSR_BUSY); -+} -+static void tx4938ndfmc_calculate_ecc(struct mtd_info *mtd, 
const u_char *dat, u_char *ecc_code) -+{ -+ u32 mcr = tx4938_ndfmcptr->mcr; -+ mcr &= ~TX4938_NDFMCR_ECC_ALL; -+ tx4938_ndfmcptr->mcr = mcr | TX4938_NDFMCR_ECC_OFF; -+ tx4938_ndfmcptr->mcr = mcr | TX4938_NDFMCR_ECC_READ; -+ ecc_code[1] = tx4938_ndfmcptr->dtr; -+ ecc_code[0] = tx4938_ndfmcptr->dtr; -+ ecc_code[2] = tx4938_ndfmcptr->dtr; -+ tx4938_ndfmcptr->mcr = mcr | TX4938_NDFMCR_ECC_OFF; -+} -+static void tx4938ndfmc_enable_hwecc(struct mtd_info *mtd, int mode) -+{ -+ u32 mcr = tx4938_ndfmcptr->mcr; -+ mcr &= ~TX4938_NDFMCR_ECC_ALL; -+ tx4938_ndfmcptr->mcr = mcr | TX4938_NDFMCR_ECC_RESET; -+ tx4938_ndfmcptr->mcr = mcr | TX4938_NDFMCR_ECC_OFF; -+ tx4938_ndfmcptr->mcr = mcr | TX4938_NDFMCR_ECC_ON; -+} -+ -+static u_char tx4938ndfmc_nand_read_byte(struct mtd_info *mtd) -+{ -+ struct nand_chip *this = mtd->priv; -+ return tx4938_read_nfmc(this->IO_ADDR_R); -+} -+ -+static void tx4938ndfmc_nand_write_byte(struct mtd_info *mtd, u_char byte) -+{ -+ struct nand_chip *this = mtd->priv; -+ tx4938_write_nfmc(byte, this->IO_ADDR_W); -+} -+ -+static void tx4938ndfmc_nand_write_buf(struct mtd_info *mtd, const u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ -+ for (i=0; i<len; i++) -+ tx4938_write_nfmc(buf[i], this->IO_ADDR_W); -+} -+ -+static void tx4938ndfmc_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ -+ for (i=0; i<len; i++) -+ buf[i] = tx4938_read_nfmc(this->IO_ADDR_R); -+} -+ -+static int tx4938ndfmc_nand_verify_buf(struct mtd_info *mtd, const u_char *buf, int len) -+{ -+ int i; -+ struct nand_chip *this = mtd->priv; -+ -+ for (i=0; i<len; i++) -+ if (buf[i] != tx4938_read_nfmc(this->IO_ADDR_R)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/* -+ * Send command to NAND device -+ */ -+static void tx4938ndfmc_nand_command (struct mtd_info *mtd, unsigned command, int column, int page_addr) -+{ -+ register struct nand_chip *this = mtd->priv; -+ -+ /* Begin command latch cycle */ 
-+ this->hwcontrol(mtd, NAND_CTL_SETCLE); -+ /* -+ * Write out the command to the device. -+ */ -+ if (command == NAND_CMD_SEQIN) { -+ int readcmd; -+ -+ if (column >= mtd->oobblock) { -+ /* OOB area */ -+ column -= mtd->oobblock; -+ readcmd = NAND_CMD_READOOB; -+ } else if (column < 256) { -+ /* First 256 bytes --> READ0 */ -+ readcmd = NAND_CMD_READ0; -+ } else { -+ column -= 256; -+ readcmd = NAND_CMD_READ1; -+ } -+ this->write_byte(mtd, readcmd); -+ } -+ this->write_byte(mtd, command); -+ -+ /* Set ALE and clear CLE to start address cycle */ -+ this->hwcontrol(mtd, NAND_CTL_CLRCLE); -+ -+ if (column != -1 || page_addr != -1) { -+ this->hwcontrol(mtd, NAND_CTL_SETALE); -+ -+ /* Serially input address */ -+ if (column != -1) -+ this->write_byte(mtd, column); -+ if (page_addr != -1) { -+ this->write_byte(mtd, (unsigned char) (page_addr & 0xff)); -+ this->write_byte(mtd, (unsigned char) ((page_addr >> 8) & 0xff)); -+ /* One more address cycle for higher density devices */ -+ if (mtd->size & 0x0c000000) -+ this->write_byte(mtd, (unsigned char) ((page_addr >> 16) & 0x0f)); -+ } -+ /* Latch in address */ -+ this->hwcontrol(mtd, NAND_CTL_CLRALE); -+ } -+ -+ /* -+ * program and erase have their own busy handlers -+ * status and sequential in needs no delay -+ */ -+ switch (command) { -+ -+ case NAND_CMD_PAGEPROG: -+ /* Turn off WE */ -+ this->hwcontrol (mtd, NAND_CTL_CLRWP); -+ return; -+ -+ case NAND_CMD_SEQIN: -+ /* Turn on WE */ -+ this->hwcontrol (mtd, NAND_CTL_SETWP); -+ return; -+ -+ case NAND_CMD_ERASE1: -+ case NAND_CMD_ERASE2: -+ case NAND_CMD_STATUS: -+ return; -+ -+ case NAND_CMD_RESET: -+ if (this->dev_ready) -+ break; -+ this->hwcontrol(mtd, NAND_CTL_SETCLE); -+ this->write_byte(mtd, NAND_CMD_STATUS); -+ this->hwcontrol(mtd, NAND_CTL_CLRCLE); -+ while ( !(this->read_byte(mtd) & 0x40)); -+ return; -+ -+ /* This applies to read commands */ -+ default: -+ /* -+ * If we don't have access to the busy pin, we apply the given -+ * command delay -+ */ -+ if 
(!this->dev_ready) { -+ udelay (this->chip_delay); -+ return; -+ } -+ } -+ -+ /* wait until command is processed */ -+ while (!this->dev_ready(mtd)); -+} -+ -+#ifdef CONFIG_MTD_CMDLINE_PARTS -+extern int parse_cmdline_partitions(struct mtd_info *master, struct mtd_partition **pparts, char *); -+#endif -+/* -+ * Main initialization routine -+ */ -+int __init tx4938ndfmc_init (void) -+{ -+ struct nand_chip *this; -+ int bsprt = 0, hold = 0xf, spw = 0xf; -+ int protected = 0; -+ -+ if ((*rbtx4938_piosel_ptr & 0x0c) != 0x08) { -+ printk("TX4938 NDFMC: disabled by IOC PIOSEL\n"); -+ return -ENODEV; -+ } -+ bsprt = 1; -+ hold = 2; -+ spw = 9 - 1; /* 8 GBUSCLK = 80ns (@ GBUSCLK 100MHz) */ -+ -+ if ((tx4938_ccfgptr->pcfg & -+ (TX4938_PCFG_ATA_SEL|TX4938_PCFG_ISA_SEL|TX4938_PCFG_NDF_SEL)) -+ != TX4938_PCFG_NDF_SEL) { -+ printk("TX4938 NDFMC: disabled by PCFG.\n"); -+ return -ENODEV; -+ } -+ -+ /* reset NDFMC */ -+ tx4938_ndfmcptr->rstr |= TX4938_NDFRSTR_RST; -+ while (tx4938_ndfmcptr->rstr & TX4938_NDFRSTR_RST) -+ ; -+ /* setup BusSeparete, Hold Time, Strobe Pulse Width */ -+ tx4938_ndfmcptr->mcr = bsprt ? 
TX4938_NDFMCR_BSPRT : 0; -+ tx4938_ndfmcptr->spr = hold << 4 | spw; -+ -+ /* Allocate memory for MTD device structure and private data */ -+ tx4938ndfmc_mtd = (struct mtd_info *) kmalloc (sizeof(struct mtd_info) + sizeof (struct nand_chip), -+ GFP_KERNEL); -+ if (!tx4938ndfmc_mtd) { -+ printk ("Unable to allocate TX4938 NDFMC MTD device structure.\n"); -+ return -ENOMEM; -+ } -+ -+ /* Get pointer to private data */ -+ this = (struct nand_chip *) (&tx4938ndfmc_mtd[1]); -+ -+ /* Initialize structures */ -+ memset((char *) tx4938ndfmc_mtd, 0, sizeof(struct mtd_info)); -+ memset((char *) this, 0, sizeof(struct nand_chip)); -+ -+ /* Link the private data with the MTD structure */ -+ tx4938ndfmc_mtd->priv = this; -+ -+ /* Set address of NAND IO lines */ -+ this->IO_ADDR_R = (unsigned long)&tx4938_ndfmcptr->dtr; -+ this->IO_ADDR_W = (unsigned long)&tx4938_ndfmcptr->dtr; -+ this->hwcontrol = tx4938ndfmc_hwcontrol; -+ this->dev_ready = tx4938ndfmc_dev_ready; -+ this->calculate_ecc = tx4938ndfmc_calculate_ecc; -+ this->correct_data = nand_correct_data; -+ this->enable_hwecc = tx4938ndfmc_enable_hwecc; -+ this->eccmode = NAND_ECC_HW3_256; -+ this->chip_delay = 100; -+ this->read_byte = tx4938ndfmc_nand_read_byte; -+ this->write_byte = tx4938ndfmc_nand_write_byte; -+ this->cmdfunc = tx4938ndfmc_nand_command; -+ this->write_buf = tx4938ndfmc_nand_write_buf; -+ this->read_buf = tx4938ndfmc_nand_read_buf; -+ this->verify_buf = tx4938ndfmc_nand_verify_buf; -+ -+ /* Scan to find existance of the device */ -+ if (nand_scan (tx4938ndfmc_mtd, 1)) { -+ kfree (tx4938ndfmc_mtd); -+ return -ENXIO; -+ } -+ -+ if (protected) { -+ printk(KERN_INFO "TX4938 NDFMC: write protected.\n"); -+ tx4938ndfmc_mtd->flags &= ~(MTD_WRITEABLE | MTD_ERASEABLE); -+ } -+ -+#ifdef CONFIG_MTD_CMDLINE_PARTS -+ { -+ int mtd_parts_nb = 0; -+ struct mtd_partition *mtd_parts = 0; -+ mtd_parts_nb = parse_cmdline_partitions(tx4938ndfmc_mtd, &mtd_parts, "tx4938ndfmc"); -+ if (mtd_parts_nb > 0) -+ 
add_mtd_partitions(tx4938ndfmc_mtd, mtd_parts, mtd_parts_nb); -+ else -+ add_mtd_device(tx4938ndfmc_mtd); -+ } -+#else -+ add_mtd_partitions(tx4938ndfmc_mtd, partition_info, NUM_PARTITIONS ); -+#endif -+ -+ return 0; -+} -+module_init(tx4938ndfmc_init); -+ -+/* -+ * Clean up routine -+ */ -+static void __exit tx4938ndfmc_cleanup (void) -+{ -+ /* Release resources, unregister device */ -+ nand_release (tx4938ndfmc_mtd); -+ -+ /* Free the MTD device structure */ -+ kfree (tx4938ndfmc_mtd); -+} -+module_exit(tx4938ndfmc_cleanup); -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Alice Hennessy <ahennessy@mvista.com>"); -+MODULE_DESCRIPTION("Board-specific glue layer for NAND flash on TX4938 NDFMC"); -Index: linux-2.6.5/drivers/mtd/nftlcore.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nftlcore.c 2004-04-03 22:36:15.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nftlcore.c 2005-02-01 17:11:29.000000000 -0500 -@@ -1,7 +1,7 @@ - /* Linux driver for NAND Flash Translation Layer */ - /* (c) 1999 Machine Vision Holdings, Inc. */ - /* Author: David Woodhouse <dwmw2@infradead.org> */ --/* $Id: nftlcore.c,v 1.94 2003/06/23 12:00:08 dwmw2 Exp $ */ -+/* $Id: nftlcore.c,v 1.96 2004/06/28 13:52:55 dbrown Exp $ */ - - /* - The contents of this file are distributed under the GNU General -@@ -43,9 +43,19 @@ - struct NFTLrecord *nftl; - unsigned long temp; - -- if (mtd->ecctype != MTD_ECC_RS_DiskOnChip) -+ if (mtd->type != MTD_NANDFLASH) -+ return; -+ /* OK, this is moderately ugly. But probably safe. Alternatives? 
*/ -+ if (memcmp(mtd->name, "DiskOnChip", 10)) - return; - -+ if (!mtd->block_isbad) { -+ printk(KERN_ERR -+"NFTL no longer supports the old DiskOnChip drivers loaded via docprobe.\n" -+"Please use the new diskonchip driver under the NAND subsystem.\n"); -+ return; -+ } -+ - DEBUG(MTD_DEBUG_LEVEL1, "NFTL: add_mtd for %s\n", mtd->name); - - nftl = kmalloc(sizeof(struct NFTLrecord), GFP_KERNEL); -@@ -60,6 +70,8 @@ - nftl->mbd.devnum = -1; - nftl->mbd.blksize = 512; - nftl->mbd.tr = tr; -+ memcpy(&nftl->oobinfo, &mtd->oobinfo, sizeof(struct nand_oobinfo)); -+ nftl->oobinfo.useecc = MTD_NANDECC_PLACEONLY; - - if (NFTL_mount(nftl) < 0) { - printk(KERN_WARNING "NFTL: could not mount device\n"); -@@ -350,17 +362,19 @@ - if (BlockMap[block] == BLOCK_NIL) - continue; - -- ret = MTD_READECC(nftl->mbd.mtd, (nftl->EraseSize * BlockMap[block]) + (block * 512), -- 512, &retlen, movebuf, (char *)&oob, NAND_ECC_DISKONCHIP); -+ ret = MTD_READ(nftl->mbd.mtd, (nftl->EraseSize * BlockMap[block]) + (block * 512), -+ 512, &retlen, movebuf); - if (ret < 0) { -- ret = MTD_READECC(nftl->mbd.mtd, (nftl->EraseSize * BlockMap[block]) -+ ret = MTD_READ(nftl->mbd.mtd, (nftl->EraseSize * BlockMap[block]) - + (block * 512), 512, &retlen, -- movebuf, (char *)&oob, NAND_ECC_DISKONCHIP); -+ movebuf); - if (ret != -EIO) - printk("Error went away on retry.\n"); - } -+ memset(&oob, 0xff, sizeof(struct nftl_oob)); -+ oob.b.Status = oob.b.Status1 = SECTOR_USED; - MTD_WRITEECC(nftl->mbd.mtd, (nftl->EraseSize * targetEUN) + (block * 512), -- 512, &retlen, movebuf, (char *)&oob, NAND_ECC_DISKONCHIP); -+ 512, &retlen, movebuf, (char *)&oob, &nftl->oobinfo); - } - - /* add the header so that it is now a valid chain */ -@@ -390,7 +404,6 @@ - - if (NFTL_formatblock(nftl, thisEUN) < 0) { - /* could not erase : mark block as reserved -- * FixMe: Update Bad Unit Table on disk - */ - nftl->ReplUnitTable[thisEUN] = BLOCK_RESERVED; - } else { -@@ -617,7 +630,7 @@ - u16 writeEUN; - unsigned long blockofs = (block * 
512) & (nftl->EraseSize - 1); - size_t retlen; -- u8 eccbuf[6]; -+ struct nftl_oob oob; - - writeEUN = NFTL_findwriteunit(nftl, block); - -@@ -628,9 +641,11 @@ - return 1; - } - -+ memset(&oob, 0xff, sizeof(struct nftl_oob)); -+ oob.b.Status = oob.b.Status1 = SECTOR_USED; - MTD_WRITEECC(nftl->mbd.mtd, (writeEUN * nftl->EraseSize) + blockofs, -- 512, &retlen, (char *)buffer, (char *)eccbuf, NAND_ECC_DISKONCHIP); -- /* no need to write SECTOR_USED flags since they are written in mtd_writeecc */ -+ 512, &retlen, (char *)buffer, (char *)&oob, &nftl->oobinfo); -+ /* need to write SECTOR_USED flags since they are not written in mtd_writeecc */ - - return 0; - } -@@ -692,8 +707,7 @@ - } else { - loff_t ptr = (lastgoodEUN * nftl->EraseSize) + blockofs; - size_t retlen; -- u_char eccbuf[6]; -- if (MTD_READECC(nftl->mbd.mtd, ptr, 512, &retlen, buffer, eccbuf, NAND_ECC_DISKONCHIP)) -+ if (MTD_READ(nftl->mbd.mtd, ptr, 512, &retlen, buffer)) - return -EIO; - } - return 0; -@@ -735,7 +749,7 @@ - - int __init init_nftl(void) - { -- printk(KERN_INFO "NFTL driver: nftlcore.c $Revision: 1.94 $, nftlmount.c %s\n", nftlmountrev); -+ printk(KERN_INFO "NFTL driver: nftlcore.c $Revision: 1.96 $, nftlmount.c %s\n", nftlmountrev); - - return register_mtd_blktrans(&nftl_tr); - } -Index: linux-2.6.5/drivers/mtd/nftlmount.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/nftlmount.c 2004-04-03 22:37:36.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/nftlmount.c 2005-02-01 17:11:29.000000000 -0500 -@@ -4,7 +4,7 @@ - * Author: Fabrice Bellard (fabrice.bellard@netgem.com) - * Copyright (C) 2000 Netgem S.A. 
- * -- * $Id: nftlmount.c,v 1.34 2003/05/21 10:54:10 dwmw2 Exp $ -+ * $Id: nftlmount.c,v 1.37 2004/09/16 23:32:37 gleixner Exp $ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by -@@ -31,7 +31,7 @@ - - #define SECTORSIZE 512 - --char nftlmountrev[]="$Revision: 1.34 $"; -+char nftlmountrev[]="$Revision: 1.37 $"; - - /* find_boot_record: Find the NFTL Media Header and its Spare copy which contains the - * various device information of the NFTL partition and Bad Unit Table. Update -@@ -41,7 +41,6 @@ - static int find_boot_record(struct NFTLrecord *nftl) - { - struct nftl_uci1 h1; -- struct nftl_oob oob; - unsigned int block, boot_record_count = 0; - size_t retlen; - u8 buf[SECTORSIZE]; -@@ -50,6 +49,10 @@ - - /* Assume logical EraseSize == physical erasesize for starting the scan. - We'll sort it out later if we find a MediaHeader which says otherwise */ -+ /* Actually, we won't. The new DiskOnChip driver has already scanned -+ the MediaHeader and adjusted the virtual erasesize it presents in -+ the mtd device accordingly. We could even get rid of -+ nftl->EraseSize if there were any point in doing so. */ - nftl->EraseSize = nftl->mbd.mtd->erasesize; - nftl->nb_blocks = nftl->mbd.mtd->size / nftl->EraseSize; - -@@ -62,7 +65,10 @@ - - /* Check for ANAND header first. 
Then can whinge if it's found but later - checks fail */ -- if ((ret = MTD_READ(nftl->mbd.mtd, block * nftl->EraseSize, SECTORSIZE, &retlen, buf))) { -+ ret = MTD_READ(nftl->mbd.mtd, block * nftl->EraseSize, SECTORSIZE, &retlen, buf); -+ /* We ignore ret in case the ECC of the MediaHeader is invalid -+ (which is apparently acceptable) */ -+ if (retlen != SECTORSIZE) { - static int warncount = 5; - - if (warncount) { -@@ -104,7 +110,7 @@ - - /* Finally reread to check ECC */ - if ((ret = MTD_READECC(nftl->mbd.mtd, block * nftl->EraseSize, SECTORSIZE, -- &retlen, buf, (char *)&oob, NAND_ECC_DISKONCHIP) < 0)) { -+ &retlen, buf, (char *)&oob, NULL) < 0)) { - printk(KERN_NOTICE "ANAND header found at 0x%x in mtd%d, but ECC read failed (err %d)\n", - block * nftl->EraseSize, nftl->mbd.mtd->index, ret); - continue; -@@ -149,6 +155,10 @@ - memcpy(mh, buf, sizeof(struct NFTLMediaHeader)); - - /* Do some sanity checks on it */ -+#if 0 -+The new DiskOnChip driver scans the MediaHeader itself, and presents a virtual -+erasesize based on UnitSizeFactor. So the erasesize we read from the mtd -+device is already correct. - if (mh->UnitSizeFactor == 0) { - printk(KERN_NOTICE "NFTL: UnitSizeFactor 0x00 detected. This violates the spec but we think we know what it means...\n"); - } else if (mh->UnitSizeFactor < 0xfc) { -@@ -161,6 +171,7 @@ - nftl->EraseSize = nftl->mbd.mtd->erasesize << (0xff - mh->UnitSizeFactor); - nftl->nb_blocks = nftl->mbd.mtd->size / nftl->EraseSize; - } -+#endif - nftl->nb_boot_blocks = le16_to_cpu(mh->FirstPhysicalEUN); - if ((nftl->nb_boot_blocks + 2) >= nftl->nb_blocks) { - printk(KERN_NOTICE "NFTL Media Header sanity check failed:\n"); -@@ -213,11 +224,13 @@ - - /* read the Bad Erase Unit Table and modify ReplUnitTable[] accordingly */ - for (i = 0; i < nftl->nb_blocks; i++) { -+#if 0 -+The new DiskOnChip driver already scanned the bad block table. Just query it. 
- if ((i & (SECTORSIZE - 1)) == 0) { - /* read one sector for every SECTORSIZE of blocks */ - if ((ret = MTD_READECC(nftl->mbd.mtd, block * nftl->EraseSize + - i + SECTORSIZE, SECTORSIZE, &retlen, buf, -- (char *)&oob, NAND_ECC_DISKONCHIP)) < 0) { -+ (char *)&oob, NULL)) < 0) { - printk(KERN_NOTICE "Read of bad sector table failed (err %d)\n", - ret); - kfree(nftl->ReplUnitTable); -@@ -228,6 +241,9 @@ - /* mark the Bad Erase Unit as RESERVED in ReplUnitTable */ - if (buf[i & (SECTORSIZE - 1)] != 0xff) - nftl->ReplUnitTable[i] = BLOCK_RESERVED; -+#endif -+ if (nftl->mbd.mtd->block_isbad(nftl->mbd.mtd, i * nftl->EraseSize)) -+ nftl->ReplUnitTable[i] = BLOCK_RESERVED; - } - - nftl->MediaUnit = block; -@@ -253,21 +269,16 @@ - int check_oob) - { - int i, retlen; -- u8 buf[SECTORSIZE]; -+ u8 buf[SECTORSIZE + nftl->mbd.mtd->oobsize]; - - for (i = 0; i < len; i += SECTORSIZE) { -- /* we want to read the sector without ECC check here since a free -- sector does not have ECC syndrome on it yet */ -- if (MTD_READ(nftl->mbd.mtd, address, SECTORSIZE, &retlen, buf) < 0) -+ if (MTD_READECC(nftl->mbd.mtd, address, SECTORSIZE, &retlen, buf, &buf[SECTORSIZE], &nftl->oobinfo) < 0) - return -1; - if (memcmpb(buf, 0xff, SECTORSIZE) != 0) - return -1; - - if (check_oob) { -- if (MTD_READOOB(nftl->mbd.mtd, address, nftl->mbd.mtd->oobsize, -- &retlen, buf) < 0) -- return -1; -- if (memcmpb(buf, 0xff, nftl->mbd.mtd->oobsize) != 0) -+ if (memcmpb(buf + SECTORSIZE, 0xff, nftl->mbd.mtd->oobsize) != 0) - return -1; - } - address += SECTORSIZE; -@@ -282,7 +293,6 @@ - * Return: 0 when succeed, -1 on error. - * - * ToDo: 1. Is it neceressary to check_free_sector after erasing ?? -- * 2. 
UnitSizeFactor != 0xFF - */ - int NFTL_formatblock(struct NFTLrecord *nftl, int block) - { -@@ -312,11 +322,10 @@ - MTD_ERASE(nftl->mbd.mtd, instr); - - if (instr->state == MTD_ERASE_FAILED) { -- /* could not format, FixMe: We should update the BadUnitTable -- both in memory and on disk */ - printk("Error while formatting block %d\n", block); -- return -1; -- } else { -+ goto fail; -+ } -+ - /* increase and write Wear-Leveling info */ - nb_erases = le32_to_cpu(uci.WearInfo); - nb_erases++; -@@ -329,14 +338,18 @@ - * FixMe: is this check really necessary ? since we have check the - * return code after the erase operation. */ - if (check_free_sectors(nftl, instr->addr, nftl->EraseSize, 1) != 0) -- return -1; -+ goto fail; - - uci.WearInfo = le32_to_cpu(nb_erases); - if (MTD_WRITEOOB(nftl->mbd.mtd, block * nftl->EraseSize + SECTORSIZE + 8, 8, - &retlen, (char *)&uci) < 0) -- return -1; -+ goto fail; - return 0; -- } -+fail: -+ /* could not format, update the bad block table (caller is responsible -+ for setting the ReplUnitTable to BLOCK_RESERVED on failure) */ -+ nftl->mbd.mtd->block_markbad(nftl->mbd.mtd, instr->addr); -+ return -1; - } - - /* check_sectors_in_chain: Check that each sector of a Virtual Unit Chain is correct. -@@ -441,8 +454,7 @@ - - printk("Formatting block %d\n", block); - if (NFTL_formatblock(nftl, block) < 0) { -- /* cannot format !!!! Mark it as Bad Unit, -- FixMe: update the BadUnitTable on disk */ -+ /* cannot format !!!! 
Mark it as Bad Unit */ - nftl->ReplUnitTable[block] = BLOCK_RESERVED; - } else { - nftl->ReplUnitTable[block] = BLOCK_FREE; -Index: linux-2.6.5/drivers/mtd/redboot.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/redboot.c 2004-04-03 22:37:23.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/redboot.c 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: redboot.c,v 1.11 2003/05/21 10:39:26 dwmw2 Exp $ -+ * $Id: redboot.c,v 1.15 2004/08/10 07:55:16 dwmw2 Exp $ - * - * Parse RedBoot-style Flash Image System (FIS) tables and - * produce a Linux partition array to match. -@@ -8,6 +8,7 @@ - #include <linux/kernel.h> - #include <linux/slab.h> - #include <linux/init.h> -+#include <linux/vmalloc.h> - - #include <linux/mtd/mtd.h> - #include <linux/mtd/partitions.h> -@@ -48,21 +49,24 @@ - char *names; - char *nullname; - int namelen = 0; -+ int nulllen = 0; -+#ifdef CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED - static char nullstring[] = "unallocated"; -+#endif - -- buf = kmalloc(PAGE_SIZE, GFP_KERNEL); -+ buf = vmalloc(master->erasesize); - - if (!buf) - return -ENOMEM; - - /* Read the start of the last erase block */ - ret = master->read(master, master->size - master->erasesize, -- PAGE_SIZE, &retlen, (void *)buf); -+ master->erasesize, &retlen, (void *)buf); - - if (ret) - goto out; - -- if (retlen != PAGE_SIZE) { -+ if (retlen != master->erasesize) { - ret = -EIO; - goto out; - } -@@ -80,7 +84,7 @@ - goto out; - } - -- for (i = 0; i < PAGE_SIZE / sizeof(struct fis_image_desc); i++) { -+ for (i = 0; i < master->erasesize / sizeof(struct fis_image_desc); i++) { - struct fis_list *new_fl, **prev; - - if (buf[i].name[0] == 0xff) -@@ -112,48 +116,69 @@ - - nrparts++; - } -- if (fl->img->flash_base) -+#ifdef CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED -+ if (fl->img->flash_base) { - nrparts++; -+ nulllen = sizeof(nullstring); -+ } - - for (tmp_fl = fl; tmp_fl->next; tmp_fl = tmp_fl->next) { -- if (tmp_fl->img->flash_base + 
tmp_fl->img->size + master->erasesize < tmp_fl->next->img->flash_base) -+ if (tmp_fl->img->flash_base + tmp_fl->img->size + master->erasesize <= tmp_fl->next->img->flash_base) { - nrparts++; -+ nulllen = sizeof(nullstring); -+ } - } -- parts = kmalloc(sizeof(*parts)*nrparts + sizeof(nullstring) + namelen, GFP_KERNEL); -+#endif -+ parts = kmalloc(sizeof(*parts)*nrparts + nulllen + namelen, GFP_KERNEL); - - if (!parts) { - ret = -ENOMEM; - goto out; - } - -- memset(parts, 0, sizeof(*parts)*nrparts + namelen); -+ memset(parts, 0, sizeof(*parts)*nrparts + nulllen + namelen); - -- /* FIXME: Include nullname only if it's used */ - nullname = (char *)&parts[nrparts]; -- sprintf(nullname, nullstring); -- names = nullname + sizeof(nullstring); -+#ifdef CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED -+ if (nulllen > 0) { -+ strcpy(nullname, nullstring); -+ } -+#endif -+ names = nullname + nulllen; - - i=0; - -+#ifdef CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED - if (fl->img->flash_base) { - parts[0].name = nullname; - parts[0].size = fl->img->flash_base; - parts[0].offset = 0; -+ i++; - } -+#endif - for ( ; i<nrparts; i++) { - parts[i].size = fl->img->size; - parts[i].offset = fl->img->flash_base; - parts[i].name = names; - - strcpy(names, fl->img->name); -+#ifdef CONFIG_MTD_REDBOOT_PARTS_READONLY -+ if (!memcmp(names, "RedBoot", 8) || -+ !memcmp(names, "RedBoot config", 15) || -+ !memcmp(names, "FIS directory", 14)) { -+ parts[i].mask_flags = MTD_WRITEABLE; -+ } -+#endif - names += strlen(names)+1; - -- if(fl->next && fl->img->flash_base + fl->img->size + master->erasesize < fl->next->img->flash_base) { -+#ifdef CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED -+ if(fl->next && fl->img->flash_base + fl->img->size + master->erasesize <= fl->next->img->flash_base) { - i++; - parts[i].offset = parts[i-1].size + parts[i-1].offset; - parts[i].size = fl->next->img->flash_base - parts[i].offset; - parts[i].name = nullname; - } -+#endif - tmp_fl = fl; - fl = fl->next; - kfree(tmp_fl); -@@ -166,7 +191,7 @@ - fl 
= fl->next; - kfree(old); - } -- kfree(buf); -+ vfree(buf); - return ret; - } - -Index: linux-2.6.5/drivers/mtd/ssfdc.c -=================================================================== ---- linux-2.6.5.orig/drivers/mtd/ssfdc.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/drivers/mtd/ssfdc.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,1132 @@ -+/* -+ * drivers/mtd/ssfdc.c -+ * -+ * Copyright (C) 2003 Simon Haynes (simon@baydel.con) -+ * Baydel Ltd -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public License -+ * version 2.1 as published by the Free Software Foundation. -+ * -+ * This module provides a translation layer, via mtd, for smart -+ * media card access. It essentially enables the possibility -+ * of using cards on a hardware which does not have a hardware translation -+ * layer and interchanging them with hardware that does ie: PC card readers -+ * -+ * I had to write this module for a specific task and in a short timeframe -+ * for this reason I have imposed some restricions to make the job easier. -+ * -+ * To build an compile the driver I added the following lines -+ * to mtd/Config.in -+ * -+ * dep_tristate ' SSFDC support' CONFIG_SSFDC $CONFIG_MTD -+ * -+ * to /mtd/Makefile -+ * -+ * obj-$(CONFIG_SSFDC) += ssfdc.o -+ * -+ * and compiled the kernel via the usual methods. -+ * -+ * I am sure that there are many problems I don't know about but here are -+ * some that I know of -+ * -+ * Currently the driver uses MAJOR number 44 which I think is FTL or NFTL -+ * I did this because I wanted a static number and I didn't know -+ * how to go about getting a new one. This needs addressing -+ * The dev nodes required are like standard. I only use minor 0 -+ * (/dev/ssfdca), and minor 1 (/dev/ssfdca1). -+ * You should be able to run fdisk on /dev/ssfdca and the first partition -+ * is /dev/ssfdca1. 
There is no working code in the module for changing the -+ * SMC and rebuilding the maps so the card should not be changed once the -+ * module is loaded. At present I only look for 1 partition. But this is a -+ * small commented hack. -+ * -+ * There is no support cards which do not have a 512 byte page size with 16 -+ * bytes of oob and an erase size of 16K. -+ * There are no checks for this at present. In addition the MTD reported size -+ * must be 16M or a multiple. -+ * -+ * Code to handle multiple partitions or multiple cards is incomplete -+ * Need to allocate data buffer and oob buffer on a per partition basis. -+ * As I am only concerned with one partition I will do this if I ever need to. -+ * The cached physical address variable also needs this attention. -+ * -+ * Recently I have started to work on media changes. Some of this is specific -+ * to my hardware and you will see references to pt_ssfdc_smc and smc_status. -+ * This code is incomplete and does not work. I have commented it for the moment -+ * but it should give an indication of what I think is required. Maybe there is -+ * something it mtd that can help -+ * -+ * 17th August 2004 MHB -+ * -+ * Following updating CVS I noticed some single bit data corruption. I believe -+ * that this was down to the fact that I was using mtd->read instead of mtd->read_ecc -+ * and that mtd->read was applying it's own error corretion from the wrong ecc bytes -+ * I have now corrected this. -+ * -+ * During this time I noticed that while in allocate new I only seem to look for blocks -+ * in 1 zone. 
So this limits the partition size to 16MB with all the other SMC size -+ * restrictions -+ -+ -+*/ -+ -+#include <linux/config.h> -+#include <linux/types.h> -+#include <linux/module.h> -+#include <linux/kernel.h> -+#include <linux/fs.h> -+#include <linux/init.h> -+#include <linux/slab.h> -+#include <linux/vmalloc.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/blktrans.h> -+#include <linux/mtd/nand_ecc.h> -+#include <linux/sched.h> -+#include <linux/ptrace.h> -+#include <linux/string.h> -+#include <linux/timer.h> -+#include <linux/major.h> -+#include <linux/ioctl.h> -+#include <linux/hdreg.h> -+#include <linux/list.h> -+#include <asm/semaphore.h> -+#include <asm/uaccess.h> -+ -+ -+#if (LINUX_VERSION_CODE >= 0x20100) -+#include <linux/vmalloc.h> -+#endif -+#if (LINUX_VERSION_CODE >= 0x20303) -+#include <linux/blkpg.h> -+#endif -+ -+#include <asm/semaphore.h> -+ -+#define SSFDC_FORMAT 1 -+ -+#define PDEBUG(fmt, args...) -+ -+#define BLK_INC_USE_COUNT MOD_INC_USE_COUNT -+#define BLK_DEC_USE_COUNT MOD_DEC_USE_COUNT -+ -+#if (LINUX_VERSION_CODE < 0x20320) -+#define BLK_DEFAULT_QUEUE(n) blk_dev[n].request_fn -+#define blk_init_queue(q, req) q = (req) -+#define blk_cleanup_queue(q) q = NULL -+#define request_arg_t void -+#else -+#define request_arg_t request_queue_t *q -+#endif -+ -+#define TRUE 1 -+#define FALSE 0 -+ -+#define SSFDC_MAJOR 44 -+ -+#define MAJOR_NR SSFDC_MAJOR -+#define DEVICE_NAME "ssfdc" -+#define DEVICE_REQUEST do_ssfdc_request -+#define DEVICE_ON(device) -+#define DEVICE_OFF(device) -+ -+#include <linux/blk.h> -+ -+#include "/home/simon/ebony/dbwhatu/dbwhatu/smccontrol.h" -+ -+ -+ -+#define ZONE_SIZE (16 * 1024 * 1024) -+#define SMC_BLOCK_SIZE (16 * 1024) -+#define SECTOR_SIZE 512 -+#define SECTORS_PER_ZONE (ZONE_SIZE / SECTOR_SIZE) -+#define BLOCKS_PER_ZONE (ZONE_SIZE / SMC_BLOCK_SIZE) -+#define SECTORS_PER_BLOCK (SMC_BLOCK_SIZE / SECTOR_SIZE) -+#define OOB_SIZE 16 -+ -+ -+#define MAX_DEVICES 4 -+#define MAX_PARTITIONS 8 -+#define PARTITION_BITS 
3 -+#define MAX_ZONES 8 -+ -+ -+int ssfdc_major = SSFDC_MAJOR; -+unsigned int ssfdc_cached = 0xFFFFFFFF; -+static unsigned char ssfdc_scratch[16384]; -+static unsigned char ssfdc_buffer[16]; -+static unsigned char ssfdc_ffoob_buf[OOB_SIZE * SECTORS_PER_BLOCK]; -+static unsigned char ssfdc_oob_buf[OOB_SIZE * SECTORS_PER_BLOCK]; -+ -+ -+static struct nand_oobinfo ssfdc_ffoob_info = { -+ .useecc = 0, -+}; -+ -+ -+typedef struct minor_t { -+ atomic_t open; -+ int cached; -+ unsigned char * pt_data; -+ unsigned char * pt_oob; -+} minor_t; -+ -+ -+ -+typedef struct partition_t { -+ int type; -+ struct mtd_info *mtd; -+ int count; -+ unsigned int *zone; -+ unsigned int zoneCount; -+ minor_t minor[MAX_PARTITIONS]; -+ unsigned int last_written[MAX_ZONES]; -+} partition_t; -+ -+partition_t SMCParts[MAX_DEVICES]; -+ -+ -+static unsigned char ssfdc_ecc[] = {14, 13, 15, 9, 8, 10}; -+ -+static struct hd_struct ssfdc_hd[MAX_DEVICES * MAX_PARTITIONS]; -+static int ssfdc_sizes[MAX_DEVICES * MAX_PARTITIONS]; -+static int ssfdc_blocksizes[MAX_DEVICES * MAX_PARTITIONS]; -+smc_control * pt_ssfdc_smc; -+ -+ -+static struct gendisk ssfdc_gendisk = { -+ major: SSFDC_MAJOR, -+ major_name: "ssfdc", -+ minor_shift: PARTITION_BITS, -+ max_p: MAX_PARTITIONS, -+ part: ssfdc_hd, -+ sizes: ssfdc_sizes, -+}; -+ -+ -+static int ssfdc_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long arg); -+static int ssfdc_open(struct inode *inode, struct file *file); -+static int ssfdc_close(struct inode *inode, struct file *file); -+static int ssfdc_write(partition_t *part, caddr_t buffer, u_long sector, u_long nblocks); -+static int ssfdc_read(partition_t *part, caddr_t buffer, u_long sector, u_long nblocks); -+static int ssfdc_physical(partition_t * pt_smcpart, int zone, int block); -+static int ssfdc_erase(partition_t *pt_smcpart, unsigned int offset); -+static int ssfdc_read_partitions(partition_t * pt_smcpart); -+static void ssfdc_notify_add(struct mtd_info *mtd); -+static void 
ssfdc_notify_remove(struct mtd_info *mtd); -+static void ssfdc_tables(partition_t * pt_smcpart); -+static int ssfdc_sector_blank(partition_t * pt_smcpart, int sc); -+static int ssfdc_allocate_new(partition_t * pt_smcpart, int zone); -+int ssfdc_parity(int number); -+static void ssfdc_erase_callback(struct erase_info *erase); -+ -+ -+ -+static DECLARE_WAIT_QUEUE_HEAD(ssfdc_wq); -+ -+ -+static struct mtd_notifier ssfdc_notifier = { -+ add: ssfdc_notify_add, -+ remove: ssfdc_notify_remove, -+}; -+ -+ -+ -+static struct block_device_operations ssfdc_fops = { -+ open: ssfdc_open, -+ release: ssfdc_close, -+ ioctl: ssfdc_ioctl, -+}; -+ -+static struct semaphore ssfdc_semaphore; -+ -+static void ssfdc_notify_add(struct mtd_info *mtd) { -+ -+ -+ -+ -+ if(mtd->index >= 1) return; // Hack to limit SSFDC to 1 partition -+ -+ if( ((mtd->size % ZONE_SIZE) != 0) && (mtd->size < (ZONE_SIZE * MAX_ZONES)) ){ -+ PDEBUG("ssfdc_notify_add : mtd partition %d is not modulus 16M, not SSFDC\n", mtd->index); -+ } -+ else { -+ memset((void *)&SMCParts[mtd->index].type, 0, sizeof(partition_t)); -+ SMCParts[mtd->index].mtd = mtd; -+ SMCParts[mtd->index].count = mtd->index; -+ SMCParts[mtd->index].type = 1; -+ SMCParts[mtd->index].zoneCount = mtd->size / ZONE_SIZE; -+ SMCParts[mtd->index].zone = kmalloc(SMCParts[mtd->index].zoneCount * 8192, GFP_KERNEL); -+ -+ -+ if(!SMCParts[mtd->index].zone) { -+ printk(KERN_NOTICE "ssfdc_notify_add : mtd partition %d, failed to allocate mapping table\n", mtd->index); -+ SMCParts[mtd->index].type = 0; -+ } -+ else { -+ memset((void *)SMCParts[mtd->index].zone, 0xFF, SMCParts[mtd->index].zoneCount * 8192); -+ } -+ -+ ssfdc_read_partitions((partition_t *)&SMCParts[mtd->index].type); -+ } -+ return; -+ -+} -+static int ssfdc_read_partitions(partition_t * pt_smcpart) { -+ -+ int whole, i, j, size; -+ -+//=printk("ssfdc_read_partitions : start\n"); -+ -+ for(i=0; i<MAX_PARTITIONS; i++) -+ if ((atomic_read(&pt_smcpart->minor[i].open) > 1)) { 
-+//=printk("ssfdc_read_partitions : part %d busy\n", i); -+ -+ return -EBUSY; -+ } -+ -+ -+//=printk("ssfdc_read_partitions : tables start\n"); -+ ssfdc_tables(pt_smcpart); -+//=printk("ssfdc_read_partitions : tables end\n"); -+ -+ whole = pt_smcpart->count << PARTITION_BITS; -+ -+ -+ j = MAX_PARTITIONS - 1; -+ while (j-- > 0) { -+ if (ssfdc_hd[whole+j].nr_sects > 0) { -+ kdev_t rdev = MKDEV(SSFDC_MAJOR, whole+j); -+ invalidate_device(rdev, 1); -+ } -+ ssfdc_hd[whole+j].start_sect = 0; -+ ssfdc_hd[whole+j].nr_sects = 0; -+ } -+ -+ -+ size = (((pt_smcpart->mtd->size / 16384) * 1000) / 1024) * 32; -+ size /= (0x8 * 0x20); -+ size = size * (0x8 * 0x20); -+ -+//=printk("ssfdc_read_partitions : register start\n"); -+ -+ register_disk(&ssfdc_gendisk, whole >> PARTITION_BITS, MAX_PARTITIONS, -+ &ssfdc_fops, size); -+ -+//=printk("ssfdc_read_partitions : register end\n"); -+ -+ -+ return 0; -+} -+ -+ -+static void ssfdc_notify_remove(struct mtd_info *mtd) { -+int i, j, whole; -+ -+ i=mtd->index; -+ whole = i << PARTITION_BITS; -+ if(SMCParts[i].mtd == mtd) { -+ if(SMCParts[i].zone)kfree(SMCParts[i].zone); -+ memset((void *)&SMCParts[i].type, 0, sizeof(partition_t)); -+ for (j = 0; j < MAX_PARTITIONS; j++) { -+ if (ssfdc_hd[whole+j].nr_sects > 0) { -+ ssfdc_hd[whole+j].start_sect = 0; -+ ssfdc_hd[whole+j].nr_sects=0; -+ } -+ } -+ return; -+ } -+ return; -+} -+ -+ -+ -+static int ssfdc_ioctl(struct inode *inode, struct file *file, -+ u_int cmd, u_long arg) { -+ -+ int minor = MINOR(inode->i_rdev); -+ int ret = -EINVAL; -+ partition_t * pt_smcpart = (partition_t *)&SMCParts[(minor & ~(MAX_PARTITIONS -1)) >> PARTITION_BITS].type; -+ struct hd_geometry geo; -+ int size; -+/* -+ unsigned char smc_status; -+ -+ smc_status = in_8((void *)&pt_ssfdc_smc->smc_status); -+ if(!(smc_status & SMC_PRESENT)) { -+ printk("ssfdc : media not present\n"); -+ ret = 1; -+ goto ssfdc_ioctl_error; -+ } -+ -+ if(smc_status & SMC_CHANGED) { -+ out_8((void *)&pt_ssfdc_smc->smc_status, smc_status); 
-+ if(minor & ((1<< PARTITION_BITS) - 1)) return -ENOTTY; -+ ssfdc_read_partitions(pt_smcpart); -+ printk("ssfdc : media change\n"); -+ } -+*/ -+ switch(cmd) { -+ -+ case HDIO_GETGEO: -+ memset(&geo, 0, sizeof(geo)); -+ size = (((pt_smcpart->mtd->size / 16384) * 1000) / 1024) * 32; -+ size /= (0x8 * 0x20); -+ geo.heads = 0x8; -+ geo.sectors = 0x20; -+ geo.cylinders = size; -+ geo.start = ssfdc_hd[minor].start_sect; -+// printk(KERN_WARNING "ssfdc : HDIO_GETGEO heads %d, sectors %d, cylinders %d, start %lu\n", -+// geo.heads, geo.sectors, geo.cylinders, geo.start); -+ copy_to_user((void *)arg, &geo, sizeof(geo)); -+ ret = 0; -+ break; -+ -+ case BLKGETSIZE64: -+ case BLKGETSIZE: -+ size = (((pt_smcpart->mtd->size / 16384) * 1000) / 1024) * 32; -+ //=printk(KERN_WARNING "ssfdc : BLKGETSIZE %d, minor %d\n", size, minor); -+ ret = copy_to_user((unsigned long *)arg, &size, sizeof(size)); -+ break; -+ case BLKSSZGET: -+ size = 512; -+ ret = copy_to_user((unsigned long *)arg, &size, sizeof(size)); -+ break; -+ break; -+ -+ case BLKRRPART: -+ if(minor & ((1<< PARTITION_BITS) - 1)) return -ENOTTY; -+ ssfdc_read_partitions(pt_smcpart); -+ ret=0; -+ break; -+ case BLKFLSBUF: -+ printk(KERN_WARNING "ssfdc : block ioctl 0x%x\n", cmd); -+ break; -+ -+ default: -+ printk(KERN_WARNING "ssfdc: unknown ioctl 0x%x\n", cmd); -+ } -+ -+//ssfdc_ioctl_error: -+ return(ret); -+ -+} -+static int ssfdc_open(struct inode *inode, struct file *file) -+{ -+ int minor = MINOR(inode->i_rdev); -+ partition_t *pt_smcpart; -+ int index; -+ -+ if (minor >= MAX_MTD_DEVICES) -+ return -ENODEV; -+ -+ index = (minor & ~(MAX_PARTITIONS -1)) >> PARTITION_BITS; -+ -+ -+ if(SMCParts[index].type != SSFDC_FORMAT) -+ return -ENXIO; -+ -+ pt_smcpart = &SMCParts[index]; -+ -+ -+ if(!pt_smcpart->zone) -+ return -ENXIO; -+ -+ -+ BLK_INC_USE_COUNT; -+ -+ if (!get_mtd_device(pt_smcpart->mtd, -1)) { -+ BLK_DEC_USE_COUNT; -+ return -ENXIO; -+ } -+ -+ if ((file->f_mode & 2) && !(pt_smcpart->mtd->flags & MTD_CLEAR_BITS) 
) { -+ put_mtd_device(pt_smcpart->mtd); -+ BLK_DEC_USE_COUNT; -+ return -EROFS; -+ } -+ -+ -+ atomic_inc(&pt_smcpart->minor[minor & ~(MAX_PARTITIONS -1)].open); -+ -+ PDEBUG("ssfdc_open : device %d\n", minor); -+ -+ return(0); -+} -+ -+static void ssfdc_tables(partition_t * pt_smcpart) { -+ -+ int * logical, * physical; -+ int offset = 0; -+ int zone, block; -+ int i, retlen; -+ int block_address, parity; -+ int h, l; -+ -+ for(zone=0; zone<pt_smcpart->zoneCount; zone++) { -+ logical = pt_smcpart->zone + (2048 * zone); -+ memset((void *)logical, 0xFF, 1024 * sizeof(int)); -+ physical = pt_smcpart->zone + (2048 * zone) + 1024; -+ memset((void *)physical, 0xFF, 1024 * sizeof(int)); -+ -+ for(block=0; block < 1024; block++) { -+ offset = (zone * ZONE_SIZE) + (block * SMC_BLOCK_SIZE); -+ pt_smcpart->mtd->read_oob(pt_smcpart->mtd, offset, sizeof(ssfdc_buffer), &retlen, ssfdc_buffer); -+ if(retlen != sizeof(ssfdc_buffer)) { -+ printk(KERN_WARNING "ssfdc_tables : failed to read OOB\n"); -+ pt_smcpart->type = 0; -+ return; -+ } -+ -+ l = (ssfdc_buffer[7] & 0xFF); -+ h = (ssfdc_buffer[6] & 0xFF); -+ block_address = l + (h << 8L); -+ -+ if((block_address & ~0x7FF) != 0x1000) { -+ continue; -+ } -+ -+ parity = block_address & 0x01; -+ -+ block_address &= 0x7FF; -+ block_address >>= 1; -+ -+ -+ if(ssfdc_parity(block_address) != parity) { -+ printk(KERN_WARNING "ssfdc_tables : parity error offset 0x%x, block 0x%x, parity 0x%x\nOOB : " -+ , offset, block_address, parity); -+ for(i=0; i<16; i++) { -+ printk("0x%02x ", (unsigned char)ssfdc_buffer[i]); -+ } -+ printk("\n"); -+ pt_smcpart->type = 0; -+ return; -+ } -+ -+ -+ /* Ok we have a valid block number so insert it */ -+ *(logical + block_address) = (offset/SMC_BLOCK_SIZE); -+ PDEBUG("ssfdc_tables : logical 0x%x + 0x%x = 0x%x\n", -+ (unsigned int)logical, block_address, (offset/SMC_BLOCK_SIZE)); -+ *(physical + block) = block_address; -+ PDEBUG("ssfdc_tables : physical 0x%x + 0x%x = 0x%x\n", (unsigned int)physical, block, 
block_address); -+ -+ -+ } -+ } -+ return; -+} -+int ssfdc_parity(int number) { -+ int i; -+ int parity = 1; // the 0x1000 bit -+ -+ for(i=0; i<10; i++) { -+ parity += ((number >> i) & 1); -+ } -+ PDEBUG("ssfdc_parity : number 0x%x, parity 0x%x\n", number, parity); -+ return(parity % 2); -+} -+static int ssfdc_physical(partition_t * pt_smcpart, int zone, int block) { -+ -+ unsigned int * logical; -+ -+ logical = pt_smcpart->zone + (zone * 2048); -+ -+ logical += block; -+ -+ if(*logical == 0xFFFFFFFF) { -+ PDEBUG("ssfdc_physical : physical for zone %d, block %d invalid\n", zone, block); -+ return(-1); -+ } -+ -+ PDEBUG("ssfdc_physical : physical for zone %d, block %d, 0x%x\n", zone, block, (*logical * SMC_BLOCK_SIZE)); -+ return(*logical * SMC_BLOCK_SIZE); -+} -+ -+static int ssfdc_close(struct inode *inode, struct file *file) -+{ -+ int minor = MINOR(inode->i_rdev); -+ partition_t *pt_smcpart; -+ int index = (minor & ~(MAX_PARTITIONS -1)) >> PARTITION_BITS; -+ -+ if (minor >= MAX_MTD_DEVICES) -+ return -ENODEV; -+ -+ if(SMCParts[index].type != SSFDC_FORMAT) -+ return -ENXIO; -+ -+ pt_smcpart = &SMCParts[index]; -+ atomic_dec(&pt_smcpart->minor[minor & ~(MAX_PARTITIONS -1)].open); -+ put_mtd_device(pt_smcpart->mtd); -+ BLK_DEC_USE_COUNT; -+ -+ return(0); -+} -+ -+ -+static void do_ssfdc_request(request_arg_t) -+{ -+ int ret, minor; -+ partition_t *pt_smcpart; -+ int index; -+ do { -+ -+ INIT_REQUEST; -+ -+ -+ -+ minor = MINOR(CURRENT->rq_dev); -+ index = (minor & ~(MAX_PARTITIONS -1)) >> PARTITION_BITS; -+ -+ pt_smcpart = &SMCParts[index]; -+ if (pt_smcpart->type == SSFDC_FORMAT) { -+ ret = 0; -+ switch (CURRENT->cmd) { -+ case READ: -+ ret = ssfdc_read(pt_smcpart, CURRENT->buffer, -+ CURRENT->sector + ssfdc_hd[minor].start_sect, -+ CURRENT->current_nr_sectors); -+ break; -+ -+ case WRITE: -+ ret = ssfdc_write(pt_smcpart, CURRENT->buffer, -+ CURRENT->sector + ssfdc_hd[minor].start_sect, -+ CURRENT->current_nr_sectors); -+ break; -+ -+ default: -+ 
panic("do_ssfdc_request : unknown block command!\n"); -+ } -+ -+ } else { -+ ret = 1; -+ PDEBUG("not ssfdc partition type\n"); -+ } -+ -+ if (!ret) { -+ CURRENT->sector += CURRENT->current_nr_sectors; -+ } -+ -+ end_request((ret == 0) ? 1 : 0); -+ } while (1); -+} -+ -+static int ssfdc_write(partition_t *pt_smcpart, caddr_t buffer, -+ u_long sector, u_long nblocks) -+{ -+ int zone, block, offset; -+ int sectors_written = 0; -+ int physical; -+ int * pt_logical; -+ int * pt_physical; -+ int new = -1; -+ int size; -+ int retlen; -+ int i; -+ int sc; -+ int ptr_done = 0; -+ unsigned char * ptr = (unsigned char *)buffer; -+ unsigned char ecc_code[6], ecc_calc[6]; -+ int do_erase; -+// unsigned char smc_status; -+ -+ -+ -+ offset = (sector % SECTORS_PER_ZONE) % SECTORS_PER_BLOCK ; -+ -+ PDEBUG("write device %d, sector %d, count %d\n", -+ pt_smcpart->count, sector, nblocks); -+/* -+ smc_status = in_8((void *)&pt_ssfdc_smc->smc_status); -+ if(!(smc_status & SMC_PRESENT)) { -+ printk("ssfdc : media not present\n"); -+ return -ENXIO; -+ } -+ -+ if(smc_status & SMC_CHANGED) { -+ out_8((void *)&pt_ssfdc_smc->smc_status, smc_status); -+ ssfdc_read_partitions(pt_smcpart); -+ printk("ssfdc : media change\n"); -+ } -+*/ -+ while(sectors_written < nblocks) { -+ -+ new = -1; -+ do_erase = FALSE; -+ -+ zone = (sector + sectors_written) / SECTORS_PER_ZONE; -+ block = ((sector + sectors_written) % SECTORS_PER_ZONE) / SECTORS_PER_BLOCK ; -+ offset = ((sector + sectors_written) % SECTORS_PER_ZONE) % SECTORS_PER_BLOCK ; -+ -+ pt_logical = pt_smcpart->zone + (zone * 2048); -+ pt_physical = pt_smcpart->zone + (zone * 2048) + 1024; -+ -+ size = ((SECTORS_PER_BLOCK - offset) < (nblocks - sectors_written)) ? 
-+ (SECTORS_PER_BLOCK - offset) : (nblocks - sectors_written); -+ size *= SECTOR_SIZE; -+ -+ PDEBUG("write device %d, sector %d, count %d, zone %d, block %d, offset %d, done %d, size %d, address 0x%x\n", -+ pt_smcpart->count, sector, nblocks, zone, block, offset, sectors_written, size, (unsigned int)ptr); -+ -+ physical = ssfdc_physical(pt_smcpart, zone, block); -+ -+ -+ if(physical >= 0) { -+ if(ssfdc_cached != physical) { -+ pt_smcpart->mtd->read_ecc(pt_smcpart->mtd, physical, SMC_BLOCK_SIZE, &retlen, ssfdc_scratch, -+ ssfdc_oob_buf, &ssfdc_ffoob_info); -+ if(retlen != SMC_BLOCK_SIZE) { -+ printk(KERN_WARNING "ssfdc_write : failed to read physical\n"); -+ return -ENXIO; -+ } -+ -+ for(sc=0; sc<SECTORS_PER_BLOCK; sc++) { -+ pt_smcpart->mtd->read_oob(pt_smcpart->mtd, physical + (sc * SECTOR_SIZE), sizeof(ssfdc_buffer), &retlen, ssfdc_buffer); -+ if(retlen != sizeof(ssfdc_buffer)) { -+ printk(KERN_WARNING "ssfdc_write : failed to read physical oob\n"); -+ return -ENXIO; -+ } -+ -+ nand_calculate_ecc (pt_smcpart->mtd, &ssfdc_scratch[sc * SECTOR_SIZE], &ecc_calc[0]); -+ nand_calculate_ecc (pt_smcpart->mtd, &ssfdc_scratch[(sc * SECTOR_SIZE) + 256], &ecc_calc[3]); -+ for(i=0; i<6; i++) ecc_code[i] = ssfdc_buffer[ssfdc_ecc[i]]; -+ nand_correct_data(pt_smcpart->mtd, &ssfdc_scratch[sc * SECTOR_SIZE], &ecc_code[0], &ecc_calc[0]); -+ nand_correct_data(pt_smcpart->mtd, &ssfdc_scratch[(sc * SECTOR_SIZE) + 256], &ecc_code[3], &ecc_calc[3]); -+ } -+ -+ } -+ -+ for(sc=0; sc<SECTORS_PER_BLOCK; sc++) { -+ if(offset > sc) { -+ PDEBUG("offset %d, sector %d\n", offset, sc); -+ continue; -+ } -+ pt_smcpart->mtd->read_oob(pt_smcpart->mtd, physical + (sc * SECTOR_SIZE), sizeof(ssfdc_buffer), &retlen, ssfdc_buffer); -+ if(retlen != sizeof(ssfdc_buffer)) { -+ printk(KERN_WARNING "ssfdc_write : failed to read physical oob\n"); -+ return -ENXIO; -+ } -+ -+ nand_calculate_ecc (pt_smcpart->mtd, &ssfdc_scratch[sc * SECTOR_SIZE], &ecc_calc[0]); -+ nand_calculate_ecc (pt_smcpart->mtd, 
&ssfdc_scratch[(sc * SECTOR_SIZE) + 256], &ecc_calc[3]); -+ for(i=0; i<6; i++) ecc_code[i] = ssfdc_buffer[ssfdc_ecc[i]]; -+ nand_correct_data(pt_smcpart->mtd, &ssfdc_scratch[sc * SECTOR_SIZE], &ecc_code[0], &ecc_calc[0]); -+ nand_correct_data(pt_smcpart->mtd, &ssfdc_scratch[(sc * SECTOR_SIZE) + 256], &ecc_code[3], &ecc_calc[3]); -+ -+ /* find out if the block is being used */ -+ -+ -+ if(ssfdc_sector_blank(pt_smcpart, sc)) { -+ PDEBUG("ssfdc_write : zone %d, block %d, sector %d, lbn %d, blank, physical 0x%x\n", -+ zone, block, sc, sector, physical); -+ memcpy(&ssfdc_scratch[(sc * SECTOR_SIZE)], ptr+ptr_done, SECTOR_SIZE); -+ nand_calculate_ecc (pt_smcpart->mtd, (ptr + ptr_done), &ecc_calc[0]); -+ nand_calculate_ecc (pt_smcpart->mtd, (ptr + ptr_done + 256), &ecc_calc[3]); -+ for(i=0; i<6; i++) ssfdc_buffer[ssfdc_ecc[i]] = ecc_calc[i]; -+ i = (block << 1) | 0x1000; -+ i |= ssfdc_parity(block); -+ ssfdc_buffer[7] = ssfdc_buffer[12] = i & 0xFF; -+ ssfdc_buffer[6] = ssfdc_buffer[11] = (i & 0xFF00) >> 0x08; -+ -+ pt_smcpart->mtd->write_ecc(pt_smcpart->mtd, physical + (sc * SECTOR_SIZE), SECTOR_SIZE, &retlen, -+ ptr + ptr_done, ssfdc_buffer, &ssfdc_ffoob_info); -+ if(retlen != SECTOR_SIZE) { -+ printk(KERN_WARNING "ssfdc_write : failed to write physical 0x%x, sector 0x%x, blank, retlen %d\n" -+ , physical, sc, retlen); -+ return -ENXIO; -+ } -+ -+ ptr_done += SECTOR_SIZE; -+ if(ptr_done >= size) break; -+ } -+ else { -+ new = ssfdc_allocate_new(pt_smcpart, zone); -+ /* erase the old block */ -+ *(pt_physical + ((physical % ZONE_SIZE) / SMC_BLOCK_SIZE)) = 0xFFFFFFFF; -+ -+ PDEBUG("ssfdc_write : physical 0x%x + 0x%x = 0x%x\n", -+ (unsigned int)pt_physical, ((physical % ZONE_SIZE) / SMC_BLOCK_SIZE), 0xFFFFFFFF); -+ do_erase = TRUE; -+ PDEBUG("ssfdc_write : zone %d, block %d, sector %d, lbn %d, written, physical 0x%x, new 0x%x\n", -+ zone, block, sc, sector, physical, new); -+ break; -+ } -+ } -+ } -+ else { -+ ssfdc_cached = 0xFFFFFFFF; -+ memset(ssfdc_scratch, 0xFF, 
sizeof(ssfdc_scratch)); -+ new = ssfdc_allocate_new(pt_smcpart, zone); -+ PDEBUG("ssfdc_write : zone %d, block %d, lbn %d, physical 0x%x, unallocated, new 0x%x\n", -+ zone, block, sector, physical, new); -+ } -+ -+ -+ -+ if(new != -1) { -+ -+ -+ memcpy(&ssfdc_scratch[(offset * SECTOR_SIZE)], ptr, size); -+ PDEBUG("ssfdc_write : new 0x%x, offset 0x%x, size 0x%x, block 0x%x\n", new, offset, size, block); -+ for(sc=0; sc<SECTORS_PER_BLOCK; sc++) { -+ memset(ssfdc_buffer, 0xFF, OOB_SIZE); -+ nand_calculate_ecc (pt_smcpart->mtd, &ssfdc_scratch[sc * SECTOR_SIZE], &ecc_calc[0]); -+ nand_calculate_ecc (pt_smcpart->mtd, &ssfdc_scratch[(sc * SECTOR_SIZE) + 256], &ecc_calc[3]); -+ for(i=0; i<6; i++) ssfdc_buffer[ssfdc_ecc[i]] = ecc_calc[i]; -+ i = (block << 1) | 0x1000; -+ i |= ssfdc_parity(block); -+ ssfdc_buffer[7] = ssfdc_buffer[12] = i & 0xFF; -+ ssfdc_buffer[6] = ssfdc_buffer[11] = (i & 0xFF00) >> 0x08; -+ memcpy(&ssfdc_oob_buf[sc * OOB_SIZE], ssfdc_buffer, OOB_SIZE); -+ } -+ -+ -+ pt_smcpart->mtd->write_ecc(pt_smcpart->mtd, new, SMC_BLOCK_SIZE, &retlen, ssfdc_scratch, -+ ssfdc_oob_buf, &ssfdc_ffoob_info); -+ if(retlen != SMC_BLOCK_SIZE) { -+ printk(KERN_WARNING "ssfdc_write : failed to write block, physical 0x%x, returned 0x%x\n", new, retlen); -+ return -ENXIO; -+ } -+ /* change the mapping table to reflect the new block placement */ -+ -+ *(pt_logical + block) = (new % ZONE_SIZE) / SMC_BLOCK_SIZE; -+ PDEBUG("ssfdc_write : logical 0x%x + 0x%x = 0x%x\n", -+ (unsigned int)pt_logical, block, (new % ZONE_SIZE) / SMC_BLOCK_SIZE); -+ -+ *(pt_physical + ((new % ZONE_SIZE) / SMC_BLOCK_SIZE)) = block; -+ PDEBUG("ssfdc_write : physical 0x%x + 0x%x = 0x%x\n", -+ (unsigned int)pt_physical, ((new % ZONE_SIZE) / SMC_BLOCK_SIZE), block); -+ -+ -+ ssfdc_cached = new; -+ } -+ -+ -+ ptr += size; -+ ptr_done = 0; -+ sectors_written += (size / SECTOR_SIZE); -+ if(do_erase) ssfdc_erase(pt_smcpart, physical); -+ -+ } -+ -+ -+ -+ -+ return(0); -+} -+static int ssfdc_sector_blank(partition_t 
* pt_smcpart, int sc) { -+int b; -+ -+ for(b=0; b<SECTOR_SIZE; b++) { -+ if(ssfdc_scratch[b + (sc * SECTOR_SIZE)] != 0xFF) return(0); -+ } -+ for(b=0; b<OOB_SIZE; b++) { -+ if((b==6) || (b==7) || (b==11) || (b==12)) continue; // Block address fields -+ if(ssfdc_buffer[b] != 0xFF) return(0); -+ } -+ return(1); -+} -+static int ssfdc_allocate_new(partition_t * pt_smcpart, int zone) { -+ -+ int new = pt_smcpart->last_written[zone] + 1; -+ int * pt_physical; -+ int physical; -+ int block; -+ int retlen; -+ unsigned char oob[16]; -+ -+ -+ if(new >= BLOCKS_PER_ZONE) new = 0; -+ -+ -+ while (new != pt_smcpart->last_written[zone]) { -+ block = new % BLOCKS_PER_ZONE; -+ pt_physical = pt_smcpart->zone + (zone * 2048) + 1024 + block; -+ physical = (zone * ZONE_SIZE) + (block * SMC_BLOCK_SIZE); -+ -+ PDEBUG("ssfdc_allocate_new : zone %d, block %d, address 0x%08x, data 0x%08x\n", -+ zone, block, (unsigned int)pt_physical, *pt_physical); -+ if(*pt_physical == 0xFFFFFFFF) { -+ PDEBUG("ssfdc_allocate_new : physical 0x%x = 0x%x\n", (unsigned int)pt_physical, *pt_physical); -+ memset(oob, 0, OOB_SIZE); -+ pt_smcpart->mtd->read_oob(pt_smcpart->mtd, physical, OOB_SIZE, &retlen, oob); -+ if((oob[5] == 0xFF) && (retlen == OOB_SIZE)) { // If not a bad block -+ pt_smcpart->last_written[zone] = new; -+ return((new * SMC_BLOCK_SIZE) + (zone * ZONE_SIZE)); -+ } -+ else { -+ PDEBUG("ssfdc_allocate_new : new 0x%x, physical 0x%x, block status 0x%x, oob length 0x%x\n", new, physical, oob[5], retlen); -+ } -+ } -+ new++; -+ if(new >= BLOCKS_PER_ZONE) new = 0; -+ } -+ -+ panic("ssfdc_allocate_new : cant find free block\n"); -+ -+} -+ -+ -+ -+static int ssfdc_read(partition_t *pt_smcpart, caddr_t buffer, -+ u_long sector, u_long nblocks) -+{ -+ int zone, block, offset; -+ int sectors_read = 0; -+ int physical; -+ int size; -+ int retlen; -+ int i; -+ int sc; -+ unsigned char * ptr = (unsigned char *)buffer; -+ unsigned char ecc_code[6], ecc_calc[6]; -+/* -+ unsigned char smc_status; -+ -+ 
smc_status = in_8((void *)&pt_ssfdc_smc->smc_status); -+ if(!(smc_status & SMC_PRESENT)) { -+ printk("ssfdc : media not present\n"); -+ return -ENXIO; -+ } -+ -+ -+ -+ if(smc_status & SMC_CHANGED) { -+ out_8((void *)&pt_ssfdc_smc->smc_status, smc_status); -+ ssfdc_read_partitions(pt_smcpart); -+ printk("ssfdc : media change\n"); -+ } -+*/ -+ while(sectors_read < nblocks) { -+ -+ zone = (sector + sectors_read) / SECTORS_PER_ZONE; -+ block = ((sector + sectors_read) % SECTORS_PER_ZONE) / SECTORS_PER_BLOCK ; -+ offset = ((sector + sectors_read) % SECTORS_PER_ZONE) % SECTORS_PER_BLOCK ; -+ -+ -+ if(offset) { -+ size = ((SECTORS_PER_BLOCK - offset) < (nblocks - sectors_read)) ? -+ (SECTORS_PER_BLOCK - offset) : (nblocks - sectors_read); -+ } -+ else { -+ size = (SECTORS_PER_BLOCK < (nblocks - sectors_read)) ? SECTORS_PER_BLOCK : nblocks - sectors_read; -+ } -+ size *= SECTOR_SIZE; -+ -+ PDEBUG("ssfdc_read : device %d, sector %d, count %d, zone %d, block %d, offset %d, done %d, size %d, address 0x%x\n", -+ pt_smcpart->count, sector, nblocks, zone, block, offset, sectors_read, size, (unsigned int)ptr); -+ -+ -+ physical = ssfdc_physical(pt_smcpart, zone, block); -+ if(physical >= 0) { -+ if(ssfdc_cached != physical) { -+ pt_smcpart->mtd->read_ecc(pt_smcpart->mtd, physical, SMC_BLOCK_SIZE, &retlen, ssfdc_scratch, -+ ssfdc_oob_buf, &ssfdc_ffoob_info); -+ if(retlen != SMC_BLOCK_SIZE) { -+ printk(KERN_WARNING "ssfdc_read : failed to read physical\n"); -+ return -ENXIO; -+ } -+ for(sc=0; sc<SECTORS_PER_BLOCK; sc++) { -+ pt_smcpart->mtd->read_oob(pt_smcpart->mtd, physical + (sc * SECTOR_SIZE), sizeof(ssfdc_buffer), &retlen, ssfdc_buffer); -+ if(retlen != sizeof(ssfdc_buffer)) { -+ printk(KERN_WARNING "ssfdc_read : failed to read physical oob\n"); -+ return -ENXIO; -+ } -+ nand_calculate_ecc (pt_smcpart->mtd, &ssfdc_scratch[sc * SECTOR_SIZE], &ecc_calc[0]); -+ nand_calculate_ecc (pt_smcpart->mtd, &ssfdc_scratch[(sc * SECTOR_SIZE) + 256], &ecc_calc[3]); -+ for(i=0; i<3; i++) 
ecc_code[i] = ssfdc_buffer[ssfdc_ecc[i]]; -+ for(i=3; i<6; i++) ecc_code[i] = ssfdc_buffer[ssfdc_ecc[i]]; -+ nand_correct_data(pt_smcpart->mtd, &ssfdc_scratch[sc * SECTOR_SIZE], &ecc_code[0], &ecc_calc[0]); -+ nand_correct_data(pt_smcpart->mtd, &ssfdc_scratch[(sc * SECTOR_SIZE) + 256], &ecc_code[3], &ecc_calc[3]); -+ } -+ -+ /* Get the ecc bytes and check that they are ok */ -+ -+ -+ } -+ ssfdc_cached = physical; -+ -+ -+ } -+ else { -+ memset(ssfdc_scratch, 0xFF, sizeof(ssfdc_scratch)); -+ ssfdc_cached = 0xFFFFFFFF; -+ } -+ -+ -+ memcpy(ptr, &ssfdc_scratch[(offset * SECTOR_SIZE)], size); -+ ptr += size; -+ sectors_read += (size / SECTOR_SIZE); -+ } -+ -+ -+ -+ return(0); -+} -+ -+static void ssfdc_erase_callback(struct erase_info *erase) { -+ -+ PDEBUG("ssfdc_erase_callback : wake erase\n"); -+ up(&ssfdc_semaphore); -+ PDEBUG("ssfdc_erase_callback : woken erase\n"); -+} -+ -+static int ssfdc_erase(partition_t *pt_smcpart, unsigned int offset) -+{ -+ int ret = 0; -+ struct erase_info *erase; -+ unsigned char * junk; -+ unsigned char * oob; -+ int retlen; -+ int b, sc; -+ -+ -+ PDEBUG("ssfdc_erase : offset 0x%08x\n", offset); -+ -+ erase=kmalloc(sizeof(struct erase_info), GFP_KERNEL); -+ junk=kmalloc(pt_smcpart->mtd->erasesize + 16, GFP_KERNEL); -+ oob = junk + pt_smcpart->mtd->erasesize; -+ -+ if (!erase) -+ return -ENOMEM; -+ if (!junk) -+ return -ENOMEM; -+ -+ erase->addr = offset; -+ erase->len = pt_smcpart->mtd->erasesize; -+ erase->callback = ssfdc_erase_callback; -+ ret = pt_smcpart->mtd->erase(pt_smcpart->mtd, erase); -+ if(ret) { -+ printk(KERN_WARNING "ssfdc_erase : failed status 0x%x\n", ret); -+ goto end; -+ -+ } -+ -+ down(&ssfdc_semaphore); -+ -+ pt_smcpart->mtd->read_ecc(pt_smcpart->mtd, offset, SMC_BLOCK_SIZE, &retlen, junk, -+ ssfdc_oob_buf, &ssfdc_ffoob_info); -+ if(retlen != SMC_BLOCK_SIZE) { -+ printk(KERN_WARNING "ssfdc_erase : offset 0x%x, read returned length %d\n", offset, retlen); -+ goto end; -+ } -+ -+ -+ for(sc=0; sc < SECTORS_PER_BLOCK; 
sc++) { -+ for(b=0; b<SECTOR_SIZE; b++) { -+ if(*(junk + (b + (sc * SECTOR_SIZE))) != 0xFF) { -+ printk(KERN_WARNING "ssfdc_erase : offset 0x%x, sector 0x%x, byte 0x%x, data 0x%02x, expected 0xff\n" -+ , offset, sc, b, *(junk + (b + (sc * SECTOR_SIZE)))); -+ goto end; -+ } -+ } -+ pt_smcpart->mtd->read_oob(pt_smcpart->mtd, offset + (sc * SECTOR_SIZE), OOB_SIZE, &retlen, oob); -+ if(retlen != OOB_SIZE) { -+ printk(KERN_WARNING "ssfdc_erase : offset 0x%x, read oob returned length %d\n", offset, retlen); -+ goto end; -+ } -+ for(b=0; b<OOB_SIZE; b++) { -+ if(*(oob+b) != 0xFF) { -+ printk(KERN_WARNING "ssfdc_erase : offset 0x%x, byte 0x%x, oob got 0x%02x, expected 0xff\n", -+ offset, b, *(oob+b)); -+ goto end; -+ } -+ } -+ } -+ -+end: -+ -+ kfree(erase); -+ kfree(junk); -+ -+ return ret; -+} /* erase_xfer */ -+ -+ -+ -+ -+ -+int init_ssfdc(void) -+{ -+ int result, i; -+ -+// unsigned char smc_status; -+// #define B01159_FIO_PBASE 0x0000000148000000 /* Physical Base address of SMC control chip */ -+ -+ printk(KERN_INFO "SSFDC block device translation layer V1.0\n"); -+/* -+ pt_ssfdc_smc = ioremap64(B01159_FIO_PBASE, 1024); -+ if(!pt_ssfdc_smc){ -+ printk("ssfdc : failed to map SMC control device\n"); -+ return(-EFAULT); -+ } -+ -+ smc_status = in_8((void *)&pt_ssfdc_smc->smc_status); -+*/ -+ memset(ssfdc_ffoob_buf, 0xFF, sizeof(ssfdc_ffoob_buf)); -+ -+ for (i = 0; i < MAX_DEVICES*MAX_PARTITIONS; i++) { -+ ssfdc_hd[i].nr_sects = 0; -+ ssfdc_hd[i].start_sect = 0; -+ ssfdc_blocksizes[i] = 4096; -+ } -+ blksize_size[SSFDC_MAJOR] = ssfdc_blocksizes; -+ ssfdc_gendisk.major = SSFDC_MAJOR; -+ -+ -+ memset(ssfdc_scratch, 0xFF, sizeof(ssfdc_scratch)); -+ -+ result = register_blkdev(ssfdc_major, "ssfdc", &ssfdc_fops); -+ if(result != 0) { -+ printk(KERN_WARNING "ssfdc : failed to get a major number\n"); -+ return(result); -+ } -+// if(ssfdc_major == 0) ssfdc_major = result; -+ -+ blk_init_queue(BLK_DEFAULT_QUEUE(ssfdc_major), &do_ssfdc_request); -+ -+ add_gendisk(&ssfdc_gendisk); 
-+ -+ -+ -+ register_mtd_user(&ssfdc_notifier); -+ -+ -+ init_MUTEX_LOCKED(&ssfdc_semaphore); -+ -+ -+ -+ return 0; -+} -+ -+static void __exit cleanup_ssfdc(void) -+{ -+ int i; -+ -+ for(i=0; i<MAX_DEVICES; i++) { -+ if(SMCParts[i].zone)kfree(SMCParts[i].zone); -+ } -+ -+ -+ unregister_mtd_user(&ssfdc_notifier); -+ unregister_blkdev(ssfdc_major, "ssfdc"); -+ blk_cleanup_queue(BLK_DEFAULT_QUEUE(ssfdc_major)); -+ -+ -+ -+ blksize_size[SSFDC_MAJOR] = NULL; -+ del_gendisk(&ssfdc_gendisk); -+ -+} -+ -+module_init(init_ssfdc); -+module_exit(cleanup_ssfdc); -+ -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Simon Haynes <simon@baydel.com>"); -+MODULE_DESCRIPTION("SSFDC translation layer support for MTD"); -+ -+ -+ -+ -Index: linux-2.6.5/include/linux/mtd/cfi.h -=================================================================== ---- linux-2.6.5.orig/include/linux/mtd/cfi.h 2004-04-03 22:36:56.000000000 -0500 -+++ linux-2.6.5/include/linux/mtd/cfi.h 2005-02-01 17:11:17.000000000 -0500 -@@ -1,7 +1,7 @@ - - /* Common Flash Interface structures - * See http://support.intel.com/design/flash/technote/index.htm -- * $Id: cfi.h,v 1.35 2003/05/28 15:37:32 dwmw2 Exp $ -+ * $Id: cfi.h,v 1.46 2004/08/12 07:49:04 eric Exp $ - */ - - #ifndef __MTD_CFI_H__ -@@ -13,200 +13,74 @@ - #include <linux/types.h> - #include <linux/interrupt.h> - #include <linux/mtd/flashchip.h> -+#include <linux/mtd/map.h> - #include <linux/mtd/cfi_endian.h> - --/* -- * You can optimize the code size and performance by defining only -- * the geometry(ies) available on your hardware. -- * CFIDEV_INTERLEAVE_n, where represents the interleave (number of chips to fill the bus width) -- * CFIDEV_BUSWIDTH_n, where n is the bus width in bytes (1, 2, 4 or 8 bytes) -- * -- * By default, all (known) geometries are supported. 
-- */ -- --#ifndef CONFIG_MTD_CFI_GEOMETRY -- --/* The default case - support all but 64-bit, which has -- a performance penalty */ -- --#define CFIDEV_INTERLEAVE_1 (1) --#define CFIDEV_INTERLEAVE_2 (2) --#define CFIDEV_INTERLEAVE_4 (4) -- --#define CFIDEV_BUSWIDTH_1 (1) --#define CFIDEV_BUSWIDTH_2 (2) --#define CFIDEV_BUSWIDTH_4 (4) -- --typedef __u32 cfi_word; -- --#else -- --/* Explicitly configured buswidth/interleave support */ -- - #ifdef CONFIG_MTD_CFI_I1 --#define CFIDEV_INTERLEAVE_1 (1) --#endif --#ifdef CONFIG_MTD_CFI_I2 --#define CFIDEV_INTERLEAVE_2 (2) --#endif --#ifdef CONFIG_MTD_CFI_I4 --#define CFIDEV_INTERLEAVE_4 (4) --#endif --#ifdef CONFIG_MTD_CFI_I8 --#define CFIDEV_INTERLEAVE_8 (8) --#endif -- --#ifdef CONFIG_MTD_CFI_B1 --#define CFIDEV_BUSWIDTH_1 (1) --#endif --#ifdef CONFIG_MTD_CFI_B2 --#define CFIDEV_BUSWIDTH_2 (2) --#endif --#ifdef CONFIG_MTD_CFI_B4 --#define CFIDEV_BUSWIDTH_4 (4) --#endif --#ifdef CONFIG_MTD_CFI_B8 --#define CFIDEV_BUSWIDTH_8 (8) --#endif -- --/* pick the largest necessary */ --#ifdef CONFIG_MTD_CFI_B8 --typedef __u64 cfi_word; -- --/* This only works if asm/io.h is included first */ --#ifndef __raw_readll --#define __raw_readll(addr) (*(volatile __u64 *)(addr)) --#endif --#ifndef __raw_writell --#define __raw_writell(v, addr) (*(volatile __u64 *)(addr) = (v)) --#endif --#define CFI_WORD_64 --#else /* CONFIG_MTD_CFI_B8 */ --/* All others can use 32-bits. It's probably more efficient than -- the smaller types anyway */ --typedef __u32 cfi_word; --#endif /* CONFIG_MTD_CFI_B8 */ -- --#endif -- --/* -- * The following macros are used to select the code to execute: -- * cfi_buswidth_is_*() -- * cfi_interleave_is_*() -- * [where * is either 1, 2, 4, or 8] -- * Those macros should be used with 'if' statements. 
If only one of few -- * geometry arrangements are selected, they expand to constants thus allowing -- * the compiler (most of them being 0) to optimize away all the unneeded code, -- * while still validating the syntax (which is not possible with embedded -- * #if ... #endif constructs). -- * The exception to this is the 64-bit versions, which need an extension -- * to the cfi_word type, and cause compiler warnings about shifts being -- * out of range. -- */ -- --#ifdef CFIDEV_INTERLEAVE_1 --# ifdef CFIDEV_INTERLEAVE --# undef CFIDEV_INTERLEAVE --# define CFIDEV_INTERLEAVE (cfi->interleave) --# else --# define CFIDEV_INTERLEAVE CFIDEV_INTERLEAVE_1 --# endif --# define cfi_interleave_is_1() (CFIDEV_INTERLEAVE == CFIDEV_INTERLEAVE_1) -+#define cfi_interleave(cfi) 1 -+#define cfi_interleave_is_1(cfi) (cfi_interleave(cfi) == 1) - #else --# define cfi_interleave_is_1() (0) -+#define cfi_interleave_is_1(cfi) (0) - #endif - --#ifdef CFIDEV_INTERLEAVE_2 --# ifdef CFIDEV_INTERLEAVE --# undef CFIDEV_INTERLEAVE --# define CFIDEV_INTERLEAVE (cfi->interleave) -+#ifdef CONFIG_MTD_CFI_I2 -+# ifdef cfi_interleave -+# undef cfi_interleave -+# define cfi_interleave(cfi) ((cfi)->interleave) - # else --# define CFIDEV_INTERLEAVE CFIDEV_INTERLEAVE_2 -+# define cfi_interleave(cfi) 2 - # endif --# define cfi_interleave_is_2() (CFIDEV_INTERLEAVE == CFIDEV_INTERLEAVE_2) -+#define cfi_interleave_is_2(cfi) (cfi_interleave(cfi) == 2) - #else --# define cfi_interleave_is_2() (0) -+#define cfi_interleave_is_2(cfi) (0) - #endif - --#ifdef CFIDEV_INTERLEAVE_4 --# ifdef CFIDEV_INTERLEAVE --# undef CFIDEV_INTERLEAVE --# define CFIDEV_INTERLEAVE (cfi->interleave) -+#ifdef CONFIG_MTD_CFI_I4 -+# ifdef cfi_interleave -+# undef cfi_interleave -+# define cfi_interleave(cfi) ((cfi)->interleave) - # else --# define CFIDEV_INTERLEAVE CFIDEV_INTERLEAVE_4 -+# define cfi_interleave(cfi) 4 - # endif --# define cfi_interleave_is_4() (CFIDEV_INTERLEAVE == CFIDEV_INTERLEAVE_4) -+#define cfi_interleave_is_4(cfi) 
(cfi_interleave(cfi) == 4) - #else --# define cfi_interleave_is_4() (0) -+#define cfi_interleave_is_4(cfi) (0) - #endif - --#ifdef CFIDEV_INTERLEAVE_8 --# ifdef CFIDEV_INTERLEAVE --# undef CFIDEV_INTERLEAVE --# define CFIDEV_INTERLEAVE (cfi->interleave) -+#ifdef CONFIG_MTD_CFI_I8 -+# ifdef cfi_interleave -+# undef cfi_interleave -+# define cfi_interleave(cfi) ((cfi)->interleave) - # else --# define CFIDEV_INTERLEAVE CFIDEV_INTERLEAVE_8 -+# define cfi_interleave(cfi) 8 - # endif --# define cfi_interleave_is_8() (CFIDEV_INTERLEAVE == CFIDEV_INTERLEAVE_8) -+#define cfi_interleave_is_8(cfi) (cfi_interleave(cfi) == 8) - #else --# define cfi_interleave_is_8() (0) -+#define cfi_interleave_is_8(cfi) (0) - #endif - --#ifndef CFIDEV_INTERLEAVE --#error You must define at least one interleave to support! -+static inline int cfi_interleave_supported(int i) -+{ -+ switch (i) { -+#ifdef CONFIG_MTD_CFI_I1 -+ case 1: - #endif -- --#ifdef CFIDEV_BUSWIDTH_1 --# ifdef CFIDEV_BUSWIDTH --# undef CFIDEV_BUSWIDTH --# define CFIDEV_BUSWIDTH (map->buswidth) --# else --# define CFIDEV_BUSWIDTH CFIDEV_BUSWIDTH_1 --# endif --# define cfi_buswidth_is_1() (CFIDEV_BUSWIDTH == CFIDEV_BUSWIDTH_1) --#else --# define cfi_buswidth_is_1() (0) -+#ifdef CONFIG_MTD_CFI_I2 -+ case 2: - #endif -- --#ifdef CFIDEV_BUSWIDTH_2 --# ifdef CFIDEV_BUSWIDTH --# undef CFIDEV_BUSWIDTH --# define CFIDEV_BUSWIDTH (map->buswidth) --# else --# define CFIDEV_BUSWIDTH CFIDEV_BUSWIDTH_2 --# endif --# define cfi_buswidth_is_2() (CFIDEV_BUSWIDTH == CFIDEV_BUSWIDTH_2) --#else --# define cfi_buswidth_is_2() (0) -+#ifdef CONFIG_MTD_CFI_I4 -+ case 4: - #endif -- --#ifdef CFIDEV_BUSWIDTH_4 --# ifdef CFIDEV_BUSWIDTH --# undef CFIDEV_BUSWIDTH --# define CFIDEV_BUSWIDTH (map->buswidth) --# else --# define CFIDEV_BUSWIDTH CFIDEV_BUSWIDTH_4 --# endif --# define cfi_buswidth_is_4() (CFIDEV_BUSWIDTH == CFIDEV_BUSWIDTH_4) --#else --# define cfi_buswidth_is_4() (0) -+#ifdef CONFIG_MTD_CFI_I8 -+ case 8: - #endif -+ return 1; - --#ifdef 
CFIDEV_BUSWIDTH_8 --# ifdef CFIDEV_BUSWIDTH --# undef CFIDEV_BUSWIDTH --# define CFIDEV_BUSWIDTH (map->buswidth) --# else --# define CFIDEV_BUSWIDTH CFIDEV_BUSWIDTH_8 --# endif --# define cfi_buswidth_is_8() (CFIDEV_BUSWIDTH == CFIDEV_BUSWIDTH_8) --#else --# define cfi_buswidth_is_8() (0) --#endif -+ default: -+ return 0; -+ } -+} - --#ifndef CFIDEV_BUSWIDTH --#error You must define at least one bus width to support! --#endif - - /* NB: these values must represents the number of bytes needed to meet the - * device type (x8, x16, x32). Eg. a 32 bit device is 4 x 8 bytes. -@@ -223,64 +97,84 @@ - - /* Basic Query Structure */ - struct cfi_ident { -- __u8 qry[3]; -- __u16 P_ID; -- __u16 P_ADR; -- __u16 A_ID; -- __u16 A_ADR; -- __u8 VccMin; -- __u8 VccMax; -- __u8 VppMin; -- __u8 VppMax; -- __u8 WordWriteTimeoutTyp; -- __u8 BufWriteTimeoutTyp; -- __u8 BlockEraseTimeoutTyp; -- __u8 ChipEraseTimeoutTyp; -- __u8 WordWriteTimeoutMax; -- __u8 BufWriteTimeoutMax; -- __u8 BlockEraseTimeoutMax; -- __u8 ChipEraseTimeoutMax; -- __u8 DevSize; -- __u16 InterfaceDesc; -- __u16 MaxBufWriteSize; -- __u8 NumEraseRegions; -- __u32 EraseRegionInfo[0]; /* Not host ordered */ -+ uint8_t qry[3]; -+ uint16_t P_ID; -+ uint16_t P_ADR; -+ uint16_t A_ID; -+ uint16_t A_ADR; -+ uint8_t VccMin; -+ uint8_t VccMax; -+ uint8_t VppMin; -+ uint8_t VppMax; -+ uint8_t WordWriteTimeoutTyp; -+ uint8_t BufWriteTimeoutTyp; -+ uint8_t BlockEraseTimeoutTyp; -+ uint8_t ChipEraseTimeoutTyp; -+ uint8_t WordWriteTimeoutMax; -+ uint8_t BufWriteTimeoutMax; -+ uint8_t BlockEraseTimeoutMax; -+ uint8_t ChipEraseTimeoutMax; -+ uint8_t DevSize; -+ uint16_t InterfaceDesc; -+ uint16_t MaxBufWriteSize; -+ uint8_t NumEraseRegions; -+ uint32_t EraseRegionInfo[0]; /* Not host ordered */ - } __attribute__((packed)); - - /* Extended Query Structure for both PRI and ALT */ - - struct cfi_extquery { -- __u8 pri[3]; -- __u8 MajorVersion; -- __u8 MinorVersion; -+ uint8_t pri[3]; -+ uint8_t MajorVersion; -+ uint8_t MinorVersion; - } 
__attribute__((packed)); - - /* Vendor-Specific PRI for Intel/Sharp Extended Command Set (0x0001) */ - - struct cfi_pri_intelext { -- __u8 pri[3]; -- __u8 MajorVersion; -- __u8 MinorVersion; -- __u32 FeatureSupport; -- __u8 SuspendCmdSupport; -- __u16 BlkStatusRegMask; -- __u8 VccOptimal; -- __u8 VppOptimal; -- __u8 NumProtectionFields; -- __u16 ProtRegAddr; -- __u8 FactProtRegSize; -- __u8 UserProtRegSize; -+ uint8_t pri[3]; -+ uint8_t MajorVersion; -+ uint8_t MinorVersion; -+ uint32_t FeatureSupport; /* if bit 31 is set then an additional uint32_t feature -+ block follows - FIXME - not currently supported */ -+ uint8_t SuspendCmdSupport; -+ uint16_t BlkStatusRegMask; -+ uint8_t VccOptimal; -+ uint8_t VppOptimal; -+ uint8_t NumProtectionFields; -+ uint16_t ProtRegAddr; -+ uint8_t FactProtRegSize; -+ uint8_t UserProtRegSize; -+} __attribute__((packed)); -+ -+/* Vendor-Specific PRI for AMD/Fujitsu Extended Command Set (0x0002) */ -+ -+struct cfi_pri_amdstd { -+ uint8_t pri[3]; -+ uint8_t MajorVersion; -+ uint8_t MinorVersion; -+ uint8_t SiliconRevision; /* bits 1-0: Address Sensitive Unlock */ -+ uint8_t EraseSuspend; -+ uint8_t BlkProt; -+ uint8_t TmpBlkUnprotect; -+ uint8_t BlkProtUnprot; -+ uint8_t SimultaneousOps; -+ uint8_t BurstMode; -+ uint8_t PageMode; -+ uint8_t VppMin; -+ uint8_t VppMax; -+ uint8_t TopBottom; - } __attribute__((packed)); - - struct cfi_pri_query { -- __u8 NumFields; -- __u32 ProtField[1]; /* Not host ordered */ -+ uint8_t NumFields; -+ uint32_t ProtField[1]; /* Not host ordered */ - } __attribute__((packed)); - - struct cfi_bri_query { -- __u8 PageModeReadCap; -- __u8 NumFields; -- __u32 ConfField[1]; /* Not host ordered */ -+ uint8_t PageModeReadCap; -+ uint8_t NumFields; -+ uint32_t ConfField[1]; /* Not host ordered */ - } __attribute__((packed)); - - #define P_ID_NONE 0 -@@ -288,8 +182,10 @@ - #define P_ID_AMD_STD 2 - #define P_ID_INTEL_STD 3 - #define P_ID_AMD_EXT 4 -+#define P_ID_ST_ADV 32 - #define P_ID_MITSUBISHI_STD 256 - #define 
P_ID_MITSUBISHI_EXT 257 -+#define P_ID_SST_PAGE 258 - #define P_ID_RESERVED 65535 - - -@@ -297,14 +193,13 @@ - #define CFI_MODE_JEDEC 0 - - struct cfi_private { -- __u16 cmdset; -+ uint16_t cmdset; - void *cmdset_priv; - int interleave; - int device_type; - int cfi_mode; /* Are we a JEDEC device pretending to be CFI? */ - int addr_unlock1; - int addr_unlock2; -- int fast_prog; - struct mtd_info *(*cmdset_setup)(struct map_info *); - struct cfi_ident *cfiq; /* For now only one. We insist that all devs - must be of the same type. */ -@@ -315,107 +210,81 @@ - struct flchip chips[0]; /* per-chip data structure for each chip */ - }; - --#define MAX_CFI_CHIPS 8 /* Entirely arbitrary to avoid realloc() */ -- - /* - * Returns the command address according to the given geometry. - */ --static inline __u32 cfi_build_cmd_addr(__u32 cmd_ofs, int interleave, int type) -+static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, int interleave, int type) - { - return (cmd_ofs * type) * interleave; - } - - /* -- * Transforms the CFI command for the given geometry (bus width & interleave. -+ * Transforms the CFI command for the given geometry (bus width & interleave). -+ * It looks too long to be inline, but in the common case it should almost all -+ * get optimised away. 
- */ --static inline cfi_word cfi_build_cmd(u_char cmd, struct map_info *map, struct cfi_private *cfi) -+static inline map_word cfi_build_cmd(u_char cmd, struct map_info *map, struct cfi_private *cfi) - { -- cfi_word val = 0; -- -- if (cfi_buswidth_is_1()) { -- /* 1 x8 device */ -- val = cmd; -- } else if (cfi_buswidth_is_2()) { -- if (cfi_interleave_is_1()) { -- /* 1 x16 device in x16 mode */ -- val = cpu_to_cfi16(cmd); -- } else if (cfi_interleave_is_2()) { -- /* 2 (x8, x16 or x32) devices in x8 mode */ -- val = cpu_to_cfi16((cmd << 8) | cmd); -- } -- } else if (cfi_buswidth_is_4()) { -- if (cfi_interleave_is_1()) { -- /* 1 x32 device in x32 mode */ -- val = cpu_to_cfi32(cmd); -- } else if (cfi_interleave_is_2()) { -- /* 2 x16 device in x16 mode */ -- val = cpu_to_cfi32((cmd << 16) | cmd); -- } else if (cfi_interleave_is_4()) { -- /* 4 (x8, x16 or x32) devices in x8 mode */ -- val = (cmd << 16) | cmd; -- val = cpu_to_cfi32((val << 8) | val); -- } --#ifdef CFI_WORD_64 -- } else if (cfi_buswidth_is_8()) { -- if (cfi_interleave_is_1()) { -- /* 1 x64 device in x64 mode */ -- val = cpu_to_cfi64(cmd); -- } else if (cfi_interleave_is_2()) { -- /* 2 x32 device in x32 mode */ -- val = cmd; -- val = cpu_to_cfi64((val << 32) | val); -- } else if (cfi_interleave_is_4()) { -- /* 4 (x16, x32 or x64) devices in x16 mode */ -- val = (cmd << 16) | cmd; -- val = cpu_to_cfi64((val << 32) | val); -- } else if (cfi_interleave_is_8()) { -- /* 8 (x8, x16 or x32) devices in x8 mode */ -- val = (cmd << 8) | cmd; -- val = (val << 16) | val; -- val = (val << 32) | val; -- val = cpu_to_cfi64(val); -- } --#endif /* CFI_WORD_64 */ -- } -- return val; --} --#define CMD(x) cfi_build_cmd((x), map, cfi) -- --/* -- * Read a value according to the bus width. 
-- */ -- --static inline cfi_word cfi_read(struct map_info *map, __u32 addr) --{ -- if (cfi_buswidth_is_1()) { -- return map_read8(map, addr); -- } else if (cfi_buswidth_is_2()) { -- return map_read16(map, addr); -- } else if (cfi_buswidth_is_4()) { -- return map_read32(map, addr); -- } else if (cfi_buswidth_is_8()) { -- return map_read64(map, addr); -+ map_word val = { {0} }; -+ int wordwidth, words_per_bus, chip_mode, chips_per_word; -+ unsigned long onecmd; -+ int i; -+ -+ /* We do it this way to give the compiler a fighting chance -+ of optimising away all the crap for 'bankwidth' larger than -+ an unsigned long, in the common case where that support is -+ disabled */ -+ if (map_bankwidth_is_large(map)) { -+ wordwidth = sizeof(unsigned long); -+ words_per_bus = (map_bankwidth(map)) / wordwidth; // i.e. normally 1 - } else { -- return 0; -+ wordwidth = map_bankwidth(map); -+ words_per_bus = 1; -+ } -+ -+ chip_mode = map_bankwidth(map) / cfi_interleave(cfi); -+ chips_per_word = wordwidth * cfi_interleave(cfi) / map_bankwidth(map); -+ -+ /* First, determine what the bit-pattern should be for a single -+ device, according to chip mode and endianness... */ -+ switch (chip_mode) { -+ default: BUG(); -+ case 1: -+ onecmd = cmd; -+ break; -+ case 2: -+ onecmd = cpu_to_cfi16(cmd); -+ break; -+ case 4: -+ onecmd = cpu_to_cfi32(cmd); -+ break; - } --} - --/* -- * Write a value according to the bus width. 
-- */ -+ /* Now replicate it across the size of an unsigned long, or -+ just to the bus width as appropriate */ -+ switch (chips_per_word) { -+ default: BUG(); -+#if BITS_PER_LONG >= 64 -+ case 8: -+ onecmd |= (onecmd << (chip_mode * 32)); -+#endif -+ case 4: -+ onecmd |= (onecmd << (chip_mode * 16)); -+ case 2: -+ onecmd |= (onecmd << (chip_mode * 8)); -+ case 1: -+ ; -+ } - --static inline void cfi_write(struct map_info *map, cfi_word val, __u32 addr) --{ -- if (cfi_buswidth_is_1()) { -- map_write8(map, val, addr); -- } else if (cfi_buswidth_is_2()) { -- map_write16(map, val, addr); -- } else if (cfi_buswidth_is_4()) { -- map_write32(map, val, addr); -- } else if (cfi_buswidth_is_8()) { -- map_write64(map, val, addr); -+ /* And finally, for the multi-word case, replicate it -+ in all words in the structure */ -+ for (i=0; i < words_per_bus; i++) { -+ val.x[i] = onecmd; - } -+ -+ return val; - } -+#define CMD(x) cfi_build_cmd((x), map, cfi) - - /* - * Sends a CFI command to a bank of flash for the given geometry. -@@ -424,48 +293,47 @@ - * If prev_val is non-null, it will be set to the value at the command address, - * before the command was written. 
- */ --static inline __u32 cfi_send_gen_cmd(u_char cmd, __u32 cmd_addr, __u32 base, -+static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t base, - struct map_info *map, struct cfi_private *cfi, -- int type, cfi_word *prev_val) -+ int type, map_word *prev_val) - { -- cfi_word val; -- __u32 addr = base + cfi_build_cmd_addr(cmd_addr, CFIDEV_INTERLEAVE, type); -+ map_word val; -+ uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, cfi_interleave(cfi), type); - - val = cfi_build_cmd(cmd, map, cfi); - - if (prev_val) -- *prev_val = cfi_read(map, addr); -+ *prev_val = map_read(map, addr); - -- cfi_write(map, val, addr); -+ map_write(map, val, addr); - - return addr - base; - } - --static inline __u8 cfi_read_query(struct map_info *map, __u32 addr) -+static inline uint8_t cfi_read_query(struct map_info *map, uint32_t addr) - { -- if (cfi_buswidth_is_1()) { -- return map_read8(map, addr); -- } else if (cfi_buswidth_is_2()) { -- return cfi16_to_cpu(map_read16(map, addr)); -- } else if (cfi_buswidth_is_4()) { -- return cfi32_to_cpu(map_read32(map, addr)); -- } else if (cfi_buswidth_is_8()) { -- return cfi64_to_cpu(map_read64(map, addr)); -+ map_word val = map_read(map, addr); -+ -+ if (map_bankwidth_is_1(map)) { -+ return val.x[0]; -+ } else if (map_bankwidth_is_2(map)) { -+ return cfi16_to_cpu(val.x[0]); - } else { -- return 0; -+ /* No point in a 64-bit byteswap since that would just be -+ swapping the responses from different chips, and we are -+ only interested in one chip (a representative sample) */ -+ return cfi32_to_cpu(val.x[0]); - } - } - - static inline void cfi_udelay(int us) - { --#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,2,0) - unsigned long t = us * HZ / 1000000; - if (t) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(t); - return; - } --#endif - udelay(us); - cond_resched(); - } -@@ -480,4 +348,28 @@ - spin_unlock_bh(mutex); - } - -+struct cfi_extquery *cfi_read_pri(struct map_info *map, uint16_t adr, uint16_t size, 
-+ const char* name); -+struct cfi_fixup { -+ uint16_t mfr; -+ uint16_t id; -+ void (*fixup)(struct mtd_info *mtd, void* param); -+ void* param; -+}; -+ -+#define CFI_MFR_ANY 0xffff -+#define CFI_ID_ANY 0xffff -+ -+#define CFI_MFR_AMD 0x0001 -+#define CFI_MFR_ST 0x0020 /* STMicroelectronics */ -+ -+void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup* fixups); -+ -+typedef int (*varsize_frob_t)(struct map_info *map, struct flchip *chip, -+ unsigned long adr, int len, void *thunk); -+ -+int cfi_varsize_frob(struct mtd_info *mtd, varsize_frob_t frob, -+ loff_t ofs, size_t len, void *thunk); -+ -+ - #endif /* __MTD_CFI_H__ */ -Index: linux-2.6.5/include/linux/mtd/compatmac.h -=================================================================== ---- linux-2.6.5.orig/include/linux/mtd/compatmac.h 2004-04-03 22:38:00.000000000 -0500 -+++ linux-2.6.5/include/linux/mtd/compatmac.h 2005-02-01 17:11:17.000000000 -0500 -@@ -1,10 +1,210 @@ -+/* -+ * $Id: compatmac.h,v 1.68 2004/09/17 22:00:30 eric Exp $ -+ * -+ * Extensions and omissions from the normal 'linux/compatmac.h' -+ * files. hopefully this will end up empty as the 'real' one -+ * becomes fully-featured. -+ */ - - #ifndef __LINUX_MTD_COMPATMAC_H__ - #define __LINUX_MTD_COMPATMAC_H__ - --/* Nothing to see here. We write 2.5-compatible code and this -- file makes it all OK in older kernels, but it's empty in _current_ -- kernels. Include guard just to make GCC ignore it in future inclusions -- anyway... */ -+#include <linux/version.h> -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10) -+#error "This kernel is too old: not supported by this file" -+#endif -+ -+ /* O(1) scheduler stuff. 
*/ -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,5) && !defined(__rh_config_h__) -+#include <linux/sched.h> -+static inline void __recalc_sigpending(void) -+{ -+ recalc_sigpending(current); -+} -+#undef recalc_sigpending -+#define recalc_sigpending() __recalc_sigpending () -+ -+#define set_user_nice(tsk, n) do { (tsk)->nice = n; } while(0) -+#endif -+ -+ -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20) -+ -+#ifndef yield -+#define yield() do { set_current_state(TASK_RUNNING); schedule(); } while(0) -+#endif -+ -+#ifndef minor -+#define major(d) (MAJOR(to_kdev_t(d))) -+#define minor(d) (MINOR(to_kdev_t(d))) -+#endif -+ -+#ifndef mk_kdev -+#define mk_kdev(ma,mi) MKDEV(ma,mi) -+#define kdev_t_to_nr(x) (x) -+#endif -+ -+#define need_resched() (current->need_resched) -+#define cond_resched() do { if need_resched() { yield(); } } while(0) -+ -+#endif /* < 2.4.20 */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,73) -+#define iminor(i) minor((i)->i_rdev) -+#define imajor(i) major((i)->i_rdev) -+#define old_encode_dev(d) ( (major(d)<<8) | minor(d) ) -+#define old_decode_dev(rdev) (kdev_t_to_nr(mk_kdev((rdev)>>8, (rdev)&0xff))) -+#define old_valid_dev(d) (1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,61) -+ -+#include <linux/sched.h> -+ -+#ifdef __rh_config_h__ -+#define sigmask_lock sighand->siglock -+#define sig sighand -+#endif -+ -+static inline void __daemonize_modvers(void) -+{ -+ daemonize(); -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ sigfillset(¤t->blocked); -+ recalc_sigpending(); -+ spin_unlock_irq(¤t->sigmask_lock); -+} -+#undef daemonize -+#define daemonize(fmt, ...) 
do { \ -+ snprintf(current->comm, sizeof(current->comm), fmt ,##__VA_ARGS__); \ -+ __daemonize_modvers(); \ -+ } while(0) -+ -+static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) -+{ -+ unsigned long flags; -+ unsigned long ret; -+ -+ spin_lock_irqsave(¤t->sigmask_lock, flags); -+ ret = dequeue_signal(mask, info); -+ spin_unlock_irqrestore(¤t->sigmask_lock, flags); -+ -+ return ret; -+} -+ -+static inline int allow_signal(int sig) -+{ -+ if (sig < 1 || sig > _NSIG) -+ return -EINVAL; -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ sigdelset(¤t->blocked, sig); -+ recalc_sigpending(); -+ /* Make sure the kernel neither eats it now converts to SIGKILL */ -+ current->sig->action[sig-1].sa.sa_handler = (void *)2; -+ spin_unlock_irq(¤t->sigmask_lock); -+ return 0; -+} -+static inline int disallow_signal(int sig) -+{ -+ if (sig < 1 || sig > _NSIG) -+ return -EINVAL; -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ sigaddset(¤t->blocked, sig); -+ recalc_sigpending(); -+ -+ current->sig->action[sig-1].sa.sa_handler = SIG_DFL; -+ spin_unlock_irq(¤t->sigmask_lock); -+ return 0; -+} -+ -+#define PF_FREEZE 0 -+#define refrigerator(x) do { ; } while(0) -+#endif -+ -+ /* Module bits */ -+ -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,60) -+#define try_module_get(m) try_inc_mod_count(m) -+#define __module_get(m) do { if (!try_inc_mod_count(m)) BUG(); } while(0) -+#define module_put(m) do { if (m) __MOD_DEC_USE_COUNT((struct module *)(m)); } while(0) -+#define set_module_owner(x) do { x->owner = THIS_MODULE; } while(0) -+#endif -+ -+ -+ /* Random filesystem stuff, only for JFFS2 really */ -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,5) -+#define parent_ino(d) ((d)->d_parent->d_inode->i_ino) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,12) -+#define PageUptodate(x) Page_Uptodate(x) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,48) -+#define get_seconds() CURRENT_TIME -+#endif -+ -+#if LINUX_VERSION_CODE < 
KERNEL_VERSION(2,5,53) -+#define generic_file_readonly_mmap generic_file_mmap -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,70) -+ -+#include <linux/kmod.h> -+#include <linux/string.h> -+ -+static inline char *strlcpy(char *dest, const char *src, int len) -+{ -+ dest[len-1] = 0; -+ return strncpy(dest, src, len-1); -+} -+ -+static inline int do_old_request_module(const char *mod) -+{ -+ return request_module(mod); -+} -+#undef request_module -+#define request_module(fmt, ...) \ -+ ({ char modname[32]; snprintf(modname, 31, fmt ,##__VA_ARGS__); do_old_request_module(modname); }) -+ -+#endif /* 2.5.70 */ -+ -+#ifndef container_of -+#define container_of(ptr, type, member) ({ \ -+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ -+ (type *)( (char *)__mptr - offsetof(type,member) );}) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) -+#define kvec iovec -+#define __user -+#endif -+ -+#ifndef __iomem -+#define __iomem -+#endif -+ -+#ifndef list_for_each_entry_safe -+/** -+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry -+ * @pos: the type * to use as a loop counter. -+ * @n: another type * to use as temporary storage -+ * @head: the head for your list. -+ * @member: the name of the list_struct within the struct. 
-+ */ -+#define list_for_each_entry_safe(pos, n, head, member) \ -+ for (pos = list_entry((head)->next, typeof(*pos), member), \ -+ n = list_entry(pos->member.next, typeof(*pos), member); \ -+ &pos->member != (head); \ -+ pos = n, n = list_entry(n->member.next, typeof(*n), member)) -+ -+#endif - - #endif /* __LINUX_MTD_COMPATMAC_H__ */ -Index: linux-2.6.5/include/linux/mtd/doc2000.h -=================================================================== ---- linux-2.6.5.orig/include/linux/mtd/doc2000.h 2004-04-03 22:37:07.000000000 -0500 -+++ linux-2.6.5/include/linux/mtd/doc2000.h 2005-02-01 17:11:17.000000000 -0500 -@@ -1,13 +1,21 @@ -- --/* Linux driver for Disk-On-Chip 2000 */ --/* (c) 1999 Machine Vision Holdings, Inc. */ --/* Author: David Woodhouse <dwmw2@mvhi.com> */ --/* $Id: doc2000.h,v 1.17 2003/06/12 01:20:46 gerg Exp $ */ -+/* -+ * Linux driver for Disk-On-Chip devices -+ * -+ * Copyright (C) 1999 Machine Vision Holdings, Inc. -+ * Copyright (C) 2001-2003 David Woodhouse <dwmw2@infradead.org> -+ * Copyright (C) 2002-2003 Greg Ungerer <gerg@snapgear.com> -+ * Copyright (C) 2002-2003 SnapGear Inc -+ * -+ * $Id: doc2000.h,v 1.23 2004/09/16 23:26:08 gleixner Exp $ -+ * -+ * Released under GPL -+ */ - - #ifndef __MTD_DOC2000_H__ - #define __MTD_DOC2000_H__ - - #include <linux/mtd/mtd.h> -+#include <asm/semaphore.h> - - #define DoC_Sig1 0 - #define DoC_Sig2 1 -@@ -73,16 +81,16 @@ - * Others use readb/writeb - */ - #if defined(__arm__) --#define ReadDOC_(adr, reg) ((unsigned char)(*(__u32 *)(((unsigned long)adr)+((reg)<<2)))) --#define WriteDOC_(d, adr, reg) do{ *(__u32 *)(((unsigned long)adr)+((reg)<<2)) = (__u32)d; wmb();} while(0) -+#define ReadDOC_(adr, reg) ((unsigned char)(*(volatile __u32 *)(((unsigned long)adr)+((reg)<<2)))) -+#define WriteDOC_(d, adr, reg) do{ *(volatile __u32 *)(((unsigned long)adr)+((reg)<<2)) = (__u32)d; wmb();} while(0) - #define DOC_IOREMAP_LEN 0x8000 - #elif defined(__ppc__) --#define ReadDOC_(adr, reg) ((unsigned char)(*(__u16 
*)(((unsigned long)adr)+((reg)<<1)))) --#define WriteDOC_(d, adr, reg) do{ *(__u16 *)(((unsigned long)adr)+((reg)<<1)) = (__u16)d; wmb();} while(0) -+#define ReadDOC_(adr, reg) ((unsigned char)(*(volatile __u16 *)(((unsigned long)adr)+((reg)<<1)))) -+#define WriteDOC_(d, adr, reg) do{ *(volatile __u16 *)(((unsigned long)adr)+((reg)<<1)) = (__u16)d; wmb();} while(0) - #define DOC_IOREMAP_LEN 0x4000 - #else --#define ReadDOC_(adr, reg) readb(((unsigned long)adr) + (reg)) --#define WriteDOC_(d, adr, reg) writeb(d, ((unsigned long)adr) + (reg)) -+#define ReadDOC_(adr, reg) readb((void __iomem *)(((unsigned long)adr) + (reg))) -+#define WriteDOC_(d, adr, reg) writeb(d, (void __iomem *)(((unsigned long)adr) + (reg))) - #define DOC_IOREMAP_LEN 0x2000 - - #endif -@@ -106,6 +114,7 @@ - #define DOC_MODE_MDWREN 0x04 - - #define DOC_ChipID_Doc2k 0x20 -+#define DOC_ChipID_Doc2kTSOP 0x21 /* internal number for MTD */ - #define DOC_ChipID_DocMil 0x30 - #define DOC_ChipID_DocMilPlus32 0x40 - #define DOC_ChipID_DocMilPlus16 0x41 -@@ -147,10 +156,10 @@ - #define MAX_FLOORS 4 - #define MAX_CHIPS 4 - --#define MAX_FLOORS_MIL 4 -+#define MAX_FLOORS_MIL 1 - #define MAX_CHIPS_MIL 1 - --#define MAX_FLOORS_MPLUS 1 -+#define MAX_FLOORS_MPLUS 2 - #define MAX_CHIPS_MPLUS 1 - - #define ADDR_COLUMN 1 -@@ -159,9 +168,9 @@ - - struct DiskOnChip { - unsigned long physadr; -- unsigned long virtadr; -+ void __iomem *virtadr; - unsigned long totlen; -- char ChipID; /* Type of DiskOnChip */ -+ unsigned char ChipID; /* Type of DiskOnChip */ - int ioreg; - - unsigned long mfr; /* Flash IDs - only one type of flash per device */ -Index: linux-2.6.5/include/linux/mtd/flashchip.h -=================================================================== ---- linux-2.6.5.orig/include/linux/mtd/flashchip.h 2004-04-03 22:38:13.000000000 -0500 -+++ linux-2.6.5/include/linux/mtd/flashchip.h 2005-02-01 17:11:17.000000000 -0500 -@@ -6,7 +6,7 @@ - * - * (C) 2000 Red Hat. GPLd. 
- * -- * $Id: flashchip.h,v 1.9 2003/04/30 11:15:22 dwmw2 Exp $ -+ * $Id: flashchip.h,v 1.14 2004/06/15 16:44:59 nico Exp $ - * - */ - -@@ -43,7 +43,8 @@ - - - /* NOTE: confusingly, this can be used to refer to more than one chip at a time, -- if they're interleaved. */ -+ if they're interleaved. This can even refer to individual partitions on -+ the same physical chip when present. */ - - struct flchip { - unsigned long start; /* Offset within the map */ -@@ -61,6 +62,7 @@ - - int write_suspended:1; - int erase_suspended:1; -+ unsigned long in_progress_block_addr; - - spinlock_t *mutex; - spinlock_t _spinlock; /* We do it like this because sometimes they'll be shared. */ -@@ -69,8 +71,17 @@ - int word_write_time; - int buffer_write_time; - int erase_time; -+ -+ void *priv; - }; - -+/* This is used to handle contention on write/erase operations -+ between partitions of the same physical chip. */ -+struct flchip_shared { -+ spinlock_t lock; -+ struct flchip *writing; -+ struct flchip *erasing; -+}; - - - #endif /* __MTD_FLASHCHIP_H__ */ -Index: linux-2.6.5/include/linux/mtd/ftl.h -=================================================================== ---- linux-2.6.5.orig/include/linux/mtd/ftl.h 2004-04-03 22:37:37.000000000 -0500 -+++ linux-2.6.5/include/linux/mtd/ftl.h 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: ftl.h,v 1.5 2001/06/02 20:35:51 dwmw2 Exp $ -+ * $Id: ftl.h,v 1.6 2003/01/24 13:20:04 dwmw2 Exp $ - * - * Derived from (and probably identical to): - * ftl.h 1.7 1999/10/25 20:23:17 -Index: linux-2.6.5/include/linux/mtd/gen_probe.h -=================================================================== ---- linux-2.6.5.orig/include/linux/mtd/gen_probe.h 2004-04-03 22:37:25.000000000 -0500 -+++ linux-2.6.5/include/linux/mtd/gen_probe.h 2005-02-01 17:11:17.000000000 -0500 -@@ -1,7 +1,7 @@ - /* - * (C) 2001, 2001 Red Hat, Inc. 
- * GPL'd -- * $Id: gen_probe.h,v 1.1 2001/09/02 18:50:13 dwmw2 Exp $ -+ * $Id: gen_probe.h,v 1.2 2003/11/08 00:51:21 dsaxena Exp $ - */ - - #ifndef __LINUX_MTD_GEN_PROBE_H__ -@@ -10,12 +10,12 @@ - #include <linux/mtd/flashchip.h> - #include <linux/mtd/map.h> - #include <linux/mtd/cfi.h> -+#include <asm/bitops.h> - - struct chip_probe { - char *name; - int (*probe_chip)(struct map_info *map, __u32 base, -- struct flchip *chips, struct cfi_private *cfi); -- -+ unsigned long *chip_map, struct cfi_private *cfi); - }; - - struct mtd_info *mtd_do_chip_probe(struct map_info *map, struct chip_probe *cp); -Index: linux-2.6.5/include/linux/mtd/inftl.h -=================================================================== ---- linux-2.6.5.orig/include/linux/mtd/inftl.h 2004-04-03 22:38:14.000000000 -0500 -+++ linux-2.6.5/include/linux/mtd/inftl.h 2005-02-01 17:11:17.000000000 -0500 -@@ -3,105 +3,32 @@ - * - * (C) Copyright 2002, Greg Ungerer (gerg@snapgear.com) - * -- * $Id: inftl.h,v 1.3 2003/05/23 11:35:34 dwmw2 Exp $ -+ * $Id: inftl.h,v 1.6 2004/06/30 14:49:00 dbrown Exp $ - */ - - #ifndef __MTD_INFTL_H__ - #define __MTD_INFTL_H__ - -+#ifndef __KERNEL__ -+#error This is a kernel header. Perhaps include nftl-user.h instead? 
-+#endif -+ - #include <linux/mtd/blktrans.h> - #include <linux/mtd/mtd.h> - #include <linux/mtd/nftl.h> - --#define OSAK_VERSION 0x5120 --#define PERCENTUSED 98 -- --#define SECTORSIZE 512 -+#include <mtd/inftl-user.h> - - #ifndef INFTL_MAJOR --#define INFTL_MAJOR 93 /* FIXME */ -+#define INFTL_MAJOR 94 - #endif - #define INFTL_PARTN_BITS 4 - --/* Block Control Information */ -- --struct inftl_bci { -- __u8 ECCsig[6]; -- __u8 Status; -- __u8 Status1; --} __attribute__((packed)); -- --struct inftl_unithead1 { -- __u16 virtualUnitNo; -- __u16 prevUnitNo; -- __u8 ANAC; -- __u8 NACs; -- __u8 parityPerField; -- __u8 discarded; --} __attribute__((packed)); -- --struct inftl_unithead2 { -- __u8 parityPerField; -- __u8 ANAC; -- __u16 prevUnitNo; -- __u16 virtualUnitNo; -- __u8 NACs; -- __u8 discarded; --} __attribute__((packed)); -- --struct inftl_unittail { -- __u8 Reserved[4]; -- __u16 EraseMark; -- __u16 EraseMark1; --} __attribute__((packed)); -- --union inftl_uci { -- struct inftl_unithead1 a; -- struct inftl_unithead2 b; -- struct inftl_unittail c; --}; -- --struct inftl_oob { -- struct inftl_bci b; -- union inftl_uci u; --}; -- -- --/* INFTL Media Header */ -- --struct INFTLPartition { -- __u32 virtualUnits; -- __u32 firstUnit; -- __u32 lastUnit; -- __u32 flags; -- __u32 spareUnits; -- __u32 Reserved0; -- __u32 Reserved1; --} __attribute__((packed)); -- --struct INFTLMediaHeader { -- char bootRecordID[8]; -- __u32 NoOfBootImageBlocks; -- __u32 NoOfBinaryPartitions; -- __u32 NoOfBDTLPartitions; -- __u32 BlockMultiplierBits; -- __u32 FormatFlags; -- __u32 OsakVersion; -- __u32 PercentUsed; -- struct INFTLPartition Partitions[4]; --} __attribute__((packed)); -- --/* Partition flag types */ --#define INFTL_BINARY 0x20000000 --#define INFTL_BDTL 0x40000000 --#define INFTL_LAST 0x80000000 -- -- - #ifdef __KERNEL__ - - struct INFTLrecord { - struct mtd_blktrans_dev mbd; -- __u16 MediaUnit, SpareMediaUnit; -+ __u16 MediaUnit; - __u32 EraseSize; - struct INFTLMediaHeader 
MediaHdr; - int usecount; -@@ -119,6 +46,7 @@ - unsigned int nb_blocks; /* number of physical blocks */ - unsigned int nb_boot_blocks; /* number of blocks used by the bios */ - struct erase_info instr; -+ struct nand_oobinfo oobinfo; - }; - - int INFTL_mount(struct INFTLrecord *s); -Index: linux-2.6.5/include/linux/mtd/map.h -=================================================================== ---- linux-2.6.5.orig/include/linux/mtd/map.h 2004-04-03 22:36:56.000000000 -0500 -+++ linux-2.6.5/include/linux/mtd/map.h 2005-02-01 17:11:17.000000000 -0500 -@@ -1,6 +1,6 @@ - - /* Overhauled routines for dealing with different mmap regions of flash */ --/* $Id: map.h,v 1.34 2003/05/28 12:42:22 dwmw2 Exp $ */ -+/* $Id: map.h,v 1.44 2004/09/16 23:26:08 gleixner Exp $ */ - - #ifndef __LINUX_MTD_MAP_H__ - #define __LINUX_MTD_MAP_H__ -@@ -8,17 +8,164 @@ - #include <linux/config.h> - #include <linux/types.h> - #include <linux/list.h> -+#include <linux/mtd/compatmac.h> -+#include <asm/unaligned.h> - #include <asm/system.h> - #include <asm/io.h> -+#include <asm/bug.h> -+ -+#ifdef CONFIG_MTD_MAP_BANK_WIDTH_1 -+#define map_bankwidth(map) 1 -+#define map_bankwidth_is_1(map) (map_bankwidth(map) == 1) -+#define map_bankwidth_is_large(map) (0) -+#define map_words(map) (1) -+#define MAX_MAP_BANKWIDTH 1 -+#else -+#define map_bankwidth_is_1(map) (0) -+#endif -+ -+#ifdef CONFIG_MTD_MAP_BANK_WIDTH_2 -+# ifdef map_bankwidth -+# undef map_bankwidth -+# define map_bankwidth(map) ((map)->bankwidth) -+# else -+# define map_bankwidth(map) 2 -+# define map_bankwidth_is_large(map) (0) -+# define map_words(map) (1) -+# endif -+#define map_bankwidth_is_2(map) (map_bankwidth(map) == 2) -+#undef MAX_MAP_BANKWIDTH -+#define MAX_MAP_BANKWIDTH 2 -+#else -+#define map_bankwidth_is_2(map) (0) -+#endif -+ -+#ifdef CONFIG_MTD_MAP_BANK_WIDTH_4 -+# ifdef map_bankwidth -+# undef map_bankwidth -+# define map_bankwidth(map) ((map)->bankwidth) -+# else -+# define map_bankwidth(map) 4 -+# define 
map_bankwidth_is_large(map) (0) -+# define map_words(map) (1) -+# endif -+#define map_bankwidth_is_4(map) (map_bankwidth(map) == 4) -+#undef MAX_MAP_BANKWIDTH -+#define MAX_MAP_BANKWIDTH 4 -+#else -+#define map_bankwidth_is_4(map) (0) -+#endif -+ -+#ifdef CONFIG_MTD_MAP_BANK_WIDTH_8 -+# ifdef map_bankwidth -+# undef map_bankwidth -+# define map_bankwidth(map) ((map)->bankwidth) -+# if BITS_PER_LONG < 64 -+# undef map_bankwidth_is_large -+# define map_bankwidth_is_large(map) (map_bankwidth(map) > BITS_PER_LONG/8) -+# undef map_words -+# define map_words(map) (map_bankwidth(map) / sizeof(unsigned long)) -+# endif -+# else -+# define map_bankwidth(map) 8 -+# define map_bankwidth_is_large(map) (BITS_PER_LONG < 64) -+# define map_words(map) (map_bankwidth(map) / sizeof(unsigned long)) -+# endif -+#define map_bankwidth_is_8(map) (map_bankwidth(map) == 8) -+#undef MAX_MAP_BANKWIDTH -+#define MAX_MAP_BANKWIDTH 8 -+#else -+#define map_bankwidth_is_8(map) (0) -+#endif -+ -+#ifdef CONFIG_MTD_MAP_BANK_WIDTH_16 -+# ifdef map_bankwidth -+# undef map_bankwidth -+# define map_bankwidth(map) ((map)->bankwidth) -+# undef map_bankwidth_is_large -+# define map_bankwidth_is_large(map) (map_bankwidth(map) > BITS_PER_LONG/8) -+# undef map_words -+# define map_words(map) (map_bankwidth(map) / sizeof(unsigned long)) -+# else -+# define map_bankwidth(map) 16 -+# define map_bankwidth_is_large(map) (1) -+# define map_words(map) (map_bankwidth(map) / sizeof(unsigned long)) -+# endif -+#define map_bankwidth_is_16(map) (map_bankwidth(map) == 16) -+#undef MAX_MAP_BANKWIDTH -+#define MAX_MAP_BANKWIDTH 16 -+#else -+#define map_bankwidth_is_16(map) (0) -+#endif -+ -+#ifdef CONFIG_MTD_MAP_BANK_WIDTH_32 -+# ifdef map_bankwidth -+# undef map_bankwidth -+# define map_bankwidth(map) ((map)->bankwidth) -+# undef map_bankwidth_is_large -+# define map_bankwidth_is_large(map) (map_bankwidth(map) > BITS_PER_LONG/8) -+# undef map_words -+# define map_words(map) (map_bankwidth(map) / sizeof(unsigned long)) -+# 
else -+# define map_bankwidth(map) 32 -+# define map_bankwidth_is_large(map) (1) -+# define map_words(map) (map_bankwidth(map) / sizeof(unsigned long)) -+# endif -+#define map_bankwidth_is_32(map) (map_bankwidth(map) == 32) -+#undef MAX_MAP_BANKWIDTH -+#define MAX_MAP_BANKWIDTH 32 -+#else -+#define map_bankwidth_is_32(map) (0) -+#endif -+ -+#ifndef map_bankwidth -+#error "No bus width supported. What's the point?" -+#endif -+ -+static inline int map_bankwidth_supported(int w) -+{ -+ switch (w) { -+#ifdef CONFIG_MTD_MAP_BANK_WIDTH_1 -+ case 1: -+#endif -+#ifdef CONFIG_MTD_MAP_BANK_WIDTH_2 -+ case 2: -+#endif -+#ifdef CONFIG_MTD_MAP_BANK_WIDTH_4 -+ case 4: -+#endif -+#ifdef CONFIG_MTD_MAP_BANK_WIDTH_8 -+ case 8: -+#endif -+#ifdef CONFIG_MTD_MAP_BANK_WIDTH_16 -+ case 16: -+#endif -+#ifdef CONFIG_MTD_MAP_BANK_WIDTH_32 -+ case 32: -+#endif -+ return 1; -+ -+ default: -+ return 0; -+ } -+} -+ -+#define MAX_MAP_LONGS ( ((MAX_MAP_BANKWIDTH*8) + BITS_PER_LONG - 1) / BITS_PER_LONG ) -+ -+typedef union { -+ unsigned long x[MAX_MAP_LONGS]; -+} map_word; - - /* The map stuff is very simple. You fill in your struct map_info with - a handful of routines for accessing the device, making sure they handle - paging etc. correctly if your device needs it. Then you pass it off -- to a chip driver which deals with a mapped device - generally either -- do_cfi_probe() or do_ram_probe(), either of which will return a -- struct mtd_info if they liked what they saw. At which point, you -- fill in the mtd->module with your own module address, and register -- it. -+ to a chip probe routine -- either JEDEC or CFI probe or both -- via -+ do_map_probe(). If a chip is recognised, the probe code will invoke the -+ appropriate chip driver (if present) and return a struct mtd_info. -+ At which point, you fill in the mtd->module with your own module -+ address, and register it with the MTD core code. 
Or you could partition -+ it and register the partitions instead, or keep it for your own private -+ use; whatever. - - The mtd->priv field will point to the struct map_info, and any further - private data required by the chip driver is linked from the -@@ -33,31 +180,32 @@ - unsigned long phys; - #define NO_XIP (-1UL) - -- unsigned long virt; -+ void __iomem *virt; - void *cached; - -- int buswidth; /* in octets */ -+ int bankwidth; /* in octets. This isn't necessarily the width -+ of actual bus cycles -- it's the repeat interval -+ in bytes, before you are talking to the first chip again. -+ */ - - #ifdef CONFIG_MTD_COMPLEX_MAPPINGS -- u8 (*read8)(struct map_info *, unsigned long); -- u16 (*read16)(struct map_info *, unsigned long); -- u32 (*read32)(struct map_info *, unsigned long); -- u64 (*read64)(struct map_info *, unsigned long); -- /* If it returned a 'long' I'd call it readl. -- * It doesn't. -- * I won't. -- * dwmw2 */ -- -+ map_word (*read)(struct map_info *, unsigned long); - void (*copy_from)(struct map_info *, void *, unsigned long, ssize_t); -- void (*write8)(struct map_info *, u8, unsigned long); -- void (*write16)(struct map_info *, u16, unsigned long); -- void (*write32)(struct map_info *, u32, unsigned long); -- void (*write64)(struct map_info *, u64, unsigned long); -+ -+ void (*write)(struct map_info *, const map_word, unsigned long); - void (*copy_to)(struct map_info *, unsigned long, const void *, ssize_t); - - /* We can perhaps put in 'point' and 'unpoint' methods, if we really - want to enable XIP for non-linear mappings. Not yet though. */ - #endif -+ /* It's possible for the map driver to use cached memory in its -+ copy_from implementation (and _only_ with copy_from). However, -+ when the chip driver knows some flash area has changed contents, -+ it will signal it to the map driver through this routine to let -+ the map driver invalidate the corresponding cache as needed. -+ If there is no cache to care about this can be set to NULL. 
*/ -+ void (*inval_cache)(struct map_info *, unsigned long, ssize_t); -+ - /* set_vpp() must handle being reentered -- enable, enable, disable - must leave it enabled. */ - void (*set_vpp)(struct map_info *, int); -@@ -85,86 +233,173 @@ - #define ENABLE_VPP(map) do { if(map->set_vpp) map->set_vpp(map, 1); } while(0) - #define DISABLE_VPP(map) do { if(map->set_vpp) map->set_vpp(map, 0); } while(0) - --#ifdef CONFIG_MTD_COMPLEX_MAPPINGS --#define map_read8(map, ofs) (map)->read8(map, ofs) --#define map_read16(map, ofs) (map)->read16(map, ofs) --#define map_read32(map, ofs) (map)->read32(map, ofs) --#define map_read64(map, ofs) (map)->read64(map, ofs) --#define map_copy_from(map, to, from, len) (map)->copy_from(map, to, from, len) --#define map_write8(map, datum, ofs) (map)->write8(map, datum, ofs) --#define map_write16(map, datum, ofs) (map)->write16(map, datum, ofs) --#define map_write32(map, datum, ofs) (map)->write32(map, datum, ofs) --#define map_write64(map, datum, ofs) (map)->write64(map, datum, ofs) --#define map_copy_to(map, to, from, len) (map)->copy_to(map, to, from, len) -+#define INVALIDATE_CACHED_RANGE(map, from, size) \ -+ do { if(map->inval_cache) map->inval_cache(map, from, size); } while(0) - --extern void simple_map_init(struct map_info *); --#define map_is_linear(map) (map->phys != NO_XIP) - --#else --static inline u8 map_read8(struct map_info *map, unsigned long ofs) -+static inline int map_word_equal(struct map_info *map, map_word val1, map_word val2) - { -- return __raw_readb(map->virt + ofs); -+ int i; -+ for (i=0; i<map_words(map); i++) { -+ if (val1.x[i] != val2.x[i]) -+ return 0; -+ } -+ return 1; - } - --static inline u16 map_read16(struct map_info *map, unsigned long ofs) -+static inline map_word map_word_and(struct map_info *map, map_word val1, map_word val2) - { -- return __raw_readw(map->virt + ofs); -+ map_word r; -+ int i; -+ -+ for (i=0; i<map_words(map); i++) { -+ r.x[i] = val1.x[i] & val2.x[i]; -+ } -+ return r; - } - --static 
inline u32 map_read32(struct map_info *map, unsigned long ofs) -+static inline map_word map_word_or(struct map_info *map, map_word val1, map_word val2) - { -- return __raw_readl(map->virt + ofs); -+ map_word r; -+ int i; -+ -+ for (i=0; i<map_words(map); i++) { -+ r.x[i] = val1.x[i] | val2.x[i]; -+ } -+ return r; - } -+#define map_word_andequal(m, a, b, z) map_word_equal(m, z, map_word_and(m, a, b)) - --static inline u64 map_read64(struct map_info *map, unsigned long ofs) -+static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word val2) - { --#ifndef CONFIG_MTD_CFI_B8 /* 64-bit mappings */ -- BUG(); -+ int i; -+ -+ for (i=0; i<map_words(map); i++) { -+ if (val1.x[i] & val2.x[i]) -+ return 1; -+ } - return 0; --#else -- return __raw_readll(map->virt + ofs); --#endif - } - --static inline void map_write8(struct map_info *map, u8 datum, unsigned long ofs) -+static inline map_word map_word_load(struct map_info *map, const void *ptr) - { -- __raw_writeb(datum, map->virt + ofs); -- mb(); -+ map_word r; -+ -+ if (map_bankwidth_is_1(map)) -+ r.x[0] = *(unsigned char *)ptr; -+ else if (map_bankwidth_is_2(map)) -+ r.x[0] = get_unaligned((uint16_t *)ptr); -+ else if (map_bankwidth_is_4(map)) -+ r.x[0] = get_unaligned((uint32_t *)ptr); -+#if BITS_PER_LONG >= 64 -+ else if (map_bankwidth_is_8(map)) -+ r.x[0] = get_unaligned((uint64_t *)ptr); -+#endif -+ else if (map_bankwidth_is_large(map)) -+ memcpy(r.x, ptr, map->bankwidth); -+ -+ return r; - } - --static inline void map_write16(struct map_info *map, u16 datum, unsigned long ofs) -+static inline map_word map_word_load_partial(struct map_info *map, map_word orig, const unsigned char *buf, int start, int len) - { -- __raw_writew(datum, map->virt + ofs); -- mb(); -+ int i; -+ -+ if (map_bankwidth_is_large(map)) { -+ char *dest = (char *)&orig; -+ memcpy(dest+start, buf, len); -+ } else { -+ for (i=start; i < start+len; i++) { -+ int bitpos; -+#ifdef __LITTLE_ENDIAN -+ bitpos = i*8; -+#else /* __BIG_ENDIAN 
*/ -+ bitpos = (map_bankwidth(map)-1-i)*8; -+#endif -+ orig.x[0] &= ~(0xff << bitpos); -+ orig.x[0] |= buf[i] << bitpos; -+ } -+ } -+ return orig; - } - --static inline void map_write32(struct map_info *map, u32 datum, unsigned long ofs) -+static inline map_word map_word_ff(struct map_info *map) - { -- __raw_writel(datum, map->virt + ofs); -- mb(); -+ map_word r; -+ int i; -+ -+ for (i=0; i<map_words(map); i++) { -+ r.x[i] = ~0UL; -+ } -+ return r; -+} -+static inline map_word inline_map_read(struct map_info *map, unsigned long ofs) -+{ -+ map_word r; -+ -+ if (map_bankwidth_is_1(map)) -+ r.x[0] = __raw_readb(map->virt + ofs); -+ else if (map_bankwidth_is_2(map)) -+ r.x[0] = __raw_readw(map->virt + ofs); -+ else if (map_bankwidth_is_4(map)) -+ r.x[0] = __raw_readl(map->virt + ofs); -+#if BITS_PER_LONG >= 64 -+ else if (map_bankwidth_is_8(map)) -+ r.x[0] = __raw_readq(map->virt + ofs); -+#endif -+ else if (map_bankwidth_is_large(map)) -+ memcpy_fromio(r.x, map->virt+ofs, map->bankwidth); -+ -+ return r; - } - --static inline void map_write64(struct map_info *map, u64 datum, unsigned long ofs) -+static inline void inline_map_write(struct map_info *map, const map_word datum, unsigned long ofs) - { --#ifndef CONFIG_MTD_CFI_B8 /* 64-bit mappings */ -- BUG(); --#else -- __raw_writell(datum, map->virt + ofs); -+ if (map_bankwidth_is_1(map)) -+ __raw_writeb(datum.x[0], map->virt + ofs); -+ else if (map_bankwidth_is_2(map)) -+ __raw_writew(datum.x[0], map->virt + ofs); -+ else if (map_bankwidth_is_4(map)) -+ __raw_writel(datum.x[0], map->virt + ofs); -+#if BITS_PER_LONG >= 64 -+ else if (map_bankwidth_is_8(map)) -+ __raw_writeq(datum.x[0], map->virt + ofs); -+#endif -+ else if (map_bankwidth_is_large(map)) -+ memcpy_toio(map->virt+ofs, datum.x, map->bankwidth); - mb(); --#endif /* CFI_B8 */ - } - --static inline void map_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len) -+static inline void inline_map_copy_from(struct map_info *map, void *to, 
unsigned long from, ssize_t len) - { -- memcpy_fromio(to, map->virt + from, len); -+ if (map->cached) -+ memcpy(to, (char *)map->cached + from, len); -+ else -+ memcpy_fromio(to, map->virt + from, len); - } - --static inline void map_copy_to(struct map_info *map, unsigned long to, const void *from, ssize_t len) -+static inline void inline_map_copy_to(struct map_info *map, unsigned long to, const void *from, ssize_t len) - { - memcpy_toio(map->virt + to, from, len); - } - --#define simple_map_init(map) do { } while (0) -+#ifdef CONFIG_MTD_COMPLEX_MAPPINGS -+#define map_read(map, ofs) (map)->read(map, ofs) -+#define map_copy_from(map, to, from, len) (map)->copy_from(map, to, from, len) -+#define map_write(map, datum, ofs) (map)->write(map, datum, ofs) -+#define map_copy_to(map, to, from, len) (map)->copy_to(map, to, from, len) -+ -+extern void simple_map_init(struct map_info *); -+#define map_is_linear(map) (map->phys != NO_XIP) -+ -+#else -+#define map_read(map, ofs) inline_map_read(map, ofs) -+#define map_copy_from(map, to, from, len) inline_map_copy_from(map, to, from, len) -+#define map_write(map, datum, ofs) inline_map_write(map, datum, ofs) -+#define map_copy_to(map, to, from, len) inline_map_copy_to(map, to, from, len) -+ -+ -+#define simple_map_init(map) BUG_ON(!map_bankwidth_supported((map)->bankwidth)) - #define map_is_linear(map) (1) - - #endif /* !CONFIG_MTD_COMPLEX_MAPPINGS */ -Index: linux-2.6.5/include/linux/mtd/mtd.h -=================================================================== ---- linux-2.6.5.orig/include/linux/mtd/mtd.h 2005-02-01 16:55:50.000000000 -0500 -+++ linux-2.6.5/include/linux/mtd/mtd.h 2005-02-01 17:11:17.000000000 -0500 -@@ -1,10 +1,17 @@ -- --/* $Id: mtd.h,v 1.45 2003/05/20 21:56:40 dwmw2 Exp $ */ -+/* -+ * $Id: mtd.h,v 1.56 2004/08/09 18:46:04 dmarlin Exp $ -+ * -+ * Copyright (C) 1999-2003 David Woodhouse <dwmw2@infradead.org> et al. 
-+ * -+ * Released under GPL -+ */ - - #ifndef __MTD_MTD_H__ - #define __MTD_MTD_H__ - --#ifdef __KERNEL__ -+#ifndef __KERNEL__ -+#error This is a kernel header. Perhaps include mtd-user.h instead? -+#endif - - #include <linux/config.h> - #include <linux/version.h> -@@ -12,115 +19,27 @@ - #include <linux/module.h> - #include <linux/uio.h> - --#endif /* __KERNEL__ */ -- --struct erase_info_user { -- u_int32_t start; -- u_int32_t length; --}; -- --struct mtd_oob_buf { -- u_int32_t start; -- u_int32_t length; -- unsigned char *ptr; --}; -+#include <linux/mtd/compatmac.h> -+#include <mtd/mtd-abi.h> - - #define MTD_CHAR_MAJOR 90 - #define MTD_BLOCK_MAJOR 31 - #define MAX_MTD_DEVICES 16 - -- -- --#define MTD_ABSENT 0 --#define MTD_RAM 1 --#define MTD_ROM 2 --#define MTD_NORFLASH 3 --#define MTD_NANDFLASH 4 --#define MTD_PEROM 5 --#define MTD_OTHER 14 --#define MTD_UNKNOWN 15 -- -- -- --#define MTD_CLEAR_BITS 1 // Bits can be cleared (flash) --#define MTD_SET_BITS 2 // Bits can be set --#define MTD_ERASEABLE 4 // Has an erase function --#define MTD_WRITEB_WRITEABLE 8 // Direct IO is possible --#define MTD_VOLATILE 16 // Set for RAMs --#define MTD_XIP 32 // eXecute-In-Place possible --#define MTD_OOB 64 // Out-of-band data (NAND flash) --#define MTD_ECC 128 // Device capable of automatic ECC -- --// Some common devices / combinations of capabilities --#define MTD_CAP_ROM 0 --#define MTD_CAP_RAM (MTD_CLEAR_BITS|MTD_SET_BITS|MTD_WRITEB_WRITEABLE) --#define MTD_CAP_NORFLASH (MTD_CLEAR_BITS|MTD_ERASEABLE) --#define MTD_CAP_NANDFLASH (MTD_CLEAR_BITS|MTD_ERASEABLE|MTD_OOB) --#define MTD_WRITEABLE (MTD_CLEAR_BITS|MTD_SET_BITS) -- -- --// Types of automatic ECC/Checksum available --#define MTD_ECC_NONE 0 // No automatic ECC available --#define MTD_ECC_RS_DiskOnChip 1 // Automatic ECC on DiskOnChip --#define MTD_ECC_SW 2 // SW ECC for Toshiba & Samsung devices -- --struct mtd_info_user { -- u_char type; -- u_int32_t flags; -- u_int32_t size; // Total size of the MTD -- u_int32_t 
erasesize; -- u_int32_t oobblock; // Size of OOB blocks (e.g. 512) -- u_int32_t oobsize; // Amount of OOB data per block (e.g. 16) -- u_int32_t ecctype; -- u_int32_t eccsize; --}; -- --struct region_info_user { -- u_int32_t offset; /* At which this region starts, -- * from the beginning of the MTD */ -- u_int32_t erasesize; /* For this region */ -- u_int32_t numblocks; /* Number of blocks in this region */ -- u_int32_t regionindex; --}; -- --#define MEMGETINFO _IOR('M', 1, struct mtd_info_user) --#define MEMERASE _IOW('M', 2, struct erase_info_user) --#define MEMWRITEOOB _IOWR('M', 3, struct mtd_oob_buf) --#define MEMREADOOB _IOWR('M', 4, struct mtd_oob_buf) --#define MEMLOCK _IOW('M', 5, struct erase_info_user) --#define MEMUNLOCK _IOW('M', 6, struct erase_info_user) --#define MEMGETREGIONCOUNT _IOR('M', 7, int) --#define MEMGETREGIONINFO _IOWR('M', 8, struct region_info_user) --#define MEMSETOOBSEL _IOW('M', 9, struct nand_oobinfo) -- --struct nand_oobinfo { -- int useecc; -- int eccpos[6]; --}; -- -- --#ifndef __KERNEL__ -- --typedef struct mtd_info_user mtd_info_t; --typedef struct erase_info_user erase_info_t; --typedef struct region_info_user region_info_t; --typedef struct nand_oobinfo nand_oobinfo_t; -- -- /* User-space ioctl definitions */ -- --#else /* __KERNEL__ */ -- -- - #define MTD_ERASE_PENDING 0x01 - #define MTD_ERASING 0x02 - #define MTD_ERASE_SUSPEND 0x04 - #define MTD_ERASE_DONE 0x08 - #define MTD_ERASE_FAILED 0x10 - -+/* If the erase fails, fail_addr might indicate exactly which block failed. If -+ fail_addr = 0xffffffff, the failure was not at the device level or was not -+ specific to any particular block. */ - struct erase_info { - struct mtd_info *mtd; - u_int32_t addr; - u_int32_t len; -+ u_int32_t fail_addr; - u_long time; - u_long retries; - u_int dev; -@@ -150,6 +69,7 @@ - - u_int32_t oobblock; // Size of OOB blocks (e.g. 512) - u_int32_t oobsize; // Amount of OOB data per block (e.g. 
16) -+ u_int32_t oobavail; // Number of bytes in OOB area available for fs - u_int32_t ecctype; - u_int32_t eccsize; - -@@ -200,16 +120,16 @@ - /* This function is not yet implemented */ - int (*write_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); - -- /* iovec-based read/write methods. We need these especially for NAND flash, -+ /* kvec-based read/write methods. We need these especially for NAND flash, - with its limited number of write cycles per erase. - NB: The 'count' parameter is the number of _vectors_, each of - which contains an (ofs, len) tuple. - */ -- int (*readv) (struct mtd_info *mtd, struct iovec *vecs, unsigned long count, loff_t from, size_t *retlen); -- int (*readv_ecc) (struct mtd_info *mtd, struct iovec *vecs, unsigned long count, loff_t from, -+ int (*readv) (struct mtd_info *mtd, struct kvec *vecs, unsigned long count, loff_t from, size_t *retlen); -+ int (*readv_ecc) (struct mtd_info *mtd, struct kvec *vecs, unsigned long count, loff_t from, - size_t *retlen, u_char *eccbuf, struct nand_oobinfo *oobsel); -- int (*writev) (struct mtd_info *mtd, const struct iovec *vecs, unsigned long count, loff_t to, size_t *retlen); -- int (*writev_ecc) (struct mtd_info *mtd, const struct iovec *vecs, unsigned long count, loff_t to, -+ int (*writev) (struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, size_t *retlen); -+ int (*writev_ecc) (struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, - size_t *retlen, u_char *eccbuf, struct nand_oobinfo *oobsel); - - /* Sync */ -@@ -222,9 +142,10 @@ - /* Power Management functions */ - int (*suspend) (struct mtd_info *mtd); - void (*resume) (struct mtd_info *mtd); -- -- /* Semaphore */ -- struct semaphore mutex; -+ -+ /* Bad block management functions */ -+ int (*block_isbad) (struct mtd_info *mtd, loff_t ofs); -+ int (*block_markbad) (struct mtd_info *mtd, loff_t ofs); - - void *priv; - -@@ -253,10 +174,10 @@ - extern 
void register_mtd_user (struct mtd_notifier *new); - extern int unregister_mtd_user (struct mtd_notifier *old); - --int default_mtd_writev(struct mtd_info *mtd, const struct iovec *vecs, -+int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, - unsigned long count, loff_t to, size_t *retlen); - --int default_mtd_readv(struct mtd_info *mtd, struct iovec *vecs, -+int default_mtd_readv(struct mtd_info *mtd, struct kvec *vecs, - unsigned long count, loff_t from, size_t *retlen); - - #define MTD_ERASE(mtd, args...) (*(mtd->erase))(mtd, args) -@@ -272,6 +193,17 @@ - #define MTD_WRITEOOB(mtd, args...) (*(mtd->write_oob))(mtd, args) - #define MTD_SYNC(mtd) do { if (mtd->sync) (*(mtd->sync))(mtd); } while (0) - -+ -+#ifdef CONFIG_MTD_PARTITIONS -+void mtd_erase_callback(struct erase_info *instr); -+#else -+static inline void mtd_erase_callback(struct erase_info *instr) -+{ -+ if (instr->callback) -+ instr->callback(instr); -+} -+#endif -+ - /* - * Debugging macro and defines - */ -@@ -291,6 +223,4 @@ - - #endif /* CONFIG_MTD_DEBUG */ - --#endif /* __KERNEL__ */ -- - #endif /* __MTD_MTD_H__ */ -Index: linux-2.6.5/include/linux/mtd/nand.h -=================================================================== ---- linux-2.6.5.orig/include/linux/mtd/nand.h 2004-04-03 22:38:14.000000000 -0500 -+++ linux-2.6.5/include/linux/mtd/nand.h 2005-02-01 17:11:17.000000000 -0500 -@@ -5,7 +5,7 @@ - * Steven J. 
Hill <sjhill@realitydiluted.com> - * Thomas Gleixner <tglx@linutronix.de> - * -- * $Id: nand.h,v 1.25 2003/05/21 15:15:02 dwmw2 Exp $ -+ * $Id: nand.h,v 1.64 2004/09/16 23:26:08 gleixner Exp $ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as -@@ -44,6 +44,10 @@ - * NAND_YAFFS_OOB - * 11-25-2002 tglx Added Manufacturer code FUJITSU, NATIONAL - * Split manufacturer and device ID structures -+ * -+ * 02-08-2004 tglx added option field to nand structure for chip anomalities -+ * 05-25-2004 tglx added bad block table support, ST-MICRO manufacturer id -+ * update of nand_chip structure description - */ - #ifndef __LINUX_MTD_NAND_H - #define __LINUX_MTD_NAND_H -@@ -51,22 +55,46 @@ - #include <linux/config.h> - #include <linux/wait.h> - #include <linux/spinlock.h> -+#include <linux/mtd/mtd.h> - - struct mtd_info; --/* -- * Searches for a NAND device -+/* Scan and identify a NAND device */ -+extern int nand_scan (struct mtd_info *mtd, int max_chips); -+/* Free resources held by the NAND device */ -+extern void nand_release (struct mtd_info *mtd); -+ -+/* Read raw data from the device without ECC */ -+extern int nand_read_raw (struct mtd_info *mtd, uint8_t *buf, loff_t from, size_t len, size_t ooblen); -+ -+ -+/* The maximum number of NAND chips in an array */ -+#define NAND_MAX_CHIPS 8 -+ -+/* This constant declares the max. oobsize / page, which -+ * is supported now. If you add a chip with bigger oobsize/page -+ * adjust this accordingly. 
- */ --extern int nand_scan (struct mtd_info *mtd); -+#define NAND_MAX_OOBSIZE 64 - - /* - * Constants for hardware specific CLE/ALE/NCE function - */ -+/* Select the chip by setting nCE to low */ - #define NAND_CTL_SETNCE 1 -+/* Deselect the chip by setting nCE to high */ - #define NAND_CTL_CLRNCE 2 -+/* Select the command latch by setting CLE to high */ - #define NAND_CTL_SETCLE 3 -+/* Deselect the command latch by setting CLE to low */ - #define NAND_CTL_CLRCLE 4 -+/* Select the address latch by setting ALE to high */ - #define NAND_CTL_SETALE 5 -+/* Deselect the address latch by setting ALE to low */ - #define NAND_CTL_CLRALE 6 -+/* Set write protection by setting WP to high. Not used! */ -+#define NAND_CTL_SETWP 7 -+/* Clear write protection by setting WP to low. Not used! */ -+#define NAND_CTL_CLRWP 8 - - /* - * Standard NAND flash commands -@@ -77,35 +105,102 @@ - #define NAND_CMD_READOOB 0x50 - #define NAND_CMD_ERASE1 0x60 - #define NAND_CMD_STATUS 0x70 -+#define NAND_CMD_STATUS_MULTI 0x71 - #define NAND_CMD_SEQIN 0x80 - #define NAND_CMD_READID 0x90 - #define NAND_CMD_ERASE2 0xd0 - #define NAND_CMD_RESET 0xff - -+/* Extended commands for large page devices */ -+#define NAND_CMD_READSTART 0x30 -+#define NAND_CMD_CACHEDPROG 0x15 -+ -+/* Status bits */ -+#define NAND_STATUS_FAIL 0x01 -+#define NAND_STATUS_FAIL_N1 0x02 -+#define NAND_STATUS_TRUE_READY 0x20 -+#define NAND_STATUS_READY 0x40 -+#define NAND_STATUS_WP 0x80 -+ - /* - * Constants for ECC_MODES -- * -- * NONE: No ECC -- * SOFT: Software ECC 3 byte ECC per 256 Byte data -- * HW3_256: Hardware ECC 3 byte ECC per 256 Byte data -- * HW3_512: Hardware ECC 3 byte ECC per 512 Byte data -- * -- * --*/ -+ */ -+ -+/* No ECC. Usage is not recommended ! 
*/ - #define NAND_ECC_NONE 0 -+/* Software ECC 3 byte ECC per 256 Byte data */ - #define NAND_ECC_SOFT 1 -+/* Hardware ECC 3 byte ECC per 256 Byte data */ - #define NAND_ECC_HW3_256 2 -+/* Hardware ECC 3 byte ECC per 512 Byte data */ - #define NAND_ECC_HW3_512 3 -+/* Hardware ECC 3 byte ECC per 512 Byte data */ - #define NAND_ECC_HW6_512 4 --#define NAND_ECC_DISKONCHIP 5 -+/* Hardware ECC 8 byte ECC per 512 Byte data */ -+#define NAND_ECC_HW8_512 6 - - /* - * Constants for Hardware ECC - */ -+/* Reset Hardware ECC for read */ - #define NAND_ECC_READ 0 -+/* Reset Hardware ECC for write */ - #define NAND_ECC_WRITE 1 -- -+/* Enable Hardware ECC before syndrom is read back from flash */ -+#define NAND_ECC_READSYN 2 -+ -+/* Option constants for bizarre disfunctionality and real -+* features -+*/ -+/* Chip can not auto increment pages */ -+#define NAND_NO_AUTOINCR 0x00000001 -+/* Buswitdh is 16 bit */ -+#define NAND_BUSWIDTH_16 0x00000002 -+/* Device supports partial programming without padding */ -+#define NAND_NO_PADDING 0x00000004 -+/* Chip has cache program function */ -+#define NAND_CACHEPRG 0x00000008 -+/* Chip has copy back function */ -+#define NAND_COPYBACK 0x00000010 -+/* AND Chip which has 4 banks and a confusing page / block -+ * assignment. 
See Renesas datasheet for further information */ -+#define NAND_IS_AND 0x00000020 -+/* Chip has a array of 4 pages which can be read without -+ * additional ready /busy waits */ -+#define NAND_4PAGE_ARRAY 0x00000040 -+ -+/* Options valid for Samsung large page devices */ -+#define NAND_SAMSUNG_LP_OPTIONS \ -+ (NAND_NO_PADDING | NAND_CACHEPRG | NAND_COPYBACK) -+ -+/* Macros to identify the above */ -+#define NAND_CANAUTOINCR(chip) (!(chip->options & NAND_NO_AUTOINCR)) -+#define NAND_MUST_PAD(chip) (!(chip->options & NAND_NO_PADDING)) -+#define NAND_HAS_CACHEPROG(chip) ((chip->options & NAND_CACHEPRG)) -+#define NAND_HAS_COPYBACK(chip) ((chip->options & NAND_COPYBACK)) -+ -+/* Mask to zero out the chip options, which come from the id table */ -+#define NAND_CHIPOPTIONS_MSK (0x0000ffff & ~NAND_NO_AUTOINCR) -+ -+/* Non chip related options */ -+/* Use a flash based bad block table. This option is passed to the -+ * default bad block table function. */ -+#define NAND_USE_FLASH_BBT 0x00010000 -+/* The hw ecc generator provides a syndrome instead a ecc value on read -+ * This can only work if we have the ecc bytes directly behind the -+ * data bytes. 
Applies for DOC and AG-AND Renesas HW Reed Solomon generators */ -+#define NAND_HWECC_SYNDROME 0x00020000 -+ -+ -+/* Options set by nand scan */ -+/* Nand scan has allocated oob_buf */ -+#define NAND_OOBBUF_ALLOC 0x40000000 -+/* Nand scan has allocated data_buf */ -+#define NAND_DATABUF_ALLOC 0x80000000 -+ -+ - /* -+ * nand_state_t - chip states - * Enumeration for NAND flash chip state - */ - typedef enum { -@@ -113,71 +208,116 @@ - FL_READING, - FL_WRITING, - FL_ERASING, -- FL_SYNCING -+ FL_SYNCING, -+ FL_CACHEDPRG, - } nand_state_t; - - --/* -- * NAND Private Flash Chip Data -- * -- * Structure overview: -- * -- * IO_ADDR_R - address to read the 8 I/O lines of the flash device -- * -- * IO_ADDR_W - address to write the 8 I/O lines of the flash device -- * -- * hwcontrol - hardwarespecific function for accesing control-lines -- * -- * dev_ready - hardwarespecific function for accesing device ready/busy line -- * -- * waitfunc - hardwarespecific function for wait on ready -- * -- * calculate_ecc - function for ecc calculation or readback from ecc hardware -- * -- * correct_data - function for ecc correction, matching to ecc generator (sw/hw) -- * -- * enable_hwecc - function to enable (reset) hardware ecc generator -- * -- * eccmod - mode of ecc: see constants -- * -- * eccsize - databytes used per ecc-calculation -- * -- * chip_delay - chip dependent delay for transfering data from array to read regs (tR) -- * -- * chip_lock - spinlock used to protect access to this structure -- * -- * wq - wait queue to sleep on if a NAND operation is in progress -- * -- * state - give the current state of the NAND device -- * -- * page_shift - number of address bits in a page (column address bits) -- * -- * data_buf - data buffer passed to/from MTD user modules -- * -- * data_cache - data cache for redundant page access and shadow for -- * ECC failure -- * -- * cache_page - number of last valid page in page_cache -+/** -+ * struct nand_chip - NAND Private Flash Chip Data -+ * 
@IO_ADDR_R: [BOARDSPECIFIC] address to read the 8 I/O lines of the flash device -+ * @IO_ADDR_W: [BOARDSPECIFIC] address to write the 8 I/O lines of the flash device -+ * @read_byte: [REPLACEABLE] read one byte from the chip -+ * @write_byte: [REPLACEABLE] write one byte to the chip -+ * @read_word: [REPLACEABLE] read one word from the chip -+ * @write_word: [REPLACEABLE] write one word to the chip -+ * @write_buf: [REPLACEABLE] write data from the buffer to the chip -+ * @read_buf: [REPLACEABLE] read data from the chip into the buffer -+ * @verify_buf: [REPLACEABLE] verify buffer contents against the chip data -+ * @select_chip: [REPLACEABLE] select chip nr -+ * @block_bad: [REPLACEABLE] check, if the block is bad -+ * @block_markbad: [REPLACEABLE] mark the block bad -+ * @hwcontrol: [BOARDSPECIFIC] hardwarespecific function for accesing control-lines -+ * @dev_ready: [BOARDSPECIFIC] hardwarespecific function for accesing device ready/busy line -+ * If set to NULL no access to ready/busy is available and the ready/busy information -+ * is read from the chip status register -+ * @cmdfunc: [REPLACEABLE] hardwarespecific function for writing commands to the chip -+ * @waitfunc: [REPLACEABLE] hardwarespecific function for wait on ready -+ * @calculate_ecc: [REPLACEABLE] function for ecc calculation or readback from ecc hardware -+ * @correct_data: [REPLACEABLE] function for ecc correction, matching to ecc generator (sw/hw) -+ * @enable_hwecc: [BOARDSPECIFIC] function to enable (reset) hardware ecc generator. 
Must only -+ * be provided if a hardware ECC is available -+ * @erase_cmd: [INTERN] erase command write function, selectable due to AND support -+ * @scan_bbt: [REPLACEABLE] function to scan bad block table -+ * @eccmode: [BOARDSPECIFIC] mode of ecc, see defines -+ * @eccsize: [INTERN] databytes used per ecc-calculation -+ * @eccsteps: [INTERN] number of ecc calculation steps per page -+ * @chip_delay: [BOARDSPECIFIC] chip dependent delay for transfering data from array to read regs (tR) -+ * @chip_lock: [INTERN] spinlock used to protect access to this structure and the chip -+ * @wq: [INTERN] wait queue to sleep on if a NAND operation is in progress -+ * @state: [INTERN] the current state of the NAND device -+ * @page_shift: [INTERN] number of address bits in a page (column address bits) -+ * @phys_erase_shift: [INTERN] number of address bits in a physical eraseblock -+ * @bbt_erase_shift: [INTERN] number of address bits in a bbt entry -+ * @chip_shift: [INTERN] number of address bits in one chip -+ * @data_buf: [INTERN] internal buffer for one page + oob -+ * @oob_buf: [INTERN] oob buffer for one eraseblock -+ * @oobdirty: [INTERN] indicates that oob_buf must be reinitialized -+ * @data_poi: [INTERN] pointer to a data buffer -+ * @options: [BOARDSPECIFIC] various chip options. They can partly be set to inform nand_scan about -+ * special functionality. 
See the defines for further explanation -+ * @badblockpos: [INTERN] position of the bad block marker in the oob area -+ * @numchips: [INTERN] number of physical chips -+ * @chipsize: [INTERN] the size of one chip for multichip arrays -+ * @pagemask: [INTERN] page number mask = number of (pages / chip) - 1 -+ * @pagebuf: [INTERN] holds the pagenumber which is currently in data_buf -+ * @autooob: [REPLACEABLE] the default (auto)placement scheme -+ * @bbt: [INTERN] bad block table pointer -+ * @bbt_td: [REPLACEABLE] bad block table descriptor for flash lookup -+ * @bbt_md: [REPLACEABLE] bad block table mirror descriptor -+ * @priv: [OPTIONAL] pointer to private chip date - */ -+ - struct nand_chip { -- unsigned long IO_ADDR_R; -- unsigned long IO_ADDR_W; -- void (*hwcontrol)(int cmd); -- int (*dev_ready)(void); -+ void __iomem *IO_ADDR_R; -+ void __iomem *IO_ADDR_W; -+ -+ u_char (*read_byte)(struct mtd_info *mtd); -+ void (*write_byte)(struct mtd_info *mtd, u_char byte); -+ u16 (*read_word)(struct mtd_info *mtd); -+ void (*write_word)(struct mtd_info *mtd, u16 word); -+ -+ void (*write_buf)(struct mtd_info *mtd, const u_char *buf, int len); -+ void (*read_buf)(struct mtd_info *mtd, u_char *buf, int len); -+ int (*verify_buf)(struct mtd_info *mtd, const u_char *buf, int len); -+ void (*select_chip)(struct mtd_info *mtd, int chip); -+ int (*block_bad)(struct mtd_info *mtd, loff_t ofs, int getchip); -+ int (*block_markbad)(struct mtd_info *mtd, loff_t ofs); -+ void (*hwcontrol)(struct mtd_info *mtd, int cmd); -+ int (*dev_ready)(struct mtd_info *mtd); - void (*cmdfunc)(struct mtd_info *mtd, unsigned command, int column, int page_addr); - int (*waitfunc)(struct mtd_info *mtd, struct nand_chip *this, int state); -- void (*calculate_ecc)(const u_char *dat, u_char *ecc_code); -- int (*correct_data)(u_char *dat, u_char *read_ecc, u_char *calc_ecc); -- void (*enable_hwecc)(int mode); -+ int (*calculate_ecc)(struct mtd_info *mtd, const u_char *dat, u_char *ecc_code); -+ int 
(*correct_data)(struct mtd_info *mtd, u_char *dat, u_char *read_ecc, u_char *calc_ecc); -+ void (*enable_hwecc)(struct mtd_info *mtd, int mode); -+ void (*erase_cmd)(struct mtd_info *mtd, int page); -+ int (*scan_bbt)(struct mtd_info *mtd); - int eccmode; - int eccsize; -+ int eccsteps; - int chip_delay; -- spinlock_t chip_lock; -+ spinlock_t chip_lock; - wait_queue_head_t wq; - nand_state_t state; - int page_shift; -+ int phys_erase_shift; -+ int bbt_erase_shift; -+ int chip_shift; - u_char *data_buf; -+ u_char *oob_buf; -+ int oobdirty; - u_char *data_poi; -+ unsigned int options; -+ int badblockpos; -+ int numchips; -+ unsigned long chipsize; -+ int pagemask; -+ int pagebuf; -+ struct nand_oobinfo *autooob; -+ uint8_t *bbt; -+ struct nand_bbt_descr *bbt_td; -+ struct nand_bbt_descr *bbt_md; -+ void *priv; - }; - - /* -@@ -187,46 +327,35 @@ - #define NAND_MFR_SAMSUNG 0xec - #define NAND_MFR_FUJITSU 0x04 - #define NAND_MFR_NATIONAL 0x8f -+#define NAND_MFR_RENESAS 0x07 -+#define NAND_MFR_STMICRO 0x20 - --/* -- * NAND Flash Device ID Structure -- * -- * Structure overview: -+/** -+ * struct nand_flash_dev - NAND Flash Device ID Structure - * -- * name - Identify the device type -- * -- * id - device ID code -- * -- * chipshift - total number of address bits for the device which -- * is used to calculate address offsets and the total -- * number of bytes the device is capable of. -- * -- * page256 - denotes if flash device has 256 byte pages or not. -- * -- * pageadrlen - number of bytes minus one needed to hold the -- * complete address into the flash array. Keep in -- * mind that when a read or write is done to a -- * specific address, the address is input serially -- * 8 bits at a time. This structure member is used -- * by the read/write routines as a loop index for -- * shifting the address out 8 bits at a time. -- * -- * erasesize - size of an erase block in the flash device. 
-+ * @name: Identify the device type -+ * @id: device ID code -+ * @pagesize: Pagesize in bytes. Either 256 or 512 or 0 -+ * If the pagesize is 0, then the real pagesize -+ * and the eraseize are determined from the -+ * extended id bytes in the chip -+ * @erasesize: Size of an erase block in the flash device. -+ * @chipsize: Total chipsize in Mega Bytes -+ * @options: Bitfield to store chip relevant options - */ - struct nand_flash_dev { -- char * name; -+ char *name; - int id; -- int chipshift; -+ unsigned long pagesize; -+ unsigned long chipsize; - unsigned long erasesize; -- char page256; -+ unsigned long options; - }; - --/* -- * NAND Flash Manufacturer ID Structure -- * -- * name - Manufacturer name -- * -- * id - manufacturer ID code of device. -+/** -+ * struct nand_manufacturers - NAND Flash Manufacturer ID Structure -+ * @name: Manufacturer name -+ * @id: manufacturer ID code of device. - */ - struct nand_manufacturers { - int id; -@@ -236,9 +365,85 @@ - extern struct nand_flash_dev nand_flash_ids[]; - extern struct nand_manufacturers nand_manuf_ids[]; - -+/** -+ * struct nand_bbt_descr - bad block table descriptor -+ * @options: options for this descriptor -+ * @pages: the page(s) where we find the bbt, used with option BBT_ABSPAGE -+ * when bbt is searched, then we store the found bbts pages here. -+ * Its an array and supports up to 8 chips now -+ * @offs: offset of the pattern in the oob area of the page -+ * @veroffs: offset of the bbt version counter in the oob are of the page -+ * @version: version read from the bbt page during scan -+ * @len: length of the pattern, if 0 no pattern check is performed -+ * @maxblocks: maximum number of blocks to search for a bbt. This number of -+ * blocks is reserved at the end of the device where the tables are -+ * written. 
-+ * @reserved_block_code: if non-0, this pattern denotes a reserved (rather than -+ * bad) block in the stored bbt -+ * @pattern: pattern to identify bad block table or factory marked good / -+ * bad blocks, can be NULL, if len = 0 -+ * -+ * Descriptor for the bad block table marker and the descriptor for the -+ * pattern which identifies good and bad blocks. The assumption is made -+ * that the pattern and the version count are always located in the oob area -+ * of the first block. -+ */ -+struct nand_bbt_descr { -+ int options; -+ int pages[NAND_MAX_CHIPS]; -+ int offs; -+ int veroffs; -+ uint8_t version[NAND_MAX_CHIPS]; -+ int len; -+ int maxblocks; -+ int reserved_block_code; -+ uint8_t *pattern; -+}; -+ -+/* Options for the bad block table descriptors */ -+ -+/* The number of bits used per block in the bbt on the device */ -+#define NAND_BBT_NRBITS_MSK 0x0000000F -+#define NAND_BBT_1BIT 0x00000001 -+#define NAND_BBT_2BIT 0x00000002 -+#define NAND_BBT_4BIT 0x00000004 -+#define NAND_BBT_8BIT 0x00000008 -+/* The bad block table is in the last good block of the device */ -+#define NAND_BBT_LASTBLOCK 0x00000010 -+/* The bbt is at the given page, else we must scan for the bbt */ -+#define NAND_BBT_ABSPAGE 0x00000020 -+/* The bbt is at the given page, else we must scan for the bbt */ -+#define NAND_BBT_SEARCH 0x00000040 -+/* bbt is stored per chip on multichip devices */ -+#define NAND_BBT_PERCHIP 0x00000080 -+/* bbt has a version counter at offset veroffs */ -+#define NAND_BBT_VERSION 0x00000100 -+/* Create a bbt if none axists */ -+#define NAND_BBT_CREATE 0x00000200 -+/* Search good / bad pattern through all pages of a block */ -+#define NAND_BBT_SCANALLPAGES 0x00000400 -+/* Scan block empty during good / bad block scan */ -+#define NAND_BBT_SCANEMPTY 0x00000800 -+/* Write bbt if neccecary */ -+#define NAND_BBT_WRITE 0x00001000 -+/* Read and write back block contents when writing bbt */ -+#define NAND_BBT_SAVECONTENT 0x00002000 -+/* Search good / bad pattern on 
the first and the second page */ -+#define NAND_BBT_SCAN2NDPAGE 0x00004000 -+ -+/* The maximum number of blocks to scan for a bbt */ -+#define NAND_BBT_SCAN_MAXBLOCKS 4 -+ -+extern int nand_scan_bbt (struct mtd_info *mtd, struct nand_bbt_descr *bd); -+extern int nand_update_bbt (struct mtd_info *mtd, loff_t offs); -+extern int nand_default_bbt (struct mtd_info *mtd); -+extern int nand_isbad_bbt (struct mtd_info *mtd, loff_t offs, int allowbbt); -+extern int nand_erase_nand (struct mtd_info *mtd, struct erase_info *instr, int allowbbt); -+ - /* - * Constants for oob configuration - */ --#define NAND_BADBLOCK_POS 5 -+#define NAND_SMALL_BADBLOCK_POS 5 -+#define NAND_LARGE_BADBLOCK_POS 0 - - #endif /* __LINUX_MTD_NAND_H */ -Index: linux-2.6.5/include/linux/mtd/nand_ecc.h -=================================================================== ---- linux-2.6.5.orig/include/linux/mtd/nand_ecc.h 2004-04-03 22:37:07.000000000 -0500 -+++ linux-2.6.5/include/linux/mtd/nand_ecc.h 2005-02-01 17:11:17.000000000 -0500 -@@ -3,7 +3,7 @@ - * - * Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com) - * -- * $Id: nand_ecc.h,v 1.2 2003/02/20 13:34:20 sjhill Exp $ -+ * $Id: nand_ecc.h,v 1.4 2004/06/17 02:35:02 dbrown Exp $ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as -@@ -12,17 +12,19 @@ - * This file is the header for the ECC algorithm. 
- */ - --/* -- * Creates non-inverted ECC code from line parity -- */ --void nand_trans_result(u_char reg2, u_char reg3, u_char *ecc_code); -+#ifndef __MTD_NAND_ECC_H__ -+#define __MTD_NAND_ECC_H__ -+ -+struct mtd_info; - - /* - * Calculate 3 byte ECC code for 256 byte block - */ --void nand_calculate_ecc (const u_char *dat, u_char *ecc_code); -+int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat, u_char *ecc_code); - - /* - * Detect and correct a 1 bit error for 256 byte block - */ --int nand_correct_data (u_char *dat, u_char *read_ecc, u_char *calc_ecc); -+int nand_correct_data(struct mtd_info *mtd, u_char *dat, u_char *read_ecc, u_char *calc_ecc); -+ -+#endif /* __MTD_NAND_ECC_H__ */ -Index: linux-2.6.5/include/linux/mtd/nftl.h -=================================================================== ---- linux-2.6.5.orig/include/linux/mtd/nftl.h 2004-04-03 22:37:43.000000000 -0500 -+++ linux-2.6.5/include/linux/mtd/nftl.h 2005-02-01 17:11:17.000000000 -0500 -@@ -1,5 +1,5 @@ - /* -- * $Id: nftl.h,v 1.13 2003/05/23 11:25:02 dwmw2 Exp $ -+ * $Id: nftl.h,v 1.16 2004/06/30 14:49:00 dbrown Exp $ - * - * (C) 1999-2003 David Woodhouse <dwmw2@infradead.org> - */ -@@ -10,71 +10,7 @@ - #include <linux/mtd/mtd.h> - #include <linux/mtd/blktrans.h> - --/* Block Control Information */ -- --struct nftl_bci { -- unsigned char ECCSig[6]; -- __u8 Status; -- __u8 Status1; --}__attribute__((packed)); -- --/* Unit Control Information */ -- --struct nftl_uci0 { -- __u16 VirtUnitNum; -- __u16 ReplUnitNum; -- __u16 SpareVirtUnitNum; -- __u16 SpareReplUnitNum; --} __attribute__((packed)); -- --struct nftl_uci1 { -- __u32 WearInfo; -- __u16 EraseMark; -- __u16 EraseMark1; --} __attribute__((packed)); -- --struct nftl_uci2 { -- __u16 FoldMark; -- __u16 FoldMark1; -- __u32 unused; --} __attribute__((packed)); -- --union nftl_uci { -- struct nftl_uci0 a; -- struct nftl_uci1 b; -- struct nftl_uci2 c; --}; -- --struct nftl_oob { -- struct nftl_bci b; -- union nftl_uci u; --}; -- --/* 
NFTL Media Header */ -- --struct NFTLMediaHeader { -- char DataOrgID[6]; -- __u16 NumEraseUnits; -- __u16 FirstPhysicalEUN; -- __u32 FormattedSize; -- unsigned char UnitSizeFactor; --} __attribute__((packed)); -- --#define MAX_ERASE_ZONES (8192 - 512) -- --#define ERASE_MARK 0x3c69 --#define SECTOR_FREE 0xff --#define SECTOR_USED 0x55 --#define SECTOR_IGNORE 0x11 --#define SECTOR_DELETED 0x00 -- --#define FOLD_MARK_IN_PROGRESS 0x5555 -- --#define ZONE_GOOD 0xff --#define ZONE_BAD_ORIGINAL 0 --#define ZONE_BAD_MARKED 7 -- --#ifdef __KERNEL__ -+#include <mtd/nftl-user.h> - - /* these info are used in ReplUnitTable */ - #define BLOCK_NIL 0xffff /* last block of a chain */ -@@ -101,6 +37,7 @@ - unsigned int nb_blocks; /* number of physical blocks */ - unsigned int nb_boot_blocks; /* number of blocks used by the bios */ - struct erase_info instr; -+ struct nand_oobinfo oobinfo; - }; - - int NFTL_mount(struct NFTLrecord *s); -@@ -114,6 +51,4 @@ - #define MAX_SECTORS_PER_UNIT 64 - #define NFTL_PARTN_BITS 4 - --#endif /* __KERNEL__ */ -- - #endif /* __MTD_NFTL_H__ */ -Index: linux-2.6.5/include/linux/mtd/partitions.h -=================================================================== ---- linux-2.6.5.orig/include/linux/mtd/partitions.h 2004-04-03 22:38:16.000000000 -0500 -+++ linux-2.6.5/include/linux/mtd/partitions.h 2005-02-01 17:11:17.000000000 -0500 -@@ -5,7 +5,7 @@ - * - * This code is GPL - * -- * $Id: partitions.h,v 1.14 2003/05/20 21:56:29 dwmw2 Exp $ -+ * $Id: partitions.h,v 1.15 2003/07/09 11:15:43 dwmw2 Exp $ - */ - - #ifndef MTD_PARTITIONS_H -@@ -50,7 +50,7 @@ - #define MTDPART_SIZ_FULL (0) - - --int add_mtd_partitions(struct mtd_info *, struct mtd_partition *, int); -+int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int); - int del_mtd_partitions(struct mtd_info *); - - /* -Index: linux-2.6.5/include/linux/mtd/physmap.h -=================================================================== ---- linux-2.6.5.orig/include/linux/mtd/physmap.h 
1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/include/linux/mtd/physmap.h 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,61 @@ -+/* -+ * For boards with physically mapped flash and using -+ * drivers/mtd/maps/physmap.c mapping driver. -+ * -+ * $Id: physmap.h,v 1.3 2004/07/21 00:16:15 jwboyer Exp $ -+ * -+ * Copyright (C) 2003 MontaVista Software Inc. -+ * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the -+ * Free Software Foundation; either version 2 of the License, or (at your -+ * option) any later version. -+ * -+ */ -+ -+#ifndef __LINUX_MTD_PHYSMAP__ -+ -+#include <linux/config.h> -+ -+#if defined(CONFIG_MTD_PHYSMAP) -+ -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/map.h> -+#include <linux/mtd/partitions.h> -+ -+/* -+ * The map_info for physmap. Board can override size, buswidth, phys, -+ * (*set_vpp)(), etc in their initial setup routine. -+ */ -+extern struct map_info physmap_map; -+ -+/* -+ * Board needs to specify the exact mapping during their setup time. -+ */ -+static inline void physmap_configure(unsigned long addr, unsigned long size, int bankwidth, void (*set_vpp)(struct map_info *, int) ) -+{ -+ physmap_map.phys = addr; -+ physmap_map.size = size; -+ physmap_map.bankwidth = bankwidth; -+ physmap_map.set_vpp = set_vpp; -+} -+ -+#if defined(CONFIG_MTD_PARTITIONS) -+ -+/* -+ * Machines that wish to do flash partition may want to call this function in -+ * their setup routine. -+ * -+ * physmap_set_partitions(mypartitions, num_parts); -+ * -+ * Note that one can always override this hard-coded partition with -+ * command line partition (you need to enable CONFIG_MTD_CMDLINE_PARTS). 
-+ */ -+void physmap_set_partitions(struct mtd_partition *parts, int num_parts); -+ -+#endif /* defined(CONFIG_MTD_PARTITIONS) */ -+#endif /* defined(CONFIG_MTD) */ -+ -+#endif /* __LINUX_MTD_PHYSMAP__ */ -+ -Index: linux-2.6.5/include/linux/rslib.h -=================================================================== ---- linux-2.6.5.orig/include/linux/rslib.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/include/linux/rslib.h 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,99 @@ -+/* -+ * include/linux/rslib.h -+ * -+ * Overview: -+ * Generic Reed Solomon encoder / decoder library -+ * -+ * Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de) -+ * -+ * RS code lifted from reed solomon library written by Phil Karn -+ * Copyright 2002 Phil Karn, KA9Q -+ * -+ * $Id: rslib.h,v 1.1 2004/09/16 23:58:55 gleixner Exp $ -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. 
-+ */ -+ -+#ifndef _RSLIB_H_ -+#define _RSLIB_H_ -+ -+#include <linux/list.h> -+ -+/** -+ * struct rs_contol - rs control structure -+ * -+ * @mm: Bits per symbol -+ * @nn: Symbols per block (= (1<<mm)-1) -+ * @alpha_to: log lookup table -+ * @index_of: Antilog lookup table -+ * @genpoly: Generator polynomial -+ * @nroots: Number of generator roots = number of parity symbols -+ * @fcr: First consecutive root, index form -+ * @prim: Primitive element, index form -+ * @iprim: prim-th root of 1, index form -+ * @gfpoly: The primitive generator polynominal -+ * @users: Users of this structure -+ * @list: List entry for the rs control list -+*/ -+struct rs_control { -+ int mm; -+ int nn; -+ uint16_t *alpha_to; -+ uint16_t *index_of; -+ uint16_t *genpoly; -+ int nroots; -+ int fcr; -+ int prim; -+ int iprim; -+ int gfpoly; -+ int users; -+ struct list_head list; -+}; -+ -+/* General purpose RS codec, 8-bit data width, symbol width 1-15 bit */ -+int encode_rs8 (struct rs_control *rs, uint8_t *data, int len, uint16_t *par, uint16_t invmsk); -+int decode_rs8 (struct rs_control *rs, uint8_t *data, uint16_t *par, -+ int len, uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk); -+ -+/* General purpose RS codec, 16-bit data width, symbol width 1-15 bit */ -+int encode_rs16 (struct rs_control *rs, uint16_t *data, int len, uint16_t *par, uint16_t invmsk); -+int decode_rs16 (struct rs_control *rs, uint16_t *data, uint16_t *par, -+ int len, uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk); -+ -+/* General purpose RS codec, 32-bit data width, symbol width 1-15 bit */ -+int encode_rs32 (struct rs_control *rs, uint32_t *data, int len, uint16_t *par, uint16_t invmsk); -+int decode_rs32 (struct rs_control *rs, uint32_t *data, uint16_t *par, -+ int len, uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk); -+ -+/* Create or get a matching rs control structure */ -+struct rs_control *init_rs (int symsize, int gfpoly, int fcr, int prim, int nroots); -+ -+/* Release a rs 
control structure */ -+void free_rs (struct rs_control *rs); -+ -+/* Internal usage only */ -+static inline int modnn (struct rs_control *rs, int x) -+{ -+ while (x >= rs->nn) { -+ x -= rs->nn; -+ x = (x >> rs->mm) + (x & rs->nn); -+ } -+ return x; -+} -+ -+#define MODNN(x) modnn(rs,x) -+#define MM (rs->mm) -+#define NN (rs->nn) -+#define ALPHA_TO (rs->alpha_to) -+#define INDEX_OF (rs->index_of) -+#define GENPOLY (rs->genpoly) -+#define NROOTS (rs->nroots) -+#define FCR (rs->fcr) -+#define PRIM (rs->prim) -+#define IPRIM (rs->iprim) -+#define A0 (NN) -+ -+#endif -+ -Index: linux-2.6.5/include/mtd/inftl-user.h -=================================================================== ---- linux-2.6.5.orig/include/mtd/inftl-user.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/include/mtd/inftl-user.h 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,91 @@ -+/* -+ * $Id: inftl-user.h,v 1.1 2004/05/05 15:17:00 dwmw2 Exp $ -+ * -+ * Parts of INFTL headers shared with userspace -+ * -+ */ -+ -+#ifndef __MTD_INFTL_USER_H__ -+#define __MTD_INFTL_USER_H__ -+ -+#define OSAK_VERSION 0x5120 -+#define PERCENTUSED 98 -+ -+#define SECTORSIZE 512 -+ -+/* Block Control Information */ -+ -+struct inftl_bci { -+ uint8_t ECCsig[6]; -+ uint8_t Status; -+ uint8_t Status1; -+} __attribute__((packed)); -+ -+struct inftl_unithead1 { -+ uint16_t virtualUnitNo; -+ uint16_t prevUnitNo; -+ uint8_t ANAC; -+ uint8_t NACs; -+ uint8_t parityPerField; -+ uint8_t discarded; -+} __attribute__((packed)); -+ -+struct inftl_unithead2 { -+ uint8_t parityPerField; -+ uint8_t ANAC; -+ uint16_t prevUnitNo; -+ uint16_t virtualUnitNo; -+ uint8_t NACs; -+ uint8_t discarded; -+} __attribute__((packed)); -+ -+struct inftl_unittail { -+ uint8_t Reserved[4]; -+ uint16_t EraseMark; -+ uint16_t EraseMark1; -+} __attribute__((packed)); -+ -+union inftl_uci { -+ struct inftl_unithead1 a; -+ struct inftl_unithead2 b; -+ struct inftl_unittail c; -+}; -+ -+struct inftl_oob { -+ struct inftl_bci b; -+ union inftl_uci u; 
-+}; -+ -+ -+/* INFTL Media Header */ -+ -+struct INFTLPartition { -+ __u32 virtualUnits; -+ __u32 firstUnit; -+ __u32 lastUnit; -+ __u32 flags; -+ __u32 spareUnits; -+ __u32 Reserved0; -+ __u32 Reserved1; -+} __attribute__((packed)); -+ -+struct INFTLMediaHeader { -+ char bootRecordID[8]; -+ __u32 NoOfBootImageBlocks; -+ __u32 NoOfBinaryPartitions; -+ __u32 NoOfBDTLPartitions; -+ __u32 BlockMultiplierBits; -+ __u32 FormatFlags; -+ __u32 OsakVersion; -+ __u32 PercentUsed; -+ struct INFTLPartition Partitions[4]; -+} __attribute__((packed)); -+ -+/* Partition flag types */ -+#define INFTL_BINARY 0x20000000 -+#define INFTL_BDTL 0x40000000 -+#define INFTL_LAST 0x80000000 -+ -+#endif /* __MTD_INFTL_USER_H__ */ -+ -+ -Index: linux-2.6.5/include/mtd/jffs2-user.h -=================================================================== ---- linux-2.6.5.orig/include/mtd/jffs2-user.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/include/mtd/jffs2-user.h 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,35 @@ -+/* -+ * $Id: jffs2-user.h,v 1.1 2004/05/05 11:57:54 dwmw2 Exp $ -+ * -+ * JFFS2 definitions for use in user space only -+ */ -+ -+#ifndef __JFFS2_USER_H__ -+#define __JFFS2_USER_H__ -+ -+/* This file is blessed for inclusion by userspace */ -+#include <linux/jffs2.h> -+#include <endian.h> -+#include <byteswap.h> -+ -+#undef cpu_to_je16 -+#undef cpu_to_je32 -+#undef cpu_to_jemode -+#undef je16_to_cpu -+#undef je32_to_cpu -+#undef jemode_to_cpu -+ -+extern int target_endian; -+ -+#define t16(x) ({ uint16_t __b = (x); (target_endian==__BYTE_ORDER)?__b:bswap_16(__b); }) -+#define t32(x) ({ uint32_t __b = (x); (target_endian==__BYTE_ORDER)?__b:bswap_32(__b); }) -+ -+#define cpu_to_je16(x) ((jint16_t){t16(x)}) -+#define cpu_to_je32(x) ((jint32_t){t32(x)}) -+#define cpu_to_jemode(x) ((jmode_t){t32(x)}) -+ -+#define je16_to_cpu(x) (t16((x).v16)) -+#define je32_to_cpu(x) (t32((x).v32)) -+#define jemode_to_cpu(x) (t32((x).m)) -+ -+#endif /* __JFFS2_USER_H__ */ -Index: 
linux-2.6.5/include/mtd/mtd-abi.h -=================================================================== ---- linux-2.6.5.orig/include/mtd/mtd-abi.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/include/mtd/mtd-abi.h 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,102 @@ -+/* -+ * $Id: mtd-abi.h,v 1.6 2004/08/09 13:38:30 dwmw2 Exp $ -+ * -+ * Portions of MTD ABI definition which are shared by kernel and user space -+ */ -+ -+#ifndef __MTD_ABI_H__ -+#define __MTD_ABI_H__ -+ -+#ifndef __KERNEL__ /* Urgh. The whole point of splitting this out into -+ separate files was to avoid #ifdef __KERNEL__ */ -+#define __user -+#endif -+ -+struct erase_info_user { -+ uint32_t start; -+ uint32_t length; -+}; -+ -+struct mtd_oob_buf { -+ uint32_t start; -+ uint32_t length; -+ unsigned char __user *ptr; -+}; -+ -+#define MTD_ABSENT 0 -+#define MTD_RAM 1 -+#define MTD_ROM 2 -+#define MTD_NORFLASH 3 -+#define MTD_NANDFLASH 4 -+#define MTD_PEROM 5 -+#define MTD_OTHER 14 -+#define MTD_UNKNOWN 15 -+ -+#define MTD_CLEAR_BITS 1 // Bits can be cleared (flash) -+#define MTD_SET_BITS 2 // Bits can be set -+#define MTD_ERASEABLE 4 // Has an erase function -+#define MTD_WRITEB_WRITEABLE 8 // Direct IO is possible -+#define MTD_VOLATILE 16 // Set for RAMs -+#define MTD_XIP 32 // eXecute-In-Place possible -+#define MTD_OOB 64 // Out-of-band data (NAND flash) -+#define MTD_ECC 128 // Device capable of automatic ECC -+ -+// Some common devices / combinations of capabilities -+#define MTD_CAP_ROM 0 -+#define MTD_CAP_RAM (MTD_CLEAR_BITS|MTD_SET_BITS|MTD_WRITEB_WRITEABLE) -+#define MTD_CAP_NORFLASH (MTD_CLEAR_BITS|MTD_ERASEABLE) -+#define MTD_CAP_NANDFLASH (MTD_CLEAR_BITS|MTD_ERASEABLE|MTD_OOB) -+#define MTD_WRITEABLE (MTD_CLEAR_BITS|MTD_SET_BITS) -+ -+ -+// Types of automatic ECC/Checksum available -+#define MTD_ECC_NONE 0 // No automatic ECC available -+#define MTD_ECC_RS_DiskOnChip 1 // Automatic ECC on DiskOnChip -+#define MTD_ECC_SW 2 // SW ECC for Toshiba & Samsung devices -+ -+/* ECC byte 
placement */ -+#define MTD_NANDECC_OFF 0 // Switch off ECC (Not recommended) -+#define MTD_NANDECC_PLACE 1 // Use the given placement in the structure (YAFFS1 legacy mode) -+#define MTD_NANDECC_AUTOPLACE 2 // Use the default placement scheme -+#define MTD_NANDECC_PLACEONLY 3 // Use the given placement in the structure (Do not store ecc result on read) -+ -+struct mtd_info_user { -+ uint8_t type; -+ uint32_t flags; -+ uint32_t size; // Total size of the MTD -+ uint32_t erasesize; -+ uint32_t oobblock; // Size of OOB blocks (e.g. 512) -+ uint32_t oobsize; // Amount of OOB data per block (e.g. 16) -+ uint32_t ecctype; -+ uint32_t eccsize; -+}; -+ -+struct region_info_user { -+ uint32_t offset; /* At which this region starts, -+ * from the beginning of the MTD */ -+ uint32_t erasesize; /* For this region */ -+ uint32_t numblocks; /* Number of blocks in this region */ -+ uint32_t regionindex; -+}; -+ -+#define MEMGETINFO _IOR('M', 1, struct mtd_info_user) -+#define MEMERASE _IOW('M', 2, struct erase_info_user) -+#define MEMWRITEOOB _IOWR('M', 3, struct mtd_oob_buf) -+#define MEMREADOOB _IOWR('M', 4, struct mtd_oob_buf) -+#define MEMLOCK _IOW('M', 5, struct erase_info_user) -+#define MEMUNLOCK _IOW('M', 6, struct erase_info_user) -+#define MEMGETREGIONCOUNT _IOR('M', 7, int) -+#define MEMGETREGIONINFO _IOWR('M', 8, struct region_info_user) -+#define MEMSETOOBSEL _IOW('M', 9, struct nand_oobinfo) -+#define MEMGETOOBSEL _IOR('M', 10, struct nand_oobinfo) -+#define MEMGETBADBLOCK _IOW('M', 11, loff_t) -+#define MEMSETBADBLOCK _IOW('M', 12, loff_t) -+ -+struct nand_oobinfo { -+ uint32_t useecc; -+ uint32_t eccbytes; -+ uint32_t oobfree[8][2]; -+ uint32_t eccpos[32]; -+}; -+ -+#endif /* __MTD_ABI_H__ */ -Index: linux-2.6.5/include/mtd/mtd-user.h -=================================================================== ---- linux-2.6.5.orig/include/mtd/mtd-user.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/include/mtd/mtd-user.h 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 
+1,20 @@ -+/* -+ * $Id: mtd-user.h,v 1.2 2004/05/05 14:44:57 dwmw2 Exp $ -+ * -+ * MTD ABI header for use by user space only. -+ */ -+ -+#ifndef __MTD_USER_H__ -+#define __MTD_USER_H__ -+ -+#include <stdint.h> -+ -+/* This file is blessed for inclusion by userspace */ -+#include <mtd/mtd-abi.h> -+ -+typedef struct mtd_info_user mtd_info_t; -+typedef struct erase_info_user erase_info_t; -+typedef struct region_info_user region_info_t; -+typedef struct nand_oobinfo nand_oobinfo_t; -+ -+#endif /* __MTD_USER_H__ */ -Index: linux-2.6.5/include/mtd/nftl-user.h -=================================================================== ---- linux-2.6.5.orig/include/mtd/nftl-user.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/include/mtd/nftl-user.h 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,76 @@ -+/* -+ * $Id: nftl-user.h,v 1.1 2004/05/05 14:44:57 dwmw2 Exp $ -+ * -+ * Parts of NFTL headers shared with userspace -+ * -+ */ -+ -+#ifndef __MTD_NFTL_USER_H__ -+#define __MTD_NFTL_USER_H__ -+ -+/* Block Control Information */ -+ -+struct nftl_bci { -+ unsigned char ECCSig[6]; -+ uint8_t Status; -+ uint8_t Status1; -+}__attribute__((packed)); -+ -+/* Unit Control Information */ -+ -+struct nftl_uci0 { -+ uint16_t VirtUnitNum; -+ uint16_t ReplUnitNum; -+ uint16_t SpareVirtUnitNum; -+ uint16_t SpareReplUnitNum; -+} __attribute__((packed)); -+ -+struct nftl_uci1 { -+ uint32_t WearInfo; -+ uint16_t EraseMark; -+ uint16_t EraseMark1; -+} __attribute__((packed)); -+ -+struct nftl_uci2 { -+ uint16_t FoldMark; -+ uint16_t FoldMark1; -+ uint32_t unused; -+} __attribute__((packed)); -+ -+union nftl_uci { -+ struct nftl_uci0 a; -+ struct nftl_uci1 b; -+ struct nftl_uci2 c; -+}; -+ -+struct nftl_oob { -+ struct nftl_bci b; -+ union nftl_uci u; -+}; -+ -+/* NFTL Media Header */ -+ -+struct NFTLMediaHeader { -+ char DataOrgID[6]; -+ uint16_t NumEraseUnits; -+ uint16_t FirstPhysicalEUN; -+ uint32_t FormattedSize; -+ unsigned char UnitSizeFactor; -+} __attribute__((packed)); -+ 
-+#define MAX_ERASE_ZONES (8192 - 512) -+ -+#define ERASE_MARK 0x3c69 -+#define SECTOR_FREE 0xff -+#define SECTOR_USED 0x55 -+#define SECTOR_IGNORE 0x11 -+#define SECTOR_DELETED 0x00 -+ -+#define FOLD_MARK_IN_PROGRESS 0x5555 -+ -+#define ZONE_GOOD 0xff -+#define ZONE_BAD_ORIGINAL 0 -+#define ZONE_BAD_MARKED 7 -+ -+ -+#endif /* __MTD_NFTL_USER_H__ */ -Index: linux-2.6.5/lib/Kconfig -=================================================================== ---- linux-2.6.5.orig/lib/Kconfig 2005-02-01 16:55:08.000000000 -0500 -+++ linux-2.6.5/lib/Kconfig 2005-02-01 17:11:17.000000000 -0500 -@@ -24,5 +24,11 @@ - config ZLIB_DEFLATE - tristate - -+# -+# reed solomon support is select'ed if needed -+# -+config REED_SOLOMON -+ tristate -+ - endmenu - -Index: linux-2.6.5/lib/Kconfig.orig -=================================================================== ---- linux-2.6.5.orig/lib/Kconfig.orig 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/lib/Kconfig.orig 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,28 @@ -+# -+# Library configuration -+# -+ -+menu "Library routines" -+ -+config CRC32 -+ tristate "CRC32 functions" -+ help -+ This option is provided for the case where no in-kernel-tree -+ modules require CRC32 functions, but a module built outside the -+ kernel tree does. Such modules that use library CRC32 functions -+ require M here. 
-+ -+config QSORT -+ bool "Quick Sort" -+ -+# -+# compression support is select'ed if needed -+# -+config ZLIB_INFLATE -+ tristate -+ -+config ZLIB_DEFLATE -+ tristate -+ -+endmenu -+ -Index: linux-2.6.5/lib/Makefile -=================================================================== ---- linux-2.6.5.orig/lib/Makefile 2005-02-01 16:55:56.000000000 -0500 -+++ linux-2.6.5/lib/Makefile 2005-02-01 17:11:17.000000000 -0500 -@@ -23,6 +23,7 @@ - - obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ - obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ -+obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ - - host-progs := gen_crc32table - clean-files := crc32table.h -Index: linux-2.6.5/lib/reed_solomon/Makefile -=================================================================== ---- linux-2.6.5.orig/lib/reed_solomon/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/lib/reed_solomon/Makefile 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,6 @@ -+# -+# This is a modified version of reed solomon lib, -+# -+ -+obj-$(CONFIG_REED_SOLOMON) += reed_solomon.o -+ -Index: linux-2.6.5/lib/reed_solomon/decode_rs.c -=================================================================== ---- linux-2.6.5.orig/lib/reed_solomon/decode_rs.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/lib/reed_solomon/decode_rs.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,225 @@ -+/* -+ * lib/reed_solomon/decode_rs.c -+ * -+ * Overview: -+ * Generic Reed Solomon encoder / decoder library -+ * -+ * Copyright 2002, Phil Karn, KA9Q -+ * May be used under the terms of the GNU General Public License (GPL) -+ * -+ * Adaption to the kernel by Thomas Gleixner (tglx@linutronix.de) -+ * -+ * $Id: decode_rs.c,v 1.1 2004/09/16 23:58:56 gleixner Exp $ -+ * -+ */ -+ -+/* Generic data witdh independend code which is included by the -+ * wrappers. 
-+ */ -+{ -+ int deg_lambda, el, deg_omega; -+ int i, j, r, k, PAD; -+ uint16_t u, q, tmp, num1, num2, den, discr_r, syn_error; -+ /* Err+Eras Locator poly and syndrome poly */ -+ uint16_t lambda[NROOTS + 1], syn[NROOTS]; -+ uint16_t b[NROOTS + 1], t[NROOTS + 1], omega[NROOTS + 1]; -+ uint16_t root[NROOTS], reg[NROOTS + 1], loc[NROOTS]; -+ int count = 0; -+ uint16_t msk = (uint16_t) rs->nn; -+ -+ /* Check length parameter for validity */ -+ PAD = NN - NROOTS - len; -+ if (PAD < 0 || PAD >= NN) -+ return -ERANGE; -+ -+ /* The caller does not provide the syndrome */ -+ if (s == NULL) { -+ /* form the syndromes; i.e., evaluate data(x) at roots of g(x) */ -+ for (i = 0; i < NROOTS; i++) -+ syn[i] = (((uint16_t) data[0]) ^ invmsk) & msk; -+ -+ for (j = 1; j < len; j++) { -+ for (i = 0; i < NROOTS; i++) { -+ if (syn[i] == 0) { -+ syn[i] = (((uint16_t) data[j]) ^ invmsk) & msk; -+ } else { -+ syn[i] = ((((uint16_t) data[j]) ^ invmsk) & msk) ^ ALPHA_TO[MODNN(INDEX_OF[syn[i]] + (FCR+i)*PRIM)]; -+ } -+ } -+ } -+ -+ for (j = 0; j < NROOTS; j++) { -+ for (i = 0; i < NROOTS; i++) { -+ if (syn[i] == 0) { -+ syn[i] = ((uint16_t) par[j]) & msk; -+ } else { -+ syn[i] = (((uint16_t) par[j]) & msk) ^ ALPHA_TO[MODNN(INDEX_OF[syn[i]] + (FCR+i)*PRIM)]; -+ } -+ } -+ } -+ s = syn; -+ } -+ -+ /* Convert syndromes to index form, checking for nonzero condition */ -+ syn_error = 0; -+ for (i = 0; i < NROOTS; i++) { -+ syn_error |= s[i]; -+ s[i] = INDEX_OF[s[i]]; -+ } -+ -+ if (!syn_error) { -+ /* if syndrome is zero, data[] is a codeword and there are no -+ * errors to correct. 
So return data[] unmodified -+ */ -+ count = 0; -+ goto finish; -+ } -+ memset (&lambda[1], 0, NROOTS * sizeof (lambda[0])); -+ lambda[0] = 1; -+ -+ if (no_eras > 0) { -+ /* Init lambda to be the erasure locator polynomial */ -+ lambda[1] = ALPHA_TO[MODNN (PRIM * (NN - 1 - eras_pos[0]))]; -+ for (i = 1; i < no_eras; i++) { -+ u = MODNN (PRIM * (NN - 1 - eras_pos[i])); -+ for (j = i + 1; j > 0; j--) { -+ tmp = INDEX_OF[lambda[j - 1]]; -+ if (tmp != A0) -+ lambda[j] ^= ALPHA_TO[MODNN (u + tmp)]; -+ } -+ } -+ } -+ -+ for (i = 0; i < NROOTS + 1; i++) -+ b[i] = INDEX_OF[lambda[i]]; -+ -+ /* -+ * Begin Berlekamp-Massey algorithm to determine error+erasure -+ * locator polynomial -+ */ -+ r = no_eras; -+ el = no_eras; -+ while (++r <= NROOTS) { /* r is the step number */ -+ /* Compute discrepancy at the r-th step in poly-form */ -+ discr_r = 0; -+ for (i = 0; i < r; i++) { -+ if ((lambda[i] != 0) && (s[r - i - 1] != A0)) { -+ discr_r ^= ALPHA_TO[MODNN (INDEX_OF[lambda[i]] + s[r - i - 1])]; -+ } -+ } -+ discr_r = INDEX_OF[discr_r]; /* Index form */ -+ if (discr_r == A0) { -+ /* 2 lines below: B(x) <-- x*B(x) */ -+ memmove (&b[1], b, NROOTS * sizeof (b[0])); -+ b[0] = A0; -+ } else { -+ /* 7 lines below: T(x) <-- lambda(x) - discr_r*x*b(x) */ -+ t[0] = lambda[0]; -+ for (i = 0; i < NROOTS; i++) { -+ if (b[i] != A0) -+ t[i + 1] = lambda[i + 1] ^ ALPHA_TO[MODNN (discr_r + b[i])]; -+ else -+ t[i + 1] = lambda[i + 1]; -+ } -+ if (2 * el <= r + no_eras - 1) { -+ el = r + no_eras - el; -+ /* -+ * 2 lines below: B(x) <-- inv(discr_r) * -+ * lambda(x) -+ */ -+ for (i = 0; i <= NROOTS; i++) -+ b[i] = (lambda[i] == 0) ? 
A0 : MODNN (INDEX_OF[lambda[i]] - discr_r + NN); -+ } else { -+ /* 2 lines below: B(x) <-- x*B(x) */ -+ memmove (&b[1], b, NROOTS * sizeof (b[0])); -+ b[0] = A0; -+ } -+ memcpy (lambda, t, (NROOTS + 1) * sizeof (t[0])); -+ } -+ } -+ -+ /* Convert lambda to index form and compute deg(lambda(x)) */ -+ deg_lambda = 0; -+ for (i = 0; i < NROOTS + 1; i++) { -+ lambda[i] = INDEX_OF[lambda[i]]; -+ if (lambda[i] != A0) -+ deg_lambda = i; -+ } -+ /* Find roots of the error+erasure locator polynomial by Chien search */ -+ memcpy (&reg[1], &lambda[1], NROOTS * sizeof (reg[0])); -+ count = 0; /* Number of roots of lambda(x) */ -+ for (i = 1, k = IPRIM - 1; i <= NN; i++, k = MODNN (k + IPRIM)) { -+ q = 1; /* lambda[0] is always 0 */ -+ for (j = deg_lambda; j > 0; j--) { -+ if (reg[j] != A0) { -+ reg[j] = MODNN (reg[j] + j); -+ q ^= ALPHA_TO[reg[j]]; -+ } -+ } -+ if (q != 0) -+ continue; /* Not a root */ -+ /* store root (index-form) and error location number */ -+ root[count] = i; -+ loc[count] = k; -+ /* If we've already found max possible roots, -+ * abort the search to save time -+ */ -+ if (++count == deg_lambda) -+ break; -+ } -+ if (deg_lambda != count) { -+ /* -+ * deg(lambda) unequal to number of roots => uncorrectable -+ * error detected -+ */ -+ count = -1; -+ goto finish; -+ } -+ /* -+ * Compute err+eras evaluator poly omega(x) = s(x)*lambda(x) (modulo -+ * x**NROOTS). in index form. Also find deg(omega). -+ */ -+ deg_omega = deg_lambda - 1; -+ for (i = 0; i <= deg_omega; i++) { -+ tmp = 0; -+ for (j = i; j >= 0; j--) { -+ if ((s[i - j] != A0) && (lambda[j] != A0)) -+ tmp ^= -+ ALPHA_TO[MODNN (s[i - j] + lambda[j])]; -+ } -+ omega[i] = INDEX_OF[tmp]; -+ } -+ -+ /* -+ * Compute error values in poly-form.
num1 = omega(inv(X(l))), num2 = -+ * inv(X(l))**(FCR-1) and den = lambda_pr(inv(X(l))) all in poly-form -+ */ -+ for (j = count - 1; j >= 0; j--) { -+ num1 = 0; -+ for (i = deg_omega; i >= 0; i--) { -+ if (omega[i] != A0) -+ num1 ^= ALPHA_TO[MODNN (omega[i] + i * root[j])]; -+ } -+ num2 = ALPHA_TO[MODNN (root[j] * (FCR - 1) + NN)]; -+ den = 0; -+ -+ /* lambda[i+1] for i even is the formal derivative lambda_pr of lambda[i] */ -+ for (i = min (deg_lambda, NROOTS - 1) & ~1; i >= 0; i -= 2) { -+ if (lambda[i + 1] != A0) -+ den ^= ALPHA_TO[MODNN (lambda[i + 1] + i * root[j])]; -+ } -+ /* Apply error to data */ -+ if (num1 != 0 && loc[j] >= PAD) { -+ uint16_t cor = ALPHA_TO[MODNN (INDEX_OF[num1] + INDEX_OF[num2] + NN - INDEX_OF[den])]; -+ data[loc[j] - PAD] ^= cor ^ invmsk; -+ } -+ } -+ -+finish: -+ if (eras_pos != NULL) { -+ for (i = 0; i < count; i++) -+ eras_pos[i] = loc[i] - PAD; -+ } -+ return count; -+ -+} -Index: linux-2.6.5/lib/reed_solomon/encode_rs.c -=================================================================== ---- linux-2.6.5.orig/lib/reed_solomon/encode_rs.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/lib/reed_solomon/encode_rs.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,47 @@ -+/* -+ * lib/reed_solomon/encode_rs.c -+ * -+ * Overview: -+ * Generic Reed Solomon encoder / decoder library -+ * -+ * Copyright 2002, Phil Karn, KA9Q -+ * May be used under the terms of the GNU General Public License (GPL) -+ * -+ * Adaption to the kernel by Thomas Gleixner (tglx@linutronix.de) -+ * -+ * $Id: encode_rs.c,v 1.1 2004/09/16 23:58:56 gleixner Exp $ -+ * -+ */ -+ -+/* Generic data witdh independend code which is included by the -+ * wrappers. 
-+ * int encode_rsX (struct rs_control *rs, uintX_t *data, int len, uintY_t *par) -+ */ -+{ -+ int i, j, pad; -+ uint16_t feedback; -+ uint16_t msk = (uint16_t) NN; -+ -+ /* Check length parameter for validity */ -+ pad = NN - NROOTS - len; -+ if (pad < 0 || pad >= NN) -+ return -ERANGE; -+ -+ memset (par, 0, NROOTS * sizeof (uint16_t)); -+ -+ for (i = 0; i < len; i++) { -+ feedback = INDEX_OF[((((uint16_t) data[i])^invmsk) & msk) ^ par[0]]; -+ /* feedback term is non-zero */ -+ if (feedback != A0) { -+ for (j = 1; j < NROOTS; j++) -+ par[j] ^= ALPHA_TO[MODNN (feedback + GENPOLY[NROOTS - j])]; -+ } -+ /* Shift */ -+ memmove (&par[0], &par[1], sizeof (uint16_t) * (NROOTS - 1)); -+ if (feedback != A0) -+ par[NROOTS - 1] = ALPHA_TO[MODNN (feedback + GENPOLY[0])]; -+ else -+ par[NROOTS - 1] = 0; -+ } -+ return 0; -+} -Index: linux-2.6.5/lib/reed_solomon/rslib.c -=================================================================== ---- linux-2.6.5.orig/lib/reed_solomon/rslib.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5/lib/reed_solomon/rslib.c 2005-02-01 17:11:17.000000000 -0500 -@@ -0,0 +1,366 @@ -+/* -+ * lib/reed_solomon/lib_rs.c -+ * -+ * Overview: -+ * Generic Reed Solomon encoder / decoder library -+ * -+ * Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de) -+ * -+ * Reed Solomon code lifted from reed solomon library written by Phil Karn -+ * Copyright 2002 Phil Karn, KA9Q -+ * -+ * $Id: rslib.c,v 1.1 2004/09/16 23:58:56 gleixner Exp $ -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * Description: -+ * -+ * The generic Reed Solomon library provides runtime configurable -+ * encoding / decoding of RS codes. -+ * Each user must call init_rs to get a pointer to a rs_control -+ * structure for the given rs parameters. 
This structure is either -+ * generated or a already available matching control structure is used. -+ * If a structure is generated then the polynominal arrays for -+ * fast encoding / decoding are built. This can take some time so -+ * make sure not to call this function from a timecritical path. -+ * Usually a module / driver should initialize the neccecary -+ * rs_control structure on module / driver init and release it -+ * on exit. -+ * The encoding puts the calculated syndrome into a given syndrom -+ * buffer. -+ * The decoding is a two step process. The first step calculates -+ * the syndrome over the received (data + syndrom) and calls the -+ * second stage, which does the decoding / error correction itself. -+ * Many hw encoders provide a syndrom calculation over the received -+ * data + syndrom and can call the second stage directly. -+ * -+ */ -+ -+#include <linux/errno.h> -+#include <linux/kernel.h> -+#include <linux/init.h> -+#include <linux/module.h> -+#include <linux/rslib.h> -+#include <linux/slab.h> -+#include <asm/semaphore.h> -+ -+/* This list holds all currently allocated rs control structures */ -+static LIST_HEAD (rslist); -+/* Protection for the list */ -+static DECLARE_MUTEX(rslistlock); -+ -+/** -+ * rs_init - Initialize a Reed-Solomon codec -+ * -+ * @symsize: symbol size, bits (1-8) -+ * @gfpoly: Field generator polynomial coefficients -+ * @fcr: first root of RS code generator polynomial, index form -+ * @prim: primitive element to generate polynomial roots -+ * @nroots: RS code generator polynomial degree (number of roots) -+ * -+ * Allocate a control structure and the polynom arrays for faster -+ * en/decoding. 
Fill the arrays according to the given parameters -+ */ -+static struct rs_control *rs_init (int symsize, int gfpoly, int fcr, int prim, int nroots) -+{ -+ struct rs_control *rs; -+ int i, j, sr, root, iprim; -+ -+ /* Allocate the control structure */ -+ rs = (struct rs_control *) kmalloc (sizeof (struct rs_control), GFP_KERNEL); -+ if (rs == NULL) -+ return NULL; -+ -+ INIT_LIST_HEAD(&rs->list); -+ -+ rs->mm = symsize; -+ rs->nn = (1 << symsize) - 1; -+ rs->fcr = fcr; -+ rs->prim = prim; -+ rs->nroots = nroots; -+ rs->gfpoly = gfpoly; -+ -+ /* Allocate the arrays */ -+ rs->alpha_to = (uint16_t *) kmalloc (sizeof (uint16_t) * (rs->nn + 1), GFP_KERNEL); -+ if (rs->alpha_to == NULL) -+ goto errrs; -+ -+ rs->index_of = (uint16_t *) kmalloc (sizeof (uint16_t) * (rs->nn + 1), GFP_KERNEL); -+ if (rs->index_of == NULL) -+ goto erralp; -+ -+ rs->genpoly = (uint16_t *) kmalloc (sizeof (uint16_t) * (rs->nroots + 1), GFP_KERNEL); -+ if (rs->genpoly == NULL) -+ goto erridx; -+ -+ /* Generate Galois field lookup tables */ -+ rs->index_of[0] = rs->nn; /* log(zero) = -inf */ -+ rs->alpha_to[rs->nn] = 0; /* alpha**-inf = 0 */ -+ sr = 1; -+ for (i = 0; i < rs->nn; i++) { -+ rs->index_of[sr] = i; -+ rs->alpha_to[i] = sr; -+ sr <<= 1; -+ if (sr & (1 << symsize)) -+ sr ^= gfpoly; -+ sr &= rs->nn; -+ } -+ /* If it's not primitive, exit */ -+ if (sr != 1) -+ goto errpol; -+ -+ /* Find prim-th root of 1, used in decoding */ -+ for (iprim = 1; (iprim % prim) != 0; iprim += rs->nn); -+ /* prim-th root of 1, index form */ -+ rs->iprim = iprim / prim; -+ -+ /* Form RS code generator polynomial from its roots */ -+ rs->genpoly[0] = 1; -+ for (i = 0, root = fcr * prim; i < nroots; i++, root += prim) { -+ rs->genpoly[i + 1] = 1; -+ -+ /* Multiply rs->genpoly[] by @**(root + x) */ -+ for (j = i; j > 0; j--) { -+ if (rs->genpoly[j] != 0) -+ rs->genpoly[j] = rs->genpoly[j -1] ^ rs->alpha_to[(rs->index_of[rs->genpoly[j]] + root) % rs->nn]; -+ else -+ rs->genpoly[j] = rs->genpoly[j - 1]; -+ } -+ /* 
rs->genpoly[0] can never be zero */ -+ rs->genpoly[0] = rs->alpha_to[(rs->index_of[rs->genpoly[0]] + root) % rs->nn]; -+ } -+ /* convert rs->genpoly[] to index form for quicker encoding */ -+ for (i = 0; i <= nroots; i++) -+ rs->genpoly[i] = rs->index_of[rs->genpoly[i]]; -+ return rs; -+ -+ /* Error exit */ -+errpol: -+ kfree (rs->genpoly); -+erridx: -+ kfree (rs->index_of); -+erralp: -+ kfree (rs->alpha_to); -+errrs: -+ kfree (rs); -+ return NULL; -+} -+ -+ -+/** -+ * free_rs - Free the rs control structure, if its not longer used -+ * -+ * @rs: the control structure which is not longer used by the -+ * caller -+ */ -+void free_rs (struct rs_control *rs) -+{ -+ down (&rslistlock); -+ rs->users--; -+ if (!rs->users) { -+ list_del (&rs->list); -+ kfree (rs->alpha_to); -+ kfree (rs->index_of); -+ kfree (rs->genpoly); -+ kfree (rs); -+ } -+ up (&rslistlock); -+} -+ -+/** -+ * init_rs - Find a matching or allocate a new rs control structure -+ * -+ * @symsize: the symbol size (number of bits) -+ * @gfpoly: the extended Galois field generator polynomial coefficients, -+ * with the 0th coefficient in the low order bit. 
The polynomial -+ * must be primitive; -+ * @fcr: the first consecutive root of the rs code generator polynomial -+ * in index form -+ * @prim: primitive element to generate polynomial roots -+ * @nroots: RS code generator polynomial degree (number of roots) -+ */ -+struct rs_control *init_rs (int symsize, int gfpoly, int fcr, int prim, int nroots) -+{ -+ struct list_head *tmp; -+ struct rs_control *rs; -+ -+ /* Sanity checks */ -+ if (symsize < 1) -+ return NULL; -+ if (fcr < 0 || fcr >= (1<<symsize)) -+ return NULL; -+ if (prim <= 0 || prim >= (1<<symsize)) -+ return NULL; -+ if (nroots < 0 || nroots >= (1<<symsize)) -+ return NULL; -+ -+ down (&rslistlock); -+ -+#ifdef __KERNEL__ -+ /* Walk through the list and look for a matching entry */ -+ list_for_each (tmp, &rslist) { -+ rs = list_entry (tmp, struct rs_control, list); -+ if (symsize != rs->mm) -+ continue; -+ if (gfpoly != rs->gfpoly) -+ continue; -+ if (fcr != rs->fcr) -+ continue; -+ if (prim != rs->prim) -+ continue; -+ if (nroots != rs->nroots) -+ continue; -+ /* We have a matching one already */ -+ rs->users++; -+ goto out; -+ } -+#endif -+ -+ /* Create a new one */ -+ rs = rs_init (symsize, gfpoly, fcr, prim, nroots); -+ if (rs) { -+ rs->users = 1; -+ list_add (&rs->list, &rslist); -+ } -+out: -+ up (&rslistlock); -+ return rs; -+} -+ -+/** -+ * encode_rs8 - Calculate the parity for data values (8bit data width) -+ * -+ * @rs: the rs control structure -+ * @data: data field of a given type -+ * @len: data length -+ * @par: parity data field -+ * @invmsk: invert data mask -+ * -+ * The parity uses a uint16_t data type to enable -+ * symbol size > 8. The calling code must take care of encoding of the -+ * syndrome result for storage itself. 
-+ */ -+int encode_rs8 (struct rs_control *rs, uint8_t *data, int len, uint16_t *par, uint16_t invmsk) -+{ -+#define RSINVMSK 0xFF -+#include "encode_rs.c" -+} -+ -+/** -+ * decode_rs8 - Decode codeword (8bit data width) -+ * -+ * @rs: the rs control structure -+ * @data: data field of a given type -+ * @par: received parity data field -+ * @len: data length -+ * @s: syndrome data field (if NULL, syndrome must be calculated) -+ * @no_eras: number of erasures -+ * @eras_pos: position of erasures, can be NULL -+ * @invmsk: invert data mask -+ * -+ * The syndrome and parity uses a uint16_t data type to enable -+ * symbol size > 8. The calling code must take care of decoding of the -+ * syndrome result and the received parity before calling this code. -+ */ -+int decode_rs8 (struct rs_control *rs, uint8_t *data, uint16_t *par, -+ int len, uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk) -+{ -+#include "decode_rs.c" -+} -+ -+/** -+ * encode_rs16 - Calculate the parity for data values (16bit data width) -+ * -+ * @rs: the rs control structure -+ * @data: data field of a given type -+ * @len: data length -+ * @par: parity data field -+ * @invmsk: invert data mask -+ * -+ * Each field in the data array contains up to symbol size bits of valid data. -+ */ -+int encode_rs16 (struct rs_control *rs, uint16_t *data, int len, uint16_t *par, uint16_t invmsk) -+{ -+#undef RSINVMSK -+#define RSINVMSK 0xFFFF -+#include "encode_rs.c" -+} -+ -+/** -+ * decode_rs16 - Decode codeword (16bit data width) -+ * -+ * @rs: the rs control structure -+ * @data: data field of a given type -+ * @par: received parity data field -+ * @len: data length -+ * @s: syndrome data field (if NULL, syndrome must be calculated) -+ * @no_eras: number of erasures -+ * @eras_pos: position of erasures, can be NULL -+ * @invmsk: invert data mask -+ * -+ * Each field in the data array contains up to symbol size bits of valid data. 
-+ */ -+int decode_rs16 (struct rs_control *rs, uint16_t *data, uint16_t *par, -+ int len, uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk) -+{ -+#include "decode_rs.c" -+} -+ -+/** -+ * encode_rs32 - Calculate the parity for data values (32bit data width) -+ * -+ * @rs: the rs control structure -+ * @data: data field of a given type -+ * @len: data length -+ * @par: parity data field -+ * @invmsk: invert data mask -+ * -+ * The parity uses a uint16_t data type due to the fact that -+ * we can't handle symbol size >= 16 bit as the polynominal arrays would -+ * be to large and the computation would be extreme slow. -+ * Each field in the data array contains up to symbol size bits of data. -+ */ -+int encode_rs32 (struct rs_control *rs, uint32_t *data, int len, uint16_t *par, uint16_t invmsk) -+{ -+#include "encode_rs.c" -+} -+ -+/** -+ * decode_rs32 - Decode codeword (32bit data width) -+ * -+ * @rs: the rs control structure -+ * @data: data field of a given type -+ * @par: received parity data field -+ * @len: data length -+ * @s: syndrome data field (if NULL, syndrome must be calculated) -+ * @no_eras: number of erasures -+ * @eras_pos: position of erasures, can be NULL -+ * @invmsk: invert data mask -+ * -+ * The syndrome and parity use a uint16_t data type due to the fact that -+ * we can't handle symbol size > 16 as the polynominal arrays would be to -+ * large and the computation would be extreme slow. The calling code must -+ * take care of decoding of the syndrome result and the received parity -+ * before calling this code. -+ * Each field in the data array contains up to symbol size bits of data. 
-+ */ -+int decode_rs32 (struct rs_control *rs, uint32_t *data, uint16_t *par, -+ int len, uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk) -+{ -+#include "decode_rs.c" -+} -+ -+EXPORT_SYMBOL(encode_rs8); -+EXPORT_SYMBOL(encode_rs16); -+EXPORT_SYMBOL(encode_rs32); -+EXPORT_SYMBOL(decode_rs8); -+EXPORT_SYMBOL(decode_rs16); -+EXPORT_SYMBOL(decode_rs32); -+EXPORT_SYMBOL(init_rs); -+EXPORT_SYMBOL(free_rs); -+ -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("Reed Solomon encoder/decoder"); -+MODULE_AUTHOR("Phil Karn, Thomas Gleixner"); diff --git a/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-rhel4.patch b/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-rhel4.patch deleted file mode 100644 index 4707842f3a..0000000000 --- a/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-rhel4.patch +++ /dev/null @@ -1,123 +0,0 @@ -diff -urp a/fs/cifs/dir.c b/fs/cifs/dir.c ---- a/fs/cifs/dir.c 2006-03-10 18:50:15.000000000 -0800 -+++ b/fs/cifs/dir.c 2006-03-10 18:50:44.000000000 -0800 -@@ -146,23 +146,23 @@ cifs_create(struct inode *inode, struct - } - - if(nd) { -- if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY) -+ if ((nd->intent.it_flags & O_ACCMODE) == O_RDONLY) - desiredAccess = GENERIC_READ; -- else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) { -+ else if ((nd->intent.it_flags & O_ACCMODE) == O_WRONLY) { - desiredAccess = GENERIC_WRITE; - write_only = TRUE; -- } else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) { -+ } else if ((nd->intent.it_flags & O_ACCMODE) == O_RDWR) { - /* GENERIC_ALL is too much permission to request */ - /* can cause unnecessary access denied on create */ - /* desiredAccess = GENERIC_ALL; */ - desiredAccess = GENERIC_READ | GENERIC_WRITE; - } - -- if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) -+ if((nd->intent.it_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) - disposition = FILE_CREATE; -- else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) -+ else 
if((nd->intent.it_flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) - disposition = FILE_OVERWRITE_IF; -- else if((nd->intent.open.flags & O_CREAT) == O_CREAT) -+ else if((nd->intent.it_flags & O_CREAT) == O_CREAT) - disposition = FILE_OPEN_IF; - else { - cFYI(1,("Create flag not set in create function")); -diff -urp a/fs/nfs/dir.c b/fs/nfs/dir.c ---- a/fs/nfs/dir.c 2006-03-10 19:07:50.000000000 -0800 -+++ b/fs/nfs/dir.c 2006-03-10 17:27:15.000000000 -0800 -@@ -752,7 +752,7 @@ int nfs_is_exclusive_create(struct inode - return 0; - if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE)) - return 0; -- return (nd->intent.open.flags & O_EXCL) != 0; -+ return (nd->intent.it_flags & O_EXCL) != 0; - } - - static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) -@@ -827,7 +827,7 @@ static int is_atomic_open(struct inode * - if (nd->flags & LOOKUP_DIRECTORY) - return 0; - /* Are we trying to write to a read only partition? */ -- if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) -+ if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) - return 0; - return 1; - } -@@ -848,7 +848,7 @@ static struct dentry *nfs_atomic_lookup( - dentry->d_op = NFS_PROTO(dir)->dentry_ops; - - /* Let vfs_create() deal with O_EXCL */ -- if (nd->intent.open.flags & O_EXCL) -+ if (nd->intent.it_flags & O_EXCL) - goto no_entry; - - /* Open the file on the server */ -@@ -860,7 +860,7 @@ static struct dentry *nfs_atomic_lookup( - goto out; - } - -- if (nd->intent.open.flags & O_CREAT) { -+ if (nd->intent.it_flags & O_CREAT) { - nfs_begin_data_update(dir); - inode = nfs4_atomic_open(dir, dentry, nd); - nfs_end_data_update(dir); -@@ -876,7 +876,7 @@ static struct dentry *nfs_atomic_lookup( - break; - /* This turned out not to be a regular file */ - case -ELOOP: -- if (!(nd->intent.open.flags & O_NOFOLLOW)) -+ if (!(nd->intent.it_flags & O_NOFOLLOW)) - goto no_open; - /* case -EISDIR: */ - /* 
case -EINVAL: */ -@@ -915,7 +915,7 @@ static int nfs_open_revalidate(struct de - /* NFS only supports OPEN on regular files */ - if (!S_ISREG(inode->i_mode)) - goto no_open; -- openflags = nd->intent.open.flags; -+ openflags = nd->intent.it_flags; - /* We cannot do exclusive creation on a positive dentry */ - if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) - goto no_open; -@@ -1080,7 +1080,7 @@ static int nfs_create(struct inode *dir, - attr.ia_valid = ATTR_MODE; - - if (nd && (nd->flags & LOOKUP_CREATE)) -- open_flags = nd->intent.open.flags; -+ open_flags = nd->intent.it_flags; - - /* - * The 0 argument passed into the create function should one day -diff -urp a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c ---- a/fs/nfs/nfs4proc.c 2006-03-10 17:19:45.000000000 -0800 -+++ b/fs/nfs/nfs4proc.c 2006-03-10 17:19:58.000000000 -0800 -@@ -776,17 +776,17 @@ nfs4_atomic_open(struct inode *dir, stru - struct nfs4_state *state; - - if (nd->flags & LOOKUP_CREATE) { -- attr.ia_mode = nd->intent.open.create_mode; -+ attr.ia_mode = nd->intent.it_create_mode; - attr.ia_valid = ATTR_MODE; - if (!IS_POSIXACL(dir)) - attr.ia_mode &= ~current->fs->umask; - } else { - attr.ia_valid = 0; -- BUG_ON(nd->intent.open.flags & O_CREAT); -+ BUG_ON(nd->intent.it_flags & O_CREAT); - } - - cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); -- state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred); -+ state = nfs4_do_open(dir, &dentry->d_name, nd->intent.it_flags, &attr, cred); - put_rpccred(cred); - if (IS_ERR(state)) - return (struct inode *)state; diff --git a/lustre/kernel_patches/patches/perfctr-2.6-suse-lnxi.patch b/lustre/kernel_patches/patches/perfctr-2.6-suse-lnxi.patch deleted file mode 100644 index 0224205855..0000000000 --- a/lustre/kernel_patches/patches/perfctr-2.6-suse-lnxi.patch +++ /dev/null @@ -1,10070 +0,0 @@ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/CREDITS -=================================================================== 
---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/CREDITS 2004-11-11 10:28:48.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/CREDITS 2004-11-18 20:59:11.000000000 -0500 -@@ -2522,6 +2522,7 @@ - E: mikpe@csd.uu.se - W: http://www.csd.uu.se/~mikpe/ - D: Miscellaneous fixes -+D: Performance-monitoring counters driver - - N: Reed H. Petty - E: rhp@draper.net -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/Documentation/ioctl-number.txt -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/Documentation/ioctl-number.txt 2004-04-03 22:38:18.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/Documentation/ioctl-number.txt 2004-11-18 20:59:11.000000000 -0500 -@@ -187,5 +187,7 @@ - 0xB1 00-1F PPPoX <mailto:mostrows@styx.uwaterloo.ca> - 0xCB 00-1F CBM serial IEC bus in development: - <mailto:michael.klein@puffin.lb.shuttle.de> -+0xD0 all performance counters see drivers/perfctr/ -+ <mailto:mikpe@csd.uu.se> - 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ - <mailto:aherrman@de.ibm.com> -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/MAINTAINERS -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/MAINTAINERS 2004-11-11 10:28:39.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/MAINTAINERS 2004-11-18 20:59:11.000000000 -0500 -@@ -1608,6 +1608,12 @@ - L: linux-net@vger.kernel.org - S: Supported - -+PERFORMANCE-MONITORING COUNTERS DRIVER -+P: Mikael Pettersson -+M: mikpe@csd.uu.se -+W: http://www.csd.uu.se/~mikpe/linux/perfctr/ -+S: Maintained -+ - PNP SUPPORT - P: Adam Belay - M: ambx1@neo.rr.com -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/i386/Kconfig -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/i386/Kconfig 2004-11-11 10:28:16.000000000 -0500 -+++ 
linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/i386/Kconfig 2004-11-18 20:59:11.000000000 -0500 -@@ -857,6 +857,8 @@ - generate incorrect output with certain kernel constructs when - -mregparm=3 is used. - -+source "drivers/perfctr/Kconfig" -+ - endmenu - - menu "Special options" -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/i386/kernel/entry.S -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/i386/kernel/entry.S 2004-11-11 10:28:47.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/i386/kernel/entry.S 2004-11-18 20:59:11.000000000 -0500 -@@ -444,6 +444,16 @@ - /* The include is where all of the SMP etc. interrupts come from */ - #include "entry_arch.h" - -+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) -+ENTRY(perfctr_interrupt) -+ pushl $LOCAL_PERFCTR_VECTOR-256 -+ SAVE_ALL -+ pushl %esp -+ call smp_perfctr_interrupt -+ addl $4, %esp -+ jmp ret_from_intr -+#endif -+ - ENTRY(divide_error) - pushl $0 # no error code - pushl $do_divide_error -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/i386/kernel/i8259.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/i386/kernel/i8259.c 2004-11-11 10:27:12.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/i386/kernel/i8259.c 2004-11-18 20:59:11.000000000 -0500 -@@ -23,6 +23,7 @@ - #include <asm/apic.h> - #include <asm/arch_hooks.h> - #include <asm/i8259.h> -+#include <asm/perfctr.h> - - #include <linux/irq.h> - -@@ -436,6 +437,8 @@ - */ - intr_init_hook(); - -+ perfctr_vector_init(); -+ - /* - * Set the clock to HZ Hz, we already have a valid - * vector now: -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/i386/kernel/process.c -=================================================================== ---- 
linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/i386/kernel/process.c 2004-11-11 10:28:16.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/i386/kernel/process.c 2004-11-18 20:59:11.000000000 -0500 -@@ -32,6 +32,7 @@ - #include <linux/delay.h> - #include <linux/reboot.h> - #include <linux/init.h> -+#include <linux/perfctr.h> - #include <linux/mc146818rtc.h> - #include <linux/module.h> - #include <linux/kallsyms.h> -@@ -305,6 +306,7 @@ - tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; - put_cpu(); - } -+ perfctr_exit_thread(&tsk->thread); - if (tsk->thread.debugreg[7]) - dr_dec_use_count(tsk->thread.debugreg[7]); - } -@@ -371,6 +373,8 @@ - savesegment(fs,p->thread.fs); - savesegment(gs,p->thread.gs); - -+ perfctr_copy_thread(&p->thread); -+ - tsk = current; - if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { - p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); -@@ -519,6 +523,8 @@ - - /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ - -+ perfctr_suspend_thread(prev); -+ - __unlazy_fpu(prev_p); - - /* -@@ -599,6 +605,9 @@ - */ - tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; - } -+ -+ perfctr_resume_thread(next); -+ - return prev_p; - } - -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/ppc/Kconfig -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/ppc/Kconfig 2004-11-11 10:28:15.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/ppc/Kconfig 2004-11-18 20:59:11.000000000 -0500 -@@ -214,6 +214,8 @@ - depends on 4xx || 8xx - default y - -+source "drivers/perfctr/Kconfig" -+ - endmenu - - menu "Platform options" -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/ppc/kernel/process.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/ppc/kernel/process.c 2004-11-11 10:28:48.000000000 -0500 -+++ 
linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/ppc/kernel/process.c 2004-11-18 20:59:11.000000000 -0500 -@@ -37,6 +37,7 @@ - #include <linux/module.h> - #include <linux/kallsyms.h> - #include <linux/mqueue.h> -+#include <linux/perfctr.h> - - #include <asm/pgtable.h> - #include <asm/uaccess.h> -@@ -255,7 +256,9 @@ - new->thread.regs->msr |= MSR_VEC; - new_thread = &new->thread; - old_thread = &current->thread; -+ perfctr_suspend_thread(&prev->thread); - last = _switch(old_thread, new_thread); -+ perfctr_resume_thread(&current->thread); - local_irq_restore(s); - return last; - } -@@ -314,6 +317,7 @@ - last_task_used_math = NULL; - if (last_task_used_altivec == current) - last_task_used_altivec = NULL; -+ perfctr_exit_thread(&current->thread); - } - - void flush_thread(void) -@@ -400,6 +404,8 @@ - - p->thread.last_syscall = -1; - -+ perfctr_copy_thread(&p->thread); -+ - return 0; - } - -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/Kconfig -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/Kconfig 2004-11-11 10:28:39.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/Kconfig 2004-11-18 20:59:11.000000000 -0500 -@@ -319,6 +319,8 @@ - bool - default y - -+source "drivers/perfctr/Kconfig" -+ - endmenu - - -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/entry.S -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/entry.S 2004-04-03 22:36:56.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/entry.S 2004-11-18 20:59:11.000000000 -0500 -@@ -556,6 +556,11 @@ - apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt - #endif - -+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) -+ENTRY(perfctr_interrupt) -+ apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt -+#endif -+ - /* - * Exception
entry points. - */ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/i8259.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/i8259.c 2004-04-03 22:36:25.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/i8259.c 2004-11-18 20:59:11.000000000 -0500 -@@ -23,6 +23,7 @@ - #include <asm/delay.h> - #include <asm/desc.h> - #include <asm/apic.h> -+#include <asm/perfctr.h> - - #include <linux/irq.h> - -@@ -484,6 +485,8 @@ - set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - #endif - -+ perfctr_vector_init(); -+ - /* - * Set the clock to HZ Hz, we already have a valid - * vector now: -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/process.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/process.c 2004-11-11 10:27:51.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/process.c 2004-11-18 20:59:11.000000000 -0500 -@@ -32,6 +32,7 @@ - #include <linux/delay.h> - #include <linux/irq.h> - #include <linux/ptrace.h> -+#include <linux/perfctr.h> - #include <linux/version.h> - - #include <asm/uaccess.h> -@@ -258,6 +259,7 @@ - (init_tss + smp_processor_id())->io_bitmap_base = - INVALID_IO_BITMAP_OFFSET; - } -+ perfctr_exit_thread(&me->thread); - } - - void flush_thread(void) -@@ -361,6 +363,8 @@ - asm("movl %%es,%0" : "=m" (p->thread.es)); - asm("movl %%ds,%0" : "=m" (p->thread.ds)); - -+ perfctr_copy_thread(&p->thread); -+ - if (unlikely(me->thread.io_bitmap_ptr != NULL)) { - p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); - if (!p->thread.io_bitmap_ptr) -@@ -407,6 +411,8 @@ - int cpu = smp_processor_id(); - struct tss_struct *tss = init_tss + cpu; - -+ perfctr_suspend_thread(prev); -+ - unlazy_fpu(prev_p); - - /* -@@ -510,6 +516,8 @@ - } - } - -+ 
perfctr_resume_thread(next); -+ - return prev_p; - } - -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/Makefile -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/Makefile 2004-11-18 20:59:08.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/Makefile 2004-11-18 20:59:11.000000000 -0500 -@@ -51,6 +51,7 @@ - obj-$(CONFIG_MCA) += mca/ - obj-$(CONFIG_EISA) += eisa/ - obj-$(CONFIG_CPU_FREQ) += cpufreq/ -+obj-$(CONFIG_KPERFCTR) += perfctr/ - obj-$(CONFIG_INFINIBAND) += infiniband/ - obj-y += firmware/ - obj-$(CONFIG_CRASH_DUMP) += dump/ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64_compat.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_64_compat.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64_compat.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,25 @@ -+/* $Id: x86_64_compat.h,v 1.1 2003/05/14 21:51:57 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * x86_64-specific compatibility definitions for 2.4/2.5 kernels. -+ * -+ * Copyright (C) 2003 Mikael Pettersson -+ */ -+#include <linux/config.h> -+#include <linux/version.h> -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -+ -+/* irq_enter() and irq_exit() take two parameters in 2.4. However, -+ we only use them to disable preemption in the interrupt handler, -+ which isn't needed in non-preemptive 2.4 kernels. 
*/ -+#ifdef CONFIG_PREEMPT -+#error "not yet ported to 2.4+PREEMPT" -+#endif -+#undef irq_enter -+#undef irq_exit -+#define irq_enter() do{}while(0) -+#define irq_exit() do{}while(0) -+ -+#endif -+ -+extern unsigned int perfctr_cpu_khz(void); -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/Makefile -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/Makefile 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/Makefile 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,46 @@ -+# $Id: Makefile,v 1.20.2.2 2004/08/02 22:24:58 mikpe Exp $ -+# Makefile for the Performance-monitoring counters driver. -+ -+ifeq ($(VERSION)$(PATCHLEVEL),24) -+include Makefile24 -+else -+ -+# We need -fno-unit-at-a-time with gcc-3.4 on x86 to avoid stack overflow. -+# Kernels >= 2.6.6 do that automatically but older ones do not, so we -+# unconditionally add that option here just in case. -+my_check_gcc = $(shell if $(CC) $(CFLAGS) $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi ;) -+EXTRA_CFLAGS_$(CONFIG_X86) := $(call my_check_gcc,-fno-unit-at-a-time,) -+EXTRA_CFLAGS_$(CONFIG_X86_64) := -+EXTRA_CFLAGS_$(CONFIG_PPC32) := -+EXTRA_CFLAGS := $(EXTRA_CFLAGS_y) -+ -+# construct various object file lists: -+# kernel-objs-y kernel objects -+# m-objs-m perfctr.o if driver is module, empty otherwise -+# driver-objs-y objects for perfctr.o module, or empty -+ -+# This also covers x86_64. 
-+driver-objs-$(CONFIG_X86) := x86.o -+tests-objs-$(CONFIG_X86) := x86_tests.o -+kernel-objs-$(CONFIG_X86) := x86_setup.o -+ -+driver-objs-$(CONFIG_PPC32) := ppc.o -+tests-objs-$(CONFIG_PPC32) := ppc_tests.o -+kernel-objs-$(CONFIG_PPC32) := ppc_setup.o -+ -+driver-objs-y += init.o marshal.o -+driver-objs-$(CONFIG_PERFCTR_INIT_TESTS) += $(tests-objs-y) -+driver-objs-$(CONFIG_PERFCTR_VIRTUAL) += virtual.o -+stub-objs-$(CONFIG_PERFCTR)-$(CONFIG_PERFCTR_VIRTUAL) := virtual_stub.o -+driver-objs-$(CONFIG_PERFCTR_GLOBAL) += global.o -+m-objs-$(CONFIG_PERFCTR) := perfctr.o -+kernel-objs-$(CONFIG_PERFCTR) += $(driver-objs-y) -+kernel-objs-y += $(stub-objs-m-y) -+ -+perfctr-objs := $(driver-objs-y) -+obj-m += $(m-objs-m) -+ -+obj-$(CONFIG_KPERFCTR) += kperfctr.o -+kperfctr-objs := $(kernel-objs-y) -+ -+endif # ifeq 24 -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/RELEASE-NOTES -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/RELEASE-NOTES 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/RELEASE-NOTES 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,1357 @@ -+$Id: RELEASE-NOTES,v 1.234.2.28 2004/10/19 16:22:47 mikpe Exp $ -+ -+RELEASE NOTES -+============= -+ -+Version 2.6.10.2, 2004-10-19 -+- virtual.c: replace nrctrs_lock with a mutex. Avoids illegal -+ may-sleep-while-holding-lock, caused by mutex operations in -+ perfctr_cpu_{reserve,release}(). -+ Backport from perfctr-2.7.6. -+- PPC32: Correct MMCR0 handling for FCECE/TRIGGER. Read -+ MMCR0 at suspend and then freeze the counters. Move -+ this code from read_counters() to suspend(). At resume, -+ reload MMCR0 to unfreeze the counters. Clean up the -+ cstatus checks controlling this behaviour. -+ Backport from perfctr-2.7.6. -+ -+Version 2.6.10, 2004-09-14 -+- Fixed p4_clear_counters() to not access IQ_ESCR{0,1} -+ on P4 models >= 3. 
-+ -+Version 2.6.10-pre1, 2004-08-03 -+- Changed x86-64 to use the x86 include file and driver. -+ Intel's 64-bit P4 should now work in the x86-64 kernel. -+- Replaced PERFCTR_INTERRUPT_SUPPORT and NMI_LOCAL_APIC -+ #if:s in x86 code by #ifdef:s on CONFIG_X86_LOCAL_APIC. -+- Use macros to clean up x86 per-cpu cache accesses. -+- Recognize model 13 Pentium-Ms. -+- Changed isuspend_cpu on x86 to be like x86-64's: it -+ now stores a CPU number instead of a cache pointer. -+- x86: make perfctr_cpu_name more approximate. -+- The x86 driver records a simplified CPU type for x86_tests, -+ but this only occurs if PERFCTR_INIT_TESTS is configured. -+ perfctr_info.cpu_type is now unused. -+- Changed P4 driver to set up and check an explicit flag -+ for EXTENDED_CASCADE availability. perfctr_info.cpu_type -+ is now unused except for perfctr_x86_init_tests(). -+- x86: Reformatted "if( x )" to "if (x)" and similarly for while -+ and switch statements. Deleted #if 0 blocks. -+ -+Version 2.6.9, 2004-07-27 -+- Fix ppc_check_control() to allow 7400/7410 processors to -+ specify MMCR2[THRESHMULT]. -+- PPC32 cleanups: make get_cpu_cache() return pointer not lvalue, -+ eliminate duplicated initialisation/cleanup code. -+- Makefile: enforce -fno-unit-at-a-time with gcc-3.4 on x86, -+ to prevent stack overflow in 2.6 kernels < 2.6.6. -+- Do sync_core() before rdtsc() in x86_tests, to avoid bogus -+ benchmarking data on K8. Add sync_core() implementation for -+ the 32-bit kernel. Add sync_core() benchmark. -+- Added __perfctr_mk_cstatus() to allow x86.c:finalise_backpatching() -+ to create a cstatus with i-mode counters marked as present, but -+ with zero actual counters. This prevents perfctr_cpu_isuspend() -+ from clearing the control register for counter #0 at init-time, -+ when the hardware doesn't belong to this driver. On AMD and P6 -+ this would accidentally disable the NMI watchdog. 
-+- x86: Marked initial targets of backpatchable calls -+ 'noinline' to prevent gcc from inlining them, which -+ completely breaks the backpatching mechanism. -+- x86_tests: fix CONFIG_X86_LOCAL_APIC=n linkage error. -+- 2.6.8-rc1 no longer makes cpu_online_map a #define on UP, -+ breaking modules. Reintroduce the macro. -+- 2.6.8-rc1 changed cpus_complement() calling convention. -+ Replace cpus_complement();cpus_and() with cpus_andnot(), -+ and provide cpus_andnot() compat macro. -+- PPC32: support generic CPUs using only the TB. -+- PPC32: query OF for CPU/TB frequencies, drop /proc/cpuinfo -+ parsing code. -+- PPC32: avoid CPU re-detection in tests code. -+- PPC32: clean up and sync with current perfctr-2.7 code. -+ -+Version 2.6.8, 2004-05-29 -+- Added recognition of PowerPC 750GX. -+- Changes for the {reserve,release}_lapic_nmi() API added in -+ kernel 2.6.6 backported from perfctr-2.7.1: -+ * Starting with kernel 2.6.6 we no longer need access to -+ nmi_perfctr_msr, so removed EXPORT_SYMBOL() and <asm/apic.h> -+ patches related to this variable (except for older kernels). -+ * Updated x86.c to use the new API. Added simulation (without -+ the non-conflict guarantees) for older kernels. -+ * Moved hardware reservation to x86.c's "reserve" procedure. -+ The init code now only does read-only hardware detection. -+ * Added a mutex to the reserve/release procedures, eliminating -+ * a long-standing race possibility. -+ * Changed x86.c to reserve and release the hardware around its -+ call to perfctr_x86_init_tests(). -+ * Similarly updated x86_64.c for the new API. -+ -+Version 2.6.7, 2004-05-04 -+- Replaced x86_64_tests.{c,h} with x86_tests.{c,h}. -+- sys_device_{,un}register() was renamed as sysdev_{,un}register() -+ in 2.6.4-rc2. Updated x86.c and x86_64.c accordingly, and -+ added a compatibility definition in compat.h. -+- Removed unnecessary '#include "compat.h"' from x86_tests.c. -+- Replaced x86_64_setup.c with x86_setup.c. 
-+- Replaced x86_64_compat.h with x86_compat.h. -+- Moved perfctr_interrupt entry point from x86_setup.c to patch kit, -+ for kernels older than 2.4.21. Cleanup to facilitate future merge -+ of x86_setup.c and x86_64_setup.c. -+ -+Version 2.6.6, 2004-02-21 -+- Fixed a bug in x86-64's perfctr interrupt entry code in 2.4 kernels, -+ causing it to pass the wrong value for "struct pt_regs*". This -+ was harmless since the retrieved "rip" was unused, but still wrong. -+ Renamed do_perfctr_interrupt to smp_perfctr_interrupt to allow -+ using the 2.4 kernel's standard BUILD_SMP_INTERRUPT macro. -+- Unmask LVTPC after interrupt on Pentium-M. An oprofile user -+ reports that P-M auto-masks LVTPC just like P4. Preliminary -+ measurements indicate a 40 to 60 cycle cost for the apic write -+ on P4s and P6s, so the unmask is not done unconditionally. -+- Measure LVTPC write overhead in x86{,_64}_tests.c. -+- Add Pentium 4 Model 3 detection. -+- The 2.4.21-193 SuSE kernel does EXPORT_SYMBOL(mmu_cr4_features). -+ Add compat24.h workaround for this. -+ -+Version 2.6.5, 2004-01-26 -+- Added perfctr_info.cpu_type constants to <asm-ppc/perfctr.h>. -+- Init filp->f_mapping in virtual.c for 2.6.2-rc1+ kernels. -+- Updated p4_check_control(): -+ * Allow ESCR.CPL_T1 to be non-zero when using global-mode -+ counters on HT processors. -+ * Don't require ESCR.CPL_T0 to be non-zero. CPL_T0==0b00 -+ is safe and potentially useful (global counters on HT). -+ * Require CCCR.ACTIVE_THREAD==0b11 on non-HT processors, as -+ documented in the IA32 Volume 3 manual. Old non-HT P4s -+ seem to work Ok for all four values (see perfctr-2.6.0-pre3 -+ notes), but this is neither guaranteed nor useful. -+- x86.c now detects & records P4 HT-ness also in UP kernels. -+- Added 'is_global' parameter to perfctr_cpu_update_control(). -+ This flag is ignored on everything except P4 (sigh). -+ -+Version 2.6.4, 2004-01-12 -+- Added 'tsc_to_cpu_mult' field to struct perfctr_info, replacing -+ '_reserved1'. 
This is needed on PowerPC to map time-base ticks -+ to actual time. On x86/AMD64, tsc_to_cpu_mult == 1. -+- Added support for PowerPC 604/7xx/74xx processors. Overflow -+ interrupts are currently not allowed due to the PMI/DECR erratum. -+- Replaced perfctr_cpus_mask() with cpus_addr(). Updated cpumask.h -+ to define cpus_addr() for kernels older than 2.6.1. -+ -+Version 2.6.3-pl1, 2004-01-01 -+- Moved the x86 interrupt handler definition from x86_setup.c to -+ the patch kit for 2.4.21 and later 2.4 kernels, like it already -+ is done for 2.6 kernels. This change is needed due to extensive -+ interrupt handler changes in RedHat's 2.4.21-6.EL kernel. -+- Simplified <asm-i386/perfctr.h>: now that early 2.4 kernels no -+ longer are supported, LOCAL_PERFCTR_VECTOR is known to be defined, -+ so CONFIG_X86_LOCAL_APIC implies PERFCTR_INTERRUPT_SUPPORT. -+ -+Version 2.6.3, 2003-12-21 -+- Removed gperfctr_cpu_state_only_cpu_sdesc's total_sizeof -+ optimisation. The ABI change in 2.6.2 broke it, leading to -+ the new fields not being cleared and later causing EOVERFLOW. -+- The perfctr_ioctl32_handler() workaround is now only applied -+ to kernels older than 2.4.23, since 2.4.23 added the "NULL -+ handler == sys_ioctl" logic. -+ -+Version 2.6.2, 2003-11-23 -+- Added 16 bytes (four fields) of reserved data to perfctr_info, -+ perfctr_cpu_control, vperfctr_control, gperfctr_cpu_control, -+ and gperfctr_cpu_state. Renumbered marshalling tags for -+ generic structures. Bumped ABI versions. -+- Only allow use of IQ_ESCR{0,1} on P4 models <= 2. These ESCRs -+ were removed from later models, according to a recent Intel -+ documentation update (252046-006). -+- Fixes for Fedora Core 1's 2.4.22-1.2115.nptl kernel: -+ * Work around their incomplete and broken cpumask_t backport. -+ * Avoid name conflict due to their on_each_cpu() backport. -+ * Handle their preempt_disable()/enable() macros. 
-+- Added new perfctr_cpu_is_forbidden() macro to fix a -+ compilation error affecting AMD64 in SMP 2.6 kernels. -+ SMP cpu_isset() requires that mask is an lvalue, but -+ for AMD64 the mask is a constant. -+ -+Version 2.6.1, 2003-10-05 -+- Kernel 2.6.0-test6 changed /proc/self and the /proc/<pid>/ -+ namespace to refer to "processes" (groups of CLONE tasks) -+ instead of actual kernel tasks. This forced the planned -+ transition of the vperfctr API from /proc/<pid>/perfctr -+ to /dev/perfctr to occur immediately. Changes: -+ * Moved /dev/perfctr implementation from global.c to init.c. -+ * Implemented VPERFCTR_{CREAT,OPEN}, vperfctr_attach(), and -+ the vperfctrfs pseudo-fs needed to support the magic files. -+ The fs code was ported from perfctr-1.6/3.1, but updated -+ for 2.6 and fixed to permit module unloading in 2.4. -+ * Fixed VPERFCTR_OPEN to accept tsk->thread.perfctr == NULL. -+ (Needed to info querying commands.) -+ * Removed /proc/<pid>/perfctr code. Simplified vperfctr_stub code. -+ * Updated vperfctr_attach() to mimic the old /proc vperfctr_open(). -+ This fixes some synchronisation issues. -+- Cleanups: -+ * Removed #if checks and code for kernels older than 2.4.16. -+ * Eliminated compat macros that are identical in 2.6 and 2.4. -+ * Moved ptrace_check_attach EXPORT_SYMBOL from x86{,_64}_setup.c -+ to virtual_stub.c. -+ * get_task_by_proc_pid_inode() is now trivial. Eliminated it. -+ * p4_ht_finalise() is now trivial. Eliminated it. -+- Added MODULE_ALIAS() declaration, eliminating the need for -+ an alias in /etc/modprobe.conf with 2.6 kernels. Added -+ MODULE_ALIAS() compatibility #define in compat24.h. -+- Added detection of AMD K8 Revision C processors. -+- Updated K8C detection for Revision C Athlon64s. -+ -+Version 2.6.0, 2003-09-08 -+- Handle set_cpus_allowed() when PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED: -+ * Add bad_cpus_allowed flag to struct vperfctr. 
-+ * Check bad_cpus_allowed in __vperfctr_resume: if resuming -+ with PMCs on forbidden CPU, kill counters and SIGILL current. -+ * __vperfctr_set_cpus_allowed() callback: set bad_cpus_allowed -+ and print warning if mask allows forbidden CPUs. -+ * Use task_lock/unlock instead of preempt_disable/enable to -+ synchronise task_struct accesses. -+ * Ensure sampling_timer and bad_cpus_allowed share cache line. -+ * #include <linux/compiler.h> explicitly for 2.4.18 and older -+ kernels; newer kernels include it from <linux/kernel.h>. -+ * Hook in virtual_stub.c. -+ * Hook and cpumask_t typedef in <linux/perfctr.h>. -+- Simplify #if test for set_cpus_allowed() emulation code. -+ Also don't define it if CONFIG_PERFCTR_VIRTUAL isn't set. -+- cpumask.h only typedefs cpumask_t if <linux/perfctr.h> hasn't. -+- Don't hide #include <linux/kernel.h> in compat24.h. -+- Fixed compat24.h to test for MODULE not CONFIG_MODULES at the -+ __module_get/module_put macros. -+ -+Version 2.6.0-pre5, 2003-08-31 -+- printk() is not allowed in switch_to(). Disabled debug code -+ which could violate that rule. Changed virtual_stub.c to BUG() -+ instead of printk() if the driver is invoked when not loaded. -+- Renamed vperfctr_exit2() to vperfctr_unlink() for clarity. -+- gcc-3.3.1 issued several "dereferencing type-punned pointer will -+ break strict-aliasing rules" warnings for marshal.c. Used explicit -+ unions to fix the warnings and clean up the code. -+- Removed compat22.h. -+- cpumask_t was included in standard 2.6.0-test4; replace #ifndef -+ test in cpumask.h with normal kernel version test. -+- x86-64 fix: sys_ioctl() isn't exported to modules, so call -+ filp->f_op->ioctl() instead in perfctr_ioctl32_handler(). -+- x86-64 fix: init.c must include <asm/ioctl32.h> not <linux/ioctl32.h> -+ for compatibility with 2.4 kernels. 
-+ -+Version 2.6.0-pre4, 2003-08-19 -+- Fix x86-64 register_ioctl32_conversion() usage for 2.4 kernels: -+ * Supply dummy handler since a NULL handler oopses the kernel. -+ * Test CONFIG_IA32_EMULATION since CONFIG_COMPAT is post-2.4. -+- Fixed and merged the new API struct marshalling code: -+ * New files marshal.c and marshal.h contain the marshalling code -+ and high-level helper functions (source shared with the library). -+ * User-space structs are struct perfctr_struct_buf and accessed using -+ perfctr_copy_{from,to}_user() with ptr to appropriate descriptor. -+ The cpumask stuff isn't changed. -+ * All ioctls registered as trivially 32-bit compatible on x86-64. -+ * Changed perfctr_info cpu_type/cpu_features from short to int: -+ this avoids the need for UINT16 marshalling support, and cpumask_t -+ caused perfctr_info to change binary representation anyway. -+- Declared VPERFCTR_{CREAT,OPEN} ioctls, but left them unimplemented. -+- Fixed vperfctr_open() preemption bug. The O_CREAT check+install -+ code could be preempted, leading to remote-control races. -+- Fixed perfctr_exit_thread() preemption bug. It detached the vperfctr -+ before calling __vperfctr_exit(). If current was preempted before -+ __vperfctr_exit() called vperfctr_suspend(), perfctr_suspend_thread() -+ would fail to suspend the counters. The suspend+detach is now done -+ atomically within __vperfctr_exit(). -+- Changes to handle 2.6 kernels with the cpumask_t patch (-mm, -osdl): -+ * Convert perfctr_cpus_forbidden_mask accesses to cpumask_t API. -+ Based in part on a patch for the -osdl kernel by Stephen Hemminger. -+ * Remove cpus and cpus_forbidden from struct perfctr_info, -+ since their sizes depend on the kernel configuration. -+ * Add struct perfctr_cpu_mask to export cpumask_t objects -+ sanely (i.e., using ints not longs) to user-space. -+ * Add CPUS and CPUS_FORBIDDEN commands to retrieve these sets. -+ * Add cpumask.h to emulate cpumask_t API in cpumask_t-free kernels. 
-+ * Move perfctr_cpus_forbidden_mask declaration/#define from -+ <asm/perfctr.h> to cpumask.h -- necessary since <asm/perfctr.h> -+ doesn't have access to the driver's compatibility definitions. -+- Cleaned up perfctr_cpu_ireload(). -+- Removed struct field offset check from init.c. -+- 2.4.22-rc1 does EXPORT_SYMBOL(mmu_cr4_features). Added -+ new compat #define to handle this. -+- Rename x86.c's rdmsrl() to rdmsr_low() to work around msr.h -+ changes in 2.6.0-test3. Also rename rdpmcl() to rdpmc_low(). -+- Replaced __attribute__((__aligned__(SMP_CACHE_BYTES))) usage -+ with the official ____cacheline_aligned macro. -+- Detect cpuid 0x69x VIA C3s (Antaur/Nehemiah). -+ -+Version 2.6.0-pre3, 2003-08-03 -+- Changed perfctr_info.cpus and cpus_forbidden to be int instead of -+ long, to make x86-32 and x86-64 compatible. This is a temporary -+ solution, as there are patches for >32 CPUs on x86-32. The real -+ solution is to make these sets variable-sized, and have user-space -+ retrieve them with a new command. -+- Simplified GPERFCTR_CONTROL to update a single CPU instead of -+ a set of CPUs. Moved cstatus clearing to release_hardware(). -+- Moved gperfctr start to new GPERFCTR_START command. -+- Simplified GPERFCTR_READ to access a single CPU instead of a -+ set of CPUs. -+- Removed the requirement that CCCR.ACTIVE_THREAD == 3 on P4. -+ HT processors define behaviour for all four possible values, -+ and non-HT processors behave sanely for all four values. -+- Moved struct perfctr_low_ctrs definition from <asm/perfctr.h> to -+ the corresponding low-level driver, since it's only used there. -+- Changed perfctr_info.cpu_khz and vperfctr_control.preserve to be -+ int instead of long. This corrects x86-64 and makes it compatible -+ with x86-32. -+- Updated x86.c to permit extended cascading on P4M2. -+- Fixed a bug where the perfctr module's refcount could be zero with -+ code still running in the module (pending returns to exit_thread()). 
-+ This could race with rmmod in preemptive kernels, and in theory -+ also in SMP kernels. -+ * module owner field added to vperfctr_stub -+ * _vperfctr_exit() in the modular case is now a function in -+ vperfctr_stub.c, which brackets the vperfctr_stub.exit() call -+ with __module_get() and module_put() on vperfctr_stub.owner -+ * updated 2.4 and 2.2 compat definitions of __module_get() and -+ module_put() to work for modules != THIS_MODULE -+- Replaced uses of (void)try_module_get() with __module_get() as the -+ latter is more appropriate for 2.6 kernels. Updated compat stuff. -+ -+Version 2.6.0-pre2, 2003-07-13 -+- vperfctr API fixes: -+ * The new VPERFCTR_READ_CONTROL command retrieves a vperfctr's -+ control data. -+ * Renamed VPERFCTR_SAMPLE to VPERFCTR_READ_SUM, and made it -+ write the sums to a perfctr_sum_ctrs user-space buffer. -+ * Non-write commands are now always permitted on unlinked perfctrs. -+ The first change was needed since the control data no longer is -+ accessible via the mmap()ed state. The other changes clean up and -+ simplify perfex and the library's slow-path read_ctrs() operation. -+- sys_vperfctr_ functions now mark the tsk parameter as "const" if -+ they don't need write access to it. Typically they only need to -+ compare it with current to detect self-access cases. -+- perfctr_cpu_state no longer makes the perfctr_cpu_control part -+ accessible to user-space (via mmap() of vperfctrs). -+- Simplified {set,is}_isuspend_cpu() in x86_64.c by having callers -+ pass the CPU number instead of the cache pointer (which was only -+ used to derive the CPU number). -+- Eliminated NMI_LOCAL_APIC #ifs from x86-64 code since x86-64 -+ always defines it. -+- x86.c cleanups: the non-PERFCTR_INTERRUPT_SUPPORT case now uses -+ dummy stub functions, eliminated six #ifdefs. -+- x86_64_setup.c needs <asm/fixmap.h>. -+- Protected cpu_has_mmx and cpu_has_ht #defines in x86_compat.h -+ with #ifndef since 2.4.22-pre3 added those #defines. 
-+- Eliminated PERFCTR_INTERRUPT_SUPPORT #ifs from x86-64 code -+ since x86-64 always defines CONFIG_X86_LOCAL_APIC. -+- Removed the P4-specific versions of isuspend() and iresume(). -+ P4 now uses p6_like_{isuspend,iresume}(), just like P6/K7/K8. -+- Long overdue cleanup in x86.c/x86_64.c: renamed per_cpu_cache -+ pointer variables from 'cpu' to 'cache'. -+- Added inline functions in virtual.c for registering the overflow -+ handler and for clearing iresume_cstatus. Cleaned out several -+ #if PERFCTR_INTERRUPT_SUPPORT occurrences from the main code. -+ (Partial backport from the abandoned perfctr-3.1 branch.) -+- Inlined now useless 'struct vperfctr_state' in 'struct vperfctr'. -+ -+Version 2.6.0-pre1, 2003-07-02 -+- Rearranged 'struct perfctr_cpu_state' to reduce the number of -+ cache lines needed to be touched by key operations (suspend, -+ resume, sample). Switched from struct-of-arrays to array-of-struct -+ for perfctr counts, and copied pmc_map into the PMC data array. -+ The old representation touched at least 3 cache lines at key -+ operations, the new one only needs one cache line in most cases. -+ The user-space mmap() view of the new representation is binary -+ compatible between x86 and x86-64. -+- Changed 'isuspend_cpu' in perfctr_cpu_state on x86-64 to be a -+ 32-bit CPU number, to maintain binary compatibility with x86. -+- Removed the union of p5_cesr and id; use id throughout. -+- Removed _filler and si_signo from 'struct vperfctr_state', making -+ the user-space view of it identical to 'struct perfctr_cpu_state'. -+ -+Version 2.5.5, 2003-06-15 -+- Updated x86 driver for 2.5.71 local APIC driver model changes. -+- Updated x86-64 driver for 2.5.71 NMI watchdog enable/disable API. -+- x86-64 is broken in 2.5.71 since x86-64 updated to driver model -+ for local APIC and NMI watchdog, at the same time as x86 moved -+ to a newer version of the "system device" driver model. 
Updated -+ the x86-64 driver for the new model, which is expected to be in -+ x86-64 by 2.5.72 (patch exists for 2.5.71). -+ -+Version 2.5.4, 2003-06-01 -+- The generic-x86-with-TSC driver now uses rdpmc_read_counters -+ and p6_write_control instead of its own procedures. -+- K8 docs are now available. Updated comment in x86.c accordingly. -+- P4 OVF_PMI+FORCE_OVF counters didn't work at all, resulting in -+ BUG messages from the driver since identify_overflow failed to -+ detect which counters had overflowed, and vperfctr_ihandler -+ left the vperfctr in an inconsistent state. This works now. -+ However, hardware quirks makes this configuration only useful -+ for one-shot counters, since resuming generates a new interrupt -+ and the faulting instruction again doesn't complete. The same -+ problem can occur with regular OVF_PMI counters if ireset is -+ a small-magnitude value, like -5. -+ This is a user-space problem; the driver survives. -+- On P4, OVF_PMI+FORCE_OVF counters must have an ireset value of -1. -+ This allows the regular overflow check to also handle FORCE_OVF -+ counters. Not having this restriction would lead to MAJOR -+ complications in the driver's "detect overflow counters" code. -+ There is no loss of functionality since the ireset value doesn't -+ affect the counter's PMI rate for FORCE_OVF counters. -+- Moved P4 APIC_LVTPC reinit from p4_isuspend() to identify_overflow(). -+ Reduces context-switch overheads when i-mode counters are active. -+- Corrected vperfctr_suspend()'s precondition. -+- Corrected comment in <asm/perfctr.h> to state that ireset[] -+ values must be negative rather than non-positive. -+- Made 'perfctr_cpu_name' __initdata, like its predecessor. -+ -+Version 2.5.3.1, 2003-05-21 -+- Replaced 'char *perfctr_cpu_name[]' by 'char *perfctr_cpu_name'. -+ This is needed for x86-64 and other non-x86 architectures. -+- Changed <asm-x86_64/perfctr.h> to use 'long long' for 64-bit sums. 
-+ This doesn't change the ABI, but improves user-space source code -+ compatibility with 32-bit x86. -+- Removed the !defined(set_cpus_allowed) check added to compat24.h -+ in 2.5.3. It's wrong for SMP builds with modules and MODVERSIONS, -+ since the set_cpus_allowed() emulation function becomes a #define -+ from include/linux/modules/x86_setup.ver. Instead add the already -+ used HAVE_SET_CPUS_ALLOWED #define to include/linux/config.h in -+ the kernel patch, but make it conditional on CONFIG_X86_64. -+ -+Version 2.5.3, 2003-05-16 -+- Added detection code for Pentium M. MISC_ENABLE_PERF_AVAIL is -+ now checked on both P4 and Pentium M. -+- Added x86_64 driver code. Both x86_64.c and asm-x86_64/perfctr.h -+ are basically simplified versions of corresponding x86 files, -+ with P5 and P4 support removed, 2.2 kernel support removed, and -+ 'long long' for sums replaced by 'long'. The last change is -+ painful for user-space and may be reverted. -+- compat24.h: don't define set_cpus_allowed() if already #defined, -+ workaround for RawHide's 2.4.20-9.2 x86_64 kernel. -+- Removed list of supported CPUs from Kconfig. That information -+ belongs elsewhere (and it's a pain to maintain for 2.2/2.4). -+ -+Version 2.5.2, 2003-04-13 -+- Minor cleanup: use PROC_I() unconditionally in virtual.c, -+ implement trivial compat macro in compat24.h. -+- Updated power management code for the local APIC and NMI -+ watchdog driver model changes in kernel 2.5.67. -+ The suspend/resume procedures are still no-ops, however. -+ This revealed a bug in the lapic_nmi_watchdog resume code: -+ it resumes the lapic_nmi_watchdog even when it was disabled -+ before suspend. Perfctr's 2.5.67 kernel patch includes a fix. -+- perfctr_sample_thread() is now used also on UP. Anton Ertl's -+ 2.26GHz UP P4 managed to execute a process for more than 2^32 -+ cycles before suspending it, causing TSC inaccuracies. 
-+- RH9's 2.4.20-8 kernel changed cpu_online(), put_task_struct() and -+ remap_page_range() to be more like in 2.5 kernels, and moved the -+ declaration of ptrace_check_attach() from mm.h to ptrace.h, also -+ like in 2.5 kernels, requiring fixes to compat24.h and x86_setup.c. -+- Added note in x86.c about the new Pentium M processor. -+ -+Version 2.5.1, 2003-03-23 -+- Fix P4 HT initialisation. I've seen several boot logs from -+ people running MP P4 Xeons with HT disabled: this produces -+ an ugly "restricting access for CPUs 0x0" message, and would -+ cause P4 HT init to unnecessarily return error in older kernels -+ lacking set_cpus_allowed(). Now only print the message or -+ signal error if non-zero siblings actually are found. -+- The set_cpus_allowed() emulation doesn't compile in 2.4 -+ kernels older than 2.4.15 due to the p->cpus_running field. -+ Updated version checks to skip it in 2.4.x when x<15. -+- Fix set_cpus_allowed() emulation compile error on BUG_ON() -+ in 2.4 kernels older than 2.4.19. -+- Added Nehemiah note/reminder in x86.c:centaur_init(). -+ -+Version 2.5.0, 2003-03-10 -+- Reverted the 2.5.0-pre2 change that replaced the PERFCTR_INFO -+ ioctl by read(): it made the API look too weird. -+ Added a PERFCTR_ABI ioctl which only retrieves 'abi_version'. -+- Cleaned up struct perfctr_info: renamed abi_magic to abi_version, -+ and version to driver_version. Renamed PERFCTR_*_MAGIC too. -+- Cleaned up struct perfctr_cpu_control: moved evntsel_aux[] -+ into the p4 sub-struct and renamed it as escr[]. Only P4 needs -+ it anyway, and the new name clarifies its purpose. -+- Renumbered the vperfctr ioctls to the 8-15 range (8-11 are used) -+ and reserved 0-7 (0-1 are used) for generic ioctls. -+- Added 'use_nmi' field to struct gperfctr_control, reserved for -+ future use if/when support for i-mode gperfctrs is implemented. -+- Replaced some preempt/smp_call_function combinations with 2.5.64's -+ new on_each_cpu() construct. 
Added compatibility definitions to -+ compat24.h and compat22.h. -+ -+Version 2.5.0-pre2, 2003-03-03 -+- Added ABI version to perfctr_info. Replaced PERFCTR_INFO ioctl -+ by read() on the fd, since that allows reading the ABI version -+ even in the case of a version mismatch. Removed binary layout -+ magic number from vperfctr_state. Rearranged perfctr_info to -+ make the 'long' fields 8-byte aligned. -+- Added #ifdef CONFIG_KPERFCTR to <linux/perfctr.h> to ensure -+ that <asm/perfctr.h> isn't included unless CONFIG_KPERFCTR=y. -+ This allows the patched kernel source to compile cleanly also -+ in archs not yet supported by perfctr. -+- Removed PERFCTR_PROC_PID_MODE #define and replaced it with -+ /*notype*/S_IRUSR in the patch files. -+- Added perfctr_vector_init() to <asm-i386/perfctr.h>. Cleaned -+ up arch/i386/kernel/i8259.c patch. -+- Removed apic_lvtpc_irqs[] array. Removed irq.c patch. -+- Updated CONFIG_PERFCTR_INIT_TESTS help text to match reality. -+- Kernel 2.4.21-pre5 added set_cpus_allowed(), which required -+ fixing compat24.h and x86_setup.c. -+- Fixed init.c for kernel 2.5.63 removing EXPORT_NO_SYMBOLS. -+- Cleaned up compat.h by moving 2.2/2.4 stuff to separate files. -+ -+Version 2.5.0-pre1, 2003-02-19 -+- Repair global perfctr API: the target CPUs are now explicit -+ in the calls to write control and read state. Global perfctrs -+ now work on 2.5 SMP kernels (which no longer have smp_num_cpus -+ or cpu_logical_map()), and HT P4s (asymmetric MPs). -+- struct perfctr_info has new bitmask fields for the set of CPUs -+ (cpu_online_map) and forbidden CPUs; dropped the nrcpus field. -+- add cpu_online() compat macro to compat.h -+- VPERFCTR_STOP is subsumed by VPERFCTR_CONTROL. Removed it. -+- Detect K8 as K8 not K7. They are not identical. -+- Makefile cleanup: moved 2.4/2.2 kernel stuff to Makefile24. -+- Makefile fix: removed export-objs for 2.5 kernels. -+- Kconfig fix: don't mention obsolete .o module suffix. 
-+ -+Version 2.4.5, 2003-02-09 -+- Fixed two minor compile warnings in x86_tests.c for 2.5 kernels. -+ -+Version 2.4.4, 2003-01-18 -+- Fixed a bug in iresume() where an interrupt-mode counter could -+ increment unexpectedly, and also miss the overflow interrupt. -+ The following setup would cause the problem: -+ P1 has EVNTSELn in non-interrupt mode, counting some high- -+ frequency event (e.g. INST_RETIRED) in kernel-mode. P2 has -+ EVNTSELn in interrupt-mode, counting some low-frequency event -+ (e.g. MMX_ASSIST) in user-mode. P1 suspends. Since EVNTSELn is -+ in non-interrupt mode, it is not disabled. P2 resumes. First -+ iresume() finds that the CPU cache ID is not P2's, so it reloads -+ PERFCTRn with P2's restart value. Then write_control() reloads -+ EVNTSELn with P2's EVNTSEL. At this point, P2's PERFCTRn has been -+ counting with P1's EVNTSELn since iresume(), so it will no longer -+ equal P2's restart value. And if PERFCTRn overflowed, the overflow -+ will go undetected since P1's EVNTSELn was in non-interrupt mode. -+ To avoid this problem, iresume() now ensures that a counter's -+ control register is disabled before reloading the counter. -+- Fixed some ugly log messages from the new HT P4 init code: -+ * forbidden_mask would be printed as "0X<mask>" (capital X) -+ * finalise_backpatching() could trigger a BUG! printk from -+ p4_write_control() if the CPU the init code runs on was -+ in the forbidden set. At init-time this is not an error. -+ Avoided this by temporarily resetting the forbidden_mask. -+- Added preliminary support for AMD K8 processors with the -+ regular 32-bit x86 kernel. The K8 performance counters appear -+ to be identical or very similar to the K7 performance counters. -+ -+Version 2.4.3, 2002-12-11 -+- Added x86.c:perfctr_cpus_forbidden_mask. This bitmask describes -+ the set of CPUs that must not access the perfctrs. 
On HT P4 MPs, -+ only logical CPU #0 in each package is allowed access -- this -+ avoids the resource conflict that would occur if both logical -+ processors were to access the perfctrs. In other cases (UP or -+ non-HT-P4 MPs) the mask is zero. -+- vperfctr_control() now calls set_cpus_allowed() to ensure that -+ the task stays away from CPUs in perfctr_cpus_forbidden_mask. -+ This is racy with sys_sched_setaffinity(), and possibly some -+ of the kernel's internal set_cpus_allowed() calls, but the race -+ is unlikely to occur in current 2.4 kernels. -+- Cleaned up the parameter passing protocol between vperfctr_ioctl() -+ and the individual vperfctr "system call" procedures. -+- Added safety check in global.c to disallow global-mode perfctrs -+ on asymmetric MPs until the API has been fixed. -+- Added set_cpus_allowed() implementation for 2.4 kernels, except -+ those that already have it as indicated by HAVE_SET_CPUS_ALLOWED: -+ this symbol is added to <linux/config.h> by the kernel patch. -+- 2.2 kernels can't enforce CPU affinity masks, so x86.c warns if -+ a HT P4 MP runs a 2.2 kernel, and falls back to generic x86 mode. -+ Added dummy set_cpus_allowed() macro for 2.2 kernels. -+- x86_compat.h now implements cpuid_ebx() and cpu_has_ht for old kernels. -+- Makefile cleanup: Rules.make is obsolete in 2.5. -+- Compile fixes in x86.c and virtual_stub.c: <linux/fs.h> needs to -+ be included explicitly for the 2.5.50 kernel. -+ -+Version 2.4.2, 2002-11-25 -+- Fixed virtual.c:inc_nrctrs() to handle the -EBUSY case correctly. -+ If the HW was busy (e.g. global running), then the first attempt -+ to open a vperfctr would fail but further attempts would succeed. -+ Updated error propagation to distinguish -EBUSY from -ENOMEM. -+- Updated global.c for preempt-safety. -+- Made the driver safe for preemptible kernels. This required a lot -+ of analysis, but resulted in relatively few actual code changes. -+ (Backport from the perfctr-3.1 branch.) 
-+- Ported to 2.5.48: Replaced MOD_INC_USE_COUNT by try_module_get() -+ and MOD_DEC_USE_COUNT by module_put(). Updated compat.h. -+- Ported to 2.5.45: added Kconfig, removed Config.help. -+ -+Version 2.4.1, 2002-10-12 -+- RedHat 8.0's 2.4.18-14 kernel does EXPORT_SYMBOL(cpu_khz) while -+ the vanilla 2.4.18 does not. This clashes with x86_setup.c's -+ EXPORT_SYMBOL(cpu_khz). I've found no easy way to distinguish -+ between these kernels at C preprocessing time, so I changed -+ x86_setup.c to define a trivial perfctr_cpu_khz() function and -+ EXPORT_SYMBOL that one instead. -+ -+Version 2.4.0, 2002-09-26 -+- Config.help updated to state that Pentium 4 is supported. -+- 2.5.32 moved ptrace_check_attach() declaration to <linux/ptrace.h>. -+- Removed redundant /proc/<pid>/perfctr access control check -+ from vperfctr_stub_open(). Since 2.4.0-pre1 this check didn't -+ match the real one, which prevented remote opens when the -+ driver was built as a module. -+ -+Version 2.4.0-pre2, 2002-08-27 -+- vperfctr_control() now allows the user to specify that some PMC -+ sums are not to be cleared when updating the control. -+ There is a new bitmap field `preserve' in struct vperfctr_control: -+ if bit i is set then PMC(i)'s sum is not cleared. -+ `preserve' is a simple `unsigned long' for now, since this type -+ fits all currently known CPU types. -+ This change breaks binary compatibility, but user-space code which -+ clears the entire control record before filling in relevant fields -+ will continue to work as before after a recompile. -+ This feature removes a limitation which some people felt was a -+ problem for some usage scenarios. -+ -+Version 2.4.0-pre1, 2002-08-12 -+- Initial implementation of a new remote-control API for virtual -+ per-process perfctrs. A monitor process may access a target -+ process' perfctrs via /proc/pid/perfctr and operations on that -+ file, if the monitor holds the target under ptrace ATTACH control. -+ Updated virtual.c to allow remote access. 
-+ Updated x86.c:perfctr_cpu_ireload() to work also in the remote -+ control case on SMP machines. -+ -+Version 2.3.12, 2002-08-12 -+- Trivial comment fixes in compat.h and x86_compat.h. -+- Removed __vperfctr_sample(), vperfctr_stub.sample, and bug_sample() -+ from UP builds, since they are needed only on SMP. -+ -+Version 2.3.11, 2002-07-21 -+- Accumulated sums are now maintained for interrupt-mode perfctrs. -+ User-space can use the standard syscall-less algorithm for computing -+ these counters' current sums, should that be needed. -+ -+Version 2.3.10, 2002-07-19 -+- Added PERFCTR_X86_INTEL_P4M2 CPU type for Model 2 P4s, since -+ they have ESCR Event Mask changes in a few events. -+- The driver now supports replay tagging events on P4, using the -+ pebs_enable and pebs_matrix_vert control fields added in 2.3.8. -+- Some Pentium MMX and Pentium Pro processors have an erratum -+ (Pentium erratum #74, Pentium Pro erratum 26) which causes SMM -+ to shut down if CR4.PCE is set. intel_init() now clears the -+ RDPMC feature on the affected steppings, to avoid the problem. -+- perfctr_cpu_release() now clears the hardware registers and -+ invalidates the per-cpu cache. This should allow the counter -+ hardware to power down when not used, especially on P4. -+- Callers of update_control() have no active i-mode counters. -+ Documented this as a precondition, and changed update_control() -+ to not call isuspend(). update_control() no longer needs hardware -+ access, which should ease a port to CONFIG_PREEMPT=y. -+ -+Version 2.3.9, 2002-06-27 -+- Updated p4_escr_addr() in x86.c to match the latest revision of -+ Intel's IA32 Volume 3 manual, #245472-007. An error in previous -+ revisions of this document caused the driver to program the wrong -+ ESCR in some cases. (CCCRs 12/13/16 with ESCR_SELECT(2) were mapped -+ to SSU_ESCR0 instead of RAT_ESCR0, affecting the uop_type event.) -+ -+Version 2.3.8, 2002-06-26 -+- Added counter overflow interrupt support for Intel P4. 
-+- 2.5.23 dropped smp_num_cpus and cpu_logical_map(). Added -+ temporary workarounds to x86.c and global.c to allow compilation -+ and testing under 2.5. May have to change the API (esp. global's) -+ to be based on the sparse cpu_online_map instead. -+- RedHat's 2.4.9-34 defines cpu_relax(). Updated compat.h. -+- Added pebs_enable and pebs_matrix_vert fields (currently unused) -+ to perfctr_cpu_control to support replay tagging events on P4. -+ Updated the perfctr_cpu_state binary layout magic number. -+- Silenced redefinition warnings for MSR_P6_PERFCTR0 and cpu_has_mmx. -+- Updated Makefile for the 2.5.19 kernel's Makefile changes. -+- Merged the P6 and K7 isuspend/iresume/write_control driver code. -+- Added a VC3 specific clear_counters() procedure. -+- Removed pointless code from perfctr_cpu_identify_overflow(). -+- Removed _vperfctr_get/set_thread() wrappers and thread->perfctr -+ clobber checks from the DEBUG code. Removed unused "ibuf" and -+ obsolete si_code fields from vperfctr state and control objects. -+ Updated the vperfctr state magic number. -+- Fixed the CONFIG_PREEMPT anti-dependency check in Config.in. -+- vperfctr_control() now preserves the TSC sum on STOP;CONTROL -+ transitions. The failure to do this caused problems for the -+ PAPI P4 support being developed. -+ -+Version 2.3.7, 2002-04-14 -+- Kernel 2.5.8-pre3 changed the way APIC/SMP interrupt entries -+ are defined. Defining these with asm() in C is no longer -+ practical, so the kernel patch for 2.5.8-pre3 now defines -+ the perfctr interrupt entry in arch/i386/kernel/entry.S. -+- Permit use of cascading counters on P4: in the slave counter -+ one sets the CASCADE flag instead of the ENABLE flag. -+- Added P4 hyperthreading bit field definitions. -+- Preliminary infrastructure to support a new remote-control -+ interface via ptrace(). Updates to compat.h, virtual.c, -+ virtual_stub.c, and x86_setup.c. 
ptrace_check_attach() -+ emulation for older kernels is in x86_setup.c since -+ virtual_stub.c isn't compiled if the driver isn't a module. -+ -+Version 2.3.6, 2002-03-21 -+- Rewrote sys_vperfctr_control() to do a proper suspend before -+ updating the control, and to skip trying to preserve the TSC -+ start value around the resume. This cleaned up the code and -+ eliminated the bogus "BUG! resuming non-suspended perfctr" -+ warnings that control calls to active perfctrs caused. -+- Rewrote sys_vperfctr_iresume() to not preserve the TSC start -+ value around the resume. Since we had just done a suspend(), -+ this would cause double-accounting of the TSC. -+ -+Version 2.3.5, 2002-03-17 -+- Added detection of the VIA C3 Ezra-T processor. -+- CPU detection now uses current_cpu_data instead of boot_cpu_data, -+ to avoid the boot_cpu_data.x86_vendor bug which is present in -+ all current 2.2/2.4/2.5 kernels. The bug caused the x86_vendor -+ field to be cleared on SMP machines, which in turn tricked the -+ driver to identify MP AMD K7 machines as MP Intel P6, with -+ disastrous results when the wrong MSRs were programmed. -+- Updated compat.h for /proc/<pid>/ inode change in 2.5.4. -+- Added a check to prevent building on preemptible 2.4/2.5 kernels, -+ since the driver isn't yet safe for those. -+- Put perfctr's configuration help text in Config.help in this -+ directory: kernel 2.5.3-pre5 changed from having a common -+ Configure.help file to having local Config.help files. -+ -+Version 2.3.4, 2002-01-23 -+- Updated virtual.c for remap_page_range() change in 2.5.3-pre1. -+ Added emulation for older kernels to compat.h. -+- Permit use of tagging on P4 for at-retirement counting. This may -+ not yet work as expected, since up-stream (tag producing) counters -+ aren't disabled at context switches: a process may therefore see -+ more tagged uops than expected. -+- Fixed uses of __FUNCTION__ to comply with changes in GCC 3.0.3.
-+ -+Version 2.3.3, 2001-12-31 -+- Minor x86.c cleanup: reordered function definitions so that -+ write_control comes after isuspend/iresume: this makes it easier -+ to follow the runtime control flow. -+- Fixed isuspend()/iresume()'s broken cache checking protocol. The -+ old protocol didn't handle process migration across CPUs in SMP -+ machines correctly, as illustrated by the following scenario: -+ P1 runs on CPU1 and suspends. P1 and CPU1 now have the same -+ cache id (->k1.id). P1 is resumed and suspended on CPU2: the state -+ in CPU1 is now stale. Then P1 is resumed on CPU1, and no other -+ process has been using CPU1's performance counters since P1's last -+ suspend on CPU1. The old protocol would see matching cache ids and -+ that P1's i-mode EVNTSELs are stopped, so it would accept the cache -+ and resume P1 with CPU1's stale PERFCTRS values. -+ In the new protocol isuspend() records the active CPU in the -+ state object, and iresume() checks if both the CPU and the control -+ id match. The new protocol is also simpler since iresume() no longer -+ checks if the i-mode EVNTSELs are cleared or not. -+- P6 nasty i-mode to a-mode context switch bug fixed: p6_isuspend() -+ used to simply clear EVNTSEL0's Enable flag in order to stop all -+ i-mode counters. Unfortunately, that was insufficient as shown by -+ the following case (which actually happened). -+ P1 has EVNTSEL0 in a-mode and EVNTSEL1 in i-mode. P1 suspends: -+ PERFCTR1 is stopped but EVNTSEL1 is still in i-mode. P2 has EVNTSEL0 -+ in a-mode and no EVNTSEL1. P2 resumes and updates EVNTSEL0. This -+ activates not only P2's PERFCTR0 but also the dormant PERFCTR1. If -+ PERFCTR1 overflows, then P2 will receive an unexpected interrupt. If -+ PERFCTR1 doesn't overflow, but P2 suspends and P1 resumes, then P1 -+ will find that PERFCTR1 has a larger than expected value. 
-+ p6_isuspend() and p6_iresume() were changed to ignore the global -+ Enable flag and to disable/enable each i-mode EVNTSEL individually, -+ just like how it's done on the K7. -+- x86.c cleanups: P5MMX, MII, C6, VC3, P6, K7, and P4 now all -+ use the same rdpmc_read_counters() method. VIA C3 now uses -+ p6_write_control() instead of its own method. -+- Removed "pmc_map[] must be identity" restriction from P6 and K7. -+ The API uses the virtual counter index to distinguish a-mode -+ and i-mode counters, but P6 events aren't entirely symmetric: -+ this led to some strange cases with the old pmc_map[] rule. -+ P6 and K7 isuspend() now need access to the control, so -+ update_control() and its callers had to be changed to allow it -+ to isuspend() _before_ the new control is installed. -+- P4 write_control fixes: changed the ESCR cache to be indexed by -+ MSR offset from 0x3A0, and changed P4 write_control to index the -+ CCCR/ESCR cache with physical instead of virtual indices. Added -+ call to debug_evntsel_cache(), after updating it for pmc_map[]. -+- Added P4 and Generic support to x86_tests.c, and some cleanups. -+ -+Version 2.3.2, 2001-11-19 -+- P4 fix: the mapping from CCCR 17 to its associated ESCRs was -+ wrong due to an off-by-one error in x86.c:p4_escr_addr(). -+- P4 fix: also clear the PEBS MSRs when initialising the driver. -+- Minor cleanup in x86.c: replaced the "clear MSRs" loops with -+ calls to a helper procedure. -+ -+Version 2.3.1, 2001-11-06 -+- Microscopic P4 cleanups. Testing on my new P4 box has confirmed -+ that the PMAVAIL flag in MSR_IA32_MISC_ENABLE is read-only. -+ -+Version 2.3, 2001-10-24 -+- Added support for multiple interrupt-mode virtual perfctrs -+ with automatic restart. Added an identify_overflow() method -+ to x86.c to identify and reset the overflowed counters. -+ Added checks to ensure that the user-specified restart values -+ for interrupt-mode counters are negative.
-+ Updated virtual.c's signal delivery interface to pass a -+ bitmask describing which counters overflowed; the siginfo -+ si_code is now fixed as SI_PMC_OVF (fault-class). -+- Fixed some typos in x86.c. Added a note about the C3 Ezra. -+- Added EXPORT_NO_SYMBOLS to init.c, for compatibility with -+ announced changes in modutils 2.5. -+ -+Version 2.2, 2001-10-09 -+- Added preliminary support for the Pentium 4. Only basic stuff -+ for now: no cascading counters, overflow interrupts, tagged -+ micro-ops, or use of DS/PEBS. The code compiles but hasn't been -+ tested on an actual Pentium 4. -+ -+Version 2.1.4, 2001-09-30 -+- No driver-level changes. -+ -+Version 2.1.3, 2001-09-13 -+- Fixed a compilation problem where virtual_stub couldn't be compiled -+ in modular kernels older than 2.2.20pre10 if KMOD was disabled, due -+ to an incompatible stub definition of request_module(). -+- Replaced most occurrences of "VIA Cyrix III / C3" with "VIA C3". -+ -+Version 2.1.2, 2001-09-05 -+- Added MODULE_LICENSE() tag, for compatibility with the tainted/ -+ non-tainted kernel stuff being put into 2.4.9-ac and modutils. -+- VIA C3 support is not "preliminary" any more. Testing has revealed -+ that the reserved bits in the C3's EVNTSEL1 have no function and -+ need not be preserved. The driver now fills these bits with zeroes. -+ (Thanks to Dave Jones @ SuSE for running these tests.) -+- Minor bug fix in the perfctr interrupt assembly code. -+ (Inherited from the 2.4 kernel. Fixed in 2.4.9-ac4.) -+ -+Version 2.1.1, 2001-08-28 -+- Preliminary recognition of Pentium 4 processors, including -+ checking the IA32_MISC_ENABLE MSR. -+- Moved %cr4 access functions from <asm-i386/perfctr.h> to -+ x86_compat.h, to work around changes in 2.4.9-ac3. -+- More %cr4 cleanups possible since the removal of dodgy_tsc() -+ in Version 2.1: moved {set,clear}_in_cr4_local() into x86.c, -+ and eliminated the set_in_cr4() compat macro. 
-+- Fixed a bug in x86.c:finalise_backpatching(): the fake cstatus -+ mustn't include i-mode counters unless we have PCINT support. -+ Failure to check this caused fatal init-time oopses in some -+ configs (CONFIG_X86_UP_APIC set but no local APIC in the CPU). -+- Minor comment updates in x86.c due to AMD #22007 Revision J. -+- Removed '%' before 'cr4' in printouts from x86_tests.c, to -+ avoid the '%' being mutated by log-reading user-space code. -+ -+Version 2.1, 2001-08-19 -+- Fixed a call backpatching bug, caused by an incompatibility -+ between the 2.4 and 2.2 kernels' xchg() macros. The 2.2 version -+ lacks a "volatile" causing gcc to remove the entire statement -+ if xchg() is used for side-effect only. Reverted to a plain -+ assignment, which is safe since the 2.0.1 backpatching changes. -+- Fixed a bug where an attempt to use /proc/<pid>/perfctr on an -+ unsupported processor would cause a (well-behaved) kernel oops, -+ due to calling a NULL function pointer in x86.c. vperfctr_open() -+ now returns -ENODEV if virtual.c hasn't been initialised. -+- Removed the WinChip configuration option, the dodgy_tsc() callback, -+ and the clr_cap_tsc() x86_compat macro. WinChip users should configure -+ for generic 586 or less and use the kernel's "notsc" boot parameter. -+ This cleans up the driver and the 2.4 kernel patches, at the expense -+ of more code in the 2.2 kernel patches to implement "notsc" support. -+- Minor cleanup: moved version number definition from init.c to -+ a separate file, version.h. -+ -+Version 2.0.1, 2001-08-14 -+- The unsynchronised backpatching in x86.c didn't work on SMP, -+ due to Pentium III erratum E49, and similar errata for other -+ P6 processors. (The change in 2.0-pre6 was insufficient.) -+ x86.c now finalises the backpatching at driver init time, -+ by "priming" the relevant code paths. To make this feasible, -+ the isuspend() and iresume() methods are now merged into -+ the other high-level methods; virtual.c became a bit cleaner.
-+- Removed obsolete "WinChip pmc_map[] must be identity" check. -+ -+Version 2.0, 2001-08-08 -+- Resurrected partial support for interrupt-mode virtual perfctrs. -+ virtual.c permits a single i-mode perfctr, in addition to TSC -+ and a number of a-mode perfctrs. BUG: The i-mode PMC must be last, -+ which constrains CPUs like the P6 where we currently restrict -+ the pmc_map[] to be the identity mapping. (Not a problem for -+ K7 since it is symmetric, or P4 since it is expected to use a -+ non-identity pmc_map[].) -+ New perfctr_cpu_ireload() procedure to force reload of i-mode -+ PMCs from their start values before resuming. Currently, this -+ just invalidates the CPU cache, which forces the following -+ iresume() and resume() to do the right thing. -+ perfctr_cpu_update_control() now calls setup_imode_start_values() -+ to "prime" i-mode PMCs from the control.ireset[] array. -+- Bug fix in perfctr_cpu_update_control(): start by clearing cstatus. -+ Prevents a failed attempt to update the control from leaving the -+ object in a state with old cstatus != 0 but new control. -+ -+Version 2.0-pre7, 2001-08-07 -+- Cleaned up the driver's debugging code (virtual, x86). -+- Internal driver rearrangements. The low-level driver (x86) now handles -+ sampling/suspending/resuming counters. Merged counter state (sums and -+ start values) and CPU control data to a single "CPU state" object. -+ This simplifies the high-level drivers, and permits some optimisations -+ in the low-level driver by avoiding the need to buffer tsc/pmc samples -+ in memory before updating the accumulated sums (not yet implemented). -+- Removed the read_counters, write_control, disable_rdpmc, and enable_rdpmc -+ methods from <asm/perfctr.h>, since they have been obsoleted by the -+ new suspend/resume/sample methods. -+- Rearranged the 'cstatus' encoding slightly by putting 'nractrs' in -+ the low 7 bits; this was done because 'nractrs' is retrieved more -+ often than 'nrctrs'. 
-+- Removed the obsolete 'status' field from vperfctr_state. Exported -+ 'cstatus' and its access methods to user-space. (Remove the -+ control.tsc_on/nractrs/nrictrs fields entirely?) -+- Removed WinChip "fake TSC" support. The user-space library can now -+ sample with slightly less overhead on sane processors. -+- WinChip and VIA C3 now use p5mmx_read_counters() instead of their -+ own versions. -+ -+Version 2.0-pre6, 2001-07-27 -+- New patches for kernels 2.4.6, 2.4.7, and 2.4.7-ac1. -+- Sampling bug fix for SMP. Normally processes are suspended and -+ resumed many times per second, but on SMP machines it is possible -+ for a process to run for a long time without being suspended. -+ Since sampling is performed at the suspend and resume actions, -+ a performance counter may wrap around more than once between -+ sampling points. When this occurs, the accumulated counts will -+ be highly variable and much lower than expected. -+ A software timer is now used to ensure that sampling deadlines -+ aren't missed on SMP machines. (The timer is run by the same code -+ which runs the ITIMER_VIRTUAL interval timer.) -+- Bug fix in the x86 "redirect call" backpatching routine. To be -+ SMP safe, a bus-locked write to the code must be used. -+- Bug fix in the internal debugging code (CONFIG_PERFCTR_DEBUG). -+ The "shadow" data structure used to detect if a process' perfctr -+ pointer has been clobbered could cause lockups with SMP kernels. -+ Rewrote the code to be simpler and more robust. -+- Minor performance tweak for the P5/P5MMX read counters procedures, -+ to work around the P5's cache which doesn't allocate a cache line -+ on a write miss. -+- To avoid undetected data layout mismatches, the user-space library -+ now checks the data layout version field in a virtual perfctr when -+ it is being mmap:ed into the user's address space. -+- A few minor cleanups. 
-+ -+Version 2.0-pre5, 2001-06-11 -+- Internally use a single 'cstatus' field instead of the three -+ tsc_on/nractrs/nrictrs fields. Should reduce overhead slightly. -+- Reorder the fields in cpu_control so that 'cstatus' and other -+ frequently used fields get small offsets -- avoids some disp32 -+ addressing modes in timing-critical code. -+- Fixed a bug in p6_iresume where it forgot to invalidate the -+ EVNTSEL cache, causing p6_write_control to fail to reload the -+ MSRs. (K7 had a similar bug.) Since i-mode support is disabled -+ at the moment, no-one was actually bitten by this. -+- Fixed another iresume/write_control cache invalidation bug where a -+ switch to an "uninitialised" CPU would fail to initialise the MSRs. -+- Added a CONFIG_PERFCTR_DEBUG option to enable internal consistency -+ checks. Currently, this checks that a task's vperfctr pointer -+ isn't clobbered behind our backs, that resume and suspend for -+ a vperfctr are performed on the same CPU, and that the EVNTSEL -+ cache is semi-consistent when reloading is optimised away. -+ ("semi" because it only checks that the cache agrees with the -+ user's control data, and not that the cache agrees with the MSRs.) -+- Minor cleanups. -+ -+Version 2.0-pre4, 2001-04-30 -+- Cleanups in x86.c. #defines introduced for magic constants. -+ More sharing of procedures between different CPU drivers. -+ Fixed a bug where k7_iresume() could cause k7_write_control() -+ to fail to reload the correct EVNTSELs. -+ The WinChip C6/2/3 driver now "fakes" an incrementing TSC. -+- General cleanups: s/__inline__/inline/ following Linux kernel -+ coding standards, and renamed the low-level control objects to -+ cpu_control to distinguish them from {v,g}perfctr_control objects. 
-+- O_CREAT is now interpreted when /proc/self/perfctr is opened: -+ if the vperfctr does not exist, then it is created; if the -+ vperfctr does exist, then EEXIST is returned (unfortunately -+ O_EXCL doesn't work, since it's intercepted by the VFS layer). -+ "perfex -i" uses this to avoid having to create a vperfctr when -+ only an INFO command is to be issued. -+ libperfctr.c:vperfctr_open() uses this to decide whether to -+ UNLINK the newly opened vperfctr in case of errors or not. -+- Cleaned up virtual.c's 2.4/2.2 VFS interface code a little, -+ and eliminated the OWNER_THIS_MODULE compat macro. -+- Added MOD_{INC,DEC}_USE_COUNTs to virtual.c's file_operations -+ open and release procedures for 2.2 kernels. This should -+ simulate 2.4's fops_get/put at >open() and >release(). -+ -+Version 2.0-pre3, 2001-04-17 -+- Interrupt-mode virtual perfctrs are temporarily disabled since -+ x86.c doesn't yet detect which PMC overflowed. The old API -+ could be made to work, but it was broken anyway. -+- Integrated the new P4-ready data structures and APIs. -+ The driver compiles but the user-space stuff hasn't been -+ updated yet, so there may be some remaining bugs. -+ -+ I have not yet committed to all details of this API. Some -+ things, like accumulating counters in virtual.c and global.c, -+ are uglier now, and going from a single "status == nrctrs" -+ field to three separate fields (tsc_on, nrctrs, nrictrs) -+ cannot be good for performance. -+ -+ In the new API the control information is split in separate -+ arrays depending on their use, i.e. a struct-of-arrays layout -+ instead of an array-of-struct layout. The advantage of the -+ struct-of-arrays layout is that it should cause fewer cache -+ lines to be touched at the performance-critical operations. -+ The disadvantage is that the layout changes whenever the -+ number of array elements has to be increased -- as is the -+ case for the future Pentium 4 support (18 counters). 
-+ -+Version 2.0-pre2, 2001-04-07 -+- Removed automatic inheritance of per-process virtual perfctrs -+ across fork(). Unless wait4() is modified, it's difficult to -+ communicate the final values back to the parent: the now -+ abandoned code did this in a way which made it impossible -+ to distinguish one child's final counts from another's. -+ Inheritance can be implemented in user-space anyway, so the -+ loss is not great. The interface between the driver and the rest -+ of the kernel is now smaller and simpler than before. -+- Simulating cpu_khz by a macro in very old kernels broke since -+ there's also a struct field with that name :-( Instead of -+ putting the ugly workaround back in, I decided to drop support -+ for kernels older than 2.2.16. -+- Preliminary support for the VIA C3 processor -- the C3 is -+ apparently a faster version of the VIA Cyrix III. -+- Added rdtsc cost deduction to the init tests code, and changed -+ it to output per-instruction costs as well. -+- More cleanups, making 2.2 compatibility crud less visible. -+ -+Version 2.0-pre1, 2001-03-25 -+- First round of API and coding changes/cleanups for version 2.0: -+ made perfctr_info.version a string, moved some perfctr_info inits -+ to x86.c and eliminated some redundant variables, removed dead VFS -+ code from virtual.c, removed obsolete K7 tests from x86_tests.c, -+ removed mmu_cr4_features wrappers from x86_compat.h, minor cleanup -+ in virtual_stub.c. -+- Fixed an include file problem which made some C compilers (not gcc) -+ fail when compiling user-space applications using the driver. -+- Added missing EXPORT_SYMBOL declarations needed by the UP-APIC PM -+ code when the driver is built as a module. -+- Preliminary changes in x86.c to deal with UP-APIC power management -+ issues in 2.4-ac kernels. The PM callback is only a stub for now. -+ -+Version 1.9, 2001-02-13 -+- Fixed compilation problems for 2.2 and SMP kernels. -+- Found updated documentation on "VIA Cyrix III". 
Apparently, there -+ are two distinct chips: the older Joshua (a Cyrix design) and the -+ newer Samuel (a Centaur design). Our current code supported Joshua, -+ but mistook Samuel for Joshua. Corrected the identification of Samuel -+ and added explicit support for it. Samuel's EVNTSEL1 is not well- -+ documented, so there are some new Samuel-specific tests in x86_tests.c. -+- Added preliminary interrupt-mode support for AMD K7. -+- Small tweaks to virtual.c's interrupt handling. -+ -+Version 1.8, 2001-01-23 -+- Added preliminary interrupt-mode support to virtual perfctrs. -+ Currently for P6 only, and the local APIC must have been enabled. -+ Tested on 2.4.0-ac10 with CONFIG_X86_UP_APIC=y. -+ When an i-mode vperfctr interrupts on overflow, the counters are -+ suspended and a user-specified signal is sent to the process. The -+ user's signal handler can read the trap pc from the mmap:ed vperfctr, -+ and should then issue an IRESUME ioctl to restart the counters. -+ The next version will support buffering and automatic restart. -+- Some cleanups in the x86.c init and exit code. Removed the implicit -+ smp_call_function() calls from x86_compat.h. -+ -+Version 1.7, 2001-01-01 -+- Updated Makefile for 2.4.0-test13-pre3 Rules.make changes. -+- Removed PERFCTR_ATTACH ioctl from /dev/perfctr, making the -+ vperfctrs only accessible via /proc/self/perfctr. Removed -+ the "attach" code from virtual.c, and temporarily commented -+ out the "vperfctr fs" code. Moved /dev/perfctr initialisation -+ and implementation from init.c to global.c. -+- Eliminated CONFIG_VPERFCTR_PROC, making /proc/pid/perfctr -+ mandatory if CONFIG_PERFCTR_VIRTUAL is set. -+- Some 2.2/2.4 compatibility cleanups. -+- VIA Cyrix III detection bug fix. Contrary to VIA's documentation, -+ the Cyrix III vendor field is Centaur, not Cyrix. -+ -+Version 1.6, 2000-11-21 -+- Preliminary implementation of /proc/pid/perfctr. 
Seems to work, -+ but virtual.c and virtual_stub.c are again filled with -+ #if LINUX_VERSION_CODE crap which will need to be cleaned up. -+ The INFO ioctl is now implemented by vperfctrs too, to avoid the -+ need for opening /dev/perfctr. -+- virtual.c now puts the perfctr pointer in filp->private_data -+ instead of inode->u.generic_ip. The main reason for this change -+ is that proc-fs places a dentry pointer in inode->u.generic_ip. -+- sys_vperfctr_control() no longer resets the virtual TSC -+ if it already is active. The virtual TSC therefore runs -+ continuously from its first activation until the process -+ stops or unlinks its vperfctrs. -+- Updates for 2.4.0-test11pre6. Use 2.4-style cpu_has_XXX -+ feature testing macros. Updated x86_compat.h to implement -+ missing cpu_has_mmx and cpu_has_msr, and compatibility -+ macros for 2.2. Changed vperfctr_fs_read_super() to use -+ new_inode(sb) instead of get_empty_inode() + some init code. -+- Updates for 2.4.0-test9. Fixed x86_compat.h for cpu_khz change. -+ Since drivers/Makefile was converted to the new list style, -+ it became more difficult to handle CONFIG_PERFCTR=m. Changed -+ Config.in to set CONFIG_KPERFCTR=y when CONFIG_PERFCTR != n, -+ resulting in a much cleaner kernel patch for 2.4.0-test9. -+- Removed d_alloc_root wrapper since 2.2 doesn't need it any more. -+- When building for 2.2.18pre, use some of its 2.4 compatibility -+ features (module_init, module_exit and DECLARE_MUTEX). -+- Updates for 2.4.0-test8: repaired kernel patch for new parameter -+ in do_fork, and fixed CLONE_PERFCTR conflict with CLONE_THREAD. -+ -+Version 1.5, 2000-09-03 -+- Dropped support for intermediate 2.3 and early 2.4.0-test kernels. -+ The code now supports kernels 2.2.xx and 2.4.0-test7 or later only. -+ Cleanups in compat.h and virtual.c. -+- Rewrote the Makefile to use object file lists instead of conditionals.
-+ This gets slightly hairy since kernel extensions are needed even -+ when the driver proper is built as a module. -+- Removed the definition of CONFIG_PERFCTR_X86 from Config.in. -+ Use the 2.4 standard CONFIG_X86 instead. The 2.2.xx kernel -+ patches now define CONFIG_X86 in arch/i386/config.in. -+- Cleaned up the vperfctr inheritance filter. Instead of setting -+ a disable flag (CLONE_KTHREAD) when kernel-internal threads are -+ created, I now set CLONE_PERFCTR in sys_fork and sys_vfork. -+- /dev/perfctr no longer accepts the SAMPLE and UNLINK ioctls. -+ All operations pertaining to a process' virtual perfctrs must -+ be applied to the fd returned from the ATTACH ioctl. -+- Removed the remote-control features from the virtual perfctrs. -+ Significant simplifications in virtual.c. Removed some now -+ unused stuff from compat.h and virtual_stub.c. -+ -+Version 1.4, 2000-08-11 -+- Fixed a memory leak bug in virtual.c. An extraneous dget() in -+ get_vperfctr_filp() prevented reclaiming the dentry and inode -+ allocated for a vperfctr file. -+- Major changes to the VFS interface in virtual.c. Starting with -+ 2.4.0-test6, inode->i_sb == NULL no longer works. Added code to -+ register a "vperfctr" fs and define a superblock and a mount point. -+ Completely rewrote the dentry init code. Most of the new code is -+ adapted from fs/pipe.c, with simplifications and macros to continue -+ supporting 2.2.x kernels. `ls -l /proc/*/fd/' now prints recognizable -+ names for vperfctr files. -+- Cleaned up virtual.c slightly. Removed "#if 1" tests around the -+ vperfctr inheritance code. Rewrote vperfctr_alloc and vperfctr_free -+ to use the virt_to_page and {Set,Clear}PageReserved macros; -+ also updated compat.h to provide these for older kernels. -+- Updated for 2.4.0-test3: a dummy `open' file operation is no longer -+ required by drivers/char/misc.c. -+- Updated for `owner' field in file_operations added in 2.4.0-test2. 
-+ Removed MOD_{INC,DEC}_USE_COUNT from init.c (except when compiling -+ for 2.2.x) and virtual.c. Added MOD_{INC,DEC}_USE_COUNT to the -+ reserve/release functions in x86.c -- needed because the driver -+ may be active even if no open file refers to it. Using can_unload -+ in the module struct instead is possible but not as tidy. -+ -+Version 1.3, 2000-06-29 -+- Implemented inheritance for virtual perfctrs: fork() copies the -+ evntsel data to the child, exit() stops the child's counters but -+ does not detach the vperfctr object, and wait() adds the child's -+ counters to the parent's `children' counters. -+ Added a CLONE_KTHREAD flag to prevent inheritance to threads -+ created implicitly by request_module() and kernel_thread(). -+- Fixed a half-broken printk() in x86_tests.c. -+- Added checks to virtual.c to prevent the remote-control interface -+ from trying to activate dead vperfctrs. -+- Updated vperfctr_attach() for changes in 2.3.99-pre7 and 2.4.0-test2. -+- Fixed a problem introduced in 1.2 which caused linker errors if -+ CONFIG_PERFCTR=m and CONFIG_PERFCTR_INIT_TESTS=y. -+- Export CPU kHz via a new field in PERFCTR_INFO ioctl, to enable -+ user-space to map accumulated TSC counts to actual time. -+ -+Version 1.2, 2000-05-24 -+- Added support for generic x86 processors with a time-stamp counter -+ but no performance-monitoring counters. By using the driver to -+ virtualise the TSC, accurate cycle-count measurements are now -+ possible on PMC-less processors like the AMD K6. -+- Removed some of the special-casing of the x86 time-stamp counter. -+ It's now "just another counter", except that no evntsel is -+ needed to enable it. -+- WinChip bug fix: the "fake TSC" code would increment an -+ uninitialised counter. -+- Reorganised the x86 driver. Moved the optional init-time testing -+ code to a separate source file. -+- Miscellaneous code cleanups and naming convention changes. 
-+ -+Version 1.1, 2000-05-13 -+- vperfctr_attach() now accepts pid 0 as an alias for the current -+ process. This reduces the number of getpid() calls needed in -+ the user-space library. (Suggested by Ulrich Drepper.) -+- Added support for the VIA Cyrix III processor. -+- Tuned the x86 driver interface. Replaced function pointers -+ with stubs which rewrite callers to invoke the correct callees. -+- Added ARRAY_SIZE definition to compat.h for 2.2.x builds. -+- Updated for 2.3.48 inode changes. -+- Moved code closer to 2.3.x coding standards. Removed init_module -+ and cleanup_module, added __exit, module_init, and module_exit, -+ and extended "compat.h" accordingly. Cleaned up <linux/perfctr.h> -+ and <asm-i386/perfctr.h> a little. -+ -+Version 1.0, 2000-01-31 -+- Prepared the driver to cope with non-x86 architectures: -+ - Moved generic parts of <asm-i386/perfctr.h> to <linux/perfctr.h>. -+ - Merged driver's private "x86.h" into <asm-i386/perfctr.h>. -+ - Config.in now defines CONFIG_PERFCTR_${ARCH}, and Makefile uses -+ it to select appropriate arch-dependent object files -+- The driver now reads the low 32 bits of the counters, -+ instead of 40 or 48 bits zero-extended to 64 bits. -+ Sums are still 64 bits. This was done to reduce the number -+ of cache lines needed for certain data structures, to -+ simplify and improve the performance of the sampling -+ procedures, and to change 64+(64-64) arithmetic to 64+(32-32) -+ for the benefit of gcc on x86. This change doesn't reduce -+ precision, as long as no event occurs more than 2^32 times -+ between two sampling points. -+- PERFCTR_GLOBAL_READ now forces all CPUs to be sampled, if the -+ sampling timer isn't running. -+ -+Version 0.11, 2000-01-30 -+- Added a missing EXPORT_SYMBOL which prevented the driver -+ from being built as a module in SMP kernels. -+- Support for the CPU sampling instructions (i.e. RDPMC and -+ RDTSC on x86) is now announced explicitly by PERFCTR_INFO. 
-+- The x86 hardware driver now keeps CR4.PCE globally enabled. -+ There are two reasons for this. First, the cost of toggling -+ this flag at process suspend/resume is high. Second, changes -+ in kernel 2.3.40 imply that any processor's %cr4 may be updated -+ asynchronously from the global variable mmu_cr4_features. -+ -+Version 0.10, 2000-01-23 -+- Added support for global-mode perfctrs (global.c). -+- There is now a config option controlling whether to -+ perform init-time hardware tests or not. -+- Added a hardware reserve/release mechanism so that multiple -+ high-level services don't simultaneously use the hardware. -+- The driver is now officially device <char,major 10,minor 182>. -+- Tuned the 64-bit tsc/msr/pmc read operations in x86.c. -+- Support for virtual perfctrs can now be enabled or disabled -+ via CONFIG_PERFCTR_VIRTUAL. -+- Added support for the WinChip 3 processor. -+- Split the code into several files: x86.c (x86 drivers), -+ virtual.c (virtualised perfctrs), setup.c (boot-time actions), -+ init.c (driver top-level and init code). -+ -+Version 0.9, 2000-01-02 -+- The driver can now be built as a module. -+- Dropped sys_perfctr() system call and went back to using a -+ /dev/perfctr character device. Generic operations are now -+ ioctl commands on /dev/perfctr, and control operations on -+ virtual perfctrs are ioctl commands on their file descriptors. -+ Initially this change was done because new system calls in 2.3.x -+ made maintenance and binary compatibility with 2.2.x hard, but -+ the new API is actually cleaner than the previous system call. -+- Moved this code from arch/i386/kernel/ to drivers/perfctr/. -+ -+Version 0.8, 1999-11-14 -+- Made the process management callback functions inline to -+ reduce scheduling overhead for processes not using perfctrs. -+- Changed the 'status' field to contain the number of active -+ counters. Changed read_counters, write_control, and accumulate -+ to use this information to avoid unnecessary work. 
-+- Fixed a bug in k7_check_control() which caused it to -+ require all four counters to be enabled. -+- Fixed sys_perfctr() to return -ENODEV instead of -ENOSYS -+ if the processor doesn't support perfctrs. -+- Some code cleanups. -+- Evntsel MSRs are updated lazily, and counters are not written to. -+ -+ The following table lists the costs (in cycles) of various -+ instructions which access the counter or evntsel registers. -+ The table was derived from data collected by init-time tests -+ run by previous versions of this driver. -+ -+ Processor P5 P5MMX PII PIII K7 -+ Clock freq. (MHz) 133 233 266 450 500 -+ -+ RDPMC n/a 14 31 36 13 -+ RDMSR (counter) 29 28 81 80 52 -+ WRMSR (counter) 35 37 97 115 80 -+ WRMSR (evntsel) 33 37 88 105 232 -+ -+ Several things are apparent from this table: -+ -+ 1. It's much cheaper to use RDPMC than RDMSR to read the counters. -+ 2. It's much more expensive to reset a counter than to read it. -+ 3. It's expensive to write to an evntsel register. -+ -+ As of version 0.8, this driver uses the following strategies: -+ * The evntsel registers are updated lazily. A per_cpu_control[] -+ array caches the contents of each CPU's evntsel registers, -+ and only when a process requires a different setup are the -+ evntsel registers written to. In most cases, this eliminates the -+ need to reprogram the evntsel registers when switching processes. -+ The older drivers would write to the evntsel registers both at -+ process suspend and resume. -+ * The counter registers are read both at process resume and suspend, -+ and the difference is added to the process' accumulated counters. -+ The older drivers would reset the counters at resume, read them -+ at suspend, and add the values read to the accumulated counters. -+ * Only those registers enabled by the user's control information -+ are manipulated, instead of blindly manipulating all of them. 
-+ -+Version 0.7 1999-10-25 -+- The init-time checks in version 0.6 of this driver showed that -+ RDMSR is a lot slower than RDPMC for reading the PMCs. The driver -+ now uses RDPMC instead of RDMSR whenever possible. -+- Added an mmap() operation to perfctr files. This allows any client -+ to read the accumulated counter state without making a system call. -+ The old "sync to user-provided buffer" method has been removed, -+ as it entailed additional copy operations and only worked for the -+ "active" process. The PERFCTR_READ operation has been replaced -+ by a simpler PERFCTR_SAMPLE operation, for the benefit of pre-MMX -+ Intel P5 processors which cannot sample counters in user-mode. -+ This rewrite actually simplified the code. -+- The AMD K7 should now be supported correctly. The init-time checks -+ in version 0.6 of this driver revealed that each K7 counter has -+ its own ENable bit. (Thanks to Nathan Slingerland for running the -+ test and reporting the results to me.) -+- Plugged a potential memory leak in perfctr_attach_task(). -+- No longer piggyback on prctl(); sys_perfctr() is a real system call. -+- Some code cleanups. -+ -+Version 0.6 1999-09-08 -+- Temporarily added some init-time code that checks the -+ costs of RDPMC/RDMSR/WRMSR operations applied to perfctr MSRs, -+ the semantics of the ENable bit on the Athlon, and gets -+ the boot-time value of the WinChip CESR register. -+ This code can be turned off by #defining INIT_DEBUG to 0. -+- Preliminary support for the AMD K7 Athlon processor. -+- The code will now build in both 2.3.x and 2.2.x kernels. -+ -+Version 0.5 1999-08-29 -+- The user-space buffer is updated whenever state.status changes, -+ even when a remote command triggers the change. -+- Reworked and simplified the high-level code. All accesses -+ now require an attached file in order to implement proper -+ accounting and syncronisation. The only exception is UNLINK: -+ a process may always UNLINK its own PMCs. 
-+- Fixed counting bug in sys_perfctr_read(). -+- Improved support for the Intel Pentium III. -+- Another WinChip fix: fake TSC update at process resume. -+- The code should now be safe for 'gcc -fstrict-aliasing'. -+ -+Version 0.4 1999-07-31 -+- Implemented PERFCTR_ATTACH and PERFCTR_{READ,CONTROL,STOP,UNLINK} -+ on attached perfctrs. An attached perfctr is represented as a file. -+- Fixed an error in the WinChip-specific code. -+- Perfctrs now survive exec(). -+ -+Version 0.3 1999-07-22 -+- Interface now via sys_prctl() instead of /dev/perfctr. -+- Added NYI stubs for accessing other processes' perfctrs. -+- Moved to dynamic allocation of a task's perfctr state. -+- Minor code cleanups. -+ -+Version 0.2 1999-06-07 -+- Added support for WinChip CPUs. -+- Restart counters from zero, not their previous values. This -+ corrected a problem for Intel P6 (WRMSR writes 32 bits to a PERFCTR -+ MSR and then sign-extends to 40 bits), and also simplified the code. -+- Added support for syncing the kernel's counter values to a user- -+ provided buffer each time a process is resumed. This feature, and -+ the fact that the driver enables RDPMC in processes using PMCs, -+ allows user-level computation of a process' accumulated counter -+ values without incurring the overhead of making a system call. -+ -+Version 0.1 1999-05-30 -+- First public release. -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/compat24.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/compat24.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/compat24.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,97 @@ -+/* $Id: compat24.h,v 1.22.2.1 2004/07/26 14:05:49 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * Compatibility definitions for 2.4 kernels. 
-+ * -+ * Copyright (C) 1999-2004 Mikael Pettersson -+ */ -+#include <linux/mm.h> /* for remap_page_range() [redefined here] */ -+ -+#include "cpumask.h" -+ -+/* 2.4.18-redhat had BUG_ON() before 2.4.19 */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,19) && !defined(BUG_ON) -+#define BUG_ON(condition) do { if ((condition) != 0) BUG(); } while(0) -+#endif -+ -+/* 2.4.18-redhat had set_cpus_allowed() before 2.4.21-pre5 */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,21) && !defined(HAVE_SET_CPUS_ALLOWED) -+#if defined(CONFIG_SMP) -+extern void set_cpus_allowed(struct task_struct*, unsigned long); -+#else -+#define set_cpus_allowed(tsk, mask) do{}while(0) -+#endif -+#endif -+ -+/* 2.4.20-8-redhat added cpu_online() */ -+#if !defined(cpu_online) -+#define cpu_online(cpu) (cpu_online_map & (1UL << (cpu))) -+#endif -+ -+/* 2.4.20-8-redhat added put_task_struct() */ -+#if defined(put_task_struct) /* RH 2.4.20-8 */ -+#define EXPORT_SYMBOL___put_task_struct EXPORT_SYMBOL(__put_task_struct) -+#else /* standard 2.4 */ -+#define put_task_struct(tsk) free_task_struct((tsk)) -+#define EXPORT_SYMBOL___put_task_struct /*empty*/ -+#endif -+ -+/* remap_page_range() changed in 2.5.3-pre1 and 2.4.20-8-redhat */ -+#if !defined(HAVE_5ARG_REMAP_PAGE_RANGE) -+static inline int perfctr_remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long to, unsigned long size, pgprot_t prot) -+{ -+ return remap_page_range(from, to, size, prot); -+} -+#undef remap_page_range -+#define remap_page_range(vma,from,to,size,prot) perfctr_remap_page_range((vma),(from),(to),(size),(prot)) -+#endif -+ -+/* 2.4.22-rc1 added EXPORT_SYMBOL(mmu_cr4_features) */ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22) || defined(HAVE_EXPORT_mmu_cr4_features) -+#define EXPORT_SYMBOL_mmu_cr4_features /*empty*/ -+#else -+#define EXPORT_SYMBOL_mmu_cr4_features EXPORT_SYMBOL(mmu_cr4_features) -+#endif -+ -+/* not in 2.4 proper, but some people use 2.4 with preemption patches */ -+#ifdef CONFIG_PREEMPT 
-+#error "not yet ported to 2.4+PREEMPT" -+#endif -+#ifndef preempt_disable -+#define preempt_disable() do{}while(0) -+#define preempt_enable() do{}while(0) -+#endif -+ -+#ifdef MODULE -+#define __module_get(module) do { if ((module)) __MOD_INC_USE_COUNT((module)); } while(0) -+#define module_put(module) do { if ((module)) __MOD_DEC_USE_COUNT((module)); } while(0) -+#else -+#define __module_get(module) do{}while(0) -+#define module_put(module) do{}while(0) -+#endif -+ -+#define MODULE_ALIAS(alias) /*empty*/ -+ -+/* introduced in 2.5.64; backported to 2.4.22-1.2115.nptl (FC1) */ -+static inline int -+perfctr_on_each_cpu(void (*func) (void *info), void *info, -+ int retry, int wait) -+{ -+ int ret = 0; -+ -+ preempt_disable(); -+ ret = smp_call_function(func, info, retry, wait); -+ func(info); -+ preempt_enable(); -+ return ret; -+} -+#undef on_each_cpu -+#define on_each_cpu(f,i,r,w) perfctr_on_each_cpu((f),(i),(r),(w)) -+ -+/* 2.6.4 added 'noinline' */ -+#if !defined(noinline) -+#if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 -+#define noinline __attribute__((noinline)) -+#else -+#define noinline /* unimplemented */ -+#endif -+#endif -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/init.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/init.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/init.c 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,216 @@ -+/* $Id: init.c,v 1.68 2004/01/11 22:12:09 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * Top-level initialisation code. 
-+ * -+ * Copyright (C) 1999-2004 Mikael Pettersson -+ */ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/fs.h> -+#include <linux/init.h> -+#include <linux/miscdevice.h> -+#include <linux/sched.h> -+#include <linux/perfctr.h> -+ -+#include <asm/uaccess.h> -+ -+#include "compat.h" -+#include "virtual.h" -+#include "global.h" -+#include "version.h" -+#include "marshal.h" -+ -+MODULE_AUTHOR("Mikael Pettersson <mikpe@csd.uu.se>"); -+MODULE_DESCRIPTION("Performance-monitoring counters driver"); -+MODULE_LICENSE("GPL"); -+MODULE_ALIAS("char-major-10-182"); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,63) -+EXPORT_NO_SYMBOLS; -+#endif -+ -+#ifdef CONFIG_PERFCTR_DEBUG -+#define VERSION_DEBUG " DEBUG" -+#else -+#define VERSION_DEBUG -+#endif -+ -+struct perfctr_info perfctr_info = { -+ .abi_version = PERFCTR_ABI_VERSION, -+ .driver_version = VERSION VERSION_DEBUG, -+}; -+ -+char *perfctr_cpu_name __initdata; -+ -+int sys_perfctr_abi(unsigned int *argp) -+{ -+ if( put_user(PERFCTR_ABI_VERSION, argp) ) -+ return -EFAULT; -+ return 0; -+} -+ -+int sys_perfctr_info(struct perfctr_struct_buf *argp) -+{ -+ return perfctr_copy_to_user(argp, &perfctr_info, &perfctr_info_sdesc); -+} -+ -+static int cpus_copy_to_user(const cpumask_t *cpus, struct perfctr_cpu_mask *argp) -+{ -+ const unsigned int k_nrwords = PERFCTR_CPUMASK_NRLONGS*(sizeof(long)/sizeof(int)); -+ unsigned int u_nrwords; -+ unsigned int ui, ki, j; -+ -+ if( get_user(u_nrwords, &argp->nrwords) ) -+ return -EFAULT; -+ if( put_user(k_nrwords, &argp->nrwords) ) -+ return -EFAULT; -+ if( u_nrwords < k_nrwords ) -+ return -EOVERFLOW; -+ for(ui = 0, ki = 0; ki < PERFCTR_CPUMASK_NRLONGS; ++ki) { -+ unsigned long mask = cpus_addr(*cpus)[ki]; -+ for(j = 0; j < sizeof(long)/sizeof(int); ++j) { -+ if( put_user((unsigned int)mask, &argp->mask[ui]) ) -+ return -EFAULT; -+ ++ui; -+ mask = (mask >> (8*sizeof(int)-1)) >> 1; -+ } -+ } -+ return 0; -+} -+ -+int sys_perfctr_cpus(struct perfctr_cpu_mask *argp) -+{ 
-+ cpumask_t cpus = cpu_online_map; -+ return cpus_copy_to_user(&cpus, argp); -+} -+ -+int sys_perfctr_cpus_forbidden(struct perfctr_cpu_mask *argp) -+{ -+ cpumask_t cpus = perfctr_cpus_forbidden_mask; -+ return cpus_copy_to_user(&cpus, argp); -+} -+ -+#ifdef CONFIG_IA32_EMULATION -+#include <asm/ioctl32.h> -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) -+static int perfctr_ioctl32_handler(unsigned int fd, unsigned int cmd, unsigned long arg, struct file *filp) -+{ -+ /* filp->f_op->ioctl is known to exist; see sys32_ioctl() */ -+ return filp->f_op->ioctl(filp->f_dentry->d_inode, filp, cmd, arg); -+} -+#else -+#define perfctr_ioctl32_handler 0 -+#endif -+ -+static void __init perfctr_register_ioctl32_conversions(void) -+{ -+ int err; -+ -+ err = register_ioctl32_conversion(PERFCTR_ABI, perfctr_ioctl32_handler); -+ err |= register_ioctl32_conversion(PERFCTR_INFO, perfctr_ioctl32_handler); -+ err |= register_ioctl32_conversion(PERFCTR_CPUS, perfctr_ioctl32_handler); -+ err |= register_ioctl32_conversion(PERFCTR_CPUS_FORBIDDEN, perfctr_ioctl32_handler); -+ err |= register_ioctl32_conversion(VPERFCTR_CREAT, perfctr_ioctl32_handler); -+ err |= register_ioctl32_conversion(VPERFCTR_OPEN, perfctr_ioctl32_handler); -+ err |= register_ioctl32_conversion(VPERFCTR_READ_SUM, perfctr_ioctl32_handler); -+ err |= register_ioctl32_conversion(VPERFCTR_UNLINK, perfctr_ioctl32_handler); -+ err |= register_ioctl32_conversion(VPERFCTR_CONTROL, perfctr_ioctl32_handler); -+ err |= register_ioctl32_conversion(VPERFCTR_IRESUME, perfctr_ioctl32_handler); -+ err |= register_ioctl32_conversion(VPERFCTR_READ_CONTROL, perfctr_ioctl32_handler); -+ err |= register_ioctl32_conversion(GPERFCTR_CONTROL, perfctr_ioctl32_handler); -+ err |= register_ioctl32_conversion(GPERFCTR_READ, perfctr_ioctl32_handler); -+ err |= register_ioctl32_conversion(GPERFCTR_STOP, perfctr_ioctl32_handler); -+ err |= register_ioctl32_conversion(GPERFCTR_START, perfctr_ioctl32_handler); -+ if( err ) -+ printk(KERN_ERR 
"perfctr: register_ioctl32_conversion() failed\n"); -+} -+ -+static void __exit perfctr_unregister_ioctl32_conversions(void) -+{ -+ unregister_ioctl32_conversion(PERFCTR_ABI); -+ unregister_ioctl32_conversion(PERFCTR_INFO); -+ unregister_ioctl32_conversion(PERFCTR_CPUS); -+ unregister_ioctl32_conversion(PERFCTR_CPUS_FORBIDDEN); -+ unregister_ioctl32_conversion(VPERFCTR_CREAT); -+ unregister_ioctl32_conversion(VPERFCTR_OPEN); -+ unregister_ioctl32_conversion(VPERFCTR_READ_SUM); -+ unregister_ioctl32_conversion(VPERFCTR_UNLINK); -+ unregister_ioctl32_conversion(VPERFCTR_CONTROL); -+ unregister_ioctl32_conversion(VPERFCTR_IRESUME); -+ unregister_ioctl32_conversion(VPERFCTR_READ_CONTROL); -+ unregister_ioctl32_conversion(GPERFCTR_CONTROL); -+ unregister_ioctl32_conversion(GPERFCTR_READ); -+ unregister_ioctl32_conversion(GPERFCTR_STOP); -+ unregister_ioctl32_conversion(GPERFCTR_START); -+} -+ -+#else -+#define perfctr_register_ioctl32_conversions() do{}while(0) -+#define perfctr_unregister_ioctl32_conversions() do{}while(0) -+#endif -+ -+static int dev_perfctr_ioctl(struct inode *inode, struct file *filp, -+ unsigned int cmd, unsigned long arg) -+{ -+ switch( cmd ) { -+ case PERFCTR_ABI: -+ return sys_perfctr_abi((unsigned int*)arg); -+ case PERFCTR_INFO: -+ return sys_perfctr_info((struct perfctr_struct_buf*)arg); -+ case PERFCTR_CPUS: -+ return sys_perfctr_cpus((struct perfctr_cpu_mask*)arg); -+ case PERFCTR_CPUS_FORBIDDEN: -+ return sys_perfctr_cpus_forbidden((struct perfctr_cpu_mask*)arg); -+ case VPERFCTR_CREAT: -+ return vperfctr_attach((int)arg, 1); -+ case VPERFCTR_OPEN: -+ return vperfctr_attach((int)arg, 0); -+ default: -+ return gperfctr_ioctl(inode, filp, cmd, arg); -+ } -+ return -EINVAL; -+} -+ -+static struct file_operations dev_perfctr_file_ops = { -+ .owner = THIS_MODULE, -+ .ioctl = dev_perfctr_ioctl, -+}; -+ -+static struct miscdevice dev_perfctr = { -+ .minor = 182, -+ .name = "perfctr", -+ .fops = &dev_perfctr_file_ops, -+}; -+ -+int __init 
perfctr_init(void) -+{ -+ int err; -+ if( (err = perfctr_cpu_init()) != 0 ) { -+ printk(KERN_INFO "perfctr: not supported by this processor\n"); -+ return err; -+ } -+ if( (err = vperfctr_init()) != 0 ) -+ return err; -+ gperfctr_init(); -+ if( (err = misc_register(&dev_perfctr)) != 0 ) { -+ printk(KERN_ERR "/dev/perfctr: failed to register, errno %d\n", -+ -err); -+ return err; -+ } -+ perfctr_register_ioctl32_conversions(); -+ printk(KERN_INFO "perfctr: driver %s, cpu type %s at %u kHz\n", -+ perfctr_info.driver_version, -+ perfctr_cpu_name, -+ perfctr_info.cpu_khz); -+ return 0; -+} -+ -+void __exit perfctr_exit(void) -+{ -+ perfctr_unregister_ioctl32_conversions(); -+ misc_deregister(&dev_perfctr); -+ vperfctr_exit(); -+ perfctr_cpu_exit(); -+} -+ -+module_init(perfctr_init) -+module_exit(perfctr_exit) -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/virtual.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/virtual.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/virtual.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,15 @@ -+/* $Id: virtual.h,v 1.11 2003/10/04 20:29:43 mikpe Exp $ -+ * Virtual per-process performance counters. 
-+ * -+ * Copyright (C) 1999-2003 Mikael Pettersson -+ */ -+ -+#ifdef CONFIG_PERFCTR_VIRTUAL -+extern int vperfctr_attach(int, int); -+extern int vperfctr_init(void); -+extern void vperfctr_exit(void); -+#else -+static inline int vperfctr_attach(int tid, int creat) { return -EINVAL; } -+static inline int vperfctr_init(void) { return 0; } -+static inline void vperfctr_exit(void) { } -+#endif -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/marshal.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/marshal.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/marshal.c 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,722 @@ -+/* $Id: marshal.c,v 1.6.2.1 2004/08/02 22:24:58 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * Structure marshalling support. -+ * -+ * Copyright (C) 2003-2004 Mikael Pettersson -+ */ -+#ifdef __KERNEL__ -+#include <linux/config.h> -+struct inode; -+#include <linux/sched.h> -+#include <linux/perfctr.h> -+#include <linux/errno.h> -+#include <linux/stddef.h> -+#include <linux/string.h> -+#include <asm/uaccess.h> -+#else /* !__KERNEL__ */ -+#define CONFIG_KPERFCTR -+#include <linux/perfctr.h> -+#include <sys/ioctl.h> -+#include <errno.h> -+#include <stddef.h> -+#include <string.h> -+#define put_user(w, p) (*(p) = (w), 0) -+#define get_user(w, p) ((w) = *(p), 0) -+#endif /* !__KERNEL__ */ -+ -+#include "marshal.h" -+ -+/**************************************************************** -+ * * -+ * Struct encoding support. 
* -+ * * -+ ****************************************************************/ -+ -+static void stream_write(struct perfctr_marshal_stream *stream, unsigned int word) -+{ -+ if( !stream->error ) { -+ if( stream->pos >= stream->size ) -+ stream->error = -EOVERFLOW; -+ else if( put_user(word, &stream->buffer[stream->pos]) ) -+ stream->error = -EFAULT; -+ } -+ ++stream->pos; -+} -+ -+static void encode_field(const void *address, -+ const struct perfctr_field_desc *field, -+ struct perfctr_marshal_stream *stream) -+{ -+ unsigned int base_type = PERFCTR_TYPE_BASE(field->type); -+ unsigned int nr_items = PERFCTR_TYPE_NRITEMS(field->type); -+ unsigned int tag = field->tag; -+ const char *pointer = (const char*)address + field->offset; -+ unsigned int uint32_val; -+ union { -+ unsigned long long ull; -+ unsigned int ui[2]; -+ } uint64_val; -+ unsigned int i = 0; -+ -+ do { -+ if( base_type == PERFCTR_TYPE_UINT64 ) { -+ uint64_val.ull = *(unsigned long long*)pointer; -+ pointer += sizeof(long long); -+ if( !uint64_val.ull ) -+ continue; -+ stream_write(stream, PERFCTR_HEADER(PERFCTR_HEADER_UINT64, tag, i)); -+ stream_write(stream, uint64_val.ui[0]); -+ stream_write(stream, uint64_val.ui[1]); -+ } else { /* PERFCTR_TYPE_BYTES4 */ -+ memcpy(&uint32_val, pointer, sizeof(int)); -+ pointer += sizeof(int); -+ if( !uint32_val ) -+ continue; -+ stream_write(stream, PERFCTR_HEADER(PERFCTR_HEADER_UINT32, tag, i)); -+ stream_write(stream, uint32_val); -+ } -+ } while( ++i < nr_items ); -+} -+ -+void perfctr_encode_struct(const void *address, -+ const struct perfctr_struct_desc *sdesc, -+ struct perfctr_marshal_stream *stream) -+{ -+ unsigned int i; -+ -+ for(i = 0; i < sdesc->nrfields; ++i) -+ encode_field(address, &sdesc->fields[i], stream); -+ for(i = 0; i < sdesc->nrsubs; ++i) { -+ const struct perfctr_sub_struct_desc *sub = &sdesc->subs[i]; -+ perfctr_encode_struct((char*)address + sub->offset, sub->sdesc, stream); -+ } -+} -+ 
-+/**************************************************************** -+ * * -+ * Struct decoding support. * -+ * * -+ ****************************************************************/ -+ -+static int stream_read(struct perfctr_marshal_stream *stream, unsigned int *word) -+{ -+ if( stream->pos >= stream->size ) -+ return 0; -+ if( get_user(*word, &stream->buffer[stream->pos]) ) -+ return -EFAULT; -+ ++stream->pos; -+ return 1; -+} -+ -+static const struct perfctr_field_desc* -+find_field(unsigned int *struct_offset, -+ const struct perfctr_struct_desc *sdesc, -+ unsigned int tag) -+{ -+ unsigned int low, high, mid, i; -+ const struct perfctr_field_desc *field; -+ const struct perfctr_sub_struct_desc *sub; -+ -+ low = 0; -+ high = sdesc->nrfields; /* [low,high[ */ -+ while( low < high ) { -+ mid = (low + high) / 2; -+ field = &sdesc->fields[mid]; -+ if( field->tag == tag ) -+ return field; -+ if( field->tag < tag ) -+ low = mid + 1; -+ else -+ high = mid; -+ } -+ for(i = 0; i < sdesc->nrsubs; ++i) { -+ sub = &sdesc->subs[i]; -+ field = find_field(struct_offset, sub->sdesc, tag); -+ if( field ) { -+ *struct_offset += sub->offset; -+ return field; -+ } -+ } -+ return 0; -+} -+ -+int perfctr_decode_struct(void *address, -+ const struct perfctr_struct_desc *sdesc, -+ struct perfctr_marshal_stream *stream) -+{ -+ unsigned int header; -+ int err; -+ const struct perfctr_field_desc *field; -+ unsigned int struct_offset; -+ union { -+ unsigned long long ull; -+ unsigned int ui[2]; -+ } val; -+ char *target; -+ unsigned int itemnr; -+ -+ for(;;) { -+ err = stream_read(stream, &header); -+ if( err <= 0 ) -+ return err; -+ struct_offset = 0; -+ field = find_field(&struct_offset, sdesc, PERFCTR_HEADER_TAG(header)); -+ if( !field ) -+ goto err_eproto; -+ /* a 64-bit datum must have a 64-bit target field */ -+ if( PERFCTR_HEADER_TYPE(header) != PERFCTR_HEADER_UINT32 && -+ PERFCTR_TYPE_BASE(field->type) != PERFCTR_TYPE_UINT64 ) -+ goto err_eproto; -+ err = stream_read(stream, 
&val.ui[0]); -+ if( err <= 0 ) -+ goto err_err; -+ target = (char*)address + struct_offset + field->offset; -+ itemnr = PERFCTR_HEADER_ITEMNR(header); -+ if( itemnr >= PERFCTR_TYPE_NRITEMS(field->type) ) -+ goto err_eproto; -+ if( PERFCTR_TYPE_BASE(field->type) == PERFCTR_TYPE_UINT64 ) { -+ /* a 64-bit field must have a 64-bit datum */ -+ if( PERFCTR_HEADER_TYPE(header) == PERFCTR_HEADER_UINT32 ) -+ goto err_eproto; -+ err = stream_read(stream, &val.ui[1]); -+ if( err <= 0 ) -+ goto err_err; -+ ((unsigned long long*)target)[itemnr] = val.ull; -+ } else -+ memcpy(&((unsigned int*)target)[itemnr], &val.ui[0], sizeof(int)); -+ } -+ err_err: /* err ? err : -EPROTO */ -+ if( err ) -+ return err; -+ err_eproto: /* saves object code over inlining it */ -+ return -EPROTO; -+} -+ -+/**************************************************************** -+ * * -+ * Structure descriptors. * -+ * * -+ ****************************************************************/ -+ -+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -+#define STRUCT_ARRAY_SIZE(TYPE, MEMBER) ARRAY_SIZE(((TYPE*)0)->MEMBER) -+ -+#if defined(__i386__) || defined(__x86_64__) -+ -+#define PERFCTR_TAG_CPU_CONTROL_TSC_ON 32 -+#define PERFCTR_TAG_CPU_CONTROL_NRACTRS 33 -+#define PERFCTR_TAG_CPU_CONTROL_NRICTRS 34 -+#define PERFCTR_TAG_CPU_CONTROL_PMC_MAP 35 -+#define PERFCTR_TAG_CPU_CONTROL_EVNTSEL 36 -+#define PERFCTR_TAG_CPU_CONTROL_IRESET 37 -+#define PERFCTR_TAG_CPU_CONTROL_P4_ESCR 38 -+#define PERFCTR_TAG_CPU_CONTROL_P4_PE 39 -+#define PERFCTR_TAG_CPU_CONTROL_P4_PMV 40 -+#define PERFCTR_TAG_CPU_CONTROL_RSVD1 41 -+#define PERFCTR_TAG_CPU_CONTROL_RSVD2 42 -+#define PERFCTR_TAG_CPU_CONTROL_RSVD3 43 -+#define PERFCTR_TAG_CPU_CONTROL_RSVD4 44 -+#define PERFCTR_CPU_CONTROL_NRFIELDS_0 (7 + STRUCT_ARRAY_SIZE(struct perfctr_cpu_control, pmc_map) + STRUCT_ARRAY_SIZE(struct perfctr_cpu_control, evntsel) + STRUCT_ARRAY_SIZE(struct perfctr_cpu_control, ireset)) -+#define PERFCTR_CPU_CONTROL_NRFIELDS_1 (2 + 
STRUCT_ARRAY_SIZE(struct perfctr_cpu_control, p4.escr)) -+#define PERFCTR_CPU_CONTROL_NRFIELDS (PERFCTR_CPU_CONTROL_NRFIELDS_0 + PERFCTR_CPU_CONTROL_NRFIELDS_1) -+ -+#define PERFCTR_TAG_SUM_CTRS_TSC 48 -+#define PERFCTR_TAG_SUM_CTRS_PMC 49 -+#define PERFCTR_SUM_CTRS_NRFIELDS (1 + STRUCT_ARRAY_SIZE(struct perfctr_sum_ctrs, pmc)) -+ -+static const struct perfctr_field_desc perfctr_sum_ctrs_fields[] = { -+ { .offset = offsetof(struct perfctr_sum_ctrs, tsc), -+ .tag = PERFCTR_TAG_SUM_CTRS_TSC, -+ .type = PERFCTR_TYPE_UINT64 }, -+ { .offset = offsetof(struct perfctr_sum_ctrs, pmc), -+ .tag = PERFCTR_TAG_SUM_CTRS_PMC, -+ .type = PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_sum_ctrs,pmc), -+ PERFCTR_TYPE_UINT64) }, -+}; -+ -+const struct perfctr_struct_desc perfctr_sum_ctrs_sdesc = { -+ .total_sizeof = sizeof(struct perfctr_sum_ctrs), -+ .total_nrfields = PERFCTR_SUM_CTRS_NRFIELDS, -+ .nrfields = ARRAY_SIZE(perfctr_sum_ctrs_fields), -+ .fields = perfctr_sum_ctrs_fields, -+}; -+ -+static const struct perfctr_field_desc perfctr_cpu_control_fields[] = { -+ { .offset = offsetof(struct perfctr_cpu_control, tsc_on), -+ .tag = PERFCTR_TAG_CPU_CONTROL_TSC_ON, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_cpu_control, nractrs), -+ .tag = PERFCTR_TAG_CPU_CONTROL_NRACTRS, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_cpu_control, nrictrs), -+ .tag = PERFCTR_TAG_CPU_CONTROL_NRICTRS, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_cpu_control, pmc_map), -+ .tag = PERFCTR_TAG_CPU_CONTROL_PMC_MAP, -+ .type = PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_cpu_control,pmc_map), -+ PERFCTR_TYPE_BYTES4) }, -+ { .offset = offsetof(struct perfctr_cpu_control, evntsel), -+ .tag = PERFCTR_TAG_CPU_CONTROL_EVNTSEL, -+ .type = PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_cpu_control,evntsel), -+ PERFCTR_TYPE_BYTES4) }, -+ { .offset = offsetof(struct perfctr_cpu_control, ireset), -+ .tag = 
PERFCTR_TAG_CPU_CONTROL_IRESET, -+ .type = PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_cpu_control,ireset), -+ PERFCTR_TYPE_BYTES4) }, -+ { .offset = offsetof(struct perfctr_cpu_control, p4.escr), -+ .tag = PERFCTR_TAG_CPU_CONTROL_P4_ESCR, -+ .type = PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_cpu_control,p4.escr), -+ PERFCTR_TYPE_BYTES4) }, -+ { .offset = offsetof(struct perfctr_cpu_control, p4.pebs_enable), -+ .tag = PERFCTR_TAG_CPU_CONTROL_P4_PE, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_cpu_control, p4.pebs_matrix_vert), -+ .tag = PERFCTR_TAG_CPU_CONTROL_P4_PMV, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_cpu_control, _reserved1), -+ .tag = PERFCTR_TAG_CPU_CONTROL_RSVD1, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_cpu_control, _reserved2), -+ .tag = PERFCTR_TAG_CPU_CONTROL_RSVD2, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_cpu_control, _reserved3), -+ .tag = PERFCTR_TAG_CPU_CONTROL_RSVD3, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_cpu_control, _reserved4), -+ .tag = PERFCTR_TAG_CPU_CONTROL_RSVD4, -+ .type = PERFCTR_TYPE_BYTES4 }, -+}; -+ -+const struct perfctr_struct_desc perfctr_cpu_control_sdesc = { -+ .total_sizeof = sizeof(struct perfctr_cpu_control), -+ .total_nrfields = PERFCTR_CPU_CONTROL_NRFIELDS, -+ .nrfields = ARRAY_SIZE(perfctr_cpu_control_fields), -+ .fields = perfctr_cpu_control_fields, -+}; -+ -+#endif /* __i386__ || __x86_64__ */ -+ -+#if defined(__powerpc__) /* XXX: can be merged with x86/amd64 */ -+ -+#define PERFCTR_TAG_CPU_CONTROL_TSC_ON 32 -+#define PERFCTR_TAG_CPU_CONTROL_NRACTRS 33 -+#define PERFCTR_TAG_CPU_CONTROL_NRICTRS 34 -+#define PERFCTR_TAG_CPU_CONTROL_PMC_MAP 35 -+#define PERFCTR_TAG_CPU_CONTROL_EVNTSEL 36 -+#define PERFCTR_TAG_CPU_CONTROL_IRESET 37 -+#define PERFCTR_TAG_CPU_CONTROL_PPC_MMCR0 38 -+#define PERFCTR_TAG_CPU_CONTROL_PPC_MMCR2 39 -+/* 40: unused */ -+#define 
PERFCTR_TAG_CPU_CONTROL_RSVD1 41 -+#define PERFCTR_TAG_CPU_CONTROL_RSVD2 42 -+#define PERFCTR_TAG_CPU_CONTROL_RSVD3 43 -+#define PERFCTR_TAG_CPU_CONTROL_RSVD4 44 -+#define PERFCTR_CPU_CONTROL_NRFIELDS_0 (7 + STRUCT_ARRAY_SIZE(struct perfctr_cpu_control, pmc_map) + STRUCT_ARRAY_SIZE(struct perfctr_cpu_control, evntsel) + STRUCT_ARRAY_SIZE(struct perfctr_cpu_control, ireset)) -+#ifdef __powerpc__ -+#define PERFCTR_CPU_CONTROL_NRFIELDS_1 2 -+#endif -+#define PERFCTR_CPU_CONTROL_NRFIELDS (PERFCTR_CPU_CONTROL_NRFIELDS_0 + PERFCTR_CPU_CONTROL_NRFIELDS_1) -+ -+#define PERFCTR_TAG_SUM_CTRS_TSC 48 -+#define PERFCTR_TAG_SUM_CTRS_PMC 49 -+#define PERFCTR_SUM_CTRS_NRFIELDS (1 + STRUCT_ARRAY_SIZE(struct perfctr_sum_ctrs, pmc)) -+ -+static const struct perfctr_field_desc perfctr_sum_ctrs_fields[] = { -+ { .offset = offsetof(struct perfctr_sum_ctrs, tsc), -+ .tag = PERFCTR_TAG_SUM_CTRS_TSC, -+ .type = PERFCTR_TYPE_UINT64 }, -+ { .offset = offsetof(struct perfctr_sum_ctrs, pmc), -+ .tag = PERFCTR_TAG_SUM_CTRS_PMC, -+ .type = PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_sum_ctrs,pmc), -+ PERFCTR_TYPE_UINT64) }, -+}; -+ -+const struct perfctr_struct_desc perfctr_sum_ctrs_sdesc = { -+ .total_sizeof = sizeof(struct perfctr_sum_ctrs), -+ .total_nrfields = PERFCTR_SUM_CTRS_NRFIELDS, -+ .nrfields = ARRAY_SIZE(perfctr_sum_ctrs_fields), -+ .fields = perfctr_sum_ctrs_fields, -+}; -+ -+static const struct perfctr_field_desc perfctr_cpu_control_fields[] = { -+ { .offset = offsetof(struct perfctr_cpu_control, tsc_on), -+ .tag = PERFCTR_TAG_CPU_CONTROL_TSC_ON, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_cpu_control, nractrs), -+ .tag = PERFCTR_TAG_CPU_CONTROL_NRACTRS, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_cpu_control, nrictrs), -+ .tag = PERFCTR_TAG_CPU_CONTROL_NRICTRS, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_cpu_control, pmc_map), -+ .tag = PERFCTR_TAG_CPU_CONTROL_PMC_MAP, -+ .type = 
PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_cpu_control,pmc_map), -+ PERFCTR_TYPE_BYTES4) }, -+ { .offset = offsetof(struct perfctr_cpu_control, evntsel), -+ .tag = PERFCTR_TAG_CPU_CONTROL_EVNTSEL, -+ .type = PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_cpu_control,evntsel), -+ PERFCTR_TYPE_BYTES4) }, -+ { .offset = offsetof(struct perfctr_cpu_control, ireset), -+ .tag = PERFCTR_TAG_CPU_CONTROL_IRESET, -+ .type = PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_cpu_control,ireset), -+ PERFCTR_TYPE_BYTES4) }, -+#ifdef __powerpc__ -+ { .offset = offsetof(struct perfctr_cpu_control, ppc.mmcr0), -+ .tag = PERFCTR_TAG_CPU_CONTROL_PPC_MMCR0, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_cpu_control, ppc.mmcr2), -+ .tag = PERFCTR_TAG_CPU_CONTROL_PPC_MMCR2, -+ .type = PERFCTR_TYPE_BYTES4 }, -+#endif /* __powerpc__ */ -+ { .offset = offsetof(struct perfctr_cpu_control, _reserved1), -+ .tag = PERFCTR_TAG_CPU_CONTROL_RSVD1, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_cpu_control, _reserved2), -+ .tag = PERFCTR_TAG_CPU_CONTROL_RSVD2, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_cpu_control, _reserved3), -+ .tag = PERFCTR_TAG_CPU_CONTROL_RSVD3, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_cpu_control, _reserved4), -+ .tag = PERFCTR_TAG_CPU_CONTROL_RSVD4, -+ .type = PERFCTR_TYPE_BYTES4 }, -+}; -+ -+const struct perfctr_struct_desc perfctr_cpu_control_sdesc = { -+ .total_sizeof = sizeof(struct perfctr_cpu_control), -+ .total_nrfields = PERFCTR_CPU_CONTROL_NRFIELDS, -+ .nrfields = ARRAY_SIZE(perfctr_cpu_control_fields), -+ .fields = perfctr_cpu_control_fields, -+}; -+ -+#endif /* __powerpc__ */ -+ -+#define PERFCTR_TAG_INFO_ABI_VERSION 0 -+#define PERFCTR_TAG_INFO_DRIVER_VERSION 1 -+#define PERFCTR_TAG_INFO_CPU_TYPE 2 -+#define PERFCTR_TAG_INFO_CPU_FEATURES 3 -+#define PERFCTR_TAG_INFO_CPU_KHZ 4 -+#define PERFCTR_TAG_INFO_TSC_TO_CPU_MULT 5 -+#define 
PERFCTR_TAG_INFO_RSVD2 6 -+#define PERFCTR_TAG_INFO_RSVD3 7 -+#define PERFCTR_TAG_INFO_RSVD4 8 -+#define PERFCTR_INFO_NRFIELDS (8 + sizeof(((struct perfctr_info*)0)->driver_version)/sizeof(int)) -+ -+#define VPERFCTR_TAG_CONTROL_SIGNO 9 -+#define VPERFCTR_TAG_CONTROL_PRESERVE 10 -+#define VPERFCTR_TAG_CONTROL_RSVD1 11 -+#define VPERFCTR_TAG_CONTROL_RSVD2 12 -+#define VPERFCTR_TAG_CONTROL_RSVD3 13 -+#define VPERFCTR_TAG_CONTROL_RSVD4 14 -+#define VPERFCTR_CONTROL_NRFIELDS (6 + PERFCTR_CPU_CONTROL_NRFIELDS) -+ -+#define GPERFCTR_TAG_CPU_CONTROL_CPU 15 -+#define GPERFCTR_TAG_CPU_CONTROL_RSVD1 16 -+#define GPERFCTR_TAG_CPU_CONTROL_RSVD2 17 -+#define GPERFCTR_TAG_CPU_CONTROL_RSVD3 18 -+#define GPERFCTR_TAG_CPU_CONTROL_RSVD4 19 -+#define GPERFCTR_CPU_CONTROL_NRFIELDS (5 + PERFCTR_CPU_CONTROL_NRFIELDS) -+ -+#define GPERFCTR_TAG_CPU_STATE_CPU 20 -+#define GPERFCTR_TAG_CPU_STATE_RSVD1 21 -+#define GPERFCTR_TAG_CPU_STATE_RSVD2 22 -+#define GPERFCTR_TAG_CPU_STATE_RSVD3 23 -+#define GPERFCTR_TAG_CPU_STATE_RSVD4 24 -+#define GPERFCTR_CPU_STATE_ONLY_CPU_NRFIELDS 5 -+#define GPERFCTR_CPU_STATE_NRFIELDS (GPERFCTR_CPU_STATE_ONLY_CPU_NRFIELDS + PERFCTR_CPU_CONTROL_NRFIELDS + PERFCTR_SUM_CTRS_NRFIELDS) -+ -+static const struct perfctr_field_desc perfctr_info_fields[] = { -+ { .offset = offsetof(struct perfctr_info, abi_version), -+ .tag = PERFCTR_TAG_INFO_ABI_VERSION, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_info, driver_version), -+ .tag = PERFCTR_TAG_INFO_DRIVER_VERSION, -+ .type = PERFCTR_TYPE_ARRAY(sizeof(((struct perfctr_info*)0)->driver_version)/sizeof(int), PERFCTR_TYPE_BYTES4) }, -+ { .offset = offsetof(struct perfctr_info, cpu_type), -+ .tag = PERFCTR_TAG_INFO_CPU_TYPE, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_info, cpu_features), -+ .tag = PERFCTR_TAG_INFO_CPU_FEATURES, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_info, cpu_khz), -+ .tag = PERFCTR_TAG_INFO_CPU_KHZ, -+ .type = 
PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_info, tsc_to_cpu_mult), -+ .tag = PERFCTR_TAG_INFO_TSC_TO_CPU_MULT, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_info, _reserved2), -+ .tag = PERFCTR_TAG_INFO_RSVD2, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_info, _reserved3), -+ .tag = PERFCTR_TAG_INFO_RSVD3, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct perfctr_info, _reserved4), -+ .tag = PERFCTR_TAG_INFO_RSVD4, -+ .type = PERFCTR_TYPE_BYTES4 }, -+}; -+ -+const struct perfctr_struct_desc perfctr_info_sdesc = { -+ .total_sizeof = sizeof(struct perfctr_info), -+ .total_nrfields = PERFCTR_INFO_NRFIELDS, -+ .nrfields = ARRAY_SIZE(perfctr_info_fields), -+ .fields = perfctr_info_fields, -+}; -+ -+#if defined(CONFIG_PERFCTR_VIRTUAL) || !defined(__KERNEL__) -+static const struct perfctr_field_desc vperfctr_control_fields[] = { -+ { .offset = offsetof(struct vperfctr_control, si_signo), -+ .tag = VPERFCTR_TAG_CONTROL_SIGNO, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct vperfctr_control, preserve), -+ .tag = VPERFCTR_TAG_CONTROL_PRESERVE, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct vperfctr_control, _reserved1), -+ .tag = VPERFCTR_TAG_CONTROL_RSVD1, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct vperfctr_control, _reserved2), -+ .tag = VPERFCTR_TAG_CONTROL_RSVD2, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct vperfctr_control, _reserved3), -+ .tag = VPERFCTR_TAG_CONTROL_RSVD3, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct vperfctr_control, _reserved4), -+ .tag = VPERFCTR_TAG_CONTROL_RSVD4, -+ .type = PERFCTR_TYPE_BYTES4 }, -+}; -+ -+static const struct perfctr_sub_struct_desc vperfctr_control_subs[] = { -+ { .offset = offsetof(struct vperfctr_control, cpu_control), -+ .sdesc = &perfctr_cpu_control_sdesc }, -+}; -+ -+const struct perfctr_struct_desc vperfctr_control_sdesc = { -+ .total_sizeof 
= sizeof(struct vperfctr_control), -+ .total_nrfields = VPERFCTR_CONTROL_NRFIELDS, -+ .nrfields = ARRAY_SIZE(vperfctr_control_fields), -+ .fields = vperfctr_control_fields, -+ .nrsubs = ARRAY_SIZE(vperfctr_control_subs), -+ .subs = vperfctr_control_subs, -+}; -+#endif /* CONFIG_PERFCTR_VIRTUAL || !__KERNEL__ */ -+ -+#if defined(CONFIG_PERFCTR_GLOBAL) || !defined(__KERNEL__) -+static const struct perfctr_field_desc gperfctr_cpu_control_fields[] = { -+ { .offset = offsetof(struct gperfctr_cpu_control, cpu), -+ .tag = GPERFCTR_TAG_CPU_CONTROL_CPU, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct gperfctr_cpu_control, _reserved1), -+ .tag = GPERFCTR_TAG_CPU_CONTROL_RSVD1, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct gperfctr_cpu_control, _reserved2), -+ .tag = GPERFCTR_TAG_CPU_CONTROL_RSVD2, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct gperfctr_cpu_control, _reserved3), -+ .tag = GPERFCTR_TAG_CPU_CONTROL_RSVD3, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct gperfctr_cpu_control, _reserved4), -+ .tag = GPERFCTR_TAG_CPU_CONTROL_RSVD4, -+ .type = PERFCTR_TYPE_BYTES4 }, -+}; -+ -+static const struct perfctr_sub_struct_desc gperfctr_cpu_control_subs[] = { -+ { .offset = offsetof(struct gperfctr_cpu_control, cpu_control), -+ .sdesc = &perfctr_cpu_control_sdesc }, -+}; -+ -+const struct perfctr_struct_desc gperfctr_cpu_control_sdesc = { -+ .total_sizeof = sizeof(struct gperfctr_cpu_control), -+ .total_nrfields = GPERFCTR_CPU_CONTROL_NRFIELDS, -+ .nrfields = ARRAY_SIZE(gperfctr_cpu_control_fields), -+ .fields = gperfctr_cpu_control_fields, -+ .nrsubs = ARRAY_SIZE(gperfctr_cpu_control_subs), -+ .subs = gperfctr_cpu_control_subs, -+}; -+ -+static const struct perfctr_field_desc gperfctr_cpu_state_fields[] = { -+ { .offset = offsetof(struct gperfctr_cpu_state, cpu), -+ .tag = GPERFCTR_TAG_CPU_STATE_CPU, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct gperfctr_cpu_state, _reserved1), -+ .tag 
= GPERFCTR_TAG_CPU_STATE_RSVD1, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct gperfctr_cpu_state, _reserved2), -+ .tag = GPERFCTR_TAG_CPU_STATE_RSVD2, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct gperfctr_cpu_state, _reserved3), -+ .tag = GPERFCTR_TAG_CPU_STATE_RSVD3, -+ .type = PERFCTR_TYPE_BYTES4 }, -+ { .offset = offsetof(struct gperfctr_cpu_state, _reserved4), -+ .tag = GPERFCTR_TAG_CPU_STATE_RSVD4, -+ .type = PERFCTR_TYPE_BYTES4 }, -+}; -+ -+static const struct perfctr_sub_struct_desc gperfctr_cpu_state_subs[] = { -+ { .offset = offsetof(struct gperfctr_cpu_state, cpu_control), -+ .sdesc = &perfctr_cpu_control_sdesc }, -+ { .offset = offsetof(struct gperfctr_cpu_state, sum), -+ .sdesc = &perfctr_sum_ctrs_sdesc }, -+}; -+ -+const struct perfctr_struct_desc gperfctr_cpu_state_only_cpu_sdesc = { -+ .total_sizeof = sizeof(struct gperfctr_cpu_state), -+ .total_nrfields = GPERFCTR_CPU_STATE_ONLY_CPU_NRFIELDS, -+ .nrfields = ARRAY_SIZE(gperfctr_cpu_state_fields), -+ .fields = gperfctr_cpu_state_fields, -+}; -+ -+const struct perfctr_struct_desc gperfctr_cpu_state_sdesc = { -+ .total_sizeof = sizeof(struct gperfctr_cpu_state), -+ .total_nrfields = GPERFCTR_CPU_STATE_NRFIELDS, -+ .nrfields = ARRAY_SIZE(gperfctr_cpu_state_fields), -+ .fields = gperfctr_cpu_state_fields, -+ .nrsubs = ARRAY_SIZE(gperfctr_cpu_state_subs), -+ .subs = gperfctr_cpu_state_subs, -+}; -+#endif /* CONFIG_PERFCTR_GLOBAL || !__KERNEL__ */ -+ -+#ifdef __KERNEL__ -+ -+int perfctr_copy_from_user(void *struct_address, -+ struct perfctr_struct_buf *argp, -+ const struct perfctr_struct_desc *sdesc) -+{ -+ struct perfctr_marshal_stream stream; -+ -+ if( get_user(stream.size, &argp->rdsize) ) -+ return -EFAULT; -+ stream.buffer = argp->buffer; -+ stream.pos = 0; -+ stream.error = 0; -+ memset(struct_address, 0, sdesc->total_sizeof); -+ return perfctr_decode_struct(struct_address, sdesc, &stream); -+} -+ -+int perfctr_copy_to_user(struct perfctr_struct_buf *argp, -+ 
void *struct_address, -+ const struct perfctr_struct_desc *sdesc) -+{ -+ struct perfctr_marshal_stream stream; -+ -+ if( get_user(stream.size, &argp->wrsize) ) -+ return -EFAULT; -+ stream.buffer = argp->buffer; -+ stream.pos = 0; -+ stream.error = 0; -+ perfctr_encode_struct(struct_address, sdesc, &stream); -+ if( stream.error ) -+ return stream.error; -+ if( put_user(stream.pos, &argp->rdsize) ) -+ return -EFAULT; -+ return 0; -+} -+ -+#else /* !__KERNEL__ */ -+ -+#define sdesc_bufsize(sdesc) ((sdesc)->total_nrfields + (sdesc)->total_sizeof/sizeof(int)) -+ -+static int common_ioctl_w(const void *arg, -+ const struct perfctr_struct_desc *sdesc, -+ struct perfctr_struct_buf *buf, -+ unsigned int bufsize) -+{ -+ struct perfctr_marshal_stream stream; -+ -+ stream.size = bufsize; -+ stream.buffer = buf->buffer; -+ stream.pos = 0; -+ stream.error = 0; -+ perfctr_encode_struct(arg, sdesc, &stream); -+ if( stream.error ) { -+ errno = -stream.error; -+ return -1; -+ } -+ buf->rdsize = stream.pos; -+ return 0; -+} -+ -+int perfctr_ioctl_w(int fd, unsigned int cmd, const void *arg, -+ const struct perfctr_struct_desc *sdesc) -+{ -+ unsigned int bufsize = sdesc_bufsize(sdesc); -+ union { -+ struct perfctr_struct_buf buf; -+ struct { -+ unsigned int rdsize; -+ unsigned int wrsize; -+ unsigned int buffer[bufsize]; -+ } buf_bufsize; -+ } u; -+ int err; -+ -+ err = common_ioctl_w(arg, sdesc, &u.buf, bufsize); -+ if( err < 0 ) -+ return err; -+ u.buf.wrsize = 0; -+ return ioctl(fd, cmd, &u.buf); -+} -+ -+static int common_ioctl_r(int fd, unsigned int cmd, void *res, -+ const struct perfctr_struct_desc *sdesc, -+ struct perfctr_struct_buf *buf) -+{ -+ struct perfctr_marshal_stream stream; -+ int err; -+ -+ if( ioctl(fd, cmd, buf) < 0 ) -+ return -1; -+ stream.size = buf->rdsize; -+ stream.buffer = buf->buffer; -+ stream.pos = 0; -+ stream.error = 0; -+ memset(res, 0, sdesc->total_sizeof); -+ err = perfctr_decode_struct(res, sdesc, &stream); -+ if( err < 0 ) { -+ errno = -err; -+ 
return -1; -+ } -+ return 0; -+} -+ -+int perfctr_ioctl_r(int fd, unsigned int cmd, void *res, -+ const struct perfctr_struct_desc *sdesc) -+{ -+ unsigned int bufsize = sdesc_bufsize(sdesc); -+ union { -+ struct perfctr_struct_buf buf; -+ struct { -+ unsigned int rdsize; -+ unsigned int wrsize; -+ unsigned int buffer[bufsize]; -+ } buf_bufsize; -+ } u; -+ -+ u.buf.rdsize = 0; -+ u.buf.wrsize = bufsize; -+ return common_ioctl_r(fd, cmd, res, sdesc, &u.buf); -+} -+ -+int perfctr_ioctl_wr(int fd, unsigned int cmd, void *argres, -+ const struct perfctr_struct_desc *arg_sdesc, -+ const struct perfctr_struct_desc *res_sdesc) -+{ -+ unsigned int arg_bufsize = sdesc_bufsize(arg_sdesc); -+ unsigned int res_bufsize = sdesc_bufsize(res_sdesc); -+ unsigned int bufsize = arg_bufsize > res_bufsize ? arg_bufsize : res_bufsize; -+ union { -+ struct perfctr_struct_buf buf; -+ struct { -+ unsigned int rdsize; -+ unsigned int wrsize; -+ unsigned int buffer[bufsize]; -+ } buf_bufsize; -+ } u; -+ int err; -+ -+ err = common_ioctl_w(argres, arg_sdesc, &u.buf, arg_bufsize); -+ if( err < 0 ) -+ return err; -+ u.buf.wrsize = res_bufsize; -+ return common_ioctl_r(fd, cmd, argres, res_sdesc, &u.buf); -+} -+ -+#endif /* !__KERNEL__ */ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc_compat.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/ppc_compat.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc_compat.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,62 @@ -+/* $Id: ppc_compat.h,v 1.1.2.1 2004/06/21 22:32:14 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * PPC32-specific compatibility definitions for 2.4/2.6 kernels. 
-+ * -+ * Copyright (C) 2004 Mikael Pettersson -+ */ -+ -+#define SPRN_MMCR0 0x3B8 /* 604 and up */ -+#define SPRN_PMC1 0x3B9 /* 604 and up */ -+#define SPRN_PMC2 0x3BA /* 604 and up */ -+#define SPRN_SIA 0x3BB /* 604 and up */ -+#define SPRN_MMCR1 0x3BC /* 604e and up */ -+#define SPRN_PMC3 0x3BD /* 604e and up */ -+#define SPRN_PMC4 0x3BE /* 604e and up */ -+#define SPRN_MMCR2 0x3B0 /* 7400 and up */ -+#define SPRN_BAMR 0x3B7 /* 7400 and up */ -+#define SPRN_PMC5 0x3B1 /* 7450 and up */ -+#define SPRN_PMC6 0x3B2 /* 7450 and up */ -+ -+/* MMCR0 layout (74xx terminology) */ -+#define MMCR0_FC 0x80000000 /* Freeze counters unconditionally. */ -+#define MMCR0_FCS 0x40000000 /* Freeze counters while MSR[PR]=0 (supervisor mode). */ -+#define MMCR0_FCP 0x20000000 /* Freeze counters while MSR[PR]=1 (user mode). */ -+#define MMCR0_FCM1 0x10000000 /* Freeze counters while MSR[PM]=1. */ -+#define MMCR0_FCM0 0x08000000 /* Freeze counters while MSR[PM]=0. */ -+#define MMCR0_PMXE 0x04000000 /* Enable performance monitor exceptions. -+ * Cleared by hardware when a PM exception occurs. -+ * 604: PMXE is not cleared by hardware. -+ */ -+#define MMCR0_FCECE 0x02000000 /* Freeze counters on enabled condition or event. -+ * FCECE is treated as 0 if TRIGGER is 1. -+ * 74xx: FC is set when the event occurs. -+ * 604/750: ineffective when PMXE=0. -+ */ -+#define MMCR0_TBSEL 0x01800000 /* Time base lower (TBL) bit selector. -+ * 00: bit 31, 01: bit 23, 10: bit 19, 11: bit 15. -+ */ -+#define MMCR0_TBEE 0x00400000 /* Enable event on TBL bit transition from 0 to 1. */ -+#define MMCR0_THRESHOLD 0x003F0000 /* Threshold value for certain events. */ -+#define MMCR0_PMC1CE 0x00008000 /* Enable event on PMC1 overflow. */ -+#define MMCR0_PMCjCE 0x00004000 /* Enable event on PMC2-PMC6 overflow. -+ * 604/750: Overrides FCECE (DISCOUNT). -+ */ -+#define MMCR0_TRIGGER 0x00002000 /* Disable PMC2-PMC6 until PMC1 overflow or other event. -+ * 74xx: cleared by hardware when the event occurs. 
-+ */ -+#define MMCR0_PMC1SEL 0x00001FB0 /* PMC1 event selector, 7 bits. */ -+#define MMCR0_PMC2SEL 0x0000003F /* PMC2 event selector, 6 bits. */ -+ -+/* MMCR1 layout (604e-7457) */ -+#define MMCR1_PMC3SEL 0xF8000000 /* PMC3 event selector, 5 bits. */ -+#define MMCR1_PMC4SEL 0x07B00000 /* PMC4 event selector, 5 bits. */ -+#define MMCR1_PMC5SEL 0x003E0000 /* PMC5 event selector, 5 bits. (745x only) */ -+#define MMCR1_PMC6SEL 0x0001F800 /* PMC6 event selector, 6 bits. (745x only) */ -+#define MMCR1__RESERVED 0x000007FF /* should be zero */ -+ -+/* MMCR2 layout (7400-7457) */ -+#define MMCR2_THRESHMULT 0x80000000 /* MMCR0[THRESHOLD] multiplier. */ -+#define MMCR2_SMCNTEN 0x40000000 /* 7400/7410 only, should be zero. */ -+#define MMCR2_SMINTEN 0x20000000 /* 7400/7410 only, should be zero. */ -+#define MMCR2__RESERVED 0x1FFFFFFF /* should be zero */ -+#define MMCR2_RESERVED (MMCR2_SMCNTEN | MMCR2_SMINTEN | MMCR2__RESERVED) -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/compat.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/compat.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/compat.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,23 @@ -+/* $Id: compat.h,v 1.42 2004/05/02 22:52:13 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * Compatibility definitions for 2.6 kernels. 
-+ * -+ * Copyright (C) 1999-2004 Mikael Pettersson -+ */ -+#include <linux/version.h> -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -+#include "compat24.h" -+#else -+ -+#include "cpumask.h" -+ -+#define EXPORT_SYMBOL_mmu_cr4_features EXPORT_SYMBOL(mmu_cr4_features) -+#define EXPORT_SYMBOL___put_task_struct EXPORT_SYMBOL(__put_task_struct) -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) /* names changed in 2.6.4-rc2 */ -+#define sysdev_register(dev) sys_device_register((dev)) -+#define sysdev_unregister(dev) sys_device_unregister((dev)) -+#endif -+ -+#endif -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/Config.in -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/Config.in 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/Config.in 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,15 @@ -+# $Id: Config.in,v 1.15 2002/11/25 13:01:46 mikpe Exp $ -+# Performance-monitoring counters driver configuration -+# -+ -+mainmenu_option next_comment -+comment 'Performance-monitoring counters support' -+tristate 'Performance-monitoring counters support' CONFIG_PERFCTR -+if [ "$CONFIG_PERFCTR" != "n" ]; then -+ define_bool CONFIG_KPERFCTR y -+ bool ' Additional internal consistency checks' CONFIG_PERFCTR_DEBUG -+ bool ' Init-time hardware tests' CONFIG_PERFCTR_INIT_TESTS -+ bool ' Virtual performance counters support' CONFIG_PERFCTR_VIRTUAL $CONFIG_PERFCTR -+ bool ' Global performance counters support' CONFIG_PERFCTR_GLOBAL $CONFIG_PERFCTR -+fi -+endmenu -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/global.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/global.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/global.c 2004-11-18 
20:59:11.000000000 -0500 -@@ -0,0 +1,244 @@ -+/* $Id: global.c,v 1.38 2004/01/25 14:45:35 mikpe Exp $ -+ * Global-mode performance-monitoring counters via /dev/perfctr. -+ * -+ * Copyright (C) 2000-2003 Mikael Pettersson -+ * -+ * XXX: Doesn't do any authentication yet. Should we limit control -+ * to root, or base it on having write access to /dev/perfctr? -+ */ -+#include <linux/config.h> -+#define __NO_VERSION__ -+#include <linux/module.h> -+#include <linux/sched.h> -+#include <linux/init.h> -+#include <linux/fs.h> -+#include <linux/perfctr.h> -+ -+#include <asm/uaccess.h> -+ -+#include "compat.h" -+#include "global.h" -+#include "marshal.h" -+ -+static const char this_service[] = __FILE__; -+static int hardware_is_ours = 0; -+static struct timer_list sampling_timer; -+static DECLARE_MUTEX(control_mutex); -+static unsigned int nr_active_cpus = 0; -+ -+struct gperfctr { -+ struct perfctr_cpu_state cpu_state; -+ spinlock_t lock; -+} ____cacheline_aligned; -+ -+static struct gperfctr per_cpu_gperfctr[NR_CPUS] __cacheline_aligned; -+ -+static int reserve_hardware(void) -+{ -+ const char *other; -+ -+ if( hardware_is_ours ) -+ return 0; -+ other = perfctr_cpu_reserve(this_service); -+ if( other ) { -+ printk(KERN_ERR __FILE__ ":%s: failed because hardware is taken by '%s'\n", -+ __FUNCTION__, other); -+ return -EBUSY; -+ } -+ hardware_is_ours = 1; -+ __module_get(THIS_MODULE); -+ return 0; -+} -+ -+static void release_hardware(void) -+{ -+ int i; -+ -+ nr_active_cpus = 0; -+ if( hardware_is_ours ) { -+ hardware_is_ours = 0; -+ del_timer(&sampling_timer); -+ sampling_timer.data = 0; -+ perfctr_cpu_release(this_service); -+ module_put(THIS_MODULE); -+ for(i = 0; i < NR_CPUS; ++i) -+ per_cpu_gperfctr[i].cpu_state.cstatus = 0; -+ } -+} -+ -+static void sample_this_cpu(void *unused) -+{ -+ /* PREEMPT note: when called via smp_call_function(), -+ this is in IRQ context with preemption disabled. 
*/ -+ struct gperfctr *perfctr; -+ -+ perfctr = &per_cpu_gperfctr[smp_processor_id()]; -+ if( !perfctr_cstatus_enabled(perfctr->cpu_state.cstatus) ) -+ return; -+ spin_lock(&perfctr->lock); -+ perfctr_cpu_sample(&perfctr->cpu_state); -+ spin_unlock(&perfctr->lock); -+} -+ -+static void sample_all_cpus(void) -+{ -+ on_each_cpu(sample_this_cpu, NULL, 1, 1); -+} -+ -+static void sampling_timer_function(unsigned long interval) -+{ -+ sample_all_cpus(); -+ sampling_timer.expires = jiffies + interval; -+ add_timer(&sampling_timer); -+} -+ -+static unsigned long usectojiffies(unsigned long usec) -+{ -+ usec += 1000000 / HZ - 1; -+ usec /= 1000000 / HZ; -+ return usec; -+} -+ -+static void start_sampling_timer(unsigned long interval_usec) -+{ -+ if( interval_usec > 0 ) { -+ unsigned long interval = usectojiffies(interval_usec); -+ init_timer(&sampling_timer); -+ sampling_timer.function = sampling_timer_function; -+ sampling_timer.data = interval; -+ sampling_timer.expires = jiffies + interval; -+ add_timer(&sampling_timer); -+ } -+} -+ -+static void start_this_cpu(void *unused) -+{ -+ /* PREEMPT note: when called via smp_call_function(), -+ this is in IRQ context with preemption disabled. 
*/ -+ struct gperfctr *perfctr; -+ -+ perfctr = &per_cpu_gperfctr[smp_processor_id()]; -+ if( perfctr_cstatus_enabled(perfctr->cpu_state.cstatus) ) -+ perfctr_cpu_resume(&perfctr->cpu_state); -+} -+ -+static void start_all_cpus(void) -+{ -+ on_each_cpu(start_this_cpu, NULL, 1, 1); -+} -+ -+static int gperfctr_control(struct perfctr_struct_buf *argp) -+{ -+ int ret; -+ struct gperfctr *perfctr; -+ struct gperfctr_cpu_control cpu_control; -+ -+ ret = perfctr_copy_from_user(&cpu_control, argp, &gperfctr_cpu_control_sdesc); -+ if( ret ) -+ return ret; -+ if( cpu_control.cpu >= NR_CPUS || -+ !cpu_online(cpu_control.cpu) || -+ perfctr_cpu_is_forbidden(cpu_control.cpu) ) -+ return -EINVAL; -+ /* we don't permit i-mode counters */ -+ if( cpu_control.cpu_control.nrictrs != 0 ) -+ return -EPERM; -+ down(&control_mutex); -+ ret = -EBUSY; -+ if( hardware_is_ours ) -+ goto out_up; /* you have to stop them first */ -+ perfctr = &per_cpu_gperfctr[cpu_control.cpu]; -+ spin_lock(&perfctr->lock); -+ perfctr->cpu_state.tsc_start = 0; -+ perfctr->cpu_state.tsc_sum = 0; -+ memset(&perfctr->cpu_state.pmc, 0, sizeof perfctr->cpu_state.pmc); -+ perfctr->cpu_state.control = cpu_control.cpu_control; -+ ret = perfctr_cpu_update_control(&perfctr->cpu_state, 1); -+ spin_unlock(&perfctr->lock); -+ if( ret < 0 ) -+ goto out_up; -+ if( perfctr_cstatus_enabled(perfctr->cpu_state.cstatus) ) -+ ++nr_active_cpus; -+ ret = nr_active_cpus; -+ out_up: -+ up(&control_mutex); -+ return ret; -+} -+ -+static int gperfctr_start(unsigned int interval_usec) -+{ -+ int ret; -+ -+ if( interval_usec < 10000 ) -+ return -EINVAL; -+ down(&control_mutex); -+ ret = nr_active_cpus; -+ if( ret > 0 ) { -+ if( reserve_hardware() < 0 ) { -+ ret = -EBUSY; -+ } else { -+ start_all_cpus(); -+ start_sampling_timer(interval_usec); -+ } -+ } -+ up(&control_mutex); -+ return ret; -+} -+ -+static int gperfctr_stop(void) -+{ -+ down(&control_mutex); -+ release_hardware(); -+ up(&control_mutex); -+ return 0; -+} -+ -+static int 
gperfctr_read(struct perfctr_struct_buf *argp) -+{ -+ struct gperfctr *perfctr; -+ struct gperfctr_cpu_state state; -+ int err; -+ -+ // XXX: sample_all_cpus() ??? -+ err = perfctr_copy_from_user(&state, argp, &gperfctr_cpu_state_only_cpu_sdesc); -+ if( err ) -+ return err; -+ if( state.cpu >= NR_CPUS || !cpu_online(state.cpu) ) -+ return -EINVAL; -+ perfctr = &per_cpu_gperfctr[state.cpu]; -+ spin_lock(&perfctr->lock); -+ state.cpu_control = perfctr->cpu_state.control; -+ //state.sum = perfctr->cpu_state.sum; -+ { -+ int j; -+ state.sum.tsc = perfctr->cpu_state.tsc_sum; -+ for(j = 0; j < ARRAY_SIZE(state.sum.pmc); ++j) -+ state.sum.pmc[j] = perfctr->cpu_state.pmc[j].sum; -+ } -+ spin_unlock(&perfctr->lock); -+ return perfctr_copy_to_user(argp, &state, &gperfctr_cpu_state_sdesc); -+} -+ -+int gperfctr_ioctl(struct inode *inode, struct file *filp, -+ unsigned int cmd, unsigned long arg) -+{ -+ switch( cmd ) { -+ case GPERFCTR_CONTROL: -+ return gperfctr_control((struct perfctr_struct_buf*)arg); -+ case GPERFCTR_READ: -+ return gperfctr_read((struct perfctr_struct_buf*)arg); -+ case GPERFCTR_STOP: -+ return gperfctr_stop(); -+ case GPERFCTR_START: -+ return gperfctr_start(arg); -+ } -+ return -EINVAL; -+} -+ -+void __init gperfctr_init(void) -+{ -+ int i; -+ -+ for(i = 0; i < NR_CPUS; ++i) -+ per_cpu_gperfctr[i].lock = SPIN_LOCK_UNLOCKED; -+} -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64_tests.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_64_tests.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64_tests.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,14 @@ -+/* $Id: x86_64_tests.h,v 1.1 2003/05/14 21:51:57 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * Optional x86_64-specific init-time tests. 
-+ * -+ * Copyright (C) 2003 Mikael Pettersson -+ */ -+ -+#ifdef CONFIG_PERFCTR_INIT_TESTS -+extern void perfctr_k8_init_tests(void); -+extern void perfctr_generic_init_tests(void); -+#else -+#define perfctr_k8_init_tests() -+#define perfctr_generic_init_tests() -+#endif -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/Kconfig -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/Kconfig 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/Kconfig 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,79 @@ -+# $Id: Kconfig,v 1.7 2003/05/14 21:51:32 mikpe Exp $ -+# Performance-monitoring counters driver configuration -+# -+ -+menu "Performance-monitoring counters support" -+ -+config PERFCTR -+ tristate "Performance monitoring counters support" -+ help -+ This driver provides access to the performance-monitoring counter -+ registers available in some (but not all) modern processors. -+ These special-purpose registers can be programmed to count low-level -+ performance-related events which occur during program execution, -+ such as cache misses, pipeline stalls, etc. -+ -+ You can safely say Y here, even if you intend to run the kernel -+ on a processor without performance-monitoring counters. -+ -+ You can also say M here to compile the driver as a module; the -+ module will be called `perfctr'. -+ -+config KPERFCTR -+ bool -+ depends on PERFCTR -+ default y -+ -+config PERFCTR_DEBUG -+ bool "Additional internal consistency checks" -+ depends on PERFCTR -+ help -+ This option enables additional internal consistency checking in -+ the perfctr driver. The scope of these checks is unspecified and -+ may vary between different versions of the driver. -+ -+ Enabling this option will reduce performance, so say N unless you -+ are debugging the driver. 
-+ -+config PERFCTR_INIT_TESTS -+ bool "Init-time hardware tests" -+ depends on PERFCTR -+ help -+ This option makes the driver perform additional hardware tests -+ during initialisation, and log their results in the kernel's -+ message buffer. For most supported processors, these tests simply -+ measure the runtime overheads of performance counter operations. -+ -+ If you have a less well-known processor (one not listed in the -+ etc/costs/ directory in the user-space package), you should enable -+ this option and email the results to the perfctr developers. -+ -+ If unsure, say N. -+ -+config PERFCTR_VIRTUAL -+ bool "Virtual performance counters support" -+ depends on PERFCTR -+ help -+ The processor's performance-monitoring counters are special-purpose -+ global registers. This option adds support for virtual per-process -+ performance-monitoring counters which only run when the process -+ to which they belong is executing. This improves the accuracy of -+ performance measurements by reducing "noise" from other processes. -+ -+ Say Y. -+ -+config PERFCTR_GLOBAL -+ bool "Global performance counters support" -+ depends on PERFCTR -+ help -+ This option adds driver support for global-mode (system-wide) -+ performance-monitoring counters. In this mode, the driver allows -+ each performance-monitoring counter on each processor to be -+ controlled and read. The driver provides a sampling timer to -+ maintain 64-bit accumulated event counts. -+ -+ Global-mode performance counters cannot be used if some process -+ is currently using virtual-mode performance counters, and vice versa. -+ -+ Say Y. 
-+endmenu -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/marshal.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/marshal.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/marshal.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,104 @@ -+/* $Id: marshal.h,v 1.1 2003/08/19 13:37:07 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * Structure marshalling support. -+ * -+ * Copyright (C) 2003 Mikael Pettersson -+ */ -+ -+/* -+ * Each encoded datum starts with a 32-bit header word, containing -+ * the datum's type (1 bit: UINT32 or UINT64), the target's field -+ * tag (16 bits), and the target field's array index (15 bits). -+ * -+ * After the header follows the datum's value, in one (for UINT32) -+ * or two (for UINT64) words. Multi-word values are emitted in -+ * native word order. -+ * -+ * To encode a struct, encode each field with a non-zero value, -+ * and place the encodings in sequence. The field order is arbitrary. -+ * -+ * To decode an encoded struct, first memset() the target struct -+ * to zero. Then decode each encoded field in the sequence and -+ * update the corresponding field in the target struct. -+ */ -+#define PERFCTR_HEADER(TYPE,TAG,ITEMNR) (((TAG)<<16)|((ITEMNR)<<1)|(TYPE)) -+#define PERFCTR_HEADER_TYPE(H) ((H) & 0x1) -+#define PERFCTR_HEADER_ITEMNR(H) (((H) >> 1) & 0x7FFF) -+#define PERFCTR_HEADER_TAG(H) ((H) >> 16) -+ -+#define PERFCTR_HEADER_UINT32 0 -+#define PERFCTR_HEADER_UINT64 1 -+ -+/* -+ * A field descriptor describes a struct field to the -+ * encoding and decoding procedures. -+ * -+ * To keep the descriptors small, field tags and array sizes -+ * are currently restricted to 8 and 7 bits, respectively. -+ * This does not change the encoded format. 
-+ */ -+struct perfctr_field_desc { -+ unsigned short offset; /* offsetof() for this field */ -+ unsigned char tag; /* identifying tag in encoded format */ -+ unsigned char type; /* base type (1 bit), array size - 1 (7 bits) */ -+}; -+ -+#define PERFCTR_TYPE_ARRAY(N,T) ((((N) - 1) << 1) | (T)) -+#define PERFCTR_TYPE_BASE(T) ((T) & 0x1) -+#define PERFCTR_TYPE_NRITEMS(T) (((T) >> 1) + 1) -+ -+#define PERFCTR_TYPE_BYTES4 0 /* uint32 or char[4] */ -+#define PERFCTR_TYPE_UINT64 1 /* long long */ -+ -+struct perfctr_struct_desc { -+ unsigned short total_sizeof; /* for buffer allocation and decode memset() */ -+ unsigned short total_nrfields; /* for buffer allocation */ -+ unsigned short nrfields; -+ unsigned short nrsubs; -+ /* Note: the fields must be in ascending tag order */ -+ const struct perfctr_field_desc *fields; -+ const struct perfctr_sub_struct_desc { -+ unsigned short offset; -+ const struct perfctr_struct_desc *sdesc; -+ } *subs; -+}; -+ -+struct perfctr_marshal_stream { -+ unsigned int size; -+ unsigned int *buffer; -+ unsigned int pos; -+ unsigned int error; -+}; -+ -+extern void perfctr_encode_struct(const void *address, -+ const struct perfctr_struct_desc *sdesc, -+ struct perfctr_marshal_stream *stream); -+ -+extern int perfctr_decode_struct(void *address, -+ const struct perfctr_struct_desc *sdesc, -+ struct perfctr_marshal_stream *stream); -+ -+extern const struct perfctr_struct_desc perfctr_sum_ctrs_sdesc; -+extern const struct perfctr_struct_desc perfctr_cpu_control_sdesc; -+extern const struct perfctr_struct_desc perfctr_info_sdesc; -+extern const struct perfctr_struct_desc vperfctr_control_sdesc; -+extern const struct perfctr_struct_desc gperfctr_cpu_control_sdesc; -+extern const struct perfctr_struct_desc gperfctr_cpu_state_only_cpu_sdesc; -+extern const struct perfctr_struct_desc gperfctr_cpu_state_sdesc; -+ -+#ifdef __KERNEL__ -+extern int perfctr_copy_to_user(struct perfctr_struct_buf *argp, -+ void *struct_address, -+ const struct 
perfctr_struct_desc *sdesc); -+extern int perfctr_copy_from_user(void *struct_address, -+ struct perfctr_struct_buf *argp, -+ const struct perfctr_struct_desc *sdesc); -+#else -+extern int perfctr_ioctl_w(int fd, unsigned int cmd, const void *arg, -+ const struct perfctr_struct_desc *sdesc); -+extern int perfctr_ioctl_r(int fd, unsigned int cmd, void *res, -+ const struct perfctr_struct_desc *sdesc); -+extern int perfctr_ioctl_wr(int fd, unsigned int cmd, void *argres, -+ const struct perfctr_struct_desc *arg_sdesc, -+ const struct perfctr_struct_desc *res_sdesc); -+#endif /* __KERNEL__ */ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/virtual_stub.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/virtual_stub.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/virtual_stub.c 2004-11-18 23:52:29.000000000 -0500 -@@ -0,0 +1,67 @@ -+/* $Id: virtual_stub.c,v 1.26 2003/10/04 22:53:42 mikpe Exp $ -+ * Kernel stub used to support virtual perfctrs when the -+ * perfctr driver is built as a module. 
-+ * -+ * Copyright (C) 2000-2003 Mikael Pettersson -+ */ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/kernel.h> -+#include <linux/sched.h> -+#include <linux/perfctr.h> -+#include "compat.h" -+ -+static void bug_void_perfctr(struct vperfctr *perfctr) -+{ -+ current->thread.perfctr = NULL; -+ BUG(); -+} -+ -+#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED -+static void bug_set_cpus_allowed(struct task_struct *owner, struct vperfctr *perfctr, cpumask_t new_mask) -+{ -+ owner->thread.perfctr = NULL; -+ BUG(); -+} -+#endif -+ -+struct vperfctr_stub vperfctr_stub = { -+ .exit = bug_void_perfctr, -+ .suspend = bug_void_perfctr, -+ .resume = bug_void_perfctr, -+ .sample = bug_void_perfctr, -+#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED -+ .set_cpus_allowed = bug_set_cpus_allowed, -+#endif -+}; -+ -+/* -+ * exit_thread() calls __vperfctr_exit() via vperfctr_stub.exit(). -+ * If the process' reference was the last reference to this -+ * vperfctr object, and this was the last live vperfctr object, -+ * then the perfctr module's use count will drop to zero. -+ * This is Ok, except for the fact that code is still running -+ * in the module (pending returns back to exit_thread()). This -+ * could race with rmmod in a preemptive UP kernel, leading to -+ * code running in freed memory. The race also exists in SMP -+ * kernels, but the time window is extremely small. -+ * -+ * Since exit() isn't performance-critical, we wrap the call to -+ * vperfctr_stub.exit() with code to increment the module's use -+ * count before the call, and decrement it again afterwards. Thus, -+ * the final drop to zero occurs here and not in the module itself. -+ * (All other code paths that drop the use count do so via a file -+ * object, and VFS in 2.4+ kernels also refcount the module.) 
-+ */ -+void _vperfctr_exit(struct vperfctr *perfctr) -+{ -+ __module_get(vperfctr_stub.owner); -+ vperfctr_stub.exit(perfctr); -+ module_put(vperfctr_stub.owner); -+} -+ -+EXPORT_SYMBOL(vperfctr_stub); -+ -+#include <linux/mm.h> /* for 2.4.15 and up, except 2.4.20-8-redhat */ -+#include <linux/ptrace.h> /* for 2.5.32 and up, and 2.4.20-8-redhat */ -+EXPORT_SYMBOL(ptrace_check_attach); -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc_tests.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/ppc_tests.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc_tests.c 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,292 @@ -+/* $Id: ppc_tests.c,v 1.1.2.3 2004/07/27 16:42:03 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * Optional PPC32-specific init-time tests. -+ * -+ * Copyright (C) 2004 Mikael Pettersson -+ */ -+#include <linux/config.h> -+#define __NO_VERSION__ -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/sched.h> -+#include <linux/fs.h> -+#include <linux/perfctr.h> -+#include <asm/processor.h> -+#include <asm/time.h> /* for tb_ticks_per_jiffy */ -+#include "compat.h" -+#include "ppc_compat.h" -+#include "ppc_tests.h" -+ -+#define NITER 256 -+#define X2(S) S"; "S -+#define X8(S) X2(X2(X2(S))) -+ -+static void __init do_read_tbl(unsigned int unused) -+{ -+ unsigned int i, dummy; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("mftbl %0") : "=r"(dummy)); -+} -+ -+static void __init do_read_pmc1(unsigned int unused) -+{ -+ unsigned int i, dummy; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("mfspr %0," __stringify(SPRN_PMC1)) : "=r"(dummy)); -+} -+ -+static void __init do_read_pmc2(unsigned int unused) -+{ -+ unsigned int i, dummy; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("mfspr %0," __stringify(SPRN_PMC2)) : "=r"(dummy)); 
-+} -+ -+static void __init do_read_pmc3(unsigned int unused) -+{ -+ unsigned int i, dummy; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("mfspr %0," __stringify(SPRN_PMC3)) : "=r"(dummy)); -+} -+ -+static void __init do_read_pmc4(unsigned int unused) -+{ -+ unsigned int i, dummy; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("mfspr %0," __stringify(SPRN_PMC4)) : "=r"(dummy)); -+} -+ -+static void __init do_read_mmcr0(unsigned int unused) -+{ -+ unsigned int i, dummy; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("mfspr %0," __stringify(SPRN_MMCR0)) : "=r"(dummy)); -+} -+ -+static void __init do_read_mmcr1(unsigned int unused) -+{ -+ unsigned int i, dummy; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("mfspr %0," __stringify(SPRN_MMCR1)) : "=r"(dummy)); -+} -+ -+static void __init do_write_pmc2(unsigned int arg) -+{ -+ unsigned int i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("mtspr " __stringify(SPRN_PMC2) ",%0") : : "r"(arg)); -+} -+ -+static void __init do_write_pmc3(unsigned int arg) -+{ -+ unsigned int i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("mtspr " __stringify(SPRN_PMC3) ",%0") : : "r"(arg)); -+} -+ -+static void __init do_write_pmc4(unsigned int arg) -+{ -+ unsigned int i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("mtspr " __stringify(SPRN_PMC4) ",%0") : : "r"(arg)); -+} -+ -+static void __init do_write_mmcr1(unsigned int arg) -+{ -+ unsigned int i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("mtspr " __stringify(SPRN_MMCR1) ",%0") : : "r"(arg)); -+} -+ -+static void __init do_write_mmcr0(unsigned int arg) -+{ -+ unsigned int i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("mtspr " __stringify(SPRN_MMCR0) ",%0") : : "r"(arg)); -+} -+ -+static void __init do_empty_loop(unsigned int unused) -+{ -+ unsigned i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__("" : : ); -+} -+ -+static unsigned __init run(void 
(*doit)(unsigned int), unsigned int arg) -+{ -+ unsigned int start, stop; -+ start = mfspr(SPRN_PMC1); -+ (*doit)(arg); /* should take < 2^32 cycles to complete */ -+ stop = mfspr(SPRN_PMC1); -+ return stop - start; -+} -+ -+static void __init init_tests_message(void) -+{ -+ unsigned int pvr = mfspr(SPRN_PVR); -+ printk(KERN_INFO "Please email the following PERFCTR INIT lines " -+ "to mikpe@csd.uu.se\n" -+ KERN_INFO "To remove this message, rebuild the driver " -+ "with CONFIG_PERFCTR_INIT_TESTS=n\n"); -+ printk(KERN_INFO "PERFCTR INIT: PVR 0x%08x, CPU clock %u kHz, TB clock %u kHz\n", -+ pvr, -+ perfctr_info.cpu_khz, -+ tb_ticks_per_jiffy*(HZ/10)/(1000/10)); -+} -+ -+static void __init clear(int have_mmcr1) -+{ -+ mtspr(SPRN_MMCR0, 0); -+ mtspr(SPRN_PMC1, 0); -+ mtspr(SPRN_PMC2, 0); -+ if (have_mmcr1) { -+ mtspr(SPRN_MMCR1, 0); -+ mtspr(SPRN_PMC3, 0); -+ mtspr(SPRN_PMC4, 0); -+ } -+} -+ -+static void __init check_fcece(unsigned int pmc1ce) -+{ -+ unsigned int mmcr0; -+ -+ /* -+ * This test checks if MMCR0[FC] is set after PMC1 overflows -+ * when MMCR0[FCECE] is set. -+ * 74xx documentation states this behaviour, while documentation -+ * for 604/750 processors doesn't mention this at all. -+ * -+ * Also output the value of PMC1 shortly after the overflow. -+ * This tells us if PMC1 really was frozen. On 604/750, it may not -+ * freeze since we don't enable PMIs. [No freeze confirmed on 750.] -+ * -+ * When pmc1ce == 0, MMCR0[PMC1CE] is zero. It's unclear whether -+ * this masks all PMC1 overflow events or just PMC1 PMIs. -+ * -+ * PMC1 counts processor cycles, with 100 to go before overflowing. -+ * FCECE is set. -+ * PMC1CE is clear if !pmc1ce, otherwise set. 
-+ */ -+ mtspr(SPRN_PMC1, 0x80000000-100); -+ mmcr0 = (1<<(31-6)) | (0x01 << 6); -+ if (pmc1ce) -+ mmcr0 |= (1<<(31-16)); -+ mtspr(SPRN_MMCR0, mmcr0); -+ do { -+ do_empty_loop(0); -+ } while (!(mfspr(SPRN_PMC1) & 0x80000000)); -+ do_empty_loop(0); -+ printk(KERN_INFO "PERFCTR INIT: %s(%u): MMCR0[FC] is %u, PMC1 is %#x\n", -+ __FUNCTION__, pmc1ce, -+ !!(mfspr(SPRN_MMCR0) & (1<<(31-0))), mfspr(SPRN_PMC1)); -+ mtspr(SPRN_MMCR0, 0); -+ mtspr(SPRN_PMC1, 0); -+} -+ -+static void __init check_trigger(unsigned int pmc1ce) -+{ -+ unsigned int mmcr0; -+ -+ /* -+ * This test checks if MMCR0[TRIGGER] is reset after PMC1 overflows. -+ * 74xx documentation states this behaviour, while documentation -+ * for 604/750 processors doesn't mention this at all. -+ * [No reset confirmed on 750.] -+ * -+ * Also output the values of PMC1 and PMC2 shortly after the overflow. -+ * PMC2 should be equal to PMC1-0x80000000. -+ * -+ * When pmc1ce == 0, MMCR0[PMC1CE] is zero. It's unclear whether -+ * this masks all PMC1 overflow events or just PMC1 PMIs. -+ * -+ * PMC1 counts processor cycles, with 100 to go before overflowing. -+ * PMC2 counts processor cycles, starting from 0. -+ * TRIGGER is set, so PMC2 doesn't start until PMC1 overflows. -+ * PMC1CE is clear if !pmc1ce, otherwise set. 
-+ */ -+ mtspr(SPRN_PMC2, 0); -+ mtspr(SPRN_PMC1, 0x80000000-100); -+ mmcr0 = (1<<(31-18)) | (0x01 << 6) | (0x01 << 0); -+ if (pmc1ce) -+ mmcr0 |= (1<<(31-16)); -+ mtspr(SPRN_MMCR0, mmcr0); -+ do { -+ do_empty_loop(0); -+ } while (!(mfspr(SPRN_PMC1) & 0x80000000)); -+ do_empty_loop(0); -+ printk(KERN_INFO "PERFCTR INIT: %s(%u): MMCR0[TRIGGER] is %u, PMC1 is %#x, PMC2 is %#x\n", -+ __FUNCTION__, pmc1ce, -+ !!(mfspr(SPRN_MMCR0) & (1<<(31-18))), mfspr(SPRN_PMC1), mfspr(SPRN_PMC2)); -+ mtspr(SPRN_MMCR0, 0); -+ mtspr(SPRN_PMC1, 0); -+ mtspr(SPRN_PMC2, 0); -+} -+ -+static void __init -+measure_overheads(int have_mmcr1) -+{ -+ int i; -+ unsigned int mmcr0, loop, ticks[12]; -+ const char *name[12]; -+ -+ clear(have_mmcr1); -+ -+ /* PMC1 = "processor cycles", -+ PMC2 = "completed instructions", -+ not disabled in any mode, -+ no interrupts */ -+ mmcr0 = (0x01 << 6) | (0x02 << 0); -+ mtspr(SPRN_MMCR0, mmcr0); -+ -+ name[0] = "mftbl"; -+ ticks[0] = run(do_read_tbl, 0); -+ name[1] = "mfspr (pmc1)"; -+ ticks[1] = run(do_read_pmc1, 0); -+ name[2] = "mfspr (pmc2)"; -+ ticks[2] = run(do_read_pmc2, 0); -+ name[3] = "mfspr (pmc3)"; -+ ticks[3] = have_mmcr1 ? run(do_read_pmc3, 0) : 0; -+ name[4] = "mfspr (pmc4)"; -+ ticks[4] = have_mmcr1 ? run(do_read_pmc4, 0) : 0; -+ name[5] = "mfspr (mmcr0)"; -+ ticks[5] = run(do_read_mmcr0, 0); -+ name[6] = "mfspr (mmcr1)"; -+ ticks[6] = have_mmcr1 ? run(do_read_mmcr1, 0) : 0; -+ name[7] = "mtspr (pmc2)"; -+ ticks[7] = run(do_write_pmc2, 0); -+ name[8] = "mtspr (pmc3)"; -+ ticks[8] = have_mmcr1 ? run(do_write_pmc3, 0) : 0; -+ name[9] = "mtspr (pmc4)"; -+ ticks[9] = have_mmcr1 ? run(do_write_pmc4, 0) : 0; -+ name[10] = "mtspr (mmcr1)"; -+ ticks[10] = have_mmcr1 ? 
run(do_write_mmcr1, 0) : 0; -+ name[11] = "mtspr (mmcr0)"; -+ ticks[11] = run(do_write_mmcr0, mmcr0); -+ -+ loop = run(do_empty_loop, 0); -+ -+ clear(have_mmcr1); -+ -+ init_tests_message(); -+ printk(KERN_INFO "PERFCTR INIT: NITER == %u\n", NITER); -+ printk(KERN_INFO "PERFCTR INIT: loop overhead is %u cycles\n", loop); -+ for(i = 0; i < ARRAY_SIZE(ticks); ++i) { -+ unsigned int x; -+ if (!ticks[i]) -+ continue; -+ x = ((ticks[i] - loop) * 10) / NITER; -+ printk(KERN_INFO "PERFCTR INIT: %s cost is %u.%u cycles (%u total)\n", -+ name[i], x/10, x%10, ticks[i]); -+ } -+ check_fcece(0); -+ check_fcece(1); -+ check_trigger(0); -+ check_trigger(1); -+} -+ -+void __init perfctr_ppc_init_tests(int have_mmcr1) -+{ -+ preempt_disable(); -+ measure_overheads(have_mmcr1); -+ preempt_enable(); -+} -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_setup.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_setup.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_setup.c 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,116 @@ -+/* $Id: x86_setup.c,v 1.47.2.2 2004/08/02 19:38:51 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * x86/x86_64-specific kernel-resident code. 
-+ * -+ * Copyright (C) 1999-2004 Mikael Pettersson -+ */ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/kernel.h> -+#include <linux/sched.h> -+#include <linux/interrupt.h> -+#include <asm/processor.h> -+#include <asm/perfctr.h> -+#include <asm/fixmap.h> -+#include <asm/apic.h> -+#include "x86_compat.h" -+#include "compat.h" -+ -+/* XXX: belongs to a virtual_compat.c file */ -+#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED && defined(CONFIG_PERFCTR_VIRTUAL) && LINUX_VERSION_CODE < KERNEL_VERSION(2,4,21) && !defined(HAVE_SET_CPUS_ALLOWED) -+/** -+ * set_cpus_allowed() - change a given task's processor affinity -+ * @p: task to bind -+ * @new_mask: bitmask of allowed processors -+ * -+ * Upon return, the task is running on a legal processor. Note the caller -+ * must have a valid reference to the task: it must not exit() prematurely. -+ * This call can sleep; do not hold locks on call. -+ */ -+void set_cpus_allowed(struct task_struct *p, unsigned long new_mask) -+{ -+ new_mask &= cpu_online_map; -+ BUG_ON(!new_mask); -+ -+ /* This must be our own, safe, call from sys_vperfctr_control(). */ -+ -+ p->cpus_allowed = new_mask; -+ -+ /* -+ * If the task is on a no-longer-allowed processor, we need to move -+ * it. If the task is not current, then set need_resched and send -+ * its processor an IPI to reschedule. -+ */ -+ if (!(p->cpus_runnable & p->cpus_allowed)) { -+ if (p != current) { -+ p->need_resched = 1; -+ smp_send_reschedule(p->processor); -+ } -+ /* -+ * Wait until we are on a legal processor. If the task is -+ * current, then we should be on a legal processor the next -+ * time we reschedule. Otherwise, we need to wait for the IPI. 
-+ */ -+ while (!(p->cpus_runnable & p->cpus_allowed)) -+ schedule(); -+ } -+} -+EXPORT_SYMBOL(set_cpus_allowed); -+#endif -+ -+#ifdef CONFIG_X86_LOCAL_APIC -+static void perfctr_default_ihandler(unsigned long pc) -+{ -+} -+ -+static perfctr_ihandler_t perfctr_ihandler = perfctr_default_ihandler; -+ -+asmlinkage void smp_perfctr_interrupt(struct pt_regs *regs) -+{ -+ /* PREEMPT note: invoked via an interrupt gate, which -+ masks interrupts. We're still on the originating CPU. */ -+ /* XXX: recursive interrupts? delay the ACK, mask LVTPC, or queue? */ -+ ack_APIC_irq(); -+ irq_enter(); -+ (*perfctr_ihandler)(instruction_pointer(regs)); -+ irq_exit(); -+} -+ -+void perfctr_cpu_set_ihandler(perfctr_ihandler_t ihandler) -+{ -+ perfctr_ihandler = ihandler ? ihandler : perfctr_default_ihandler; -+} -+#endif -+ -+#ifdef __x86_64__ -+extern unsigned int cpu_khz; -+#else -+extern unsigned long cpu_khz; -+#endif -+ -+/* Wrapper to avoid namespace clash in RedHat 8.0's 2.4.18-14 kernel. */ -+unsigned int perfctr_cpu_khz(void) -+{ -+ return cpu_khz; -+} -+ -+#ifdef CONFIG_PERFCTR_MODULE -+EXPORT_SYMBOL_mmu_cr4_features; -+EXPORT_SYMBOL(perfctr_cpu_khz); -+ -+#ifdef CONFIG_X86_LOCAL_APIC -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) -+EXPORT_SYMBOL(nmi_perfctr_msr); -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,67) && defined(CONFIG_PM) -+EXPORT_SYMBOL(apic_pm_register); -+EXPORT_SYMBOL(apic_pm_unregister); -+EXPORT_SYMBOL(nmi_pmdev); -+#endif -+ -+EXPORT_SYMBOL(perfctr_cpu_set_ihandler); -+#endif /* CONFIG_X86_LOCAL_APIC */ -+ -+#endif /* MODULE */ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/global.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/global.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/global.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,17 @@ -+/* $Id: global.h,v 1.7 
2003/10/02 20:04:35 mikpe Exp $ -+ * Global-mode performance-monitoring counters. -+ * -+ * Copyright (C) 2000-2003 Mikael Pettersson -+ */ -+ -+#ifdef CONFIG_PERFCTR_GLOBAL -+extern int gperfctr_ioctl(struct inode*, struct file*, unsigned int, unsigned long); -+extern void gperfctr_init(void); -+#else -+extern int gperfctr_ioctl(struct inode *inode, struct file *filp, -+ unsigned int cmd, unsigned long arg) -+{ -+ return -EINVAL; -+} -+static inline void gperfctr_init(void) { } -+#endif -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/cpumask.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/cpumask.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/cpumask.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,81 @@ -+/* $Id: cpumask.h,v 1.6.2.1 2004/07/12 21:09:45 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * Partial simulation of cpumask_t on non-cpumask_t kernels. -+ * Extension to allow inspecting a cpumask_t as array of ulong. -+ * Appropriate definition of perfctr_cpus_forbidden_mask. -+ * -+ * Copyright (C) 2003-2004 Mikael Pettersson -+ */ -+ -+/* 2.6.0-test4 changed set-of-CPUs values from ulong to cpumask_t */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -+ -+#if !defined(PERFCTR_HAVE_CPUMASK_T) && !defined(HAVE_CPUMASK_T) -+typedef unsigned long cpumask_t; -+#endif -+ -+/* RH/FC1 kernel 2.4.22-1.2115.nptl added cpumask_t, but with -+ an incomplete API and a broken cpus_and() [misspelled parameter -+ in its body]. Sigh. -+ Assume cpumask_t is unsigned long and use our own code. 
*/ -+#undef cpu_set -+#define cpu_set(cpu, map) atomic_set_mask((1UL << (cpu)), &(map)) -+#undef cpu_isset -+#define cpu_isset(cpu, map) ((map) & (1UL << (cpu))) -+#undef cpus_and -+#define cpus_and(dst,src1,src2) do { (dst) = (src1) & (src2); } while(0) -+#undef cpus_clear -+#define cpus_clear(map) do { (map) = 0UL; } while(0) -+#undef cpus_complement -+#define cpus_complement(map) do { (map) = ~(map); } while(0) -+#undef cpus_empty -+#define cpus_empty(map) ((map) == 0UL) -+#undef cpus_equal -+#define cpus_equal(map1, map2) ((map1) == (map2)) -+#undef cpus_addr -+#define cpus_addr(map) (&(map)) -+ -+#undef CPU_MASK_NONE -+#define CPU_MASK_NONE 0UL -+ -+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,1) -+ -+/* 2.6.1-rc1 introduced cpus_addr() */ -+#ifdef CPU_ARRAY_SIZE -+#define cpus_addr(map) ((map).mask) -+#else -+#define cpus_addr(map) (&(map)) -+#endif -+ -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) && !defined(cpus_andnot) -+#define cpus_andnot(dst, src1, src2) \ -+do { \ -+ cpumask_t _tmp2; \ -+ _tmp2 = (src2); \ -+ cpus_complement(_tmp2); \ -+ cpus_and((dst), (src1), _tmp2); \ -+} while(0) -+#endif -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,8) && !defined(CONFIG_SMP) -+#undef cpu_online_map -+#define cpu_online_map cpumask_of_cpu(0) -+#endif -+ -+#ifdef CPU_ARRAY_SIZE -+#define PERFCTR_CPUMASK_NRLONGS CPU_ARRAY_SIZE -+#else -+#define PERFCTR_CPUMASK_NRLONGS 1 -+#endif -+ -+/* `perfctr_cpus_forbidden_mask' used to be defined in <asm/perfctr.h>, -+ but cpumask_t compatibility issues forced it to be moved here. 
*/ -+#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED -+extern cpumask_t perfctr_cpus_forbidden_mask; -+#define perfctr_cpu_is_forbidden(cpu) cpu_isset((cpu), perfctr_cpus_forbidden_mask) -+#else -+#define perfctr_cpus_forbidden_mask CPU_MASK_NONE -+#define perfctr_cpu_is_forbidden(cpu) 0 /* cpu_isset() needs an lvalue :-( */ -+#endif -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_compat.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_compat.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_compat.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,41 @@ -+/* $Id: x86_compat.h,v 1.33 2004/02/29 16:03:03 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * x86/x86_64-specific compatibility definitions for 2.4/2.6 kernels. -+ * -+ * Copyright (C) 2000-2004 Mikael Pettersson -+ */ -+#include <linux/config.h> -+#include <linux/version.h> -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,18) -+ -+/* missing from <asm-i386/cpufeature.h> */ -+#define cpu_has_msr boot_cpu_has(X86_FEATURE_MSR) -+ -+#else /* 2.4 */ -+ -+/* missing from <asm-i386/processor.h> */ -+#ifndef cpu_has_mmx /* added in 2.4.22-pre3 */ -+#define cpu_has_mmx (test_bit(X86_FEATURE_MMX, boot_cpu_data.x86_capability)) -+#endif -+#define cpu_has_msr (test_bit(X86_FEATURE_MSR, boot_cpu_data.x86_capability)) -+#ifndef cpu_has_ht /* added in 2.4.22-pre3 */ -+#define cpu_has_ht (test_bit(28, boot_cpu_data.x86_capability)) -+#endif -+ -+#endif /* 2.4 */ -+ -+/* irq_enter() and irq_exit() take two parameters in 2.4. However, -+ we only use them to disable preemption in the interrupt handler, -+ which isn't needed in non-preemptive 2.4 kernels. 
*/ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) -+#ifdef CONFIG_PREEMPT -+#error "not yet ported to 2.4+PREEMPT" -+#endif -+#undef irq_enter -+#undef irq_exit -+#define irq_enter() do{}while(0) -+#define irq_exit() do{}while(0) -+#endif -+ -+extern unsigned int perfctr_cpu_khz(void); -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/Makefile24 -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/Makefile24 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/Makefile24 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,39 @@ -+# $Id: Makefile24,v 1.7.2.1 2004/08/02 22:24:58 mikpe Exp $ -+# Performance-monitoring counters driver Makefile for 2.4 kernels. -+ -+# construct various object file lists: -+# kernel-objs-y kernel objects exporting symbols -+# y-objs-y kernel objects not exporting symbols -+# m-objs-m perfctr.o if driver is module, empty otherwise -+# driver-objs-y objects for perfctr.o module, or empty -+ -+# This also covers x86_64. 
-+driver-objs-$(CONFIG_X86) := x86.o -+tests-objs-$(CONFIG_X86) := x86_tests.o -+kernel-objs-$(CONFIG_X86) := x86_setup.o -+ -+driver-objs-$(CONFIG_PPC32) := ppc.o -+tests-objs-$(CONFIG_PPC32) := ppc_tests.o -+kernel-objs-$(CONFIG_PPC32) := ppc_setup.o -+ -+driver-objs-y += init.o marshal.o -+driver-objs-$(CONFIG_PERFCTR_INIT_TESTS) += $(tests-objs-y) -+driver-objs-$(CONFIG_PERFCTR_VIRTUAL) += virtual.o -+stub-objs-$(CONFIG_PERFCTR)-$(CONFIG_PERFCTR_VIRTUAL) := virtual_stub.o -+driver-objs-$(CONFIG_PERFCTR_GLOBAL) += global.o -+m-objs-$(CONFIG_PERFCTR) := perfctr.o -+y-objs-$(CONFIG_PERFCTR) := $(driver-objs-y) -+kernel-objs-y += $(stub-objs-m-y) -+ -+perfctr-objs := $(driver-objs-y) -+obj-m += $(m-objs-m) -+ -+export-objs := $(kernel-objs-y) -+O_TARGET := kperfctr.o -+obj-y := $(kernel-objs-y) $(y-objs-y) -+list-multi := perfctr.o -+ -+include $(TOPDIR)/Rules.make -+ -+perfctr.o: $(perfctr-objs) -+ $(LD) -r -o $@ $(perfctr-objs) -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_tests.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_tests.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_tests.c 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,310 @@ -+/* $Id: x86_tests.c,v 1.23.2.5 2004/08/02 22:24:58 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * Optional x86/x86_64-specific init-time tests. 
-+ * -+ * Copyright (C) 1999-2004 Mikael Pettersson -+ */ -+#include <linux/config.h> -+#define __NO_VERSION__ -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/sched.h> -+#include <linux/fs.h> -+#include <linux/perfctr.h> -+#include <asm/msr.h> -+#undef MSR_P6_PERFCTR0 -+#undef MSR_P4_IQ_CCCR0 -+#undef MSR_P4_CRU_ESCR0 -+#include <asm/fixmap.h> -+#include <asm/apic.h> -+#include "x86_compat.h" -+#include "x86_tests.h" -+ -+#define MSR_P5_CESR 0x11 -+#define MSR_P5_CTR0 0x12 -+#define P5_CESR_VAL (0x16 | (3<<6)) -+#define MSR_P6_PERFCTR0 0xC1 -+#define MSR_P6_EVNTSEL0 0x186 -+#define P6_EVNTSEL0_VAL (0xC0 | (3<<16) | (1<<22)) -+#define MSR_K7_EVNTSEL0 0xC0010000 -+#define MSR_K7_PERFCTR0 0xC0010004 -+#define K7_EVNTSEL0_VAL (0xC0 | (3<<16) | (1<<22)) -+#define VC3_EVNTSEL1_VAL 0xC0 -+#define MSR_P4_IQ_COUNTER0 0x30C -+#define MSR_P4_IQ_CCCR0 0x36C -+#define MSR_P4_CRU_ESCR0 0x3B8 -+#define P4_CRU_ESCR0_VAL ((2<<25) | (1<<9) | (0x3<<2)) -+#define P4_IQ_CCCR0_VAL ((0x3<<16) | (4<<13) | (1<<12)) -+ -+#define NITER 64 -+#define X2(S) S";"S -+#define X8(S) X2(X2(X2(S))) -+ -+#ifdef __x86_64__ -+#define CR4MOV "movq" -+#else -+#define CR4MOV "movl" -+#endif -+ -+#ifndef CONFIG_X86_LOCAL_APIC -+#undef apic_write -+#define apic_write(reg,vector) do{}while(0) -+#endif -+ -+#if !defined(__x86_64__) -+/* Avoid speculative execution by the CPU */ -+extern inline void sync_core(void) -+{ -+ int tmp; -+ asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory"); -+} -+#endif -+ -+static void __init do_rdpmc(unsigned pmc, unsigned unused2) -+{ -+ unsigned i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("rdpmc") : : "c"(pmc) : "eax", "edx"); -+} -+ -+static void __init do_rdmsr(unsigned msr, unsigned unused2) -+{ -+ unsigned i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("rdmsr") : : "c"(msr) : "eax", "edx"); -+} -+ -+static void __init do_wrmsr(unsigned msr, unsigned data) -+{ -+ unsigned i; -+ for(i = 0; i < 
NITER/8; ++i) -+ __asm__ __volatile__(X8("wrmsr") : : "c"(msr), "a"(data), "d"(0)); -+} -+ -+static void __init do_rdcr4(unsigned unused1, unsigned unused2) -+{ -+ unsigned i; -+ unsigned long dummy; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8(CR4MOV" %%cr4,%0") : "=r"(dummy)); -+} -+ -+static void __init do_wrcr4(unsigned cr4, unsigned unused2) -+{ -+ unsigned i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8(CR4MOV" %0,%%cr4") : : "r"((long)cr4)); -+} -+ -+static void __init do_rdtsc(unsigned unused1, unsigned unused2) -+{ -+ unsigned i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("rdtsc") : : : "eax", "edx"); -+} -+ -+static void __init do_wrlvtpc(unsigned val, unsigned unused2) -+{ -+ unsigned i; -+ for(i = 0; i < NITER/8; ++i) { -+ apic_write(APIC_LVTPC, val); -+ apic_write(APIC_LVTPC, val); -+ apic_write(APIC_LVTPC, val); -+ apic_write(APIC_LVTPC, val); -+ apic_write(APIC_LVTPC, val); -+ apic_write(APIC_LVTPC, val); -+ apic_write(APIC_LVTPC, val); -+ apic_write(APIC_LVTPC, val); -+ } -+} -+ -+static void __init do_sync_core(unsigned unused1, unsigned unused2) -+{ -+ unsigned i; -+ for(i = 0; i < NITER/8; ++i) { -+ sync_core(); -+ sync_core(); -+ sync_core(); -+ sync_core(); -+ sync_core(); -+ sync_core(); -+ sync_core(); -+ sync_core(); -+ } -+} -+ -+static void __init do_empty_loop(unsigned unused1, unsigned unused2) -+{ -+ unsigned i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__("" : : "c"(0)); -+} -+ -+static unsigned __init run(void (*doit)(unsigned, unsigned), -+ unsigned arg1, unsigned arg2) -+{ -+ unsigned start, dummy, stop; -+ sync_core(); -+ rdtsc(start, dummy); -+ (*doit)(arg1, arg2); /* should take < 2^32 cycles to complete */ -+ sync_core(); -+ rdtsc(stop, dummy); -+ return stop - start; -+} -+ -+static void __init init_tests_message(void) -+{ -+ printk(KERN_INFO "Please email the following PERFCTR INIT lines " -+ "to mikpe@csd.uu.se\n" -+ KERN_INFO "To remove this message, rebuild the driver " 
-+ "with CONFIG_PERFCTR_INIT_TESTS=n\n"); -+ printk(KERN_INFO "PERFCTR INIT: vendor %u, family %u, model %u, stepping %u, clock %u kHz\n", -+ current_cpu_data.x86_vendor, -+ current_cpu_data.x86, -+ current_cpu_data.x86_model, -+ current_cpu_data.x86_mask, -+ perfctr_cpu_khz()); -+} -+ -+static void __init -+measure_overheads(unsigned msr_evntsel0, unsigned evntsel0, unsigned msr_perfctr0, -+ unsigned msr_cccr, unsigned cccr_val) -+{ -+ int i; -+ unsigned int loop, ticks[13]; -+ const char *name[13]; -+ -+ if (msr_evntsel0) -+ wrmsr(msr_evntsel0, 0, 0); -+ if (msr_cccr) -+ wrmsr(msr_cccr, 0, 0); -+ -+ name[0] = "rdtsc"; -+ ticks[0] = run(do_rdtsc, 0, 0); -+ name[1] = "rdpmc"; -+ ticks[1] = (perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC) -+ ? run(do_rdpmc,1,0) : 0; -+ name[2] = "rdmsr (counter)"; -+ ticks[2] = msr_perfctr0 ? run(do_rdmsr, msr_perfctr0, 0) : 0; -+ name[3] = msr_cccr ? "rdmsr (escr)" : "rdmsr (evntsel)"; -+ ticks[3] = msr_evntsel0 ? run(do_rdmsr, msr_evntsel0, 0) : 0; -+ name[4] = "wrmsr (counter)"; -+ ticks[4] = msr_perfctr0 ? run(do_wrmsr, msr_perfctr0, 0) : 0; -+ name[5] = msr_cccr ? "wrmsr (escr)" : "wrmsr (evntsel)"; -+ ticks[5] = msr_evntsel0 ? run(do_wrmsr, msr_evntsel0, evntsel0) : 0; -+ name[6] = "read cr4"; -+ ticks[6] = run(do_rdcr4, 0, 0); -+ name[7] = "write cr4"; -+ ticks[7] = run(do_wrcr4, read_cr4(), 0); -+ name[8] = "rdpmc (fast)"; -+ ticks[8] = msr_cccr ? run(do_rdpmc, 0x80000001, 0) : 0; -+ name[9] = "rdmsr (cccr)"; -+ ticks[9] = msr_cccr ? run(do_rdmsr, msr_cccr, 0) : 0; -+ name[10] = "wrmsr (cccr)"; -+ ticks[10] = msr_cccr ? run(do_wrmsr, msr_cccr, cccr_val) : 0; -+ name[11] = "write LVTPC"; -+ ticks[11] = (perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT) -+ ? 
run(do_wrlvtpc, APIC_DM_NMI|APIC_LVT_MASKED, 0) : 0; -+ name[12] = "sync_core"; -+ ticks[12] = run(do_sync_core, 0, 0); -+ -+ loop = run(do_empty_loop, 0, 0); -+ -+ if (msr_evntsel0) -+ wrmsr(msr_evntsel0, 0, 0); -+ if (msr_cccr) -+ wrmsr(msr_cccr, 0, 0); -+ -+ init_tests_message(); -+ printk(KERN_INFO "PERFCTR INIT: NITER == %u\n", NITER); -+ printk(KERN_INFO "PERFCTR INIT: loop overhead is %u cycles\n", loop); -+ for(i = 0; i < ARRAY_SIZE(ticks); ++i) { -+ unsigned int x; -+ if (!ticks[i]) -+ continue; -+ x = ((ticks[i] - loop) * 10) / NITER; -+ printk(KERN_INFO "PERFCTR INIT: %s cost is %u.%u cycles (%u total)\n", -+ name[i], x/10, x%10, ticks[i]); -+ } -+} -+ -+#ifndef __x86_64__ -+static inline void perfctr_p5_init_tests(void) -+{ -+ measure_overheads(MSR_P5_CESR, P5_CESR_VAL, MSR_P5_CTR0, 0, 0); -+} -+ -+static inline void perfctr_p6_init_tests(void) -+{ -+ measure_overheads(MSR_P6_EVNTSEL0, P6_EVNTSEL0_VAL, MSR_P6_PERFCTR0, 0, 0); -+} -+ -+#if !defined(CONFIG_X86_TSC) -+static inline void perfctr_c6_init_tests(void) -+{ -+ unsigned int cesr, dummy; -+ -+ rdmsr(MSR_P5_CESR, cesr, dummy); -+ init_tests_message(); -+ printk(KERN_INFO "PERFCTR INIT: boot CESR == %#08x\n", cesr); -+} -+#endif -+ -+static inline void perfctr_vc3_init_tests(void) -+{ -+ measure_overheads(MSR_P6_EVNTSEL0+1, VC3_EVNTSEL1_VAL, MSR_P6_PERFCTR0+1, 0, 0); -+} -+#endif /* !__x86_64__ */ -+ -+static inline void perfctr_p4_init_tests(void) -+{ -+ measure_overheads(MSR_P4_CRU_ESCR0, P4_CRU_ESCR0_VAL, MSR_P4_IQ_COUNTER0, -+ MSR_P4_IQ_CCCR0, P4_IQ_CCCR0_VAL); -+} -+ -+static inline void perfctr_k7_init_tests(void) -+{ -+ measure_overheads(MSR_K7_EVNTSEL0, K7_EVNTSEL0_VAL, MSR_K7_PERFCTR0, 0, 0); -+} -+ -+static inline void perfctr_generic_init_tests(void) -+{ -+ measure_overheads(0, 0, 0, 0, 0); -+} -+ -+enum perfctr_x86_tests_type perfctr_x86_tests_type __initdata = PTT_UNKNOWN; -+ -+void __init perfctr_x86_init_tests(void) -+{ -+ switch (perfctr_x86_tests_type) { -+#ifndef __x86_64__ -+ case 
PTT_P5: /* Intel P5, P5MMX; Cyrix 6x86MX, MII, III */ -+ perfctr_p5_init_tests(); -+ break; -+ case PTT_P6: /* Intel PPro, PII, PIII, PENTM */ -+ perfctr_p6_init_tests(); -+ break; -+#if !defined(CONFIG_X86_TSC) -+ case PTT_WINCHIP: /* WinChip C6, 2, 3 */ -+ perfctr_c6_init_tests(); -+ break; -+#endif -+ case PTT_VC3: /* VIA C3 */ -+ perfctr_vc3_init_tests(); -+ break; -+#endif /* !__x86_64__ */ -+ case PTT_P4: /* Intel P4 */ -+ perfctr_p4_init_tests(); -+ break; -+ case PTT_AMD: /* AMD K7, K8 */ -+ perfctr_k7_init_tests(); -+ break; -+ case PTT_GENERIC: -+ perfctr_generic_init_tests(); -+ break; -+ default: -+ printk(KERN_INFO "%s: unknown CPU type %u\n", -+ __FUNCTION__, perfctr_x86_tests_type); -+ break; -+ } -+} -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/ppc.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc.c 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,925 @@ -+/* $Id: ppc.c,v 1.3.2.8 2004/10/19 15:18:21 mikpe Exp $ -+ * PPC32 performance-monitoring counters driver. -+ * -+ * Copyright (C) 2004 Mikael Pettersson -+ */ -+#include <linux/config.h> -+#define __NO_VERSION__ -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/sched.h> -+#include <linux/fs.h> -+#include <linux/perfctr.h> -+#include <asm/prom.h> -+#include <asm/time.h> /* tb_ticks_per_jiffy, get_tbl() */ -+ -+#include "compat.h" -+#include "ppc_compat.h" -+#include "ppc_tests.h" -+ -+/* Support for lazy evntsel and perfctr SPR updates. */ -+struct per_cpu_cache { /* roughly a subset of perfctr_cpu_state */ -+ union { -+ unsigned int id; /* cache owner id */ -+ } k1; -+ /* Physically indexed cache of the MMCRs. 
*/ -+ unsigned int ppc_mmcr[3]; -+} ____cacheline_aligned; -+static struct per_cpu_cache per_cpu_cache[NR_CPUS] __cacheline_aligned; -+#define get_cpu_cache() (&per_cpu_cache[smp_processor_id()]) -+ -+/* Structure for counter snapshots, as 32-bit values. */ -+struct perfctr_low_ctrs { -+ unsigned int tsc; -+ unsigned int pmc[6]; -+}; -+ -+enum pm_type { -+ PM_NONE, -+ PM_604, -+ PM_604e, -+ PM_750, /* XXX: Minor event set diffs between IBM and Moto. */ -+ PM_7400, -+ PM_7450, -+}; -+static enum pm_type pm_type; -+ -+/* Bits users shouldn't set in control.ppc.mmcr0: -+ * - PMXE because we don't yet support overflow interrupts -+ * - PMC1SEL/PMC2SEL because event selectors are in control.evntsel[] -+ */ -+#define MMCR0_RESERVED (MMCR0_PMXE | MMCR0_PMC1SEL | MMCR0_PMC2SEL) -+ -+static unsigned int new_id(void) -+{ -+ static spinlock_t lock = SPIN_LOCK_UNLOCKED; -+ static unsigned int counter; -+ int id; -+ -+ spin_lock(&lock); -+ id = ++counter; -+ spin_unlock(&lock); -+ return id; -+} -+ -+#ifndef PERFCTR_INTERRUPT_SUPPORT -+#define perfctr_cstatus_has_ictrs(cstatus) 0 -+#endif -+ -+#if defined(CONFIG_SMP) && defined(PERFCTR_INTERRUPT_SUPPORT) -+ -+static inline void -+set_isuspend_cpu(struct perfctr_cpu_state *state, int cpu) -+{ -+ state->k1.isuspend_cpu = cpu; -+} -+ -+static inline int -+is_isuspend_cpu(const struct perfctr_cpu_state *state, int cpu) -+{ -+ return state->k1.isuspend_cpu == cpu; -+} -+ -+static inline void clear_isuspend_cpu(struct perfctr_cpu_state *state) -+{ -+ state->k1.isuspend_cpu = NR_CPUS; -+} -+ -+#else -+static inline void set_isuspend_cpu(struct perfctr_cpu_state *state, int cpu) { } -+static inline int is_isuspend_cpu(const struct perfctr_cpu_state *state, int cpu) { return 1; } -+static inline void clear_isuspend_cpu(struct perfctr_cpu_state *state) { } -+#endif -+ -+/* The ppc driver internally uses cstatus & (1<<30) to record that -+ a context has an asynchronously changing MMCR0. 
*/ -+static inline unsigned int perfctr_cstatus_set_mmcr0_quirk(unsigned int cstatus) -+{ -+ return cstatus | (1 << 30); -+} -+ -+static inline int perfctr_cstatus_has_mmcr0_quirk(unsigned int cstatus) -+{ -+ return cstatus & (1 << 30); -+} -+ -+/**************************************************************** -+ * * -+ * Driver procedures. * -+ * * -+ ****************************************************************/ -+ -+/* -+ * The PowerPC 604/750/74xx family. -+ * -+ * Common features -+ * --------------- -+ * - Per counter event selection data in subfields of control registers. -+ * MMCR0 contains both global control and PMC1/PMC2 event selectors. -+ * - Overflow interrupt support is present in all processors, but an -+ * erratum makes it difficult to use in 750/7400/7410 processors. -+ * - There is no concept of per-counter qualifiers: -+ * - User-mode/supervisor-mode restrictions are global. -+ * - Two groups of counters, PMC1 and PMC2-PMC<highest>. Each group -+ * has a single overflow interrupt/event enable/disable flag. -+ * - The instructions used to read (mfspr) and write (mtspr) the control -+ * and counter registers (SPRs) only support hardcoded register numbers. -+ * There is no support for accessing an SPR via a runtime value. -+ * - Each counter supports its own unique set of events. However, events -+ * 0-1 are common for PMC1-PMC4, and events 2-4 are common for PMC1-PMC4. -+ * - There is no separate high-resolution core clock counter. -+ * The time-base counter is available, but it typically runs an order of -+ * magnitude slower than the core clock. -+ * Any performance counter can be programmed to count core clocks, but -+ * doing this (a) reserves one PMC, and (b) needs indirect accesses -+ * since the SPR number in general isn't known at compile-time. -+ * -+ * Driver notes -+ * ------------ -+ * - The driver currently does not support performance monitor interrupts, -+ * mostly because of the 750/7400/7410 erratum. 
Working around it would -+ * require disabling the decrementer interrupt, reserving a performance -+ * counter and setting it up for TBL bit-flip events, and having the PMI -+ * handler invoke the decrementer handler. -+ * -+ * 604 -+ * --- -+ * 604 has MMCR0, PMC1, PMC2, SIA, and SDA. -+ * -+ * MMCR0[THRESHOLD] is not automatically multiplied. -+ * -+ * On the 604, software must always reset MMCR0[ENINT] after -+ * taking a PMI. This is not the case for the 604e. -+ * -+ * 604e -+ * ---- -+ * 604e adds MMCR1, PMC3, and PMC4. -+ * Bus-to-core multiplier is available via HID1[PLL_CFG]. -+ * -+ * MMCR0[THRESHOLD] is automatically multiplied by 4. -+ * -+ * When the 604e vectors to the PMI handler, it automatically -+ * clears any pending PMIs. Unlike the 604, the 604e does not -+ * require MMCR0[ENINT] to be cleared (and possibly reset) -+ * before external interrupts can be re-enabled. -+ * -+ * 750 -+ * --- -+ * 750 adds user-readable MMCRn/PMCn/SIA registers, and removes SDA. -+ * -+ * MMCR0[THRESHOLD] is not automatically multiplied. -+ * -+ * Motorola MPC750UM.pdf, page C-78, states: "The performance monitor -+ * of the MPC755 functions the same as that of the MPC750, (...), except -+ * that for both the MPC750 and MPC755, no combination of the thermal -+ * assist unit, the decrementer register, and the performance monitor -+ * can be used at any one time. If exceptions for any two of these -+ * functional blocks are enabled together, multiple exceptions caused -+ * by any of these three blocks cause unpredictable results." -+ * -+ * IBM 750CXe_Err_DD2X.pdf, Erratum #13, states that a PMI which -+ * occurs immediately after a delayed decrementer exception can -+ * corrupt SRR0, causing the processor to hang. It also states that -+ * PMIs via TB bit transitions can be used to simulate the decrementer. -+ * -+ * 750FX adds dual-PLL support and programmable core frequency switching. -+ * -+ * 74xx -+ * ---- -+ * 7400 adds MMCR2 and BAMR. 
-+ * -+ * MMCR0[THRESHOLD] is multiplied by 2 or 32, as specified -+ * by MMCR2[THRESHMULT]. -+ * -+ * 74xx changes the semantics of several MMCR0 control bits, -+ * compared to 604/750. -+ * -+ * PPC7410 Erratum No. 10: Like the MPC750 TAU/DECR/PMI erratum. -+ * Erratum No. 14 marks TAU as unsupported in 7410, but this leaves -+ * perfmon and decrementer interrupts as being mutually exclusive. -+ * Affects PPC7410 1.0-1.2 (PVR 0x800C1100-0x800C1102). 1.3 and up -+ * (PVR 0x800C1103 up) are Ok. -+ * -+ * 7450 adds PMC5 and PMC6. -+ * -+ * 7455/7445 V3.3 (PVR 80010303) and later use the 7457 PLL table, -+ * earlier revisions use the 7450 PLL table -+ */ -+ -+static inline unsigned int read_pmc(unsigned int pmc) -+{ -+ switch (pmc) { -+ default: /* impossible, but silences gcc warning */ -+ case 0: -+ return mfspr(SPRN_PMC1); -+ case 1: -+ return mfspr(SPRN_PMC2); -+ case 2: -+ return mfspr(SPRN_PMC3); -+ case 3: -+ return mfspr(SPRN_PMC4); -+ case 4: -+ return mfspr(SPRN_PMC5); -+ case 5: -+ return mfspr(SPRN_PMC6); -+ } -+} -+ -+static void ppc_read_counters(struct perfctr_cpu_state *state, -+ struct perfctr_low_ctrs *ctrs) -+{ -+ unsigned int cstatus, nrctrs, i; -+ -+ cstatus = state->cstatus; -+ if (perfctr_cstatus_has_tsc(cstatus)) -+ ctrs->tsc = get_tbl(); -+ nrctrs = perfctr_cstatus_nractrs(cstatus); -+ for(i = 0; i < nrctrs; ++i) { -+ unsigned int pmc = state->pmc[i].map; -+ ctrs->pmc[i] = read_pmc(pmc); -+ } -+} -+ -+static unsigned int pmc_max_event(unsigned int pmc) -+{ -+ switch (pmc) { -+ default: /* impossible, but silences gcc warning */ -+ case 0: -+ return 127; -+ case 1: -+ return 63; -+ case 2: -+ return 31; -+ case 3: -+ return 31; -+ case 4: -+ return 31; -+ case 5: -+ return 63; -+ } -+} -+ -+static unsigned int get_nr_pmcs(void) -+{ -+ switch (pm_type) { -+ case PM_7450: -+ return 6; -+ case PM_7400: -+ case PM_750: -+ case PM_604e: -+ return 4; -+ case PM_604: -+ return 2; -+ default: /* PM_NONE, but silences gcc warning */ -+ return 0; -+ } 
-+} -+ -+static int ppc_check_control(struct perfctr_cpu_state *state) -+{ -+ unsigned int i, nrctrs, pmc_mask, pmc; -+ unsigned int nr_pmcs, evntsel[6]; -+ -+ nr_pmcs = get_nr_pmcs(); -+ nrctrs = state->control.nractrs; -+ if (state->control.nrictrs || nrctrs > nr_pmcs) -+ return -EINVAL; -+ -+ pmc_mask = 0; -+ memset(evntsel, 0, sizeof evntsel); -+ for(i = 0; i < nrctrs; ++i) { -+ pmc = state->control.pmc_map[i]; -+ state->pmc[i].map = pmc; -+ if (pmc >= nr_pmcs || (pmc_mask & (1<<pmc))) -+ return -EINVAL; -+ pmc_mask |= (1<<pmc); -+ -+ evntsel[pmc] = state->control.evntsel[i]; -+ if (evntsel[pmc] > pmc_max_event(pmc)) -+ return -EINVAL; -+ } -+ -+ switch (pm_type) { -+ case PM_7450: -+ case PM_7400: -+ if (state->control.ppc.mmcr2 & MMCR2_RESERVED) -+ return -EINVAL; -+ state->ppc_mmcr[2] = state->control.ppc.mmcr2; -+ break; -+ default: -+ if (state->control.ppc.mmcr2) -+ return -EINVAL; -+ state->ppc_mmcr[2] = 0; -+ } -+ -+ if (state->control.ppc.mmcr0 & MMCR0_RESERVED) -+ return -EINVAL; -+ state->ppc_mmcr[0] = (state->control.ppc.mmcr0 -+ | (evntsel[0] << (31-25)) -+ | (evntsel[1] << (31-31))); -+ -+ state->ppc_mmcr[1] = (( evntsel[2] << (31-4)) -+ | (evntsel[3] << (31-9)) -+ | (evntsel[4] << (31-14)) -+ | (evntsel[5] << (31-20))); -+ -+ state->k1.id = new_id(); -+ -+ /* -+ * MMCR0[FC] and MMCR0[TRIGGER] may change on 74xx if FCECE or -+ * TRIGGER is set. At suspends we must read MMCR0 back into -+ * the state and the cache and then freeze the counters, and -+ * at resumes we must unfreeze the counters and reload MMCR0. 
-+ */ -+ switch (pm_type) { -+ case PM_7450: -+ case PM_7400: -+ if (state->ppc_mmcr[0] & (MMCR0_FCECE | MMCR0_TRIGGER)) -+ state->cstatus = perfctr_cstatus_set_mmcr0_quirk(state->cstatus); -+ default: -+ ; -+ } -+ -+ return 0; -+} -+ -+#ifdef PERFCTR_INTERRUPT_SUPPORT -+static void ppc_isuspend(struct perfctr_cpu_state *state) -+{ -+ // XXX -+} -+ -+static void ppc_iresume(const struct perfctr_cpu_state *state) -+{ -+ // XXX -+} -+#endif -+ -+static void ppc_write_control(const struct perfctr_cpu_state *state) -+{ -+ struct per_cpu_cache *cache; -+ unsigned int value; -+ -+ cache = get_cpu_cache(); -+ if (cache->k1.id == state->k1.id) -+ return; -+ /* -+ * Order matters here: update threshmult and event -+ * selectors before updating global control, which -+ * potentially enables PMIs. -+ * -+ * Since mtspr doesn't accept a runtime value for the -+ * SPR number, unroll the loop so each mtspr targets -+ * a constant SPR. -+ * -+ * For processors without MMCR2, we ensure that the -+ * cache and the state indicate the same value for it, -+ * preventing any actual mtspr to it. Ditto for MMCR1. 
-+ */ -+ value = state->ppc_mmcr[2]; -+ if (value != cache->ppc_mmcr[2]) { -+ cache->ppc_mmcr[2] = value; -+ mtspr(SPRN_MMCR2, value); -+ } -+ value = state->ppc_mmcr[1]; -+ if (value != cache->ppc_mmcr[1]) { -+ cache->ppc_mmcr[1] = value; -+ mtspr(SPRN_MMCR1, value); -+ } -+ value = state->ppc_mmcr[0]; -+ if (value != cache->ppc_mmcr[0]) { -+ cache->ppc_mmcr[0] = value; -+ mtspr(SPRN_MMCR0, value); -+ } -+ cache->k1.id = state->k1.id; -+} -+ -+static void ppc_clear_counters(void) -+{ -+ switch (pm_type) { -+ case PM_7450: -+ case PM_7400: -+ mtspr(SPRN_MMCR2, 0); -+ mtspr(SPRN_BAMR, 0); -+ case PM_750: -+ case PM_604e: -+ mtspr(SPRN_MMCR1, 0); -+ case PM_604: -+ mtspr(SPRN_MMCR0, 0); -+ case PM_NONE: -+ ; -+ } -+ switch (pm_type) { -+ case PM_7450: -+ mtspr(SPRN_PMC6, 0); -+ mtspr(SPRN_PMC5, 0); -+ case PM_7400: -+ case PM_750: -+ case PM_604e: -+ mtspr(SPRN_PMC4, 0); -+ mtspr(SPRN_PMC3, 0); -+ case PM_604: -+ mtspr(SPRN_PMC2, 0); -+ mtspr(SPRN_PMC1, 0); -+ case PM_NONE: -+ ; -+ } -+} -+ -+/* -+ * Driver methods, internal and exported. -+ */ -+ -+static void perfctr_cpu_write_control(const struct perfctr_cpu_state *state) -+{ -+ return ppc_write_control(state); -+} -+ -+static void perfctr_cpu_read_counters(struct perfctr_cpu_state *state, -+ struct perfctr_low_ctrs *ctrs) -+{ -+ return ppc_read_counters(state, ctrs); -+} -+ -+#ifdef PERFCTR_INTERRUPT_SUPPORT -+static void perfctr_cpu_isuspend(struct perfctr_cpu_state *state) -+{ -+ return ppc_isuspend(state); -+} -+ -+static void perfctr_cpu_iresume(const struct perfctr_cpu_state *state) -+{ -+ return ppc_iresume(state); -+} -+ -+/* Call perfctr_cpu_ireload() just before perfctr_cpu_resume() to -+ bypass internal caching and force a reload if the I-mode PMCs. 
*/ -+void perfctr_cpu_ireload(struct perfctr_cpu_state *state) -+{ -+#ifdef CONFIG_SMP -+ clear_isuspend_cpu(state); -+#else -+ get_cpu_cache()->k1.id = 0; -+#endif -+} -+ -+/* PRE: the counters have been suspended and sampled by perfctr_cpu_suspend() */ -+unsigned int perfctr_cpu_identify_overflow(struct perfctr_cpu_state *state) -+{ -+ unsigned int cstatus, nrctrs, pmc, pmc_mask; -+ -+ cstatus = state->cstatus; -+ pmc = perfctr_cstatus_nractrs(cstatus); -+ nrctrs = perfctr_cstatus_nrctrs(cstatus); -+ -+ for(pmc_mask = 0; pmc < nrctrs; ++pmc) { -+ if ((int)state->pmc[pmc].start < 0) { /* PPC-specific */ -+ /* XXX: "+=" to correct for overshots */ -+ state->pmc[pmc].start = state->control.ireset[pmc]; -+ pmc_mask |= (1 << pmc); -+ } -+ } -+ /* XXX: if pmc_mask == 0, then it must have been a TBL bit flip */ -+ /* XXX: HW cleared MMCR0[ENINT]. We presumably cleared the entire -+ MMCR0, so the re-enable occurs automatically later, no? */ -+ return pmc_mask; -+} -+ -+static inline int check_ireset(const struct perfctr_cpu_state *state) -+{ -+ unsigned int nrctrs, i; -+ -+ i = state->control.nractrs; -+ nrctrs = i + state->control.nrictrs; -+ for(; i < nrctrs; ++i) -+ if (state->control.ireset[i] < 0) /* PPC-specific */ -+ return -EINVAL; -+ return 0; -+} -+ -+static inline void setup_imode_start_values(struct perfctr_cpu_state *state) -+{ -+ unsigned int cstatus, nrctrs, i; -+ -+ cstatus = state->cstatus; -+ nrctrs = perfctr_cstatus_nrctrs(cstatus); -+ for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) -+ state->pmc[i].start = state->control.ireset[i]; -+} -+ -+#else /* PERFCTR_INTERRUPT_SUPPORT */ -+static inline void perfctr_cpu_isuspend(struct perfctr_cpu_state *state) { } -+static inline void perfctr_cpu_iresume(const struct perfctr_cpu_state *state) { } -+static inline int check_ireset(const struct perfctr_cpu_state *state) { return 0; } -+static inline void setup_imode_start_values(struct perfctr_cpu_state *state) { } -+#endif /* PERFCTR_INTERRUPT_SUPPORT 
*/ -+ -+static int check_control(struct perfctr_cpu_state *state) -+{ -+ return ppc_check_control(state); -+} -+ -+int perfctr_cpu_update_control(struct perfctr_cpu_state *state, int is_global) -+{ -+ int err; -+ -+ clear_isuspend_cpu(state); -+ state->cstatus = 0; -+ -+ /* disallow i-mode counters if we cannot catch the interrupts */ -+ if (!(perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT) -+ && state->control.nrictrs) -+ return -EPERM; -+ -+ err = check_ireset(state); -+ if (err < 0) -+ return err; -+ err = check_control(state); /* may initialise state->cstatus */ -+ if (err < 0) -+ return err; -+ state->cstatus |= perfctr_mk_cstatus(state->control.tsc_on, -+ state->control.nractrs, -+ state->control.nrictrs); -+ setup_imode_start_values(state); -+ return 0; -+} -+ -+void perfctr_cpu_suspend(struct perfctr_cpu_state *state) -+{ -+ unsigned int i, cstatus, nractrs; -+ struct perfctr_low_ctrs now; -+ -+ if (perfctr_cstatus_has_mmcr0_quirk(state->cstatus)) { -+ unsigned int mmcr0 = mfspr(SPRN_MMCR0); -+ mtspr(SPRN_MMCR0, mmcr0 | MMCR0_FC); -+ get_cpu_cache()->ppc_mmcr[0] = mmcr0 | MMCR0_FC; -+ state->ppc_mmcr[0] = mmcr0; -+ } -+ if (perfctr_cstatus_has_ictrs(state->cstatus)) -+ perfctr_cpu_isuspend(state); -+ perfctr_cpu_read_counters(state, &now); -+ cstatus = state->cstatus; -+ if (perfctr_cstatus_has_tsc(cstatus)) -+ state->tsc_sum += now.tsc - state->tsc_start; -+ nractrs = perfctr_cstatus_nractrs(cstatus); -+ for(i = 0; i < nractrs; ++i) -+ state->pmc[i].sum += now.pmc[i] - state->pmc[i].start; -+} -+ -+void perfctr_cpu_resume(struct perfctr_cpu_state *state) -+{ -+ if (perfctr_cstatus_has_ictrs(state->cstatus)) -+ perfctr_cpu_iresume(state); -+ if (perfctr_cstatus_has_mmcr0_quirk(state->cstatus)) -+ get_cpu_cache()->k1.id = 0; /* force reload of MMCR0 */ -+ perfctr_cpu_write_control(state); -+ //perfctr_cpu_read_counters(state, &state->start); -+ { -+ struct perfctr_low_ctrs now; -+ unsigned int i, cstatus, nrctrs; -+ perfctr_cpu_read_counters(state, &now); 
-+ cstatus = state->cstatus; -+ if (perfctr_cstatus_has_tsc(cstatus)) -+ state->tsc_start = now.tsc; -+ nrctrs = perfctr_cstatus_nractrs(cstatus); -+ for(i = 0; i < nrctrs; ++i) -+ state->pmc[i].start = now.pmc[i]; -+ } -+ /* XXX: if (SMP && start.tsc == now.tsc) ++now.tsc; */ -+} -+ -+void perfctr_cpu_sample(struct perfctr_cpu_state *state) -+{ -+ unsigned int i, cstatus, nractrs; -+ struct perfctr_low_ctrs now; -+ -+ perfctr_cpu_read_counters(state, &now); -+ cstatus = state->cstatus; -+ if (perfctr_cstatus_has_tsc(cstatus)) { -+ state->tsc_sum += now.tsc - state->tsc_start; -+ state->tsc_start = now.tsc; -+ } -+ nractrs = perfctr_cstatus_nractrs(cstatus); -+ for(i = 0; i < nractrs; ++i) { -+ state->pmc[i].sum += now.pmc[i] - state->pmc[i].start; -+ state->pmc[i].start = now.pmc[i]; -+ } -+} -+ -+static void perfctr_cpu_clear_counters(void) -+{ -+ struct per_cpu_cache *cache; -+ -+ cache = get_cpu_cache(); -+ memset(cache, 0, sizeof *cache); -+ cache->k1.id = -1; -+ -+ ppc_clear_counters(); -+} -+ -+/**************************************************************** -+ * * -+ * Processor detection and initialisation procedures. * -+ * * -+ ****************************************************************/ -+ -+/* Derive CPU core frequency from TB frequency and PLL_CFG. */ -+ -+enum pll_type { -+ PLL_NONE, /* for e.g. 604 which has no HID1[PLL_CFG] */ -+ PLL_604e, -+ PLL_750, -+ PLL_750FX, -+ PLL_7400, -+ PLL_7450, -+ PLL_7457, -+}; -+ -+/* These are the known bus-to-core ratios, indexed by PLL_CFG. -+ Multiplied by 2 since half-multiplier steps are present. 
*/ -+ -+static unsigned char cfg_ratio_604e[16] __initdata = { // *2 -+ 2, 2, 14, 2, 4, 13, 5, 9, -+ 6, 11, 8, 10, 3, 12, 7, 0 -+}; -+ -+static unsigned char cfg_ratio_750[16] __initdata = { // *2 -+ 5, 15, 14, 2, 4, 13, 20, 9, // 0b0110 is 18 if L1_TSTCLK=0, but that is abnormal -+ 6, 11, 8, 10, 16, 12, 7, 0 -+}; -+ -+static unsigned char cfg_ratio_750FX[32] __initdata = { // *2 -+ 0, 0, 2, 2, 4, 5, 6, 7, -+ 8, 9, 10, 11, 12, 13, 14, 15, -+ 16, 17, 18, 19, 20, 22, 24, 26, -+ 28, 30, 32, 34, 36, 38, 40, 0 -+}; -+ -+static unsigned char cfg_ratio_7400[16] __initdata = { // *2 -+ 18, 15, 14, 2, 4, 13, 5, 9, -+ 6, 11, 8, 10, 16, 12, 7, 0 -+}; -+ -+static unsigned char cfg_ratio_7450[32] __initdata = { // *2 -+ 1, 0, 15, 30, 14, 0, 2, 0, -+ 4, 0, 13, 26, 5, 0, 9, 18, -+ 6, 0, 11, 22, 8, 20, 10, 24, -+ 16, 28, 12, 32, 7, 0, 0, 0 -+}; -+ -+static unsigned char cfg_ratio_7457[32] __initdata = { // *2 -+ 23, 34, 15, 30, 14, 36, 2, 40, -+ 4, 42, 13, 26, 17, 48, 19, 18, -+ 6, 21, 11, 22, 8, 20, 10, 24, -+ 16, 28, 12, 32, 27, 56, 0, 25 -+}; -+ -+static unsigned int __init tb_to_core_ratio(enum pll_type pll_type) -+{ -+ unsigned char *cfg_ratio; -+ unsigned int shift = 28, mask = 0xF, hid1, pll_cfg, ratio; -+ -+ switch (pll_type) { -+ case PLL_604e: -+ cfg_ratio = cfg_ratio_604e; -+ break; -+ case PLL_750: -+ cfg_ratio = cfg_ratio_750; -+ break; -+ case PLL_750FX: -+ cfg_ratio = cfg_ratio_750FX; -+ hid1 = mfspr(SPRN_HID1); -+ switch ((hid1 >> 16) & 0x3) { /* HID1[PI0,PS] */ -+ case 0: /* PLL0 with external config */ -+ shift = 31-4; /* access HID1[PCE] */ -+ break; -+ case 2: /* PLL0 with internal config */ -+ shift = 31-20; /* access HID1[PC0] */ -+ break; -+ case 1: case 3: /* PLL1 */ -+ shift = 31-28; /* access HID1[PC1] */ -+ break; -+ } -+ mask = 0x1F; -+ break; -+ case PLL_7400: -+ cfg_ratio = cfg_ratio_7400; -+ break; -+ case PLL_7450: -+ cfg_ratio = cfg_ratio_7450; -+ shift = 12; -+ mask = 0x1F; -+ break; -+ case PLL_7457: -+ cfg_ratio = cfg_ratio_7457; -+ shift = 12; 
-+ mask = 0x1F; -+ break; -+ default: -+ return 0; -+ } -+ hid1 = mfspr(SPRN_HID1); -+ pll_cfg = (hid1 >> shift) & mask; -+ ratio = cfg_ratio[pll_cfg]; -+ if (!ratio) -+ printk(KERN_WARNING "perfctr: unknown PLL_CFG 0x%x\n", pll_cfg); -+ return (4/2) * ratio; -+} -+ -+static unsigned int __init pll_to_core_khz(enum pll_type pll_type) -+{ -+ unsigned int tb_to_core = tb_to_core_ratio(pll_type); -+ perfctr_info.tsc_to_cpu_mult = tb_to_core; -+ return tb_ticks_per_jiffy * tb_to_core * (HZ/10) / (1000/10); -+} -+ -+/* Extract core and timebase frequencies from Open Firmware. */ -+ -+static unsigned int __init of_to_core_khz(void) -+{ -+ struct device_node *cpu; -+ unsigned int *fp, core, tb; -+ -+ cpu = find_type_devices("cpu"); -+ if (!cpu) -+ return 0; -+ fp = (unsigned int*)get_property(cpu, "clock-frequency", NULL); -+ if (!fp || !(core = *fp)) -+ return 0; -+ fp = (unsigned int*)get_property(cpu, "timebase-frequency", NULL); -+ if (!fp || !(tb = *fp)) -+ return 0; -+ perfctr_info.tsc_to_cpu_mult = core / tb; -+ return core / 1000; -+} -+ -+static unsigned int __init detect_cpu_khz(enum pll_type pll_type) -+{ -+ unsigned int khz; -+ -+ khz = pll_to_core_khz(pll_type); -+ if (khz) -+ return khz; -+ -+ khz = of_to_core_khz(); -+ if (khz) -+ return khz; -+ -+ printk(KERN_WARNING "perfctr: unable to determine CPU speed\n"); -+ return 0; -+} -+ -+static int __init known_init(void) -+{ -+ static char known_name[] __initdata = "PowerPC 60x/7xx/74xx"; -+ unsigned int features; -+ enum pll_type pll_type; -+ unsigned int pvr; -+ int have_mmcr1; -+ -+ features = PERFCTR_FEATURE_RDTSC | PERFCTR_FEATURE_RDPMC; -+ have_mmcr1 = 1; -+ pvr = mfspr(SPRN_PVR); -+ switch (PVR_VER(pvr)) { -+ case 0x0004: /* 604 */ -+ pm_type = PM_604; -+ pll_type = PLL_NONE; -+ features = PERFCTR_FEATURE_RDTSC; -+ have_mmcr1 = 0; -+ break; -+ case 0x0009: /* 604e; */ -+ case 0x000A: /* 604ev */ -+ pm_type = PM_604e; -+ pll_type = PLL_604e; -+ features = PERFCTR_FEATURE_RDTSC; -+ break; -+ case 0x0008: 
/* 750/740 */ -+ pm_type = PM_750; -+ pll_type = PLL_750; -+ break; -+ case 0x7000: case 0x7001: /* IBM750FX */ -+ case 0x7002: /* IBM750GX */ -+ pm_type = PM_750; -+ pll_type = PLL_750FX; -+ break; -+ case 0x000C: /* 7400 */ -+ pm_type = PM_7400; -+ pll_type = PLL_7400; -+ break; -+ case 0x800C: /* 7410 */ -+ pm_type = PM_7400; -+ pll_type = PLL_7400; -+ break; -+ case 0x8000: /* 7451/7441 */ -+ pm_type = PM_7450; -+ pll_type = PLL_7450; -+ break; -+ case 0x8001: /* 7455/7445 */ -+ pm_type = PM_7450; -+ pll_type = ((pvr & 0xFFFF) < 0x0303) ? PLL_7450 : PLL_7457; -+ break; -+ case 0x8002: /* 7457/7447 */ -+ pm_type = PM_7450; -+ pll_type = PLL_7457; -+ break; -+ default: -+ return -ENODEV; -+ } -+ perfctr_info.cpu_features = features; -+ perfctr_info.cpu_type = 0; /* user-space should inspect PVR */ -+ perfctr_cpu_name = known_name; -+ perfctr_info.cpu_khz = detect_cpu_khz(pll_type); -+ perfctr_ppc_init_tests(have_mmcr1); -+ return 0; -+} -+ -+static int __init unknown_init(void) -+{ -+ static char unknown_name[] __initdata = "Generic PowerPC with TB"; -+ unsigned int khz; -+ -+ khz = detect_cpu_khz(PLL_NONE); -+ if (!khz) -+ return -ENODEV; -+ perfctr_info.cpu_features = PERFCTR_FEATURE_RDTSC; -+ perfctr_info.cpu_type = 0; -+ perfctr_cpu_name = unknown_name; -+ perfctr_info.cpu_khz = khz; -+ pm_type = PM_NONE; -+ return 0; -+} -+ -+static void perfctr_cpu_clear_one(void *ignore) -+{ -+ /* PREEMPT note: when called via on_each_cpu(), -+ this is in IRQ context with preemption disabled. 
*/ -+ perfctr_cpu_clear_counters(); -+} -+ -+static void perfctr_cpu_reset(void) -+{ -+ on_each_cpu(perfctr_cpu_clear_one, NULL, 1, 1); -+ perfctr_cpu_set_ihandler(NULL); -+} -+ -+int __init perfctr_cpu_init(void) -+{ -+ int err; -+ -+ perfctr_info.cpu_features = 0; -+ -+ err = known_init(); -+ if (err) { -+ err = unknown_init(); -+ if (err) -+ goto out; -+ } -+ -+ perfctr_cpu_reset(); -+ out: -+ return err; -+} -+ -+void __exit perfctr_cpu_exit(void) -+{ -+ perfctr_cpu_reset(); -+} -+ -+/**************************************************************** -+ * * -+ * Hardware reservation. * -+ * * -+ ****************************************************************/ -+ -+static DECLARE_MUTEX(mutex); -+static const char *current_service = 0; -+ -+const char *perfctr_cpu_reserve(const char *service) -+{ -+ const char *ret; -+ -+ down(&mutex); -+ ret = current_service; -+ if (!ret) -+ { -+ current_service = service; -+ __module_get(THIS_MODULE); -+ } -+ up(&mutex); -+ return ret; -+} -+ -+void perfctr_cpu_release(const char *service) -+{ -+ down(&mutex); -+ if (service != current_service) { -+ printk(KERN_ERR "%s: attempt by %s to release while reserved by %s\n", -+ __FUNCTION__, service, current_service); -+ } else { -+ /* power down the counters */ -+ perfctr_cpu_reset(); -+ current_service = 0; -+ module_put(THIS_MODULE); -+ } -+ up(&mutex); -+} -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/virtual.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/virtual.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/virtual.c 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,1049 @@ -+/* $Id: virtual.c,v 1.88.2.2 2004/10/19 15:23:43 mikpe Exp $ -+ * Virtual per-process performance counters. 
-+ * -+ * Copyright (C) 1999-2003 Mikael Pettersson -+ */ -+#include <linux/config.h> -+#define __NO_VERSION__ -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/compiler.h> /* for unlikely() in 2.4.18 and older */ -+#include <linux/kernel.h> -+#include <linux/mm.h> -+#include <linux/ptrace.h> -+#include <linux/fs.h> -+#include <linux/file.h> -+#include <linux/perfctr.h> -+ -+#include <asm/io.h> -+#include <asm/uaccess.h> -+ -+#include "compat.h" -+#include "virtual.h" -+#include "marshal.h" -+ -+/**************************************************************** -+ * * -+ * Data types and macros. * -+ * * -+ ****************************************************************/ -+ -+struct vperfctr { -+/* User-visible fields: (must be first for mmap()) */ -+ struct perfctr_cpu_state cpu_state; -+/* Kernel-private fields: */ -+ int si_signo; -+ atomic_t count; -+ spinlock_t owner_lock; -+ struct task_struct *owner; -+ /* sampling_timer and bad_cpus_allowed are frequently -+ accessed, so they get to share a cache line */ -+ unsigned int sampling_timer ____cacheline_aligned; -+#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED -+ atomic_t bad_cpus_allowed; -+#endif -+#if 0 && defined(CONFIG_PERFCTR_DEBUG) -+ unsigned start_smp_id; -+ unsigned suspended; -+#endif -+#if PERFCTR_INTERRUPT_SUPPORT -+ unsigned int iresume_cstatus; -+#endif -+}; -+#define IS_RUNNING(perfctr) perfctr_cstatus_enabled((perfctr)->cpu_state.cstatus) -+ -+/* XXX: disabled: called from switch_to() where printk() is disallowed */ -+#if 0 && defined(CONFIG_PERFCTR_DEBUG) -+#define debug_free(perfctr) \ -+do { \ -+ int i; \ -+ for(i = 0; i < PAGE_SIZE/sizeof(int); ++i) \ -+ ((int*)(perfctr))[i] = 0xfedac0ed; \ -+} while( 0 ) -+#define debug_init(perfctr) do { (perfctr)->suspended = 1; } while( 0 ) -+#define debug_suspend(perfctr) \ -+do { \ -+ if( (perfctr)->suspended ) \ -+ printk(KERN_ERR "%s: BUG! 
suspending non-running perfctr (pid %d, comm %s)\n", \ -+ __FUNCTION__, current->pid, current->comm); \ -+ (perfctr)->suspended = 1; \ -+} while( 0 ) -+#define debug_resume(perfctr) \ -+do { \ -+ if( !(perfctr)->suspended ) \ -+ printk(KERN_ERR "%s: BUG! resuming non-suspended perfctr (pid %d, comm %s)\n", \ -+ __FUNCTION__, current->pid, current->comm); \ -+ (perfctr)->suspended = 0; \ -+} while( 0 ) -+#define debug_check_smp_id(perfctr) \ -+do { \ -+ if( (perfctr)->start_smp_id != smp_processor_id() ) { \ -+ printk(KERN_ERR "%s: BUG! current cpu %u differs from start cpu %u (pid %d, comm %s)\n", \ -+ __FUNCTION__, smp_processor_id(), (perfctr)->start_smp_id, \ -+ current->pid, current->comm); \ -+ return; \ -+ } \ -+} while( 0 ) -+#define debug_set_smp_id(perfctr) \ -+ do { (perfctr)->start_smp_id = smp_processor_id(); } while( 0 ) -+#else /* CONFIG_PERFCTR_DEBUG */ -+#define debug_free(perfctr) do{}while(0) -+#define debug_init(perfctr) do{}while(0) -+#define debug_suspend(perfctr) do{}while(0) -+#define debug_resume(perfctr) do{}while(0) -+#define debug_check_smp_id(perfctr) do{}while(0) -+#define debug_set_smp_id(perfctr) do{}while(0) -+#endif /* CONFIG_PERFCTR_DEBUG */ -+ -+#if PERFCTR_INTERRUPT_SUPPORT -+ -+static void vperfctr_ihandler(unsigned long pc); -+ -+static inline void vperfctr_set_ihandler(void) -+{ -+ perfctr_cpu_set_ihandler(vperfctr_ihandler); -+} -+ -+static inline void vperfctr_clear_iresume_cstatus(struct vperfctr *perfctr) -+{ -+ perfctr->iresume_cstatus = 0; -+} -+ -+#else -+static inline void vperfctr_set_ihandler(void) { } -+static inline void vperfctr_clear_iresume_cstatus(struct vperfctr *perfctr) { } -+#endif -+ -+#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED -+ -+static inline void vperfctr_init_bad_cpus_allowed(struct vperfctr *perfctr) -+{ -+ atomic_set(&perfctr->bad_cpus_allowed, 0); -+} -+ -+/* Concurrent set_cpus_allowed() is possible. The only lock it -+ can take is the task lock, so we have to take it as well. 
-+ task_lock/unlock also disables/enables preemption. */ -+ -+static inline void vperfctr_task_lock(struct task_struct *p) -+{ -+ task_lock(p); -+} -+ -+static inline void vperfctr_task_unlock(struct task_struct *p) -+{ -+ task_unlock(p); -+} -+ -+#else /* !PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED */ -+ -+static inline void vperfctr_init_bad_cpus_allowed(struct vperfctr *perfctr) { } -+ -+/* Concurrent set_cpus_allowed() is impossible or irrelevant. -+ Disabling and enabling preemption suffices for an atomic region. */ -+ -+static inline void vperfctr_task_lock(struct task_struct *p) -+{ -+ preempt_disable(); -+} -+ -+static inline void vperfctr_task_unlock(struct task_struct *p) -+{ -+ preempt_enable(); -+} -+ -+#endif /* !PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED */ -+ -+/**************************************************************** -+ * * -+ * Resource management. * -+ * * -+ ****************************************************************/ -+ -+/* XXX: perhaps relax this to number of _live_ perfctrs */ -+static DECLARE_MUTEX(nrctrs_mutex); -+static int nrctrs; -+static const char this_service[] = __FILE__; -+ -+static int inc_nrctrs(void) -+{ -+ const char *other; -+ -+ other = NULL; -+ down(&nrctrs_mutex); -+ if( ++nrctrs == 1 ) { -+ other = perfctr_cpu_reserve(this_service); -+ if( other ) -+ nrctrs = 0; -+ } -+ up(&nrctrs_mutex); -+ if( other ) { -+ printk(KERN_ERR __FILE__ -+ ": cannot operate, perfctr hardware taken by '%s'\n", -+ other); -+ return -EBUSY; -+ } -+ vperfctr_set_ihandler(); -+ return 0; -+} -+ -+static void dec_nrctrs(void) -+{ -+ down(&nrctrs_mutex); -+ if( --nrctrs == 0 ) -+ perfctr_cpu_release(this_service); -+ up(&nrctrs_mutex); -+} -+ -+static struct vperfctr *vperfctr_alloc(void) -+{ -+ unsigned long page; -+ -+ if( inc_nrctrs() != 0 ) -+ return ERR_PTR(-EBUSY); -+ page = get_zeroed_page(GFP_KERNEL); -+ if( !page ) { -+ dec_nrctrs(); -+ return ERR_PTR(-ENOMEM); -+ } -+ SetPageReserved(virt_to_page(page)); -+ return (struct vperfctr*) page; -+} 
-+ -+static void vperfctr_free(struct vperfctr *perfctr) -+{ -+ debug_free(perfctr); -+ ClearPageReserved(virt_to_page(perfctr)); -+ free_page((unsigned long)perfctr); -+ dec_nrctrs(); -+} -+ -+static struct vperfctr *get_empty_vperfctr(void) -+{ -+ struct vperfctr *perfctr = vperfctr_alloc(); -+ if( !IS_ERR(perfctr) ) { -+ atomic_set(&perfctr->count, 1); -+ vperfctr_init_bad_cpus_allowed(perfctr); -+ spin_lock_init(&perfctr->owner_lock); -+ debug_init(perfctr); -+ } -+ return perfctr; -+} -+ -+static void put_vperfctr(struct vperfctr *perfctr) -+{ -+ if( atomic_dec_and_test(&perfctr->count) ) -+ vperfctr_free(perfctr); -+} -+ -+/**************************************************************** -+ * * -+ * Basic counter operations. * -+ * These must all be called by the owner process only. * -+ * These must all be called with preemption disabled. * -+ * * -+ ****************************************************************/ -+ -+/* PRE: IS_RUNNING(perfctr) -+ * Suspend the counters. -+ * XXX: When called from switch_to(), perfctr belongs to 'prev' -+ * but current is 'next'. Debug messages will refer to 'next'... -+ */ -+static inline void vperfctr_suspend(struct vperfctr *perfctr) -+{ -+ debug_suspend(perfctr); -+ debug_check_smp_id(perfctr); -+ perfctr_cpu_suspend(&perfctr->cpu_state); -+} -+ -+static inline void vperfctr_reset_sampling_timer(struct vperfctr *perfctr) -+{ -+ /* XXX: base the value on perfctr_info.cpu_khz instead! */ -+ perfctr->sampling_timer = HZ/2; -+} -+ -+/* PRE: perfctr == current->thread.perfctr && IS_RUNNING(perfctr) -+ * Restart the counters. -+ */ -+static inline void vperfctr_resume(struct vperfctr *perfctr) -+{ -+ debug_resume(perfctr); -+ perfctr_cpu_resume(&perfctr->cpu_state); -+ vperfctr_reset_sampling_timer(perfctr); -+ debug_set_smp_id(perfctr); -+} -+ -+/* Sample the counters but do not suspend them. 
*/ -+static void vperfctr_sample(struct vperfctr *perfctr) -+{ -+ if( IS_RUNNING(perfctr) ) { -+ debug_check_smp_id(perfctr); -+ perfctr_cpu_sample(&perfctr->cpu_state); -+ vperfctr_reset_sampling_timer(perfctr); -+ } -+} -+ -+#if PERFCTR_INTERRUPT_SUPPORT -+/* vperfctr interrupt handler (XXX: add buffering support) */ -+/* PREEMPT note: called in IRQ context with preemption disabled. */ -+static void vperfctr_ihandler(unsigned long pc) -+{ -+ struct task_struct *tsk = current; -+ struct vperfctr *perfctr; -+ unsigned int pmc_mask; -+ siginfo_t si; -+ -+ perfctr = tsk->thread.perfctr; -+ if( !perfctr ) { -+ printk(KERN_ERR "%s: BUG! pid %d has no vperfctr\n", -+ __FUNCTION__, tsk->pid); -+ return; -+ } -+ if( !perfctr_cstatus_has_ictrs(perfctr->cpu_state.cstatus) ) { -+ printk(KERN_ERR "%s: BUG! vperfctr has cstatus %#x (pid %d, comm %s)\n", -+ __FUNCTION__, perfctr->cpu_state.cstatus, tsk->pid, tsk->comm); -+ return; -+ } -+ vperfctr_suspend(perfctr); -+ pmc_mask = perfctr_cpu_identify_overflow(&perfctr->cpu_state); -+ if( !pmc_mask ) { -+ printk(KERN_ERR "%s: BUG! pid %d has unidentifiable overflow source\n", -+ __FUNCTION__, tsk->pid); -+ return; -+ } -+ /* suspend a-mode and i-mode PMCs, leaving only TSC on */ -+ /* XXX: some people also want to suspend the TSC */ -+ perfctr->iresume_cstatus = perfctr->cpu_state.cstatus; -+ if( perfctr_cstatus_has_tsc(perfctr->iresume_cstatus) ) { -+ perfctr->cpu_state.cstatus = perfctr_mk_cstatus(1, 0, 0); -+ vperfctr_resume(perfctr); -+ } else -+ perfctr->cpu_state.cstatus = 0; -+ si.si_signo = perfctr->si_signo; -+ si.si_errno = 0; -+ si.si_code = SI_PMC_OVF; -+ si.si_pmc_ovf_mask = pmc_mask; -+ if( !send_sig_info(si.si_signo, &si, tsk) ) -+ send_sig(si.si_signo, tsk, 1); -+} -+#endif -+ -+/**************************************************************** -+ * * -+ * Process management operations. 
* -+ * These must all, with the exception of vperfctr_unlink() * -+ * and __vperfctr_set_cpus_allowed(), be called by the owner * -+ * process only. * -+ * * -+ ****************************************************************/ -+ -+/* Called from exit_thread() or sys_vperfctr_unlink(). -+ * If the counters are running, stop them and sample their final values. -+ * Detach the vperfctr object from its owner task. -+ * PREEMPT note: exit_thread() does not run with preemption disabled. -+ */ -+static void vperfctr_unlink(struct task_struct *owner, struct vperfctr *perfctr) -+{ -+ /* this synchronises with vperfctr_ioctl() */ -+ spin_lock(&perfctr->owner_lock); -+ perfctr->owner = NULL; -+ spin_unlock(&perfctr->owner_lock); -+ -+ /* perfctr suspend+detach must be atomic wrt process suspend */ -+ /* this also synchronises with perfctr_set_cpus_allowed() */ -+ vperfctr_task_lock(owner); -+ if( IS_RUNNING(perfctr) && owner == current ) -+ vperfctr_suspend(perfctr); -+ owner->thread.perfctr = NULL; -+ vperfctr_task_unlock(owner); -+ -+ perfctr->cpu_state.cstatus = 0; -+ vperfctr_clear_iresume_cstatus(perfctr); -+ put_vperfctr(perfctr); -+} -+ -+void __vperfctr_exit(struct vperfctr *perfctr) -+{ -+ vperfctr_unlink(current, perfctr); -+} -+ -+/* schedule() --> switch_to() --> .. --> __vperfctr_suspend(). -+ * If the counters are running, suspend them. -+ * PREEMPT note: switch_to() runs with preemption disabled. -+ */ -+void __vperfctr_suspend(struct vperfctr *perfctr) -+{ -+ if( IS_RUNNING(perfctr) ) -+ vperfctr_suspend(perfctr); -+} -+ -+/* schedule() --> switch_to() --> .. --> __vperfctr_resume(). -+ * PRE: perfctr == current->thread.perfctr -+ * If the counters are runnable, resume them. -+ * PREEMPT note: switch_to() runs with preemption disabled. 
-+ */ -+void __vperfctr_resume(struct vperfctr *perfctr) -+{ -+ if( IS_RUNNING(perfctr) ) { -+#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED -+ if( unlikely(atomic_read(&perfctr->bad_cpus_allowed)) && -+ perfctr_cstatus_nrctrs(perfctr->cpu_state.cstatus) ) { -+ perfctr->cpu_state.cstatus = 0; -+ vperfctr_clear_iresume_cstatus(perfctr); -+ BUG_ON(current->state != TASK_RUNNING); -+ send_sig(SIGILL, current, 1); -+ return; -+ } -+#endif -+ vperfctr_resume(perfctr); -+ } -+} -+ -+/* Called from update_one_process() [triggered by timer interrupt]. -+ * PRE: perfctr == current->thread.perfctr. -+ * Sample the counters but do not suspend them. -+ * Needed to avoid precision loss due to multiple counter -+ * wraparounds between resume/suspend for CPU-bound processes. -+ * PREEMPT note: called in IRQ context with preemption disabled. -+ */ -+void __vperfctr_sample(struct vperfctr *perfctr) -+{ -+ if( --perfctr->sampling_timer == 0 ) -+ vperfctr_sample(perfctr); -+} -+ -+#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED -+/* Called from set_cpus_allowed(). -+ * PRE: current holds task_lock(owner) -+ * PRE: owner->thread.perfctr == perfctr -+ */ -+void __vperfctr_set_cpus_allowed(struct task_struct *owner, -+ struct vperfctr *perfctr, -+ cpumask_t new_mask) -+{ -+ cpumask_t tmp; -+ -+ cpus_and(tmp, new_mask, perfctr_cpus_forbidden_mask); -+ if( !cpus_empty(tmp) ) { -+ atomic_set(&perfctr->bad_cpus_allowed, 1); -+ printk(KERN_WARNING "perfctr: process %d (comm %s) issued unsafe" -+ " set_cpus_allowed() on process %d (comm %s)\n", -+ current->pid, current->comm, owner->pid, owner->comm); -+ } else -+ atomic_set(&perfctr->bad_cpus_allowed, 0); -+} -+#endif -+ -+/**************************************************************** -+ * * -+ * Virtual perfctr "system calls". 
* -+ * These can be called by the owner process (tsk == current), * -+ * a monitor process which has the owner under ptrace ATTACH * -+ * control (tsk && tsk != current), or anyone with a handle to * -+ * an unlinked perfctr (!tsk). * -+ * * -+ ****************************************************************/ -+ -+static int sys_vperfctr_control(struct vperfctr *perfctr, -+ struct perfctr_struct_buf *argp, -+ struct task_struct *tsk) -+{ -+ struct vperfctr_control control; -+ int err; -+ unsigned int next_cstatus; -+ unsigned int nrctrs, i; -+ -+ if( !tsk ) -+ return -ESRCH; /* attempt to update unlinked perfctr */ -+ -+ err = perfctr_copy_from_user(&control, argp, &vperfctr_control_sdesc); -+ if( err ) -+ return err; -+ -+ if( control.cpu_control.nractrs || control.cpu_control.nrictrs ) { -+ cpumask_t old_mask, new_mask; -+ -+ old_mask = tsk->cpus_allowed; -+ cpus_andnot(new_mask, old_mask, perfctr_cpus_forbidden_mask); -+ -+ if( cpus_empty(new_mask) ) -+ return -EINVAL; -+ if( !cpus_equal(new_mask, old_mask) ) -+ set_cpus_allowed(tsk, new_mask); -+ } -+ -+ /* PREEMPT note: preemption is disabled over the entire -+ region since we're updating an active perfctr. */ -+ preempt_disable(); -+ if( IS_RUNNING(perfctr) ) { -+ if( tsk == current ) -+ vperfctr_suspend(perfctr); -+ perfctr->cpu_state.cstatus = 0; -+ vperfctr_clear_iresume_cstatus(perfctr); -+ } -+ perfctr->cpu_state.control = control.cpu_control; -+ /* remote access note: perfctr_cpu_update_control() is ok */ -+ err = perfctr_cpu_update_control(&perfctr->cpu_state, 0); -+ if( err < 0 ) -+ goto out; -+ next_cstatus = perfctr->cpu_state.cstatus; -+ if( !perfctr_cstatus_enabled(next_cstatus) ) -+ goto out; -+ -+ /* XXX: validate si_signo? 
*/ -+ perfctr->si_signo = control.si_signo; -+ -+ if( !perfctr_cstatus_has_tsc(next_cstatus) ) -+ perfctr->cpu_state.tsc_sum = 0; -+ -+ nrctrs = perfctr_cstatus_nrctrs(next_cstatus); -+ for(i = 0; i < nrctrs; ++i) -+ if( !(control.preserve & (1<<i)) ) -+ perfctr->cpu_state.pmc[i].sum = 0; -+ -+ if( tsk == current ) -+ vperfctr_resume(perfctr); -+ -+ out: -+ preempt_enable(); -+ return err; -+} -+ -+static int sys_vperfctr_iresume(struct vperfctr *perfctr, const struct task_struct *tsk) -+{ -+#if PERFCTR_INTERRUPT_SUPPORT -+ unsigned int iresume_cstatus; -+ -+ if( !tsk ) -+ return -ESRCH; /* attempt to update unlinked perfctr */ -+ -+ iresume_cstatus = perfctr->iresume_cstatus; -+ if( !perfctr_cstatus_has_ictrs(iresume_cstatus) ) -+ return -EPERM; -+ -+ /* PREEMPT note: preemption is disabled over the entire -+ region because we're updating an active perfctr. */ -+ preempt_disable(); -+ -+ if( IS_RUNNING(perfctr) && tsk == current ) -+ vperfctr_suspend(perfctr); -+ -+ perfctr->cpu_state.cstatus = iresume_cstatus; -+ perfctr->iresume_cstatus = 0; -+ -+ /* remote access note: perfctr_cpu_ireload() is ok */ -+ perfctr_cpu_ireload(&perfctr->cpu_state); -+ -+ if( tsk == current ) -+ vperfctr_resume(perfctr); -+ -+ preempt_enable(); -+ -+ return 0; -+#else -+ return -ENOSYS; -+#endif -+} -+ -+static int sys_vperfctr_unlink(struct vperfctr *perfctr, struct task_struct *tsk) -+{ -+ if( tsk ) -+ vperfctr_unlink(tsk, perfctr); -+ return 0; -+} -+ -+static int sys_vperfctr_read_sum(struct vperfctr *perfctr, -+ struct perfctr_struct_buf *argp, -+ const struct task_struct *tsk) -+{ -+ struct perfctr_sum_ctrs sum; -+ -+ if( tsk == current ) { -+ preempt_disable(); -+ vperfctr_sample(perfctr); -+ } -+ //sum = perfctr->cpu_state.sum; -+ { -+ int j; -+ sum.tsc = perfctr->cpu_state.tsc_sum; -+ for(j = 0; j < ARRAY_SIZE(sum.pmc); ++j) -+ sum.pmc[j] = perfctr->cpu_state.pmc[j].sum; -+ } -+ if( tsk == current ) -+ preempt_enable(); -+ return perfctr_copy_to_user(argp, &sum, 
&perfctr_sum_ctrs_sdesc); -+} -+ -+static int sys_vperfctr_read_control(struct vperfctr *perfctr, -+ struct perfctr_struct_buf *argp, -+ const struct task_struct *tsk) -+{ -+ struct vperfctr_control control; -+ -+ /* PREEMPT note: While we're reading our own control, another -+ process may ptrace ATTACH to us and update our control. -+ Disable preemption to ensure we get a consistent copy. -+ Not needed for other cases since the perfctr is either -+ unlinked or its owner is ptrace ATTACH suspended by us. */ -+ if( tsk == current ) -+ preempt_disable(); -+ control.si_signo = perfctr->si_signo; -+ control.cpu_control = perfctr->cpu_state.control; -+ if( tsk == current ) -+ preempt_enable(); -+ control.preserve = 0; -+ return perfctr_copy_to_user(argp, &control, &vperfctr_control_sdesc); -+} -+ -+/**************************************************************** -+ * * -+ * Virtual perfctr file operations. * -+ * * -+ ****************************************************************/ -+ -+static int vperfctr_mmap(struct file *filp, struct vm_area_struct *vma) -+{ -+ struct vperfctr *perfctr; -+ -+ /* Only allow read-only mapping of first page. 
*/ -+ if( (vma->vm_end - vma->vm_start) != PAGE_SIZE || -+ vma->vm_pgoff != 0 || -+ (pgprot_val(vma->vm_page_prot) & _PAGE_RW) || -+ (vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) ) -+ return -EPERM; -+ perfctr = filp->private_data; -+ if( !perfctr ) -+ return -EPERM; -+ return remap_page_range(vma, vma->vm_start, virt_to_phys(perfctr), -+ PAGE_SIZE, vma->vm_page_prot); -+} -+ -+static int vperfctr_release(struct inode *inode, struct file *filp) -+{ -+ struct vperfctr *perfctr = filp->private_data; -+ filp->private_data = NULL; -+ if( perfctr ) -+ put_vperfctr(perfctr); -+ return 0; -+} -+ -+static int vperfctr_ioctl(struct inode *inode, struct file *filp, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct vperfctr *perfctr; -+ struct task_struct *tsk; -+ int ret; -+ -+ switch( cmd ) { -+ case PERFCTR_ABI: -+ return sys_perfctr_abi((unsigned int*)arg); -+ case PERFCTR_INFO: -+ return sys_perfctr_info((struct perfctr_struct_buf*)arg); -+ case PERFCTR_CPUS: -+ return sys_perfctr_cpus((struct perfctr_cpu_mask*)arg); -+ case PERFCTR_CPUS_FORBIDDEN: -+ return sys_perfctr_cpus_forbidden((struct perfctr_cpu_mask*)arg); -+ } -+ perfctr = filp->private_data; -+ if( !perfctr ) -+ return -EINVAL; -+ tsk = current; -+ if( perfctr != current->thread.perfctr ) { -+ /* this synchronises with vperfctr_unlink() and itself */ -+ spin_lock(&perfctr->owner_lock); -+ tsk = perfctr->owner; -+ if( tsk ) -+ get_task_struct(tsk); -+ spin_unlock(&perfctr->owner_lock); -+ if( tsk ) { -+ ret = ptrace_check_attach(tsk, 0); -+ if( ret < 0 ) -+ goto out; -+ } -+ } -+ switch( cmd ) { -+ case VPERFCTR_CONTROL: -+ ret = sys_vperfctr_control(perfctr, (struct perfctr_struct_buf*)arg, tsk); -+ break; -+ case VPERFCTR_UNLINK: -+ ret = sys_vperfctr_unlink(perfctr, tsk); -+ break; -+ case VPERFCTR_READ_SUM: -+ ret = sys_vperfctr_read_sum(perfctr, (struct perfctr_struct_buf*)arg, tsk); -+ break; -+ case VPERFCTR_IRESUME: -+ ret = sys_vperfctr_iresume(perfctr, tsk); -+ break; -+ case VPERFCTR_READ_CONTROL: 
-+ ret = sys_vperfctr_read_control(perfctr, (struct perfctr_struct_buf*)arg, tsk); -+ break; -+ default: -+ ret = -EINVAL; -+ } -+ out: -+ if( tsk && tsk != current ) -+ put_task_struct(tsk); -+ return ret; -+} -+ -+static struct file_operations vperfctr_file_ops = { -+ .owner = THIS_MODULE, -+ .mmap = vperfctr_mmap, -+ .release = vperfctr_release, -+ .ioctl = vperfctr_ioctl, -+}; -+ -+/**************************************************************** -+ * * -+ * File system for virtual perfctrs. Based on pipefs. * -+ * * -+ ****************************************************************/ -+ -+#define VPERFCTRFS_MAGIC (('V'<<24)|('P'<<16)|('M'<<8)|('C')) -+ -+/* The code to set up a `struct file_system_type' for a pseudo fs -+ is unfortunately not the same in 2.4 and 2.6. */ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) -+#include <linux/mount.h> /* needed for 2.6, included by fs.h in 2.4 */ -+ -+/* 2.6 doesn't EXPORT_SYMBOL() fs/libfs.c:get_sb_pseudo(). -+ This is a verbatim copy, only renamed. */ -+#ifdef MODULE -+static -+struct super_block * -+perfctr_get_sb_pseudo(struct file_system_type *fs_type, char *name, -+ struct super_operations *ops, unsigned long magic) -+{ -+ struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); -+ static struct super_operations default_ops = {.statfs = simple_statfs}; -+ struct dentry *dentry; -+ struct inode *root; -+ struct qstr d_name = {.name = name, .len = strlen(name)}; -+ -+ if (IS_ERR(s)) -+ return s; -+ -+ s->s_flags = MS_NOUSER; -+ s->s_maxbytes = ~0ULL; -+ s->s_blocksize = 1024; -+ s->s_blocksize_bits = 10; -+ s->s_magic = magic; -+ s->s_op = ops ? 
ops : &default_ops; -+ root = new_inode(s); -+ if (!root) -+ goto Enomem; -+ root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; -+ root->i_uid = root->i_gid = 0; -+ root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME; -+ dentry = d_alloc(NULL, &d_name); -+ if (!dentry) { -+ iput(root); -+ goto Enomem; -+ } -+ dentry->d_sb = s; -+ dentry->d_parent = dentry; -+ d_instantiate(dentry, root); -+ s->s_root = dentry; -+ s->s_flags |= MS_ACTIVE; -+ return s; -+ -+Enomem: -+ up_write(&s->s_umount); -+ deactivate_super(s); -+ return ERR_PTR(-ENOMEM); -+} -+#undef get_sb_pseudo -+#define get_sb_pseudo perfctr_get_sb_pseudo -+#endif /* MODULE */ -+ -+static struct super_block * -+vperfctrfs_get_sb(struct file_system_type *fs_type, -+ int flags, const char *dev_name, void *data) -+{ -+ return get_sb_pseudo(fs_type, "vperfctr:", NULL, VPERFCTRFS_MAGIC); -+} -+ -+static struct file_system_type vperfctrfs_type = { -+ .name = "vperfctrfs", -+ .get_sb = vperfctrfs_get_sb, -+ .kill_sb = kill_anon_super, -+}; -+ -+#else /* 2.4 */ -+ -+static int vperfctrfs_statfs(struct super_block *sb, struct statfs *buf) -+{ -+ buf->f_type = VPERFCTRFS_MAGIC; -+ buf->f_bsize = 1024; -+ buf->f_namelen = 255; -+ return 0; -+} -+ -+static struct super_operations vperfctrfs_ops = { -+ .statfs = vperfctrfs_statfs, -+}; -+ -+static struct super_block* -+vperfctrfs_read_super(struct super_block *sb, void *data, int silent) -+{ -+ static const struct qstr d_name = { "vperfctrfs:", 11, 0 }; -+ struct dentry *dentry; -+ struct inode *root; -+ -+ root = new_inode(sb); -+ if( !root ) -+ return NULL; -+ root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; -+ root->i_uid = root->i_gid = 0; -+ root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME; -+ sb->s_blocksize = 1024; -+ sb->s_blocksize_bits = 10; -+ sb->s_magic = VPERFCTRFS_MAGIC; -+ sb->s_op = &vperfctrfs_ops; /* XXX: check if 2.4 really needs this */ -+ sb->s_root = dentry = d_alloc(NULL, &d_name); -+ if( !dentry ) { -+ iput(root); -+ return NULL; -+ } -+ 
dentry->d_sb = sb; -+ dentry->d_parent = dentry; -+ d_instantiate(dentry, root); -+ return sb; -+} -+ -+/* DECLARE_FSTYPE() hides 'owner: THIS_MODULE'. kern_mount() increments -+ owner's use count, and since we're not unmountable from user-space, -+ the module can't be unloaded because it's use count is >= 1. -+ So we declare the file_system_type manually without the owner field. */ -+static struct file_system_type vperfctrfs_type = { -+ .name = "vperfctrfs", -+ .read_super = vperfctrfs_read_super, -+ .fs_flags = FS_NOMOUNT, -+}; -+ -+#endif /* 2.4 */ -+ -+/* XXX: check if s/vperfctr_mnt/vperfctrfs_type.kern_mnt/ would work */ -+static struct vfsmount *vperfctr_mnt; -+ -+static int __init vperfctrfs_init(void) -+{ -+ int err = register_filesystem(&vperfctrfs_type); -+ if( !err ) { -+ vperfctr_mnt = kern_mount(&vperfctrfs_type); -+ if( !IS_ERR(vperfctr_mnt) ) -+ return 0; -+ err = PTR_ERR(vperfctr_mnt); -+ unregister_filesystem(&vperfctrfs_type); -+ } -+ return err; -+} -+ -+static void __exit vperfctrfs_exit(void) -+{ -+ unregister_filesystem(&vperfctrfs_type); -+ mntput(vperfctr_mnt); -+} -+ -+static struct inode *vperfctr_get_inode(void) -+{ -+ struct inode *inode; -+ -+ inode = new_inode(vperfctr_mnt->mnt_sb); -+ if( !inode ) -+ return NULL; -+ inode->i_fop = &vperfctr_file_ops; -+ inode->i_state = I_DIRTY; -+ inode->i_mode = S_IFCHR | S_IRUSR | S_IWUSR; -+ inode->i_uid = current->fsuid; -+ inode->i_gid = current->fsgid; -+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; -+ inode->i_blksize = 0; -+ return inode; -+} -+ -+static int vperfctrfs_delete_dentry(struct dentry *dentry) -+{ -+ return 1; -+} -+ -+static struct dentry_operations vperfctrfs_dentry_operations = { -+ .d_delete = vperfctrfs_delete_dentry, -+}; -+ -+static struct dentry *vperfctr_d_alloc_root(struct inode *inode) -+{ -+ struct qstr this; -+ char name[32]; -+ struct dentry *dentry; -+ -+ sprintf(name, "[%lu]", inode->i_ino); -+ this.name = name; -+ this.len = strlen(name); -+ 
this.hash = inode->i_ino; /* will go */ -+ dentry = d_alloc(vperfctr_mnt->mnt_sb->s_root, &this); -+ if( dentry ) { -+ dentry->d_op = &vperfctrfs_dentry_operations; -+ d_add(dentry, inode); -+ } -+ return dentry; -+} -+ -+static struct file *vperfctr_get_filp(void) -+{ -+ struct file *filp; -+ struct inode *inode; -+ struct dentry *dentry; -+ -+ filp = get_empty_filp(); -+ if( !filp ) -+ goto out; -+ inode = vperfctr_get_inode(); -+ if( !inode ) -+ goto out_filp; -+ dentry = vperfctr_d_alloc_root(inode); -+ if( !dentry ) -+ goto out_inode; -+ -+ filp->f_vfsmnt = mntget(vperfctr_mnt); -+ filp->f_dentry = dentry; -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,2) -+ filp->f_mapping = dentry->d_inode->i_mapping; -+#endif -+ -+ filp->f_pos = 0; -+ filp->f_flags = 0; -+ filp->f_op = fops_get(&vperfctr_file_ops); /* fops_get() for MODULE */ -+ filp->f_mode = FMODE_READ; -+ filp->f_version = 0; -+ -+ return filp; -+ -+ out_inode: -+ iput(inode); -+ out_filp: -+ put_filp(filp); /* doesn't run ->release() like fput() does */ -+ out: -+ return NULL; -+} -+ -+/* tid is the actual task/thread id (née pid, stored as ->pid), -+ pid/tgid is that 2.6 thread group id crap (stored as ->tgid) */ -+int vperfctr_attach(int tid, int creat) -+{ -+ struct file *filp; -+ struct task_struct *tsk; -+ struct vperfctr *perfctr; -+ int err; -+ int fd; -+ -+ filp = vperfctr_get_filp(); -+ if( !filp ) -+ return -ENOMEM; -+ err = fd = get_unused_fd(); -+ if( err < 0 ) -+ goto err_filp; -+ perfctr = NULL; -+ if( creat ) { -+ perfctr = get_empty_vperfctr(); /* may sleep */ -+ if( IS_ERR(perfctr) ) { -+ err = PTR_ERR(perfctr); -+ goto err_fd; -+ } -+ } -+ tsk = current; -+ if( tid != 0 && tid != tsk->pid ) { /* remote? 
*/ -+ read_lock(&tasklist_lock); -+ tsk = find_task_by_pid(tid); -+ if( tsk ) -+ get_task_struct(tsk); -+ read_unlock(&tasklist_lock); -+ err = -ESRCH; -+ if( !tsk ) -+ goto err_perfctr; -+ err = ptrace_check_attach(tsk, 0); -+ if( err < 0 ) -+ goto err_tsk; -+ } -+ if( creat ) { -+ /* check+install must be atomic to prevent remote-control races */ -+ vperfctr_task_lock(tsk); -+ if( !tsk->thread.perfctr ) { -+ perfctr->owner = tsk; -+ tsk->thread.perfctr = perfctr; -+ err = 0; -+ } else -+ err = -EEXIST; -+ vperfctr_task_unlock(tsk); -+ if( err ) -+ goto err_tsk; -+ } else { -+ perfctr = tsk->thread.perfctr; -+ /* PERFCTR_ABI and PERFCTR_INFO don't need the perfctr. -+ Hence no non-NULL check here. */ -+ } -+ filp->private_data = perfctr; -+ if( perfctr ) -+ atomic_inc(&perfctr->count); -+ if( tsk != current ) -+ put_task_struct(tsk); -+ fd_install(fd, filp); -+ return fd; -+ err_tsk: -+ if( tsk != current ) -+ put_task_struct(tsk); -+ err_perfctr: -+ if( perfctr ) /* can only occur if creat != 0 */ -+ put_vperfctr(perfctr); -+ err_fd: -+ put_unused_fd(fd); -+ err_filp: -+ fput(filp); -+ return err; -+} -+ -+/**************************************************************** -+ * * -+ * module_init/exit * -+ * * -+ ****************************************************************/ -+ -+#ifdef MODULE -+static struct vperfctr_stub off; -+ -+static void vperfctr_stub_init(void) -+{ -+ off = vperfctr_stub; -+ vperfctr_stub.owner = THIS_MODULE; -+ vperfctr_stub.exit = __vperfctr_exit; -+ vperfctr_stub.suspend = __vperfctr_suspend; -+ vperfctr_stub.resume = __vperfctr_resume; -+ vperfctr_stub.sample = __vperfctr_sample; -+#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED -+ vperfctr_stub.set_cpus_allowed = __vperfctr_set_cpus_allowed; -+#endif -+} -+ -+static void vperfctr_stub_exit(void) -+{ -+ vperfctr_stub = off; -+} -+#else -+static inline void vperfctr_stub_init(void) { } -+static inline void vperfctr_stub_exit(void) { } -+#endif /* MODULE */ -+ -+int __init vperfctr_init(void) 
-+{ -+ int err = vperfctrfs_init(); -+ if( err ) -+ return err; -+ vperfctr_stub_init(); -+ return 0; -+} -+ -+void __exit vperfctr_exit(void) -+{ -+ vperfctrfs_exit(); -+ vperfctr_stub_exit(); -+} -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_tests.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_tests.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_tests.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,30 @@ -+/* $Id: x86_tests.h,v 1.8.2.2 2004/08/02 15:53:19 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * Optional x86/x86_64-specific init-time tests. -+ * -+ * Copyright (C) 1999-2004 Mikael Pettersson -+ */ -+ -+/* 'enum perfctr_x86_tests_type' classifies CPUs according -+ to relevance for perfctr_x86_init_tests(). */ -+enum perfctr_x86_tests_type { -+ PTT_UNKNOWN, -+ PTT_GENERIC, -+ PTT_P5, -+ PTT_P6, -+ PTT_P4, -+ PTT_AMD, -+ PTT_WINCHIP, -+ PTT_VC3, -+}; -+ -+extern enum perfctr_x86_tests_type perfctr_x86_tests_type; -+ -+static inline void perfctr_set_tests_type(enum perfctr_x86_tests_type t) -+{ -+#ifdef CONFIG_PERFCTR_INIT_TESTS -+ perfctr_x86_tests_type = t; -+#endif -+} -+ -+extern void perfctr_x86_init_tests(void); -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc_setup.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/ppc_setup.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc_setup.c 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,40 @@ -+/* $Id: ppc_setup.c,v 1.1 2004/01/12 01:59:11 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * PPC32-specific kernel-resident code. 
-+ * -+ * Copyright (C) 2004 Mikael Pettersson -+ */ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/kernel.h> -+#include <linux/sched.h> -+#include <linux/interrupt.h> -+#include <asm/processor.h> -+#include <asm/perfctr.h> -+#include "ppc_compat.h" -+#include "compat.h" -+ -+#if PERFCTR_INTERRUPT_SUPPORT -+static void perfctr_default_ihandler(unsigned long pc) -+{ -+} -+ -+static perfctr_ihandler_t perfctr_ihandler = perfctr_default_ihandler; -+ -+void do_perfctr_interrupt(struct pt_regs *regs) -+{ -+ preempt_disable(); -+ (*perfctr_ihandler)(regs->nip); -+ preempt_enable_no_resched(); -+} -+ -+void perfctr_cpu_set_ihandler(perfctr_ihandler_t ihandler) -+{ -+ perfctr_ihandler = ihandler ? ihandler : perfctr_default_ihandler; -+} -+ -+#ifdef CONFIG_PERFCTR_MODULE -+EXPORT_SYMBOL(perfctr_cpu_set_ihandler); -+#endif /* MODULE */ -+#endif /* PERFCTR_INTERRUPT_SUPPORT */ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc_tests.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/ppc_tests.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc_tests.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,12 @@ -+/* $Id: ppc_tests.h,v 1.1.2.1 2004/06/21 22:33:35 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * Optional PPC32-specific init-time tests. 
-+ * -+ * Copyright (C) 2004 Mikael Pettersson -+ */ -+ -+#ifdef CONFIG_PERFCTR_INIT_TESTS -+extern void perfctr_ppc_init_tests(int have_mmcr1); -+#else -+static inline void perfctr_ppc_init_tests(int have_mmcr1) { } -+#endif -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/version.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/version.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/version.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1 @@ -+#define VERSION "2.6.10.2" -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64_tests.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_64_tests.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64_tests.c 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,174 @@ -+/* $Id: x86_64_tests.c,v 1.3 2004/02/21 11:04:46 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * Optional x86_64-specific init-time tests. 
-+ * -+ * Copyright (C) 2003-2004 Mikael Pettersson -+ */ -+#include <linux/config.h> -+#define __NO_VERSION__ -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/sched.h> -+#include <linux/fs.h> -+#include <linux/perfctr.h> -+#include <asm/msr.h> -+#include <asm/fixmap.h> -+#include <asm/apic.h> -+#include "x86_64_compat.h" -+#include "x86_64_tests.h" -+ -+#define MSR_K8_EVNTSEL0 0xC0010000 -+#define MSR_K8_PERFCTR0 0xC0010004 -+#define K8_EVNTSEL0_VAL (0xC0 | (3<<16) | (1<<22)) -+ -+#define NITER 64 -+#define X2(S) S";"S -+#define X8(S) X2(X2(X2(S))) -+ -+static void __init do_rdpmc(unsigned pmc, unsigned unused2) -+{ -+ unsigned i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("rdpmc") : : "c"(pmc) : "eax", "edx"); -+} -+ -+static void __init do_rdmsr(unsigned msr, unsigned unused2) -+{ -+ unsigned i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("rdmsr") : : "c"(msr) : "eax", "edx"); -+} -+ -+static void __init do_wrmsr(unsigned msr, unsigned data) -+{ -+ unsigned i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("wrmsr") : : "c"(msr), "a"(data), "d"(0)); -+} -+ -+static void __init do_rdcr4(unsigned unused1, unsigned unused2) -+{ -+ unsigned i; -+ unsigned long dummy; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("movq %%cr4,%0") : "=r"(dummy)); -+} -+ -+static void __init do_wrcr4(unsigned cr4, unsigned unused2) -+{ -+ unsigned i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("movq %0,%%cr4") : : "r"((long)cr4)); -+} -+ -+static void __init do_rdtsc(unsigned unused1, unsigned unused2) -+{ -+ unsigned i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__(X8("rdtsc") : : : "eax", "edx"); -+} -+ -+static void __init do_wrlvtpc(unsigned val, unsigned unused2) -+{ -+ unsigned i; -+ for(i = 0; i < NITER/8; ++i) { -+ apic_write(APIC_LVTPC, val); -+ apic_write(APIC_LVTPC, val); -+ apic_write(APIC_LVTPC, val); -+ apic_write(APIC_LVTPC, val); -+ apic_write(APIC_LVTPC, val); -+ 
apic_write(APIC_LVTPC, val); -+ apic_write(APIC_LVTPC, val); -+ apic_write(APIC_LVTPC, val); -+ } -+} -+ -+static void __init do_empty_loop(unsigned unused1, unsigned unused2) -+{ -+ unsigned i; -+ for(i = 0; i < NITER/8; ++i) -+ __asm__ __volatile__("" : : "c"(0)); -+} -+ -+static unsigned __init run(void (*doit)(unsigned, unsigned), -+ unsigned arg1, unsigned arg2) -+{ -+ unsigned start, dummy, stop; -+ rdtsc(start, dummy); -+ (*doit)(arg1, arg2); /* should take < 2^32 cycles to complete */ -+ rdtsc(stop, dummy); -+ return stop - start; -+} -+ -+static void __init init_tests_message(void) -+{ -+ printk(KERN_INFO "Please email the following PERFCTR INIT lines " -+ "to mikpe@csd.uu.se\n" -+ KERN_INFO "To remove this message, rebuild the driver " -+ "with CONFIG_PERFCTR_INIT_TESTS=n\n"); -+ printk(KERN_INFO "PERFCTR INIT: vendor %u, family %u, model %u, stepping %u, clock %u kHz\n", -+ current_cpu_data.x86_vendor, -+ current_cpu_data.x86, -+ current_cpu_data.x86_model, -+ current_cpu_data.x86_mask, -+ perfctr_cpu_khz()); -+} -+ -+static void __init -+measure_overheads(unsigned msr_evntsel0, unsigned evntsel0, unsigned msr_perfctr0) -+{ -+ int i; -+ unsigned int loop, ticks[9]; -+ const char *name[9]; -+ -+ if( msr_evntsel0 ) -+ wrmsr(msr_evntsel0, 0, 0); -+ -+ name[0] = "rdtsc"; -+ ticks[0] = run(do_rdtsc, 0, 0); -+ name[1] = "rdpmc"; -+ ticks[1] = (perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC) -+ ? run(do_rdpmc,1,0) : 0; -+ name[2] = "rdmsr (counter)"; -+ ticks[2] = msr_perfctr0 ? run(do_rdmsr, msr_perfctr0, 0) : 0; -+ name[3] = "rdmsr (evntsel)"; -+ ticks[3] = msr_evntsel0 ? run(do_rdmsr, msr_evntsel0, 0) : 0; -+ name[4] = "wrmsr (counter)"; -+ ticks[4] = msr_perfctr0 ? run(do_wrmsr, msr_perfctr0, 0) : 0; -+ name[5] = "wrmsr (evntsel)"; -+ ticks[5] = msr_evntsel0 ? 
run(do_wrmsr, msr_evntsel0, evntsel0) : 0; -+ name[6] = "read cr4"; -+ ticks[6] = run(do_rdcr4, 0, 0); -+ name[7] = "write cr4"; -+ ticks[7] = run(do_wrcr4, read_cr4(), 0); -+ name[8] = "write LVTPC"; -+ ticks[8] = (perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT) -+ ? run(do_wrlvtpc, APIC_DM_NMI|APIC_LVT_MASKED, 0) : 0; -+ -+ loop = run(do_empty_loop, 0, 0); -+ -+ if( msr_evntsel0 ) -+ wrmsr(msr_evntsel0, 0, 0); -+ -+ init_tests_message(); -+ printk(KERN_INFO "PERFCTR INIT: NITER == %u\n", NITER); -+ printk(KERN_INFO "PERFCTR INIT: loop overhead is %u cycles\n", loop); -+ for(i = 0; i < ARRAY_SIZE(ticks); ++i) { -+ unsigned int x; -+ if( !ticks[i] ) -+ continue; -+ x = ((ticks[i] - loop) * 10) / NITER; -+ printk(KERN_INFO "PERFCTR INIT: %s cost is %u.%u cycles (%u total)\n", -+ name[i], x/10, x%10, ticks[i]); -+ } -+} -+ -+void __init perfctr_k8_init_tests(void) -+{ -+ measure_overheads(MSR_K8_EVNTSEL0, K8_EVNTSEL0_VAL, MSR_K8_PERFCTR0); -+} -+ -+void __init perfctr_generic_init_tests(void) -+{ -+ measure_overheads(0, 0, 0); -+} -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64_setup.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_64_setup.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64_setup.c 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,63 @@ -+/* $Id: x86_64_setup.c,v 1.9 2004/02/21 11:56:53 mikpe Exp $ -+ * Performance-monitoring counters driver. -+ * x86_86-specific kernel-resident code. 
-+ * -+ * Copyright (C) 2003-2004 Mikael Pettersson -+ */ -+#include <linux/config.h> -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/kernel.h> -+#include <linux/sched.h> -+#include <linux/interrupt.h> -+#include <asm/processor.h> -+#include <asm/perfctr.h> -+#include <asm/fixmap.h> -+#include <asm/apic.h> -+#include "x86_64_compat.h" -+#include "compat.h" -+ -+static void perfctr_default_ihandler(unsigned long pc) -+{ -+} -+ -+static perfctr_ihandler_t perfctr_ihandler = perfctr_default_ihandler; -+ -+asmlinkage void smp_perfctr_interrupt(struct pt_regs *regs) -+{ -+ /* PREEMPT note: invoked via an interrupt gate, which -+ masks interrupts. We're still on the originating CPU. */ -+ ack_APIC_irq(); -+ irq_enter(); -+ (*perfctr_ihandler)(regs->rip); -+ irq_exit(); -+} -+ -+void perfctr_cpu_set_ihandler(perfctr_ihandler_t ihandler) -+{ -+ perfctr_ihandler = ihandler ? ihandler : perfctr_default_ihandler; -+} -+ -+extern unsigned int cpu_khz; -+ -+/* Wrapper to avoid namespace clash in RedHat 8.0's 2.4.18-14 kernel. 
*/ -+unsigned int perfctr_cpu_khz(void) -+{ -+ return cpu_khz; -+} -+ -+#ifdef CONFIG_PERFCTR_MODULE -+EXPORT_SYMBOL_mmu_cr4_features; -+EXPORT_SYMBOL(perfctr_cpu_khz); -+ -+EXPORT_SYMBOL(nmi_perfctr_msr); -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,71) && defined(CONFIG_PM) -+EXPORT_SYMBOL(apic_pm_register); -+EXPORT_SYMBOL(apic_pm_unregister); -+EXPORT_SYMBOL(nmi_pmdev); -+#endif -+ -+EXPORT_SYMBOL(perfctr_cpu_set_ihandler); -+ -+#endif /* MODULE */ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_64.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64.c 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,776 @@ -+/* $Id: x86_64.c,v 1.22.2.1 2004/05/29 22:25:22 mikpe Exp $ -+ * x86_64 performance-monitoring counters driver. -+ * -+ * Copyright (C) 2003-2004 Mikael Pettersson -+ */ -+#include <linux/config.h> -+#define __NO_VERSION__ -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/sched.h> -+#include <linux/fs.h> -+#include <linux/perfctr.h> -+ -+#include <asm/msr.h> -+#include <asm/fixmap.h> -+#include <asm/apic.h> -+struct hw_interrupt_type; -+#include <asm/hw_irq.h> -+ -+#include "compat.h" -+#include "x86_compat.h" -+#include "x86_tests.h" -+ -+/* Support for lazy evntsel and perfctr MSR updates. */ -+struct per_cpu_cache { /* roughly a subset of perfctr_cpu_state */ -+ union { -+ unsigned int id; /* cache owner id */ -+ } k1; -+ struct { -+ /* NOTE: these caches have physical indices, not virtual */ -+ unsigned int evntsel[4]; -+ } control; -+} ____cacheline_aligned; -+static struct per_cpu_cache per_cpu_cache[NR_CPUS] __cacheline_aligned; -+ -+/* Structure for counter snapshots, as 32-bit values. 
*/ -+struct perfctr_low_ctrs { -+ unsigned int tsc; -+ unsigned int pmc[4]; -+}; -+ -+/* AMD K8 */ -+#define MSR_K8_EVNTSEL0 0xC0010000 /* .. 0xC0010003 */ -+#define MSR_K8_PERFCTR0 0xC0010004 /* .. 0xC0010007 */ -+#define K8_EVNTSEL_ENABLE 0x00400000 -+#define K8_EVNTSEL_INT 0x00100000 -+#define K8_EVNTSEL_CPL 0x00030000 -+#define K8_EVNTSEL_RESERVED 0x00280000 -+ -+#define rdpmc_low(ctr,low) \ -+ __asm__ __volatile__("rdpmc" : "=a"(low) : "c"(ctr) : "edx") -+ -+static void clear_msr_range(unsigned int base, unsigned int n) -+{ -+ unsigned int i; -+ -+ for(i = 0; i < n; ++i) -+ wrmsr(base+i, 0, 0); -+} -+ -+static inline void set_in_cr4_local(unsigned int mask) -+{ -+ write_cr4(read_cr4() | mask); -+} -+ -+static inline void clear_in_cr4_local(unsigned int mask) -+{ -+ write_cr4(read_cr4() & ~mask); -+} -+ -+static unsigned int new_id(void) -+{ -+ static spinlock_t lock = SPIN_LOCK_UNLOCKED; -+ static unsigned int counter; -+ int id; -+ -+ spin_lock(&lock); -+ id = ++counter; -+ spin_unlock(&lock); -+ return id; -+} -+ -+#if defined(CONFIG_SMP) -+ -+static inline void set_isuspend_cpu(struct perfctr_cpu_state *state, -+ int cpu) -+{ -+ state->k1.isuspend_cpu = cpu; -+} -+ -+static inline int is_isuspend_cpu(const struct perfctr_cpu_state *state, -+ int cpu) -+{ -+ return state->k1.isuspend_cpu == cpu; -+} -+ -+static inline void clear_isuspend_cpu(struct perfctr_cpu_state *state) -+{ -+ state->k1.isuspend_cpu = NR_CPUS; -+} -+ -+#else -+static inline void set_isuspend_cpu(struct perfctr_cpu_state *state, -+ int cpu) { } -+static inline int is_isuspend_cpu(const struct perfctr_cpu_state *state, -+ int cpu) { return 1; } -+static inline void clear_isuspend_cpu(struct perfctr_cpu_state *state) { } -+#endif -+ -+/* XXX: disabled: called from switch_to() where printk() is disallowed */ -+#if 0 && defined(CONFIG_PERFCTR_DEBUG) -+static void debug_evntsel_cache(const struct perfctr_cpu_state *state, -+ const struct per_cpu_cache *cache) -+{ -+ unsigned int nrctrs, i; -+ 
-+ nrctrs = perfctr_cstatus_nrctrs(state->cstatus); -+ for(i = 0; i < nrctrs; ++i) { -+ unsigned int evntsel = state->control.evntsel[i]; -+ unsigned int pmc = state->control.pmc_map[i]; -+ if( evntsel != cache->control.evntsel[pmc] ) { -+ printk(KERN_ERR "perfctr: (pid %d, comm %s) " -+ "evntsel[%u] is %#x, should be %#x\n", -+ current->pid, current->comm, -+ i, cache->control.evntsel[pmc], evntsel); -+ return; -+ } -+ } -+} -+#else -+static inline void debug_evntsel_cache(const struct perfctr_cpu_state *s, -+ const struct per_cpu_cache *c) -+{ } -+#endif -+ -+/**************************************************************** -+ * * -+ * Driver procedures. * -+ * * -+ ****************************************************************/ -+ -+static void perfctr_cpu_read_counters(const struct perfctr_cpu_state *state, -+ struct perfctr_low_ctrs *ctrs) -+{ -+ unsigned int cstatus, nrctrs, i; -+ -+ cstatus = state->cstatus; -+ if( perfctr_cstatus_has_tsc(cstatus) ) -+ rdtscl(ctrs->tsc); -+ nrctrs = perfctr_cstatus_nractrs(cstatus); -+ for(i = 0; i < nrctrs; ++i) { -+ unsigned int pmc = state->pmc[i].map; -+ rdpmc_low(pmc, ctrs->pmc[i]); -+ } -+} -+ -+static int k8_check_control(struct perfctr_cpu_state *state) -+{ -+ unsigned int evntsel, i, nractrs, nrctrs, pmc_mask, pmc; -+ -+ nractrs = state->control.nractrs; -+ nrctrs = nractrs + state->control.nrictrs; -+ if( nrctrs < nractrs || nrctrs > 4 ) -+ return -EINVAL; -+ -+ pmc_mask = 0; -+ for(i = 0; i < nrctrs; ++i) { -+ pmc = state->control.pmc_map[i]; -+ state->pmc[i].map = pmc; -+ if( pmc >= 4 || (pmc_mask & (1<<pmc)) ) -+ return -EINVAL; -+ pmc_mask |= (1<<pmc); -+ evntsel = state->control.evntsel[i]; -+ /* protect reserved bits */ -+ if( evntsel & K8_EVNTSEL_RESERVED ) -+ return -EPERM; -+ /* ENable bit must be set in each evntsel */ -+ if( !(evntsel & K8_EVNTSEL_ENABLE) ) -+ return -EINVAL; -+ /* the CPL field must be non-zero */ -+ if( !(evntsel & K8_EVNTSEL_CPL) ) -+ return -EINVAL; -+ /* INT bit must be off for 
a-mode and on for i-mode counters */ -+ if( evntsel & K8_EVNTSEL_INT ) { -+ if( i < nractrs ) -+ return -EINVAL; -+ } else { -+ if( i >= nractrs ) -+ return -EINVAL; -+ } -+ } -+ state->k1.id = new_id(); -+ return 0; -+} -+ -+static void perfctr_cpu_isuspend(struct perfctr_cpu_state *state) -+{ -+ struct per_cpu_cache *cache; -+ unsigned int cstatus, nrctrs, i; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ cache = &per_cpu_cache[cpu]; -+ cstatus = state->cstatus; -+ nrctrs = perfctr_cstatus_nrctrs(cstatus); -+ for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) { -+ unsigned int pmc, now; -+ pmc = state->pmc[i].map; -+ cache->control.evntsel[pmc] = 0; -+ wrmsr(MSR_K8_EVNTSEL0+pmc, 0, 0); -+ rdpmc_low(pmc, now); -+ state->pmc[i].sum += now - state->pmc[i].start; -+ state->pmc[i].start = now; -+ } -+ /* cache->k1.id is still == state->k1.id */ -+ set_isuspend_cpu(state, cpu); -+} -+ -+static void perfctr_cpu_iresume(const struct perfctr_cpu_state *state) -+{ -+ struct per_cpu_cache *cache; -+ unsigned int cstatus, nrctrs, i; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ cache = &per_cpu_cache[cpu]; -+ if( cache->k1.id == state->k1.id ) { -+ cache->k1.id = 0; /* force reload of cleared EVNTSELs */ -+ if( is_isuspend_cpu(state, cpu) ) -+ return; /* skip reload of PERFCTRs */ -+ } -+ cstatus = state->cstatus; -+ nrctrs = perfctr_cstatus_nrctrs(cstatus); -+ for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) { -+ unsigned int pmc = state->pmc[i].map; -+ /* If the control wasn't ours we must disable the evntsels -+ before reinitialising the counters, to prevent unexpected -+ counter increments and missed overflow interrupts. 
*/ -+ if( cache->control.evntsel[pmc] ) { -+ cache->control.evntsel[pmc] = 0; -+ wrmsr(MSR_K8_EVNTSEL0+pmc, 0, 0); -+ } -+ wrmsr(MSR_K8_PERFCTR0+pmc, state->pmc[i].start, -1); -+ } -+ /* cache->k1.id remains != state->k1.id */ -+} -+ -+static void perfctr_cpu_write_control(const struct perfctr_cpu_state *state) -+{ -+ struct per_cpu_cache *cache; -+ unsigned int nrctrs, i; -+ -+ cache = &per_cpu_cache[smp_processor_id()]; -+ if( cache->k1.id == state->k1.id ) { -+ debug_evntsel_cache(state, cache); -+ return; -+ } -+ nrctrs = perfctr_cstatus_nrctrs(state->cstatus); -+ for(i = 0; i < nrctrs; ++i) { -+ unsigned int evntsel = state->control.evntsel[i]; -+ unsigned int pmc = state->pmc[i].map; -+ if( evntsel != cache->control.evntsel[pmc] ) { -+ cache->control.evntsel[pmc] = evntsel; -+ wrmsr(MSR_K8_EVNTSEL0+pmc, evntsel, 0); -+ } -+ } -+ cache->k1.id = state->k1.id; -+} -+ -+static void k8_clear_counters(void) -+{ -+ clear_msr_range(MSR_K8_EVNTSEL0, 4+4); -+} -+ -+/* -+ * Generic driver for any x86-64 with a working TSC. -+ * (Mainly for testing with Screwdriver.) -+ */ -+ -+static int generic_check_control(struct perfctr_cpu_state *state) -+{ -+ if( state->control.nractrs || state->control.nrictrs ) -+ return -EINVAL; -+ return 0; -+} -+ -+static void generic_clear_counters(void) -+{ -+} -+ -+/* -+ * Driver methods, internal and exported. -+ */ -+ -+/* Call perfctr_cpu_ireload() just before perfctr_cpu_resume() to -+ bypass internal caching and force a reload if the I-mode PMCs. 
*/ -+void perfctr_cpu_ireload(struct perfctr_cpu_state *state) -+{ -+#ifdef CONFIG_SMP -+ clear_isuspend_cpu(state); -+#else -+ per_cpu_cache[smp_processor_id()].k1.id = 0; -+#endif -+} -+ -+/* PRE: the counters have been suspended and sampled by perfctr_cpu_suspend() */ -+unsigned int perfctr_cpu_identify_overflow(struct perfctr_cpu_state *state) -+{ -+ unsigned int cstatus, nrctrs, pmc, pmc_mask; -+ -+ cstatus = state->cstatus; -+ pmc = perfctr_cstatus_nractrs(cstatus); -+ nrctrs = perfctr_cstatus_nrctrs(cstatus); -+ -+ for(pmc_mask = 0; pmc < nrctrs; ++pmc) { -+ if( (int)state->pmc[pmc].start >= 0 ) { /* XXX: ">" ? */ -+ /* XXX: "+=" to correct for overshots */ -+ state->pmc[pmc].start = state->control.ireset[pmc]; -+ pmc_mask |= (1 << pmc); -+ } -+ } -+ return pmc_mask; -+} -+ -+static inline int check_ireset(const struct perfctr_cpu_state *state) -+{ -+ unsigned int nrctrs, i; -+ -+ i = state->control.nractrs; -+ nrctrs = i + state->control.nrictrs; -+ for(; i < nrctrs; ++i) -+ if( state->control.ireset[i] >= 0 ) -+ return -EINVAL; -+ return 0; -+} -+ -+static inline void setup_imode_start_values(struct perfctr_cpu_state *state) -+{ -+ unsigned int cstatus, nrctrs, i; -+ -+ cstatus = state->cstatus; -+ nrctrs = perfctr_cstatus_nrctrs(cstatus); -+ for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) -+ state->pmc[i].start = state->control.ireset[i]; -+} -+ -+static inline void debug_no_imode(const struct perfctr_cpu_state *state) -+{ -+#ifdef CONFIG_PERFCTR_DEBUG -+ if( perfctr_cstatus_has_ictrs(state->cstatus) ) -+ printk(KERN_ERR "perfctr: BUG! 
updating control in" -+ " perfctr %p on cpu %u while it has cstatus %x" -+ " (pid %d, comm %s)\n", -+ state, smp_processor_id(), state->cstatus, -+ current->pid, current->comm); -+#endif -+} -+ -+static int (*check_control)(struct perfctr_cpu_state*); -+int perfctr_cpu_update_control(struct perfctr_cpu_state *state, int is_global) -+{ -+ int err; -+ -+ debug_no_imode(state); -+ clear_isuspend_cpu(state); -+ state->cstatus = 0; -+ -+ /* disallow i-mode counters if we cannot catch the interrupts */ -+ if( !(perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT) -+ && state->control.nrictrs ) -+ return -EPERM; -+ -+ err = check_control(state); -+ if( err < 0 ) -+ return err; -+ err = check_ireset(state); -+ if( err < 0 ) -+ return err; -+ state->cstatus = perfctr_mk_cstatus(state->control.tsc_on, -+ state->control.nractrs, -+ state->control.nrictrs); -+ setup_imode_start_values(state); -+ return 0; -+} -+ -+void perfctr_cpu_suspend(struct perfctr_cpu_state *state) -+{ -+ unsigned int i, cstatus, nractrs; -+ struct perfctr_low_ctrs now; -+ -+ if( perfctr_cstatus_has_ictrs(state->cstatus) ) -+ perfctr_cpu_isuspend(state); -+ perfctr_cpu_read_counters(state, &now); -+ cstatus = state->cstatus; -+ if( perfctr_cstatus_has_tsc(cstatus) ) -+ state->tsc_sum += now.tsc - state->tsc_start; -+ nractrs = perfctr_cstatus_nractrs(cstatus); -+ for(i = 0; i < nractrs; ++i) -+ state->pmc[i].sum += now.pmc[i] - state->pmc[i].start; -+} -+ -+void perfctr_cpu_resume(struct perfctr_cpu_state *state) -+{ -+ if( perfctr_cstatus_has_ictrs(state->cstatus) ) -+ perfctr_cpu_iresume(state); -+ perfctr_cpu_write_control(state); -+ //perfctr_cpu_read_counters(state, &state->start); -+ { -+ struct perfctr_low_ctrs now; -+ unsigned int i, cstatus, nrctrs; -+ perfctr_cpu_read_counters(state, &now); -+ cstatus = state->cstatus; -+ if( perfctr_cstatus_has_tsc(cstatus) ) -+ state->tsc_start = now.tsc; -+ nrctrs = perfctr_cstatus_nractrs(cstatus); -+ for(i = 0; i < nrctrs; ++i) -+ state->pmc[i].start = 
now.pmc[i]; -+ } -+ /* XXX: if (SMP && start.tsc == now.tsc) ++now.tsc; */ -+} -+ -+void perfctr_cpu_sample(struct perfctr_cpu_state *state) -+{ -+ unsigned int i, cstatus, nractrs; -+ struct perfctr_low_ctrs now; -+ -+ perfctr_cpu_read_counters(state, &now); -+ cstatus = state->cstatus; -+ if( perfctr_cstatus_has_tsc(cstatus) ) { -+ state->tsc_sum += now.tsc - state->tsc_start; -+ state->tsc_start = now.tsc; -+ } -+ nractrs = perfctr_cstatus_nractrs(cstatus); -+ for(i = 0; i < nractrs; ++i) { -+ state->pmc[i].sum += now.pmc[i] - state->pmc[i].start; -+ state->pmc[i].start = now.pmc[i]; -+ } -+} -+ -+static void (*clear_counters)(void); -+static void perfctr_cpu_clear_counters(void) -+{ -+ return clear_counters(); -+} -+ -+/**************************************************************** -+ * * -+ * Processor detection and initialisation procedures. * -+ * * -+ ****************************************************************/ -+ -+static int __init amd_init(void) -+{ -+ static char k8_name[] __initdata = "AMD K8"; -+ static char k8c_name[] __initdata = "AMD K8C"; -+ -+ if( !cpu_has_tsc ) -+ return -ENODEV; -+ if( boot_cpu_data.x86 != 15 ) -+ return -ENODEV; -+ if( (boot_cpu_data.x86_model > 5) || -+ (boot_cpu_data.x86_model >= 4 && boot_cpu_data.x86_mask >= 8) ) { -+ perfctr_info.cpu_type = PERFCTR_X86_AMD_K8C; -+ perfctr_cpu_name = k8c_name; -+ } else { -+ perfctr_info.cpu_type = PERFCTR_X86_AMD_K8; -+ perfctr_cpu_name = k8_name; -+ } -+ check_control = k8_check_control; -+ clear_counters = k8_clear_counters; -+ if( cpu_has_apic ) -+ perfctr_info.cpu_features |= PERFCTR_FEATURE_PCINT; -+ return 0; -+} -+ -+/* For testing on Screwdriver. 
*/ -+static int __init generic_init(void) -+{ -+ static char generic_name[] __initdata = "Generic x86-64 with TSC"; -+ if( !cpu_has_tsc ) -+ return -ENODEV; -+ perfctr_info.cpu_features &= ~PERFCTR_FEATURE_RDPMC; -+ perfctr_info.cpu_type = PERFCTR_X86_GENERIC; -+ perfctr_cpu_name = generic_name; -+ check_control = generic_check_control; -+ clear_counters = generic_clear_counters; -+ return 0; -+} -+ -+static void perfctr_cpu_init_one(void *ignore) -+{ -+ /* PREEMPT note: when called via smp_call_function(), -+ this is in IRQ context with preemption disabled. */ -+ perfctr_cpu_clear_counters(); -+ if( cpu_has_apic ) -+ apic_write(APIC_LVTPC, LOCAL_PERFCTR_VECTOR); -+ if( perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC ) -+ set_in_cr4_local(X86_CR4_PCE); -+} -+ -+static void perfctr_cpu_exit_one(void *ignore) -+{ -+ /* PREEMPT note: when called via smp_call_function(), -+ this is in IRQ context with preemption disabled. */ -+ perfctr_cpu_clear_counters(); -+ if( cpu_has_apic ) -+ apic_write(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED); -+ if( perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC ) -+ clear_in_cr4_local(X86_CR4_PCE); -+} -+ -+#if defined(CONFIG_PM) -+ -+static void perfctr_pm_suspend(void) -+{ -+ /* XXX: clear control registers */ -+ printk("perfctr: PM suspend\n"); -+} -+ -+static void perfctr_pm_resume(void) -+{ -+ /* XXX: reload control registers */ -+ printk("perfctr: PM resume\n"); -+} -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,71) -+ -+#include <linux/sysdev.h> -+ -+static int perfctr_device_suspend(struct sys_device *dev, u32 state) -+{ -+ perfctr_pm_suspend(); -+ return 0; -+} -+ -+static int perfctr_device_resume(struct sys_device *dev) -+{ -+ perfctr_pm_resume(); -+ return 0; -+} -+ -+static struct sysdev_class perfctr_sysclass = { -+ set_kset_name("perfctr"), -+ .resume = perfctr_device_resume, -+ .suspend = perfctr_device_suspend, -+}; -+ -+static struct sys_device device_perfctr = { -+ .id = 0, -+ .cls = &perfctr_sysclass, -+}; -+ 
-+static void x86_pm_init(void) -+{ -+ if( sysdev_class_register(&perfctr_sysclass) == 0 ) -+ sysdev_register(&device_perfctr); -+} -+ -+static void x86_pm_exit(void) -+{ -+ sysdev_unregister(&device_perfctr); -+ sysdev_class_unregister(&perfctr_sysclass); -+} -+ -+#else /* 2.4 kernel */ -+ -+static int x86_pm_callback(struct pm_dev *dev, pm_request_t rqst, void *data) -+{ -+ switch( rqst ) { -+ case PM_SUSPEND: -+ perfctr_pm_suspend(); -+ break; -+ case PM_RESUME: -+ perfctr_pm_resume(); -+ break; -+ } -+ return 0; -+} -+ -+static struct pm_dev *x86_pmdev; -+ -+static void x86_pm_init(void) -+{ -+ x86_pmdev = apic_pm_register(PM_SYS_DEV, 0, x86_pm_callback); -+} -+ -+static void x86_pm_exit(void) -+{ -+ if( x86_pmdev ) { -+ apic_pm_unregister(x86_pmdev); -+ x86_pmdev = NULL; -+ } -+} -+ -+#endif /* 2.4 kernel */ -+ -+#else -+ -+static inline void x86_pm_init(void) { } -+static inline void x86_pm_exit(void) { } -+ -+#endif /* CONFIG_PM */ -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,71) -+static void disable_lapic_nmi_watchdog(void) -+{ -+#ifdef CONFIG_PM -+ if( nmi_pmdev ) { -+ apic_pm_unregister(nmi_pmdev); -+ nmi_pmdev = 0; -+ } -+#endif -+} -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) -+static int reserve_lapic_nmi(void) -+{ -+ int ret = 0; -+ if( nmi_perfctr_msr ) { -+ nmi_perfctr_msr = 0; -+ disable_lapic_nmi_watchdog(); -+ ret = 1; -+ } -+ return ret; -+} -+ -+static inline void release_lapic_nmi(void) { } -+#endif -+ -+static void do_init_tests(void) -+{ -+#ifdef CONFIG_PERFCTR_INIT_TESTS -+ if( reserve_lapic_nmi() >= 0 ) { -+ perfctr_x86_init_tests(); -+ release_lapic_nmi(); -+ } -+#endif -+} -+ -+static void invalidate_per_cpu_cache(void) -+{ -+ /* -+ * per_cpu_cache[] is initialised to contain "impossible" -+ * evntsel values guaranteed to differ from anything accepted -+ * by perfctr_cpu_update_control(). 
This way, initialisation of -+ * a CPU's evntsel MSRs will happen automatically the first time -+ * perfctr_cpu_write_control() executes on it. -+ * All-bits-one works for all currently supported processors. -+ * The memset also sets the ids to -1, which is intentional. -+ */ -+ memset(per_cpu_cache, ~0, sizeof per_cpu_cache); -+} -+ -+int __init perfctr_cpu_init(void) -+{ -+ int err = -ENODEV; -+ -+ preempt_disable(); -+ -+ /* RDPMC and RDTSC are on by default. They will be disabled -+ by the init procedures if necessary. */ -+ perfctr_info.cpu_features = PERFCTR_FEATURE_RDPMC | PERFCTR_FEATURE_RDTSC; -+ -+ switch( boot_cpu_data.x86_vendor ) { -+ case X86_VENDOR_AMD: -+ err = amd_init(); -+ break; -+ } -+ if( err ) { -+ err = generic_init(); /* last resort */ -+ if( err ) -+ goto out; -+ } -+ do_init_tests(); -+#if 0 -+ /* -+ * Put the hardware in a sane state: -+ * - clear perfctr MSRs -+ * - set up APIC_LVTPC -+ * - set CR4.PCE [on permanently due to __flush_tlb_global()] -+ * - install our default interrupt handler -+ */ -+ if( perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC ) -+ mmu_cr4_features |= X86_CR4_PCE; -+ perfctr_cpu_init_one(NULL); -+ smp_call_function(perfctr_cpu_init_one, NULL, 1, 1); -+ perfctr_cpu_set_ihandler(NULL); -+ /* -+ * Fix up the connection to the local APIC: -+ * - disable and disconnect the NMI watchdog -+ * - register our PM callback -+ */ -+ disable_nmi_watchdog(); -+ x86_pm_init(); -+#endif -+ -+ invalidate_per_cpu_cache(); -+ -+ perfctr_info.cpu_khz = perfctr_cpu_khz(); -+ perfctr_info.tsc_to_cpu_mult = 1; -+ -+ out: -+ preempt_enable(); -+ return err; -+} -+ -+void __exit perfctr_cpu_exit(void) -+{ -+#if 0 -+ preempt_disable(); -+ if( perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC ) -+ mmu_cr4_features &= ~X86_CR4_PCE; -+ perfctr_cpu_exit_one(NULL); -+ smp_call_function(perfctr_cpu_exit_one, NULL, 1, 1); -+ perfctr_cpu_set_ihandler(NULL); -+ x86_pm_exit(); -+ /* XXX: restart nmi watchdog? 
*/ -+ preempt_enable(); -+#endif -+} -+ -+/**************************************************************** -+ * * -+ * Hardware reservation. * -+ * * -+ ****************************************************************/ -+ -+static DECLARE_MUTEX(mutex); -+static const char *current_service = 0; -+ -+const char *perfctr_cpu_reserve(const char *service) -+{ -+ const char *ret; -+ -+ down(&mutex); -+ ret = current_service; -+ if( ret ) -+ goto out_up; -+ ret = "unknown driver (oprofile?)"; -+ if( reserve_lapic_nmi() < 0 ) -+ goto out_up; -+ current_service = service; -+ __module_get(THIS_MODULE); -+ if( perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC ) -+ mmu_cr4_features |= X86_CR4_PCE; -+ on_each_cpu(perfctr_cpu_init_one, NULL, 1, 1); -+ perfctr_cpu_set_ihandler(NULL); -+ x86_pm_init(); -+ ret = NULL; -+ out_up: -+ up(&mutex); -+ return ret; -+} -+ -+void perfctr_cpu_release(const char *service) -+{ -+ down(&mutex); -+ if( service != current_service ) { -+ printk(KERN_ERR "%s: attempt by %s to release while reserved by %s\n", -+ __FUNCTION__, service, current_service); -+ goto out_up; -+ } -+ /* power down the counters */ -+ invalidate_per_cpu_cache(); -+ if( perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC ) -+ mmu_cr4_features &= ~X86_CR4_PCE; -+ on_each_cpu(perfctr_cpu_exit_one, NULL, 1, 1); -+ perfctr_cpu_set_ihandler(NULL); -+ x86_pm_exit(); -+ current_service = 0; -+ release_lapic_nmi(); -+ module_put(THIS_MODULE); -+ out_up: -+ up(&mutex); -+} -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86.c 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,1720 @@ -+/* $Id: x86.c,v 1.127.2.13 2004/09/14 17:56:42 mikpe Exp $ -+ * x86/x86_64 performance-monitoring counters driver. 
-+ * -+ * Copyright (C) 1999-2004 Mikael Pettersson -+ */ -+#include <linux/config.h> -+#define __NO_VERSION__ -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/sched.h> -+#include <linux/fs.h> -+#include <linux/perfctr.h> -+ -+#include <asm/msr.h> -+#undef MSR_P6_PERFCTR0 -+#undef MSR_IA32_MISC_ENABLE -+#include <asm/fixmap.h> -+#include <asm/apic.h> -+struct hw_interrupt_type; -+#include <asm/hw_irq.h> -+ -+#include "compat.h" -+#include "x86_compat.h" -+#include "x86_tests.h" -+ -+/* Support for lazy evntsel and perfctr MSR updates. */ -+struct per_cpu_cache { /* roughly a subset of perfctr_cpu_state */ -+ union { -+ unsigned int p5_cesr; -+ unsigned int id; /* cache owner id */ -+ } k1; -+ struct { -+ /* NOTE: these caches have physical indices, not virtual */ -+ unsigned int evntsel[18]; -+ unsigned int escr[0x3E2-0x3A0]; -+ unsigned int pebs_enable; -+ unsigned int pebs_matrix_vert; -+ } control; -+} ____cacheline_aligned; -+static struct per_cpu_cache per_cpu_cache[NR_CPUS] __cacheline_aligned; -+#define __get_cpu_cache(cpu) (&per_cpu_cache[cpu]) -+#define get_cpu_cache() __get_cpu_cache(smp_processor_id()) -+ -+/* Structure for counter snapshots, as 32-bit values. */ -+struct perfctr_low_ctrs { -+ unsigned int tsc; -+ unsigned int pmc[18]; -+}; -+ -+/* Intel P5, Cyrix 6x86MX/MII/III, Centaur WinChip C6/2/3 */ -+#define MSR_P5_CESR 0x11 -+#define MSR_P5_CTR0 0x12 /* .. 0x13 */ -+#define P5_CESR_CPL 0x00C0 -+#define P5_CESR_RESERVED (~0x01FF) -+#define MII_CESR_RESERVED (~0x05FF) -+#define C6_CESR_RESERVED (~0x00FF) -+ -+/* Intel P6, VIA C3 */ -+#define MSR_P6_PERFCTR0 0xC1 /* .. 0xC2 */ -+#define MSR_P6_EVNTSEL0 0x186 /* .. 0x187 */ -+#define P6_EVNTSEL_ENABLE 0x00400000 -+#define P6_EVNTSEL_INT 0x00100000 -+#define P6_EVNTSEL_CPL 0x00030000 -+#define P6_EVNTSEL_RESERVED 0x00280000 -+#define VC3_EVNTSEL1_RESERVED (~0x1FF) -+ -+/* AMD K7 */ -+#define MSR_K7_EVNTSEL0 0xC0010000 /* .. 
0xC0010003 */ -+#define MSR_K7_PERFCTR0 0xC0010004 /* .. 0xC0010007 */ -+ -+/* Intel P4, Intel Pentium M */ -+#define MSR_IA32_MISC_ENABLE 0x1A0 -+#define MSR_IA32_MISC_ENABLE_PERF_AVAIL (1<<7) /* read-only status bit */ -+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1<<12) /* read-only status bit */ -+ -+/* Intel P4 */ -+#define MSR_P4_PERFCTR0 0x300 /* .. 0x311 */ -+#define MSR_P4_CCCR0 0x360 /* .. 0x371 */ -+#define MSR_P4_ESCR0 0x3A0 /* .. 0x3E1, with some gaps */ -+ -+#define MSR_P4_PEBS_ENABLE 0x3F1 -+#define P4_PE_REPLAY_TAG_BITS 0x00000607 -+#define P4_PE_UOP_TAG 0x01000000 -+#define P4_PE_RESERVED 0xFEFFF9F8 /* only allow ReplayTagging */ -+ -+#define MSR_P4_PEBS_MATRIX_VERT 0x3F2 -+#define P4_PMV_REPLAY_TAG_BITS 0x00000003 -+#define P4_PMV_RESERVED 0xFFFFFFFC -+ -+#define P4_CCCR_OVF 0x80000000 -+#define P4_CCCR_CASCADE 0x40000000 -+#define P4_CCCR_OVF_PMI_T1 0x08000000 -+#define P4_CCCR_OVF_PMI_T0 0x04000000 -+#define P4_CCCR_FORCE_OVF 0x02000000 -+#define P4_CCCR_ACTIVE_THREAD 0x00030000 -+#define P4_CCCR_ENABLE 0x00001000 -+#define P4_CCCR_ESCR_SELECT(X) (((X) >> 13) & 0x7) -+#define P4_CCCR_EXTENDED_CASCADE 0x00000800 -+#define P4_CCCR_RESERVED (0x300007FF|P4_CCCR_OVF|P4_CCCR_OVF_PMI_T1) -+ -+#define P4_ESCR_CPL_T1 0x00000003 -+#define P4_ESCR_CPL_T0 0x0000000C -+#define P4_ESCR_TAG_ENABLE 0x00000010 -+#define P4_ESCR_RESERVED (0x80000000) -+ -+#define P4_FAST_RDPMC 0x80000000 -+#define P4_MASK_FAST_RDPMC 0x0000001F /* we only need low 5 bits */ -+ -+#define rdmsr_low(msr,low) \ -+ __asm__ __volatile__("rdmsr" : "=a"(low) : "c"(msr) : "edx") -+#define rdpmc_low(ctr,low) \ -+ __asm__ __volatile__("rdpmc" : "=a"(low) : "c"(ctr) : "edx") -+ -+static void clear_msr_range(unsigned int base, unsigned int n) -+{ -+ unsigned int i; -+ -+ for(i = 0; i < n; ++i) -+ wrmsr(base+i, 0, 0); -+} -+ -+static inline void set_in_cr4_local(unsigned int mask) -+{ -+ write_cr4(read_cr4() | mask); -+} -+ -+static inline void clear_in_cr4_local(unsigned int mask) -+{ -+ 
write_cr4(read_cr4() & ~mask); -+} -+ -+static unsigned int new_id(void) -+{ -+ static spinlock_t lock = SPIN_LOCK_UNLOCKED; -+ static unsigned int counter; -+ int id; -+ -+ spin_lock(&lock); -+ id = ++counter; -+ spin_unlock(&lock); -+ return id; -+} -+ -+#if !defined(CONFIG_X86_LOCAL_APIC) -+#define perfctr_cstatus_has_ictrs(cstatus) 0 -+#undef cpu_has_apic -+#define cpu_has_apic 0 -+#undef apic_write -+#define apic_write(reg,vector) do{}while(0) -+#endif -+ -+#if defined(CONFIG_SMP) -+ -+static inline void -+set_isuspend_cpu(struct perfctr_cpu_state *state, int cpu) -+{ -+ state->k1.isuspend_cpu = cpu; -+} -+ -+static inline int -+is_isuspend_cpu(const struct perfctr_cpu_state *state, int cpu) -+{ -+ return state->k1.isuspend_cpu == cpu; -+} -+ -+static inline void clear_isuspend_cpu(struct perfctr_cpu_state *state) -+{ -+ state->k1.isuspend_cpu = NR_CPUS; -+} -+ -+#else -+static inline void set_isuspend_cpu(struct perfctr_cpu_state *state, int cpu) { } -+static inline int is_isuspend_cpu(const struct perfctr_cpu_state *state, int cpu) { return 1; } -+static inline void clear_isuspend_cpu(struct perfctr_cpu_state *state) { } -+#endif -+ -+/**************************************************************** -+ * * -+ * Driver procedures. * -+ * * -+ ****************************************************************/ -+ -+/* -+ * Intel P5 family (Pentium, family code 5). -+ * - One TSC and two 40-bit PMCs. -+ * - A single 32-bit CESR (MSR 0x11) controls both PMCs. -+ * CESR has two halves, each controlling one PMC. -+ * To keep the API reasonably clean, the user puts 16 bits of -+ * control data in each counter's evntsel; the driver combines -+ * these to a single 32-bit CESR value. -+ * - Overflow interrupts are not available. -+ * - Pentium MMX added the RDPMC instruction. RDPMC has lower -+ * overhead than RDMSR and it can be used in user-mode code. 
-+ * - The MMX events are not symmetric: some events are only available -+ * for some PMC, and some event codes denote different events -+ * depending on which PMCs they control. -+ */ -+ -+/* shared with MII and C6 */ -+static int p5_like_check_control(struct perfctr_cpu_state *state, -+ unsigned int reserved_bits, int is_c6) -+{ -+ unsigned short cesr_half[2]; -+ unsigned int pmc, evntsel, i; -+ -+ if (state->control.nrictrs != 0 || state->control.nractrs > 2) -+ return -EINVAL; -+ cesr_half[0] = 0; -+ cesr_half[1] = 0; -+ for(i = 0; i < state->control.nractrs; ++i) { -+ pmc = state->control.pmc_map[i]; -+ state->pmc[i].map = pmc; -+ if (pmc > 1 || cesr_half[pmc] != 0) -+ return -EINVAL; -+ evntsel = state->control.evntsel[i]; -+ /* protect reserved bits */ -+ if ((evntsel & reserved_bits) != 0) -+ return -EPERM; -+ /* the CPL field (if defined) must be non-zero */ -+ if (!is_c6 && !(evntsel & P5_CESR_CPL)) -+ return -EINVAL; -+ cesr_half[pmc] = evntsel; -+ } -+ state->k1.id = (cesr_half[1] << 16) | cesr_half[0]; -+ return 0; -+} -+ -+static int p5_check_control(struct perfctr_cpu_state *state, int is_global) -+{ -+ return p5_like_check_control(state, P5_CESR_RESERVED, 0); -+} -+ -+/* shared with MII but not C6 */ -+static void p5_write_control(const struct perfctr_cpu_state *state) -+{ -+ struct per_cpu_cache *cache; -+ unsigned int cesr; -+ -+ cesr = state->k1.id; -+ if (!cesr) /* no PMC is on (this test doesn't work on C6) */ -+ return; -+ cache = get_cpu_cache(); -+ if (cache->k1.p5_cesr != cesr) { -+ cache->k1.p5_cesr = cesr; -+ wrmsr(MSR_P5_CESR, cesr, 0); -+ } -+} -+ -+static void p5_read_counters(const struct perfctr_cpu_state *state, -+ struct perfctr_low_ctrs *ctrs) -+{ -+ unsigned int cstatus, nrctrs, i; -+ -+ /* The P5 doesn't allocate a cache line on a write miss, so do -+ a dummy read to avoid a write miss here _and_ a read miss -+ later in our caller. 
*/ -+ asm("" : : "r"(ctrs->tsc)); -+ -+ cstatus = state->cstatus; -+ if (perfctr_cstatus_has_tsc(cstatus)) -+ rdtscl(ctrs->tsc); -+ nrctrs = perfctr_cstatus_nractrs(cstatus); -+ for(i = 0; i < nrctrs; ++i) { -+ unsigned int pmc = state->pmc[i].map; -+ rdmsr_low(MSR_P5_CTR0+pmc, ctrs->pmc[i]); -+ } -+} -+ -+/* used by all except pre-MMX P5 */ -+static void rdpmc_read_counters(const struct perfctr_cpu_state *state, -+ struct perfctr_low_ctrs *ctrs) -+{ -+ unsigned int cstatus, nrctrs, i; -+ -+ cstatus = state->cstatus; -+ if (perfctr_cstatus_has_tsc(cstatus)) -+ rdtscl(ctrs->tsc); -+ nrctrs = perfctr_cstatus_nractrs(cstatus); -+ for(i = 0; i < nrctrs; ++i) { -+ unsigned int pmc = state->pmc[i].map; -+ rdpmc_low(pmc, ctrs->pmc[i]); -+ } -+} -+ -+/* shared with MII and C6 */ -+static void p5_clear_counters(void) -+{ -+ clear_msr_range(MSR_P5_CESR, 1+2); -+} -+ -+/* -+ * Cyrix 6x86/MII/III. -+ * - Same MSR assignments as P5 MMX. Has RDPMC and two 48-bit PMCs. -+ * - Event codes and CESR formatting as in the plain P5 subset. -+ * - Many but not all P5 MMX event codes are implemented. -+ * - Cyrix adds a few more event codes. The event code is widened -+ * to 7 bits, and Cyrix puts the high bit in CESR bit 10 -+ * (and CESR bit 26 for PMC1). -+ */ -+ -+static int mii_check_control(struct perfctr_cpu_state *state, int is_global) -+{ -+ return p5_like_check_control(state, MII_CESR_RESERVED, 0); -+} -+ -+/* -+ * Centaur WinChip C6/2/3. -+ * - Same MSR assignments as P5 MMX. Has RDPMC and two 40-bit PMCs. -+ * - CESR is formatted with two halves, like P5. However, there -+ * are no defined control fields for e.g. CPL selection, and -+ * there is no defined method for stopping the counters. -+ * - Only a few event codes are defined. -+ * - The 64-bit TSC is synthesised from the low 32 bits of the -+ * two PMCs, and CESR has to be set up appropriately. -+ * Reprogramming CESR causes RDTSC to yield invalid results. 
-+ * (The C6 may also hang in this case, due to C6 erratum I-13.) -+ * Therefore, using the PMCs on any of these processors requires -+ * that the TSC is not accessed at all: -+ * 1. The kernel must be configured or a TSC-less processor, i.e. -+ * generic 586 or less. -+ * 2. The "notsc" boot parameter must be passed to the kernel. -+ * 3. User-space libraries and code must also be configured and -+ * compiled for a generic 586 or less. -+ */ -+ -+#if !defined(CONFIG_X86_TSC) -+static int c6_check_control(struct perfctr_cpu_state *state, int is_global) -+{ -+ if (state->control.tsc_on) -+ return -EINVAL; -+ return p5_like_check_control(state, C6_CESR_RESERVED, 1); -+} -+ -+static void c6_write_control(const struct perfctr_cpu_state *state) -+{ -+ struct per_cpu_cache *cache; -+ unsigned int cesr; -+ -+ if (perfctr_cstatus_nractrs(state->cstatus) == 0) /* no PMC is on */ -+ return; -+ cache = get_cpu_cache(); -+ cesr = state->k1.id; -+ if (cache->k1.p5_cesr != cesr) { -+ cache->k1.p5_cesr = cesr; -+ wrmsr(MSR_P5_CESR, cesr, 0); -+ } -+} -+#endif -+ -+/* -+ * Intel P6 family (Pentium Pro, Pentium II, and Pentium III cores, -+ * and Xeon and Celeron versions of Pentium II and III cores). -+ * - One TSC and two 40-bit PMCs. -+ * - One 32-bit EVNTSEL MSR for each PMC. -+ * - EVNTSEL0 contains a global enable/disable bit. -+ * That bit is reserved in EVNTSEL1. -+ * - Each EVNTSEL contains a CPL field. -+ * - Overflow interrupts are possible, but requires that the -+ * local APIC is available. Some Mobile P6s have no local APIC. -+ * - The PMCs cannot be initialised with arbitrary values, since -+ * wrmsr fills the high bits by sign-extending from bit 31. -+ * - Most events are symmetric, but a few are not. 
-+ */ -+ -+/* shared with K7 */ -+static int p6_like_check_control(struct perfctr_cpu_state *state, int is_k7) -+{ -+ unsigned int evntsel, i, nractrs, nrctrs, pmc_mask, pmc; -+ -+ nractrs = state->control.nractrs; -+ nrctrs = nractrs + state->control.nrictrs; -+ if (nrctrs < nractrs || nrctrs > (is_k7 ? 4 : 2)) -+ return -EINVAL; -+ -+ pmc_mask = 0; -+ for(i = 0; i < nrctrs; ++i) { -+ pmc = state->control.pmc_map[i]; -+ state->pmc[i].map = pmc; -+ if (pmc >= (is_k7 ? 4 : 2) || (pmc_mask & (1<<pmc))) -+ return -EINVAL; -+ pmc_mask |= (1<<pmc); -+ evntsel = state->control.evntsel[i]; -+ /* protect reserved bits */ -+ if (evntsel & P6_EVNTSEL_RESERVED) -+ return -EPERM; -+ /* check ENable bit */ -+ if (is_k7) { -+ /* ENable bit must be set in each evntsel */ -+ if (!(evntsel & P6_EVNTSEL_ENABLE)) -+ return -EINVAL; -+ } else { -+ /* only evntsel[0] has the ENable bit */ -+ if (evntsel & P6_EVNTSEL_ENABLE) { -+ if (pmc > 0) -+ return -EPERM; -+ } else { -+ if (pmc == 0) -+ return -EINVAL; -+ } -+ } -+ /* the CPL field must be non-zero */ -+ if (!(evntsel & P6_EVNTSEL_CPL)) -+ return -EINVAL; -+ /* INT bit must be off for a-mode and on for i-mode counters */ -+ if (evntsel & P6_EVNTSEL_INT) { -+ if (i < nractrs) -+ return -EINVAL; -+ } else { -+ if (i >= nractrs) -+ return -EINVAL; -+ } -+ } -+ state->k1.id = new_id(); -+ return 0; -+} -+ -+static int p6_check_control(struct perfctr_cpu_state *state, int is_global) -+{ -+ return p6_like_check_control(state, 0); -+} -+ -+#ifdef CONFIG_X86_LOCAL_APIC -+/* PRE: perfctr_cstatus_has_ictrs(state->cstatus) != 0 */ -+/* shared with K7 and P4 */ -+static void p6_like_isuspend(struct perfctr_cpu_state *state, -+ unsigned int msr_evntsel0) -+{ -+ struct per_cpu_cache *cache; -+ unsigned int cstatus, nrctrs, i; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ set_isuspend_cpu(state, cpu); /* early to limit cpu's live range */ -+ cache = __get_cpu_cache(cpu); -+ cstatus = state->cstatus; -+ nrctrs = perfctr_cstatus_nrctrs(cstatus); 
-+ for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) { -+ unsigned int pmc_raw, pmc_idx, now; -+ pmc_raw = state->pmc[i].map; -+ /* Note: P4_MASK_FAST_RDPMC is a no-op for P6 and K7. -+ We don't need to make it into a parameter. */ -+ pmc_idx = pmc_raw & P4_MASK_FAST_RDPMC; -+ cache->control.evntsel[pmc_idx] = 0; -+ /* On P4 this intensionally also clears the CCCR.OVF flag. */ -+ wrmsr(msr_evntsel0+pmc_idx, 0, 0); -+ /* P4 erratum N17 does not apply since we read only low 32 bits. */ -+ rdpmc_low(pmc_raw, now); -+ state->pmc[i].sum += now - state->pmc[i].start; -+ state->pmc[i].start = now; -+ } -+ /* cache->k1.id is still == state->k1.id */ -+} -+ -+/* PRE: perfctr_cstatus_has_ictrs(state->cstatus) != 0 */ -+/* shared with K7 and P4 */ -+static void p6_like_iresume(const struct perfctr_cpu_state *state, -+ unsigned int msr_evntsel0, -+ unsigned int msr_perfctr0) -+{ -+ struct per_cpu_cache *cache; -+ unsigned int cstatus, nrctrs, i; -+ int cpu; -+ -+ cpu = smp_processor_id(); -+ cache = __get_cpu_cache(cpu); -+ if (cache->k1.id == state->k1.id) { -+ cache->k1.id = 0; /* force reload of cleared EVNTSELs */ -+ if (is_isuspend_cpu(state, cpu)) -+ return; /* skip reload of PERFCTRs */ -+ } -+ cstatus = state->cstatus; -+ nrctrs = perfctr_cstatus_nrctrs(cstatus); -+ for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) { -+ /* Note: P4_MASK_FAST_RDPMC is a no-op for P6 and K7. -+ We don't need to make it into a parameter. */ -+ unsigned int pmc = state->pmc[i].map & P4_MASK_FAST_RDPMC; -+ /* If the control wasn't ours we must disable the evntsels -+ before reinitialising the counters, to prevent unexpected -+ counter increments and missed overflow interrupts. */ -+ if (cache->control.evntsel[pmc]) { -+ cache->control.evntsel[pmc] = 0; -+ wrmsr(msr_evntsel0+pmc, 0, 0); -+ } -+ /* P4 erratum N15 does not apply since the CCCR is disabled. 
*/ -+ wrmsr(msr_perfctr0+pmc, state->pmc[i].start, -1); -+ } -+ /* cache->k1.id remains != state->k1.id */ -+} -+ -+static void p6_isuspend(struct perfctr_cpu_state *state) -+{ -+ p6_like_isuspend(state, MSR_P6_EVNTSEL0); -+} -+ -+static void p6_iresume(const struct perfctr_cpu_state *state) -+{ -+ p6_like_iresume(state, MSR_P6_EVNTSEL0, MSR_P6_PERFCTR0); -+} -+#endif /* CONFIG_X86_LOCAL_APIC */ -+ -+/* shared with K7 and VC3 */ -+static void p6_like_write_control(const struct perfctr_cpu_state *state, -+ unsigned int msr_evntsel0) -+{ -+ struct per_cpu_cache *cache; -+ unsigned int nrctrs, i; -+ -+ cache = get_cpu_cache(); -+ if (cache->k1.id == state->k1.id) -+ return; -+ nrctrs = perfctr_cstatus_nrctrs(state->cstatus); -+ for(i = 0; i < nrctrs; ++i) { -+ unsigned int evntsel = state->control.evntsel[i]; -+ unsigned int pmc = state->pmc[i].map; -+ if (evntsel != cache->control.evntsel[pmc]) { -+ cache->control.evntsel[pmc] = evntsel; -+ wrmsr(msr_evntsel0+pmc, evntsel, 0); -+ } -+ } -+ cache->k1.id = state->k1.id; -+} -+ -+/* shared with VC3, Generic*/ -+static void p6_write_control(const struct perfctr_cpu_state *state) -+{ -+ p6_like_write_control(state, MSR_P6_EVNTSEL0); -+} -+ -+static void p6_clear_counters(void) -+{ -+ clear_msr_range(MSR_P6_EVNTSEL0, 2); -+ clear_msr_range(MSR_P6_PERFCTR0, 2); -+} -+ -+/* -+ * AMD K7 family (Athlon, Duron). -+ * - Somewhat similar to the Intel P6 family. -+ * - Four 48-bit PMCs. -+ * - Four 32-bit EVNTSEL MSRs with similar layout as in P6. -+ * - Completely different MSR assignments :-( -+ * - Fewer countable events defined :-( -+ * - The events appear to be completely symmetric. -+ * - The EVNTSEL MSRs are symmetric since each has its own enable bit. -+ * - Publicly available documentation is incomplete. -+ * - K7 model 1 does not have a local APIC. AMD Document #22007 -+ * Revision J hints that it may use debug interrupts instead. -+ * -+ * The K8 has the same hardware layout as the K7. 
It also has -+ * better documentation and a different set of available events. -+ */ -+ -+static int k7_check_control(struct perfctr_cpu_state *state, int is_global) -+{ -+ return p6_like_check_control(state, 1); -+} -+ -+#ifdef CONFIG_X86_LOCAL_APIC -+static void k7_isuspend(struct perfctr_cpu_state *state) -+{ -+ p6_like_isuspend(state, MSR_K7_EVNTSEL0); -+} -+ -+static void k7_iresume(const struct perfctr_cpu_state *state) -+{ -+ p6_like_iresume(state, MSR_K7_EVNTSEL0, MSR_K7_PERFCTR0); -+} -+#endif /* CONFIG_X86_LOCAL_APIC */ -+ -+static void k7_write_control(const struct perfctr_cpu_state *state) -+{ -+ p6_like_write_control(state, MSR_K7_EVNTSEL0); -+} -+ -+static void k7_clear_counters(void) -+{ -+ clear_msr_range(MSR_K7_EVNTSEL0, 4+4); -+} -+ -+/* -+ * VIA C3 family. -+ * - A Centaur design somewhat similar to the P6/Celeron. -+ * - PERFCTR0 is an alias for the TSC, and EVNTSEL0 is read-only. -+ * - PERFCTR1 is 32 bits wide. -+ * - EVNTSEL1 has no defined control fields, and there is no -+ * defined method for stopping the counter. -+ * - According to testing, the reserved fields in EVNTSEL1 have -+ * no function. We always fill them with zeroes. -+ * - Only a few event codes are defined. -+ * - No local APIC or interrupt-mode support. -+ * - pmc_map[0] must be 1, if nractrs == 1. -+ */ -+static int vc3_check_control(struct perfctr_cpu_state *state, int is_global) -+{ -+ if (state->control.nrictrs || state->control.nractrs > 1) -+ return -EINVAL; -+ if (state->control.nractrs == 1) { -+ if (state->control.pmc_map[0] != 1) -+ return -EINVAL; -+ state->pmc[0].map = 1; -+ if (state->control.evntsel[0] & VC3_EVNTSEL1_RESERVED) -+ return -EPERM; -+ state->k1.id = state->control.evntsel[0]; -+ } else -+ state->k1.id = 0; -+ return 0; -+} -+ -+static void vc3_clear_counters(void) -+{ -+ /* Not documented, but seems to be default after boot. */ -+ wrmsr(MSR_P6_EVNTSEL0+1, 0x00070079, 0); -+} -+ -+/* -+ * Intel Pentium 4. 
-+ * Current implementation restrictions: -+ * - No DS/PEBS support. -+ * -+ * Known quirks: -+ * - OVF_PMI+FORCE_OVF counters must have an ireset value of -1. -+ * This allows the regular overflow check to also handle FORCE_OVF -+ * counters. Not having this restriction would lead to MAJOR -+ * complications in the driver's "detect overflow counters" code. -+ * There is no loss of functionality since the ireset value doesn't -+ * affect the counter's PMI rate for FORCE_OVF counters. -+ * - In experiments with FORCE_OVF counters, and regular OVF_PMI -+ * counters with small ireset values between -8 and -1, it appears -+ * that the faulting instruction is subjected to a new PMI before -+ * it can complete, ad infinitum. This occurs even though the driver -+ * clears the CCCR (and in testing also the ESCR) and invokes a -+ * user-space signal handler before restoring the CCCR and resuming -+ * the instruction. -+ */ -+ -+/* -+ * Table 15-4 in the IA32 Volume 3 manual contains a 18x8 entry mapping -+ * from counter/CCCR number (0-17) and ESCR SELECT value (0-7) to the -+ * actual ESCR MSR number. This mapping contains some repeated patterns, -+ * so we can compact it to a 4x8 table of MSR offsets: -+ * -+ * 1. CCCRs 16 and 17 are mapped just like CCCRs 13 and 14, respectively. -+ * Thus, we only consider the 16 CCCRs 0-15. -+ * 2. The CCCRs are organised in pairs, and both CCCRs in a pair use the -+ * same mapping. Thus, we only consider the 8 pairs 0-7. -+ * 3. In each pair of pairs, the second odd-numbered pair has the same domain -+ * as the first even-numbered pair, and the range is 1+ the range of the -+ * the first even-numbered pair. For example, CCCR(0) and (1) map ESCR -+ * SELECT(7) to 0x3A0, and CCCR(2) and (3) map it to 0x3A1. -+ * The only exception is that pair (7) [CCCRs 14 and 15] does not have -+ * ESCR SELECT(3) in its domain, like pair (6) [CCCRs 12 and 13] has. 
-+ * NOTE: Revisions of IA32 Volume 3 older than #245472-007 had an error -+ * in this table: CCCRs 12, 13, and 16 had their mappings for ESCR SELECT -+ * values 2 and 3 swapped. -+ * 4. All MSR numbers are on the form 0x3??. Instead of storing these as -+ * 16-bit numbers, the table only stores the 8-bit offsets from 0x300. -+ */ -+ -+static const unsigned char p4_cccr_escr_map[4][8] = { -+ /* 0x00 and 0x01 as is, 0x02 and 0x03 are +1 */ -+ [0x00/4] { [7] 0xA0, -+ [6] 0xA2, -+ [2] 0xAA, -+ [4] 0xAC, -+ [0] 0xB2, -+ [1] 0xB4, -+ [3] 0xB6, -+ [5] 0xC8, }, -+ /* 0x04 and 0x05 as is, 0x06 and 0x07 are +1 */ -+ [0x04/4] { [0] 0xC0, -+ [2] 0xC2, -+ [1] 0xC4, }, -+ /* 0x08 and 0x09 as is, 0x0A and 0x0B are +1 */ -+ [0x08/4] { [1] 0xA4, -+ [0] 0xA6, -+ [5] 0xA8, -+ [2] 0xAE, -+ [3] 0xB0, }, -+ /* 0x0C, 0x0D, and 0x10 as is, -+ 0x0E, 0x0F, and 0x11 are +1 except [3] is not in the domain */ -+ [0x0C/4] { [4] 0xB8, -+ [5] 0xCC, -+ [6] 0xE0, -+ [0] 0xBA, -+ [2] 0xBC, -+ [3] 0xBE, -+ [1] 0xCA, }, -+}; -+ -+static unsigned int p4_escr_addr(unsigned int pmc, unsigned int cccr_val) -+{ -+ unsigned int escr_select, pair, escr_offset; -+ -+ escr_select = P4_CCCR_ESCR_SELECT(cccr_val); -+ if (pmc > 0x11) -+ return 0; /* pmc range error */ -+ if (pmc > 0x0F) -+ pmc -= 3; /* 0 <= pmc <= 0x0F */ -+ pair = pmc / 2; /* 0 <= pair <= 7 */ -+ escr_offset = p4_cccr_escr_map[pair / 2][escr_select]; -+ if (!escr_offset || (pair == 7 && escr_select == 3)) -+ return 0; /* ESCR SELECT range error */ -+ return escr_offset + (pair & 1) + 0x300; -+}; -+ -+static int p4_IQ_ESCR_ok; /* only models <= 2 can use IQ_ESCR{0,1} */ -+static int p4_is_ht; /* affects several CCCR & ESCR fields */ -+static int p4_extended_cascade_ok; /* only models >= 2 can use extended cascading */ -+ -+static int p4_check_control(struct perfctr_cpu_state *state, int is_global) -+{ -+ unsigned int i, nractrs, nrctrs, pmc_mask; -+ -+ nractrs = state->control.nractrs; -+ nrctrs = nractrs + state->control.nrictrs; -+ if (nrctrs 
< nractrs || nrctrs > 18) -+ return -EINVAL; -+ -+ pmc_mask = 0; -+ for(i = 0; i < nrctrs; ++i) { -+ unsigned int pmc, cccr_val, escr_val, escr_addr; -+ /* check that pmc_map[] is well-defined; -+ pmc_map[i] is what we pass to RDPMC, the PMC itself -+ is extracted by masking off the FAST_RDPMC flag */ -+ pmc = state->control.pmc_map[i] & ~P4_FAST_RDPMC; -+ state->pmc[i].map = state->control.pmc_map[i]; -+ if (pmc >= 18 || (pmc_mask & (1<<pmc))) -+ return -EINVAL; -+ pmc_mask |= (1<<pmc); -+ /* check CCCR contents */ -+ cccr_val = state->control.evntsel[i]; -+ if (cccr_val & P4_CCCR_RESERVED) -+ return -EPERM; -+ if (cccr_val & P4_CCCR_EXTENDED_CASCADE) { -+ if (!p4_extended_cascade_ok) -+ return -EPERM; -+ if (!(pmc == 12 || pmc >= 15)) -+ return -EPERM; -+ } -+ if ((cccr_val & P4_CCCR_ACTIVE_THREAD) != P4_CCCR_ACTIVE_THREAD && !p4_is_ht) -+ return -EINVAL; -+ if (!(cccr_val & (P4_CCCR_ENABLE | P4_CCCR_CASCADE | P4_CCCR_EXTENDED_CASCADE))) -+ return -EINVAL; -+ if (cccr_val & P4_CCCR_OVF_PMI_T0) { -+ if (i < nractrs) -+ return -EINVAL; -+ if ((cccr_val & P4_CCCR_FORCE_OVF) && -+ state->control.ireset[i] != -1) -+ return -EINVAL; -+ } else { -+ if (i >= nractrs) -+ return -EINVAL; -+ } -+ /* check ESCR contents */ -+ escr_val = state->control.p4.escr[i]; -+ if (escr_val & P4_ESCR_RESERVED) -+ return -EPERM; -+ if ((escr_val & P4_ESCR_CPL_T1) && (!p4_is_ht || !is_global)) -+ return -EINVAL; -+ /* compute and cache ESCR address */ -+ escr_addr = p4_escr_addr(pmc, cccr_val); -+ if (!escr_addr) -+ return -EINVAL; /* ESCR SELECT range error */ -+ /* IQ_ESCR0 and IQ_ESCR1 only exist in models <= 2 */ -+ if ((escr_addr & ~0x001) == 0x3BA && !p4_IQ_ESCR_ok) -+ return -EINVAL; -+ /* XXX: Two counters could map to the same ESCR. Should we -+ check that they use the same ESCR value? 
*/ -+ state->p4_escr_map[i] = escr_addr - MSR_P4_ESCR0; -+ } -+ /* check ReplayTagging control (PEBS_ENABLE and PEBS_MATRIX_VERT) */ -+ if (state->control.p4.pebs_enable) { -+ if (!nrctrs) -+ return -EPERM; -+ if (state->control.p4.pebs_enable & P4_PE_RESERVED) -+ return -EPERM; -+ if (!(state->control.p4.pebs_enable & P4_PE_UOP_TAG)) -+ return -EINVAL; -+ if (!(state->control.p4.pebs_enable & P4_PE_REPLAY_TAG_BITS)) -+ return -EINVAL; -+ if (state->control.p4.pebs_matrix_vert & P4_PMV_RESERVED) -+ return -EPERM; -+ if (!(state->control.p4.pebs_matrix_vert & P4_PMV_REPLAY_TAG_BITS)) -+ return -EINVAL; -+ } else if (state->control.p4.pebs_matrix_vert) -+ return -EPERM; -+ state->k1.id = new_id(); -+ return 0; -+} -+ -+#ifdef CONFIG_X86_LOCAL_APIC -+static void p4_isuspend(struct perfctr_cpu_state *state) -+{ -+ return p6_like_isuspend(state, MSR_P4_CCCR0); -+} -+ -+static void p4_iresume(const struct perfctr_cpu_state *state) -+{ -+ return p6_like_iresume(state, MSR_P4_CCCR0, MSR_P4_PERFCTR0); -+} -+#endif /* CONFIG_X86_LOCAL_APIC */ -+ -+static void p4_write_control(const struct perfctr_cpu_state *state) -+{ -+ struct per_cpu_cache *cache; -+ unsigned int nrctrs, i; -+ -+ /* XXX: temporary debug check */ -+ if (cpu_isset(smp_processor_id(), perfctr_cpus_forbidden_mask) && -+ perfctr_cstatus_nrctrs(state->cstatus)) -+ printk(KERN_ERR "%s: BUG! 
CPU %u is in the forbidden set\n", -+ __FUNCTION__, smp_processor_id()); -+ cache = get_cpu_cache(); -+ if (cache->k1.id == state->k1.id) -+ return; -+ nrctrs = perfctr_cstatus_nrctrs(state->cstatus); -+ for(i = 0; i < nrctrs; ++i) { -+ unsigned int escr_val, escr_off, cccr_val, pmc; -+ escr_val = state->control.p4.escr[i]; -+ escr_off = state->p4_escr_map[i]; -+ if (escr_val != cache->control.escr[escr_off]) { -+ cache->control.escr[escr_off] = escr_val; -+ wrmsr(MSR_P4_ESCR0+escr_off, escr_val, 0); -+ } -+ cccr_val = state->control.evntsel[i]; -+ pmc = state->pmc[i].map & P4_MASK_FAST_RDPMC; -+ if (cccr_val != cache->control.evntsel[pmc]) { -+ cache->control.evntsel[pmc] = cccr_val; -+ wrmsr(MSR_P4_CCCR0+pmc, cccr_val, 0); -+ } -+ } -+ if (state->control.p4.pebs_enable != cache->control.pebs_enable) { -+ cache->control.pebs_enable = state->control.p4.pebs_enable; -+ wrmsr(MSR_P4_PEBS_ENABLE, state->control.p4.pebs_enable, 0); -+ } -+ if (state->control.p4.pebs_matrix_vert != cache->control.pebs_matrix_vert) { -+ cache->control.pebs_matrix_vert = state->control.p4.pebs_matrix_vert; -+ wrmsr(MSR_P4_PEBS_MATRIX_VERT, state->control.p4.pebs_matrix_vert, 0); -+ } -+ cache->k1.id = state->k1.id; -+} -+ -+static void p4_clear_counters(void) -+{ -+ /* MSR 0x3F0 seems to have a default value of 0xFC00, but current -+ docs doesn't fully define it, so leave it alone for now. */ -+ /* clear PEBS_ENABLE and PEBS_MATRIX_VERT; they handle both PEBS -+ and ReplayTagging, and should exist even if PEBS is disabled */ -+ clear_msr_range(0x3F1, 2); -+ clear_msr_range(0x3A0, 26); -+ if (p4_IQ_ESCR_ok) -+ clear_msr_range(0x3BA, 2); -+ clear_msr_range(0x3BC, 3); -+ clear_msr_range(0x3C0, 6); -+ clear_msr_range(0x3C8, 6); -+ clear_msr_range(0x3E0, 2); -+ clear_msr_range(MSR_P4_CCCR0, 18); -+ clear_msr_range(MSR_P4_PERFCTR0, 18); -+} -+ -+/* -+ * Generic driver for any x86 with a working TSC. 
-+ */ -+ -+static int generic_check_control(struct perfctr_cpu_state *state, int is_global) -+{ -+ if (state->control.nractrs || state->control.nrictrs) -+ return -EINVAL; -+ return 0; -+} -+ -+static void generic_clear_counters(void) -+{ -+} -+ -+/* -+ * Driver methods, internal and exported. -+ * -+ * Frequently called functions (write_control, read_counters, -+ * isuspend and iresume) are back-patched to invoke the correct -+ * processor-specific methods directly, thereby saving the -+ * overheads of indirect function calls. -+ * -+ * Backpatchable call sites must have been "finalised" after -+ * initialisation. The reason for this is that unsynchronised code -+ * modification doesn't work in multiprocessor systems, due to -+ * Intel P6 errata. Consequently, all backpatchable call sites -+ * must be known and local to this file. -+ * -+ * Backpatchable calls must initially be to 'noinline' stubs. -+ * Otherwise the compiler may inline the stubs, which breaks -+ * redirect_call() and finalise_backpatching(). 
-+ */ -+ -+static int redirect_call_disable; -+ -+static noinline void redirect_call(void *ra, void *to) -+{ -+ /* XXX: make this function __init later */ -+ if (redirect_call_disable) -+ printk(KERN_ERR __FILE__ ":%s: unresolved call to %p at %p\n", -+ __FUNCTION__, to, ra); -+ /* we can only redirect `call near relative' instructions */ -+ if (*((unsigned char*)ra - 5) != 0xE8) { -+ printk(KERN_WARNING __FILE__ ":%s: unable to redirect caller %p to %p\n", -+ __FUNCTION__, ra, to); -+ return; -+ } -+ *(int*)((char*)ra - 4) = (char*)to - (char*)ra; -+} -+ -+static void (*write_control)(const struct perfctr_cpu_state*); -+static noinline void perfctr_cpu_write_control(const struct perfctr_cpu_state *state) -+{ -+ redirect_call(__builtin_return_address(0), write_control); -+ return write_control(state); -+} -+ -+static void (*read_counters)(const struct perfctr_cpu_state*, -+ struct perfctr_low_ctrs*); -+static noinline void perfctr_cpu_read_counters(const struct perfctr_cpu_state *state, -+ struct perfctr_low_ctrs *ctrs) -+{ -+ redirect_call(__builtin_return_address(0), read_counters); -+ return read_counters(state, ctrs); -+} -+ -+#ifdef CONFIG_X86_LOCAL_APIC -+static void (*cpu_isuspend)(struct perfctr_cpu_state*); -+static noinline void perfctr_cpu_isuspend(struct perfctr_cpu_state *state) -+{ -+ redirect_call(__builtin_return_address(0), cpu_isuspend); -+ return cpu_isuspend(state); -+} -+ -+static void (*cpu_iresume)(const struct perfctr_cpu_state*); -+static noinline void perfctr_cpu_iresume(const struct perfctr_cpu_state *state) -+{ -+ redirect_call(__builtin_return_address(0), cpu_iresume); -+ return cpu_iresume(state); -+} -+ -+/* Call perfctr_cpu_ireload() just before perfctr_cpu_resume() to -+ bypass internal caching and force a reload if the I-mode PMCs. 
*/ -+void perfctr_cpu_ireload(struct perfctr_cpu_state *state) -+{ -+#ifdef CONFIG_SMP -+ clear_isuspend_cpu(state); -+#else -+ get_cpu_cache()->k1.id = 0; -+#endif -+} -+ -+/* PRE: the counters have been suspended and sampled by perfctr_cpu_suspend() */ -+static int lvtpc_reinit_needed; -+unsigned int perfctr_cpu_identify_overflow(struct perfctr_cpu_state *state) -+{ -+ unsigned int cstatus, nrctrs, pmc, pmc_mask; -+ -+ cstatus = state->cstatus; -+ pmc = perfctr_cstatus_nractrs(cstatus); -+ nrctrs = perfctr_cstatus_nrctrs(cstatus); -+ -+ for(pmc_mask = 0; pmc < nrctrs; ++pmc) { -+ if ((int)state->pmc[pmc].start >= 0) { /* XXX: ">" ? */ -+ /* XXX: "+=" to correct for overshots */ -+ state->pmc[pmc].start = state->control.ireset[pmc]; -+ pmc_mask |= (1 << pmc); -+ /* On a P4 we should now clear the OVF flag in the -+ counter's CCCR. However, p4_isuspend() already -+ did that as a side-effect of clearing the CCCR -+ in order to stop the i-mode counters. */ -+ } -+ } -+ if (lvtpc_reinit_needed) -+ apic_write(APIC_LVTPC, LOCAL_PERFCTR_VECTOR); -+ return pmc_mask; -+} -+ -+static inline int check_ireset(const struct perfctr_cpu_state *state) -+{ -+ unsigned int nrctrs, i; -+ -+ i = state->control.nractrs; -+ nrctrs = i + state->control.nrictrs; -+ for(; i < nrctrs; ++i) -+ if (state->control.ireset[i] >= 0) -+ return -EINVAL; -+ return 0; -+} -+ -+static inline void setup_imode_start_values(struct perfctr_cpu_state *state) -+{ -+ unsigned int cstatus, nrctrs, i; -+ -+ cstatus = state->cstatus; -+ nrctrs = perfctr_cstatus_nrctrs(cstatus); -+ for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) -+ state->pmc[i].start = state->control.ireset[i]; -+} -+ -+static inline void debug_no_imode(const struct perfctr_cpu_state *state) -+{ -+#ifdef CONFIG_PERFCTR_DEBUG -+ if (perfctr_cstatus_has_ictrs(state->cstatus)) -+ printk(KERN_ERR "perfctr/x86.c: BUG! 
updating control in" -+ " perfctr %p on cpu %u while it has cstatus %x" -+ " (pid %d, comm %s)\n", -+ state, smp_processor_id(), state->cstatus, -+ current->pid, current->comm); -+#endif -+} -+ -+#else /* CONFIG_X86_LOCAL_APIC */ -+static inline void perfctr_cpu_isuspend(struct perfctr_cpu_state *state) { } -+static inline void perfctr_cpu_iresume(const struct perfctr_cpu_state *state) { } -+static inline int check_ireset(const struct perfctr_cpu_state *state) { return 0; } -+static inline void setup_imode_start_values(struct perfctr_cpu_state *state) { } -+static inline void debug_no_imode(const struct perfctr_cpu_state *state) { } -+#endif /* CONFIG_X86_LOCAL_APIC */ -+ -+static int (*check_control)(struct perfctr_cpu_state*, int); -+int perfctr_cpu_update_control(struct perfctr_cpu_state *state, int is_global) -+{ -+ int err; -+ -+ debug_no_imode(state); -+ clear_isuspend_cpu(state); -+ state->cstatus = 0; -+ -+ /* disallow i-mode counters if we cannot catch the interrupts */ -+ if (!(perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT) -+ && state->control.nrictrs) -+ return -EPERM; -+ -+ err = check_control(state, is_global); -+ if (err < 0) -+ return err; -+ err = check_ireset(state); -+ if (err < 0) -+ return err; -+ state->cstatus = perfctr_mk_cstatus(state->control.tsc_on, -+ state->control.nractrs, -+ state->control.nrictrs); -+ setup_imode_start_values(state); -+ return 0; -+} -+ -+void perfctr_cpu_suspend(struct perfctr_cpu_state *state) -+{ -+ unsigned int i, cstatus, nractrs; -+ struct perfctr_low_ctrs now; -+ -+ if (perfctr_cstatus_has_ictrs(state->cstatus)) -+ perfctr_cpu_isuspend(state); -+ perfctr_cpu_read_counters(state, &now); -+ cstatus = state->cstatus; -+ if (perfctr_cstatus_has_tsc(cstatus)) -+ state->tsc_sum += now.tsc - state->tsc_start; -+ nractrs = perfctr_cstatus_nractrs(cstatus); -+ for(i = 0; i < nractrs; ++i) -+ state->pmc[i].sum += now.pmc[i] - state->pmc[i].start; -+ /* perfctr_cpu_disable_rdpmc(); */ /* not for x86 */ -+} -+ -+void 
perfctr_cpu_resume(struct perfctr_cpu_state *state) -+{ -+ if (perfctr_cstatus_has_ictrs(state->cstatus)) -+ perfctr_cpu_iresume(state); -+ /* perfctr_cpu_enable_rdpmc(); */ /* not for x86 or global-mode */ -+ perfctr_cpu_write_control(state); -+ //perfctr_cpu_read_counters(state, &state->start); -+ { -+ struct perfctr_low_ctrs now; -+ unsigned int i, cstatus, nrctrs; -+ perfctr_cpu_read_counters(state, &now); -+ cstatus = state->cstatus; -+ if (perfctr_cstatus_has_tsc(cstatus)) -+ state->tsc_start = now.tsc; -+ nrctrs = perfctr_cstatus_nractrs(cstatus); -+ for(i = 0; i < nrctrs; ++i) -+ state->pmc[i].start = now.pmc[i]; -+ } -+ /* XXX: if (SMP && start.tsc == now.tsc) ++now.tsc; */ -+} -+ -+void perfctr_cpu_sample(struct perfctr_cpu_state *state) -+{ -+ unsigned int i, cstatus, nractrs; -+ struct perfctr_low_ctrs now; -+ -+ perfctr_cpu_read_counters(state, &now); -+ cstatus = state->cstatus; -+ if (perfctr_cstatus_has_tsc(cstatus)) { -+ state->tsc_sum += now.tsc - state->tsc_start; -+ state->tsc_start = now.tsc; -+ } -+ nractrs = perfctr_cstatus_nractrs(cstatus); -+ for(i = 0; i < nractrs; ++i) { -+ state->pmc[i].sum += now.pmc[i] - state->pmc[i].start; -+ state->pmc[i].start = now.pmc[i]; -+ } -+} -+ -+static void (*clear_counters)(void); -+static void perfctr_cpu_clear_counters(void) -+{ -+ return clear_counters(); -+} -+ -+/**************************************************************** -+ * * -+ * Processor detection and initialisation procedures. 
* -+ * * -+ ****************************************************************/ -+ -+static inline void clear_perfctr_cpus_forbidden_mask(void) -+{ -+#if !defined(perfctr_cpus_forbidden_mask) -+ cpus_clear(perfctr_cpus_forbidden_mask); -+#endif -+} -+ -+static inline void set_perfctr_cpus_forbidden_mask(cpumask_t mask) -+{ -+#if !defined(perfctr_cpus_forbidden_mask) -+ perfctr_cpus_forbidden_mask = mask; -+#endif -+} -+ -+/* see comment above at redirect_call() */ -+static void __init finalise_backpatching(void) -+{ -+ struct per_cpu_cache *cache; -+ struct perfctr_cpu_state state; -+ cpumask_t old_mask; -+ -+ old_mask = perfctr_cpus_forbidden_mask; -+ clear_perfctr_cpus_forbidden_mask(); -+ -+ cache = get_cpu_cache(); -+ memset(cache, 0, sizeof *cache); -+ memset(&state, 0, sizeof state); -+ state.cstatus = -+ (perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT) -+ ? __perfctr_mk_cstatus(0, 1, 0, 0) -+ : 0; -+ perfctr_cpu_sample(&state); -+ perfctr_cpu_resume(&state); -+ perfctr_cpu_suspend(&state); -+ -+ set_perfctr_cpus_forbidden_mask(old_mask); -+ -+ redirect_call_disable = 1; -+} -+ -+#ifdef CONFIG_SMP -+ -+cpumask_t perfctr_cpus_forbidden_mask; -+ -+static void __init p4_ht_mask_setup_cpu(void *forbidden) -+{ -+ unsigned int local_apic_physical_id = cpuid_ebx(1) >> 24; -+ unsigned int logical_processor_id = local_apic_physical_id & 1; -+ if (logical_processor_id != 0) -+ /* We rely on cpu_set() being atomic! 
*/ -+ cpu_set(smp_processor_id(), *(cpumask_t*)forbidden); -+} -+ -+static int __init p4_ht_smp_init(void) -+{ -+ cpumask_t forbidden; -+ unsigned int cpu; -+ -+ cpus_clear(forbidden); -+ smp_call_function(p4_ht_mask_setup_cpu, &forbidden, 1, 1); -+ p4_ht_mask_setup_cpu(&forbidden); -+ if (cpus_empty(forbidden)) -+ return 0; -+ perfctr_cpus_forbidden_mask = forbidden; -+ printk(KERN_INFO "perfctr/x86.c: hyper-threaded P4s detected:" -+ " restricting access for CPUs"); -+ for(cpu = 0; cpu < NR_CPUS; ++cpu) -+ if (cpu_isset(cpu, forbidden)) -+ printk(" %u", cpu); -+ printk("\n"); -+ return 0; -+} -+#else /* SMP */ -+#define p4_ht_smp_init() (0) -+#endif /* SMP */ -+ -+static int __init p4_ht_init(void) -+{ -+ unsigned int nr_siblings; -+ -+ if (!cpu_has_ht) -+ return 0; -+ nr_siblings = (cpuid_ebx(1) >> 16) & 0xFF; -+ if (nr_siblings > 2) { -+ printk(KERN_WARNING "perfctr/x86.c: hyper-threaded P4s detected:" -+ " unsupported number of siblings: %u -- bailing out\n", -+ nr_siblings); -+ return -ENODEV; -+ } -+ if (nr_siblings < 2) -+ return 0; -+ p4_is_ht = 1; /* needed even in a UP kernel */ -+ return p4_ht_smp_init(); -+} -+ -+static int __init intel_init(void) -+{ -+ static char p5_name[] __initdata = "Intel P5"; -+ static char p6_name[] __initdata = "Intel P6"; -+ static char p4_name[] __initdata = "Intel P4"; -+ unsigned int misc_enable; -+ -+ if (!cpu_has_tsc) -+ return -ENODEV; -+ switch (current_cpu_data.x86) { -+ case 5: -+ if (cpu_has_mmx) { -+ perfctr_info.cpu_type = PERFCTR_X86_INTEL_P5MMX; -+ read_counters = rdpmc_read_counters; -+ -+ /* Avoid Pentium Erratum 74. 
*/ -+ if (current_cpu_data.x86_model == 4 && -+ (current_cpu_data.x86_mask == 4 || -+ (current_cpu_data.x86_mask == 3 && -+ ((cpuid_eax(1) >> 12) & 0x3) == 1))) -+ perfctr_info.cpu_features &= ~PERFCTR_FEATURE_RDPMC; -+ } else { -+ perfctr_info.cpu_type = PERFCTR_X86_INTEL_P5; -+ perfctr_info.cpu_features &= ~PERFCTR_FEATURE_RDPMC; -+ read_counters = p5_read_counters; -+ } -+ perfctr_set_tests_type(PTT_P5); -+ perfctr_cpu_name = p5_name; -+ write_control = p5_write_control; -+ check_control = p5_check_control; -+ clear_counters = p5_clear_counters; -+ return 0; -+ case 6: -+ if (current_cpu_data.x86_model == 9 || -+ current_cpu_data.x86_model == 13) { /* Pentium M */ -+ /* Pentium M added the MISC_ENABLE MSR from P4. */ -+ rdmsr_low(MSR_IA32_MISC_ENABLE, misc_enable); -+ if (!(misc_enable & MSR_IA32_MISC_ENABLE_PERF_AVAIL)) -+ break; -+ /* Erratum Y3 probably does not apply since we -+ read only the low 32 bits. */ -+ perfctr_info.cpu_type = PERFCTR_X86_INTEL_PENTM; -+ } else if (current_cpu_data.x86_model >= 7) { /* PIII */ -+ perfctr_info.cpu_type = PERFCTR_X86_INTEL_PIII; -+ } else if (current_cpu_data.x86_model >= 3) { /* PII or Celeron */ -+ perfctr_info.cpu_type = PERFCTR_X86_INTEL_PII; -+ } else { -+ perfctr_info.cpu_type = PERFCTR_X86_INTEL_P6; -+ -+ /* Avoid Pentium Pro Erratum 26. 
*/ -+ if (current_cpu_data.x86_mask < 9) -+ perfctr_info.cpu_features &= ~PERFCTR_FEATURE_RDPMC; -+ } -+ perfctr_set_tests_type(PTT_P6); -+ perfctr_cpu_name = p6_name; -+ read_counters = rdpmc_read_counters; -+ write_control = p6_write_control; -+ check_control = p6_check_control; -+ clear_counters = p6_clear_counters; -+#ifdef CONFIG_X86_LOCAL_APIC -+ if (cpu_has_apic) { -+ perfctr_info.cpu_features |= PERFCTR_FEATURE_PCINT; -+ cpu_isuspend = p6_isuspend; -+ cpu_iresume = p6_iresume; -+ /* P-M apparently inherited P4's LVTPC auto-masking :-( */ -+ if (current_cpu_data.x86_model == 9 || -+ current_cpu_data.x86_model == 13) -+ lvtpc_reinit_needed = 1; -+ } -+#endif -+ return 0; -+ case 15: /* Pentium 4 */ -+ rdmsr_low(MSR_IA32_MISC_ENABLE, misc_enable); -+ if (!(misc_enable & MSR_IA32_MISC_ENABLE_PERF_AVAIL)) -+ break; -+ if (p4_ht_init() != 0) -+ break; -+ if (current_cpu_data.x86_model <= 2) -+ p4_IQ_ESCR_ok = 1; -+ if (current_cpu_data.x86_model >= 2) -+ p4_extended_cascade_ok = 1; -+ if (current_cpu_data.x86_model >= 3) { -+ /* Model 3 removes IQ_ESCR{0,1} and adds one event. */ -+ perfctr_info.cpu_type = PERFCTR_X86_INTEL_P4M3; -+ } else if (current_cpu_data.x86_model >= 2) { -+ /* Model 2 changed the ESCR Event Mask programming -+ details for several events. 
*/ -+ perfctr_info.cpu_type = PERFCTR_X86_INTEL_P4M2; -+ } else { -+ perfctr_info.cpu_type = PERFCTR_X86_INTEL_P4; -+ } -+ perfctr_set_tests_type(PTT_P4); -+ perfctr_cpu_name = p4_name; -+ read_counters = rdpmc_read_counters; -+ write_control = p4_write_control; -+ check_control = p4_check_control; -+ clear_counters = p4_clear_counters; -+#ifdef CONFIG_X86_LOCAL_APIC -+ if (cpu_has_apic) { -+ perfctr_info.cpu_features |= PERFCTR_FEATURE_PCINT; -+ cpu_isuspend = p4_isuspend; -+ cpu_iresume = p4_iresume; -+ lvtpc_reinit_needed = 1; -+ } -+#endif -+ return 0; -+ } -+ return -ENODEV; -+} -+ -+static int __init amd_init(void) -+{ -+ static char amd_name[] __initdata = "AMD K7/K8"; -+ -+ if (!cpu_has_tsc) -+ return -ENODEV; -+ switch (current_cpu_data.x86) { -+ case 6: /* K7 */ -+ perfctr_info.cpu_type = PERFCTR_X86_AMD_K7; -+ break; -+ case 15: /* K8. Like a K7 with a different event set. */ -+ if ((current_cpu_data.x86_model > 5) || -+ (current_cpu_data.x86_model >= 4 && current_cpu_data.x86_mask >= 8)) { -+ perfctr_info.cpu_type = PERFCTR_X86_AMD_K8C; -+ } else { -+ perfctr_info.cpu_type = PERFCTR_X86_AMD_K8; -+ } -+ break; -+ default: -+ return -ENODEV; -+ } -+ perfctr_set_tests_type(PTT_AMD); -+ perfctr_cpu_name = amd_name; -+ read_counters = rdpmc_read_counters; -+ write_control = k7_write_control; -+ check_control = k7_check_control; -+ clear_counters = k7_clear_counters; -+#ifdef CONFIG_X86_LOCAL_APIC -+ if (cpu_has_apic) { -+ perfctr_info.cpu_features |= PERFCTR_FEATURE_PCINT; -+ cpu_isuspend = k7_isuspend; -+ cpu_iresume = k7_iresume; -+ } -+#endif -+ return 0; -+} -+ -+static int __init cyrix_init(void) -+{ -+ static char mii_name[] __initdata = "Cyrix 6x86MX/MII/III"; -+ if (!cpu_has_tsc) -+ return -ENODEV; -+ switch (current_cpu_data.x86) { -+ case 6: /* 6x86MX, MII, or III */ -+ perfctr_info.cpu_type = PERFCTR_X86_CYRIX_MII; -+ perfctr_set_tests_type(PTT_P5); -+ perfctr_cpu_name = mii_name; -+ read_counters = rdpmc_read_counters; -+ write_control = 
p5_write_control; -+ check_control = mii_check_control; -+ clear_counters = p5_clear_counters; -+ return 0; -+ } -+ return -ENODEV; -+} -+ -+static int __init centaur_init(void) -+{ -+#if !defined(CONFIG_X86_TSC) -+ static char winchip_name[] __initdata = "WinChip C6/2/3"; -+#endif -+ static char vc3_name[] __initdata = "VIA C3"; -+ switch (current_cpu_data.x86) { -+#if !defined(CONFIG_X86_TSC) -+ case 5: -+ switch (current_cpu_data.x86_model) { -+ case 4: /* WinChip C6 */ -+ perfctr_info.cpu_type = PERFCTR_X86_WINCHIP_C6; -+ break; -+ case 8: /* WinChip 2, 2A, or 2B */ -+ case 9: /* WinChip 3, a 2A with larger cache and lower voltage */ -+ perfctr_info.cpu_type = PERFCTR_X86_WINCHIP_2; -+ break; -+ default: -+ return -ENODEV; -+ } -+ perfctr_set_tests_type(PTT_WINCHIP); -+ perfctr_cpu_name = winchip_name; -+ /* -+ * TSC must be inaccessible for perfctrs to work. -+ */ -+ if (!(read_cr4() & X86_CR4_TSD) || cpu_has_tsc) -+ return -ENODEV; -+ perfctr_info.cpu_features &= ~PERFCTR_FEATURE_RDTSC; -+ read_counters = rdpmc_read_counters; -+ write_control = c6_write_control; -+ check_control = c6_check_control; -+ clear_counters = p5_clear_counters; -+ return 0; -+#endif -+ case 6: /* VIA C3 */ -+ if (!cpu_has_tsc) -+ return -ENODEV; -+ switch (current_cpu_data.x86_model) { -+ case 6: /* Cyrix III */ -+ case 7: /* Samuel 2, Ezra (steppings >= 8) */ -+ case 8: /* Ezra-T */ -+ case 9: /* Antaur/Nehemiah */ -+ break; -+ default: -+ return -ENODEV; -+ } -+ perfctr_info.cpu_type = PERFCTR_X86_VIA_C3; -+ perfctr_set_tests_type(PTT_VC3); -+ perfctr_cpu_name = vc3_name; -+ read_counters = rdpmc_read_counters; -+ write_control = p6_write_control; -+ check_control = vc3_check_control; -+ clear_counters = vc3_clear_counters; -+ return 0; -+ } -+ return -ENODEV; -+} -+ -+static int __init generic_init(void) -+{ -+ static char generic_name[] __initdata = "Generic x86 with TSC"; -+ if (!cpu_has_tsc) -+ return -ENODEV; -+ perfctr_info.cpu_features &= ~PERFCTR_FEATURE_RDPMC; -+ 
perfctr_info.cpu_type = PERFCTR_X86_GENERIC; -+ perfctr_set_tests_type(PTT_GENERIC); -+ perfctr_cpu_name = generic_name; -+ check_control = generic_check_control; -+ write_control = p6_write_control; -+ read_counters = rdpmc_read_counters; -+ clear_counters = generic_clear_counters; -+ return 0; -+} -+ -+static void perfctr_cpu_invalidate_cache(void) -+{ -+ /* -+ * per_cpu_cache[] is initialised to contain "impossible" -+ * evntsel values guaranteed to differ from anything accepted -+ * by perfctr_cpu_update_control(). -+ * All-bits-one works for all currently supported processors. -+ * The memset also sets the ids to -1, which is intentional. -+ */ -+ memset(get_cpu_cache(), ~0, sizeof(struct per_cpu_cache)); -+} -+ -+static void perfctr_cpu_init_one(void *ignore) -+{ -+ /* PREEMPT note: when called via smp_call_function(), -+ this is in IRQ context with preemption disabled. */ -+ perfctr_cpu_clear_counters(); -+ perfctr_cpu_invalidate_cache(); -+ if (cpu_has_apic) -+ apic_write(APIC_LVTPC, LOCAL_PERFCTR_VECTOR); -+ if (perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC) -+ set_in_cr4_local(X86_CR4_PCE); -+} -+ -+static void perfctr_cpu_exit_one(void *ignore) -+{ -+ /* PREEMPT note: when called via smp_call_function(), -+ this is in IRQ context with preemption disabled. 
*/ -+ perfctr_cpu_clear_counters(); -+ perfctr_cpu_invalidate_cache(); -+ if (cpu_has_apic) -+ apic_write(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED); -+ if (perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC) -+ clear_in_cr4_local(X86_CR4_PCE); -+} -+ -+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PM) -+ -+static void perfctr_pm_suspend(void) -+{ -+ /* XXX: clear control registers */ -+ printk("perfctr/x86: PM suspend\n"); -+} -+ -+static void perfctr_pm_resume(void) -+{ -+ /* XXX: reload control registers */ -+ printk("perfctr/x86: PM resume\n"); -+} -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,71) -+ -+#include <linux/sysdev.h> -+ -+static int perfctr_device_suspend(struct sys_device *dev, u32 state) -+{ -+ perfctr_pm_suspend(); -+ return 0; -+} -+ -+static int perfctr_device_resume(struct sys_device *dev) -+{ -+ perfctr_pm_resume(); -+ return 0; -+} -+ -+static struct sysdev_class perfctr_sysclass = { -+ set_kset_name("perfctr"), -+ .resume = perfctr_device_resume, -+ .suspend = perfctr_device_suspend, -+}; -+ -+static struct sys_device device_perfctr = { -+ .id = 0, -+ .cls = &perfctr_sysclass, -+}; -+ -+static void x86_pm_init(void) -+{ -+ if (sysdev_class_register(&perfctr_sysclass) == 0) -+ sysdev_register(&device_perfctr); -+} -+ -+static void x86_pm_exit(void) -+{ -+ sysdev_unregister(&device_perfctr); -+ sysdev_class_unregister(&perfctr_sysclass); -+} -+ -+#else /* 2.4 kernel */ -+ -+static int x86_pm_callback(struct pm_dev *dev, pm_request_t rqst, void *data) -+{ -+ switch (rqst) { -+ case PM_SUSPEND: -+ perfctr_pm_suspend(); -+ break; -+ case PM_RESUME: -+ perfctr_pm_resume(); -+ break; -+ } -+ return 0; -+} -+ -+static struct pm_dev *x86_pmdev; -+ -+static void x86_pm_init(void) -+{ -+ x86_pmdev = apic_pm_register(PM_SYS_DEV, 0, x86_pm_callback); -+} -+ -+static void x86_pm_exit(void) -+{ -+ if (x86_pmdev) { -+ apic_pm_unregister(x86_pmdev); -+ x86_pmdev = NULL; -+ } -+} -+ -+#endif /* 2.4 kernel */ -+ -+#else -+ -+static inline void 
x86_pm_init(void) { } -+static inline void x86_pm_exit(void) { } -+ -+#endif /* CONFIG_X86_LOCAL_APIC && CONFIG_PM */ -+ -+#ifdef CONFIG_X86_LOCAL_APIC -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,67) -+static void disable_lapic_nmi_watchdog(void) -+{ -+#ifdef CONFIG_PM -+ if (nmi_pmdev) { -+ apic_pm_unregister(nmi_pmdev); -+ nmi_pmdev = 0; -+ } -+#endif -+} -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) -+static int reserve_lapic_nmi(void) -+{ -+ int ret = 0; -+ if (nmi_perfctr_msr) { -+ nmi_perfctr_msr = 0; -+ disable_lapic_nmi_watchdog(); -+ ret = 1; -+ } -+ return ret; -+} -+ -+static inline void release_lapic_nmi(void) { } -+#endif -+ -+#else -+static inline int reserve_lapic_nmi(void) { return 0; } -+static inline void release_lapic_nmi(void) { } -+#endif -+ -+static void do_init_tests(void) -+{ -+#ifdef CONFIG_PERFCTR_INIT_TESTS -+ if (reserve_lapic_nmi() >= 0) { -+ perfctr_x86_init_tests(); -+ release_lapic_nmi(); -+ } -+#endif -+} -+ -+int __init perfctr_cpu_init(void) -+{ -+ int err = -ENODEV; -+ -+ preempt_disable(); -+ -+ /* RDPMC and RDTSC are on by default. They will be disabled -+ by the init procedures if necessary. */ -+ perfctr_info.cpu_features = PERFCTR_FEATURE_RDPMC | PERFCTR_FEATURE_RDTSC; -+ -+ if (cpu_has_msr) { -+ switch (current_cpu_data.x86_vendor) { -+ case X86_VENDOR_INTEL: -+ err = intel_init(); -+ break; -+ case X86_VENDOR_AMD: -+ err = amd_init(); -+ break; -+ case X86_VENDOR_CYRIX: -+ err = cyrix_init(); -+ break; -+ case X86_VENDOR_CENTAUR: -+ err = centaur_init(); -+ } -+ } -+ if (err) { -+ err = generic_init(); /* last resort */ -+ if (err) -+ goto out; -+ } -+ do_init_tests(); -+ finalise_backpatching(); -+ -+ perfctr_info.cpu_khz = perfctr_cpu_khz(); -+ perfctr_info.tsc_to_cpu_mult = 1; -+ -+ out: -+ preempt_enable(); -+ return err; -+} -+ -+void __exit perfctr_cpu_exit(void) -+{ -+} -+ -+/**************************************************************** -+ * * -+ * Hardware reservation. 
* -+ * * -+ ****************************************************************/ -+ -+static DECLARE_MUTEX(mutex); -+static const char *current_service = 0; -+ -+const char *perfctr_cpu_reserve(const char *service) -+{ -+ const char *ret; -+ -+ down(&mutex); -+ ret = current_service; -+ if (ret) -+ goto out_up; -+ ret = "unknown driver (oprofile?)"; -+ if (reserve_lapic_nmi() < 0) -+ goto out_up; -+ current_service = service; -+ __module_get(THIS_MODULE); -+ if (perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC) -+ mmu_cr4_features |= X86_CR4_PCE; -+ on_each_cpu(perfctr_cpu_init_one, NULL, 1, 1); -+ perfctr_cpu_set_ihandler(NULL); -+ x86_pm_init(); -+ ret = NULL; -+ out_up: -+ up(&mutex); -+ return ret; -+} -+ -+void perfctr_cpu_release(const char *service) -+{ -+ down(&mutex); -+ if (service != current_service) { -+ printk(KERN_ERR "%s: attempt by %s to release while reserved by %s\n", -+ __FUNCTION__, service, current_service); -+ goto out_up; -+ } -+ /* power down the counters */ -+ if (perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC) -+ mmu_cr4_features &= ~X86_CR4_PCE; -+ on_each_cpu(perfctr_cpu_exit_one, NULL, 1, 1); -+ perfctr_cpu_set_ihandler(NULL); -+ x86_pm_exit(); -+ current_service = 0; -+ release_lapic_nmi(); -+ module_put(THIS_MODULE); -+ out_up: -+ up(&mutex); -+} -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/apic.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-i386/apic.h 2004-04-03 22:38:23.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/apic.h 2004-11-18 20:59:11.000000000 -0500 -@@ -99,6 +99,8 @@ - #define NMI_LOCAL_APIC 2 - #define NMI_INVALID 3 - -+extern unsigned int nmi_perfctr_msr; -+ - #endif /* CONFIG_X86_LOCAL_APIC */ - - #endif /* __ASM_APIC_H */ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/mach-default/irq_vectors.h 
-=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-i386/mach-default/irq_vectors.h 2004-11-11 10:27:51.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/mach-default/irq_vectors.h 2004-11-18 20:59:11.000000000 -0500 -@@ -59,14 +59,15 @@ - * sources per level' errata. - */ - #define LOCAL_TIMER_VECTOR 0xef -+#define LOCAL_PERFCTR_VECTOR 0xee - - /* -- * First APIC vector available to drivers: (vectors 0x30-0xee) -+ * First APIC vector available to drivers: (vectors 0x30-0xed) - * we start at 0x31 to spread out vectors evenly between priority - * levels. (0x80 is the syscall vector) - */ - #define FIRST_DEVICE_VECTOR 0x31 --#define FIRST_SYSTEM_VECTOR 0xef -+#define FIRST_SYSTEM_VECTOR 0xee - - #define TIMER_IRQ 0 - -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/mach-pc9800/irq_vectors.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-i386/mach-pc9800/irq_vectors.h 2004-04-03 22:36:16.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/mach-pc9800/irq_vectors.h 2004-11-18 20:59:11.000000000 -0500 -@@ -59,14 +59,15 @@ - * sources per level' errata. - */ - #define LOCAL_TIMER_VECTOR 0xef -+#define LOCAL_PERFCTR_VECTOR 0xee - - /* -- * First APIC vector available to drivers: (vectors 0x30-0xee) -+ * First APIC vector available to drivers: (vectors 0x30-0xed) - * we start at 0x31 to spread out vectors evenly between priority - * levels. 
(0x80 is the syscall vector) - */ - #define FIRST_DEVICE_VECTOR 0x31 --#define FIRST_SYSTEM_VECTOR 0xef -+#define FIRST_SYSTEM_VECTOR 0xee - - #define TIMER_IRQ 0 - -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/mach-visws/irq_vectors.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-i386/mach-visws/irq_vectors.h 2004-04-03 22:36:18.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/mach-visws/irq_vectors.h 2004-11-18 20:59:11.000000000 -0500 -@@ -35,14 +35,15 @@ - * sources per level' errata. - */ - #define LOCAL_TIMER_VECTOR 0xef -+#define LOCAL_PERFCTR_VECTOR 0xee - - /* -- * First APIC vector available to drivers: (vectors 0x30-0xee) -+ * First APIC vector available to drivers: (vectors 0x30-0xed) - * we start at 0x31 to spread out vectors evenly between priority - * levels. (0x80 is the syscall vector) - */ - #define FIRST_DEVICE_VECTOR 0x31 --#define FIRST_SYSTEM_VECTOR 0xef -+#define FIRST_SYSTEM_VECTOR 0xee - - #define TIMER_IRQ 0 - -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/processor.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-i386/processor.h 2004-11-11 10:27:40.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/processor.h 2004-11-18 20:59:11.000000000 -0500 -@@ -427,6 +427,8 @@ - unsigned int saved_fs, saved_gs; - /* IO permissions */ - unsigned long *io_bitmap_ptr; -+/* performance counters */ -+ struct vperfctr *perfctr; - }; - - #define INIT_THREAD { \ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/perfctr.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-i386/perfctr.h 1969-12-31 19:00:00.000000000 -0500 -+++ 
linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/perfctr.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,189 @@ -+/* $Id: perfctr.h,v 1.48.2.4 2004/08/02 22:24:58 mikpe Exp $ -+ * x86/x86_64 Performance-Monitoring Counters driver -+ * -+ * Copyright (C) 1999-2004 Mikael Pettersson -+ */ -+#ifndef _ASM_I386_PERFCTR_H -+#define _ASM_I386_PERFCTR_H -+ -+struct perfctr_sum_ctrs { -+ unsigned long long tsc; -+ unsigned long long pmc[18]; -+}; -+ -+struct perfctr_cpu_control { -+ unsigned int tsc_on; -+ unsigned int nractrs; /* # of a-mode counters */ -+ unsigned int nrictrs; /* # of i-mode counters */ -+ unsigned int pmc_map[18]; -+ unsigned int evntsel[18]; /* one per counter, even on P5 */ -+ struct { -+ unsigned int escr[18]; -+ unsigned int pebs_enable; /* for replay tagging */ -+ unsigned int pebs_matrix_vert; /* for replay tagging */ -+ } p4; -+ int ireset[18]; /* < 0, for i-mode counters */ -+ unsigned int _reserved1; -+ unsigned int _reserved2; -+ unsigned int _reserved3; -+ unsigned int _reserved4; -+}; -+ -+struct perfctr_cpu_state { -+ unsigned int cstatus; -+ struct { /* k1 is opaque in the user ABI */ -+ unsigned int id; -+ int isuspend_cpu; -+ } k1; -+ /* The two tsc fields must be inlined. Placing them in a -+ sub-struct causes unwanted internal padding on x86-64. 
*/ -+ unsigned int tsc_start; -+ unsigned long long tsc_sum; -+ struct { -+ unsigned int map; -+ unsigned int start; -+ unsigned long long sum; -+ } pmc[18]; /* the size is not part of the user ABI */ -+#ifdef __KERNEL__ -+ struct perfctr_cpu_control control; -+ unsigned int p4_escr_map[18]; -+#endif -+}; -+ -+/* cstatus is a re-encoding of control.tsc_on/nractrs/nrictrs -+ which should have less overhead in most cases */ -+ -+static inline -+unsigned int __perfctr_mk_cstatus(unsigned int tsc_on, unsigned int have_ictrs, -+ unsigned int nrictrs, unsigned int nractrs) -+{ -+ return (tsc_on<<31) | (have_ictrs<<16) | ((nractrs+nrictrs)<<8) | nractrs; -+} -+ -+static inline -+unsigned int perfctr_mk_cstatus(unsigned int tsc_on, unsigned int nractrs, -+ unsigned int nrictrs) -+{ -+ return __perfctr_mk_cstatus(tsc_on, nrictrs, nrictrs, nractrs); -+} -+ -+static inline unsigned int perfctr_cstatus_enabled(unsigned int cstatus) -+{ -+ return cstatus; -+} -+ -+static inline int perfctr_cstatus_has_tsc(unsigned int cstatus) -+{ -+ return (int)cstatus < 0; /* test and jump on sign */ -+} -+ -+static inline unsigned int perfctr_cstatus_nractrs(unsigned int cstatus) -+{ -+ return cstatus & 0x7F; /* and with imm8 */ -+} -+ -+static inline unsigned int perfctr_cstatus_nrctrs(unsigned int cstatus) -+{ -+ return (cstatus >> 8) & 0x7F; -+} -+ -+static inline unsigned int perfctr_cstatus_has_ictrs(unsigned int cstatus) -+{ -+ return cstatus & (0x7F << 16); -+} -+ -+/* -+ * 'struct siginfo' support for perfctr overflow signals. -+ * In unbuffered mode, si_code is set to SI_PMC_OVF and a bitmask -+ * describing which perfctrs overflowed is put in si_pmc_ovf_mask. -+ * A bitmask is used since more than one perfctr can have overflowed -+ * by the time the interrupt handler runs. 
-+ * -+ * glibc's <signal.h> doesn't seem to define __SI_FAULT or __SI_CODE(), -+ * and including <asm/siginfo.h> as well may cause redefinition errors, -+ * so the user and kernel values are different #defines here. -+ */ -+#ifdef __KERNEL__ -+#define SI_PMC_OVF (__SI_FAULT|'P') -+#else -+#define SI_PMC_OVF ('P') -+#endif -+#define si_pmc_ovf_mask _sifields._pad[0] /* XXX: use an unsigned field later */ -+ -+/* version number for user-visible CPU-specific data */ -+#define PERFCTR_CPU_VERSION 0x0501 /* 5.1 */ -+ -+#ifdef __KERNEL__ -+ -+#if defined(CONFIG_PERFCTR) || defined(CONFIG_PERFCTR_MODULE) -+ -+/* Driver init/exit. */ -+extern int perfctr_cpu_init(void); -+extern void perfctr_cpu_exit(void); -+ -+/* CPU type name. */ -+extern char *perfctr_cpu_name; -+ -+/* Hardware reservation. */ -+extern const char *perfctr_cpu_reserve(const char *service); -+extern void perfctr_cpu_release(const char *service); -+ -+/* PRE: state has no running interrupt-mode counters. -+ Check that the new control data is valid. -+ Update the driver's private control data. -+ is_global should be zero for per-process counters and non-zero -+ for global-mode counters. This matters for HT P4s, alas. -+ Returns a negative error code if the control data is invalid. */ -+extern int perfctr_cpu_update_control(struct perfctr_cpu_state *state, int is_global); -+ -+/* Read a-mode counters. Subtract from start and accumulate into sums. -+ Must be called with preemption disabled. */ -+extern void perfctr_cpu_suspend(struct perfctr_cpu_state *state); -+ -+/* Write control registers. Read a-mode counters into start. -+ Must be called with preemption disabled. */ -+extern void perfctr_cpu_resume(struct perfctr_cpu_state *state); -+ -+/* Perform an efficient combined suspend/resume operation. -+ Must be called with preemption disabled. */ -+extern void perfctr_cpu_sample(struct perfctr_cpu_state *state); -+ -+/* The type of a perfctr overflow interrupt handler. 
-+ It will be called in IRQ context, with preemption disabled. */ -+typedef void (*perfctr_ihandler_t)(unsigned long pc); -+ -+#if defined(CONFIG_X86_LOCAL_APIC) -+#define PERFCTR_INTERRUPT_SUPPORT 1 -+#endif -+ -+/* Operations related to overflow interrupt handling. */ -+#ifdef CONFIG_X86_LOCAL_APIC -+extern void perfctr_cpu_set_ihandler(perfctr_ihandler_t); -+extern void perfctr_cpu_ireload(struct perfctr_cpu_state*); -+extern unsigned int perfctr_cpu_identify_overflow(struct perfctr_cpu_state*); -+#else -+static inline void perfctr_cpu_set_ihandler(perfctr_ihandler_t x) { } -+#endif -+ -+#if defined(CONFIG_SMP) -+/* CPUs in `perfctr_cpus_forbidden_mask' must not use the -+ performance-monitoring counters. TSC use is unrestricted. -+ This is needed to prevent resource conflicts on hyper-threaded P4s. -+ The declaration of `perfctr_cpus_forbidden_mask' is in the driver's -+ private compat.h, since it needs to handle cpumask_t incompatibilities. */ -+#define PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED 1 -+#endif -+ -+#endif /* CONFIG_PERFCTR */ -+ -+#if defined(CONFIG_KPERFCTR) && defined(CONFIG_X86_LOCAL_APIC) -+asmlinkage void perfctr_interrupt(struct pt_regs*); -+#define perfctr_vector_init() \ -+ set_intr_gate(LOCAL_PERFCTR_VECTOR, perfctr_interrupt) -+#else -+#define perfctr_vector_init() do{}while(0) -+#endif -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* _ASM_I386_PERFCTR_H */ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-ppc/processor.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-ppc/processor.h 2004-11-11 10:27:19.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-ppc/processor.h 2004-11-18 20:59:11.000000000 -0500 -@@ -119,6 +119,7 @@ - unsigned long vrsave; - int used_vr; /* set if process has used altivec */ - #endif /* CONFIG_ALTIVEC */ -+ struct vperfctr *perfctr; /* performance counters */ - }; - - #define 
ARCH_MIN_TASKALIGN 16 -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-ppc/perfctr.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-ppc/perfctr.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-ppc/perfctr.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,171 @@ -+/* $Id: perfctr.h,v 1.3.2.2 2004/06/21 22:38:30 mikpe Exp $ -+ * PPC32 Performance-Monitoring Counters driver -+ * -+ * Copyright (C) 2004 Mikael Pettersson -+ */ -+#ifndef _ASM_PPC_PERFCTR_H -+#define _ASM_PPC_PERFCTR_H -+ -+/* perfctr_info.cpu_type values */ -+#define PERFCTR_PPC_GENERIC 0 -+#define PERFCTR_PPC_604 1 -+#define PERFCTR_PPC_604e 2 -+#define PERFCTR_PPC_750 3 -+#define PERFCTR_PPC_7400 4 -+#define PERFCTR_PPC_7450 5 -+ -+struct perfctr_sum_ctrs { -+ unsigned long long tsc; -+ unsigned long long pmc[6]; -+}; -+ -+struct perfctr_cpu_control { -+ unsigned int tsc_on; -+ unsigned int nractrs; /* # of a-mode counters */ -+ unsigned int nrictrs; /* # of i-mode counters */ -+ unsigned int pmc_map[6]; -+ unsigned int evntsel[6]; /* one per counter, even on P5 */ -+ int ireset[6]; /* [0,0x7fffffff], for i-mode counters */ -+ struct { -+ unsigned int mmcr0; /* sans PMC{1,2}SEL */ -+ unsigned int mmcr2; /* only THRESHMULT */ -+ /* IABR/DABR/BAMR not supported */ -+ } ppc; -+ unsigned int _reserved1; -+ unsigned int _reserved2; -+ unsigned int _reserved3; -+ unsigned int _reserved4; -+}; -+ -+struct perfctr_cpu_state { -+ unsigned int cstatus; -+ struct { /* k1 is opaque in the user ABI */ -+ unsigned int id; -+ int isuspend_cpu; -+ } k1; -+ /* The two tsc fields must be inlined. Placing them in a -+ sub-struct causes unwanted internal padding on x86-64. 
*/ -+ unsigned int tsc_start; -+ unsigned long long tsc_sum; -+ struct { -+ unsigned int map; -+ unsigned int start; -+ unsigned long long sum; -+ } pmc[6]; /* the size is not part of the user ABI */ -+#ifdef __KERNEL__ -+ unsigned int ppc_mmcr[3]; -+ struct perfctr_cpu_control control; -+#endif -+}; -+ -+/* cstatus is a re-encoding of control.tsc_on/nractrs/nrictrs -+ which should have less overhead in most cases */ -+/* XXX: ppc driver internally also uses cstatus&(1<<30) */ -+ -+static inline -+unsigned int perfctr_mk_cstatus(unsigned int tsc_on, unsigned int nractrs, -+ unsigned int nrictrs) -+{ -+ return (tsc_on<<31) | (nrictrs<<16) | ((nractrs+nrictrs)<<8) | nractrs; -+} -+ -+static inline unsigned int perfctr_cstatus_enabled(unsigned int cstatus) -+{ -+ return cstatus; -+} -+ -+static inline int perfctr_cstatus_has_tsc(unsigned int cstatus) -+{ -+ return (int)cstatus < 0; /* test and jump on sign */ -+} -+ -+static inline unsigned int perfctr_cstatus_nractrs(unsigned int cstatus) -+{ -+ return cstatus & 0x7F; /* and with imm8 */ -+} -+ -+static inline unsigned int perfctr_cstatus_nrctrs(unsigned int cstatus) -+{ -+ return (cstatus >> 8) & 0x7F; -+} -+ -+static inline unsigned int perfctr_cstatus_has_ictrs(unsigned int cstatus) -+{ -+ return cstatus & (0x7F << 16); -+} -+ -+/* -+ * 'struct siginfo' support for perfctr overflow signals. -+ * In unbuffered mode, si_code is set to SI_PMC_OVF and a bitmask -+ * describing which perfctrs overflowed is put in si_pmc_ovf_mask. -+ * A bitmask is used since more than one perfctr can have overflowed -+ * by the time the interrupt handler runs. -+ * -+ * glibc's <signal.h> doesn't seem to define __SI_FAULT or __SI_CODE(), -+ * and including <asm/siginfo.h> as well may cause redefinition errors, -+ * so the user and kernel values are different #defines here. 
-+ */ -+#ifdef __KERNEL__ -+#define SI_PMC_OVF (__SI_FAULT|'P') -+#else -+#define SI_PMC_OVF ('P') -+#endif -+#define si_pmc_ovf_mask _sifields._pad[0] /* XXX: use an unsigned field later */ -+ -+/* version number for user-visible CPU-specific data */ -+#define PERFCTR_CPU_VERSION 0 /* XXX: not yet cast in stone */ -+ -+#ifdef __KERNEL__ -+ -+#if defined(CONFIG_PERFCTR) || defined(CONFIG_PERFCTR_MODULE) -+ -+/* Driver init/exit. */ -+extern int perfctr_cpu_init(void); -+extern void perfctr_cpu_exit(void); -+ -+/* CPU type name. */ -+extern char *perfctr_cpu_name; -+ -+/* Hardware reservation. */ -+extern const char *perfctr_cpu_reserve(const char *service); -+extern void perfctr_cpu_release(const char *service); -+ -+/* PRE: state has no running interrupt-mode counters. -+ Check that the new control data is valid. -+ Update the driver's private control data. -+ Returns a negative error code if the control data is invalid. */ -+extern int perfctr_cpu_update_control(struct perfctr_cpu_state *state, int is_global); -+ -+/* Read a-mode counters. Subtract from start and accumulate into sums. -+ Must be called with preemption disabled. */ -+extern void perfctr_cpu_suspend(struct perfctr_cpu_state *state); -+ -+/* Write control registers. Read a-mode counters into start. -+ Must be called with preemption disabled. */ -+extern void perfctr_cpu_resume(struct perfctr_cpu_state *state); -+ -+/* Perform an efficient combined suspend/resume operation. -+ Must be called with preemption disabled. */ -+extern void perfctr_cpu_sample(struct perfctr_cpu_state *state); -+ -+/* The type of a perfctr overflow interrupt handler. -+ It will be called in IRQ context, with preemption disabled. */ -+typedef void (*perfctr_ihandler_t)(unsigned long pc); -+ -+/* XXX: The hardware supports overflow interrupts, but the driver -+ does not yet enable this due to an erratum in 750/7400/7410. 
*/ -+//#define PERFCTR_INTERRUPT_SUPPORT 1 -+ -+#ifdef PERFCTR_INTERRUPT_SUPPORT -+extern void perfctr_cpu_set_ihandler(perfctr_ihandler_t); -+extern void perfctr_cpu_ireload(struct perfctr_cpu_state*); -+extern unsigned int perfctr_cpu_identify_overflow(struct perfctr_cpu_state*); -+#else -+static inline void perfctr_cpu_set_ihandler(perfctr_ihandler_t x) { } -+#endif -+ -+#endif /* CONFIG_PERFCTR */ -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* _ASM_PPC_PERFCTR_H */ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/apic.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-x86_64/apic.h 2004-11-11 10:28:46.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/apic.h 2004-11-18 20:59:11.000000000 -0500 -@@ -96,6 +96,8 @@ - #define NMI_LOCAL_APIC 2 - #define NMI_INVALID 3 - -+extern unsigned int nmi_perfctr_msr; -+ - #endif /* CONFIG_X86_LOCAL_APIC */ - - #define esr_disable 0 -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/hw_irq.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-x86_64/hw_irq.h 2004-11-11 10:28:31.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/hw_irq.h 2004-11-18 20:59:11.000000000 -0500 -@@ -65,14 +65,15 @@ - * sources per level' errata. - */ - #define LOCAL_TIMER_VECTOR 0xef -+#define LOCAL_PERFCTR_VECTOR 0xee - - /* -- * First APIC vector available to drivers: (vectors 0x30-0xee) -+ * First APIC vector available to drivers: (vectors 0x30-0xed) - * we start at 0x31 to spread out vectors evenly between priority - * levels. 
(0x80 is the syscall vector) - */ - #define FIRST_DEVICE_VECTOR 0x31 --#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ -+#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ - - - #ifndef __ASSEMBLY__ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/irq.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-x86_64/irq.h 2004-11-11 10:28:46.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/irq.h 2004-11-18 20:59:11.000000000 -0500 -@@ -29,7 +29,7 @@ - */ - #define NR_VECTORS 256 - --#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ -+#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ - - #ifdef CONFIG_PCI_USE_VECTOR - #define NR_IRQS FIRST_SYSTEM_VECTOR -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/processor.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-x86_64/processor.h 2004-11-11 10:27:40.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/processor.h 2004-11-18 20:59:11.000000000 -0500 -@@ -258,6 +258,8 @@ - unsigned long *io_bitmap_ptr; - /* cached TLS descriptors. 
*/ - u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; -+/* performance counters */ -+ struct vperfctr *perfctr; - }; - - #define INIT_THREAD {} -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/perfctr.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-x86_64/perfctr.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/perfctr.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1 @@ -+#include <asm-i386/perfctr.h> -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/perfctr.h -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/linux/perfctr.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/perfctr.h 2004-11-18 20:59:11.000000000 -0500 -@@ -0,0 +1,246 @@ -+/* $Id: perfctr.h,v 1.69 2004/02/20 21:31:02 mikpe Exp $ -+ * Performance-Monitoring Counters driver -+ * -+ * Copyright (C) 1999-2004 Mikael Pettersson -+ */ -+#ifndef _LINUX_PERFCTR_H -+#define _LINUX_PERFCTR_H -+ -+#ifdef CONFIG_KPERFCTR /* don't break archs without <asm/perfctr.h> */ -+ -+#include <asm/perfctr.h> -+ -+struct perfctr_info { -+ unsigned int abi_version; -+ char driver_version[32]; -+ unsigned int cpu_type; -+ unsigned int cpu_features; -+ unsigned int cpu_khz; -+ unsigned int tsc_to_cpu_mult; -+ unsigned int _reserved2; -+ unsigned int _reserved3; -+ unsigned int _reserved4; -+}; -+ -+struct perfctr_cpu_mask { -+ unsigned int nrwords; -+ unsigned int mask[1]; /* actually 'nrwords' */ -+}; -+ -+/* abi_version values: Lower 16 bits contain the CPU data version, upper -+ 16 bits contain the API version. Each half has a major version in its -+ upper 8 bits, and a minor version in its lower 8 bits. 
*/ -+#define PERFCTR_API_VERSION 0x0501 /* 5.1 */ -+#define PERFCTR_ABI_VERSION ((PERFCTR_API_VERSION<<16)|PERFCTR_CPU_VERSION) -+ -+/* cpu_type values */ -+#define PERFCTR_X86_GENERIC 0 /* any x86 with rdtsc */ -+#define PERFCTR_X86_INTEL_P5 1 /* no rdpmc */ -+#define PERFCTR_X86_INTEL_P5MMX 2 -+#define PERFCTR_X86_INTEL_P6 3 -+#define PERFCTR_X86_INTEL_PII 4 -+#define PERFCTR_X86_INTEL_PIII 5 -+#define PERFCTR_X86_CYRIX_MII 6 -+#define PERFCTR_X86_WINCHIP_C6 7 /* no rdtsc */ -+#define PERFCTR_X86_WINCHIP_2 8 /* no rdtsc */ -+#define PERFCTR_X86_AMD_K7 9 -+#define PERFCTR_X86_VIA_C3 10 /* no pmc0 */ -+#define PERFCTR_X86_INTEL_P4 11 /* model 0 and 1 */ -+#define PERFCTR_X86_INTEL_P4M2 12 /* model 2 */ -+#define PERFCTR_X86_AMD_K8 13 -+#define PERFCTR_X86_INTEL_PENTM 14 /* Pentium M */ -+#define PERFCTR_X86_AMD_K8C 15 /* Revision C */ -+#define PERFCTR_X86_INTEL_P4M3 16 /* model 3 and above */ -+ -+/* cpu_features flag bits */ -+#define PERFCTR_FEATURE_RDPMC 0x01 -+#define PERFCTR_FEATURE_RDTSC 0x02 -+#define PERFCTR_FEATURE_PCINT 0x04 -+ -+/* user's view of mmap:ed virtual perfctr */ -+struct vperfctr_state { -+ struct perfctr_cpu_state cpu_state; -+}; -+ -+/* parameter in VPERFCTR_CONTROL command */ -+struct vperfctr_control { -+ int si_signo; -+ struct perfctr_cpu_control cpu_control; -+ unsigned int preserve; -+ unsigned int _reserved1; -+ unsigned int _reserved2; -+ unsigned int _reserved3; -+ unsigned int _reserved4; -+}; -+ -+/* parameter in GPERFCTR_CONTROL command */ -+struct gperfctr_cpu_control { -+ unsigned int cpu; -+ struct perfctr_cpu_control cpu_control; -+ unsigned int _reserved1; -+ unsigned int _reserved2; -+ unsigned int _reserved3; -+ unsigned int _reserved4; -+}; -+ -+/* returned by GPERFCTR_READ command */ -+struct gperfctr_cpu_state { -+ unsigned int cpu; -+ struct perfctr_cpu_control cpu_control; -+ struct perfctr_sum_ctrs sum; -+ unsigned int _reserved1; -+ unsigned int _reserved2; -+ unsigned int _reserved3; -+ unsigned int _reserved4; 
-+}; -+ -+/* buffer for encodings of most of the above structs */ -+struct perfctr_struct_buf { -+ unsigned int rdsize; -+ unsigned int wrsize; -+ unsigned int buffer[1]; /* actually 'max(rdsize,wrsize)' */ -+}; -+ -+#include <linux/ioctl.h> -+#define _PERFCTR_IOCTL 0xD0 /* 'P'+128, currently unassigned */ -+ -+#define PERFCTR_ABI _IOR(_PERFCTR_IOCTL,0,unsigned int) -+#define PERFCTR_INFO _IOR(_PERFCTR_IOCTL,1,struct perfctr_struct_buf) -+#define PERFCTR_CPUS _IOWR(_PERFCTR_IOCTL,2,struct perfctr_cpu_mask) -+#define PERFCTR_CPUS_FORBIDDEN _IOWR(_PERFCTR_IOCTL,3,struct perfctr_cpu_mask) -+#define VPERFCTR_CREAT _IO(_PERFCTR_IOCTL,6)/*int tid*/ -+#define VPERFCTR_OPEN _IO(_PERFCTR_IOCTL,7)/*int tid*/ -+ -+#define VPERFCTR_READ_SUM _IOR(_PERFCTR_IOCTL,8,struct perfctr_struct_buf) -+#define VPERFCTR_UNLINK _IO(_PERFCTR_IOCTL,9) -+#define VPERFCTR_CONTROL _IOW(_PERFCTR_IOCTL,10,struct perfctr_struct_buf) -+#define VPERFCTR_IRESUME _IO(_PERFCTR_IOCTL,11) -+#define VPERFCTR_READ_CONTROL _IOR(_PERFCTR_IOCTL,12,struct perfctr_struct_buf) -+ -+#define GPERFCTR_CONTROL _IOWR(_PERFCTR_IOCTL,16,struct perfctr_struct_buf) -+#define GPERFCTR_READ _IOWR(_PERFCTR_IOCTL,17,struct perfctr_struct_buf) -+#define GPERFCTR_STOP _IO(_PERFCTR_IOCTL,18) -+#define GPERFCTR_START _IO(_PERFCTR_IOCTL,19)/*unsigned int*/ -+ -+#ifdef __KERNEL__ -+extern struct perfctr_info perfctr_info; -+extern int sys_perfctr_abi(unsigned int*); -+extern int sys_perfctr_info(struct perfctr_struct_buf*); -+extern int sys_perfctr_cpus(struct perfctr_cpu_mask*); -+extern int sys_perfctr_cpus_forbidden(struct perfctr_cpu_mask*); -+#endif /* __KERNEL__ */ -+ -+#endif /* CONFIG_KPERFCTR */ -+ -+#ifdef __KERNEL__ -+ -+/* Needed for perfctr_set_cpus_allowed() prototype. 
*/ -+#include <linux/version.h> -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) && !defined(HAVE_CPUMASK_T) -+typedef unsigned long cpumask_t; -+#define PERFCTR_HAVE_CPUMASK_T -+#endif -+ -+#ifdef CONFIG_PERFCTR_VIRTUAL -+ -+/* -+ * Virtual per-process performance-monitoring counters. -+ */ -+struct vperfctr; /* opaque */ -+ -+/* process management operations */ -+extern struct vperfctr *__vperfctr_copy(struct vperfctr*); -+extern void __vperfctr_exit(struct vperfctr*); -+extern void __vperfctr_suspend(struct vperfctr*); -+extern void __vperfctr_resume(struct vperfctr*); -+extern void __vperfctr_sample(struct vperfctr*); -+extern void __vperfctr_set_cpus_allowed(struct task_struct*, struct vperfctr*, cpumask_t); -+ -+#ifdef CONFIG_PERFCTR_MODULE -+extern struct vperfctr_stub { -+ struct module *owner; -+ void (*exit)(struct vperfctr*); -+ void (*suspend)(struct vperfctr*); -+ void (*resume)(struct vperfctr*); -+ void (*sample)(struct vperfctr*); -+#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED -+ void (*set_cpus_allowed)(struct task_struct*, struct vperfctr*, cpumask_t); -+#endif -+} vperfctr_stub; -+extern void _vperfctr_exit(struct vperfctr*); -+#define _vperfctr_suspend(x) vperfctr_stub.suspend((x)) -+#define _vperfctr_resume(x) vperfctr_stub.resume((x)) -+#define _vperfctr_sample(x) vperfctr_stub.sample((x)) -+#define _vperfctr_set_cpus_allowed(x,y,z) (*vperfctr_stub.set_cpus_allowed)((x),(y),(z)) -+#else /* !CONFIG_PERFCTR_MODULE */ -+#define _vperfctr_exit(x) __vperfctr_exit((x)) -+#define _vperfctr_suspend(x) __vperfctr_suspend((x)) -+#define _vperfctr_resume(x) __vperfctr_resume((x)) -+#define _vperfctr_sample(x) __vperfctr_sample((x)) -+#define _vperfctr_set_cpus_allowed(x,y,z) __vperfctr_set_cpus_allowed((x),(y),(z)) -+#endif /* CONFIG_PERFCTR_MODULE */ -+ -+static inline void perfctr_copy_thread(struct thread_struct *thread) -+{ -+ thread->perfctr = NULL; -+} -+ -+static inline void perfctr_exit_thread(struct thread_struct *thread) -+{ -+ struct vperfctr 
*perfctr; -+ perfctr = thread->perfctr; -+ if( perfctr ) -+ _vperfctr_exit(perfctr); -+} -+ -+static inline void perfctr_suspend_thread(struct thread_struct *prev) -+{ -+ struct vperfctr *perfctr; -+ perfctr = prev->perfctr; -+ if( perfctr ) -+ _vperfctr_suspend(perfctr); -+} -+ -+static inline void perfctr_resume_thread(struct thread_struct *next) -+{ -+ struct vperfctr *perfctr; -+ perfctr = next->perfctr; -+ if( perfctr ) -+ _vperfctr_resume(perfctr); -+} -+ -+static inline void perfctr_sample_thread(struct thread_struct *thread) -+{ -+ struct vperfctr *perfctr; -+ perfctr = thread->perfctr; -+ if( perfctr ) -+ _vperfctr_sample(perfctr); -+} -+ -+static inline void perfctr_set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) -+{ -+#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED -+ struct vperfctr *perfctr; -+ -+ task_lock(p); -+ perfctr = p->thread.perfctr; -+ if( perfctr ) -+ _vperfctr_set_cpus_allowed(p, perfctr, new_mask); -+ task_unlock(p); -+#endif -+} -+ -+#else /* !CONFIG_PERFCTR_VIRTUAL */ -+ -+static inline void perfctr_copy_thread(struct thread_struct *t) { } -+static inline void perfctr_exit_thread(struct thread_struct *t) { } -+static inline void perfctr_suspend_thread(struct thread_struct *t) { } -+static inline void perfctr_resume_thread(struct thread_struct *t) { } -+static inline void perfctr_sample_thread(struct thread_struct *t) { } -+static inline void perfctr_set_cpus_allowed(struct task_struct *p, cpumask_t m) { } -+ -+#endif /* CONFIG_PERFCTR_VIRTUAL */ -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* _LINUX_PERFCTR_H */ -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/sched.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/kernel/sched.c 2004-11-11 10:28:49.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/sched.c 2004-11-18 20:59:11.000000000 -0500 -@@ -39,6 +39,7 @@ - #include <linux/rcupdate.h> - #include <linux/cpu.h> - #include 
<linux/percpu.h> -+#include <linux/perfctr.h> - #include <linux/kthread.h> - #include <linux/cpuset.h> - #include <linux/stop_machine.h> -@@ -3304,6 +3305,8 @@ - migration_req_t req; - runqueue_t *rq; - -+ perfctr_set_cpus_allowed(p, new_mask); -+ - rq = task_rq_lock(p, &flags); - if (any_online_cpu(new_mask) == NR_CPUS) { - ret = -EINVAL; -Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/timer.c -=================================================================== ---- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/kernel/timer.c 2004-11-11 10:28:46.000000000 -0500 -+++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/timer.c 2004-11-18 20:59:11.000000000 -0500 -@@ -31,6 +31,7 @@ - #include <linux/time.h> - #include <linux/jiffies.h> - #include <linux/cpu.h> -+#include <linux/perfctr.h> - #include <linux/trigevent_hooks.h> - - #include <asm/uaccess.h> -@@ -844,6 +845,7 @@ - do_process_times(p, user, system); - do_it_virt(p, user); - do_it_prof(p); -+ perfctr_sample_thread(&p->thread); - } - - /* diff --git a/lustre/kernel_patches/patches/uml-export-end_iomem.patch b/lustre/kernel_patches/patches/uml-export-end_iomem.patch deleted file mode 100644 index ab8a84bc9a..0000000000 --- a/lustre/kernel_patches/patches/uml-export-end_iomem.patch +++ /dev/null @@ -1,12 +0,0 @@ -Index: linux-2.4.24-vanilla/arch/um/kernel/ksyms.c -=================================================================== ---- linux-2.4.24-vanilla.orig/arch/um/kernel/ksyms.c 2004-01-10 17:47:10.000000000 +0300 -+++ linux-2.4.24-vanilla/arch/um/kernel/ksyms.c 2004-01-10 18:22:30.000000000 +0300 -@@ -34,6 +34,7 @@ - EXPORT_SYMBOL(host_task_size); - EXPORT_SYMBOL(arch_validate); - EXPORT_SYMBOL(get_kmem_end); -+EXPORT_SYMBOL(end_iomem); - - EXPORT_SYMBOL(high_physmem); - EXPORT_SYMBOL(empty_zero_page); diff --git a/lustre/kernel_patches/patches/uml-exprt-clearuser-2.6.12.patch b/lustre/kernel_patches/patches/uml-exprt-clearuser-2.6.12.patch deleted file mode 100644 index 
381b03f3b6..0000000000 --- a/lustre/kernel_patches/patches/uml-exprt-clearuser-2.6.12.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- uml-2.4.24/arch/um/kernel/tt/ksyms.c.orig 2005-05-04 13:59:58.806659456 +0300 -+++ uml-2.4.24/arch/um/kernel/tt/ksyms.c 2005-05-04 14:00:18.358687096 +0300 -@@ -12,6 +12,8 @@ - EXPORT_SYMBOL(__do_strncpy_from_user); - EXPORT_SYMBOL(__do_strnlen_user); - EXPORT_SYMBOL(__do_clear_user); -+EXPORT_SYMBOL(clear_user_tt); -+EXPORT_SYMBOL(clear_user_skas); - - EXPORT_SYMBOL(tracing_pid); - EXPORT_SYMBOL(honeypot); diff --git a/lustre/kernel_patches/patches/vfs_nointent-2.6-sles10.patch b/lustre/kernel_patches/patches/vfs_nointent-2.6-sles10.patch deleted file mode 100644 index aba3c8b3da..0000000000 --- a/lustre/kernel_patches/patches/vfs_nointent-2.6-sles10.patch +++ /dev/null @@ -1,453 +0,0 @@ -Index: linux-2.6.16.21-0.8/net/unix/af_unix.c -=================================================================== ---- linux-2.6.16.21-0.8.orig/net/unix/af_unix.c 2006-08-03 01:34:33.000000000 -0600 -+++ linux-2.6.16.21-0.8/net/unix/af_unix.c 2006-08-03 01:35:38.000000000 -0600 -@@ -673,6 +673,7 @@ - int err = 0; - - if (sunname->sun_path[0]) { -+ intent_init(&nd.intent, IT_LOOKUP); - err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd); - if (err) - goto fail; -Index: linux-2.6.16.21-0.8/fs/open.c -=================================================================== ---- linux-2.6.16.21-0.8.orig/fs/open.c 2006-08-03 01:34:33.000000000 -0600 -+++ linux-2.6.16.21-0.8/fs/open.c 2006-08-03 02:54:31.000000000 -0600 -@@ -197,9 +197,10 @@ - } - - int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, -- struct file *filp) -+ struct file *filp, int called_from_open) - { - int err; -+ struct inode_operations *op = dentry->d_inode->i_op; - struct iattr newattrs; - - /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. 
*/ -@@ -214,7 +215,17 @@ - } - - mutex_lock(&dentry->d_inode->i_mutex); -- err = notify_change(dentry, &newattrs); -+ if (called_from_open) -+ newattrs.ia_valid |= ATTR_FROM_OPEN; -+ if (op->setattr_raw) { -+ newattrs.ia_valid |= ATTR_RAW; -+ newattrs.ia_ctime = CURRENT_TIME; -+ down_write(&dentry->d_inode->i_alloc_sem); -+ err = op->setattr_raw(dentry->d_inode, &newattrs); -+ up_write(&dentry->d_inode->i_alloc_sem); -+ } else -+ err = notify_change(dentry, &newattrs); -+ - mutex_unlock(&dentry->d_inode->i_mutex); - return err; - } -@@ -269,7 +280,7 @@ - error = locks_verify_truncate(inode, NULL, length); - if (!error) { - DQUOT_INIT(inode); -- error = do_truncate(nd.dentry, length, 0, NULL); -+ error = do_truncate(nd.dentry, length, 0, NULL, 0); - } - put_write_access(inode); - -@@ -321,7 +332,7 @@ - - error = locks_verify_truncate(inode, file, length); - if (!error) -- error = do_truncate(dentry, length, 0, file); -+ error = do_truncate(dentry, length, 0, file, 0); - out_putf: - fput(file); - out: -@@ -406,9 +417,20 @@ - (error = vfs_permission(&nd, MAY_WRITE)) != 0) - goto dput_and_out; - } -- mutex_lock(&inode->i_mutex); -- error = notify_change(nd.dentry, &newattrs); -- mutex_unlock(&inode->i_mutex); -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } else { -+ mutex_lock(&inode->i_mutex); -+ error = notify_change(nd.dentry, &newattrs); -+ mutex_unlock(&inode->i_mutex); -+ } -+ - dput_and_out: - path_release(&nd); - out: -@@ -620,36 +642,52 @@ - return error; - } - -+int chmod_common(struct dentry *dentry, mode_t mode) -+{ -+ struct inode * inode = dentry->d_inode; -+ struct iattr newattrs; -+ int error = -EROFS; -+ -+ if (IS_RDONLY(inode)) -+ goto out; -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = 
dentry->d_inode->i_op; -+ -+ newattrs.ia_mode = mode; -+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out; -+ } -+ -+ error = -EPERM; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ goto out; -+ -+ mutex_lock(&inode->i_mutex); -+ if (mode == (mode_t) -1) -+ mode = inode->i_mode; -+ newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); -+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -+ error = notify_change(dentry, &newattrs); -+ mutex_unlock(&inode->i_mutex); -+out: -+ return error; -+} -+ - asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) - { -- struct inode * inode; -- struct dentry * dentry; - struct file * file; - int err = -EBADF; -- struct iattr newattrs; - - file = fget(fd); - if (!file) - goto out; - -- dentry = file->f_dentry; -- inode = dentry->d_inode; -- -- err = -EROFS; -- if (IS_RDONLY(inode)) -- goto out_putf; -- err = -EPERM; -- if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto out_putf; -- mutex_lock(&inode->i_mutex); -- if (mode == (mode_t) -1) -- mode = inode->i_mode; -- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); -- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -- err = notify_change(dentry, &newattrs); -- mutex_unlock(&inode->i_mutex); -- --out_putf: -+ err = chmod_common(file->f_dentry, mode); - fput(file); - out: - return err; -@@ -659,32 +697,12 @@ - mode_t mode) - { - struct nameidata nd; -- struct inode * inode; - int error; -- struct iattr newattrs; - - error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); - if (error) - goto out; -- inode = nd.dentry->d_inode; -- -- error = -EROFS; -- if (IS_RDONLY(inode)) -- goto dput_and_out; -- -- error = -EPERM; -- if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -- goto dput_and_out; -- -- mutex_lock(&inode->i_mutex); -- if (mode == (mode_t) -1) -- mode = inode->i_mode; -- 
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); -- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -- error = notify_change(nd.dentry, &newattrs); -- mutex_unlock(&inode->i_mutex); -- --dput_and_out: -+ error = chmod_common(nd.dentry, mode); - path_release(&nd); - out: - return error; -@@ -710,6 +728,18 @@ - if (IS_RDONLY(inode)) - goto out; - error = -EPERM; -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = dentry->d_inode->i_op; -+ -+ newattrs.ia_uid = user; -+ newattrs.ia_gid = group; -+ newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ return error; -+ } - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; - newattrs.ia_valid = ATTR_CTIME; -Index: linux-2.6.16.21-0.8/fs/namei.c -=================================================================== ---- linux-2.6.16.21-0.8.orig/fs/namei.c 2006-08-03 01:34:33.000000000 -0600 -+++ linux-2.6.16.21-0.8/fs/namei.c 2006-08-03 02:54:31.000000000 -0600 -@@ -1637,7 +1637,7 @@ - if (!error) { - DQUOT_INIT(inode); - -- error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL); -+ error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL, 1); - } - put_write_access(inode); - if (error) -@@ -1911,6 +1911,7 @@ - char * tmp; - struct dentry * dentry; - struct nameidata nd; -+ intent_init(&nd.intent, IT_LOOKUP); - - if (S_ISDIR(mode)) - return -EPERM; -@@ -1921,6 +1922,15 @@ - error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -+ -+ if (nd.dentry->d_inode->i_op->mknod_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mknod_raw(&nd, mode, dev); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ - dentry = lookup_create(&nd, 0); - error = PTR_ERR(dentry); - -@@ -1947,6 +1957,7 @@ - dput(dentry); - } - 
mutex_unlock(&nd.dentry->d_inode->i_mutex); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1992,9 +2003,18 @@ - struct dentry *dentry; - struct nameidata nd; - -+ intent_init(&nd.intent, IT_LOOKUP); - error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -+ if (nd.dentry->d_inode->i_op->mkdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mkdir_raw(&nd, mode); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ - dentry = lookup_create(&nd, 1); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { -@@ -2004,6 +2024,7 @@ - dput(dentry); - } - mutex_unlock(&nd.dentry->d_inode->i_mutex); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -2084,6 +2105,7 @@ - char * name; - struct dentry *dentry; - struct nameidata nd; -+ intent_init(&nd.intent, IT_LOOKUP); - - name = getname(pathname); - if(IS_ERR(name)) -@@ -2104,6 +2126,14 @@ - error = -EBUSY; - goto exit1; - } -+ if (nd.dentry->d_inode->i_op->rmdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ error = op->rmdir_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - mutex_lock(&nd.dentry->d_inode->i_mutex); - dentry = lookup_hash(&nd); - error = PTR_ERR(dentry); -@@ -2167,6 +2197,7 @@ - struct dentry *dentry; - struct nameidata nd; - struct inode *inode = NULL; -+ intent_init(&nd.intent, IT_LOOKUP); - - name = getname(pathname); - if(IS_ERR(name)) -@@ -2178,6 +2209,13 @@ - error = -EISDIR; - if (nd.last_type != LAST_NORM) - goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - mutex_lock(&nd.dentry->d_inode->i_mutex); - dentry = lookup_hash(&nd); - error = PTR_ERR(dentry); -@@ -2260,9 +2298,17 @@ - 
struct dentry *dentry; - struct nameidata nd; - -+ intent_init(&nd.intent, IT_LOOKUP); - error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); - if (error) - goto out; -+ if (nd.dentry->d_inode->i_op->symlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->symlink_raw(&nd, from); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } - dentry = lookup_create(&nd, 0); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { -@@ -2270,6 +2316,7 @@ - dput(dentry); - } - mutex_unlock(&nd.dentry->d_inode->i_mutex); -+out2: - path_release(&nd); - out: - putname(to); -@@ -2357,6 +2404,13 @@ - error = -EXDEV; - if (old_nd.mnt != nd.mnt) - goto out_release; -+ if (nd.dentry->d_inode->i_op->link_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->link_raw(&old_nd, &nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out_release; -+ } - new_dentry = lookup_create(&nd, 0); - error = PTR_ERR(new_dentry); - if (!IS_ERR(new_dentry)) { -@@ -2533,6 +2587,8 @@ - struct dentry * old_dentry, *new_dentry; - struct dentry * trap; - struct nameidata oldnd, newnd; -+ intent_init(&oldnd.intent, IT_LOOKUP); -+ intent_init(&newnd.intent, IT_LOOKUP); - - error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd); - if (error) -@@ -2555,6 +2611,13 @@ - if (newnd.last_type != LAST_NORM) - goto exit2; - -+ if (old_dir->d_inode->i_op->rename_raw) { -+ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit2; -+ } -+ - trap = lock_rename(new_dir, old_dir); - - old_dentry = lookup_hash(&oldnd); -@@ -2586,8 +2649,7 @@ - if (new_dentry == trap) - goto exit5; - -- error = vfs_rename(old_dir->d_inode, old_dentry, -- new_dir->d_inode, new_dentry); -+ error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); - exit5: - 
dput(new_dentry); - exit4: -Index: linux-2.6.16.21-0.8/fs/exec.c -=================================================================== ---- linux-2.6.16.21-0.8.orig/fs/exec.c 2006-08-03 01:34:33.000000000 -0600 -+++ linux-2.6.16.21-0.8/fs/exec.c 2006-08-03 01:35:38.000000000 -0600 -@@ -1524,7 +1524,7 @@ - goto close_fail; - if (!file->f_op->write) - goto close_fail; -- if (do_truncate(file->f_dentry, 0, 0, file) != 0) -+ if (do_truncate(file->f_dentry, 0, 0, file, 0) != 0) - goto close_fail; - - retval = binfmt->core_dump(signr, regs, file); -Index: linux-2.6.16.21-0.8/include/linux/fs.h -=================================================================== ---- linux-2.6.16.21-0.8.orig/include/linux/fs.h 2006-08-03 01:34:33.000000000 -0600 -+++ linux-2.6.16.21-0.8/include/linux/fs.h 2006-08-03 01:35:38.000000000 -0600 -@@ -1041,13 +1041,20 @@ - int (*create) (struct inode *,struct dentry *,int, struct nameidata *); - struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); - int (*link) (struct dentry *,struct inode *,struct dentry *); -+ int (*link_raw) (struct nameidata *,struct nameidata *); - int (*unlink) (struct inode *,struct dentry *); -+ int (*unlink_raw) (struct nameidata *); - int (*symlink) (struct inode *,struct dentry *,const char *); -+ int (*symlink_raw) (struct nameidata *,const char *); - int (*mkdir) (struct inode *,struct dentry *,int); -+ int (*mkdir_raw) (struct nameidata *,int); - int (*rmdir) (struct inode *,struct dentry *); -+ int (*rmdir_raw) (struct nameidata *); - int (*mknod) (struct inode *,struct dentry *,int,dev_t); -+ int (*mknod_raw) (struct nameidata *,int,dev_t); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename_raw) (struct nameidata *, struct nameidata *); - int (*readlink) (struct dentry *, char __user *,int); - void * (*follow_link) (struct dentry *, struct nameidata *); - void (*put_link) (struct dentry *, struct nameidata *, void *); -@@ -1357,7 
+1364,7 @@ - /* fs/open.c */ - - extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, -- struct file *filp); -+ struct file *filp, int called_from_open); - extern long do_sys_open(int fdf, const char __user *filename, int flags, - int mode); - extern struct file *filp_open(const char *, int, int); -- GitLab