From 8e552c36d90c03d2cabf5373788998966751b609 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Mon, 12 May 2008 21:46:29 -0400 Subject: power_supply: add CHARGE_COUNTER property and olpc_battery support for it This adds PROP_CHARGE_COUNTER to the power supply class (documenting it as well). The OLPC battery driver uses this for spitting out its ACR values (in uAh). We have some rounding errors (the data sheet claims 416.7, the math actually works out to 416.666667, so we're forced to choose between overflows or precision loss. I chose precision loss, and stuck w/ data sheet values), but I don't think anyone will care that much. Signed-off-by: Andres Salomon Signed-off-by: Anton Vorontsov --- include/linux/power_supply.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 68ed19ccf1f7..ea96ead1d39d 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -78,6 +78,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_CHARGE_EMPTY, POWER_SUPPLY_PROP_CHARGE_NOW, POWER_SUPPLY_PROP_CHARGE_AVG, + POWER_SUPPLY_PROP_CHARGE_COUNTER, POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN, POWER_SUPPLY_PROP_ENERGY_EMPTY_DESIGN, POWER_SUPPLY_PROP_ENERGY_FULL, -- cgit v1.2.3 From a29ccf6f823a84d89e1c7aaaf221cf7282022024 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 3 Jun 2008 14:59:40 +0100 Subject: Make console charset translation optional By turning off the new CONSOLE_TRANSLATIONS option and dropping the associated code and tables from the kernel, we can save about 7KiB. Taken from linux-tiny project by Tim Bird and mangled further by dwmw2. Signed-off-by: Tim Bird Signed-off-by: David Woodhouse --- drivers/char/Kconfig | 8 ++++++++ drivers/char/Makefile | 4 ++-- drivers/char/vt.c | 2 +- include/linux/consolemap.h | 14 ++++++++++++++ include/linux/vt_kern.h | 19 +++++++++++++++++++ 5 files changed, 44 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 595a925c62a9..b7f7371dee73 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -36,6 +36,14 @@ config VT If unsure, say Y, or else you won't be able to do much with your new shiny Linux system :-) +config CONSOLE_TRANSLATIONS + depends on VT + default y + bool "Enable character translations in console" if EMBEDDED + ---help--- + This enables support for font mapping and Unicode translation + on virtual consoles. + config VT_CONSOLE bool "Support for console on virtual terminal" if EMBEDDED depends on VT diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 4c1c584e9eb6..6ef173cab144 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -12,8 +12,8 @@ obj-y += mem.o random.o tty_io.o n_tty.o tty_ioctl.o obj-$(CONFIG_LEGACY_PTYS) += pty.o obj-$(CONFIG_UNIX98_PTYS) += pty.o obj-y += misc.o -obj-$(CONFIG_VT) += vt_ioctl.o vc_screen.o consolemap.o \ - consolemap_deftbl.o selection.o keyboard.o +obj-$(CONFIG_VT) += vt_ioctl.o vc_screen.o selection.o keyboard.o +obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o obj-$(CONFIG_HW_CONSOLE) += vt.o defkeymap.o obj-$(CONFIG_AUDIT) += tty_audit.o obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o diff --git a/drivers/char/vt.c b/drivers/char/vt.c index fa1ffbf2c621..18b7fb06dace 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -2208,7 +2208,7 @@ rescan_last_byte: c = 0xfffd; tc = c; } else { /* no utf or alternate charset mode */ - tc = vc->vc_translate[vc->vc_toggle_meta ? 
(c | 0x80) : c]; + tc = vc_translate(vc, c); } param.c = tc; diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index e2bf7e5db39a..c4811da1338b 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -3,6 +3,9 @@ * * Interface between console.c, selection.c and consolemap.c */ +#ifndef __LINUX_CONSOLEMAP_H__ +#define __LINUX_CONSOLEMAP_H__ + #define LAT1_MAP 0 #define GRAF_MAP 1 #define IBMPC_MAP 2 @@ -10,6 +13,7 @@ #include +#ifdef CONFIG_CONSOLE_TRANSLATIONS struct vc_data; extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode); @@ -18,3 +22,13 @@ extern int conv_uni_to_pc(struct vc_data *conp, long ucs); extern u32 conv_8bit_to_uni(unsigned char c); extern int conv_uni_to_8bit(u32 uni); void console_map_init(void); +#else +#define inverse_translate(conp, glyph, uni) ((uint16_t)glyph) +#define set_translate(m, vc) ((unsigned short *)NULL) +#define conv_uni_to_pc(conp, ucs) ((int) (ucs > 0xff ? -1: ucs)) +#define conv_8bit_to_uni(c) ((uint32_t)(c)) +#define conv_uni_to_8bit(c) ((int) ((c) & 0xff)) +#define console_map_init(c) do { ; } while (0) +#endif /* CONFIG_CONSOLE_TRANSLATIONS */ + +#endif /* __LINUX_CONSOLEMAP_H__ */ diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index 9448ffbdcbf6..14c0e91be9b5 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -12,6 +12,7 @@ #include #include #include +#include /* * Presently, a lot of graphics programs do not restore the contents of @@ -54,6 +55,7 @@ void redraw_screen(struct vc_data *vc, int is_switch); struct tty_struct; int tioclinux(struct tty_struct *tty, unsigned long arg); +#ifdef CONFIG_CONSOLE_TRANSLATIONS /* consolemap.c */ struct unimapinit; @@ -71,6 +73,23 @@ void con_free_unimap(struct vc_data *vc); void con_protect_unimap(struct vc_data *vc, int rdonly); int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc); +#define vc_translate(vc, c) ((vc)->vc_translate[(c) | \ + (vc)->vc_toggle_meta ? 0x80 : 0]) +#else +#define con_set_trans_old(arg) (0) +#define con_get_trans_old(arg) (-EINVAL) +#define con_set_trans_new(arg) (0) +#define con_get_trans_new(arg) (-EINVAL) +#define con_clear_unimap(vc, ui) (0) +#define con_set_unimap(vc, ct, list) (0) +#define con_set_default_unimap(vc) (0) +#define con_copy_unimap(d, s) (0) +#define con_get_unimap(vc, ct, uct, list) (-EINVAL) +#define con_free_unimap(vc) do { ; } while (0) + +#define vc_translate(vc, c) (c) +#endif + /* vt.c */ int vt_waitactive(int vt); void change_console(struct vc_data *new_vc); -- cgit v1.2.3 From 59018b6d2acabb114ab58637e6ab95ba424a89d0 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 20 May 2008 01:03:52 +0300 Subject: MTD/JFFS2: remove CVS keywords Once upon a time, the MTD repository was using CVS. This patch therefore removes all usages of the no longer updated CVS keywords from the MTD code. This also includes code that printed them to the user. 
Signed-off-by: Adrian Bunk Signed-off-by: David Woodhouse --- drivers/mtd/Kconfig | 2 -- drivers/mtd/afs.c | 2 -- drivers/mtd/chips/cfi_cmdset_0001.c | 2 -- drivers/mtd/chips/cfi_cmdset_0002.c | 3 --- drivers/mtd/chips/cfi_cmdset_0020.c | 2 -- drivers/mtd/chips/cfi_probe.c | 1 - drivers/mtd/chips/cfi_util.c | 3 --- drivers/mtd/chips/chipreg.c | 2 -- drivers/mtd/chips/gen_probe.c | 1 - drivers/mtd/chips/jedec_probe.c | 1 - drivers/mtd/chips/map_absent.c | 1 - drivers/mtd/chips/map_ram.c | 1 - drivers/mtd/chips/map_rom.c | 1 - drivers/mtd/cmdlinepart.c | 2 -- drivers/mtd/devices/Kconfig | 1 - drivers/mtd/devices/Makefile | 1 - drivers/mtd/devices/block2mtd.c | 6 ------ drivers/mtd/devices/doc2000.c | 2 -- drivers/mtd/devices/doc2001.c | 2 -- drivers/mtd/devices/doc2001plus.c | 2 -- drivers/mtd/devices/docecc.c | 2 -- drivers/mtd/devices/docprobe.c | 3 --- drivers/mtd/devices/lart.c | 2 -- drivers/mtd/devices/ms02-nv.c | 2 -- drivers/mtd/devices/ms02-nv.h | 2 -- drivers/mtd/devices/mtdram.c | 1 - drivers/mtd/devices/phram.c | 2 -- drivers/mtd/devices/pmc551.c | 2 -- drivers/mtd/devices/slram.c | 2 -- drivers/mtd/ftl.c | 3 --- drivers/mtd/inftlcore.c | 5 ----- drivers/mtd/inftlmount.c | 4 ---- drivers/mtd/maps/Kconfig | 1 - drivers/mtd/maps/Makefile | 1 - drivers/mtd/maps/amd76xrom.c | 1 - drivers/mtd/maps/autcpu12-nvram.c | 2 -- drivers/mtd/maps/bast-flash.c | 2 -- drivers/mtd/maps/cdb89712.c | 1 - drivers/mtd/maps/ceiva.c | 1 - drivers/mtd/maps/cfi_flagadm.c | 2 -- drivers/mtd/maps/dbox2-flash.c | 2 -- drivers/mtd/maps/dc21285.c | 2 -- drivers/mtd/maps/dilnetpc.c | 2 -- drivers/mtd/maps/dmv182.c | 2 -- drivers/mtd/maps/ebony.c | 2 -- drivers/mtd/maps/edb7312.c | 2 -- drivers/mtd/maps/fortunet.c | 1 - drivers/mtd/maps/h720x-flash.c | 2 -- drivers/mtd/maps/ichxrom.c | 1 - drivers/mtd/maps/impa7.c | 2 -- drivers/mtd/maps/integrator-flash.c | 2 -- drivers/mtd/maps/ipaq-flash.c | 2 -- drivers/mtd/maps/ixp2000.c | 2 -- drivers/mtd/maps/ixp4xx.c | 2 -- drivers/mtd/maps/l440gx.c | 2 -- drivers/mtd/maps/map_funcs.c | 2 -- drivers/mtd/maps/mbx860.c | 2 -- drivers/mtd/maps/mtx-1_flash.c | 2 -- drivers/mtd/maps/netsc520.c | 2 -- drivers/mtd/maps/nettel.c | 2 -- drivers/mtd/maps/octagon-5066.c | 1 - drivers/mtd/maps/omap-toto-flash.c | 2 -- drivers/mtd/maps/pci.c | 2 -- drivers/mtd/maps/pcmciamtd.c | 5 +---- drivers/mtd/maps/physmap.c | 2 -- drivers/mtd/maps/plat-ram.c | 2 -- drivers/mtd/maps/redwood.c | 2 -- drivers/mtd/maps/rpxlite.c | 2 -- drivers/mtd/maps/sa1100-flash.c | 2 -- drivers/mtd/maps/sbc8240.c | 3 --- drivers/mtd/maps/sbc_gxx.c | 2 -- drivers/mtd/maps/sc520cdp.c | 2 -- drivers/mtd/maps/scb2_flash.c | 1 - drivers/mtd/maps/scx200_docflash.c | 2 -- drivers/mtd/maps/sharpsl-flash.c | 2 -- drivers/mtd/maps/solutionengine.c | 2 -- drivers/mtd/maps/sun_uflash.c | 2 +- drivers/mtd/maps/tqm8xxl.c | 2 -- drivers/mtd/maps/ts5500_flash.c | 2 -- drivers/mtd/maps/tsunami_flash.c | 1 - drivers/mtd/maps/uclinux.c | 2 -- drivers/mtd/maps/vmax301.c | 1 - drivers/mtd/maps/walnut.c | 2 -- drivers/mtd/maps/wr_sbc82xx_flash.c | 2 -- drivers/mtd/mtd_blkdevs.c | 2 -- drivers/mtd/mtdblock.c | 2 -- drivers/mtd/mtdblock_ro.c | 2 -- drivers/mtd/mtdchar.c | 2 -- drivers/mtd/mtdconcat.c | 2 -- drivers/mtd/mtdcore.c | 2 -- drivers/mtd/mtdpart.c | 2 -- drivers/mtd/nand/Kconfig | 1 - drivers/mtd/nand/Makefile | 1 - drivers/mtd/nand/au1550nd.c | 2 -- drivers/mtd/nand/autcpu12.c | 2 -- drivers/mtd/nand/diskonchip.c | 2 -- drivers/mtd/nand/edb7312.c | 2 -- drivers/mtd/nand/h1910.c | 2 -- drivers/mtd/nand/nand_bbt.c | 2 -- 
drivers/mtd/nand/nand_ecc.c | 2 -- drivers/mtd/nand/nand_ids.c | 2 -- drivers/mtd/nand/nandsim.c | 2 -- drivers/mtd/nand/ppchameleonevb.c | 2 -- drivers/mtd/nand/rtc_from4.c | 2 -- drivers/mtd/nand/s3c2410.c | 2 -- drivers/mtd/nand/sharpsl.c | 2 -- drivers/mtd/nand/spia.c | 2 -- drivers/mtd/nand/toto.c | 2 -- drivers/mtd/nand/ts7250.c | 2 -- drivers/mtd/nftlcore.c | 5 ----- drivers/mtd/nftlmount.c | 4 ---- drivers/mtd/redboot.c | 2 -- drivers/mtd/rfd_ftl.c | 2 -- include/linux/jffs2.h | 3 --- include/linux/mtd/blktrans.h | 2 -- include/linux/mtd/cfi.h | 1 - include/linux/mtd/cfi_endian.h | 5 ----- include/linux/mtd/concat.h | 2 -- include/linux/mtd/doc2000.h | 2 -- include/linux/mtd/flashchip.h | 3 --- include/linux/mtd/ftl.h | 2 -- include/linux/mtd/gen_probe.h | 1 - include/linux/mtd/inftl.h | 4 ---- include/linux/mtd/map.h | 1 - include/linux/mtd/mtd.h | 2 -- include/linux/mtd/nand.h | 2 -- include/linux/mtd/nand_ecc.h | 2 -- include/linux/mtd/nftl.h | 2 -- include/linux/mtd/partitions.h | 2 -- include/linux/mtd/physmap.h | 2 -- include/linux/mtd/plat-ram.h | 2 -- include/linux/mtd/pmc551.h | 4 +--- include/linux/mtd/xip.h | 2 -- include/mtd/inftl-user.h | 2 -- include/mtd/jffs2-user.h | 2 -- include/mtd/mtd-abi.h | 2 -- include/mtd/mtd-user.h | 2 -- include/mtd/nftl-user.h | 2 -- 138 files changed, 3 insertions(+), 279 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index eed06d068fd1..14f11f8b9e5f 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -1,5 +1,3 @@ -# $Id: Kconfig,v 1.11 2005/11/07 11:14:19 gleixner Exp $ - menuconfig MTD tristate "Memory Technology Device (MTD) support" depends on HAS_IOMEM diff --git a/drivers/mtd/afs.c b/drivers/mtd/afs.c index 52d51eb91c16..d072ca5be689 100644 --- a/drivers/mtd/afs.c +++ b/drivers/mtd/afs.c @@ -21,8 +21,6 @@ This is access code for flashes using ARM's flash partitioning standards. - $Id: afs.c,v 1.15 2005/11/07 11:14:19 gleixner Exp $ - ======================================================================*/ #include diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index cc6b7bb6de02..324ff82a3cd9 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -4,8 +4,6 @@ * * (C) 2000 Red Hat. GPL'd * - * $Id: cfi_cmdset_0001.c,v 1.186 2005/11/23 22:07:52 nico Exp $ - * * * 10/10/2000 Nicolas Pitre * - completely revamped method functions so they are aware and diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index f7fcc6389533..a972cc6be436 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -16,9 +16,6 @@ * Occasionally maintained by Thayne Harbaugh tharbaugh at lnxi dot com * * This code is GPL - * - * $Id: cfi_cmdset_0002.c,v 1.122 2005/11/07 11:14:22 gleixner Exp $ - * */ #include diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c index 1b720cc571f3..d4714dd9f7ab 100644 --- a/drivers/mtd/chips/cfi_cmdset_0020.c +++ b/drivers/mtd/chips/cfi_cmdset_0020.c @@ -4,8 +4,6 @@ * * (C) 2000 Red Hat. GPL'd * - * $Id: cfi_cmdset_0020.c,v 1.22 2005/11/07 11:14:22 gleixner Exp $ - * * 10/10/2000 Nicolas Pitre * - completely revamped method functions so they are aware and * independent of the flash geometry (buswidth, interleave, etc.) 
diff --git a/drivers/mtd/chips/cfi_probe.c b/drivers/mtd/chips/cfi_probe.c index a4463a91ce31..c418e92e1d92 100644 --- a/drivers/mtd/chips/cfi_probe.c +++ b/drivers/mtd/chips/cfi_probe.c @@ -1,7 +1,6 @@ /* Common Flash Interface probe code. (C) 2000 Red Hat. GPL'd. - $Id: cfi_probe.c,v 1.86 2005/11/29 14:48:31 gleixner Exp $ */ #include diff --git a/drivers/mtd/chips/cfi_util.c b/drivers/mtd/chips/cfi_util.c index 72e0022a47bf..0ee457018016 100644 --- a/drivers/mtd/chips/cfi_util.c +++ b/drivers/mtd/chips/cfi_util.c @@ -6,9 +6,6 @@ * Copyright (C) 2003 STMicroelectronics Limited * * This code is covered by the GPL. - * - * $Id: cfi_util.c,v 1.10 2005/11/07 11:14:23 gleixner Exp $ - * */ #include diff --git a/drivers/mtd/chips/chipreg.c b/drivers/mtd/chips/chipreg.c index 2174c97549f0..c85760968227 100644 --- a/drivers/mtd/chips/chipreg.c +++ b/drivers/mtd/chips/chipreg.c @@ -1,6 +1,4 @@ /* - * $Id: chipreg.c,v 1.17 2004/11/16 18:29:00 dwmw2 Exp $ - * * Registration for chip drivers * */ diff --git a/drivers/mtd/chips/gen_probe.c b/drivers/mtd/chips/gen_probe.c index d338b8c92780..e53a58ae384f 100644 --- a/drivers/mtd/chips/gen_probe.c +++ b/drivers/mtd/chips/gen_probe.c @@ -2,7 +2,6 @@ * Routines common to all CFI-type probes. * (C) 2001-2003 Red Hat, Inc. * GPL'd - * $Id: gen_probe.c,v 1.24 2005/11/07 11:14:23 gleixner Exp $ */ #include diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c index b229da8060dd..afb35e3a3cee 100644 --- a/drivers/mtd/chips/jedec_probe.c +++ b/drivers/mtd/chips/jedec_probe.c @@ -1,7 +1,6 @@ /* Common Flash Interface probe code. (C) 2000 Red Hat. GPL'd. - $Id: jedec_probe.c,v 1.66 2005/11/07 11:14:23 gleixner Exp $ See JEDEC (http://www.jedec.org/) standard JESD21C (section 3.5) for the standard this probe goes back to. diff --git a/drivers/mtd/chips/map_absent.c b/drivers/mtd/chips/map_absent.c index fc478c0f93f5..494d30d0631a 100644 --- a/drivers/mtd/chips/map_absent.c +++ b/drivers/mtd/chips/map_absent.c @@ -1,7 +1,6 @@ /* * Common code to handle absent "placeholder" devices * Copyright 2001 Resilience Corporation - * $Id: map_absent.c,v 1.6 2005/11/07 11:14:23 gleixner Exp $ * * This map driver is used to allocate "placeholder" MTD * devices on systems that have socketed/removable media. diff --git a/drivers/mtd/chips/map_ram.c b/drivers/mtd/chips/map_ram.c index 5cb6d5263661..072dd8abf33a 100644 --- a/drivers/mtd/chips/map_ram.c +++ b/drivers/mtd/chips/map_ram.c @@ -1,7 +1,6 @@ /* * Common code to handle map devices which are simple RAM * (C) 2000 Red Hat. GPL'd. - * $Id: map_ram.c,v 1.22 2005/01/05 18:05:12 dwmw2 Exp $ */ #include diff --git a/drivers/mtd/chips/map_rom.c b/drivers/mtd/chips/map_rom.c index cb27f855074c..821d0ed6bae3 100644 --- a/drivers/mtd/chips/map_rom.c +++ b/drivers/mtd/chips/map_rom.c @@ -1,7 +1,6 @@ /* * Common code to handle map devices which are simple ROM * (C) 2000 Red Hat. GPL'd. 
- * $Id: map_rom.c,v 1.23 2005/01/05 18:05:12 dwmw2 Exp $ */ #include diff --git a/drivers/mtd/cmdlinepart.c b/drivers/mtd/cmdlinepart.c index e472a0e9de9d..68782ab2f0de 100644 --- a/drivers/mtd/cmdlinepart.c +++ b/drivers/mtd/cmdlinepart.c @@ -1,6 +1,4 @@ /* - * $Id: cmdlinepart.c,v 1.19 2005/11/07 11:14:19 gleixner Exp $ - * * Read flash partition table from command line * * Copyright 2002 SYSGO Real-Time Solutions GmbH diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig index 35ed1103dbb2..9c613f06623c 100644 --- a/drivers/mtd/devices/Kconfig +++ b/drivers/mtd/devices/Kconfig @@ -1,5 +1,4 @@ # drivers/mtd/maps/Kconfig -# $Id: Kconfig,v 1.18 2005/11/07 11:14:24 gleixner Exp $ menu "Self-contained MTD device drivers" depends on MTD!=n diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile index 0f788d5c4bf8..0993d5cf3923 100644 --- a/drivers/mtd/devices/Makefile +++ b/drivers/mtd/devices/Makefile @@ -1,7 +1,6 @@ # # linux/drivers/devices/Makefile # -# $Id: Makefile.common,v 1.7 2004/12/22 17:51:15 joern Exp $ obj-$(CONFIG_MTD_DOC2000) += doc2000.o obj-$(CONFIG_MTD_DOC2001) += doc2001.o diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index 519d942e7940..303ea9b8cfe4 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -1,6 +1,4 @@ /* - * $Id: block2mtd.c,v 1.30 2005/11/29 14:48:32 gleixner Exp $ - * * block2mtd.c - create an mtd from a block device * * Copyright (C) 2001,2002 Simon Evans @@ -20,9 +18,6 @@ #include #include -#define VERSION "$Revision: 1.30 $" - - #define ERROR(fmt, args...) printk(KERN_ERR "block2mtd: " fmt "\n" , ## args) #define INFO(fmt, args...) printk(KERN_INFO "block2mtd: " fmt "\n" , ## args) @@ -451,7 +446,6 @@ MODULE_PARM_DESC(block2mtd, "Device to use. \"block2mtd=[,]\""); static int __init block2mtd_init(void) { int ret = 0; - INFO("version " VERSION); #ifndef MODULE if (strlen(block2mtd_paramline)) diff --git a/drivers/mtd/devices/doc2000.c b/drivers/mtd/devices/doc2000.c index 846989f292e3..50de839c77a9 100644 --- a/drivers/mtd/devices/doc2000.c +++ b/drivers/mtd/devices/doc2000.c @@ -3,8 +3,6 @@ * Linux driver for Disk-On-Chip 2000 and Millennium * (c) 1999 Machine Vision Holdings, Inc. * (c) 1999, 2000 David Woodhouse - * - * $Id: doc2000.c,v 1.67 2005/11/07 11:14:24 gleixner Exp $ */ #include diff --git a/drivers/mtd/devices/doc2001.c b/drivers/mtd/devices/doc2001.c index 6413efc045e0..e32c568c1145 100644 --- a/drivers/mtd/devices/doc2001.c +++ b/drivers/mtd/devices/doc2001.c @@ -3,8 +3,6 @@ * Linux driver for Disk-On-Chip Millennium * (c) 1999 Machine Vision Holdings, Inc. * (c) 1999, 2000 David Woodhouse - * - * $Id: doc2001.c,v 1.49 2005/11/07 11:14:24 gleixner Exp $ */ #include diff --git a/drivers/mtd/devices/doc2001plus.c b/drivers/mtd/devices/doc2001plus.c index 83be3461658f..d853f891b586 100644 --- a/drivers/mtd/devices/doc2001plus.c +++ b/drivers/mtd/devices/doc2001plus.c @@ -6,8 +6,6 @@ * (c) 1999 Machine Vision Holdings, Inc. * (c) 1999, 2000 David Woodhouse * - * $Id: doc2001plus.c,v 1.14 2005/11/07 11:14:24 gleixner Exp $ - * * Released under GPL */ diff --git a/drivers/mtd/devices/docecc.c b/drivers/mtd/devices/docecc.c index fd8a8daba3a8..874e51b110a2 100644 --- a/drivers/mtd/devices/docecc.c +++ b/drivers/mtd/devices/docecc.c @@ -7,8 +7,6 @@ * Author: Fabrice Bellard (fabrice.bellard@netgem.com) * Copyright (C) 2000 Netgem S.A. 
* - * $Id: docecc.c,v 1.7 2005/11/07 11:14:25 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or diff --git a/drivers/mtd/devices/docprobe.c b/drivers/mtd/devices/docprobe.c index d8cc94ec4e50..6e5d811ae83a 100644 --- a/drivers/mtd/devices/docprobe.c +++ b/drivers/mtd/devices/docprobe.c @@ -4,9 +4,6 @@ /* (C) 1999 Machine Vision Holdings, Inc. */ /* (C) 1999-2003 David Woodhouse */ -/* $Id: docprobe.c,v 1.46 2005/11/07 11:14:25 gleixner Exp $ */ - - /* DOC_PASSIVE_PROBE: In order to ensure that the BIOS checksum is correct at boot time, and diff --git a/drivers/mtd/devices/lart.c b/drivers/mtd/devices/lart.c index 1d324e5c412d..f4bda4cee495 100644 --- a/drivers/mtd/devices/lart.c +++ b/drivers/mtd/devices/lart.c @@ -2,8 +2,6 @@ /* * MTD driver for the 28F160F3 Flash Memory (non-CFI) on LART. * - * $Id: lart.c,v 1.9 2005/11/07 11:14:25 gleixner Exp $ - * * Author: Abraham vd Merwe * * Copyright (c) 2001, 2d3D, Inc. diff --git a/drivers/mtd/devices/ms02-nv.c b/drivers/mtd/devices/ms02-nv.c index 9cff119a2024..6a9a24a80a6d 100644 --- a/drivers/mtd/devices/ms02-nv.c +++ b/drivers/mtd/devices/ms02-nv.c @@ -5,8 +5,6 @@ * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. - * - * $Id: ms02-nv.c,v 1.11 2005/11/14 13:41:47 macro Exp $ */ #include diff --git a/drivers/mtd/devices/ms02-nv.h b/drivers/mtd/devices/ms02-nv.h index 8a6eef7cfee3..04deafd3a771 100644 --- a/drivers/mtd/devices/ms02-nv.h +++ b/drivers/mtd/devices/ms02-nv.h @@ -9,8 +9,6 @@ * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. - * - * $Id: ms02-nv.h,v 1.3 2003/08/19 09:25:36 dwmw2 Exp $ */ #include diff --git a/drivers/mtd/devices/mtdram.c b/drivers/mtd/devices/mtdram.c index 0399be178620..3aaca88847d3 100644 --- a/drivers/mtd/devices/mtdram.c +++ b/drivers/mtd/devices/mtdram.c @@ -1,6 +1,5 @@ /* * mtdram - a test mtd device - * $Id: mtdram.c,v 1.37 2005/04/21 03:42:11 joern Exp $ * Author: Alexander Larsson * * Copyright (c) 1999 Alexander Larsson diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c index c7987b1c5e01..088fbb7595b5 100644 --- a/drivers/mtd/devices/phram.c +++ b/drivers/mtd/devices/phram.c @@ -1,6 +1,4 @@ /** - * $Id: phram.c,v 1.16 2005/11/07 11:14:25 gleixner Exp $ - * * Copyright (c) ???? Jochen Schäuble * Copyright (c) 2003-2004 Joern Engel * diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c index bc9981749064..d38bca64bb15 100644 --- a/drivers/mtd/devices/pmc551.c +++ b/drivers/mtd/devices/pmc551.c @@ -1,6 +1,4 @@ /* - * $Id: pmc551.c,v 1.32 2005/11/07 11:14:25 gleixner Exp $ - * * PMC551 PCI Mezzanine Ram Device * * Author: diff --git a/drivers/mtd/devices/slram.c b/drivers/mtd/devices/slram.c index cb86db746f28..a425d09f35a0 100644 --- a/drivers/mtd/devices/slram.c +++ b/drivers/mtd/devices/slram.c @@ -1,7 +1,5 @@ /*====================================================================== - $Id: slram.c,v 1.36 2005/11/07 11:14:25 gleixner Exp $ - This driver provides a method to access memory not used by the kernel itself (i.e. if the kernel commandline mem=xxx is used). 
To actually use slram at least mtdblock or mtdchar is required (for block or diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c index 4a79b187b568..3fed8f94ac6f 100644 --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -1,5 +1,4 @@ /* This version ported to the Linux-MTD system by dwmw2@infradead.org - * $Id: ftl.c,v 1.59 2005/11/29 14:48:31 gleixner Exp $ * * Fixes: Arnaldo Carvalho de Melo * - fixes some leaks on failure in build_maps and ftl_notify_add, cleanups @@ -1082,8 +1081,6 @@ static struct mtd_blktrans_ops ftl_tr = { static int init_ftl(void) { - DEBUG(0, "$Id: ftl.c,v 1.59 2005/11/29 14:48:31 gleixner Exp $\n"); - return register_mtd_blktrans(&ftl_tr); } diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c index b0e396504e67..c4f9d3378b24 100644 --- a/drivers/mtd/inftlcore.c +++ b/drivers/mtd/inftlcore.c @@ -7,8 +7,6 @@ * (c) 1999 Machine Vision Holdings, Inc. * Author: David Woodhouse * - * $Id: inftlcore.c,v 1.19 2005/11/07 11:14:20 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -953,9 +951,6 @@ static struct mtd_blktrans_ops inftl_tr = { static int __init init_inftl(void) { - printk(KERN_INFO "INFTL: inftlcore.c $Revision: 1.19 $, " - "inftlmount.c %s\n", inftlmountrev); - return register_mtd_blktrans(&inftl_tr); } diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c index c551d2f0779c..9113628ed1ef 100644 --- a/drivers/mtd/inftlmount.c +++ b/drivers/mtd/inftlmount.c @@ -8,8 +8,6 @@ * Author: Fabrice Bellard (fabrice.bellard@netgem.com) * Copyright (C) 2000 Netgem S.A. * - * $Id: inftlmount.c,v 1.18 2005/11/07 11:14:20 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -39,8 +37,6 @@ #include #include -char inftlmountrev[]="$Revision: 1.18 $"; - /* * find_boot_record: Find the INFTL Media Header and its Spare copy which * contains the various device information of the INFTL partition and diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index 1bd69aa9e22a..b2c180245618 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -1,5 +1,4 @@ # drivers/mtd/maps/Kconfig -# $Id: Kconfig,v 1.61 2005/11/07 11:14:26 gleixner Exp $ menu "Mapping drivers for chip access" depends on MTD!=n diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile index a9cbe80f99a0..5444eaf4026f 100644 --- a/drivers/mtd/maps/Makefile +++ b/drivers/mtd/maps/Makefile @@ -1,7 +1,6 @@ # # linux/drivers/maps/Makefile # -# $Id: Makefile.common,v 1.34 2005/11/07 11:14:26 gleixner Exp $ ifeq ($(CONFIG_MTD_COMPLEX_MAPPINGS),y) obj-$(CONFIG_MTD) += map_funcs.o diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c index 728aed6ad722..948b86f35ef4 100644 --- a/drivers/mtd/maps/amd76xrom.c +++ b/drivers/mtd/maps/amd76xrom.c @@ -2,7 +2,6 @@ * amd76xrom.c * * Normal mappings of chips in physical memory - * $Id: amd76xrom.c,v 1.21 2005/11/07 11:14:26 gleixner Exp $ */ #include diff --git a/drivers/mtd/maps/autcpu12-nvram.c b/drivers/mtd/maps/autcpu12-nvram.c index 7ed3424dd959..cf32267263df 100644 --- a/drivers/mtd/maps/autcpu12-nvram.c +++ b/drivers/mtd/maps/autcpu12-nvram.c @@ -2,8 +2,6 @@ * NV-RAM memory access on autcpu12 * (C) 2002 Thomas Gleixner (gleixner@autronix.de) * - * 
$Id: autcpu12-nvram.c,v 1.9 2005/11/07 11:14:26 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or diff --git a/drivers/mtd/maps/bast-flash.c b/drivers/mtd/maps/bast-flash.c index 1f492062f8ca..ca5414880341 100644 --- a/drivers/mtd/maps/bast-flash.c +++ b/drivers/mtd/maps/bast-flash.c @@ -9,8 +9,6 @@ * 20-Sep-2004 BJD Initial version * 17-Jan-2005 BJD Add whole device if no partitions found * - * $Id: bast-flash.c,v 1.5 2005/11/07 11:14:26 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or diff --git a/drivers/mtd/maps/cdb89712.c b/drivers/mtd/maps/cdb89712.c index 9f17bb6c5a9d..cb507da0a87d 100644 --- a/drivers/mtd/maps/cdb89712.c +++ b/drivers/mtd/maps/cdb89712.c @@ -1,7 +1,6 @@ /* * Flash on Cirrus CDB89712 * - * $Id: cdb89712.c,v 1.11 2005/11/07 11:14:26 gleixner Exp $ */ #include diff --git a/drivers/mtd/maps/ceiva.c b/drivers/mtd/maps/ceiva.c index 629e6e2641a8..6464d487eb1a 100644 --- a/drivers/mtd/maps/ceiva.c +++ b/drivers/mtd/maps/ceiva.c @@ -11,7 +11,6 @@ * * (C) 2000 Nicolas Pitre * - * $Id: ceiva.c,v 1.11 2004/09/16 23:27:12 gleixner Exp $ */ #include diff --git a/drivers/mtd/maps/cfi_flagadm.c b/drivers/mtd/maps/cfi_flagadm.c index 65e5ee552010..0ecc3f6d735b 100644 --- a/drivers/mtd/maps/cfi_flagadm.c +++ b/drivers/mtd/maps/cfi_flagadm.c @@ -1,8 +1,6 @@ /* * Copyright © 2001 Flaga hf. Medical Devices, Kári Davíðsson * - * $Id: cfi_flagadm.c,v 1.15 2005/11/07 11:14:26 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your diff --git a/drivers/mtd/maps/dbox2-flash.c b/drivers/mtd/maps/dbox2-flash.c index 92a9c7fac993..e115667bf1d0 100644 --- a/drivers/mtd/maps/dbox2-flash.c +++ b/drivers/mtd/maps/dbox2-flash.c @@ -1,6 +1,4 @@ /* - * $Id: dbox2-flash.c,v 1.14 2005/11/07 11:14:26 gleixner Exp $ - * * D-Box 2 flash driver */ diff --git a/drivers/mtd/maps/dc21285.c b/drivers/mtd/maps/dc21285.c index b32bb9347d71..3aa018c092f8 100644 --- a/drivers/mtd/maps/dc21285.c +++ b/drivers/mtd/maps/dc21285.c @@ -4,8 +4,6 @@ * (C) 2000 Nicolas Pitre * * This code is GPL - * - * $Id: dc21285.c,v 1.24 2005/11/07 11:14:26 gleixner Exp $ */ #include #include diff --git a/drivers/mtd/maps/dilnetpc.c b/drivers/mtd/maps/dilnetpc.c index 1c3b34ad7325..0713e3a5a22c 100644 --- a/drivers/mtd/maps/dilnetpc.c +++ b/drivers/mtd/maps/dilnetpc.c @@ -14,8 +14,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: dilnetpc.c,v 1.20 2005/11/07 11:14:26 gleixner Exp $ - * * The DIL/Net PC is a tiny embedded PC board made by SSV Embedded Systems * featuring the AMD Elan SC410 processor. There are two variants of this * board: DNP/1486 and ADNP/1486. 
The DNP version has 2 megs of flash diff --git a/drivers/mtd/maps/dmv182.c b/drivers/mtd/maps/dmv182.c index e0558b0b2fe6..d171674eb2ed 100644 --- a/drivers/mtd/maps/dmv182.c +++ b/drivers/mtd/maps/dmv182.c @@ -4,8 +4,6 @@ * * Flash map driver for the Dy4 SVME182 board * - * $Id: dmv182.c,v 1.6 2005/11/07 11:14:26 gleixner Exp $ - * * Copyright 2003-2004, TimeSys Corporation * * Based on the SVME181 flash map, by Tom Nelson, Dot4, Inc. for TimeSys Corp. diff --git a/drivers/mtd/maps/ebony.c b/drivers/mtd/maps/ebony.c index 1488bb92f26f..d92b7c70d3ed 100644 --- a/drivers/mtd/maps/ebony.c +++ b/drivers/mtd/maps/ebony.c @@ -1,6 +1,4 @@ /* - * $Id: ebony.c,v 1.16 2005/11/07 11:14:26 gleixner Exp $ - * * Mapping for Ebony user flash * * Matt Porter diff --git a/drivers/mtd/maps/edb7312.c b/drivers/mtd/maps/edb7312.c index 1c5b97c89685..9433738c1664 100644 --- a/drivers/mtd/maps/edb7312.c +++ b/drivers/mtd/maps/edb7312.c @@ -1,6 +1,4 @@ /* - * $Id: edb7312.c,v 1.14 2005/11/07 11:14:27 gleixner Exp $ - * * Handle mapping of the NOR flash on Cogent EDB7312 boards * * Copyright 2002 SYSGO Real-Time Solutions GmbH diff --git a/drivers/mtd/maps/fortunet.c b/drivers/mtd/maps/fortunet.c index 7c50c271651c..a8e3fde4cbd5 100644 --- a/drivers/mtd/maps/fortunet.c +++ b/drivers/mtd/maps/fortunet.c @@ -1,6 +1,5 @@ /* fortunet.c memory map * - * $Id: fortunet.c,v 1.11 2005/11/07 11:14:27 gleixner Exp $ */ #include diff --git a/drivers/mtd/maps/h720x-flash.c b/drivers/mtd/maps/h720x-flash.c index 6dde3182d64a..ef8915474462 100644 --- a/drivers/mtd/maps/h720x-flash.c +++ b/drivers/mtd/maps/h720x-flash.c @@ -2,8 +2,6 @@ * Flash memory access on Hynix GMS30C7201/HMS30C7202 based * evaluation boards * - * $Id: h720x-flash.c,v 1.12 2005/11/07 11:14:27 gleixner Exp $ - * * (C) 2002 Jungjun Kim * 2003 Thomas Gleixner */ diff --git a/drivers/mtd/maps/ichxrom.c b/drivers/mtd/maps/ichxrom.c index 2c884c49e84a..aeb6c916e23f 100644 --- a/drivers/mtd/maps/ichxrom.c +++ b/drivers/mtd/maps/ichxrom.c @@ -2,7 +2,6 @@ * ichxrom.c * * Normal mappings of chips in physical memory - * $Id: ichxrom.c,v 1.19 2005/11/07 11:14:27 gleixner Exp $ */ #include diff --git a/drivers/mtd/maps/impa7.c b/drivers/mtd/maps/impa7.c index a0b4dc7155dc..2682ab51a367 100644 --- a/drivers/mtd/maps/impa7.c +++ b/drivers/mtd/maps/impa7.c @@ -1,6 +1,4 @@ /* - * $Id: impa7.c,v 1.14 2005/11/07 11:14:27 gleixner Exp $ - * * Handle mapping of the NOR flash on implementa A7 boards * * Copyright 2002 SYSGO Real-Time Solutions GmbH diff --git a/drivers/mtd/maps/integrator-flash.c b/drivers/mtd/maps/integrator-flash.c index 325c8880c437..ee361aaadb1e 100644 --- a/drivers/mtd/maps/integrator-flash.c +++ b/drivers/mtd/maps/integrator-flash.c @@ -22,8 +22,6 @@ This is access code for flashes using ARM's flash partitioning standards. 
- $Id: integrator-flash.c,v 1.20 2005/11/07 11:14:27 gleixner Exp $ - ======================================================================*/ #include diff --git a/drivers/mtd/maps/ipaq-flash.c b/drivers/mtd/maps/ipaq-flash.c index f27c132794c3..a806119797e0 100644 --- a/drivers/mtd/maps/ipaq-flash.c +++ b/drivers/mtd/maps/ipaq-flash.c @@ -4,8 +4,6 @@ * (C) 2000 Nicolas Pitre * (C) 2002 Hewlett-Packard Company * (C) 2003 Christian Pellegrin , : concatenation of multiple flashes - * - * $Id: ipaq-flash.c,v 1.5 2005/11/07 11:14:27 gleixner Exp $ */ #include diff --git a/drivers/mtd/maps/ixp2000.c b/drivers/mtd/maps/ixp2000.c index c8396b8574c4..c2264792a20b 100644 --- a/drivers/mtd/maps/ixp2000.c +++ b/drivers/mtd/maps/ixp2000.c @@ -1,6 +1,4 @@ /* - * $Id: ixp2000.c,v 1.9 2005/11/07 11:14:27 gleixner Exp $ - * * drivers/mtd/maps/ixp2000.c * * Mapping for the Intel XScale IXP2000 based systems diff --git a/drivers/mtd/maps/ixp4xx.c b/drivers/mtd/maps/ixp4xx.c index 01f19a4714b5..9c7a5fbd4e51 100644 --- a/drivers/mtd/maps/ixp4xx.c +++ b/drivers/mtd/maps/ixp4xx.c @@ -1,6 +1,4 @@ /* - * $Id: ixp4xx.c,v 1.13 2005/11/16 16:23:21 dvrabel Exp $ - * * drivers/mtd/maps/ixp4xx.c * * MTD Map file for IXP4XX based systems. Please do not make per-board diff --git a/drivers/mtd/maps/l440gx.c b/drivers/mtd/maps/l440gx.c index 67620adf4811..9e054503c4cf 100644 --- a/drivers/mtd/maps/l440gx.c +++ b/drivers/mtd/maps/l440gx.c @@ -1,6 +1,4 @@ /* - * $Id: l440gx.c,v 1.18 2005/11/07 11:14:27 gleixner Exp $ - * * BIOS Flash chip on Intel 440GX board. * * Bugs this currently does not work under linuxBIOS. diff --git a/drivers/mtd/maps/map_funcs.c b/drivers/mtd/maps/map_funcs.c index 9105e6ca0aa6..3f268370eeca 100644 --- a/drivers/mtd/maps/map_funcs.c +++ b/drivers/mtd/maps/map_funcs.c @@ -1,6 +1,4 @@ /* - * $Id: map_funcs.c,v 1.10 2005/06/06 23:04:36 tpoynor Exp $ - * * Out-of-line map I/O functions for simple maps when CONFIG_COMPLEX_MAPPINGS * is enabled. 
*/ diff --git a/drivers/mtd/maps/mbx860.c b/drivers/mtd/maps/mbx860.c index 06b118727846..706f67394b07 100644 --- a/drivers/mtd/maps/mbx860.c +++ b/drivers/mtd/maps/mbx860.c @@ -1,6 +1,4 @@ /* - * $Id: mbx860.c,v 1.9 2005/11/07 11:14:27 gleixner Exp $ - * * Handle mapping of the flash on MBX860 boards * * Author: Anton Todorov diff --git a/drivers/mtd/maps/mtx-1_flash.c b/drivers/mtd/maps/mtx-1_flash.c index 2a8fde9b92f0..a3b651904127 100644 --- a/drivers/mtd/maps/mtx-1_flash.c +++ b/drivers/mtd/maps/mtx-1_flash.c @@ -1,8 +1,6 @@ /* * Flash memory access on 4G Systems MTX-1 boards * - * $Id: mtx-1_flash.c,v 1.2 2005/11/07 11:14:27 gleixner Exp $ - * * (C) 2005 Bruno Randolf * (C) 2005 Joern Engel * diff --git a/drivers/mtd/maps/netsc520.c b/drivers/mtd/maps/netsc520.c index 95dcab2146ad..c0cb319b2b70 100644 --- a/drivers/mtd/maps/netsc520.c +++ b/drivers/mtd/maps/netsc520.c @@ -3,8 +3,6 @@ * Copyright (C) 2001 Mark Langsdorf (mark.langsdorf@amd.com) * based on sc520cdp.c by Sysgo Real-Time Solutions GmbH * - * $Id: netsc520.c,v 1.14 2005/11/07 11:14:27 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or diff --git a/drivers/mtd/maps/nettel.c b/drivers/mtd/maps/nettel.c index 0c9b305a72e0..965e6c6d6ab0 100644 --- a/drivers/mtd/maps/nettel.c +++ b/drivers/mtd/maps/nettel.c @@ -5,8 +5,6 @@ * * (C) Copyright 2000-2001, Greg Ungerer (gerg@snapgear.com) * (C) Copyright 2001-2002, SnapGear (www.snapgear.com) - * - * $Id: nettel.c,v 1.12 2005/11/29 14:30:00 gleixner Exp $ */ /****************************************************************************/ diff --git a/drivers/mtd/maps/octagon-5066.c b/drivers/mtd/maps/octagon-5066.c index a6642db3d325..43e04c1d22a9 100644 --- a/drivers/mtd/maps/octagon-5066.c +++ b/drivers/mtd/maps/octagon-5066.c @@ -1,4 +1,3 @@ -// $Id: octagon-5066.c,v 1.28 2005/11/07 11:14:27 gleixner Exp $ /* ###################################################################### Octagon 5066 MTD Driver. diff --git a/drivers/mtd/maps/omap-toto-flash.c b/drivers/mtd/maps/omap-toto-flash.c index e6e391efbeb6..0a60ebbc2175 100644 --- a/drivers/mtd/maps/omap-toto-flash.c +++ b/drivers/mtd/maps/omap-toto-flash.c @@ -4,8 +4,6 @@ * jzhang@ti.com (C) 2003 Texas Instruments. * * (C) 2002 MontVista Software, Inc. - * - * $Id: omap-toto-flash.c,v 1.5 2005/11/07 11:14:27 gleixner Exp $ */ #include diff --git a/drivers/mtd/maps/pci.c b/drivers/mtd/maps/pci.c index d2ab1bae9c34..5c6a25c90380 100644 --- a/drivers/mtd/maps/pci.c +++ b/drivers/mtd/maps/pci.c @@ -7,8 +7,6 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * - * $Id: pci.c,v 1.14 2005/11/17 08:20:27 dwmw2 Exp $ - * * Generic PCI memory map driver. We support the following boards: * - Intel IQ80310 ATU. * - Intel EBSA285 (blank rom programming mode). 
Tested working 27/09/2001 diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c index 1912d968718b..8f7ca863f89d 100644 --- a/drivers/mtd/maps/pcmciamtd.c +++ b/drivers/mtd/maps/pcmciamtd.c @@ -1,6 +1,4 @@ /* - * $Id: pcmciamtd.c,v 1.55 2005/11/07 11:14:28 gleixner Exp $ - * * pcmciamtd.c - MTD driver for PCMCIA flash memory cards * * Author: Simon Evans @@ -48,7 +46,6 @@ static const int debug = 0; #define DRIVER_DESC "PCMCIA Flash memory card driver" -#define DRIVER_VERSION "$Revision: 1.55 $" /* Size of the PCMCIA address space: 26 bits = 64 MB */ #define MAX_PCMCIA_ADDR 0x4000000 @@ -790,7 +787,7 @@ static struct pcmcia_driver pcmciamtd_driver = { static int __init init_pcmciamtd(void) { - info(DRIVER_DESC " " DRIVER_VERSION); + info(DRIVER_DESC); if(bankwidth && bankwidth != 1 && bankwidth != 2) { info("bad bankwidth (%d), using default", bankwidth); diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c index 183255fcfdcb..1f6b9066b63e 100644 --- a/drivers/mtd/maps/physmap.c +++ b/drivers/mtd/maps/physmap.c @@ -1,6 +1,4 @@ /* - * $Id: physmap.c,v 1.39 2005/11/29 14:49:36 gleixner Exp $ - * * Normal mappings of chips in physical memory * * Copyright (C) 2003 MontaVista Software Inc. diff --git a/drivers/mtd/maps/plat-ram.c b/drivers/mtd/maps/plat-ram.c index 3eb2643b2328..e7dd9c8a965e 100644 --- a/drivers/mtd/maps/plat-ram.c +++ b/drivers/mtd/maps/plat-ram.c @@ -6,8 +6,6 @@ * * Generic platfrom device based RAM map * - * $Id: plat-ram.c,v 1.7 2005/11/07 11:14:28 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or diff --git a/drivers/mtd/maps/redwood.c b/drivers/mtd/maps/redwood.c index 4d858b3d5f82..de002eb1a7fe 100644 --- a/drivers/mtd/maps/redwood.c +++ b/drivers/mtd/maps/redwood.c @@ -1,6 +1,4 @@ /* - * $Id: redwood.c,v 1.11 2005/11/07 11:14:28 gleixner Exp $ - * * drivers/mtd/maps/redwood.c * * FLASH map for the IBM Redwood 4/5/6 boards. diff --git a/drivers/mtd/maps/rpxlite.c b/drivers/mtd/maps/rpxlite.c index 809a0c8e7aaf..14d90edb4430 100644 --- a/drivers/mtd/maps/rpxlite.c +++ b/drivers/mtd/maps/rpxlite.c @@ -1,6 +1,4 @@ /* - * $Id: rpxlite.c,v 1.22 2004/11/04 13:24:15 gleixner Exp $ - * * Handle mapping of the flash on the RPX Lite and CLLF boards */ diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c index c7d5a52a2d55..e177a43dfff0 100644 --- a/drivers/mtd/maps/sa1100-flash.c +++ b/drivers/mtd/maps/sa1100-flash.c @@ -2,8 +2,6 @@ * Flash memory access on SA11x0 based devices * * (C) 2000 Nicolas Pitre - * - * $Id: sa1100-flash.c,v 1.51 2005/11/07 11:14:28 gleixner Exp $ */ #include #include diff --git a/drivers/mtd/maps/sbc8240.c b/drivers/mtd/maps/sbc8240.c index b8c1331b7a04..6e1e99cd2b59 100644 --- a/drivers/mtd/maps/sbc8240.c +++ b/drivers/mtd/maps/sbc8240.c @@ -4,9 +4,6 @@ * Carolyn Smith, Tektronix, Inc. 
* * This code is GPLed - * - * $Id: sbc8240.c,v 1.5 2005/11/07 11:14:28 gleixner Exp $ - * */ /* diff --git a/drivers/mtd/maps/sbc_gxx.c b/drivers/mtd/maps/sbc_gxx.c index 7cc4041d096d..1b1c0b7e11ef 100644 --- a/drivers/mtd/maps/sbc_gxx.c +++ b/drivers/mtd/maps/sbc_gxx.c @@ -17,8 +17,6 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - $Id: sbc_gxx.c,v 1.35 2005/11/07 11:14:28 gleixner Exp $ - The SBC-MediaGX / SBC-GXx has up to 16 MiB of Intel StrataFlash (28F320/28F640) in x8 mode. diff --git a/drivers/mtd/maps/sc520cdp.c b/drivers/mtd/maps/sc520cdp.c index 4045e372b90d..85c1e56309ec 100644 --- a/drivers/mtd/maps/sc520cdp.c +++ b/drivers/mtd/maps/sc520cdp.c @@ -16,8 +16,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: sc520cdp.c,v 1.23 2005/11/17 08:20:27 dwmw2 Exp $ - * * * The SC520CDP is an evaluation board for the Elan SC520 processor available * from AMD. It has two banks of 32-bit Flash ROM, each 8 Megabytes in size, diff --git a/drivers/mtd/maps/scb2_flash.c b/drivers/mtd/maps/scb2_flash.c index 0fc5584324e3..21169e6d646c 100644 --- a/drivers/mtd/maps/scb2_flash.c +++ b/drivers/mtd/maps/scb2_flash.c @@ -1,6 +1,5 @@ /* * MTD map driver for BIOS Flash on Intel SCB2 boards - * $Id: scb2_flash.c,v 1.12 2005/03/18 14:04:35 gleixner Exp $ * Copyright (C) 2002 Sun Microsystems, Inc. * Tim Hockin * diff --git a/drivers/mtd/maps/scx200_docflash.c b/drivers/mtd/maps/scx200_docflash.c index 5e2bce22f37c..b5391ebb736e 100644 --- a/drivers/mtd/maps/scx200_docflash.c +++ b/drivers/mtd/maps/scx200_docflash.c @@ -2,8 +2,6 @@ Copyright (c) 2001,2002 Christer Weinigel - $Id: scx200_docflash.c,v 1.12 2005/11/07 11:14:28 gleixner Exp $ - National Semiconductor SCx200 flash mapped with DOCCS */ diff --git a/drivers/mtd/maps/sharpsl-flash.c b/drivers/mtd/maps/sharpsl-flash.c index 917dc778f24e..026eab028189 100644 --- a/drivers/mtd/maps/sharpsl-flash.c +++ b/drivers/mtd/maps/sharpsl-flash.c @@ -4,8 +4,6 @@ * Copyright (C) 2001 Lineo Japan, Inc. * Copyright (C) 2002 SHARP * - * $Id: sharpsl-flash.c,v 1.7 2005/11/07 11:14:28 gleixner Exp $ - * * based on rpxlite.c,v 1.15 2001/10/02 15:05:14 dwmw2 Exp * Handle mapping of the flash on the RPX Lite and CLLF boards * diff --git a/drivers/mtd/maps/solutionengine.c b/drivers/mtd/maps/solutionengine.c index d76ceef453ce..0eb41d9c6786 100644 --- a/drivers/mtd/maps/solutionengine.c +++ b/drivers/mtd/maps/solutionengine.c @@ -1,6 +1,4 @@ /* - * $Id: solutionengine.c,v 1.15 2005/11/07 11:14:28 gleixner Exp $ - * * Flash and EPROM on Hitachi Solution Engine and similar boards. * * (C) 2001 Red Hat, Inc. diff --git a/drivers/mtd/maps/sun_uflash.c b/drivers/mtd/maps/sun_uflash.c index 001af7f7ddda..0d7c88396c88 100644 --- a/drivers/mtd/maps/sun_uflash.c +++ b/drivers/mtd/maps/sun_uflash.c @@ -1,4 +1,4 @@ -/* $Id: sun_uflash.c,v 1.13 2005/11/07 11:14:28 gleixner Exp $ +/* * * sun_uflash - Driver implementation for user-programmable flash * present on many Sun Microsystems SME boardsets. diff --git a/drivers/mtd/maps/tqm8xxl.c b/drivers/mtd/maps/tqm8xxl.c index 521734057314..a5d3d8531faa 100644 --- a/drivers/mtd/maps/tqm8xxl.c +++ b/drivers/mtd/maps/tqm8xxl.c @@ -2,8 +2,6 @@ * Handle mapping of the flash memory access routines * on TQM8xxL based devices. 
* - * $Id: tqm8xxl.c,v 1.15 2005/11/07 11:14:28 gleixner Exp $ - * * based on rpxlite.c * * Copyright(C) 2001 Kirk Lee diff --git a/drivers/mtd/maps/ts5500_flash.c b/drivers/mtd/maps/ts5500_flash.c index b47270e850bc..e2147bf11c88 100644 --- a/drivers/mtd/maps/ts5500_flash.c +++ b/drivers/mtd/maps/ts5500_flash.c @@ -22,8 +22,6 @@ * - Drive A and B use the resident flash disk (RFD) flash translation layer. * - If you have created your own jffs file system and the bios overwrites * it during boot, try disabling Drive A: and B: in the boot order. - * - * $Id: ts5500_flash.c,v 1.5 2005/11/07 11:14:28 gleixner Exp $ */ #include diff --git a/drivers/mtd/maps/tsunami_flash.c b/drivers/mtd/maps/tsunami_flash.c index 0f915ac3102e..77a8bfc02577 100644 --- a/drivers/mtd/maps/tsunami_flash.c +++ b/drivers/mtd/maps/tsunami_flash.c @@ -2,7 +2,6 @@ * tsunami_flash.c * * flash chip on alpha ds10... - * $Id: tsunami_flash.c,v 1.10 2005/11/07 11:14:29 gleixner Exp $ */ #include #include diff --git a/drivers/mtd/maps/uclinux.c b/drivers/mtd/maps/uclinux.c index c42f4b83f686..bac000a88313 100644 --- a/drivers/mtd/maps/uclinux.c +++ b/drivers/mtd/maps/uclinux.c @@ -4,8 +4,6 @@ * uclinux.c -- generic memory mapped MTD driver for uclinux * * (C) Copyright 2002, Greg Ungerer (gerg@snapgear.com) - * - * $Id: uclinux.c,v 1.12 2005/11/07 11:14:29 gleixner Exp $ */ /****************************************************************************/ diff --git a/drivers/mtd/maps/vmax301.c b/drivers/mtd/maps/vmax301.c index b3e487395435..5a0c9a353b0f 100644 --- a/drivers/mtd/maps/vmax301.c +++ b/drivers/mtd/maps/vmax301.c @@ -1,4 +1,3 @@ -// $Id: vmax301.c,v 1.32 2005/11/07 11:14:29 gleixner Exp $ /* ###################################################################### Tempustech VMAX SBC301 MTD Driver. diff --git a/drivers/mtd/maps/walnut.c b/drivers/mtd/maps/walnut.c index ca932122fb64..e243476c8171 100644 --- a/drivers/mtd/maps/walnut.c +++ b/drivers/mtd/maps/walnut.c @@ -1,6 +1,4 @@ /* - * $Id: walnut.c,v 1.3 2005/11/07 11:14:29 gleixner Exp $ - * * Mapping for Walnut flash * (used ebony.c as a "framework") * diff --git a/drivers/mtd/maps/wr_sbc82xx_flash.c b/drivers/mtd/maps/wr_sbc82xx_flash.c index ac5b8105b6ef..413b0cf9bbd2 100644 --- a/drivers/mtd/maps/wr_sbc82xx_flash.c +++ b/drivers/mtd/maps/wr_sbc82xx_flash.c @@ -1,6 +1,4 @@ /* - * $Id: wr_sbc82xx_flash.c,v 1.8 2005/11/07 11:14:29 gleixner Exp $ - * * Map for flash chips on Wind River PowerQUICC II SBC82xx board. * * Copyright (C) 2004 Red Hat, Inc. diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 839eed8430a2..a0ada45672d8 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -1,6 +1,4 @@ /* - * $Id: mtd_blkdevs.c,v 1.27 2005/11/07 11:14:20 gleixner Exp $ - * * (C) 2003 David Woodhouse * * Interface to Linux 2.5 block layer for MTD 'translation layers'. 
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c index 952da30b1745..208c6faa0358 100644 --- a/drivers/mtd/mtdblock.c +++ b/drivers/mtd/mtdblock.c @@ -1,8 +1,6 @@ /* * Direct MTD block device access * - * $Id: mtdblock.c,v 1.68 2005/11/07 11:14:20 gleixner Exp $ - * * (C) 2000-2003 Nicolas Pitre * (C) 1999-2003 David Woodhouse */ diff --git a/drivers/mtd/mtdblock_ro.c b/drivers/mtd/mtdblock_ro.c index f79dbb49b1a2..852165f8b1c3 100644 --- a/drivers/mtd/mtdblock_ro.c +++ b/drivers/mtd/mtdblock_ro.c @@ -1,6 +1,4 @@ /* - * $Id: mtdblock_ro.c,v 1.19 2004/11/16 18:28:59 dwmw2 Exp $ - * * (C) 2003 David Woodhouse * * Simple read-only (writable only for RAM) mtdblock driver diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 5d3ac512ce16..4b3156f9b36f 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1,6 +1,4 @@ /* - * $Id: mtdchar.c,v 1.76 2005/11/07 11:14:20 gleixner Exp $ - * * Character-device access to raw MTD devices. * */ diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index d563dcd4b264..2972a5edb73d 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -6,8 +6,6 @@ * NAND support by Christian Gan * * This code is GPL - * - * $Id: mtdconcat.c,v 1.11 2005/11/07 11:14:20 gleixner Exp $ */ #include diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 8c61035b968b..4373790401d3 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -1,6 +1,4 @@ /* - * $Id: mtdcore.c,v 1.47 2005/11/07 11:14:20 gleixner Exp $ - * * Core registration and callback routines for MTD * drivers and users. * diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 07c701169344..11b803cc405b 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -5,8 +5,6 @@ * * This code is GPL * - * $Id: mtdpart.c,v 1.55 2005/11/07 11:14:20 gleixner Exp $ - * * 02-21-2002 Thomas Gleixner * added support for read_oob, write_oob */ diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 5076faf9ca66..7dea6c3a6603 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -1,5 +1,4 @@ # drivers/mtd/nand/Kconfig -# $Id: Kconfig,v 1.35 2005/11/07 11:14:30 gleixner Exp $ menuconfig MTD_NAND tristate "NAND Device Support" diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile index a6e74a46992a..d95a10c51866 100644 --- a/drivers/mtd/nand/Makefile +++ b/drivers/mtd/nand/Makefile @@ -1,7 +1,6 @@ # # linux/drivers/nand/Makefile # -# $Id: Makefile.common,v 1.15 2004/11/26 12:28:22 dedekind Exp $ obj-$(CONFIG_MTD_NAND) += nand.o nand_ecc.o obj-$(CONFIG_MTD_NAND_IDS) += nand_ids.o diff --git a/drivers/mtd/nand/au1550nd.c b/drivers/mtd/nand/au1550nd.c index 09e421a96893..22ad9f367760 100644 --- a/drivers/mtd/nand/au1550nd.c +++ b/drivers/mtd/nand/au1550nd.c @@ -3,8 +3,6 @@ * * Copyright (C) 2004 Embedded Edge, LLC * - * $Id: au1550nd.c,v 1.13 2005/11/07 11:14:30 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. diff --git a/drivers/mtd/nand/autcpu12.c b/drivers/mtd/nand/autcpu12.c index dd38011ee0b7..553dd7e9b41c 100644 --- a/drivers/mtd/nand/autcpu12.c +++ b/drivers/mtd/nand/autcpu12.c @@ -6,8 +6,6 @@ * Derived from drivers/mtd/spia.c * Copyright (C) 2000 Steven J. 
Hill (sjhill@realitydiluted.com) * - * $Id: autcpu12.c,v 1.23 2005/11/07 11:14:30 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c index 0e72153b3297..cd4393ce2562 100644 --- a/drivers/mtd/nand/diskonchip.c +++ b/drivers/mtd/nand/diskonchip.c @@ -15,8 +15,6 @@ * converted to the generic Reed-Solomon library by Thomas Gleixner * * Interface to generic NAND code for M-Systems DiskOnChip devices - * - * $Id: diskonchip.c,v 1.55 2005/11/07 11:14:30 gleixner Exp $ */ #include diff --git a/drivers/mtd/nand/edb7312.c b/drivers/mtd/nand/edb7312.c index ba67bbec20d3..387e4352903e 100644 --- a/drivers/mtd/nand/edb7312.c +++ b/drivers/mtd/nand/edb7312.c @@ -6,8 +6,6 @@ * Derived from drivers/mtd/nand/autcpu12.c * Copyright (c) 2001 Thomas Gleixner (gleixner@autronix.de) * - * $Id: edb7312.c,v 1.12 2005/11/07 11:14:30 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. diff --git a/drivers/mtd/nand/h1910.c b/drivers/mtd/nand/h1910.c index 2d585d2d090c..9e59de501c2e 100644 --- a/drivers/mtd/nand/h1910.c +++ b/drivers/mtd/nand/h1910.c @@ -7,8 +7,6 @@ * Copyright (C) 2002 Marius Gröger (mag@sysgo.de) * Copyright (c) 2001 Thomas Gleixner (gleixner@autronix.de) * - * $Id: h1910.c,v 1.6 2005/11/07 11:14:30 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c index 5e121ceaa598..0b1c48595f12 100644 --- a/drivers/mtd/nand/nand_bbt.c +++ b/drivers/mtd/nand/nand_bbt.c @@ -6,8 +6,6 @@ * * Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de) * - * $Id: nand_bbt.c,v 1.36 2005/11/07 11:14:30 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c index 9003a135e050..918a806a8471 100644 --- a/drivers/mtd/nand/nand_ecc.c +++ b/drivers/mtd/nand/nand_ecc.c @@ -9,8 +9,6 @@ * * Copyright (C) 2006 Thomas Gleixner * - * $Id: nand_ecc.c,v 1.15 2005/11/07 11:14:30 gleixner Exp $ - * * This file is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 or (at your option) any diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c index a3e3ab0185d5..69ee2c90eb0b 100644 --- a/drivers/mtd/nand/nand_ids.c +++ b/drivers/mtd/nand/nand_ids.c @@ -3,8 +3,6 @@ * * Copyright (C) 2002 Thomas Gleixner (tglx@linutronix.de) * - * $Id: nand_ids.c,v 1.16 2005/11/07 11:14:31 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c index 68c150c8ff9d..add975a229a7 100644 --- a/drivers/mtd/nand/nandsim.c +++ b/drivers/mtd/nand/nandsim.c @@ -21,8 +21,6 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA - * - * $Id: nandsim.c,v 1.8 2005/03/19 15:33:56 dedekind Exp $ */ #include diff --git a/drivers/mtd/nand/ppchameleonevb.c b/drivers/mtd/nand/ppchameleonevb.c index 082073acf20f..cc8658431851 100644 --- a/drivers/mtd/nand/ppchameleonevb.c +++ b/drivers/mtd/nand/ppchameleonevb.c @@ -6,8 +6,6 @@ * Derived from drivers/mtd/nand/edb7312.c * * - * $Id: ppchameleonevb.c,v 1.7 2005/11/07 11:14:31 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. diff --git a/drivers/mtd/nand/rtc_from4.c b/drivers/mtd/nand/rtc_from4.c index 26f88215bc47..a033c4cd8e16 100644 --- a/drivers/mtd/nand/rtc_from4.c +++ b/drivers/mtd/nand/rtc_from4.c @@ -6,8 +6,6 @@ * Derived from drivers/mtd/nand/spia.c * Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com) * - * $Id: rtc_from4.c,v 1.10 2005/11/07 11:14:31 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c index b34a460ab679..91f42e485520 100644 --- a/drivers/mtd/nand/s3c2410.c +++ b/drivers/mtd/nand/s3c2410.c @@ -20,8 +20,6 @@ * 20-Oct-2005 BJD Fix timing calculation bug * 14-Jan-2006 BJD Allow clock to be stopped when idle * - * $Id: s3c2410.c,v 1.23 2006/04/01 18:06:29 bjd Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or diff --git a/drivers/mtd/nand/sharpsl.c b/drivers/mtd/nand/sharpsl.c index 033f8800b1e6..6dba2fb66ae5 100644 --- a/drivers/mtd/nand/sharpsl.c +++ b/drivers/mtd/nand/sharpsl.c @@ -3,8 +3,6 @@ * * Copyright (C) 2004 Richard Purdie * - * $Id: sharpsl.c,v 1.7 2005/11/07 11:14:31 gleixner Exp $ - * * Based on Sharp's NAND driver sharp_sl.c * * This program is free software; you can redistribute it and/or modify diff --git a/drivers/mtd/nand/spia.c b/drivers/mtd/nand/spia.c index 1f6d429b1583..0cc6d0acb8fe 100644 --- a/drivers/mtd/nand/spia.c +++ b/drivers/mtd/nand/spia.c @@ -8,8 +8,6 @@ * to controllines (due to change in nand.c) * page_cache added * - * $Id: spia.c,v 1.25 2005/11/07 11:14:31 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. diff --git a/drivers/mtd/nand/toto.c b/drivers/mtd/nand/toto.c index f9e2d4a0ab8c..bbf492e6830d 100644 --- a/drivers/mtd/nand/toto.c +++ b/drivers/mtd/nand/toto.c @@ -14,8 +14,6 @@ * Overview: * This is a device driver for the NAND flash device found on the * TI fido board. 
It supports 32MiB and 64MiB cards - * - * $Id: toto.c,v 1.5 2005/11/07 11:14:31 gleixner Exp $ */ #include diff --git a/drivers/mtd/nand/ts7250.c b/drivers/mtd/nand/ts7250.c index f40081069ab2..807a72752eeb 100644 --- a/drivers/mtd/nand/ts7250.c +++ b/drivers/mtd/nand/ts7250.c @@ -9,8 +9,6 @@ * Derived from drivers/mtd/nand/autcpu12.c * Copyright (c) 2001 Thomas Gleixner (gleixner@autronix.de) * - * $Id: ts7250.c,v 1.4 2004/12/30 22:02:07 joff Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c index 0c9ce19ea27a..320b929abe79 100644 --- a/drivers/mtd/nftlcore.c +++ b/drivers/mtd/nftlcore.c @@ -1,7 +1,6 @@ /* Linux driver for NAND Flash Translation Layer */ /* (c) 1999 Machine Vision Holdings, Inc. */ /* Author: David Woodhouse */ -/* $Id: nftlcore.c,v 1.98 2005/11/07 11:14:21 gleixner Exp $ */ /* The contents of this file are distributed under the GNU General @@ -803,12 +802,8 @@ static struct mtd_blktrans_ops nftl_tr = { .owner = THIS_MODULE, }; -extern char nftlmountrev[]; - static int __init init_nftl(void) { - printk(KERN_INFO "NFTL driver: nftlcore.c $Revision: 1.98 $, nftlmount.c %s\n", nftlmountrev); - return register_mtd_blktrans(&nftl_tr); } diff --git a/drivers/mtd/nftlmount.c b/drivers/mtd/nftlmount.c index 345e6eff89ce..ccc4f209fbb5 100644 --- a/drivers/mtd/nftlmount.c +++ b/drivers/mtd/nftlmount.c @@ -4,8 +4,6 @@ * Author: Fabrice Bellard (fabrice.bellard@netgem.com) * Copyright (C) 2000 Netgem S.A. * - * $Id: nftlmount.c,v 1.41 2005/11/07 11:14:21 gleixner Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -31,8 +29,6 @@ #define SECTORSIZE 512 -char nftlmountrev[]="$Revision: 1.41 $"; - /* find_boot_record: Find the NFTL Media Header and its Spare copy which contains the * various device information of the NFTL partition and Bad Unit Table. Update * the ReplUnitTable[] table accroding to the Bad Unit Table. ReplUnitTable[] diff --git a/drivers/mtd/redboot.c b/drivers/mtd/redboot.c index 47474903263c..5afa268c02f1 100644 --- a/drivers/mtd/redboot.c +++ b/drivers/mtd/redboot.c @@ -1,6 +1,4 @@ /* - * $Id: redboot.c,v 1.21 2006/03/30 18:34:37 bjd Exp $ - * * Parse RedBoot-style Flash Image System (FIS) tables and * produce a Linux partition array to match. */ diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c index c84e45465499..e538c0a72abb 100644 --- a/drivers/mtd/rfd_ftl.c +++ b/drivers/mtd/rfd_ftl.c @@ -3,8 +3,6 @@ * * Copyright (C) 2005 Sean Young * - * $Id: rfd_ftl.c,v 1.8 2006/01/15 12:51:44 sean Exp $ - * * This type of flash translation layer (FTL) is used by the Embedded BIOS * by General Software. It is known as the Resident Flash Disk (RFD), see: * diff --git a/include/linux/jffs2.h b/include/linux/jffs2.h index 6b563cae23df..da720bc3eb15 100644 --- a/include/linux/jffs2.h +++ b/include/linux/jffs2.h @@ -7,9 +7,6 @@ * * For licensing information, see the file 'LICENCE' in the * jffs2 directory. 
- * - * $Id: jffs2.h,v 1.38 2005/09/26 11:37:23 havasi Exp $ - * */ #ifndef __LINUX_JFFS2_H__ diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h index 9a6e2f953cba..310e61606415 100644 --- a/include/linux/mtd/blktrans.h +++ b/include/linux/mtd/blktrans.h @@ -1,6 +1,4 @@ /* - * $Id: blktrans.h,v 1.6 2005/11/07 11:14:54 gleixner Exp $ - * * (C) 2003 David Woodhouse * * Interface to Linux block layer for MTD 'translation layers'. diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index b0ddf4b25862..d6fb115f5a07 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -1,7 +1,6 @@ /* Common Flash Interface structures * See http://support.intel.com/design/flash/technote/index.htm - * $Id: cfi.h,v 1.57 2005/11/15 23:28:17 tpoynor Exp $ */ #ifndef __MTD_CFI_H__ diff --git a/include/linux/mtd/cfi_endian.h b/include/linux/mtd/cfi_endian.h index 25724f7d3867..d802f7736be3 100644 --- a/include/linux/mtd/cfi_endian.h +++ b/include/linux/mtd/cfi_endian.h @@ -1,8 +1,3 @@ -/* - * $Id: cfi_endian.h,v 1.11 2002/01/30 23:20:48 awozniak Exp $ - * - */ - #include #ifndef CONFIG_MTD_CFI_ADV_OPTIONS diff --git a/include/linux/mtd/concat.h b/include/linux/mtd/concat.h index ed8dc6755219..c02f3d264ecf 100644 --- a/include/linux/mtd/concat.h +++ b/include/linux/mtd/concat.h @@ -4,8 +4,6 @@ * (C) 2002 Robert Kaiser * * This code is GPL - * - * $Id: concat.h,v 1.1 2002/03/08 16:34:36 rkaiser Exp $ */ #ifndef MTD_CONCAT_H diff --git a/include/linux/mtd/doc2000.h b/include/linux/mtd/doc2000.h index 9addd073bf15..0a6d516ab71d 100644 --- a/include/linux/mtd/doc2000.h +++ b/include/linux/mtd/doc2000.h @@ -6,8 +6,6 @@ * Copyright (C) 2002-2003 Greg Ungerer * Copyright (C) 2002-2003 SnapGear Inc * - * $Id: doc2000.h,v 1.25 2005/11/07 11:14:54 gleixner Exp $ - * * Released under GPL */ diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h index 39e7d2a1be9a..08dd131301c1 100644 --- a/include/linux/mtd/flashchip.h +++ b/include/linux/mtd/flashchip.h @@ -5,9 +5,6 @@ * Contains information about the location and state of a given flash device * * (C) 2000 Red Hat. GPLd. - * - * $Id: flashchip.h,v 1.18 2005/11/07 11:14:54 gleixner Exp $ - * */ #ifndef __MTD_FLASHCHIP_H__ diff --git a/include/linux/mtd/ftl.h b/include/linux/mtd/ftl.h index d99609113307..0be442f881dd 100644 --- a/include/linux/mtd/ftl.h +++ b/include/linux/mtd/ftl.h @@ -1,6 +1,4 @@ /* - * $Id: ftl.h,v 1.7 2005/11/07 11:14:54 gleixner Exp $ - * * Derived from (and probably identical to): * ftl.h 1.7 1999/10/25 20:23:17 * diff --git a/include/linux/mtd/gen_probe.h b/include/linux/mtd/gen_probe.h index 256e7342ed1e..df362ddf2949 100644 --- a/include/linux/mtd/gen_probe.h +++ b/include/linux/mtd/gen_probe.h @@ -1,7 +1,6 @@ /* * (C) 2001, 2001 Red Hat, Inc. 
* GPL'd - * $Id: gen_probe.h,v 1.4 2005/11/07 11:14:54 gleixner Exp $ */ #ifndef __LINUX_MTD_GEN_PROBE_H__ diff --git a/include/linux/mtd/inftl.h b/include/linux/mtd/inftl.h index 85fd041d44ad..64ee53ce95a9 100644 --- a/include/linux/mtd/inftl.h +++ b/include/linux/mtd/inftl.h @@ -2,8 +2,6 @@ * inftl.h -- defines to support the Inverse NAND Flash Translation Layer * * (C) Copyright 2002, Greg Ungerer (gerg@snapgear.com) - * - * $Id: inftl.h,v 1.7 2005/06/13 13:08:45 sean Exp $ */ #ifndef __MTD_INFTL_H__ @@ -52,8 +50,6 @@ struct INFTLrecord { int INFTL_mount(struct INFTLrecord *s); int INFTL_formatblock(struct INFTLrecord *s, int block); -extern char inftlmountrev[]; - void INFTL_dumptables(struct INFTLrecord *s); void INFTL_dumpVUchains(struct INFTLrecord *s); diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index a9fae032ba81..85e3939cf487 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -1,6 +1,5 @@ /* Overhauled routines for dealing with different mmap regions of flash */ -/* $Id: map.h,v 1.54 2005/11/07 11:14:54 gleixner Exp $ */ #ifndef __LINUX_MTD_MAP_H__ #define __LINUX_MTD_MAP_H__ diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 245f9098e171..31ed234b2a74 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -1,6 +1,4 @@ /* - * $Id: mtd.h,v 1.61 2005/11/07 11:14:54 gleixner Exp $ - * * Copyright (C) 1999-2003 David Woodhouse et al. * * Released under GPL diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c42bc7f533a5..1288be7b7740 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -5,8 +5,6 @@ * Steven J. Hill * Thomas Gleixner * - * $Id: nand.h,v 1.74 2005/09/15 13:58:50 vwool Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. diff --git a/include/linux/mtd/nand_ecc.h b/include/linux/mtd/nand_ecc.h index 12c5bc342ead..090da505425d 100644 --- a/include/linux/mtd/nand_ecc.h +++ b/include/linux/mtd/nand_ecc.h @@ -3,8 +3,6 @@ * * Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com) * - * $Id: nand_ecc.h,v 1.4 2004/06/17 02:35:02 dbrown Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. diff --git a/include/linux/mtd/nftl.h b/include/linux/mtd/nftl.h index 001eec50cac6..dcaf611ed748 100644 --- a/include/linux/mtd/nftl.h +++ b/include/linux/mtd/nftl.h @@ -1,6 +1,4 @@ /* - * $Id: nftl.h,v 1.16 2004/06/30 14:49:00 dbrown Exp $ - * * (C) 1999-2003 David Woodhouse */ diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 7c37d7e55abc..5014f7a9f5df 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -4,8 +4,6 @@ * (C) 2000 Nicolas Pitre * * This code is GPL - * - * $Id: partitions.h,v 1.17 2005/11/07 11:14:55 gleixner Exp $ */ #ifndef MTD_PARTITIONS_H diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h index 0dc07d5f3354..c8e63a5ee72e 100644 --- a/include/linux/mtd/physmap.h +++ b/include/linux/mtd/physmap.h @@ -2,8 +2,6 @@ * For boards with physically mapped flash and using * drivers/mtd/maps/physmap.c mapping driver. * - * $Id: physmap.h,v 1.4 2005/11/07 11:14:55 gleixner Exp $ - * * Copyright (C) 2003 MontaVista Software Inc. 
* Author: Jun Sun, jsun@mvista.com or jsun@junsun.net * diff --git a/include/linux/mtd/plat-ram.h b/include/linux/mtd/plat-ram.h index 0e37ad07bce2..e07890aff1cf 100644 --- a/include/linux/mtd/plat-ram.h +++ b/include/linux/mtd/plat-ram.h @@ -6,8 +6,6 @@ * * Generic platform device based RAM map * - * $Id: plat-ram.h,v 1.2 2005/01/24 00:37:40 bjd Exp $ - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. diff --git a/include/linux/mtd/pmc551.h b/include/linux/mtd/pmc551.h index 5cc070c24d88..27ad40aed19f 100644 --- a/include/linux/mtd/pmc551.h +++ b/include/linux/mtd/pmc551.h @@ -1,6 +1,4 @@ /* - * $Id: pmc551.h,v 1.6 2005/11/07 11:14:55 gleixner Exp $ - * * PMC551 PCI Mezzanine Ram Device * * Author: @@ -17,7 +15,7 @@ #include -#define PMC551_VERSION "$Id: pmc551.h,v 1.6 2005/11/07 11:14:55 gleixner Exp $\n"\ +#define PMC551_VERSION \ "Ramix PMC551 PCI Mezzanine Ram Driver. (C) 1999,2000 Nortel Networks.\n" /* diff --git a/include/linux/mtd/xip.h b/include/linux/mtd/xip.h index e9d40bdde48c..36efcba15ecd 100644 --- a/include/linux/mtd/xip.h +++ b/include/linux/mtd/xip.h @@ -11,8 +11,6 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * $Id: xip.h,v 1.5 2005/11/07 11:14:55 gleixner Exp $ */ #ifndef __LINUX_MTD_XIP_H__ diff --git a/include/mtd/inftl-user.h b/include/mtd/inftl-user.h index 9b1e2526b45e..e17eda302b2d 100644 --- a/include/mtd/inftl-user.h +++ b/include/mtd/inftl-user.h @@ -1,6 +1,4 @@ /* - * $Id: inftl-user.h,v 1.2 2005/11/07 11:14:56 gleixner Exp $ - * * Parts of INFTL headers shared with userspace * */ diff --git a/include/mtd/jffs2-user.h b/include/mtd/jffs2-user.h index d508ef0ae091..001685d7fa88 100644 --- a/include/mtd/jffs2-user.h +++ b/include/mtd/jffs2-user.h @@ -1,6 +1,4 @@ /* - * $Id: jffs2-user.h,v 1.1 2004/05/05 11:57:54 dwmw2 Exp $ - * * JFFS2 definitions for use in user space only */ diff --git a/include/mtd/mtd-abi.h b/include/mtd/mtd-abi.h index 615072c4da04..c6c61cd5a254 100644 --- a/include/mtd/mtd-abi.h +++ b/include/mtd/mtd-abi.h @@ -1,6 +1,4 @@ /* - * $Id: mtd-abi.h,v 1.13 2005/11/07 11:14:56 gleixner Exp $ - * * Portions of MTD ABI definition which are shared by kernel and user space */ diff --git a/include/mtd/mtd-user.h b/include/mtd/mtd-user.h index 713f34d3e62e..170ceca3b2d0 100644 --- a/include/mtd/mtd-user.h +++ b/include/mtd/mtd-user.h @@ -1,6 +1,4 @@ /* - * $Id: mtd-user.h,v 1.2 2004/05/05 14:44:57 dwmw2 Exp $ - * * MTD ABI header for use by user space only. */ diff --git a/include/mtd/nftl-user.h b/include/mtd/nftl-user.h index b2bca18e7311..390d21c080aa 100644 --- a/include/mtd/nftl-user.h +++ b/include/mtd/nftl-user.h @@ -1,6 +1,4 @@ /* - * $Id: nftl-user.h,v 1.2 2005/11/07 11:14:56 gleixner Exp $ - * * Parts of NFTL headers shared with userspace * */ -- cgit v1.2.3 From a822bea7962b500b0bcab41bf3500f7c40ae56b5 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 6 Jun 2008 01:34:00 -0400 Subject: Input: serio - mark serio_register_driver() __must_check Also remove extra declaration of serio_register_driver(). 
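[Illustrative note, not part of the patch] With serio_register_driver() marked __must_check, a caller that silently drops the return value now draws a compiler warning; the expected pattern is to propagate the error from the module init function. A minimal sketch follows — "example_serio_drv" is a hypothetical driver and its id_table/interrupt/connect/disconnect callbacks are elided, so this is only a shape, not a real driver:

#include <linux/module.h>
#include <linux/serio.h>

static struct serio_driver example_serio_drv = {
	.driver = {
		.name = "example",
	},
	/* .id_table, .interrupt, .connect and .disconnect omitted */
};

static int __init example_serio_init(void)
{
	/* Return the result instead of ignoring it; dropping the
	 * return value would now trigger a __must_check warning. */
	return serio_register_driver(&example_serio_drv);
}

static void __exit example_serio_exit(void)
{
	serio_unregister_driver(&example_serio_drv);
}

module_init(example_serio_init);
module_exit(example_serio_exit);
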
Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serio.h b/include/linux/serio.h index e72716cca577..25641d9e0ea8 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -87,11 +87,10 @@ void serio_unregister_port(struct serio *serio); void serio_unregister_child_port(struct serio *serio); int __serio_register_driver(struct serio_driver *drv, struct module *owner, const char *mod_name); -static inline int serio_register_driver(struct serio_driver *drv) +static inline int __must_check serio_register_driver(struct serio_driver *drv) { return __serio_register_driver(drv, THIS_MODULE, KBUILD_MODNAME); } -int serio_register_driver(struct serio_driver *drv); void serio_unregister_driver(struct serio_driver *drv); static inline int serio_write(struct serio *serio, unsigned char data) -- cgit v1.2.3 From d8e64406a037a64444175730294e449c9e21f5ec Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 23 Jul 2008 13:09:48 -0700 Subject: md: delay notification of 'active_idle' to the recovery thread sysfs_notify might sleep, so do not call it from md_safemode_timeout. Signed-off-by: Dan Williams --- drivers/md/md.c | 5 ++++- include/linux/raid/md_k.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/md/md.c b/drivers/md/md.c index c2ff77ccec50..0f1b83096425 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3483,7 +3483,7 @@ static void md_safemode_timeout(unsigned long data) if (!atomic_read(&mddev->writes_pending)) { mddev->safemode = 1; if (mddev->external) - sysfs_notify(&mddev->kobj, NULL, "array_state"); + set_bit(MD_NOTIFY_ARRAY_STATE, &mddev->flags); } md_wakeup_thread(mddev->thread); } @@ -6051,6 +6051,9 @@ void md_check_recovery(mddev_t *mddev) if (mddev->bitmap) bitmap_daemon_work(mddev->bitmap); + if (test_and_clear_bit(MD_NOTIFY_ARRAY_STATE, &mddev->flags)) + sysfs_notify(&mddev->kobj, NULL, "array_state"); + if (mddev->ro) return; diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 9f2549ac0e2d..c200b9a34aff 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -128,6 +128,7 @@ struct mddev_s #define MD_CHANGE_DEVS 0 /* Some device status has changed */ #define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */ #define MD_CHANGE_PENDING 2 /* superblock update in progress */ +#define MD_NOTIFY_ARRAY_STATE 3 /* atomic context wants to notify userspace */ int ro; -- cgit v1.2.3 From a5bf6190417cbbf80443a9f71c65b653e13e9982 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 10 Jul 2008 18:38:33 +0300 Subject: UBI: add ubi_sync() interface To flush MTD device caches. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/kapi.c | 24 ++++++++++++++++++++++++ include/linux/mtd/ubi.h | 1 + 2 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index 51508832566d..e65c8e0bcd5d 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -632,3 +632,27 @@ int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum) return vol->eba_tbl[lnum] >= 0; } EXPORT_SYMBOL_GPL(ubi_is_mapped); + +/** + * ubi_sync - synchronize UBI device buffers. + * @ubi_num: UBI device to synchronize + * + * The underlying MTD device may cache data in hardware or in software. This + * function ensures the caches are flushed. Returns zero in case of success and + * a negative error code in case of failure. 
+ */ +int ubi_sync(int ubi_num) +{ + struct ubi_device *ubi; + + ubi = ubi_get_device(ubi_num); + if (!ubi) + return -ENODEV; + + if (ubi->mtd->sync) + ubi->mtd->sync(ubi->mtd); + + ubi_put_device(ubi); + return 0; +} +EXPORT_SYMBOL_GPL(ubi_sync); diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h index f71201d0f3e7..83302bbbddb4 100644 --- a/include/linux/mtd/ubi.h +++ b/include/linux/mtd/ubi.h @@ -152,6 +152,7 @@ int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum); int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum); int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum); +int ubi_sync(int ubi_num); /* * This function is the same as the 'ubi_leb_read()' function, but it does not -- cgit v1.2.3 From 85c6e6e28259e9b58b8984db536c45bc3161f40c Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 16 Jul 2008 10:25:56 +0300 Subject: UBI: amend commentaries Hch asked not to use "unit" for sub-systems, let it be so. Also some other commentaries modifications. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/build.c | 2 +- drivers/mtd/ubi/debug.h | 6 +-- drivers/mtd/ubi/eba.c | 22 +++++------ drivers/mtd/ubi/io.c | 22 +++++------ drivers/mtd/ubi/scan.c | 28 +++++++------- drivers/mtd/ubi/scan.h | 19 +++++---- drivers/mtd/ubi/ubi-media.h | 23 +++++------ drivers/mtd/ubi/ubi.h | 37 +++++++++--------- drivers/mtd/ubi/wl.c | 94 ++++++++++++++++++++++----------------------- include/linux/mtd/ubi.h | 4 +- 10 files changed, 129 insertions(+), 128 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index a5b19944eca8..27271fe32e02 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -524,7 +524,7 @@ out_si: } /** - * io_init - initialize I/O unit for a given UBI device. + * io_init - initialize I/O sub-system for a given UBI device. * @ubi: UBI device description object * * If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h index 8ea99d8c9e1f..7d8d77c31dfe 100644 --- a/drivers/mtd/ubi/debug.h +++ b/drivers/mtd/ubi/debug.h @@ -76,21 +76,21 @@ void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req); #endif /* CONFIG_MTD_UBI_DEBUG_MSG */ #ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA -/* Messages from the eraseblock association unit */ +/* Messages from the eraseblock association sub-system */ #define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) #else #define dbg_eba(fmt, ...) ({}) #endif #ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL -/* Messages from the wear-leveling unit */ +/* Messages from the wear-leveling sub-system */ #define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) #else #define dbg_wl(fmt, ...) ({}) #endif #ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO -/* Messages from the input/output unit */ +/* Messages from the input/output sub-system */ #define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) #else #define dbg_io(fmt, ...) ({}) diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 8dc488fc0cdf..613cd1e51648 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -19,20 +19,20 @@ */ /* - * The UBI Eraseblock Association (EBA) unit. + * The UBI Eraseblock Association (EBA) sub-system. * - * This unit is responsible for I/O to/from logical eraseblock. + * This sub-system is responsible for I/O to/from logical eraseblock. 
* * Although in this implementation the EBA table is fully kept and managed in * RAM, which assumes poor scalability, it might be (partially) maintained on * flash in future implementations. * - * The EBA unit implements per-logical eraseblock locking. Before accessing a - * logical eraseblock it is locked for reading or writing. The per-logical - * eraseblock locking is implemented by means of the lock tree. The lock tree - * is an RB-tree which refers all the currently locked logical eraseblocks. The - * lock tree elements are &struct ubi_ltree_entry objects. They are indexed by - * (@vol_id, @lnum) pairs. + * The EBA sub-system implements per-logical eraseblock locking. Before + * accessing a logical eraseblock it is locked for reading or writing. The + * per-logical eraseblock locking is implemented by means of the lock tree. The + * lock tree is an RB-tree which refers all the currently locked logical + * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects. + * They are indexed by (@vol_id, @lnum) pairs. * * EBA also maintains the global sequence counter which is incremented each * time a logical eraseblock is mapped to a physical eraseblock and it is @@ -1128,7 +1128,7 @@ out_unlock_leb: } /** - * ubi_eba_init_scan - initialize the EBA unit using scanning information. + * ubi_eba_init_scan - initialize the EBA sub-system using scanning information. * @ubi: UBI device description object * @si: scanning information * @@ -1143,7 +1143,7 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) struct ubi_scan_leb *seb; struct rb_node *rb; - dbg_eba("initialize EBA unit"); + dbg_eba("initialize EBA sub-system"); spin_lock_init(&ubi->ltree_lock); mutex_init(&ubi->alc_mutex); @@ -1209,7 +1209,7 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) ubi->rsvd_pebs += ubi->beb_rsvd_pebs; } - dbg_eba("EBA unit is initialized"); + dbg_eba("EBA sub-system is initialized"); return 0; out_free: diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 4ac11df7b048..561e7b2f96cb 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -20,15 +20,15 @@ */ /* - * UBI input/output unit. + * UBI input/output sub-system. * - * This unit provides a uniform way to work with all kinds of the underlying - * MTD devices. It also implements handy functions for reading and writing UBI - * headers. + * This sub-system provides a uniform way to work with all kinds of the + * underlying MTD devices. It also implements handy functions for reading and + * writing UBI headers. * * We are trying to have a paranoid mindset and not to trust to what we read - * from the flash media in order to be more secure and robust. So this unit - * validates every single header it reads from the flash media. + * from the flash media in order to be more secure and robust. So this + * sub-system validates every single header it reads from the flash media. * * Some words about how the eraseblock headers are stored. * @@ -79,11 +79,11 @@ * 512-byte chunks, we have to allocate one more buffer and copy our VID header * to offset 448 of this buffer. * - * The I/O unit does the following trick in order to avoid this extra copy. - * It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID header - * and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. When the - * VID header is being written out, it shifts the VID header pointer back and - * writes the whole sub-page. 
+ * The I/O sub-system does the following trick in order to avoid this extra + * copy. It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID + * header and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. + * When the VID header is being written out, it shifts the VID header pointer + * back and writes the whole sub-page. */ #include diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 96d410e106ab..892c2ba49777 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -19,9 +19,9 @@ */ /* - * UBI scanning unit. + * UBI scanning sub-system. * - * This unit is responsible for scanning the flash media, checking UBI + * This sub-system is responsible for scanning the flash media, checking UBI * headers and providing complete information about the UBI flash image. * * The scanning information is represented by a &struct ubi_scan_info' object. @@ -103,7 +103,7 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, * non-zero if an inconsistency was found and zero if not. * * Note, UBI does sanity check of everything it reads from the flash media. - * Most of the checks are done in the I/O unit. Here we check that the + * Most of the checks are done in the I/O sub-system. Here we check that the * information in the VID header is consistent to the information in other VID * headers of the same volume. */ @@ -256,8 +256,8 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb, * that versions that are close to %0xFFFFFFFF are less then * versions that are close to %0. * - * The UBI WL unit guarantees that the number of pending tasks - * is not greater then %0x7FFFFFFF. So, if the difference + * The UBI WL sub-system guarantees that the number of pending + * tasks is not greater then %0x7FFFFFFF. So, if the difference * between any two versions is greater or equivalent to * %0x7FFFFFFF, there was an overflow and the logical * eraseblock with lower version is actually newer then the one @@ -645,9 +645,9 @@ void ubi_scan_rm_volume(struct ubi_scan_info *si, struct ubi_scan_volume *sv) * * This function erases physical eraseblock 'pnum', and writes the erase * counter header to it. This function should only be used on UBI device - * initialization stages, when the EBA unit had not been yet initialized. This - * function returns zero in case of success and a negative error code in case - * of failure. + * initialization stages, when the EBA sub-system had not been yet initialized. + * This function returns zero in case of success and a negative error code in + * case of failure. */ int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si, int pnum, int ec) @@ -687,9 +687,10 @@ out_free: * @si: scanning information * * This function returns a free physical eraseblock. It is supposed to be - * called on the UBI initialization stages when the wear-leveling unit is not - * initialized yet. This function picks a physical eraseblocks from one of the - * lists, writes the EC header if it is needed, and removes it from the list. + * called on the UBI initialization stages when the wear-leveling sub-system is + * not initialized yet. This function picks a physical eraseblocks from one of + * the lists, writes the EC header if it is needed, and removes it from the + * list. * * This function returns scanning physical eraseblock information in case of * success and an error code in case of failure. 
@@ -764,8 +765,9 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum return err; else if (err) { /* - * FIXME: this is actually duty of the I/O unit to initialize - * this, but MTD does not provide enough information. + * FIXME: this is actually duty of the I/O sub-system to + * initialize this, but MTD does not provide enough + * information. */ si->bad_peb_count += 1; return 0; diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h index 966b9b682a42..4e2e3cc0becd 100644 --- a/drivers/mtd/ubi/scan.h +++ b/drivers/mtd/ubi/scan.h @@ -59,16 +59,16 @@ struct ubi_scan_leb { * @leb_count: number of logical eraseblocks in this volume * @vol_type: volume type * @used_ebs: number of used logical eraseblocks in this volume (only for - * static volumes) + * static volumes) * @last_data_size: amount of data in the last logical eraseblock of this - * volume (always equivalent to the usable logical eraseblock size in case of - * dynamic volumes) + * volume (always equivalent to the usable logical eraseblock + * size in case of dynamic volumes) * @data_pad: how many bytes at the end of logical eraseblocks of this volume - * are not used (due to volume alignment) + * are not used (due to volume alignment) * @compat: compatibility flags of this volume * @rb: link in the volume RB-tree * @root: root of the RB-tree containing all the eraseblock belonging to this - * volume (&struct ubi_scan_leb objects) + * volume (&struct ubi_scan_leb objects) * * One object of this type is allocated for each volume during scanning. */ @@ -92,8 +92,8 @@ struct ubi_scan_volume { * @free: list of free physical eraseblocks * @erase: list of physical eraseblocks which have to be erased * @alien: list of physical eraseblocks which should not be used by UBI (e.g., + * those belonging to "preserve"-compatible internal volumes) * @bad_peb_count: count of bad physical eraseblocks - * those belonging to "preserve"-compatible internal volumes) * @vols_found: number of volumes found during scanning * @highest_vol_id: highest volume ID * @alien_peb_count: count of physical eraseblocks in the @alien list @@ -106,8 +106,8 @@ struct ubi_scan_volume { * @ec_count: a temporary variable used when calculating @mean_ec * * This data structure contains the result of scanning and may be used by other - * UBI units to build final UBI data structures, further error-recovery and so - * on. + * UBI sub-systems to build final UBI data structures, further error-recovery + * and so on. */ struct ubi_scan_info { struct rb_root volumes; @@ -132,8 +132,7 @@ struct ubi_device; struct ubi_vid_hdr; /* - * ubi_scan_move_to_list - move a physical eraseblock from the volume tree to a - * list. + * ubi_scan_move_to_list - move a PEB from the volume tree to a list. * * @sv: volume scanning information * @seb: scanning eraseblock infprmation diff --git a/drivers/mtd/ubi/ubi-media.h b/drivers/mtd/ubi/ubi-media.h index c3185d9fd048..26bb7af9787a 100644 --- a/drivers/mtd/ubi/ubi-media.h +++ b/drivers/mtd/ubi/ubi-media.h @@ -98,10 +98,11 @@ enum { * Compatibility constants used by internal volumes. 
* * @UBI_COMPAT_DELETE: delete this internal volume before anything is written - * to the flash + * to the flash * @UBI_COMPAT_RO: attach this device in read-only mode * @UBI_COMPAT_PRESERVE: preserve this internal volume - do not touch its - * physical eraseblocks, don't allow the wear-leveling unit to move them + * physical eraseblocks, don't allow the wear-leveling + * sub-system to move them * @UBI_COMPAT_REJECT: reject this UBI image */ enum { @@ -123,7 +124,7 @@ enum { * struct ubi_ec_hdr - UBI erase counter header. * @magic: erase counter header magic number (%UBI_EC_HDR_MAGIC) * @version: version of UBI implementation which is supposed to accept this - * UBI image + * UBI image * @padding1: reserved for future, zeroes * @ec: the erase counter * @vid_hdr_offset: where the VID header starts @@ -159,20 +160,20 @@ struct ubi_ec_hdr { * struct ubi_vid_hdr - on-flash UBI volume identifier header. * @magic: volume identifier header magic number (%UBI_VID_HDR_MAGIC) * @version: UBI implementation version which is supposed to accept this UBI - * image (%UBI_VERSION) + * image (%UBI_VERSION) * @vol_type: volume type (%UBI_VID_DYNAMIC or %UBI_VID_STATIC) * @copy_flag: if this logical eraseblock was copied from another physical - * eraseblock (for wear-leveling reasons) + * eraseblock (for wear-leveling reasons) * @compat: compatibility of this volume (%0, %UBI_COMPAT_DELETE, - * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT) + * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT) * @vol_id: ID of this volume * @lnum: logical eraseblock number * @leb_ver: version of this logical eraseblock (IMPORTANT: obsolete, to be - * removed, kept only for not breaking older UBI users) + * removed, kept only for not breaking older UBI users) * @data_size: how many bytes of data this logical eraseblock contains * @used_ebs: total number of used logical eraseblocks in this volume * @data_pad: how many bytes at the end of this physical eraseblock are not - * used + * used * @data_crc: CRC checksum of the data stored in this logical eraseblock * @padding1: reserved for future, zeroes * @sqnum: sequence number @@ -248,9 +249,9 @@ struct ubi_ec_hdr { * The @data_crc field contains the CRC checksum of the contents of the logical * eraseblock if this is a static volume. In case of dynamic volumes, it does * not contain the CRC checksum as a rule. The only exception is when the - * data of the physical eraseblock was moved by the wear-leveling unit, then - * the wear-leveling unit calculates the data CRC and stores it in the - * @data_crc field. And of course, the @copy_flag is %in this case. + * data of the physical eraseblock was moved by the wear-leveling sub-system, + * then the wear-leveling sub-system calculates the data CRC and stores it in + * the @data_crc field. And of course, the @copy_flag is %in this case. * * The @data_size field is used only for static volumes because UBI has to know * how many bytes of data are stored in this eraseblock. For dynamic volumes, diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 940f6b7deec3..1fc32c863b78 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -74,15 +74,15 @@ #define UBI_IO_RETRIES 3 /* - * Error codes returned by the I/O unit. + * Error codes returned by the I/O sub-system. * * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only - * 0xFF bytes + * %0xFF bytes * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. 
it contains only a - * valid erase counter header, and the rest are %0xFF bytes + * valid erase counter header, and the rest are %0xFF bytes * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC) * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or - * CRC) + * CRC) * UBI_IO_BITFLIPS: bit-flips were detected and corrected */ enum { @@ -99,9 +99,9 @@ enum { * @ec: erase counter * @pnum: physical eraseblock number * - * This data structure is used in the WL unit. Each physical eraseblock has a - * corresponding &struct wl_entry object which may be kept in different - * RB-trees. See WL unit for details. + * This data structure is used in the WL sub-system. Each physical eraseblock + * has a corresponding &struct wl_entry object which may be kept in different + * RB-trees. See WL sub-system for details. */ struct ubi_wl_entry { struct rb_node rb; @@ -118,10 +118,10 @@ struct ubi_wl_entry { * @mutex: read/write mutex to implement read/write access serialization to * the (@vol_id, @lnum) logical eraseblock * - * This data structure is used in the EBA unit to implement per-LEB locking. - * When a logical eraseblock is being locked - corresponding + * This data structure is used in the EBA sub-system to implement per-LEB + * locking. When a logical eraseblock is being locked - corresponding * &struct ubi_ltree_entry object is inserted to the lock tree (@ubi->ltree). - * See EBA unit for details. + * See EBA sub-system for details. */ struct ubi_ltree_entry { struct rb_node rb; @@ -225,7 +225,7 @@ struct ubi_volume { #ifdef CONFIG_MTD_UBI_GLUEBI /* * Gluebi-related stuff may be compiled out. - * TODO: this should not be built into UBI but should be a separate + * Note: this should not be built into UBI but should be a separate * ubimtd driver which works on top of UBI and emulates MTD devices. */ struct ubi_volume_desc *gluebi_desc; @@ -235,8 +235,7 @@ struct ubi_volume { }; /** - * struct ubi_volume_desc - descriptor of the UBI volume returned when it is - * opened. + * struct ubi_volume_desc - UBI volume descriptor returned when it is opened. 
* @vol: reference to the corresponding volume description object * @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE) */ @@ -316,11 +315,11 @@ struct ubi_wl_entry; * @ro_mode: if the UBI device is in read-only mode * @leb_size: logical eraseblock size * @leb_start: starting offset of logical eraseblocks within physical - * eraseblocks + * eraseblocks * @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size * @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size * @vid_hdr_offset: starting offset of the volume identifier header (might be - * unaligned) + * unaligned) * @vid_hdr_aloffset: starting offset of the VID header aligned to * @hdrs_min_io_size * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset @@ -356,16 +355,16 @@ struct ubi_device { struct mutex volumes_mutex; int max_ec; - /* TODO: mean_ec is not updated run-time, fix */ + /* Note, mean_ec is not updated run-time - should be fixed */ int mean_ec; - /* EBA unit's stuff */ + /* EBA sub-system's stuff */ unsigned long long global_sqnum; spinlock_t ltree_lock; struct rb_root ltree; struct mutex alc_mutex; - /* Wear-leveling unit's stuff */ + /* Wear-leveling sub-system's stuff */ struct rb_root used; struct rb_root free; struct rb_root scrub; @@ -388,7 +387,7 @@ struct ubi_device { int thread_enabled; char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2]; - /* I/O unit's stuff */ + /* I/O sub-system's stuff */ long long flash_size; int peb_count; int peb_size; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index cc8fe2934d2b..761952ba125b 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -19,22 +19,22 @@ */ /* - * UBI wear-leveling unit. + * UBI wear-leveling sub-system. * - * This unit is responsible for wear-leveling. It works in terms of physical - * eraseblocks and erase counters and knows nothing about logical eraseblocks, - * volumes, etc. From this unit's perspective all physical eraseblocks are of - * two types - used and free. Used physical eraseblocks are those that were - * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are - * those that were put by the 'ubi_wl_put_peb()' function. + * This sub-system is responsible for wear-leveling. It works in terms of + * physical* eraseblocks and erase counters and knows nothing about logical + * eraseblocks, volumes, etc. From this sub-system's perspective all physical + * eraseblocks are of two types - used and free. Used physical eraseblocks are + * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical + * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function. * * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter - * header. The rest of the physical eraseblock contains only 0xFF bytes. + * header. The rest of the physical eraseblock contains only %0xFF bytes. * - * When physical eraseblocks are returned to the WL unit by means of the + * When physical eraseblocks are returned to the WL sub-system by means of the * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is * done asynchronously in context of the per-UBI device background thread, - * which is also managed by the WL unit. + * which is also managed by the WL sub-system. 
* * The wear-leveling is ensured by means of moving the contents of used * physical eraseblocks with low erase counter to free physical eraseblocks @@ -43,34 +43,36 @@ * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick * an "optimal" physical eraseblock. For example, when it is known that the * physical eraseblock will be "put" soon because it contains short-term data, - * the WL unit may pick a free physical eraseblock with low erase counter, and - * so forth. + * the WL sub-system may pick a free physical eraseblock with low erase + * counter, and so forth. * - * If the WL unit fails to erase a physical eraseblock, it marks it as bad. + * If the WL sub-system fails to erase a physical eraseblock, it marks it as + * bad. * - * This unit is also responsible for scrubbing. If a bit-flip is detected in a - * physical eraseblock, it has to be moved. Technically this is the same as - * moving it for wear-leveling reasons. + * This sub-system is also responsible for scrubbing. If a bit-flip is detected + * in a physical eraseblock, it has to be moved. Technically this is the same + * as moving it for wear-leveling reasons. * - * As it was said, for the UBI unit all physical eraseblocks are either "free" - * or "used". Free eraseblock are kept in the @wl->free RB-tree, while used - * eraseblocks are kept in a set of different RB-trees: @wl->used, + * As it was said, for the UBI sub-system all physical eraseblocks are either + * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while + * used eraseblocks are kept in a set of different RB-trees: @wl->used, * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub. * * Note, in this implementation, we keep a small in-RAM object for each physical * eraseblock. This is surely not a scalable solution. But it appears to be good * enough for moderately large flashes and it is simple. In future, one may - * re-work this unit and make it more scalable. + * re-work this sub-system and make it more scalable. * - * At the moment this unit does not utilize the sequence number, which was - * introduced relatively recently. But it would be wise to do this because the - * sequence number of a logical eraseblock characterizes how old is it. For + * At the moment this sub-system does not utilize the sequence number, which + * was introduced relatively recently. But it would be wise to do this because + * the sequence number of a logical eraseblock characterizes how old is it. For * example, when we move a PEB with low erase counter, and we need to pick the * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we * pick target PEB with an average EC if our PEB is not very "old". This is a - * room for future re-works of the WL unit. + * room for future re-works of the WL sub-system. * - * FIXME: looks too complex, should be simplified (later). + * Note: the stuff with protection trees looks too complex and is difficult to + * understand. Should be fixed. */ #include @@ -92,20 +94,21 @@ /* * Maximum difference between two erase counters. If this threshold is - * exceeded, the WL unit starts moving data from used physical eraseblocks with - * low erase counter to free physical eraseblocks with high erase counter. + * exceeded, the WL sub-system starts moving data from used physical + * eraseblocks with low erase counter to free physical eraseblocks with high + * erase counter. 
*/ #define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD /* - * When a physical eraseblock is moved, the WL unit has to pick the target + * When a physical eraseblock is moved, the WL sub-system has to pick the target * physical eraseblock to move to. The simplest way would be just to pick the * one with the highest erase counter. But in certain workloads this could lead * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a * situation when the picked physical eraseblock is constantly erased after the * data is written to it. So, we have a constant which limits the highest erase - * counter of the free physical eraseblock to pick. Namely, the WL unit does - * not pick eraseblocks with erase counter greater then the lowest erase + * counter of the free physical eraseblock to pick. Namely, the WL sub-system + * does not pick eraseblocks with erase counter greater then the lowest erase * counter plus %WL_FREE_MAX_DIFF. */ #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) @@ -123,11 +126,11 @@ * @abs_ec: the absolute erase counter value when the protection ends * @e: the wear-leveling entry of the physical eraseblock under protection * - * When the WL unit returns a physical eraseblock, the physical eraseblock is - * protected from being moved for some "time". For this reason, the physical - * eraseblock is not directly moved from the @wl->free tree to the @wl->used - * tree. There is one more tree in between where this physical eraseblock is - * temporarily stored (@wl->prot). + * When the WL sub-system returns a physical eraseblock, the physical + * eraseblock is protected from being moved for some "time". For this reason, + * the physical eraseblock is not directly moved from the @wl->free tree to the + * @wl->used tree. There is one more tree in between where this physical + * eraseblock is temporarily stored (@wl->prot). * * All this protection stuff is needed because: * o we don't want to move physical eraseblocks just after we have given them @@ -175,7 +178,6 @@ struct ubi_wl_prot_entry { * @list: a link in the list of pending works * @func: worker function * @priv: private data of the worker function - * * @e: physical eraseblock to erase * @torture: if the physical eraseblock has to be tortured * @@ -1136,7 +1138,7 @@ out_ro: } /** - * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit. + * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system. * @ubi: UBI device description object * @pnum: physical eraseblock to return * @torture: if this physical eraseblock has to be tortured @@ -1175,11 +1177,11 @@ retry: /* * User is putting the physical eraseblock which was selected * as the target the data is moved to. It may happen if the EBA - * unit already re-mapped the LEB in 'ubi_eba_copy_leb()' but - * the WL unit has not put the PEB to the "used" tree yet, but - * it is about to do this. So we just set a flag which will - * tell the WL worker that the PEB is not needed anymore and - * should be scheduled for erasure. + * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()' + * but the WL sub-system has not put the PEB to the "used" tree + * yet, but it is about to do this. So we just set a flag which + * will tell the WL worker that the PEB is not needed anymore + * and should be scheduled for erasure. 
*/ dbg_wl("PEB %d is the target of data moving", pnum); ubi_assert(!ubi->move_to_put); @@ -1425,8 +1427,7 @@ static void cancel_pending(struct ubi_device *ubi) } /** - * ubi_wl_init_scan - initialize the wear-leveling unit using scanning - * information. + * ubi_wl_init_scan - initialize the WL sub-system using scanning information. * @ubi: UBI device description object * @si: scanning information * @@ -1583,13 +1584,12 @@ static void protection_trees_destroy(struct ubi_device *ubi) } /** - * ubi_wl_close - close the wear-leveling unit. + * ubi_wl_close - close the wear-leveling sub-system. * @ubi: UBI device description object */ void ubi_wl_close(struct ubi_device *ubi) { - dbg_wl("close the UBI wear-leveling unit"); - + dbg_wl("close the WL sub-system"); cancel_pending(ubi); protection_trees_destroy(ubi); tree_destroy(&ubi->used); diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h index 83302bbbddb4..6316fafe5c2a 100644 --- a/include/linux/mtd/ubi.h +++ b/include/linux/mtd/ubi.h @@ -45,13 +45,13 @@ enum { * @size: how many physical eraseblocks are reserved for this volume * @used_bytes: how many bytes of data this volume contains * @used_ebs: how many physical eraseblocks of this volume actually contain any - * data + * data * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) * @corrupted: non-zero if the volume is corrupted (static volumes only) * @upd_marker: non-zero if the volume has update marker set * @alignment: volume alignment * @usable_leb_size: how many bytes are available in logical eraseblocks of - * this volume + * this volume * @name_len: volume name length * @name: volume name * @cdev: UBI volume character device major and minor numbers -- cgit v1.2.3 From 674bfc23c585b34c42263d73fb51710d49762a23 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 25 Jul 2008 12:06:03 -0500 Subject: virtio: clarify that ABI is usable by any implementations We want others to implement and use virtio, so it makes sense to BSD license the non-__KERNEL__ parts of the headers to make this crystal clear. Signed-off-by: Rusty Russell Acked-by: Christian Borntraeger Acked-by: Mark McLoughlin Acked-by: Ryan Harper Acked-by: Eric Van Hensbergen Acked-by: Anthony Liguori --- include/linux/virtio_9p.h | 2 ++ include/linux/virtio_balloon.h | 2 ++ include/linux/virtio_blk.h | 2 ++ include/linux/virtio_config.h | 3 +++ include/linux/virtio_console.h | 2 ++ include/linux/virtio_net.h | 2 ++ include/linux/virtio_pci.h | 5 ++--- include/linux/virtio_rng.h | 2 ++ 8 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index 8eff0b53910b..b3c4a60ceeb3 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_9P_H #define _LINUX_VIRTIO_9P_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include /* The ID for virtio console */ diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index 979524ee75b7..c30c7bfbf39b 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_BALLOON_H #define _LINUX_VIRTIO_BALLOON_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. 
*/ #include /* The ID for virtio_balloon */ diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 5f79a5f9de79..6a66c7f30bcb 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_BLK_H #define _LINUX_VIRTIO_BLK_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include /* The ID for virtio_block */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index f364bbf63c34..7eb4b34d13bb 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -1,5 +1,8 @@ #ifndef _LINUX_VIRTIO_CONFIG_H #define _LINUX_VIRTIO_CONFIG_H +/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers. */ + /* Virtio devices use a standardized configuration space to define their * features and pass configuration information, but each implementation can * store and access that space differently. */ diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index ed2d4ead7eb7..19a0da0dba41 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -1,6 +1,8 @@ #ifndef _LINUX_VIRTIO_CONSOLE_H #define _LINUX_VIRTIO_CONSOLE_H #include +/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers. */ /* The ID for virtio console */ #define VIRTIO_ID_CONSOLE 3 diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 38c0571820fb..5e33761b9b8a 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_NET_H #define _LINUX_VIRTIO_NET_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include /* The ID for virtio_net */ diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index b3151659cf49..cdef35742932 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -9,9 +9,8 @@ * Authors: * Anthony Liguori * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #ifndef _LINUX_VIRTIO_PCI_H diff --git a/include/linux/virtio_rng.h b/include/linux/virtio_rng.h index 331afb6c9f62..1a85dab8a940 100644 --- a/include/linux/virtio_rng.h +++ b/include/linux/virtio_rng.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_RNG_H #define _LINUX_VIRTIO_RNG_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include /* The ID for virtio_rng */ -- cgit v1.2.3 From 066f4d82a67f621ddd547bfa4b9c94631d8457b0 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 29 May 2008 11:08:26 +0200 Subject: virtio_blk: check for hardsector size from host Currently virtio_blk assumes a 512 byte hard sector size. This can cause trouble / performance issues if the backing has a different block size (like a file on an ext3 file system formatted with 4k block size or a dasd). Lets add a feature flag that tells the guest to use a different hard sector size than 512 byte. 
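[Illustrative note, not part of the patch] In guest-driver terms the idea is: only trust the blk_size config field when the host actually offered VIRTIO_BLK_F_BLK_SIZE, and otherwise keep the 512-byte default. A simplified sketch of that probe-time logic is below; the patch itself uses the virtio_config_val() helper as the diff shows, and the surrounding probe code and error handling are omitted here:

	u32 blk_size = 512;	/* default hard sector size */

	/* The feature bit is set in vdev->features only if both the host
	 * offered it and the driver advertised it, so this check suffices. */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_BLK_SIZE))
		vdev->config->get(vdev,
				  offsetof(struct virtio_blk_config, blk_size),
				  &blk_size, sizeof(blk_size));

	blk_queue_hardsect_size(vblk->disk->queue, blk_size);
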
Signed-off-by: Christian Borntraeger Signed-off-by: Rusty Russell --- drivers/block/virtio_blk.c | 10 +++++++++- include/linux/virtio_blk.h | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index dd7ea203f940..42251095134f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -196,6 +196,7 @@ static int virtblk_probe(struct virtio_device *vdev) int err; u64 cap; u32 v; + u32 blk_size; if (index_to_minor(index) >= 1 << MINORBITS) return -ENOSPC; @@ -290,6 +291,13 @@ static int virtblk_probe(struct virtio_device *vdev) if (!err) blk_queue_max_hw_segments(vblk->disk->queue, v); + /* Host can optionally specify the block size of the device */ + err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE, + offsetof(struct virtio_blk_config, blk_size), + &blk_size); + if (!err) + blk_queue_hardsect_size(vblk->disk->queue, blk_size); + add_disk(vblk->disk); return 0; @@ -330,7 +338,7 @@ static struct virtio_device_id id_table[] = { static unsigned int features[] = { VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, - VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, + VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, }; static struct virtio_driver virtio_blk = { diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 6a66c7f30bcb..c1aef85243bf 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -13,6 +13,7 @@ #define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */ #define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */ #define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ +#define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ struct virtio_blk_config { @@ -28,6 +29,8 @@ struct virtio_blk_config __u8 heads; __u8 sectors; } geometry; + /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */ + __u32 blk_size; } __attribute__((packed)); /* These two define direction. */ -- cgit v1.2.3 From dd7c7bc46211785a1aa7d70feb15830f62682b3c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 25 Jul 2008 12:06:07 -0500 Subject: virtio: Formally reserve bits 28-31 to be 'transport' features. We assign feature bits as required, but it makes sense to reserve some for the particular transport, rather than the particular device. Signed-off-by: Rusty Russell --- drivers/virtio/virtio.c | 5 +++++ include/linux/virtio_config.h | 6 ++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index fc85cba64578..baf103361e3a 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -113,6 +113,11 @@ static int virtio_dev_probe(struct device *_d) set_bit(f, dev->features); } + /* Transport features are always preserved to pass to set_features. */ + for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) + if (device_features & (1 << i)) + set_bit(i, dev->features); + err = drv->probe(dev); if (err) add_status(dev, VIRTIO_CONFIG_S_FAILED); diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 7eb4b34d13bb..5a30cfb7934b 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -18,6 +18,12 @@ /* We've given up on this device. */ #define VIRTIO_CONFIG_S_FAILED 0x80 +/* Some virtio feature bits (currently bits 28 through 31) are reserved for the + * transport being used (eg. virtio_ring), the rest are per-device feature + * bits. 
*/ +#define VIRTIO_TRANSPORT_F_START 28 +#define VIRTIO_TRANSPORT_F_END 32 + /* Do we get callbacks when the ring is completely used, even if we've * suppressed them? */ #define VIRTIO_F_NOTIFY_ON_EMPTY 24 -- cgit v1.2.3 From c624896e488ba2bff5ae497782cfb265c8b00646 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 25 Jul 2008 12:06:07 -0500 Subject: virtio: Rename set_features to finalize_features Rather than explicitly handing the features to the lower-level, we just hand the virtio_device and have it set the features. This make it clear that it has the chance to manipulate the features of the device at this point (and that all feature negotiation is already done). Signed-off-by: Rusty Russell --- drivers/lguest/lguest_device.c | 11 ++++++----- drivers/s390/kvm/kvm_virtio.c | 11 ++++++----- drivers/virtio/virtio.c | 5 ++--- drivers/virtio/virtio_pci.c | 10 ++++++---- include/linux/virtio_config.h | 7 ++++--- 5 files changed, 24 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 1a8de57289eb..54fdc2aa4806 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -98,16 +98,17 @@ static u32 lg_get_features(struct virtio_device *vdev) return features; } -static void lg_set_features(struct virtio_device *vdev, u32 features) +static void lg_finalize_features(struct virtio_device *vdev) { - unsigned int i; + unsigned int i, bits; struct lguest_device_desc *desc = to_lgdev(vdev)->desc; /* Second half of bitmap is features we accept. */ u8 *out_features = lg_features(desc) + desc->feature_len; memset(out_features, 0, desc->feature_len); - for (i = 0; i < min(desc->feature_len * 8, 32); i++) { - if (features & (1 << i)) + bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; + for (i = 0; i < bits; i++) { + if (test_bit(i, vdev->features)) out_features[i / 8] |= (1 << (i % 8)); } } @@ -297,7 +298,7 @@ static void lg_del_vq(struct virtqueue *vq) /* The ops structure which hooks everything together. */ static struct virtio_config_ops lguest_config_ops = { .get_features = lg_get_features, - .set_features = lg_set_features, + .finalize_features = lg_finalize_features, .get = lg_get, .set = lg_set, .get_status = lg_get_status, diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index d41f234bb2c2..5953510e7d5f 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -88,16 +88,17 @@ static u32 kvm_get_features(struct virtio_device *vdev) return features; } -static void kvm_set_features(struct virtio_device *vdev, u32 features) +static void kvm_finalize_features(struct virtio_device *vdev) { - unsigned int i; + unsigned int i, bits; struct kvm_device_desc *desc = to_kvmdev(vdev)->desc; /* Second half of bitmap is features we accept. 
*/ u8 *out_features = kvm_vq_features(desc) + desc->feature_len; memset(out_features, 0, desc->feature_len); - for (i = 0; i < min(desc->feature_len * 8, 32); i++) { - if (features & (1 << i)) + bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; + for (i = 0; i < bits; i++) { + if (test_bit(i, vdev->features)) out_features[i / 8] |= (1 << (i % 8)); } } @@ -223,7 +224,7 @@ static void kvm_del_vq(struct virtqueue *vq) */ static struct virtio_config_ops kvm_vq_configspace_ops = { .get_features = kvm_get_features, - .set_features = kvm_set_features, + .finalize_features = kvm_finalize_features, .get = kvm_get, .set = kvm_set, .get_status = kvm_get_status, diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index baf103361e3a..5b78fd0aff0a 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -113,7 +113,7 @@ static int virtio_dev_probe(struct device *_d) set_bit(f, dev->features); } - /* Transport features are always preserved to pass to set_features. */ + /* Transport features always preserved to pass to finalize_features. */ for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) if (device_features & (1 << i)) set_bit(i, dev->features); @@ -122,8 +122,7 @@ static int virtio_dev_probe(struct device *_d) if (err) add_status(dev, VIRTIO_CONFIG_S_FAILED); else { - /* They should never have set feature bits beyond 32 */ - dev->config->set_features(dev, dev->features[0]); + dev->config->finalize_features(dev); add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); } return err; diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index eae7236310e4..9855975a72a3 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -94,12 +94,14 @@ static u32 vp_get_features(struct virtio_device *vdev) return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); } -/* virtio config->set_features() implementation */ -static void vp_set_features(struct virtio_device *vdev, u32 features) +/* virtio config->finalize_features() implementation */ +static void vp_finalize_features(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - iowrite32(features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); + /* We only support 32 feature bits. */ + BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1); + iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES); } /* virtio config->get() implementation */ @@ -297,7 +299,7 @@ static struct virtio_config_ops virtio_pci_config_ops = { .find_vq = vp_find_vq, .del_vq = vp_del_vq, .get_features = vp_get_features, - .set_features = vp_set_features, + .finalize_features = vp_finalize_features, }; /* the PCI probing function */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 5a30cfb7934b..bf8ec283b232 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -61,9 +61,10 @@ * @get_features: get the array of feature bits for this device. * vdev: the virtio_device * Returns the first 32 feature bits (all we currently need). - * @set_features: confirm what device features we'll be using. + * @finalize_features: confirm what device features we'll be using. * vdev: the virtio_device - * feature: the first 32 feature bits + * This gives the final feature bits for the device: it can change + * the dev->feature bits if it wants. 
*/ struct virtio_config_ops { @@ -79,7 +80,7 @@ struct virtio_config_ops void (*callback)(struct virtqueue *)); void (*del_vq)(struct virtqueue *vq); u32 (*get_features)(struct virtio_device *vdev); - void (*set_features)(struct virtio_device *vdev, u32 features); + void (*finalize_features)(struct virtio_device *vdev); }; /* If driver didn't advertise the feature, it will never appear. */ -- cgit v1.2.3 From e34f87256794b87e7f4a8f1812538be7b7b5214c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 25 Jul 2008 12:06:13 -0500 Subject: virtio: Add transport feature handling stub for virtio_ring. To prepare for virtio_ring transport feature bits, hook in a call in all the users to manipulate them. This currently just clears all the bits, since it doesn't understand any features. Signed-off-by: Rusty Russell --- drivers/lguest/lguest_device.c | 3 +++ drivers/s390/kvm/kvm_virtio.c | 3 +++ drivers/virtio/virtio_pci.c | 3 +++ drivers/virtio/virtio_ring.c | 16 ++++++++++++++++ include/linux/virtio_ring.h | 2 ++ 5 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 54fdc2aa4806..37344aaee22f 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -105,6 +105,9 @@ static void lg_finalize_features(struct virtio_device *vdev) /* Second half of bitmap is features we accept. */ u8 *out_features = lg_features(desc) + desc->feature_len; + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + memset(out_features, 0, desc->feature_len); bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; for (i = 0; i < bits; i++) { diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 5953510e7d5f..79954bd6bfa5 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -95,6 +95,9 @@ static void kvm_finalize_features(struct virtio_device *vdev) /* Second half of bitmap is features we accept. */ u8 *out_features = kvm_vq_features(desc) + desc->feature_len; + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + memset(out_features, 0, desc->feature_len); bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; for (i = 0; i < bits; i++) { diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 9855975a72a3..c7dc37c7cce9 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -99,6 +99,9 @@ static void vp_finalize_features(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + /* We only support 32 feature bits. */ BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1); iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 21d9a62767af..6eb5303fed11 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -18,6 +18,7 @@ */ #include #include +#include #include #ifdef DEBUG @@ -323,4 +324,19 @@ void vring_del_virtqueue(struct virtqueue *vq) } EXPORT_SYMBOL_GPL(vring_del_virtqueue); +/* Manipulates transport-specific feature bits. */ +void vring_transport_features(struct virtio_device *vdev) +{ + unsigned int i; + + for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { + switch (i) { + default: + /* We don't understand this bit. 
*/ + clear_bit(i, vdev->features); + } + } +} +EXPORT_SYMBOL_GPL(vring_transport_features); + MODULE_LICENSE("GPL"); diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index abe481ed990e..c4a598fb3826 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -120,6 +120,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, void (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq)); void vring_del_virtqueue(struct virtqueue *vq); +/* Filter out transport-specific feature bits. */ +void vring_transport_features(struct virtio_device *vdev); irqreturn_t vring_interrupt(int irq, void *_vq); #endif /* __KERNEL__ */ -- cgit v1.2.3 From ed9559d38a87a44e3bda87d73a50aab92471d7dc Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 25 Jul 2008 12:11:09 +1000 Subject: Label kthread_create() with printf attribute tag. Obvious misc patch been in my queue (& linux-next) for over a cycle. Signed-off-by: Rusty Russell Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 00dd957e245b..aabc8a13ba71 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -6,7 +6,8 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), void *data, - const char namefmt[], ...); + const char namefmt[], ...) + __attribute__((format(printf, 3, 4))); /** * kthread_run - create and wake a thread. -- cgit v1.2.3 From 483fad1c3fa1060d7e6710e84a065ad514571739 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 22 Jul 2008 04:48:46 +1000 Subject: ELF loader support for auxvec base platform string Some IBM POWER-based platforms have the ability to run in a mode which mostly appears to the OS as a different processor from the actual hardware. For example, a Power6 system may appear to be a Power5+, which makes the AT_PLATFORM value "power5+". This means that programs are restricted to the ISA supported by Power5+; Power6-specific instructions are treated as illegal. However, some applications (virtual machines, optimized libraries) can benefit from knowledge of the underlying CPU model. A new aux vector entry, AT_BASE_PLATFORM, will denote the actual hardware. For example, on a Power6 system in Power5+ compatibility mode, AT_PLATFORM will be "power5+" and AT_BASE_PLATFORM will be "power6". The idea is that AT_PLATFORM indicates the instruction set supported, while AT_BASE_PLATFORM indicates the underlying microarchitecture. If the architecture has defined ELF_BASE_PLATFORM, copy that value to the user stack in the same manner as ELF_PLATFORM. Signed-off-by: Nathan Lynch Acked-by: Andrew Morton Signed-off-by: Benjamin Herrenschmidt --- fs/binfmt_elf.c | 28 ++++++++++++++++++++++++++++ include/linux/auxvec.h | 6 +++++- 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 639d2d8b5710..742c8f530481 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -131,6 +131,15 @@ static int padzero(unsigned long elf_bss) #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; }) #endif +#ifndef ELF_BASE_PLATFORM +/* + * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture. + * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value + * will be copied to the user stack in the same manner as AT_PLATFORM. 
+ */ +#define ELF_BASE_PLATFORM NULL +#endif + static int create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, unsigned long load_addr, unsigned long interp_load_addr) @@ -142,7 +151,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, elf_addr_t __user *envp; elf_addr_t __user *sp; elf_addr_t __user *u_platform; + elf_addr_t __user *u_base_platform; const char *k_platform = ELF_PLATFORM; + const char *k_base_platform = ELF_BASE_PLATFORM; int items; elf_addr_t *elf_info; int ei_index = 0; @@ -172,6 +183,19 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, return -EFAULT; } + /* + * If this architecture has a "base" platform capability + * string, copy it to userspace. + */ + u_base_platform = NULL; + if (k_base_platform) { + size_t len = strlen(k_base_platform) + 1; + + u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len); + if (__copy_to_user(u_base_platform, k_base_platform, len)) + return -EFAULT; + } + /* Create the ELF interpreter info */ elf_info = (elf_addr_t *)current->mm->saved_auxv; /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */ @@ -209,6 +233,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t)(unsigned long)u_platform); } + if (k_base_platform) { + NEW_AUX_ENT(AT_BASE_PLATFORM, + (elf_addr_t)(unsigned long)u_base_platform); + } if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) { NEW_AUX_ENT(AT_EXECFD, bprm->interp_data); } diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h index 0da17d14fd13..d7afa9dd6635 100644 --- a/include/linux/auxvec.h +++ b/include/linux/auxvec.h @@ -26,9 +26,13 @@ #define AT_SECURE 23 /* secure mode boolean */ +#define AT_BASE_PLATFORM 24 /* string identifying real platform, may + * differ from AT_PLATFORM. */ + #define AT_EXECFN 31 /* filename of program */ + #ifdef __KERNEL__ -#define AT_VECTOR_SIZE_BASE 17 /* NEW_AUX_ENT entries in auxiliary table */ +#define AT_VECTOR_SIZE_BASE 18 /* NEW_AUX_ENT entries in auxiliary table */ /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */ #endif -- cgit v1.2.3 From 95984f62c9b0bf6d89ef4f514b1afe73623481de Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 22 Jul 2008 18:41:10 +0200 Subject: firewire: fw-ohci: TSB43AB22/A dualbuffer workaround Isochronous reception in dualbuffer mode is reportedly broken with TI TSB43AB22A on x86-64. Descriptor addresses above 2G have been determined as the trigger: https://bugzilla.redhat.com/show_bug.cgi?id=435550 Two fixes are possible: - pci_set_consistent_dma_mask(pdev, DMA_31BIT_MASK); at least when IR descriptors are allocated, or - simply don't use dualbuffer. This fix implements the latter workaround. But we keep using dualbuffer on x86-32 which won't give us highmen (and thus physical addresses outside the 31bit range) in coherent DMA memory allocations. Right now we could for example also whitelist PPC32, but DMA mapping implementation details are expected to change there. 
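For illustration, a minimal sketch of the first alternative mentioned above (restricting coherent allocations to the low 2G), which this patch deliberately does not take; the helper name and its placement in pci_probe() are assumptions, not part of the change:

	#include <linux/pci.h>
	#include <linux/dma-mapping.h>

	/* Keep coherent (descriptor) allocations below the 31-bit boundary so
	 * dual-buffer IR would stay usable on the affected TI controller.
	 * Streaming DMA mappings are not constrained by this call. */
	static int ohci_limit_coherent_dma(struct pci_dev *pdev)
	{
		return pci_set_consistent_dma_mask(pdev, DMA_31BIT_MASK);
	}

The patch below instead just clears use_dualbuffer for the TSB43AB22/A on configurations where coherent memory can land above 2G.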
Signed-off-by: Stefan Richter Signed-off-by: Jarod Wilson --- drivers/firewire/fw-ohci.c | 37 ++++++++++++++++++++++++------------- include/linux/pci_ids.h | 1 + 2 files changed, 25 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c index 333b12544dd1..a4eff32621b5 100644 --- a/drivers/firewire/fw-ohci.c +++ b/drivers/firewire/fw-ohci.c @@ -171,7 +171,6 @@ struct iso_context { struct fw_ohci { struct fw_card card; - u32 version; __iomem char *registers; dma_addr_t self_id_bus; __le32 *self_id_cpu; @@ -180,6 +179,8 @@ struct fw_ohci { int generation; int request_generation; /* for timestamping incoming requests */ u32 bus_seconds; + + bool use_dualbuffer; bool old_uninorth; bool bus_reset_packet_quirk; @@ -1885,7 +1886,7 @@ ohci_allocate_iso_context(struct fw_card *card, int type, size_t header_size) } else { mask = &ohci->ir_context_mask; list = ohci->ir_context_list; - if (ohci->version >= OHCI_VERSION_1_1) + if (ohci->use_dualbuffer) callback = handle_ir_dualbuffer_packet; else callback = handle_ir_packet_per_buffer; @@ -1949,7 +1950,7 @@ static int ohci_start_iso(struct fw_iso_context *base, } else { index = ctx - ohci->ir_context_list; control = IR_CONTEXT_ISOCH_HEADER; - if (ohci->version >= OHCI_VERSION_1_1) + if (ohci->use_dualbuffer) control |= IR_CONTEXT_DUAL_BUFFER_MODE; match = (tags << 28) | (sync << 8) | ctx->base.channel; if (cycle >= 0) { @@ -2279,7 +2280,7 @@ ohci_queue_iso(struct fw_iso_context *base, spin_lock_irqsave(&ctx->context.ohci->lock, flags); if (base->type == FW_ISO_CONTEXT_TRANSMIT) retval = ohci_queue_iso_transmit(base, packet, buffer, payload); - else if (ctx->context.ohci->version >= OHCI_VERSION_1_1) + else if (ctx->context.ohci->use_dualbuffer) retval = ohci_queue_iso_receive_dualbuffer(base, packet, buffer, payload); else @@ -2341,7 +2342,7 @@ static int __devinit pci_probe(struct pci_dev *dev, const struct pci_device_id *ent) { struct fw_ohci *ohci; - u32 bus_options, max_receive, link_speed; + u32 bus_options, max_receive, link_speed, version; u64 guid; int err; size_t size; @@ -2366,12 +2367,6 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent) pci_write_config_dword(dev, OHCI1394_PCI_HCI_Control, 0); pci_set_drvdata(dev, ohci); -#if defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32) - ohci->old_uninorth = dev->vendor == PCI_VENDOR_ID_APPLE && - dev->device == PCI_DEVICE_ID_APPLE_UNI_N_FW; -#endif - ohci->bus_reset_packet_quirk = dev->vendor == PCI_VENDOR_ID_TI; - spin_lock_init(&ohci->lock); tasklet_init(&ohci->bus_reset_tasklet, @@ -2390,6 +2385,23 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent) goto fail_iomem; } + version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff; + ohci->use_dualbuffer = version >= OHCI_VERSION_1_1; + +/* x86-32 currently doesn't use highmem for dma_alloc_coherent */ +#if !defined(CONFIG_X86_32) + /* dual-buffer mode is broken with descriptor addresses above 2G */ + if (dev->vendor == PCI_VENDOR_ID_TI && + dev->device == PCI_DEVICE_ID_TI_TSB43AB22) + ohci->use_dualbuffer = false; +#endif + +#if defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32) + ohci->old_uninorth = dev->vendor == PCI_VENDOR_ID_APPLE && + dev->device == PCI_DEVICE_ID_APPLE_UNI_N_FW; +#endif + ohci->bus_reset_packet_quirk = dev->vendor == PCI_VENDOR_ID_TI; + ar_context_init(&ohci->ar_request_ctx, ohci, OHCI1394_AsReqRcvContextControlSet); @@ -2441,9 +2453,8 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent) if (err < 0) goto 
fail_self_id; - ohci->version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff; fw_notify("Added fw-ohci device %s, OHCI version %x.%x\n", - dev->dev.bus_id, ohci->version >> 16, ohci->version & 0xff); + dev->dev.bus_id, version >> 16, version & 0xff); return 0; fail_self_id: diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 65953822c9cb..720d67554106 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -748,6 +748,7 @@ #define PCI_VENDOR_ID_TI 0x104c #define PCI_DEVICE_ID_TI_TVP4020 0x3d07 #define PCI_DEVICE_ID_TI_4450 0x8011 +#define PCI_DEVICE_ID_TI_TSB43AB22 0x8023 #define PCI_DEVICE_ID_TI_XX21_XX11 0x8031 #define PCI_DEVICE_ID_TI_XX21_XX11_FM 0x8033 #define PCI_DEVICE_ID_TI_XX21_XX11_SD 0x8034 -- cgit v1.2.3 From 3d45955962496879dead8d4dd70bb9a23b07154b Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Thu, 15 May 2008 17:23:18 +0100 Subject: [MTD] [NAND] subpage read feature as a way to increase performance. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables NAND subpage read functionality. If upper layer drivers are requesting to read non page aligned data NAND subpage-read functionality reads the only whose ECC regions which include requested data when original code reads whole page. This significantly improves performance in many cases. Here are some digits : UBI volume mount time No subpage reads: 5.75 seconds Subpage read patch: 2.42 seconds Open/stat time for files on JFFS2 volume: No subpage read 0m 5.36s Subpage read 0m 2.88s Signed-off-by Alexey Korolev Acked-by: Artem Bityutskiy Acked-by: Jörn Engel Signed-off-by: David Woodhouse --- drivers/mtd/nand/nand_base.c | 87 +++++++++++++++++++++++++++++++++++++++++++- include/linux/mtd/nand.h | 5 +++ 2 files changed, 91 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index ba1bdf787323..d1129bae6c27 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -797,6 +797,87 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip, return 0; } +/** + * nand_read_subpage - [REPLACABLE] software ecc based sub-page read function + * @mtd: mtd info structure + * @chip: nand chip info structure + * @dataofs offset of requested data within the page + * @readlen data length + * @buf: buffer to store read data + */ +static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, uint32_t data_offs, uint32_t readlen, uint8_t *bufpoi) +{ + int start_step, end_step, num_steps; + uint32_t *eccpos = chip->ecc.layout->eccpos; + uint8_t *p; + int data_col_addr, i, gaps = 0; + int datafrag_len, eccfrag_len, aligned_len, aligned_pos; + int busw = (chip->options & NAND_BUSWIDTH_16) ? 2 : 1; + + /* Column address wihin the page aligned to ECC size (256bytes). 
*/ + start_step = data_offs / chip->ecc.size; + end_step = (data_offs + readlen - 1) / chip->ecc.size; + num_steps = end_step - start_step + 1; + + /* Data size aligned to ECC ecc.size*/ + datafrag_len = num_steps * chip->ecc.size; + eccfrag_len = num_steps * chip->ecc.bytes; + + data_col_addr = start_step * chip->ecc.size; + /* If we read not a page aligned data */ + if (data_col_addr != 0) + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, data_col_addr, -1); + + p = bufpoi + data_col_addr; + chip->read_buf(mtd, p, datafrag_len); + + /* Calculate ECC */ + for (i = 0; i < eccfrag_len ; i += chip->ecc.bytes, p += chip->ecc.size) + chip->ecc.calculate(mtd, p, &chip->buffers->ecccalc[i]); + + /* The performance is faster if to position offsets + according to ecc.pos. Let make sure here that + there are no gaps in ecc positions */ + for (i = 0; i < eccfrag_len - 1; i++) { + if (eccpos[i + start_step * chip->ecc.bytes] + 1 != + eccpos[i + start_step * chip->ecc.bytes + 1]) { + gaps = 1; + break; + } + } + if (gaps) { + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, mtd->writesize, -1); + chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); + } else { + /* send the command to read the particular ecc bytes */ + /* take care about buswidth alignment in read_buf */ + aligned_pos = eccpos[start_step * chip->ecc.bytes] & ~(busw - 1); + aligned_len = eccfrag_len; + if (eccpos[start_step * chip->ecc.bytes] & (busw - 1)) + aligned_len++; + if (eccpos[(start_step + num_steps) * chip->ecc.bytes] & (busw - 1)) + aligned_len++; + + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, mtd->writesize + aligned_pos, -1); + chip->read_buf(mtd, &chip->oob_poi[aligned_pos], aligned_len); + } + + for (i = 0; i < eccfrag_len; i++) + chip->buffers->ecccode[i] = chip->oob_poi[eccpos[i + start_step * chip->ecc.bytes]]; + + p = bufpoi + data_col_addr; + for (i = 0; i < eccfrag_len ; i += chip->ecc.bytes, p += chip->ecc.size) { + int stat; + + stat = chip->ecc.correct(mtd, p, &chip->buffers->ecccode[i], &chip->buffers->ecccalc[i]); + if (stat == -1) + mtd->ecc_stats.failed++; + else + mtd->ecc_stats.corrected += stat; + } + return 0; +} + /** * nand_read_page_hwecc - [REPLACABLE] hardware ecc based page read function * @mtd: mtd info structure @@ -994,6 +1075,8 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, /* Now read the page into the buffer */ if (unlikely(ops->mode == MTD_OOB_RAW)) ret = chip->ecc.read_page_raw(mtd, chip, bufpoi); + else if (!aligned && NAND_SUBPAGE_READ(chip) && !oob) + ret = chip->ecc.read_subpage(mtd, chip, col, bytes, bufpoi); else ret = chip->ecc.read_page(mtd, chip, bufpoi); if (ret < 0) @@ -1001,7 +1084,8 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, /* Transfer not aligned data */ if (!aligned) { - chip->pagebuf = realpage; + if (!NAND_SUBPAGE_READ(chip) && !oob) + chip->pagebuf = realpage; memcpy(buf, chip->buffers->databuf + col, bytes); } @@ -2521,6 +2605,7 @@ int nand_scan_tail(struct mtd_info *mtd) chip->ecc.calculate = nand_calculate_ecc; chip->ecc.correct = nand_correct_data; chip->ecc.read_page = nand_read_page_swecc; + chip->ecc.read_subpage = nand_read_subpage; chip->ecc.write_page = nand_write_page_swecc; chip->ecc.read_oob = nand_read_oob_std; chip->ecc.write_oob = nand_write_oob_std; diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 863e22a0ddb5..83f678702dff 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -177,6 +177,7 @@ typedef enum { #define NAND_MUST_PAD(chip) (!(chip->options & NAND_NO_PADDING)) #define NAND_HAS_CACHEPROG(chip) 
((chip->options & NAND_CACHEPRG)) #define NAND_HAS_COPYBACK(chip) ((chip->options & NAND_COPYBACK)) +#define NAND_SUBPAGE_READ(chip) ((chip->ecc.mode == NAND_ECC_SOFT)) /* Mask to zero out the chip options, which come from the id table */ #define NAND_CHIPOPTIONS_MSK (0x0000ffff & ~NAND_NO_AUTOINCR) @@ -274,6 +275,10 @@ struct nand_ecc_ctrl { int (*read_page)(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf); + int (*read_subpage)(struct mtd_info *mtd, + struct nand_chip *chip, + uint32_t offs, uint32_t len, + uint8_t *buf); void (*write_page)(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf); -- cgit v1.2.3 From 3d6f4a20cc287a8980c6186624834cf10a70752b Mon Sep 17 00:00:00 2001 From: David Miller Date: Thu, 24 Jul 2008 23:38:31 -0700 Subject: endian: Always evaluate arguments. Changeset 7fa897b91a3ea0f16c2873b869d7a0eef05acff4 ("ide: trivial sparse annotations") created an IDE bootup regression on big-endian systems. In drivers/ide/ide-iops.c, function ide_fixstring() we now have the loop: for (p = end ; p != s;) be16_to_cpus((u16 *)(p -= 2)); which will never terminate on big-endian because in such a configuration be16_to_cpus() evaluates to "do { } while (0)" Therefore, always evaluate the arguments to nop endian transformation operations. Signed-off-by: David S. Miller Signed-off-by: Linus Torvalds --- include/linux/byteorder/big_endian.h | 12 ++++++------ include/linux/byteorder/little_endian.h | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h index 961ed4b48d8e..44f95b92393b 100644 --- a/include/linux/byteorder/big_endian.h +++ b/include/linux/byteorder/big_endian.h @@ -94,12 +94,12 @@ static inline __u16 __be16_to_cpup(const __be16 *p) #define __le32_to_cpus(x) __swab32s((x)) #define __cpu_to_le16s(x) __swab16s((x)) #define __le16_to_cpus(x) __swab16s((x)) -#define __cpu_to_be64s(x) do {} while (0) -#define __be64_to_cpus(x) do {} while (0) -#define __cpu_to_be32s(x) do {} while (0) -#define __be32_to_cpus(x) do {} while (0) -#define __cpu_to_be16s(x) do {} while (0) -#define __be16_to_cpus(x) do {} while (0) +#define __cpu_to_be64s(x) do { (void)(x); } while (0) +#define __be64_to_cpus(x) do { (void)(x); } while (0) +#define __cpu_to_be32s(x) do { (void)(x); } while (0) +#define __be32_to_cpus(x) do { (void)(x); } while (0) +#define __cpu_to_be16s(x) do { (void)(x); } while (0) +#define __be16_to_cpus(x) do { (void)(x); } while (0) #ifdef __KERNEL__ #include diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h index 05dc7c35b3b2..4cc170a31762 100644 --- a/include/linux/byteorder/little_endian.h +++ b/include/linux/byteorder/little_endian.h @@ -88,12 +88,12 @@ static inline __u16 __be16_to_cpup(const __be16 *p) { return __swab16p((__u16 *)p); } -#define __cpu_to_le64s(x) do {} while (0) -#define __le64_to_cpus(x) do {} while (0) -#define __cpu_to_le32s(x) do {} while (0) -#define __le32_to_cpus(x) do {} while (0) -#define __cpu_to_le16s(x) do {} while (0) -#define __le16_to_cpus(x) do {} while (0) +#define __cpu_to_le64s(x) do { (void)(x); } while (0) +#define __le64_to_cpus(x) do { (void)(x); } while (0) +#define __cpu_to_le32s(x) do { (void)(x); } while (0) +#define __le32_to_cpus(x) do { (void)(x); } while (0) +#define __cpu_to_le16s(x) do { (void)(x); } while (0) +#define __le16_to_cpus(x) do { (void)(x); } while (0) #define __cpu_to_be64s(x) __swab64s((x)) #define 
__be64_to_cpus(x) __swab64s((x)) #define __cpu_to_be32s(x) __swab32s((x)) -- cgit v1.2.3 From 25c94d010a8ae8605dc4d5453e0c82fa97da5d12 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Fri, 25 Jul 2008 10:30:06 -0700 Subject: mlx4_core: Add VLAN tag field to WQE control segment struct Add fields for VLAN tag and insert VLAN tag flag to the control section struct. These fields will be used for sending ethernet packets. Signed-off-by: Yevgeny Petrilin Signed-off-by: Roland Dreier --- include/linux/mlx4/qp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index e27082cd650e..bf8f11982dae 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -164,11 +164,13 @@ enum { MLX4_WQE_CTRL_SOLICITED = 1 << 1, MLX4_WQE_CTRL_IP_CSUM = 1 << 4, MLX4_WQE_CTRL_TCP_UDP_CSUM = 1 << 5, + MLX4_WQE_CTRL_INS_VLAN = 1 << 6, }; struct mlx4_wqe_ctrl_seg { __be32 owner_opcode; - u8 reserved2[3]; + __be16 vlan_tag; + u8 ins_vlan; u8 fence_size; /* * High 24 bits are SRC remote buffer; low 8 bits are flags: -- cgit v1.2.3 From e0deaff470900a4c3222ca7139f6c9639e26a2f5 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 25 Jul 2008 01:45:24 -0700 Subject: split the typecheck macros out of include/linux/kernel.h Needed to fix up a recursive include snafu in locking-add-typecheck-on-irqsave-and-friends-for-correct-flags.patch Cc: Steven Rostedt Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 21 +-------------------- include/linux/typecheck.h | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 20 deletions(-) create mode 100644 include/linux/typecheck.h (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f9cd7a513f9c..5c4b1251e110 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -441,26 +442,6 @@ static inline char *pack_hex_byte(char *buf, u8 byte) const typeof( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );}) -/* - * Check at compile time that something is of a particular type. - * Always evaluates to 1 so you may use it easily in comparisons. - */ -#define typecheck(type,x) \ -({ type __dummy; \ - typeof(x) __dummy2; \ - (void)(&__dummy == &__dummy2); \ - 1; \ -}) - -/* - * Check at compile time that 'function' is a certain type, or is a pointer - * to that type (needs to use typedef for the function type.) - */ -#define typecheck_fn(type,function) \ -({ typeof(type) __tmp = function; \ - (void)__tmp; \ -}) - struct sysinfo; extern int do_sysinfo(struct sysinfo *info); diff --git a/include/linux/typecheck.h b/include/linux/typecheck.h new file mode 100644 index 000000000000..eb5b74a575be --- /dev/null +++ b/include/linux/typecheck.h @@ -0,0 +1,24 @@ +#ifndef TYPECHECK_H_INCLUDED +#define TYPECHECK_H_INCLUDED + +/* + * Check at compile time that something is of a particular type. + * Always evaluates to 1 so you may use it easily in comparisons. + */ +#define typecheck(type,x) \ +({ type __dummy; \ + typeof(x) __dummy2; \ + (void)(&__dummy == &__dummy2); \ + 1; \ +}) + +/* + * Check at compile time that 'function' is a certain type, or is a pointer + * to that type (needs to use typedef for the function type.) 
+ */ +#define typecheck_fn(type,function) \ +({ typeof(type) __tmp = function; \ + (void)__tmp; \ +}) + +#endif /* TYPECHECK_H_INCLUDED */ -- cgit v1.2.3 From 3f307891ce0e7b0438c432af1aacd656a092ff45 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 25 Jul 2008 01:45:25 -0700 Subject: locking: add typecheck on irqsave and friends for correct flags There haave been several areas in the kernel where an int has been used for flags in local_irq_save() and friends instead of a long. This can cause some hard to debug problems on some architectures. This patch adds a typecheck inside the irqsave and restore functions to flag these cases. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: build fix] Signed-off-by: Steven Rostedt Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/irqflags.h | 54 ++++++++++++++++++++++++++---------- include/linux/spinlock.h | 72 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 95 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 2b1c2e58566e..74bde13224c9 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -11,6 +11,8 @@ #ifndef _LINUX_TRACE_IRQFLAGS_H #define _LINUX_TRACE_IRQFLAGS_H +#include + #ifdef CONFIG_TRACE_IRQFLAGS extern void trace_softirqs_on(unsigned long ip); extern void trace_softirqs_off(unsigned long ip); @@ -58,18 +60,24 @@ do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) #define local_irq_disable() \ do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0) -#define local_irq_save(flags) \ - do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0) +#define local_irq_save(flags) \ + do { \ + typecheck(unsigned long, flags); \ + raw_local_irq_save(flags); \ + trace_hardirqs_off(); \ + } while (0) -#define local_irq_restore(flags) \ - do { \ - if (raw_irqs_disabled_flags(flags)) { \ - raw_local_irq_restore(flags); \ - trace_hardirqs_off(); \ - } else { \ - trace_hardirqs_on(); \ - raw_local_irq_restore(flags); \ - } \ + +#define local_irq_restore(flags) \ + do { \ + typecheck(unsigned long, flags); \ + if (raw_irqs_disabled_flags(flags)) { \ + raw_local_irq_restore(flags); \ + trace_hardirqs_off(); \ + } else { \ + trace_hardirqs_on(); \ + raw_local_irq_restore(flags); \ + } \ } while (0) #else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */ /* @@ -78,8 +86,16 @@ */ # define raw_local_irq_disable() local_irq_disable() # define raw_local_irq_enable() local_irq_enable() -# define raw_local_irq_save(flags) local_irq_save(flags) -# define raw_local_irq_restore(flags) local_irq_restore(flags) +# define raw_local_irq_save(flags) \ + do { \ + typecheck(unsigned long, flags); \ + local_irq_save(flags); \ + } while (0) +# define raw_local_irq_restore(flags) \ + do { \ + typecheck(unsigned long, flags); \ + local_irq_restore(flags); \ + } while (0) #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT @@ -89,7 +105,11 @@ raw_safe_halt(); \ } while (0) -#define local_save_flags(flags) raw_local_save_flags(flags) +#define local_save_flags(flags) \ + do { \ + typecheck(unsigned long, flags); \ + raw_local_save_flags(flags); \ + } while (0) #define irqs_disabled() \ ({ \ @@ -99,7 +119,11 @@ raw_irqs_disabled_flags(_flags); \ }) -#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) +#define irqs_disabled_flags(flags) \ +({ \ + typecheck(unsigned long, flags); \ + raw_irqs_disabled_flags(flags); \ +}) #endif /* CONFIG_X86 
*/ #endif diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index d311a090fae7..61e5610ad165 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -46,6 +46,7 @@ * linux/spinlock.h: builds the final spin_*() APIs. */ +#include #include #include #include @@ -191,23 +192,53 @@ do { \ #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -#define spin_lock_irqsave(lock, flags) flags = _spin_lock_irqsave(lock) -#define read_lock_irqsave(lock, flags) flags = _read_lock_irqsave(lock) -#define write_lock_irqsave(lock, flags) flags = _write_lock_irqsave(lock) +#define spin_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _spin_lock_irqsave(lock); \ + } while (0) +#define read_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _read_lock_irqsave(lock); \ + } while (0) +#define write_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _write_lock_irqsave(lock); \ + } while (0) #ifdef CONFIG_DEBUG_LOCK_ALLOC -#define spin_lock_irqsave_nested(lock, flags, subclass) \ - flags = _spin_lock_irqsave_nested(lock, subclass) +#define spin_lock_irqsave_nested(lock, flags, subclass) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _spin_lock_irqsave_nested(lock, subclass); \ + } while (0) #else -#define spin_lock_irqsave_nested(lock, flags, subclass) \ - flags = _spin_lock_irqsave(lock) +#define spin_lock_irqsave_nested(lock, flags, subclass) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _spin_lock_irqsave(lock); \ + } while (0) #endif #else -#define spin_lock_irqsave(lock, flags) _spin_lock_irqsave(lock, flags) -#define read_lock_irqsave(lock, flags) _read_lock_irqsave(lock, flags) -#define write_lock_irqsave(lock, flags) _write_lock_irqsave(lock, flags) +#define spin_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _spin_lock_irqsave(lock, flags); \ + } while (0) +#define read_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _read_lock_irqsave(lock, flags); \ + } while (0) +#define write_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _write_lock_irqsave(lock, flags); \ + } while (0) #define spin_lock_irqsave_nested(lock, flags, subclass) \ spin_lock_irqsave(lock, flags) @@ -260,16 +291,25 @@ do { \ } while (0) #endif -#define spin_unlock_irqrestore(lock, flags) \ - _spin_unlock_irqrestore(lock, flags) +#define spin_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _spin_unlock_irqrestore(lock, flags); \ + } while (0) #define spin_unlock_bh(lock) _spin_unlock_bh(lock) -#define read_unlock_irqrestore(lock, flags) \ - _read_unlock_irqrestore(lock, flags) +#define read_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _read_unlock_irqrestore(lock, flags); \ + } while (0) #define read_unlock_bh(lock) _read_unlock_bh(lock) -#define write_unlock_irqrestore(lock, flags) \ - _write_unlock_irqrestore(lock, flags) +#define write_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _write_unlock_irqrestore(lock, flags); \ + } while (0) #define write_unlock_bh(lock) _write_unlock_bh(lock) #define spin_trylock_bh(lock) __cond_lock(lock, _spin_trylock_bh(lock)) -- cgit v1.2.3 From 8b5ac31e27135a6f2c210c40d03bf8f1b3a86b77 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 25 Jul 2008 01:45:26 -0700 Subject: include: use get/put_unaligned_* helpers Signed-off-by: Harvey Harrison Cc: "John W. 
Linville" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/reiserfs_fs.h | 4 ++-- include/linux/smb_fs.h | 19 +++++++------------ include/net/ieee80211_radiotap.h | 2 +- 3 files changed, 10 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 4aacaeecb56f..e9963af16cda 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -526,8 +526,8 @@ struct item_head { ** p is the array of __u32, i is the index into the array, v is the value ** to store there. */ -#define get_block_num(p, i) le32_to_cpu(get_unaligned((p) + (i))) -#define put_block_num(p, i, v) put_unaligned(cpu_to_le32(v), (p) + (i)) +#define get_block_num(p, i) get_unaligned_le32((p) + (i)) +#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i)) // // in old version uniqueness field shows key type diff --git a/include/linux/smb_fs.h b/include/linux/smb_fs.h index 2c5cd55f44ff..923cd8a247b1 100644 --- a/include/linux/smb_fs.h +++ b/include/linux/smb_fs.h @@ -43,18 +43,13 @@ static inline struct smb_inode_info *SMB_I(struct inode *inode) } /* macro names are short for word, double-word, long value (?) */ -#define WVAL(buf,pos) \ - (le16_to_cpu(get_unaligned((__le16 *)((u8 *)(buf) + (pos))))) -#define DVAL(buf,pos) \ - (le32_to_cpu(get_unaligned((__le32 *)((u8 *)(buf) + (pos))))) -#define LVAL(buf,pos) \ - (le64_to_cpu(get_unaligned((__le64 *)((u8 *)(buf) + (pos))))) -#define WSET(buf,pos,val) \ - put_unaligned(cpu_to_le16((u16)(val)), (__le16 *)((u8 *)(buf) + (pos))) -#define DSET(buf,pos,val) \ - put_unaligned(cpu_to_le32((u32)(val)), (__le32 *)((u8 *)(buf) + (pos))) -#define LSET(buf,pos,val) \ - put_unaligned(cpu_to_le64((u64)(val)), (__le64 *)((u8 *)(buf) + (pos))) +#define WVAL(buf, pos) (get_unaligned_le16((u8 *)(buf) + (pos))) +#define DVAL(buf, pos) (get_unaligned_le32((u8 *)(buf) + (pos))) +#define LVAL(buf, pos) (get_unaligned_le64((u8 *)(buf) + (pos))) + +#define WSET(buf, pos, val) put_unaligned_le16((val), (u8 *)(buf) + (pos)) +#define DSET(buf, pos, val) put_unaligned_le32((val), (u8 *)(buf) + (pos)) +#define LSET(buf, pos, val) put_unaligned_le64((val), (u8 *)(buf) + (pos)) /* where to find the base of the SMB packet proper */ #define smb_base(buf) ((u8 *)(((u8 *)(buf))+4)) diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index dfd8bf66ce27..d364fd594ea4 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -262,7 +262,7 @@ static inline int ieee80211_get_radiotap_len(unsigned char *data) struct ieee80211_radiotap_header *hdr = (struct ieee80211_radiotap_header *)data; - return le16_to_cpu(get_unaligned(&hdr->it_len)); + return get_unaligned_le16(&hdr->it_len); } #endif /* IEEE80211_RADIOTAP_H */ -- cgit v1.2.3 From b39c08cb692cb8898c30e0d8187c7cbe27cc905c Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Fri, 25 Jul 2008 01:45:29 -0700 Subject: Remove apparently unused fd1772.h header file. This header file has been unused for quite some time, and the corresponding source files appear to have been removed back in commit 99eb8a550dbccc0e1f6c7e866fe421810e0585f6 ("Remove the arm26 port") Signed-off-by: Robert P. J. 
Day Cc: Adrian Bunk Cc: Ian Molton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fd1772.h | 80 -------------------------------------------------- 1 file changed, 80 deletions(-) delete mode 100644 include/linux/fd1772.h (limited to 'include/linux') diff --git a/include/linux/fd1772.h b/include/linux/fd1772.h deleted file mode 100644 index 871d6e4c677e..000000000000 --- a/include/linux/fd1772.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef _LINUX_FD1772REG_H -#define _LINUX_FD1772REG_H - -/* -** WD1772 stuff - originally from the M68K Linux - * Modified for Archimedes by Dave Gilbert (gilbertd@cs.man.ac.uk) - */ - -/* register codes */ - -#define FDC1772SELREG_STP (0x80) /* command/status register */ -#define FDC1772SELREG_TRA (0x82) /* track register */ -#define FDC1772SELREG_SEC (0x84) /* sector register */ -#define FDC1772SELREG_DTA (0x86) /* data register */ - -/* register names for FDC1772_READ/WRITE macros */ - -#define FDC1772REG_CMD 0 -#define FDC1772REG_STATUS 0 -#define FDC1772REG_TRACK 2 -#define FDC1772REG_SECTOR 4 -#define FDC1772REG_DATA 6 - -/* command opcodes */ - -#define FDC1772CMD_RESTORE (0x00) /* - */ -#define FDC1772CMD_SEEK (0x10) /* | */ -#define FDC1772CMD_STEP (0x20) /* | TYP 1 Commands */ -#define FDC1772CMD_STIN (0x40) /* | */ -#define FDC1772CMD_STOT (0x60) /* - */ -#define FDC1772CMD_RDSEC (0x80) /* - TYP 2 Commands */ -#define FDC1772CMD_WRSEC (0xa0) /* - " */ -#define FDC1772CMD_RDADR (0xc0) /* - */ -#define FDC1772CMD_RDTRA (0xe0) /* | TYP 3 Commands */ -#define FDC1772CMD_WRTRA (0xf0) /* - */ -#define FDC1772CMD_FORCI (0xd0) /* - TYP 4 Command */ - -/* command modifier bits */ - -#define FDC1772CMDADD_SR6 (0x00) /* step rate settings */ -#define FDC1772CMDADD_SR12 (0x01) -#define FDC1772CMDADD_SR2 (0x02) -#define FDC1772CMDADD_SR3 (0x03) -#define FDC1772CMDADD_V (0x04) /* verify */ -#define FDC1772CMDADD_H (0x08) /* wait for spin-up */ -#define FDC1772CMDADD_U (0x10) /* update track register */ -#define FDC1772CMDADD_M (0x10) /* multiple sector access */ -#define FDC1772CMDADD_E (0x04) /* head settling flag */ -#define FDC1772CMDADD_P (0x02) /* precompensation */ -#define FDC1772CMDADD_A0 (0x01) /* DAM flag */ - -/* status register bits */ - -#define FDC1772STAT_MOTORON (0x80) /* motor on */ -#define FDC1772STAT_WPROT (0x40) /* write protected (FDC1772CMD_WR*) */ -#define FDC1772STAT_SPINUP (0x20) /* motor speed stable (Type I) */ -#define FDC1772STAT_DELDAM (0x20) /* sector has deleted DAM (Type II+III) */ -#define FDC1772STAT_RECNF (0x10) /* record not found */ -#define FDC1772STAT_CRC (0x08) /* CRC error */ -#define FDC1772STAT_TR00 (0x04) /* Track 00 flag (Type I) */ -#define FDC1772STAT_LOST (0x04) /* Lost Data (Type II+III) */ -#define FDC1772STAT_IDX (0x02) /* Index status (Type I) */ -#define FDC1772STAT_DRQ (0x02) /* DRQ status (Type II+III) */ -#define FDC1772STAT_BUSY (0x01) /* FDC1772 is busy */ - - -/* PSG Port A Bit Nr 0 .. Side Sel .. 0 -> Side 1 1 -> Side 2 */ -#define DSKSIDE (0x01) - -#define DSKDRVNONE (0x06) -#define DSKDRV0 (0x02) -#define DSKDRV1 (0x04) - -/* step rates */ -#define FDC1772STEP_6 0x00 -#define FDC1772STEP_12 0x01 -#define FDC1772STEP_2 0x02 -#define FDC1772STEP_3 0x03 - -#endif -- cgit v1.2.3 From e0ce0da9fefcc723dc006c35a7f91a32750abd40 Mon Sep 17 00:00:00 2001 From: "Robert P. J. 
Day" Date: Fri, 25 Jul 2008 01:45:32 -0700 Subject: lists: remove a redundant conditional definition of list_add() Remove the conditional surrounding the definition of list_add() from list.h since, if you define CONFIG_DEBUG_LIST, the definition you will subsequently pick up from lib/list_debug.c will be absolutely identical, at which point you can remove that redundant definition from list_debug.c as well. Signed-off-by: Robert P. J. Day Cc: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list.h | 4 ---- lib/list_debug.c | 14 -------------- 2 files changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 139ec41d9c2e..453916bc0412 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -61,14 +61,10 @@ extern void __list_add(struct list_head *new, * Insert a new entry after the specified head. * This is good for implementing stacks. */ -#ifndef CONFIG_DEBUG_LIST static inline void list_add(struct list_head *new, struct list_head *head) { __list_add(new, head, head->next); } -#else -extern void list_add(struct list_head *new, struct list_head *head); -#endif /** diff --git a/lib/list_debug.c b/lib/list_debug.c index 4350ba9655bd..45c03fd608dd 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -39,20 +39,6 @@ void __list_add(struct list_head *new, } EXPORT_SYMBOL(__list_add); -/** - * list_add - add a new entry - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - */ -void list_add(struct list_head *new, struct list_head *head) -{ - __list_add(new, head, head->next); -} -EXPORT_SYMBOL(list_add); - /** * list_del - deletes entry from list. * @entry: the element to delete from the list. -- cgit v1.2.3 From b03f6489f9f27dc519a4c60ebf39cc7b8a58eae7 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 25 Jul 2008 01:45:35 -0700 Subject: build kernel/profile.o only when requested Build kernel/profile.o only if CONFIG_PROFILING is enabled. This makes CONFIG_PROFILING=n kernels smaller. As a bonus, some profile_tick() calls and one branch from schedule() are now eliminated with CONFIG_PROFILING=n (but I doubt these are measurable effects). This patch changes the effects of CONFIG_PROFILING=n, but I don't think having more than two choices would be the better choice. This patch also adds the name of the first parameter to the prototypes of profile_{hits,tick}() since I anyway had to add them for the dummy functions. 
Signed-off-by: Adrian Bunk Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/profile.h | 56 ++++++++++++++++++++++++++++++++++--------------- kernel/Makefile | 3 ++- kernel/profile.c | 4 ---- 3 files changed, 41 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/profile.h b/include/linux/profile.h index 05c1cc736937..4081fa31081f 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -8,8 +8,6 @@ #include -extern int prof_on __read_mostly; - #define CPU_PROFILING 1 #define SCHED_PROFILING 2 #define SLEEP_PROFILING 3 @@ -19,14 +17,29 @@ struct proc_dir_entry; struct pt_regs; struct notifier_block; +#if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS) +void create_prof_cpu_mask(struct proc_dir_entry *); +#else +#define create_prof_cpu_mask(x) do { (void)(x); } while (0) +#endif + +enum profile_type { + PROFILE_TASK_EXIT, + PROFILE_MUNMAP +}; + +#ifdef CONFIG_PROFILING + +extern int prof_on __read_mostly; + /* init basic kernel profiler */ void __init profile_init(void); -void profile_tick(int); +void profile_tick(int type); /* * Add multiple profiler hits to a given address: */ -void profile_hits(int, void *ip, unsigned int nr_hits); +void profile_hits(int type, void *ip, unsigned int nr_hits); /* * Single profiler hit: @@ -40,19 +53,6 @@ static inline void profile_hit(int type, void *ip) profile_hits(type, ip, 1); } -#ifdef CONFIG_PROC_FS -void create_prof_cpu_mask(struct proc_dir_entry *); -#else -#define create_prof_cpu_mask(x) do { (void)(x); } while (0) -#endif - -enum profile_type { - PROFILE_TASK_EXIT, - PROFILE_MUNMAP -}; - -#ifdef CONFIG_PROFILING - struct task_struct; struct mm_struct; @@ -80,6 +80,28 @@ struct pt_regs; #else +#define prof_on 0 + +static inline void profile_init(void) +{ + return; +} + +static inline void profile_tick(int type) +{ + return; +} + +static inline void profile_hits(int type, void *ip, unsigned int nr_hits) +{ + return; +} + +static inline void profile_hit(int type, void *ip) +{ + return; +} + static inline int task_handoff_register(struct notifier_block * n) { return -ENOSYS; diff --git a/kernel/Makefile b/kernel/Makefile index 15ab63ffe64d..54f69837d35a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel. 
# -obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ +obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ cpu.o exit.o itimer.o time.o softirq.o resource.o \ sysctl.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ @@ -24,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg CFLAGS_REMOVE_sched.o = -mno-spe -pg endif +obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ diff --git a/kernel/profile.c b/kernel/profile.c index 58926411eb2a..cd26bed4cc26 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -112,8 +112,6 @@ void __init profile_init(void) /* Profile event notifications */ -#ifdef CONFIG_PROFILING - static BLOCKING_NOTIFIER_HEAD(task_exit_notifier); static ATOMIC_NOTIFIER_HEAD(task_free_notifier); static BLOCKING_NOTIFIER_HEAD(munmap_notifier); @@ -203,8 +201,6 @@ void unregister_timer_hook(int (*hook)(struct pt_regs *)) } EXPORT_SYMBOL_GPL(unregister_timer_hook); -#endif /* CONFIG_PROFILING */ - #ifdef CONFIG_SMP /* -- cgit v1.2.3 From cebbd3fb803603b12408458ba17c29ce1e15a5f2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 25 Jul 2008 01:45:35 -0700 Subject: build-kernel-profileo-only-when-requested-cleanups Cc: Adrian Bunk Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/profile.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/profile.h b/include/linux/profile.h index 4081fa31081f..7e7087239af5 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -18,9 +18,11 @@ struct pt_regs; struct notifier_block; #if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS) -void create_prof_cpu_mask(struct proc_dir_entry *); +void create_prof_cpu_mask(struct proc_dir_entry *de); #else -#define create_prof_cpu_mask(x) do { (void)(x); } while (0) +static inline void create_prof_cpu_mask(struct proc_dir_entry *de) +{ +} #endif enum profile_type { -- cgit v1.2.3 From ac331d158e198d2a91a5b0a3ec4ca9991fdb57af Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Fri, 25 Jul 2008 01:45:38 -0700 Subject: call_usermodehelper(): increase reliability Presently call_usermodehelper_setup() uses GFP_ATOMIC. but it can return NULL _very_ easily. GFP_ATOMIC is needed only when we can't sleep. and, GFP_KERNEL is robust and better. thus, I add gfp_mask argument to call_usermodehelper_setup(). So, its callers pass the gfp_t as below: call_usermodehelper() and call_usermodehelper_keys(): depend on 'wait' argument. call_usermodehelper_pipe(): always GFP_KERNEL because always run under process context. orderly_poweroff(): pass to GFP_ATOMIC because may run under interrupt context. Signed-off-by: KOSAKI Motohiro Cc: "Paul Menage" Reviewed-by: Li Zefan Acked-by: Jeremy Fitzhardinge Cc: Rusty Russell Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 11 +++++++---- kernel/kmod.c | 9 +++++---- kernel/sys.c | 2 +- 3 files changed, 13 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 0509c4ce4857..a1a91577813c 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -19,6 +19,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +#include #include #include #include @@ -41,8 +42,8 @@ struct file; struct subprocess_info; /* Allocate a subprocess_info structure */ -struct subprocess_info *call_usermodehelper_setup(char *path, - char **argv, char **envp); +struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, + char **envp, gfp_t gfp_mask); /* Set various pieces of state into the subprocess_info structure */ void call_usermodehelper_setkeys(struct subprocess_info *info, @@ -69,8 +70,9 @@ static inline int call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) { struct subprocess_info *info; + gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL; - info = call_usermodehelper_setup(path, argv, envp); + info = call_usermodehelper_setup(path, argv, envp, gfp_mask); if (info == NULL) return -ENOMEM; return call_usermodehelper_exec(info, wait); @@ -81,8 +83,9 @@ call_usermodehelper_keys(char *path, char **argv, char **envp, struct key *session_keyring, enum umh_wait wait) { struct subprocess_info *info; + gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL; - info = call_usermodehelper_setup(path, argv, envp); + info = call_usermodehelper_setup(path, argv, envp, gfp_mask); if (info == NULL) return -ENOMEM; diff --git a/kernel/kmod.c b/kernel/kmod.c index 2989f67c4446..2456d1a0befb 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -352,16 +352,17 @@ static inline void register_pm_notifier_callback(void) {} * @path: path to usermode executable * @argv: arg vector for process * @envp: environment for process + * @gfp_mask: gfp mask for memory allocation * * Returns either %NULL on allocation failure, or a subprocess_info * structure. This should be passed to call_usermodehelper_exec to * exec the process and free the structure. */ -struct subprocess_info *call_usermodehelper_setup(char *path, - char **argv, char **envp) +struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, + char **envp, gfp_t gfp_mask) { struct subprocess_info *sub_info; - sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC); + sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask); if (!sub_info) goto out; @@ -494,7 +495,7 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp, struct subprocess_info *sub_info; int ret; - sub_info = call_usermodehelper_setup(path, argv, envp); + sub_info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL); if (sub_info == NULL) return -ENOMEM; diff --git a/kernel/sys.c b/kernel/sys.c index 14e97282eb6c..6c2188046048 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1795,7 +1795,7 @@ int orderly_poweroff(bool force) goto out; } - info = call_usermodehelper_setup(argv[0], argv, envp); + info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC); if (info == NULL) { argv_free(argv); goto out; -- cgit v1.2.3 From 4500d067eeb3d00679335d9cf5c6536e79cd3ef4 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Fri, 25 Jul 2008 01:45:49 -0700 Subject: init.h: remove obsolete content Remove apparently obsolete content from init.h referring to gcc 2.9x and to "no_module_init". Signed-off-by: Robert P. J. 
Day Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 21d658cdfa27..42ae95411a93 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -275,13 +275,7 @@ void __init parse_early_param(void); #define security_initcall(fn) module_init(fn) -/* These macros create a dummy inline: gcc 2.9x does not count alias - as usage, hence the `unused function' warning when __init functions - are declared static. We use the dummy __*_module_inline functions - both to kill the warning and check the type of the init/cleanup - function. */ - -/* Each module must use one module_init(), or one no_module_init */ +/* Each module must use one module_init(). */ #define module_init(initfn) \ static inline initcall_t __inittest(void) \ { return initfn; } \ -- cgit v1.2.3 From b6c63937001889af6fe431aaba97e59d04e028e7 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 25 Jul 2008 01:45:52 -0700 Subject: Rename WARN() to WARNING() to clear the namespace We want to use WARN() as a variant of WARN_ON(), however a few drivers are using WARN() internally. This patch renames these to WARNING() to avoid the namespace clash. A few cases were defining but not using the thing, for those cases I just deleted the definition. Signed-off-by: Arjan van de Ven Acked-by: Greg KH Cc: Karsten Keil Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/isdn/hisax/st5481.h | 4 ++-- drivers/isdn/hisax/st5481_b.c | 4 ++-- drivers/isdn/hisax/st5481_d.c | 6 +++--- drivers/isdn/hisax/st5481_usb.c | 18 +++++++++--------- drivers/usb/gadget/at91_udc.h | 2 +- drivers/usb/gadget/cdc2.c | 2 +- drivers/usb/gadget/ether.c | 2 +- drivers/usb/gadget/file_storage.c | 14 +++++++------- drivers/usb/gadget/fsl_usb2_udc.c | 2 +- drivers/usb/gadget/fsl_usb2_udc.h | 2 +- drivers/usb/gadget/gmidi.c | 2 -- drivers/usb/gadget/goku_udc.c | 2 +- drivers/usb/gadget/goku_udc.h | 2 +- drivers/usb/gadget/inode.c | 2 -- drivers/usb/gadget/net2280.c | 2 +- drivers/usb/gadget/net2280.h | 2 +- drivers/usb/gadget/omap_udc.c | 6 +++--- drivers/usb/gadget/omap_udc.h | 2 +- drivers/usb/gadget/printer.c | 2 +- drivers/usb/gadget/pxa25x_udc.c | 6 +++--- drivers/usb/gadget/pxa25x_udc.h | 2 +- drivers/usb/gadget/u_ether.c | 3 --- drivers/usb/host/isp116x-hcd.c | 2 +- drivers/usb/host/isp116x.h | 2 +- drivers/usb/host/sl811-hcd.c | 2 +- drivers/usb/host/sl811.h | 2 +- drivers/usb/misc/usbtest.c | 4 ++-- include/linux/usb/composite.h | 2 +- 28 files changed, 48 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/hisax/st5481.h b/drivers/isdn/hisax/st5481.h index 2044e7173ab4..cff7a6354334 100644 --- a/drivers/isdn/hisax/st5481.h +++ b/drivers/isdn/hisax/st5481.h @@ -220,7 +220,7 @@ enum { #define ERR(format, arg...) \ printk(KERN_ERR "%s:%s: " format "\n" , __FILE__, __func__ , ## arg) -#define WARN(format, arg...) \ +#define WARNING(format, arg...) \ printk(KERN_WARNING "%s:%s: " format "\n" , __FILE__, __func__ , ## arg) #define INFO(format, arg...) 
\ @@ -412,7 +412,7 @@ struct st5481_adapter { ({ \ int status; \ if ((status = usb_submit_urb(urb, mem_flags)) < 0) { \ - WARN("usb_submit_urb failed,status=%d", status); \ + WARNING("usb_submit_urb failed,status=%d", status); \ } \ status; \ }) diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c index fa64115cd7c7..0074b600a0ef 100644 --- a/drivers/isdn/hisax/st5481_b.c +++ b/drivers/isdn/hisax/st5481_b.c @@ -180,7 +180,7 @@ static void usb_b_out_complete(struct urb *urb) DBG(4,"urb killed status %d", urb->status); return; // Give up default: - WARN("urb status %d",urb->status); + WARNING("urb status %d",urb->status); if (b_out->busy == 0) { st5481_usb_pipe_reset(adapter, (bcs->channel+1)*2 | USB_DIR_OUT, NULL, NULL); } @@ -372,6 +372,6 @@ void st5481_b_l2l1(struct hisax_if *ifc, int pr, void *arg) B_L1L2(bcs, PH_DEACTIVATE | INDICATION, NULL); break; default: - WARN("pr %#x\n", pr); + WARNING("pr %#x\n", pr); } } diff --git a/drivers/isdn/hisax/st5481_d.c b/drivers/isdn/hisax/st5481_d.c index b8c4855cc889..077991c1cd05 100644 --- a/drivers/isdn/hisax/st5481_d.c +++ b/drivers/isdn/hisax/st5481_d.c @@ -389,7 +389,7 @@ static void usb_d_out_complete(struct urb *urb) DBG(1,"urb killed status %d", urb->status); break; default: - WARN("urb status %d",urb->status); + WARNING("urb status %d",urb->status); if (d_out->busy == 0) { st5481_usb_pipe_reset(adapter, EP_D_OUT | USB_DIR_OUT, fifo_reseted, adapter); } @@ -420,7 +420,7 @@ static void dout_start_xmit(struct FsmInst *fsm, int event, void *arg) isdnhdlc_out_init(&d_out->hdlc_state, 1, 0); if (test_and_set_bit(buf_nr, &d_out->busy)) { - WARN("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy); + WARNING("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy); return; } urb = d_out->urb[buf_nr]; @@ -601,7 +601,7 @@ void st5481_d_l2l1(struct hisax_if *hisax_d_if, int pr, void *arg) FsmEvent(&adapter->d_out.fsm, EV_DOUT_START_XMIT, NULL); break; default: - WARN("pr %#x\n", pr); + WARNING("pr %#x\n", pr); break; } } diff --git a/drivers/isdn/hisax/st5481_usb.c b/drivers/isdn/hisax/st5481_usb.c index 427a8b0520f5..ec3c0e507669 100644 --- a/drivers/isdn/hisax/st5481_usb.c +++ b/drivers/isdn/hisax/st5481_usb.c @@ -66,7 +66,7 @@ static void usb_ctrl_msg(struct st5481_adapter *adapter, struct ctrl_msg *ctrl_msg; if ((w_index = fifo_add(&ctrl->msg_fifo.f)) < 0) { - WARN("control msg FIFO full"); + WARNING("control msg FIFO full"); return; } ctrl_msg = &ctrl->msg_fifo.data[w_index]; @@ -139,7 +139,7 @@ static void usb_ctrl_complete(struct urb *urb) DBG(1,"urb killed status %d", urb->status); return; // Give up default: - WARN("urb status %d",urb->status); + WARNING("urb status %d",urb->status); break; } } @@ -198,7 +198,7 @@ static void usb_int_complete(struct urb *urb) DBG(2, "urb shutting down with status: %d", urb->status); return; default: - WARN("nonzero urb status received: %d", urb->status); + WARNING("nonzero urb status received: %d", urb->status); goto exit; } @@ -235,7 +235,7 @@ static void usb_int_complete(struct urb *urb) exit: status = usb_submit_urb (urb, GFP_ATOMIC); if (status) - WARN("usb_submit_urb failed with result %d", status); + WARNING("usb_submit_urb failed with result %d", status); } /* ====================================================================== @@ -257,7 +257,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter) DBG(2,""); if ((status = usb_reset_configuration (dev)) < 0) { - WARN("reset_configuration failed,status=%d",status); + WARNING("reset_configuration failed,status=%d",status); 
return status; } @@ -269,7 +269,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter) // Check if the config is sane if ( altsetting->desc.bNumEndpoints != 7 ) { - WARN("expecting 7 got %d endpoints!", altsetting->desc.bNumEndpoints); + WARNING("expecting 7 got %d endpoints!", altsetting->desc.bNumEndpoints); return -EINVAL; } @@ -279,7 +279,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter) // Use alternative setting 3 on interface 0 to have 2B+D if ((status = usb_set_interface (dev, 0, 3)) < 0) { - WARN("usb_set_interface failed,status=%d",status); + WARNING("usb_set_interface failed,status=%d",status); return status; } @@ -497,7 +497,7 @@ static void usb_in_complete(struct urb *urb) DBG(1,"urb killed status %d", urb->status); return; // Give up default: - WARN("urb status %d",urb->status); + WARNING("urb status %d",urb->status); break; } } @@ -523,7 +523,7 @@ static void usb_in_complete(struct urb *urb) DBG(4,"count=%d",status); DBG_PACKET(0x400, in->rcvbuf, status); if (!(skb = dev_alloc_skb(status))) { - WARN("receive out of memory\n"); + WARNING("receive out of memory\n"); break; } memcpy(skb_put(skb, status), in->rcvbuf, status); diff --git a/drivers/usb/gadget/at91_udc.h b/drivers/usb/gadget/at91_udc.h index a973f2a50fb9..c65d62295890 100644 --- a/drivers/usb/gadget/at91_udc.h +++ b/drivers/usb/gadget/at91_udc.h @@ -171,7 +171,7 @@ struct at91_request { #endif #define ERR(stuff...) pr_err("udc: " stuff) -#define WARN(stuff...) pr_warning("udc: " stuff) +#define WARNING(stuff...) pr_warning("udc: " stuff) #define INFO(stuff...) pr_info("udc: " stuff) #define DBG(stuff...) pr_debug("udc: " stuff) diff --git a/drivers/usb/gadget/cdc2.c b/drivers/usb/gadget/cdc2.c index d490d0289507..a39a4b940c33 100644 --- a/drivers/usb/gadget/cdc2.c +++ b/drivers/usb/gadget/cdc2.c @@ -170,7 +170,7 @@ static int __init cdc_bind(struct usb_composite_dev *cdev) * but if the controller isn't recognized at all then * that assumption is a bit more likely to be wrong. */ - WARN(cdev, "controller '%s' not recognized; trying %s\n", + WARNING(cdev, "controller '%s' not recognized; trying %s\n", gadget->name, cdc_config_driver.label); device_desc.bcdDevice = diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c index d7aaaa29b1e1..bcac2e68660d 100644 --- a/drivers/usb/gadget/ether.c +++ b/drivers/usb/gadget/ether.c @@ -293,7 +293,7 @@ static int __init eth_bind(struct usb_composite_dev *cdev) * but if the controller isn't recognized at all then * that assumption is a bit more likely to be wrong. */ - WARN(cdev, "controller '%s' not recognized; trying %s\n", + WARNING(cdev, "controller '%s' not recognized; trying %s\n", gadget->name, eth_config_driver.label); device_desc.bcdDevice = diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c index 15c24edbb61a..ea2c31d18080 100644 --- a/drivers/usb/gadget/file_storage.c +++ b/drivers/usb/gadget/file_storage.c @@ -308,7 +308,7 @@ MODULE_LICENSE("Dual BSD/GPL"); dev_vdbg(&(d)->gadget->dev , fmt , ## args) #define ERROR(d, fmt, args...) \ dev_err(&(d)->gadget->dev , fmt , ## args) -#define WARN(d, fmt, args...) \ +#define WARNING(d, fmt, args...) \ dev_warn(&(d)->gadget->dev , fmt , ## args) #define INFO(d, fmt, args...) 
\ dev_info(&(d)->gadget->dev , fmt , ## args) @@ -1091,7 +1091,7 @@ static int ep0_queue(struct fsg_dev *fsg) if (rc != 0 && rc != -ESHUTDOWN) { /* We can't do much more than wait for a reset */ - WARN(fsg, "error in submission: %s --> %d\n", + WARNING(fsg, "error in submission: %s --> %d\n", fsg->ep0->name, rc); } return rc; @@ -1227,7 +1227,7 @@ static void received_cbi_adsc(struct fsg_dev *fsg, struct fsg_buffhd *bh) /* Save the command for later */ if (fsg->cbbuf_cmnd_size) - WARN(fsg, "CB[I] overwriting previous command\n"); + WARNING(fsg, "CB[I] overwriting previous command\n"); fsg->cbbuf_cmnd_size = req->actual; memcpy(fsg->cbbuf_cmnd, req->buf, fsg->cbbuf_cmnd_size); @@ -1506,7 +1506,7 @@ static void start_transfer(struct fsg_dev *fsg, struct usb_ep *ep, * submissions if DMA is enabled. */ if (rc != -ESHUTDOWN && !(rc == -EOPNOTSUPP && req->length == 0)) - WARN(fsg, "error in submission: %s --> %d\n", + WARNING(fsg, "error in submission: %s --> %d\n", ep->name, rc); } } @@ -2294,7 +2294,7 @@ static int halt_bulk_in_endpoint(struct fsg_dev *fsg) VDBG(fsg, "delayed bulk-in endpoint halt\n"); while (rc != 0) { if (rc != -EAGAIN) { - WARN(fsg, "usb_ep_set_halt -> %d\n", rc); + WARNING(fsg, "usb_ep_set_halt -> %d\n", rc); rc = 0; break; } @@ -2317,7 +2317,7 @@ static int wedge_bulk_in_endpoint(struct fsg_dev *fsg) VDBG(fsg, "delayed bulk-in endpoint wedge\n"); while (rc != 0) { if (rc != -EAGAIN) { - WARN(fsg, "usb_ep_set_wedge -> %d\n", rc); + WARNING(fsg, "usb_ep_set_wedge -> %d\n", rc); rc = 0; break; } @@ -3755,7 +3755,7 @@ static int __init check_parameters(struct fsg_dev *fsg) if (gcnum >= 0) mod_data.release = 0x0300 + gcnum; else { - WARN(fsg, "controller '%s' not recognized\n", + WARNING(fsg, "controller '%s' not recognized\n", fsg->gadget->name); mod_data.release = 0x0399; } diff --git a/drivers/usb/gadget/fsl_usb2_udc.c b/drivers/usb/gadget/fsl_usb2_udc.c index 1695382f30fe..1cfccf102a2d 100644 --- a/drivers/usb/gadget/fsl_usb2_udc.c +++ b/drivers/usb/gadget/fsl_usb2_udc.c @@ -1538,7 +1538,7 @@ static void dtd_complete_irq(struct fsl_udc *udc) /* If the ep is configured */ if (curr_ep->name == NULL) { - WARN("Invalid EP?"); + WARNING("Invalid EP?"); continue; } diff --git a/drivers/usb/gadget/fsl_usb2_udc.h b/drivers/usb/gadget/fsl_usb2_udc.h index 98b1483ef6a5..6131752a38bc 100644 --- a/drivers/usb/gadget/fsl_usb2_udc.h +++ b/drivers/usb/gadget/fsl_usb2_udc.h @@ -552,7 +552,7 @@ static void dump_msg(const char *label, const u8 * buf, unsigned int length) #endif #define ERR(stuff...) pr_err("udc: " stuff) -#define WARN(stuff...) pr_warning("udc: " stuff) +#define WARNING(stuff...) pr_warning("udc: " stuff) #define INFO(stuff...) pr_info("udc: " stuff) /*-------------------------------------------------------------------------*/ diff --git a/drivers/usb/gadget/gmidi.c b/drivers/usb/gadget/gmidi.c index 7f4d4828e3aa..ea8651e3da1a 100644 --- a/drivers/usb/gadget/gmidi.c +++ b/drivers/usb/gadget/gmidi.c @@ -138,8 +138,6 @@ static void gmidi_transmit(struct gmidi_device* dev, struct usb_request* req); dev_vdbg(&(d)->gadget->dev , fmt , ## args) #define ERROR(d, fmt, args...) \ dev_err(&(d)->gadget->dev , fmt , ## args) -#define WARN(d, fmt, args...) \ - dev_warn(&(d)->gadget->dev , fmt , ## args) #define INFO(d, fmt, args...) 
\ dev_info(&(d)->gadget->dev , fmt , ## args) diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c index 48f1c63b7013..60aa04847b18 100644 --- a/drivers/usb/gadget/goku_udc.c +++ b/drivers/usb/gadget/goku_udc.c @@ -1768,7 +1768,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id) * usb_gadget_driver_{register,unregister}() must change. */ if (the_controller) { - WARN(dev, "ignoring %s\n", pci_name(pdev)); + WARNING(dev, "ignoring %s\n", pci_name(pdev)); return -EBUSY; } if (!pdev->irq) { diff --git a/drivers/usb/gadget/goku_udc.h b/drivers/usb/gadget/goku_udc.h index bc4eb1e0b507..566cb2319056 100644 --- a/drivers/usb/gadget/goku_udc.h +++ b/drivers/usb/gadget/goku_udc.h @@ -285,7 +285,7 @@ struct goku_udc { #define ERROR(dev,fmt,args...) \ xprintk(dev , KERN_ERR , fmt , ## args) -#define WARN(dev,fmt,args...) \ +#define WARNING(dev,fmt,args...) \ xprintk(dev , KERN_WARNING , fmt , ## args) #define INFO(dev,fmt,args...) \ xprintk(dev , KERN_INFO , fmt , ## args) diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 04692d59fc1c..f4585d3e90d7 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -262,8 +262,6 @@ static const char *CHIP; #define ERROR(dev,fmt,args...) \ xprintk(dev , KERN_ERR , fmt , ## args) -#define WARN(dev,fmt,args...) \ - xprintk(dev , KERN_WARNING , fmt , ## args) #define INFO(dev,fmt,args...) \ xprintk(dev , KERN_INFO , fmt , ## args) diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c index b67ab677af72..5cfb5ebf3881 100644 --- a/drivers/usb/gadget/net2280.c +++ b/drivers/usb/gadget/net2280.c @@ -1007,7 +1007,7 @@ static void scan_dma_completions (struct net2280_ep *ep) * 0122, and 0124; not all cases trigger the warning. */ if ((tmp & (1 << NAK_OUT_PACKETS)) == 0) { - WARN (ep->dev, "%s lost packet sync!\n", + WARNING (ep->dev, "%s lost packet sync!\n", ep->ep.name); req->req.status = -EOVERFLOW; } else if ((tmp = readl (&ep->regs->ep_avail)) != 0) { diff --git a/drivers/usb/gadget/net2280.h b/drivers/usb/gadget/net2280.h index 1f2af398a9a4..81a71dbdc2c6 100644 --- a/drivers/usb/gadget/net2280.h +++ b/drivers/usb/gadget/net2280.h @@ -272,7 +272,7 @@ static inline void net2280_led_shutdown (struct net2280 *dev) #define ERROR(dev,fmt,args...) \ xprintk(dev , KERN_ERR , fmt , ## args) -#define WARN(dev,fmt,args...) \ +#define WARNING(dev,fmt,args...) \ xprintk(dev , KERN_WARNING , fmt , ## args) #define INFO(dev,fmt,args...) \ xprintk(dev , KERN_INFO , fmt , ## args) diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c index 4b79a8509e84..395bd1844482 100644 --- a/drivers/usb/gadget/omap_udc.c +++ b/drivers/usb/gadget/omap_udc.c @@ -1120,7 +1120,7 @@ static int omap_ep_set_halt(struct usb_ep *_ep, int value) status = -EINVAL; else if (value) { if (ep->udc->ep0_set_config) { - WARN("error changing config?\n"); + WARNING("error changing config?\n"); omap_writew(UDC_CLR_CFG, UDC_SYSCON2); } omap_writew(UDC_STALL_CMD, UDC_SYSCON2); @@ -1764,7 +1764,7 @@ do_stall: u.r.bRequestType, u.r.bRequest, status); if (udc->ep0_set_config) { if (udc->ep0_reset_config) - WARN("error resetting config?\n"); + WARNING("error resetting config?\n"); else omap_writew(UDC_CLR_CFG, UDC_SYSCON2); } @@ -3076,7 +3076,7 @@ static int omap_udc_suspend(struct platform_device *dev, pm_message_t message) * which would prevent entry to deep sleep... 
*/ if ((devstat & UDC_ATT) != 0 && (devstat & UDC_SUS) == 0) { - WARN("session active; suspend requires disconnect\n"); + WARNING("session active; suspend requires disconnect\n"); omap_pullup(&udc->gadget, 0); } diff --git a/drivers/usb/gadget/omap_udc.h b/drivers/usb/gadget/omap_udc.h index 8522bbb12278..29edc51b6b22 100644 --- a/drivers/usb/gadget/omap_udc.h +++ b/drivers/usb/gadget/omap_udc.h @@ -188,7 +188,7 @@ struct omap_udc { #endif #define ERR(stuff...) pr_err("udc: " stuff) -#define WARN(stuff...) pr_warning("udc: " stuff) +#define WARNING(stuff...) pr_warning("udc: " stuff) #define INFO(stuff...) pr_info("udc: " stuff) #define DBG(stuff...) pr_debug("udc: " stuff) diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c index 49cd9e145a9b..e0090085b78e 100644 --- a/drivers/usb/gadget/printer.c +++ b/drivers/usb/gadget/printer.c @@ -179,7 +179,7 @@ module_param(qlen, uint, S_IRUGO|S_IWUSR); #define ERROR(dev, fmt, args...) \ xprintk(dev, KERN_ERR, fmt, ## args) -#define WARN(dev, fmt, args...) \ +#define WARNING(dev, fmt, args...) \ xprintk(dev, KERN_WARNING, fmt, ## args) #define INFO(dev, fmt, args...) \ xprintk(dev, KERN_INFO, fmt, ## args) diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c index 8fb0066609bb..7e6725d89976 100644 --- a/drivers/usb/gadget/pxa25x_udc.c +++ b/drivers/usb/gadget/pxa25x_udc.c @@ -342,7 +342,7 @@ pxa25x_ep_free_request (struct usb_ep *_ep, struct usb_request *_req) struct pxa25x_request *req; req = container_of (_req, struct pxa25x_request, req); - WARN_ON (!list_empty (&req->queue)); + WARN_ON(!list_empty (&req->queue)); kfree(req); } @@ -1556,7 +1556,7 @@ config_change: * tell us about config change events, * so later ones may fail... */ - WARN("config change %02x fail %d?\n", + WARNING("config change %02x fail %d?\n", u.r.bRequest, i); return; /* TODO experiment: if has_cfr, @@ -2330,7 +2330,7 @@ static int pxa25x_udc_suspend(struct platform_device *dev, pm_message_t state) unsigned long flags; if (!udc->mach->gpio_pullup && !udc->mach->udc_command) - WARN("USB host won't detect disconnect!\n"); + WARNING("USB host won't detect disconnect!\n"); udc->suspended = 1; local_irq_save(flags); diff --git a/drivers/usb/gadget/pxa25x_udc.h b/drivers/usb/gadget/pxa25x_udc.h index 4d11ece7c95f..c8a13215e02c 100644 --- a/drivers/usb/gadget/pxa25x_udc.h +++ b/drivers/usb/gadget/pxa25x_udc.h @@ -259,7 +259,7 @@ dump_state(struct pxa25x_udc *dev) #define DBG(lvl, stuff...) do{if ((lvl) <= UDC_DEBUG) DMSG(stuff);}while(0) #define ERR(stuff...) pr_err("udc: " stuff) -#define WARN(stuff...) pr_warning("udc: " stuff) +#define WARNING(stuff...) pr_warning("udc: " stuff) #define INFO(stuff...) pr_info("udc: " stuff) diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index 5458f43a8668..3791e6271903 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -116,7 +116,6 @@ static inline int qlen(struct usb_gadget *gadget) #undef DBG #undef VDBG #undef ERROR -#undef WARN #undef INFO #define xprintk(d, level, fmt, args...) \ @@ -140,8 +139,6 @@ static inline int qlen(struct usb_gadget *gadget) #define ERROR(dev, fmt, args...) \ xprintk(dev , KERN_ERR , fmt , ## args) -#define WARN(dev, fmt, args...) \ - xprintk(dev , KERN_WARNING , fmt , ## args) #define INFO(dev, fmt, args...) 
\ xprintk(dev , KERN_INFO , fmt , ## args) diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c index 31178e10cbbe..ce1ca0ba0515 100644 --- a/drivers/usb/host/isp116x-hcd.c +++ b/drivers/usb/host/isp116x-hcd.c @@ -882,7 +882,7 @@ static void isp116x_endpoint_disable(struct usb_hcd *hcd, for (i = 0; i < 100 && !list_empty(&hep->urb_list); i++) msleep(3); if (!list_empty(&hep->urb_list)) - WARN("ep %p not empty?\n", ep); + WARNING("ep %p not empty?\n", ep); kfree(ep); hep->hcpriv = NULL; diff --git a/drivers/usb/host/isp116x.h b/drivers/usb/host/isp116x.h index 595b90a99848..aa211bafcff9 100644 --- a/drivers/usb/host/isp116x.h +++ b/drivers/usb/host/isp116x.h @@ -338,7 +338,7 @@ struct isp116x_ep { #endif #define ERR(stuff...) printk(KERN_ERR "116x: " stuff) -#define WARN(stuff...) printk(KERN_WARNING "116x: " stuff) +#define WARNING(stuff...) printk(KERN_WARNING "116x: " stuff) #define INFO(stuff...) printk(KERN_INFO "116x: " stuff) /* ------------------------------------------------- */ diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c index 340d72da554a..8a74bbb57d08 100644 --- a/drivers/usb/host/sl811-hcd.c +++ b/drivers/usb/host/sl811-hcd.c @@ -1026,7 +1026,7 @@ sl811h_endpoint_disable(struct usb_hcd *hcd, struct usb_host_endpoint *hep) if (!list_empty(&hep->urb_list)) msleep(3); if (!list_empty(&hep->urb_list)) - WARN("ep %p not empty?\n", ep); + WARNING("ep %p not empty?\n", ep); kfree(ep); hep->hcpriv = NULL; diff --git a/drivers/usb/host/sl811.h b/drivers/usb/host/sl811.h index 7690d98e42a7..b6b8c1f233dd 100644 --- a/drivers/usb/host/sl811.h +++ b/drivers/usb/host/sl811.h @@ -261,6 +261,6 @@ sl811_read_buf(struct sl811 *sl811, int addr, void *buf, size_t count) #endif #define ERR(stuff...) printk(KERN_ERR "sl811: " stuff) -#define WARN(stuff...) printk(KERN_WARNING "sl811: " stuff) +#define WARNING(stuff...) printk(KERN_WARNING "sl811: " stuff) #define INFO(stuff...) printk(KERN_INFO "sl811: " stuff) diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 054dedd28127..b358c4e1cf21 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -81,7 +81,7 @@ static struct usb_device *testdev_to_usbdev (struct usbtest_dev *test) #define ERROR(tdev, fmt, args...) \ dev_err(&(tdev)->intf->dev , fmt , ## args) -#define WARN(tdev, fmt, args...) \ +#define WARNING(tdev, fmt, args...) \ dev_warn(&(tdev)->intf->dev , fmt , ## args) /*-------------------------------------------------------------------------*/ @@ -1946,7 +1946,7 @@ usbtest_probe (struct usb_interface *intf, const struct usb_device_id *id) status = get_endpoints (dev, intf); if (status < 0) { - WARN(dev, "couldn't get endpoints, %d\n", + WARNING(dev, "couldn't get endpoints, %d\n", status); return status; } diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 747c3a49cdc9..c932390c6da0 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -330,7 +330,7 @@ extern int usb_string_id(struct usb_composite_dev *c); dev_vdbg(&(d)->gadget->dev , fmt , ## args) #define ERROR(d, fmt, args...) \ dev_err(&(d)->gadget->dev , fmt , ## args) -#define WARN(d, fmt, args...) \ +#define WARNING(d, fmt, args...) \ dev_warn(&(d)->gadget->dev , fmt , ## args) #define INFO(d, fmt, args...) 
\ dev_info(&(d)->gadget->dev , fmt , ## args) -- cgit v1.2.3 From 2711b793eb62a5873a0ba583a69252040aef176e Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 25 Jul 2008 01:45:56 -0700 Subject: kallsyms: unify 32- and 64-bit code Use the %p format string which already accounts for the padding you need with a pointer type on a particular architecture. Also replace the macro with a static inline function to match the rest of the file. Cc: Heiko Carstens Cc: Arjan van de Ven Signed-off-by: Vegard Nossum Cc: Sam Ravnborg Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kallsyms.h | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 00c1801099fa..57aefa160a92 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -6,6 +6,7 @@ #define _LINUX_KALLSYMS_H #include +#include #include #define KSYM_NAME_LEN 128 @@ -105,18 +106,10 @@ static inline void print_fn_descriptor_symbol(const char *fmt, void *addr) print_symbol(fmt, (unsigned long)addr); } -#ifndef CONFIG_64BIT -#define print_ip_sym(ip) \ -do { \ - printk("[<%08lx>]", ip); \ - print_symbol(" %s\n", ip); \ -} while(0) -#else -#define print_ip_sym(ip) \ -do { \ - printk("[<%016lx>]", ip); \ - print_symbol(" %s\n", ip); \ -} while(0) -#endif +static inline void print_ip_sym(unsigned long ip) +{ + printk("[<%p>]", (void *) ip); + print_symbol(" %s\n", ip); +} #endif /*_LINUX_KALLSYMS_H*/ -- cgit v1.2.3 From 717115e1a5856b57af0f71e1df7149108294fc10 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Fri, 25 Jul 2008 01:45:58 -0700 Subject: printk ratelimiting rewrite All ratelimit user use same jiffies and burst params, so some messages (callbacks) will be lost. For example: a call printk_ratelimit(5 * HZ, 1) b call printk_ratelimit(5 * HZ, 1) before the 5*HZ timeout of a, then b will will be supressed. - rewrite __ratelimit, and use a ratelimit_state as parameter. Thanks for hints from andrew. - Add WARN_ON_RATELIMIT, update rcupreempt.h - remove __printk_ratelimit - use __ratelimit in net_ratelimit Signed-off-by: Dave Young Cc: "David S. Miller" Cc: "Paul E. 
McKenney" Cc: Dave Young Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/bug.h | 3 +++ include/linux/kernel.h | 8 ++----- include/linux/net.h | 3 +-- include/linux/ratelimit.h | 27 +++++++++++++++++++++++ include/linux/rcupreempt.h | 9 ++++++-- kernel/printk.c | 17 +++----------- kernel/sysctl.c | 4 ++-- lib/ratelimit.c | 55 +++++++++++++++++++++++++--------------------- net/core/sysctl_net_core.c | 4 ++-- net/core/utils.c | 5 ++--- 10 files changed, 79 insertions(+), 56 deletions(-) create mode 100644 include/linux/ratelimit.h (limited to 'include/linux') diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index a346e744e770..a3f738cffdb6 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -97,6 +97,9 @@ extern void warn_slowpath(const char *file, const int line, unlikely(__ret_warn_once); \ }) +#define WARN_ON_RATELIMIT(condition, state) \ + WARN_ON((condition) && __ratelimit(state)) + #ifdef CONFIG_SMP # define WARN_ON_SMP(x) WARN_ON(x) #else diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5c4b1251e110..fdbbf72ca2eb 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -189,11 +190,8 @@ asmlinkage int vprintk(const char *fmt, va_list args) asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; -extern int printk_ratelimit_jiffies; -extern int printk_ratelimit_burst; +extern struct ratelimit_state printk_ratelimit_state; extern int printk_ratelimit(void); -extern int __ratelimit(int ratelimit_jiffies, int ratelimit_burst); -extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst); extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); #else @@ -204,8 +202,6 @@ static inline int printk(const char *s, ...) __attribute__ ((format (printf, 1, 2))); static inline int __cold printk(const char *s, ...) 
{ return 0; } static inline int printk_ratelimit(void) { return 0; } -static inline int __printk_ratelimit(int ratelimit_jiffies, \ - int ratelimit_burst) { return 0; } static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ unsigned int interval_msec) \ { return false; } diff --git a/include/linux/net.h b/include/linux/net.h index 2f999fbb188d..4a9a30f2d68f 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -351,8 +351,7 @@ static const struct proto_ops name##_ops = { \ #ifdef CONFIG_SYSCTL #include -extern int net_msg_cost; -extern int net_msg_burst; +extern struct ratelimit_state net_ratelimit_state; #endif #endif /* __KERNEL__ */ diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h new file mode 100644 index 000000000000..18a5b9ba9d40 --- /dev/null +++ b/include/linux/ratelimit.h @@ -0,0 +1,27 @@ +#ifndef _LINUX_RATELIMIT_H +#define _LINUX_RATELIMIT_H +#include + +#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) +#define DEFAULT_RATELIMIT_BURST 10 + +struct ratelimit_state { + int interval; + int burst; + int printed; + int missed; + unsigned long begin; +}; + +#define DEFINE_RATELIMIT_STATE(name, interval, burst) \ + struct ratelimit_state name = {interval, burst,} + +extern int __ratelimit(struct ratelimit_state *rs); + +static inline int ratelimit(void) +{ + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + return __ratelimit(&rs); +} +#endif diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index f04b64eca636..0967f03b0705 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -115,16 +115,21 @@ DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); static inline void rcu_enter_nohz(void) { + static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); + smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ __get_cpu_var(rcu_dyntick_sched).dynticks++; - WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1); + WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs); } static inline void rcu_exit_nohz(void) { + static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); + smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ __get_cpu_var(rcu_dyntick_sched).dynticks++; - WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1)); + WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1), + &rs); } #else /* CONFIG_NO_HZ */ diff --git a/kernel/printk.c b/kernel/printk.c index 3f7a2a94583b..a7f7559c5f6c 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1308,6 +1308,8 @@ void tty_write_message(struct tty_struct *tty, char *msg) } #if defined CONFIG_PRINTK + +DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); /* * printk rate limiting, lifted from the networking subsystem. * @@ -1315,22 +1317,9 @@ void tty_write_message(struct tty_struct *tty, char *msg) * every printk_ratelimit_jiffies to make a denial-of-service * attack impossible. 
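/*
 * Illustrative sketch, not part of the patch above: how a call site would
 * use the new per-call-site ratelimit_state from <linux/ratelimit.h>
 * instead of the old global printk_ratelimit_jiffies/printk_ratelimit_burst
 * pair.  The "mydrv" state and function names are hypothetical examples.
 */
#include <linux/kernel.h>
#include <linux/ratelimit.h>

static DEFINE_RATELIMIT_STATE(mydrv_rs, DEFAULT_RATELIMIT_INTERVAL,
			      DEFAULT_RATELIMIT_BURST);

static void mydrv_report_error(int err)
{
	/* __ratelimit() returns nonzero while this state's burst allows it. */
	if (__ratelimit(&mydrv_rs))
		printk(KERN_WARNING "mydrv: error %d\n", err);

	/* Condition-style form: WARN_ON_RATELIMIT(err < 0, &mydrv_rs); */
}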
*/ -int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst) -{ - return __ratelimit(ratelimit_jiffies, ratelimit_burst); -} -EXPORT_SYMBOL(__printk_ratelimit); - -/* minimum time in jiffies between messages */ -int printk_ratelimit_jiffies = 5 * HZ; - -/* number of messages we send before ratelimiting */ -int printk_ratelimit_burst = 10; - int printk_ratelimit(void) { - return __printk_ratelimit(printk_ratelimit_jiffies, - printk_ratelimit_burst); + return __ratelimit(&printk_ratelimit_state); } EXPORT_SYMBOL(printk_ratelimit); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1a8299d1fe59..35a50db9b6ce 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -624,7 +624,7 @@ static struct ctl_table kern_table[] = { { .ctl_name = KERN_PRINTK_RATELIMIT, .procname = "printk_ratelimit", - .data = &printk_ratelimit_jiffies, + .data = &printk_ratelimit_state.interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -633,7 +633,7 @@ static struct ctl_table kern_table[] = { { .ctl_name = KERN_PRINTK_RATELIMIT_BURST, .procname = "printk_ratelimit_burst", - .data = &printk_ratelimit_burst, + .data = &printk_ratelimit_state.burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 485e3040dcd4..35136671b215 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -3,6 +3,9 @@ * * Isolated from kernel/printk.c by Dave Young * + * 2008-05-01 rewrite the function and use a ratelimit_state data struct as + * parameter. Now every user can use their own standalone ratelimit_state. + * * This file is released under the GPLv2. * */ @@ -11,41 +14,43 @@ #include #include +static DEFINE_SPINLOCK(ratelimit_lock); +static unsigned long flags; + /* * __ratelimit - rate limiting - * @ratelimit_jiffies: minimum time in jiffies between two callbacks - * @ratelimit_burst: number of callbacks we do before ratelimiting + * @rs: ratelimit_state data * - * This enforces a rate limit: not more than @ratelimit_burst callbacks - * in every ratelimit_jiffies + * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks + * in every @rs->ratelimit_jiffies */ -int __ratelimit(int ratelimit_jiffies, int ratelimit_burst) +int __ratelimit(struct ratelimit_state *rs) { - static DEFINE_SPINLOCK(ratelimit_lock); - static unsigned toks = 10 * 5 * HZ; - static unsigned long last_msg; - static int missed; - unsigned long flags; - unsigned long now = jiffies; + if (!rs->interval) + return 1; spin_lock_irqsave(&ratelimit_lock, flags); - toks += now - last_msg; - last_msg = now; - if (toks > (ratelimit_burst * ratelimit_jiffies)) - toks = ratelimit_burst * ratelimit_jiffies; - if (toks >= ratelimit_jiffies) { - int lost = missed; + if (!rs->begin) + rs->begin = jiffies; - missed = 0; - toks -= ratelimit_jiffies; - spin_unlock_irqrestore(&ratelimit_lock, flags); - if (lost) - printk(KERN_WARNING "%s: %d messages suppressed\n", - __func__, lost); - return 1; + if (time_is_before_jiffies(rs->begin + rs->interval)) { + if (rs->missed) + printk(KERN_WARNING "%s: %d callbacks suppressed\n", + __func__, rs->missed); + rs->begin = 0; + rs->printed = 0; + rs->missed = 0; } - missed++; + if (rs->burst && rs->burst > rs->printed) + goto print; + + rs->missed++; spin_unlock_irqrestore(&ratelimit_lock, flags); return 0; + +print: + rs->printed++; + spin_unlock_irqrestore(&ratelimit_lock, flags); + return 1; } EXPORT_SYMBOL(__ratelimit); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 
a570e2af22cb..f686467ff12b 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -67,7 +67,7 @@ static struct ctl_table net_core_table[] = { { .ctl_name = NET_CORE_MSG_COST, .procname = "message_cost", - .data = &net_msg_cost, + .data = &net_ratelimit_state.interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -76,7 +76,7 @@ static struct ctl_table net_core_table[] = { { .ctl_name = NET_CORE_MSG_BURST, .procname = "message_burst", - .data = &net_msg_burst, + .data = &net_ratelimit_state.burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, diff --git a/net/core/utils.c b/net/core/utils.c index 8031eb59054e..72e0ebe964a0 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -31,17 +31,16 @@ #include #include -int net_msg_cost __read_mostly = 5*HZ; -int net_msg_burst __read_mostly = 10; int net_msg_warn __read_mostly = 1; EXPORT_SYMBOL(net_msg_warn); +DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10); /* * All net warning printk()s should be guarded by this function. */ int net_ratelimit(void) { - return __printk_ratelimit(net_msg_cost, net_msg_burst); + return __ratelimit(&net_ratelimit_state); } EXPORT_SYMBOL(net_ratelimit); -- cgit v1.2.3 From 472dba7d117844c746be97db6be26c2810d79b62 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 25 Jul 2008 01:45:58 -0700 Subject: sm501: add power control callback Add callback to get or set the power control if the device has the sleep connected to some form of GPIO. Signed-off-by: Ben Dooks Cc: Arnaud Patard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mfd/sm501.c | 31 +++++++++++++++++++++++++++++++ include/linux/sm501.h | 7 +++++++ 2 files changed, 38 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index e2530df4d85c..9296b2673b52 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -1138,8 +1138,31 @@ static int sm501_plat_probe(struct platform_device *dev) } #ifdef CONFIG_PM + /* power management support */ +static void sm501_set_power(struct sm501_devdata *sm, int on) +{ + struct sm501_platdata *pd = sm->platdata; + + if (pd == NULL) + return; + + if (pd->get_power) { + if (pd->get_power(sm->dev) == on) { + dev_dbg(sm->dev, "is already %d\n", on); + return; + } + } + + if (pd->set_power) { + dev_dbg(sm->dev, "setting power to %d\n", on); + + pd->set_power(sm->dev, on); + sm501_mdelay(sm, 10); + } +} + static int sm501_plat_suspend(struct platform_device *pdev, pm_message_t state) { struct sm501_devdata *sm = platform_get_drvdata(pdev); @@ -1148,6 +1171,12 @@ static int sm501_plat_suspend(struct platform_device *pdev, pm_message_t state) sm->pm_misc = readl(sm->regs + SM501_MISC_CONTROL); sm501_dump_regs(sm); + + if (sm->platdata) { + if (sm->platdata->flags & SM501_FLAG_SUSPEND_OFF) + sm501_set_power(sm, 0); + } + return 0; } @@ -1155,6 +1184,8 @@ static int sm501_plat_resume(struct platform_device *pdev) { struct sm501_devdata *sm = platform_get_drvdata(pdev); + sm501_set_power(sm, 1); + sm501_dump_regs(sm); sm501_dump_gate(sm); sm501_dump_clk(sm); diff --git a/include/linux/sm501.h b/include/linux/sm501.h index b530fa6a1d34..145405bf9efa 100644 --- a/include/linux/sm501.h +++ b/include/linux/sm501.h @@ -157,6 +157,8 @@ struct sm501_init_gpio { struct sm501_reg_init gpio_ddr_high; }; +#define SM501_FLAG_SUSPEND_OFF (1<<4) + /* sm501_platdata * * This is passed with the platform device to allow the board @@ -170,6 +172,11 @@ struct sm501_platdata { struct sm501_init_gpio 
*init_gpiop; struct sm501_platdata_fb *fb; + int flags; + + int (*get_power)(struct device *dev); + int (*set_power)(struct device *dev, unsigned int on); + struct sm501_platdata_gpio_i2c *gpio_i2c; unsigned int gpio_i2c_nr; }; -- cgit v1.2.3 From f61be273d3699d174bc1438e6804f9f9e52bb932 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 25 Jul 2008 01:45:59 -0700 Subject: sm501: add gpiolib support Add support for exporting the GPIOs on the SM501 via gpiolib. Signed-off-by: Ben Dooks Cc: Arnaud Patard Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mfd/Kconfig | 8 ++ drivers/mfd/sm501.c | 299 +++++++++++++++++++++++++++++++++++++++++--------- include/linux/sm501.h | 20 +--- 3 files changed, 257 insertions(+), 70 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 9f93c29fed35..bac9e973ece0 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -19,6 +19,14 @@ config MFD_SM501 interface. The device may be connected by PCI or local bus with varying functions enabled. +config MFD_SM501_GPIO + bool "Export GPIO via GPIO layer" + depends on MFD_SM501 && HAVE_GPIO_LIB + ---help--- + This option uses the gpio library layer to export the 64 GPIO + lines on the SM501. The platform data is used to supply the + base number for the first GPIO line to register. + config MFD_ASIC3 bool "Support for Compaq ASIC3" depends on GENERIC_HARDIRQS && HAVE_GPIO_LIB && ARM diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index 9296b2673b52..be8713908125 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -31,10 +32,29 @@ struct sm501_device { struct platform_device pdev; }; +struct sm501_gpio; + +struct sm501_gpio_chip { + struct gpio_chip gpio; + struct sm501_gpio *ourgpio; /* to get back to parent. */ + void __iomem *regbase; +}; + +struct sm501_gpio { + struct sm501_gpio_chip low; + struct sm501_gpio_chip high; + spinlock_t lock; + + unsigned int registered : 1; + void __iomem *regs; + struct resource *regs_res; +}; + struct sm501_devdata { spinlock_t reg_lock; struct mutex clock_lock; struct list_head devices; + struct sm501_gpio gpio; struct device *dev; struct resource *io_res; @@ -42,6 +62,7 @@ struct sm501_devdata { struct resource *regs_claim; struct sm501_platdata *platdata; + unsigned int in_suspend; unsigned long pm_misc; @@ -52,6 +73,7 @@ struct sm501_devdata { unsigned int rev; }; + #define MHZ (1000 * 1000) #ifdef DEBUG @@ -276,58 +298,6 @@ unsigned long sm501_modify_reg(struct device *dev, EXPORT_SYMBOL_GPL(sm501_modify_reg); -unsigned long sm501_gpio_get(struct device *dev, - unsigned long gpio) -{ - struct sm501_devdata *sm = dev_get_drvdata(dev); - unsigned long result; - unsigned long reg; - - reg = (gpio > 32) ? SM501_GPIO_DATA_HIGH : SM501_GPIO_DATA_LOW; - result = readl(sm->regs + reg); - - result >>= (gpio & 31); - return result & 1UL; -} - -EXPORT_SYMBOL_GPL(sm501_gpio_get); - -void sm501_gpio_set(struct device *dev, - unsigned long gpio, - unsigned int to, - unsigned int dir) -{ - struct sm501_devdata *sm = dev_get_drvdata(dev); - - unsigned long bit = 1 << (gpio & 31); - unsigned long base; - unsigned long save; - unsigned long val; - - base = (gpio > 32) ? 
SM501_GPIO_DATA_HIGH : SM501_GPIO_DATA_LOW; - base += SM501_GPIO; - - spin_lock_irqsave(&sm->reg_lock, save); - - val = readl(sm->regs + base) & ~bit; - if (to) - val |= bit; - writel(val, sm->regs + base); - - val = readl(sm->regs + SM501_GPIO_DDR_LOW) & ~bit; - if (dir) - val |= bit; - - writel(val, sm->regs + SM501_GPIO_DDR_LOW); - sm501_sync_regs(sm); - - spin_unlock_irqrestore(&sm->reg_lock, save); - -} - -EXPORT_SYMBOL_GPL(sm501_gpio_set); - - /* sm501_unit_power * * alters the power active gate to set specific units on or off @@ -906,6 +876,226 @@ static int sm501_register_display(struct sm501_devdata *sm, return sm501_register_device(sm, pdev); } +#ifdef CONFIG_MFD_SM501_GPIO + +static inline struct sm501_gpio_chip *to_sm501_gpio(struct gpio_chip *gc) +{ + return container_of(gc, struct sm501_gpio_chip, gpio); +} + +static inline struct sm501_devdata *sm501_gpio_to_dev(struct sm501_gpio *gpio) +{ + return container_of(gpio, struct sm501_devdata, gpio); +} + +static int sm501_gpio_get(struct gpio_chip *chip, unsigned offset) + +{ + struct sm501_gpio_chip *smgpio = to_sm501_gpio(chip); + unsigned long result; + + result = readl(smgpio->regbase + SM501_GPIO_DATA_LOW); + result >>= offset; + + return result & 1UL; +} + +static void sm501_gpio_set(struct gpio_chip *chip, unsigned offset, int value) + +{ + struct sm501_gpio_chip *smchip = to_sm501_gpio(chip); + struct sm501_gpio *smgpio = smchip->ourgpio; + unsigned long bit = 1 << offset; + void __iomem *regs = smchip->regbase; + unsigned long save; + unsigned long val; + + dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d)\n", + __func__, chip, offset); + + spin_lock_irqsave(&smgpio->lock, save); + + val = readl(regs + SM501_GPIO_DATA_LOW) & ~bit; + if (value) + val |= bit; + writel(val, regs); + + sm501_sync_regs(sm501_gpio_to_dev(smgpio)); + spin_unlock_irqrestore(&smgpio->lock, save); +} + +static int sm501_gpio_input(struct gpio_chip *chip, unsigned offset) +{ + struct sm501_gpio_chip *smchip = to_sm501_gpio(chip); + struct sm501_gpio *smgpio = smchip->ourgpio; + void __iomem *regs = smchip->regbase; + unsigned long bit = 1 << offset; + unsigned long save; + unsigned long ddr; + + dev_info(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d)\n", + __func__, chip, offset); + + spin_lock_irqsave(&smgpio->lock, save); + + ddr = readl(regs + SM501_GPIO_DDR_LOW); + writel(ddr & ~bit, regs + SM501_GPIO_DDR_LOW); + + sm501_sync_regs(sm501_gpio_to_dev(smgpio)); + spin_unlock_irqrestore(&smgpio->lock, save); + + return 0; +} + +static int sm501_gpio_output(struct gpio_chip *chip, + unsigned offset, int value) +{ + struct sm501_gpio_chip *smchip = to_sm501_gpio(chip); + struct sm501_gpio *smgpio = smchip->ourgpio; + unsigned long bit = 1 << offset; + void __iomem *regs = smchip->regbase; + unsigned long save; + unsigned long val; + unsigned long ddr; + + dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d,%d)\n", + __func__, chip, offset, value); + + spin_lock_irqsave(&smgpio->lock, save); + + val = readl(regs + SM501_GPIO_DATA_LOW); + if (value) + val |= bit; + else + val &= ~bit; + writel(val, regs); + + ddr = readl(regs + SM501_GPIO_DDR_LOW); + writel(ddr | bit, regs + SM501_GPIO_DDR_LOW); + + sm501_sync_regs(sm501_gpio_to_dev(smgpio)); + writel(val, regs + SM501_GPIO_DATA_LOW); + + sm501_sync_regs(sm501_gpio_to_dev(smgpio)); + spin_unlock_irqrestore(&smgpio->lock, save); + + return 0; +} + +static struct gpio_chip gpio_chip_template = { + .ngpio = 32, + .direction_input = sm501_gpio_input, + .direction_output = sm501_gpio_output, + .set = 
sm501_gpio_set, + .get = sm501_gpio_get, +}; + +static int __devinit sm501_gpio_register_chip(struct sm501_devdata *sm, + struct sm501_gpio *gpio, + struct sm501_gpio_chip *chip) +{ + struct sm501_platdata *pdata = sm->platdata; + struct gpio_chip *gchip = &chip->gpio; + unsigned base = pdata->gpio_base; + + memcpy(chip, &gpio_chip_template, sizeof(struct gpio_chip)); + + if (chip == &gpio->high) { + base += 32; + chip->regbase = gpio->regs + SM501_GPIO_DATA_HIGH; + gchip->label = "SM501-HIGH"; + } else { + chip->regbase = gpio->regs + SM501_GPIO_DATA_LOW; + gchip->label = "SM501-LOW"; + } + + gchip->base = base; + chip->ourgpio = gpio; + + return gpiochip_add(gchip); +} + +static int sm501_register_gpio(struct sm501_devdata *sm) +{ + struct sm501_gpio *gpio = &sm->gpio; + resource_size_t iobase = sm->io_res->start + SM501_GPIO; + int ret; + int tmp; + + dev_dbg(sm->dev, "registering gpio block %08llx\n", + (unsigned long long)iobase); + + spin_lock_init(&gpio->lock); + + gpio->regs_res = request_mem_region(iobase, 0x20, "sm501-gpio"); + if (gpio->regs_res == NULL) { + dev_err(sm->dev, "gpio: failed to request region\n"); + return -ENXIO; + } + + gpio->regs = ioremap(iobase, 0x20); + if (gpio->regs == NULL) { + dev_err(sm->dev, "gpio: failed to remap registers\n"); + ret = -ENXIO; + goto err_mapped; + } + + /* Register both our chips. */ + + ret = sm501_gpio_register_chip(sm, gpio, &gpio->low); + if (ret) { + dev_err(sm->dev, "failed to add low chip\n"); + goto err_mapped; + } + + ret = sm501_gpio_register_chip(sm, gpio, &gpio->high); + if (ret) { + dev_err(sm->dev, "failed to add high chip\n"); + goto err_low_chip; + } + + gpio->registered = 1; + + return 0; + + err_low_chip: + tmp = gpiochip_remove(&gpio->low.gpio); + if (tmp) { + dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n"); + return ret; + } + + err_mapped: + release_resource(gpio->regs_res); + kfree(gpio->regs_res); + + return ret; +} + +static void sm501_gpio_remove(struct sm501_devdata *sm) +{ + int ret; + + ret = gpiochip_remove(&sm->gpio.low.gpio); + if (ret) + dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n"); + + ret = gpiochip_remove(&sm->gpio.high.gpio); + if (ret) + dev_err(sm->dev, "cannot remove high chip, cannot tidy up\n"); +} + +#else +static int sm501_register_gpio(struct sm501_devdata *sm) +{ + return 0; +} + +static void sm501_gpio_remove(struct sm501_devdata *sm) +{ +} +#endif + /* sm501_dbg_regs * * Debug attribute to attach to parent device to show core registers @@ -1059,6 +1249,8 @@ static int sm501_init_dev(struct sm501_devdata *sm) sm501_register_usbhost(sm, &mem_avail); if (idata->devices & (SM501_USE_UART0 | SM501_USE_UART1)) sm501_register_uart(sm, idata->devices); + if (idata->devices & SM501_USE_GPIO) + sm501_register_gpio(sm); } ret = sm501_check_clocks(sm); @@ -1366,6 +1558,9 @@ static void sm501_dev_remove(struct sm501_devdata *sm) sm501_remove_sub(sm, smdev); device_remove_file(sm->dev, &dev_attr_dbg_regs); + + if (sm->gpio.registered) + sm501_gpio_remove(sm); } static void sm501_pci_remove(struct pci_dev *dev) diff --git a/include/linux/sm501.h b/include/linux/sm501.h index 145405bf9efa..6ea39007c8a3 100644 --- a/include/linux/sm501.h +++ b/include/linux/sm501.h @@ -46,24 +46,6 @@ extern unsigned long sm501_modify_reg(struct device *dev, unsigned long set, unsigned long clear); -/* sm501_gpio_set - * - * set the state of the given GPIO line -*/ - -extern void sm501_gpio_set(struct device *dev, - unsigned long gpio, - unsigned int to, - unsigned int dir); - -/* sm501_gpio_get 
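/*
 * Illustrative sketch, not part of the patch above: with MFD_SM501_GPIO
 * enabled, a board requests the GPIO block in its init data, chooses the
 * base number for the 64 exported lines, and consumers then use the
 * generic gpiolib calls instead of the removed sm501_gpio_get/set helpers.
 * The "myboard" names and the numbers are hypothetical.
 */
#include <linux/gpio.h>
#include <linux/sm501.h>

static struct sm501_initdata myboard_sm501_initdata = {
	.devices	= SM501_USE_GPIO,	/* or-ed with the other SM501_USE_* blocks in use */
	/* remaining init fields omitted */
};

/* The matching sm501_platdata would set .init and .gpio_base = 160. */

static int myboard_claim_led(void)
{
	int gpio = 160 + 2;	/* third line of the "SM501-LOW" chip */
	int ret;

	ret = gpio_request(gpio, "myboard-led");
	if (ret)
		return ret;
	return gpio_direction_output(gpio, 1);
}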
- * - * get the state of the given GPIO line -*/ - -extern unsigned long sm501_gpio_get(struct device *dev, - unsigned long gpio); - /* Platform data definitions */ @@ -131,6 +113,7 @@ struct sm501_reg_init { #define SM501_USE_FBACCEL (1<<6) #define SM501_USE_AC97 (1<<7) #define SM501_USE_I2S (1<<8) +#define SM501_USE_GPIO (1<<9) #define SM501_USE_ALL (0xffffffff) @@ -173,6 +156,7 @@ struct sm501_platdata { struct sm501_platdata_fb *fb; int flags; + unsigned gpio_base; int (*get_power)(struct device *dev); int (*set_power)(struct device *dev, unsigned int on); -- cgit v1.2.3 From 60e540d617b40eb3d37f1dd99c97af588ff9b70b Mon Sep 17 00:00:00 2001 From: Arnaud Patard Date: Fri, 25 Jul 2008 01:46:00 -0700 Subject: sm501: gpio dynamic registration for PCI devices The SM501 PCI card requires a dyanmic gpio allocation as the number of cards is not known at compile time. Fixup the platform data and registration to deal with this. Acked-by: Ben Dooks Signed-off-by: Arnaud Patard Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mfd/sm501.c | 6 ++++-- include/linux/sm501.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index be8713908125..c3e5a48f6148 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -996,12 +996,13 @@ static int __devinit sm501_gpio_register_chip(struct sm501_devdata *sm, { struct sm501_platdata *pdata = sm->platdata; struct gpio_chip *gchip = &chip->gpio; - unsigned base = pdata->gpio_base; + int base = pdata->gpio_base; memcpy(chip, &gpio_chip_template, sizeof(struct gpio_chip)); if (chip == &gpio->high) { - base += 32; + if (base > 0) + base += 32; chip->regbase = gpio->regs + SM501_GPIO_DATA_HIGH; gchip->label = "SM501-HIGH"; } else { @@ -1452,6 +1453,7 @@ static struct sm501_platdata_fb sm501_fb_pdata = { static struct sm501_platdata sm501_pci_platdata = { .init = &sm501_pci_initdata, .fb = &sm501_fb_pdata, + .gpio_base = -1, }; static int sm501_pci_probe(struct pci_dev *dev, diff --git a/include/linux/sm501.h b/include/linux/sm501.h index 6ea39007c8a3..a8d02f36ad32 100644 --- a/include/linux/sm501.h +++ b/include/linux/sm501.h @@ -156,7 +156,7 @@ struct sm501_platdata { struct sm501_platdata_fb *fb; int flags; - unsigned gpio_base; + int gpio_base; int (*get_power)(struct device *dev); int (*set_power)(struct device *dev, unsigned int on); -- cgit v1.2.3 From 42cd2366fb9b58cdfc1855be32b31a78e40b2079 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 25 Jul 2008 01:46:01 -0700 Subject: sm501: gpio I2C support Add support for adding the GPIO based I2C resources. Signed-off-by: Ben Dooks Cc: Arnaud Patard Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mfd/sm501.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++- include/linux/sm501.h | 10 ++++++- 2 files changed, 84 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index c3e5a48f6148..107215b28805 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -1086,6 +1087,11 @@ static void sm501_gpio_remove(struct sm501_devdata *sm) dev_err(sm->dev, "cannot remove high chip, cannot tidy up\n"); } +static int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin) +{ + struct sm501_gpio *gpio = &sm->gpio; + return pin + (pin < 32) ? 
gpio->low.gpio.base : gpio->high.gpio.base; +} #else static int sm501_register_gpio(struct sm501_devdata *sm) { @@ -1095,8 +1101,66 @@ static int sm501_register_gpio(struct sm501_devdata *sm) static void sm501_gpio_remove(struct sm501_devdata *sm) { } + +static int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin) +{ + return -1; +} #endif +static int sm501_register_gpio_i2c_instance(struct sm501_devdata *sm, + struct sm501_platdata_gpio_i2c *iic) +{ + struct i2c_gpio_platform_data *icd; + struct platform_device *pdev; + + pdev = sm501_create_subdev(sm, "i2c-gpio", 0, + sizeof(struct i2c_gpio_platform_data)); + if (!pdev) + return -ENOMEM; + + icd = pdev->dev.platform_data; + + /* We keep the pin_sda and pin_scl fields relative in case the + * same platform data is passed to >1 SM501. + */ + + icd->sda_pin = sm501_gpio_pin2nr(sm, iic->pin_sda); + icd->scl_pin = sm501_gpio_pin2nr(sm, iic->pin_scl); + icd->timeout = iic->timeout; + icd->udelay = iic->udelay; + + /* note, we can't use either of the pin numbers, as the i2c-gpio + * driver uses the platform.id field to generate the bus number + * to register with the i2c core; The i2c core doesn't have enough + * entries to deal with anything we currently use. + */ + + pdev->id = iic->bus_num; + + dev_info(sm->dev, "registering i2c-%d: sda=%d (%d), scl=%d (%d)\n", + iic->bus_num, + icd->sda_pin, iic->pin_sda, icd->scl_pin, iic->pin_scl); + + return sm501_register_device(sm, pdev); +} + +static int sm501_register_gpio_i2c(struct sm501_devdata *sm, + struct sm501_platdata *pdata) +{ + struct sm501_platdata_gpio_i2c *iic = pdata->gpio_i2c; + int index; + int ret; + + for (index = 0; index < pdata->gpio_i2c_nr; index++, iic++) { + ret = sm501_register_gpio_i2c_instance(sm, iic); + if (ret < 0) + return ret; + } + + return 0; +} + /* sm501_dbg_regs * * Debug attribute to attach to parent device to show core registers @@ -1204,6 +1268,7 @@ static unsigned int sm501_mem_local[] = { static int sm501_init_dev(struct sm501_devdata *sm) { struct sm501_initdata *idata; + struct sm501_platdata *pdata; resource_size_t mem_avail; unsigned long dramctrl; unsigned long devid; @@ -1242,7 +1307,9 @@ static int sm501_init_dev(struct sm501_devdata *sm) /* check to see if we have some device initialisation */ - idata = sm->platdata ? sm->platdata->init : NULL; + pdata = sm->platdata; + idata = pdata ? pdata->init : NULL; + if (idata) { sm501_init_regs(sm, idata); @@ -1254,6 +1321,13 @@ static int sm501_init_dev(struct sm501_devdata *sm) sm501_register_gpio(sm); } + if (pdata->gpio_i2c != NULL && pdata->gpio_i2c_nr > 0) { + if (!sm->gpio.registered) + dev_err(sm->dev, "no gpio registered for i2c gpio.\n"); + else + sm501_register_gpio_i2c(sm, pdata); + } + ret = sm501_check_clocks(sm); if (ret) { dev_err(sm->dev, "M1X and M clocks sourced from different " diff --git a/include/linux/sm501.h b/include/linux/sm501.h index a8d02f36ad32..214f93209b8c 100644 --- a/include/linux/sm501.h +++ b/include/linux/sm501.h @@ -86,11 +86,19 @@ struct sm501_platdata_fb { struct sm501_platdata_fbsub *fb_pnl; }; -/* gpio i2c */ +/* gpio i2c + * + * Note, we have to pass in the bus number, as the number used will be + * passed to the i2c-gpio driver's platform_device.id, subsequently used + * to register the i2c bus. 
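/*
 * Illustrative board-file sketch, not part of the patch above: how the
 * platform-data fields added across this sm501 series fit together
 * (SM501_FLAG_SUSPEND_OFF, get_power/set_power, gpio_base and the gpio_i2c
 * table).  Only the field names come from <linux/sm501.h>; the "myboard"
 * names, the pin/bus numbers and the trivial power callbacks are
 * hypothetical.
 */
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/device.h>
#include <linux/sm501.h>

static int myboard_sm501_power_state;

static int myboard_sm501_get_power(struct device *dev)
{
	return myboard_sm501_power_state;
}

static int myboard_sm501_set_power(struct device *dev, unsigned int on)
{
	/* A real board would toggle the GPIO wired to the SM501 sleep pin here. */
	myboard_sm501_power_state = on;
	return 0;
}

static struct sm501_platdata_gpio_i2c myboard_sm501_i2c[] = {
	{
		.bus_num	= 1,		/* becomes the i2c-gpio platform id */
		.pin_sda	= 10,		/* SM501 GPIO lines, relative to the device */
		.pin_scl	= 11,
		.udelay		= 5,		/* passed through to the i2c-gpio driver */
		.timeout	= HZ / 10,
	},
};

static struct sm501_platdata myboard_sm501_pdata = {
	/* .init and .fb omitted here */
	.flags		= SM501_FLAG_SUSPEND_OFF,
	.gpio_base	= 160,			/* -1 requests dynamic gpiolib numbering */
	.get_power	= myboard_sm501_get_power,
	.set_power	= myboard_sm501_set_power,
	.gpio_i2c	= myboard_sm501_i2c,
	.gpio_i2c_nr	= ARRAY_SIZE(myboard_sm501_i2c),
};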
+*/ struct sm501_platdata_gpio_i2c { + unsigned int bus_num; unsigned int pin_sda; unsigned int pin_scl; + int udelay; + int timeout; }; /* sm501_initdata -- cgit v1.2.3 From ef53d9c5e4da147ecaa43c44c5e5945eb83970a2 Mon Sep 17 00:00:00 2001 From: Srinivasa D S Date: Fri, 25 Jul 2008 01:46:04 -0700 Subject: kprobes: improve kretprobe scalability with hashed locking Currently list of kretprobe instances are stored in kretprobe object (as used_instances,free_instances) and in kretprobe hash table. We have one global kretprobe lock to serialise the access to these lists. This causes only one kretprobe handler to execute at a time. Hence affects system performance, particularly on SMP systems and when return probe is set on lot of functions (like on all systemcalls). Solution proposed here gives fine-grain locks that performs better on SMP system compared to present kretprobe implementation. Solution: 1) Instead of having one global lock to protect kretprobe instances present in kretprobe object and kretprobe hash table. We will have two locks, one lock for protecting kretprobe hash table and another lock for kretporbe object. 2) We hold lock present in kretprobe object while we modify kretprobe instance in kretprobe object and we hold per-hash-list lock while modifying kretprobe instances present in that hash list. To prevent deadlock, we never grab a per-hash-list lock while holding a kretprobe lock. 3) We can remove used_instances from struct kretprobe, as we can track used instances of kretprobe instances using kretprobe hash table. Time duration for kernel compilation ("make -j 8") on a 8-way ppc64 system with return probes set on all systemcalls looks like this. cacheline non-cacheline Un-patched kernel aligned patch aligned patch =============================================================================== real 9m46.784s 9m54.412s 10m2.450s user 40m5.715s 40m7.142s 40m4.273s sys 2m57.754s 2m58.583s 3m17.430s =========================================================== Time duration for kernel compilation ("make -j 8) on the same system, when kernel is not probed. ========================= real 9m26.389s user 40m8.775s sys 2m7.283s ========================= Signed-off-by: Srinivasa DS Signed-off-by: Jim Keniston Acked-by: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S. 
Miller Cc: Masami Hiramatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/kprobes.c | 6 +- arch/ia64/kernel/kprobes.c | 6 +- arch/powerpc/kernel/kprobes.c | 6 +- arch/s390/kernel/kprobes.c | 6 +- arch/sparc64/kernel/kprobes.c | 11 ++-- arch/x86/kernel/kprobes.c | 6 +- include/linux/kprobes.h | 7 ++- kernel/kprobes.c | 127 +++++++++++++++++++++++++++++------------- 8 files changed, 108 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 5ee39e10c8d1..d28513f14d05 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -296,8 +296,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -337,7 +336,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) } kretprobe_assert(ri, orig_ret_address, trampoline_address); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); @@ -347,7 +346,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) return (void *)orig_ret_address; } -/* Called with kretprobe_lock held. */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 233434f4f88f..f07688da947c 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -429,8 +429,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) ((struct fnptr *)kretprobe_trampoline)->ip; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -485,7 +484,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) kretprobe_assert(ri, orig_ret_address, trampoline_address); reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { @@ -500,7 +499,6 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) return 1; } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 4ba2af125450..de79915452c8 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -144,7 +144,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_saved_msr = regs->msr; } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -312,8 +311,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + 
kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -352,7 +350,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, regs->nip = orig_ret_address; reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 288ad490a6dd..4f82e5b5f879 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -270,7 +270,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, __ctl_store(kcb->kprobe_saved_ctl, 9, 11); } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -377,8 +376,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -417,7 +415,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c index f43b5d755354..201a6e547e4a 100644 --- a/arch/sparc64/kernel/kprobes.c +++ b/arch/sparc64/kernel/kprobes.c @@ -478,9 +478,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } -/* Called with kretprobe_lock held. The value stored in the return - * address register is actually 2 instructions before where the - * callee will return to. Sequences usually look something like this +/* The value stored in the return address register is actually 2 + * instructions before where the callee will return to. 
+ * Sequences usually look something like this * * call some_function <--- return register points here * nop <--- call delay slot @@ -512,8 +512,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given @@ -553,7 +552,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) regs->tnpc = orig_ret_address + 4; reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 43c019f85f0d..6c27679ec6aa 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->ip = (unsigned long)p->ainsn.insn; } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ #ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; @@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) kretprobe_assert(ri, orig_ret_address, trampoline_address); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 04a3556bdea6..0be7795655fa 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -157,11 +157,10 @@ struct kretprobe { int nmissed; size_t data_size; struct hlist_head free_instances; - struct hlist_head used_instances; + spinlock_t lock; }; struct kretprobe_instance { - struct hlist_node uflist; /* either on free list or used list */ struct hlist_node hlist; struct kretprobe *rp; kprobe_opcode_t *ret_addr; @@ -201,7 +200,6 @@ static inline int init_test_probes(void) } #endif /* CONFIG_KPROBES_SANITY_TEST */ -extern spinlock_t kretprobe_lock; extern struct mutex kprobe_mutex; extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); @@ -214,6 +212,9 @@ extern void kprobes_inc_nmissed_count(struct kprobe *p); /* Get the kprobe at this addr (if any) - called with preemption disabled */ struct kprobe *get_kprobe(void *addr); +void kretprobe_hash_lock(struct task_struct *tsk, + struct hlist_head **head, unsigned long *flags); +void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags); struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk); /* kprobe_running() will just return the current_kprobe on this CPU */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 1485ca8d0e00..cb0b3bde3617 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -62,6 +62,7 @@ addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name))) #endif +static int 
kprobes_initialized; static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; @@ -69,8 +70,15 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; static bool kprobe_enabled; DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ -DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; +static struct { + spinlock_t lock ____cacheline_aligned; +} kretprobe_table_locks[KPROBE_TABLE_SIZE]; + +static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) +{ + return &(kretprobe_table_locks[hash].lock); +} /* * Normally, functions that we'd want to prohibit kprobes in, are marked @@ -368,26 +376,53 @@ void __kprobes kprobes_inc_nmissed_count(struct kprobe *p) return; } -/* Called with kretprobe_lock held */ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head) { + struct kretprobe *rp = ri->rp; + /* remove rp inst off the rprobe_inst_table */ hlist_del(&ri->hlist); - if (ri->rp) { - /* remove rp inst off the used list */ - hlist_del(&ri->uflist); - /* put rp inst back onto the free list */ - INIT_HLIST_NODE(&ri->uflist); - hlist_add_head(&ri->uflist, &ri->rp->free_instances); + INIT_HLIST_NODE(&ri->hlist); + if (likely(rp)) { + spin_lock(&rp->lock); + hlist_add_head(&ri->hlist, &rp->free_instances); + spin_unlock(&rp->lock); } else /* Unregistering */ hlist_add_head(&ri->hlist, head); } -struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk) +void kretprobe_hash_lock(struct task_struct *tsk, + struct hlist_head **head, unsigned long *flags) +{ + unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); + spinlock_t *hlist_lock; + + *head = &kretprobe_inst_table[hash]; + hlist_lock = kretprobe_table_lock_ptr(hash); + spin_lock_irqsave(hlist_lock, *flags); +} + +void kretprobe_table_lock(unsigned long hash, unsigned long *flags) { - return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]; + spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); + spin_lock_irqsave(hlist_lock, *flags); +} + +void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags) +{ + unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); + spinlock_t *hlist_lock; + + hlist_lock = kretprobe_table_lock_ptr(hash); + spin_unlock_irqrestore(hlist_lock, *flags); +} + +void kretprobe_table_unlock(unsigned long hash, unsigned long *flags) +{ + spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); + spin_unlock_irqrestore(hlist_lock, *flags); } /* @@ -401,17 +436,21 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) struct kretprobe_instance *ri; struct hlist_head *head, empty_rp; struct hlist_node *node, *tmp; - unsigned long flags = 0; + unsigned long hash, flags = 0; - INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(tk); + if (unlikely(!kprobes_initialized)) + /* Early boot. kretprobe_table_locks not yet initialized. 
*/ + return; + + hash = hash_ptr(tk, KPROBE_HASH_BITS); + head = &kretprobe_inst_table[hash]; + kretprobe_table_lock(hash, &flags); hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { if (ri->task == tk) recycle_rp_inst(ri, &empty_rp); } - spin_unlock_irqrestore(&kretprobe_lock, flags); - + kretprobe_table_unlock(hash, &flags); + INIT_HLIST_HEAD(&empty_rp); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); @@ -423,24 +462,29 @@ static inline void free_rp_inst(struct kretprobe *rp) struct kretprobe_instance *ri; struct hlist_node *pos, *next; - hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, uflist) { - hlist_del(&ri->uflist); + hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) { + hlist_del(&ri->hlist); kfree(ri); } } static void __kprobes cleanup_rp_inst(struct kretprobe *rp) { - unsigned long flags; + unsigned long flags, hash; struct kretprobe_instance *ri; struct hlist_node *pos, *next; + struct hlist_head *head; + /* No race here */ - spin_lock_irqsave(&kretprobe_lock, flags); - hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) { - ri->rp = NULL; - hlist_del(&ri->uflist); + for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) { + kretprobe_table_lock(hash, &flags); + head = &kretprobe_inst_table[hash]; + hlist_for_each_entry_safe(ri, pos, next, head, hlist) { + if (ri->rp == rp) + ri->rp = NULL; + } + kretprobe_table_unlock(hash, &flags); } - spin_unlock_irqrestore(&kretprobe_lock, flags); free_rp_inst(rp); } @@ -831,32 +875,37 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) { struct kretprobe *rp = container_of(p, struct kretprobe, kp); - unsigned long flags = 0; + unsigned long hash, flags = 0; + struct kretprobe_instance *ri; /*TODO: consider to only swap the RA after the last pre_handler fired */ - spin_lock_irqsave(&kretprobe_lock, flags); + hash = hash_ptr(current, KPROBE_HASH_BITS); + spin_lock_irqsave(&rp->lock, flags); if (!hlist_empty(&rp->free_instances)) { - struct kretprobe_instance *ri; - ri = hlist_entry(rp->free_instances.first, - struct kretprobe_instance, uflist); + struct kretprobe_instance, hlist); + hlist_del(&ri->hlist); + spin_unlock_irqrestore(&rp->lock, flags); + ri->rp = rp; ri->task = current; if (rp->entry_handler && rp->entry_handler(ri, regs)) { - spin_unlock_irqrestore(&kretprobe_lock, flags); + spin_unlock_irqrestore(&rp->lock, flags); return 0; } arch_prepare_kretprobe(ri, regs); /* XXX(hch): why is there no hlist_move_head? 
*/ - hlist_del(&ri->uflist); - hlist_add_head(&ri->uflist, &ri->rp->used_instances); - hlist_add_head(&ri->hlist, kretprobe_inst_table_head(ri->task)); - } else + INIT_HLIST_NODE(&ri->hlist); + kretprobe_table_lock(hash, &flags); + hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]); + kretprobe_table_unlock(hash, &flags); + } else { rp->nmissed++; - spin_unlock_irqrestore(&kretprobe_lock, flags); + spin_unlock_irqrestore(&rp->lock, flags); + } return 0; } @@ -892,7 +941,7 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp, rp->maxactive = NR_CPUS; #endif } - INIT_HLIST_HEAD(&rp->used_instances); + spin_lock_init(&rp->lock); INIT_HLIST_HEAD(&rp->free_instances); for (i = 0; i < rp->maxactive; i++) { inst = kmalloc(sizeof(struct kretprobe_instance) + @@ -901,8 +950,8 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp, free_rp_inst(rp); return -ENOMEM; } - INIT_HLIST_NODE(&inst->uflist); - hlist_add_head(&inst->uflist, &rp->free_instances); + INIT_HLIST_NODE(&inst->hlist); + hlist_add_head(&inst->hlist, &rp->free_instances); } rp->nmissed = 0; @@ -1009,6 +1058,7 @@ static int __init init_kprobes(void) for (i = 0; i < KPROBE_TABLE_SIZE; i++) { INIT_HLIST_HEAD(&kprobe_table[i]); INIT_HLIST_HEAD(&kretprobe_inst_table[i]); + spin_lock_init(&(kretprobe_table_locks[i].lock)); } /* @@ -1050,6 +1100,7 @@ static int __init init_kprobes(void) err = arch_init_kprobes(); if (!err) err = register_die_notifier(&kprobe_exceptions_nb); + kprobes_initialized = (err == 0); if (!err) init_test_probes(); -- cgit v1.2.3 From d8f388d8dc8d4f36539dd37c1fff62cc404ea0fc Mon Sep 17 00:00:00 2001 From: David Brownell Date: Fri, 25 Jul 2008 01:46:07 -0700 Subject: gpio: sysfs interface This adds a simple sysfs interface for GPIOs. /sys/class/gpio /export ... asks the kernel to export a GPIO to userspace /unexport ... to return a GPIO to the kernel /gpioN ... for each exported GPIO #N /value ... always readable, writes fail for input GPIOs /direction ... r/w as: in, out (default low); write high, low /gpiochipN ... for each gpiochip; #N is its first GPIO /base ... (r/o) same as N /label ... (r/o) descriptive, not necessarily unique /ngpio ... (r/o) number of GPIOs; numbered N .. N+(ngpio - 1) GPIOs claimed by kernel code may be exported by its owner using a new gpio_export() call, which should be most useful for driver debugging. Such exports may optionally be done without a "direction" attribute. Userspace may ask to take over a GPIO by writing to a sysfs control file, helping to cope with incomplete board support or other "one-off" requirements that don't merit full kernel support: echo 23 > /sys/class/gpio/export ... will gpio_request(23, "sysfs") and gpio_export(23); use /sys/class/gpio/gpio-23/direction to (re)configure it, when that GPIO can be used as both input and output. echo 23 > /sys/class/gpio/unexport ... will gpio_free(23), when it was exported as above The extra D-space footprint is a few hundred bytes, except for the sysfs resources associated with each exported GPIO. The additional I-space footprint is about two thirds of the current size of gpiolib (!). Since no /dev node creation is involved, no "udev" support is needed. Related changes: * This adds a device pointer to "struct gpio_chip". When GPIO providers initialize that, sysfs gpio class devices become children of that device instead of being "virtual" devices. * The (few) gpio_chip providers which have such a device node have been updated. 
* Some gpio_chip drivers also needed to update their module "owner" field ... for which missing kerneldoc was added. * Some gpio_chips don't support input GPIOs. Those GPIOs are now flagged appropriately when the chip is registered. Based on previous patches, and discussion both on and off LKML. A Documentation/ABI/testing/sysfs-gpio update is ready to submit once this merges to mainline. [akpm@linux-foundation.org: a few maintenance build fixes] Signed-off-by: David Brownell Cc: Guennadi Liakhovetski Cc: Greg KH Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/gpio.txt | 123 +++++++++- arch/arm/plat-omap/gpio.c | 3 + arch/avr32/mach-at32ap/pio.c | 2 + drivers/gpio/Kconfig | 15 ++ drivers/gpio/gpiolib.c | 536 +++++++++++++++++++++++++++++++++++++++++-- drivers/gpio/mcp23s08.c | 1 + drivers/gpio/pca953x.c | 1 + drivers/gpio/pcf857x.c | 1 + drivers/i2c/chips/tps65010.c | 2 + drivers/mfd/htc-egpio.c | 2 + include/asm-generic/gpio.h | 33 ++- include/linux/gpio.h | 13 ++ 12 files changed, 712 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index c35ca9e40d4c..8b69811a9642 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt @@ -347,15 +347,12 @@ necessarily be nonportable. Dynamic definition of GPIOs is not currently standard; for example, as a side effect of configuring an add-on board with some GPIO expanders. -These calls are purely for kernel space, but a userspace API could be built -on top of them. - GPIO implementor's framework (OPTIONAL) ======================================= As noted earlier, there is an optional implementation framework making it easier for platforms to support different kinds of GPIO controller using -the same programming interface. +the same programming interface. This framework is called "gpiolib". As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file will be found there. That will list all the controllers registered through @@ -439,4 +436,120 @@ becomes available. That may mean the device should not be registered until calls for that GPIO can work. One way to address such dependencies is for such gpio_chip controllers to provide setup() and teardown() callbacks to board specific code; those board specific callbacks would register devices -once all the necessary resources are available. +once all the necessary resources are available, and remove them later when +the GPIO controller device becomes unavailable. + + +Sysfs Interface for Userspace (OPTIONAL) +======================================== +Platforms which use the "gpiolib" implementors framework may choose to +configure a sysfs user interface to GPIOs. This is different from the +debugfs interface, since it provides control over GPIO direction and +value instead of just showing a gpio state summary. Plus, it could be +present on production systems without debugging support. + +Given approprate hardware documentation for the system, userspace could +know for example that GPIO #23 controls the write protect line used to +protect boot loader segments in flash memory. System upgrade procedures +may need to temporarily remove that protection, first importing a GPIO, +then changing its output state, then updating the code before re-enabling +the write protection. In normal use, GPIO #23 would never be touched, +and the kernel would have no need to know about it. 
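+
+[Editor's note: the following sketch is illustrative and is not part of the patch. It shows, in plain C against the sysfs files described here, roughly how an upgrade tool might drive the hypothetical write-protect line on GPIO #23; the GPIO number, signal polarity, and paths are assumptions taken from the example above, and error handling is trimmed for brevity.]
+
+	#include <fcntl.h>
+	#include <string.h>
+	#include <unistd.h>
+
+	/* write a short string to a sysfs attribute */
+	static int sysfs_write(const char *path, const char *val)
+	{
+		int fd = open(path, O_WRONLY);
+		if (fd < 0)
+			return -1;
+		ssize_t n = write(fd, val, strlen(val));
+		close(fd);
+		return (n < 0) ? -1 : 0;
+	}
+
+	int main(void)
+	{
+		/* import GPIO #23 and drive it low, releasing write protection
+		 * (polarity is an assumption -- check the board schematics) */
+		sysfs_write("/sys/class/gpio/export", "23");
+		sysfs_write("/sys/class/gpio/gpio23/direction", "low");
+
+		/* ... reflash the protected boot loader segments here ... */
+
+		/* re-assert write protection, then return the GPIO to the kernel */
+		sysfs_write("/sys/class/gpio/gpio23/value", "1");
+		sysfs_write("/sys/class/gpio/unexport", "23");
+		return 0;
+	}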
+ +Again depending on appropriate hardware documentation, on some systems +userspace GPIO can be used to determine system configuration data that +standard kernels won't know about. And for some tasks, simple userspace +GPIO drivers could be all that the system really needs. + +Note that standard kernel drivers exist for common "LEDs and Buttons" +GPIO tasks: "leds-gpio" and "gpio_keys", respectively. Use those +instead of talking directly to the GPIOs; they integrate with kernel +frameworks better than your userspace code could. + + +Paths in Sysfs +-------------- +There are three kinds of entry in /sys/class/gpio: + + - Control interfaces used to get userspace control over GPIOs; + + - GPIOs themselves; and + + - GPIO controllers ("gpio_chip" instances). + +That's in addition to standard files including the "device" symlink. + +The control interfaces are write-only: + + /sys/class/gpio/ + + "export" ... Userspace may ask the kernel to export control of + a GPIO to userspace by writing its number to this file. + + Example: "echo 19 > export" will create a "gpio19" node + for GPIO #19, if that's not requested by kernel code. + + "unexport" ... Reverses the effect of exporting to userspace. + + Example: "echo 19 > unexport" will remove a "gpio19" + node exported using the "export" file. + +GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42) +and have the following read/write attributes: + + /sys/class/gpio/gpioN/ + + "direction" ... reads as either "in" or "out". This value may + normally be written. Writing as "out" defaults to + initializing the value as low. To ensure glitch free + operation, values "low" and "high" may be written to + configure the GPIO as an output with that initial value. + + Note that this attribute *will not exist* if the kernel + doesn't support changing the direction of a GPIO, or + it was exported by kernel code that didn't explicitly + allow userspace to reconfigure this GPIO's direction. + + "value" ... reads as either 0 (low) or 1 (high). If the GPIO + is configured as an output, this value may be written; + any nonzero value is treated as high. + +GPIO controllers have paths like /sys/class/gpio/chipchip42/ (for the +controller implementing GPIOs starting at #42) and have the following +read-only attributes: + + /sys/class/gpio/gpiochipN/ + + "base" ... same as N, the first GPIO managed by this chip + + "label" ... provided for diagnostics (not always unique) + + "ngpio" ... how many GPIOs this manges (N to N + ngpio - 1) + +Board documentation should in most cases cover what GPIOs are used for +what purposes. However, those numbers are not always stable; GPIOs on +a daughtercard might be different depending on the base board being used, +or other cards in the stack. In such cases, you may need to use the +gpiochip nodes (possibly in conjunction with schematics) to determine +the correct GPIO number to use for a given signal. + + +Exporting from Kernel code +-------------------------- +Kernel code can explicitly manage exports of GPIOs which have already been +requested using gpio_request(): + + /* export the GPIO to userspace */ + int gpio_export(unsigned gpio, bool direction_may_change); + + /* reverse gpio_export() */ + void gpio_unexport(); + +After a kernel driver requests a GPIO, it may only be made available in +the sysfs interface by gpio_export(). The driver can control whether the +signal direction may change. This helps drivers prevent userspace code +from accidentally clobbering important system state. 
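+
+[Editor's note: a minimal kernel-side sketch, not from the patch, of the export flow just described. The GPIO number and label are placeholders; the calls used (gpio_request(), gpio_direction_output(), gpio_export(), gpio_free()) are the ones introduced or referenced by this interface.]
+
+	#include <linux/gpio.h>
+
+	#define MYBOARD_STATUS_GPIO	42	/* placeholder GPIO number */
+
+	static int myboard_export_status_gpio(void)
+	{
+		int err;
+
+		err = gpio_request(MYBOARD_STATUS_GPIO, "myboard-status");
+		if (err)
+			return err;
+
+		err = gpio_direction_output(MYBOARD_STATUS_GPIO, 0);
+		if (err)
+			goto out_free;
+
+		/* direction_may_change == false: userspace may read and write
+		 * "value", but may not flip the signal to an input */
+		err = gpio_export(MYBOARD_STATUS_GPIO, false);
+		if (err)
+			goto out_free;
+
+		return 0;
+
+	out_free:
+		gpio_free(MYBOARD_STATUS_GPIO);	/* gpio_free() also unexports */
+		return err;
+	}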
+ +This explicit exporting can help with debugging (by making some kinds +of experiments easier), or can provide an always-there interface that's +suitable for documenting as part of a board support package. diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c index 1903a3491ee9..d8e9c2c3f0f6 100644 --- a/arch/arm/plat-omap/gpio.c +++ b/arch/arm/plat-omap/gpio.c @@ -1488,6 +1488,9 @@ static int __init _omap_gpio_init(void) bank->chip.set = gpio_set; if (bank_is_mpuio(bank)) { bank->chip.label = "mpuio"; +#ifdef CONFIG_ARCH_OMAP1 + bank->chip.dev = &omap_mpuio_device.dev; +#endif bank->chip.base = OMAP_MPUIO(0); } else { bank->chip.label = "gpio"; diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c index 60da03ba7117..296294f8ed81 100644 --- a/arch/avr32/mach-at32ap/pio.c +++ b/arch/avr32/mach-at32ap/pio.c @@ -360,6 +360,8 @@ static int __init pio_probe(struct platform_device *pdev) pio->chip.label = pio->name; pio->chip.base = pdev->id * 32; pio->chip.ngpio = 32; + pio->chip.dev = &pdev->dev; + pio->chip.owner = THIS_MODULE; pio->chip.direction_input = direction_input; pio->chip.get = gpio_get; diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index fced1909cbba..6ec0e35b98e3 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -23,6 +23,21 @@ config DEBUG_GPIO slower. The diagnostics help catch the type of setup errors that are most common when setting up new platforms or boards. +config GPIO_SYSFS + bool "/sys/class/gpio/... (sysfs interface)" + depends on SYSFS && EXPERIMENTAL + help + Say Y here to add a sysfs interface for GPIOs. + + This is mostly useful to work around omissions in a system's + kernel support. Those are common in custom and semicustom + hardware assembled using standard kernels with a minimum of + custom patches. In those cases, userspace code may import + a given GPIO from the kernel, if no kernel driver requested it. + + Kernel drivers may also request that a particular GPIO be + exported to userspace; this can be useful when debugging. + # put expanders in the right section, in alphabetical order comment "I2C GPIO expanders:" diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index beaf6b3a37dc..8d2940517c99 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2,8 +2,11 @@ #include #include #include - -#include +#include +#include +#include +#include +#include /* Optional implementation infrastructure for GPIO interfaces. @@ -44,6 +47,8 @@ struct gpio_desc { #define FLAG_REQUESTED 0 #define FLAG_IS_OUT 1 #define FLAG_RESERVED 2 +#define FLAG_EXPORT 3 /* protected by sysfs_lock */ +#define FLAG_SYSFS 4 /* exported via /sys/class/gpio/control */ #ifdef CONFIG_DEBUG_FS const char *label; @@ -151,6 +156,482 @@ err: return ret; } +#ifdef CONFIG_GPIO_SYSFS + +/* lock protects against unexport_gpio() being called while + * sysfs files are active. + */ +static DEFINE_MUTEX(sysfs_lock); + +/* + * /sys/class/gpio/gpioN... only for GPIOs that are exported + * /direction + * * MAY BE OMITTED if kernel won't allow direction changes + * * is read/write as "in" or "out" + * * may also be written as "high" or "low", initializing + * output value as specified ("out" implies "low") + * /value + * * always readable, subject to hardware behavior + * * may be writable, as zero/nonzero + * + * REVISIT there will likely be an attribute for configuring async + * notifications, e.g. to specify polling interval or IRQ trigger type + * that would for example trigger a poll() on the "value". 
+ */ + +static ssize_t gpio_direction_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct gpio_desc *desc = dev_get_drvdata(dev); + ssize_t status; + + mutex_lock(&sysfs_lock); + + if (!test_bit(FLAG_EXPORT, &desc->flags)) + status = -EIO; + else + status = sprintf(buf, "%s\n", + test_bit(FLAG_IS_OUT, &desc->flags) + ? "out" : "in"); + + mutex_unlock(&sysfs_lock); + return status; +} + +static ssize_t gpio_direction_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + const struct gpio_desc *desc = dev_get_drvdata(dev); + unsigned gpio = desc - gpio_desc; + ssize_t status; + + mutex_lock(&sysfs_lock); + + if (!test_bit(FLAG_EXPORT, &desc->flags)) + status = -EIO; + else if (sysfs_streq(buf, "high")) + status = gpio_direction_output(gpio, 1); + else if (sysfs_streq(buf, "out") || sysfs_streq(buf, "low")) + status = gpio_direction_output(gpio, 0); + else if (sysfs_streq(buf, "in")) + status = gpio_direction_input(gpio); + else + status = -EINVAL; + + mutex_unlock(&sysfs_lock); + return status ? : size; +} + +static const DEVICE_ATTR(direction, 0644, + gpio_direction_show, gpio_direction_store); + +static ssize_t gpio_value_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct gpio_desc *desc = dev_get_drvdata(dev); + unsigned gpio = desc - gpio_desc; + ssize_t status; + + mutex_lock(&sysfs_lock); + + if (!test_bit(FLAG_EXPORT, &desc->flags)) + status = -EIO; + else + status = sprintf(buf, "%d\n", gpio_get_value_cansleep(gpio)); + + mutex_unlock(&sysfs_lock); + return status; +} + +static ssize_t gpio_value_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + const struct gpio_desc *desc = dev_get_drvdata(dev); + unsigned gpio = desc - gpio_desc; + ssize_t status; + + mutex_lock(&sysfs_lock); + + if (!test_bit(FLAG_EXPORT, &desc->flags)) + status = -EIO; + else if (!test_bit(FLAG_IS_OUT, &desc->flags)) + status = -EPERM; + else { + long value; + + status = strict_strtol(buf, 0, &value); + if (status == 0) { + gpio_set_value_cansleep(gpio, value != 0); + status = size; + } + } + + mutex_unlock(&sysfs_lock); + return status; +} + +static /*const*/ DEVICE_ATTR(value, 0644, + gpio_value_show, gpio_value_store); + +static const struct attribute *gpio_attrs[] = { + &dev_attr_direction.attr, + &dev_attr_value.attr, + NULL, +}; + +static const struct attribute_group gpio_attr_group = { + .attrs = (struct attribute **) gpio_attrs, +}; + +/* + * /sys/class/gpio/gpiochipN/ + * /base ... matching gpio_chip.base (N) + * /label ... matching gpio_chip.label + * /ngpio ... matching gpio_chip.ngpio + */ + +static ssize_t chip_base_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct gpio_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", chip->base); +} +static DEVICE_ATTR(base, 0444, chip_base_show, NULL); + +static ssize_t chip_label_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct gpio_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", chip->label ? 
: ""); +} +static DEVICE_ATTR(label, 0444, chip_label_show, NULL); + +static ssize_t chip_ngpio_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct gpio_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%u\n", chip->ngpio); +} +static DEVICE_ATTR(ngpio, 0444, chip_ngpio_show, NULL); + +static const struct attribute *gpiochip_attrs[] = { + &dev_attr_base.attr, + &dev_attr_label.attr, + &dev_attr_ngpio.attr, + NULL, +}; + +static const struct attribute_group gpiochip_attr_group = { + .attrs = (struct attribute **) gpiochip_attrs, +}; + +/* + * /sys/class/gpio/export ... write-only + * integer N ... number of GPIO to export (full access) + * /sys/class/gpio/unexport ... write-only + * integer N ... number of GPIO to unexport + */ +static ssize_t export_store(struct class *class, const char *buf, size_t len) +{ + long gpio; + int status; + + status = strict_strtol(buf, 0, &gpio); + if (status < 0) + goto done; + + /* No extra locking here; FLAG_SYSFS just signifies that the + * request and export were done by on behalf of userspace, so + * they may be undone on its behalf too. + */ + + status = gpio_request(gpio, "sysfs"); + if (status < 0) + goto done; + + status = gpio_export(gpio, true); + if (status < 0) + gpio_free(gpio); + else + set_bit(FLAG_SYSFS, &gpio_desc[gpio].flags); + +done: + if (status) + pr_debug("%s: status %d\n", __func__, status); + return status ? : len; +} + +static ssize_t unexport_store(struct class *class, const char *buf, size_t len) +{ + long gpio; + int status; + + status = strict_strtol(buf, 0, &gpio); + if (status < 0) + goto done; + + status = -EINVAL; + + /* reject bogus commands (gpio_unexport ignores them) */ + if (!gpio_is_valid(gpio)) + goto done; + + /* No extra locking here; FLAG_SYSFS just signifies that the + * request and export were done by on behalf of userspace, so + * they may be undone on its behalf too. + */ + if (test_and_clear_bit(FLAG_SYSFS, &gpio_desc[gpio].flags)) { + status = 0; + gpio_free(gpio); + } +done: + if (status) + pr_debug("%s: status %d\n", __func__, status); + return status ? : len; +} + +static struct class_attribute gpio_class_attrs[] = { + __ATTR(export, 0200, NULL, export_store), + __ATTR(unexport, 0200, NULL, unexport_store), + __ATTR_NULL, +}; + +static struct class gpio_class = { + .name = "gpio", + .owner = THIS_MODULE, + + .class_attrs = gpio_class_attrs, +}; + + +/** + * gpio_export - export a GPIO through sysfs + * @gpio: gpio to make available, already requested + * @direction_may_change: true if userspace may change gpio direction + * Context: arch_initcall or later + * + * When drivers want to make a GPIO accessible to userspace after they + * have requested it -- perhaps while debugging, or as part of their + * public interface -- they may use this routine. If the GPIO can + * change direction (some can't) and the caller allows it, userspace + * will see "direction" sysfs attribute which may be used to change + * the gpio's direction. A "value" attribute will always be provided. + * + * Returns zero on success, else an error. + */ +int gpio_export(unsigned gpio, bool direction_may_change) +{ + unsigned long flags; + struct gpio_desc *desc; + int status = -EINVAL; + + /* can't export until sysfs is available ... 
*/ + if (!gpio_class.p) { + pr_debug("%s: called too early!\n", __func__); + return -ENOENT; + } + + if (!gpio_is_valid(gpio)) + goto done; + + mutex_lock(&sysfs_lock); + + spin_lock_irqsave(&gpio_lock, flags); + desc = &gpio_desc[gpio]; + if (test_bit(FLAG_REQUESTED, &desc->flags) + && !test_bit(FLAG_EXPORT, &desc->flags)) { + status = 0; + if (!desc->chip->direction_input + || !desc->chip->direction_output) + direction_may_change = false; + } + spin_unlock_irqrestore(&gpio_lock, flags); + + if (status == 0) { + struct device *dev; + + dev = device_create(&gpio_class, desc->chip->dev, MKDEV(0, 0), + desc, "gpio%d", gpio); + if (dev) { + if (direction_may_change) + status = sysfs_create_group(&dev->kobj, + &gpio_attr_group); + else + status = device_create_file(dev, + &dev_attr_value); + if (status != 0) + device_unregister(dev); + } else + status = -ENODEV; + if (status == 0) + set_bit(FLAG_EXPORT, &desc->flags); + } + + mutex_unlock(&sysfs_lock); + +done: + if (status) + pr_debug("%s: gpio%d status %d\n", __func__, gpio, status); + + return status; +} +EXPORT_SYMBOL_GPL(gpio_export); + +static int match_export(struct device *dev, void *data) +{ + return dev_get_drvdata(dev) == data; +} + +/** + * gpio_unexport - reverse effect of gpio_export() + * @gpio: gpio to make unavailable + * + * This is implicit on gpio_free(). + */ +void gpio_unexport(unsigned gpio) +{ + struct gpio_desc *desc; + int status = -EINVAL; + + if (!gpio_is_valid(gpio)) + goto done; + + mutex_lock(&sysfs_lock); + + desc = &gpio_desc[gpio]; + if (test_bit(FLAG_EXPORT, &desc->flags)) { + struct device *dev = NULL; + + dev = class_find_device(&gpio_class, NULL, desc, match_export); + if (dev) { + clear_bit(FLAG_EXPORT, &desc->flags); + put_device(dev); + device_unregister(dev); + status = 0; + } else + status = -ENODEV; + } + + mutex_unlock(&sysfs_lock); +done: + if (status) + pr_debug("%s: gpio%d status %d\n", __func__, gpio, status); +} +EXPORT_SYMBOL_GPL(gpio_unexport); + +static int gpiochip_export(struct gpio_chip *chip) +{ + int status; + struct device *dev; + + /* Many systems register gpio chips for SOC support very early, + * before driver model support is available. In those cases we + * export this later, in gpiolib_sysfs_init() ... here we just + * verify that _some_ field of gpio_class got initialized. 
+ */ + if (!gpio_class.p) + return 0; + + /* use chip->base for the ID; it's already known to be unique */ + mutex_lock(&sysfs_lock); + dev = device_create(&gpio_class, chip->dev, MKDEV(0, 0), chip, + "gpiochip%d", chip->base); + if (dev) { + status = sysfs_create_group(&dev->kobj, + &gpiochip_attr_group); + } else + status = -ENODEV; + chip->exported = (status == 0); + mutex_unlock(&sysfs_lock); + + if (status) { + unsigned long flags; + unsigned gpio; + + spin_lock_irqsave(&gpio_lock, flags); + gpio = chip->base; + while (gpio_desc[gpio].chip == chip) + gpio_desc[gpio++].chip = NULL; + spin_unlock_irqrestore(&gpio_lock, flags); + + pr_debug("%s: chip %s status %d\n", __func__, + chip->label, status); + } + + return status; +} + +static void gpiochip_unexport(struct gpio_chip *chip) +{ + int status; + struct device *dev; + + mutex_lock(&sysfs_lock); + dev = class_find_device(&gpio_class, NULL, chip, match_export); + if (dev) { + put_device(dev); + device_unregister(dev); + chip->exported = 0; + status = 0; + } else + status = -ENODEV; + mutex_unlock(&sysfs_lock); + + if (status) + pr_debug("%s: chip %s status %d\n", __func__, + chip->label, status); +} + +static int __init gpiolib_sysfs_init(void) +{ + int status; + unsigned long flags; + unsigned gpio; + + status = class_register(&gpio_class); + if (status < 0) + return status; + + /* Scan and register the gpio_chips which registered very + * early (e.g. before the class_register above was called). + * + * We run before arch_initcall() so chip->dev nodes can have + * registered, and so arch_initcall() can always gpio_export(). + */ + spin_lock_irqsave(&gpio_lock, flags); + for (gpio = 0; gpio < ARCH_NR_GPIOS; gpio++) { + struct gpio_chip *chip; + + chip = gpio_desc[gpio].chip; + if (!chip || chip->exported) + continue; + + spin_unlock_irqrestore(&gpio_lock, flags); + status = gpiochip_export(chip); + spin_lock_irqsave(&gpio_lock, flags); + } + spin_unlock_irqrestore(&gpio_lock, flags); + + + return status; +} +postcore_initcall(gpiolib_sysfs_init); + +#else +static inline int gpiochip_export(struct gpio_chip *chip) +{ + return 0; +} + +static inline void gpiochip_unexport(struct gpio_chip *chip) +{ +} + +#endif /* CONFIG_GPIO_SYSFS */ + /** * gpiochip_add() - register a gpio_chip * @chip: the chip to register, with chip->base initialized @@ -160,6 +641,11 @@ err: * because the chip->base is invalid or already associated with a * different chip. Otherwise it returns zero as a success code. * + * When gpiochip_add() is called very early during boot, so that GPIOs + * can be freely used, the chip->dev device must be registered before + * the gpio framework's arch_initcall(). Otherwise sysfs initialization + * for GPIOs will fail rudely. + * * If chip->base is negative, this requests dynamic assignment of * a range of valid GPIOs. */ @@ -182,7 +668,7 @@ int gpiochip_add(struct gpio_chip *chip) base = gpiochip_find_base(chip->ngpio); if (base < 0) { status = base; - goto fail_unlock; + goto unlock; } chip->base = base; } @@ -197,12 +683,23 @@ int gpiochip_add(struct gpio_chip *chip) if (status == 0) { for (id = base; id < base + chip->ngpio; id++) { gpio_desc[id].chip = chip; - gpio_desc[id].flags = 0; + + /* REVISIT: most hardware initializes GPIOs as + * inputs (often with pullups enabled) so power + * usage is minimized. Linux code should set the + * gpio direction first thing; but until it does, + * we may expose the wrong direction in sysfs. + */ + gpio_desc[id].flags = !chip->direction_input + ? 
(1 << FLAG_IS_OUT) + : 0; } } -fail_unlock: +unlock: spin_unlock_irqrestore(&gpio_lock, flags); + if (status == 0) + status = gpiochip_export(chip); fail: /* failures here can mean systems won't boot... */ if (status) @@ -239,6 +736,10 @@ int gpiochip_remove(struct gpio_chip *chip) } spin_unlock_irqrestore(&gpio_lock, flags); + + if (status == 0) + gpiochip_unexport(chip); + return status; } EXPORT_SYMBOL_GPL(gpiochip_remove); @@ -296,6 +797,8 @@ void gpio_free(unsigned gpio) return; } + gpio_unexport(gpio); + spin_lock_irqsave(&gpio_lock, flags); desc = &gpio_desc[gpio]; @@ -534,10 +1037,6 @@ EXPORT_SYMBOL_GPL(gpio_set_value_cansleep); #ifdef CONFIG_DEBUG_FS -#include -#include - - static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip) { unsigned i; @@ -614,17 +1113,28 @@ static int gpiolib_show(struct seq_file *s, void *unused) /* REVISIT this isn't locked against gpio_chip removal ... */ for (gpio = 0; gpio_is_valid(gpio); gpio++) { + struct device *dev; + if (chip == gpio_desc[gpio].chip) continue; chip = gpio_desc[gpio].chip; if (!chip) continue; - seq_printf(s, "%sGPIOs %d-%d, %s%s:\n", + seq_printf(s, "%sGPIOs %d-%d", started ? "\n" : "", - chip->base, chip->base + chip->ngpio - 1, - chip->label ? : "generic", - chip->can_sleep ? ", can sleep" : ""); + chip->base, chip->base + chip->ngpio - 1); + dev = chip->dev; + if (dev) + seq_printf(s, ", %s/%s", + dev->bus ? dev->bus->name : "no-bus", + dev->bus_id); + if (chip->label) + seq_printf(s, ", %s", chip->label); + if (chip->can_sleep) + seq_printf(s, ", can sleep"); + seq_printf(s, ":\n"); + started = 1; if (chip->dbg_show) chip->dbg_show(s, chip); diff --git a/drivers/gpio/mcp23s08.c b/drivers/gpio/mcp23s08.c index 7f92fdd5f0e2..7efd7d3a81f9 100644 --- a/drivers/gpio/mcp23s08.c +++ b/drivers/gpio/mcp23s08.c @@ -239,6 +239,7 @@ static int mcp23s08_probe(struct spi_device *spi) mcp->chip.base = pdata->base; mcp->chip.ngpio = 8; mcp->chip.can_sleep = 1; + mcp->chip.dev = &spi->dev; mcp->chip.owner = THIS_MODULE; spi_set_drvdata(spi, mcp); diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c index a380730b61ab..cc8468692ae0 100644 --- a/drivers/gpio/pca953x.c +++ b/drivers/gpio/pca953x.c @@ -188,6 +188,7 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios) gc->base = chip->gpio_start; gc->ngpio = gpios; gc->label = chip->client->name; + gc->dev = &chip->client->dev; gc->owner = THIS_MODULE; } diff --git a/drivers/gpio/pcf857x.c b/drivers/gpio/pcf857x.c index d25d356c4f20..fc9c6ae739ee 100644 --- a/drivers/gpio/pcf857x.c +++ b/drivers/gpio/pcf857x.c @@ -200,6 +200,7 @@ static int pcf857x_probe(struct i2c_client *client, gpio->chip.base = pdata->gpio_base; gpio->chip.can_sleep = 1; + gpio->chip.dev = &client->dev; gpio->chip.owner = THIS_MODULE; /* NOTE: the OnSemi jlc1562b is also largely compatible with diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c index 85949685191b..cf02e8fceb42 100644 --- a/drivers/i2c/chips/tps65010.c +++ b/drivers/i2c/chips/tps65010.c @@ -636,6 +636,8 @@ static int tps65010_probe(struct i2c_client *client, tps->outmask = board->outmask; tps->chip.label = client->name; + tps->chip.dev = &client->dev; + tps->chip.owner = THIS_MODULE; tps->chip.set = tps65010_gpio_set; tps->chip.direction_output = tps65010_output; diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c index 8872cc077519..6be43172dc65 100644 --- a/drivers/mfd/htc-egpio.c +++ b/drivers/mfd/htc-egpio.c @@ -318,6 +318,8 @@ static int __init egpio_probe(struct platform_device 
*pdev) ei->chip[i].dev = &(pdev->dev); chip = &(ei->chip[i].chip); chip->label = "htc-egpio"; + chip->dev = &pdev->dev; + chip->owner = THIS_MODULE; chip->get = egpio_get; chip->set = egpio_set; chip->direction_input = egpio_direction_input; diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 6be061d09da9..1beff5166e53 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -32,6 +32,8 @@ struct module; /** * struct gpio_chip - abstract a GPIO controller * @label: for diagnostics + * @dev: optional device providing the GPIOs + * @owner: helps prevent removal of modules exporting active GPIOs * @direction_input: configures signal "offset" as input, or returns error * @get: returns value for signal "offset"; for output signals this * returns either the value actually sensed, or zero @@ -59,6 +61,7 @@ struct module; */ struct gpio_chip { char *label; + struct device *dev; struct module *owner; int (*direction_input)(struct gpio_chip *chip, @@ -74,6 +77,7 @@ struct gpio_chip { int base; u16 ngpio; unsigned can_sleep:1; + unsigned exported:1; }; extern const char *gpiochip_is_requested(struct gpio_chip *chip, @@ -108,7 +112,18 @@ extern void __gpio_set_value(unsigned gpio, int value); extern int __gpio_cansleep(unsigned gpio); -#else +#ifdef CONFIG_GPIO_SYSFS + +/* + * A sysfs interface can be exported by individual drivers if they want, + * but more typically is configured entirely from userspace. + */ +extern int gpio_export(unsigned gpio, bool direction_may_change); +extern void gpio_unexport(unsigned gpio); + +#endif /* CONFIG_GPIO_SYSFS */ + +#else /* !CONFIG_HAVE_GPIO_LIB */ static inline int gpio_is_valid(int number) { @@ -137,6 +152,20 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value) gpio_set_value(gpio, value); } -#endif +#endif /* !CONFIG_HAVE_GPIO_LIB */ + +#ifndef CONFIG_GPIO_SYSFS + +/* sysfs support is only available with gpiolib, where it's optional */ + +static inline int gpio_export(unsigned gpio, bool direction_may_change) +{ + return -ENOSYS; +} + +static inline void gpio_unexport(unsigned gpio) +{ +} +#endif /* CONFIG_GPIO_SYSFS */ #endif /* _ASM_GENERIC_GPIO_H */ diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 98be6c5762b9..730a20b83576 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -79,6 +79,19 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value) WARN_ON(1); } +static inline int gpio_export(unsigned gpio, bool direction_may_change) +{ + /* GPIO can never have been requested or set as {in,out}put */ + WARN_ON(1); + return -EINVAL; +} + +static inline void gpio_unexport(unsigned gpio) +{ + /* GPIO can never have been exported */ + WARN_ON(1); +} + static inline int gpio_to_irq(unsigned gpio) { /* GPIO can never have been requested or set as input */ -- cgit v1.2.3 From 8f1cc3b10e6ee0c5c7c8ed27f8771c4f252b4862 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Fri, 25 Jul 2008 01:46:09 -0700 Subject: gpio: mcp23s08 handles multiple chips per chipselect Teach the mcp23s08 driver about a curious feature of these chips: up to four of them can share the same chipselect, with the SPI signals wired in parallel, by matching two bits in the first protocol byte against two address lines on the chip. This is handled by three software changes: * Platform data now holds an array of per-chip structs, not just one chip's address and pullup configuration. 
* Probe() and remove() now use another level of structure, wrapping an instance of the original structure for each mcp23s08 chip sharing that chipselect. * The HAEN bit is set, so that the hardware address bits can no longer be ignored (boot firmware may not have enabled them). The "one struct per chip" preserves the guts of the current code, but platform_data will need minor changes. OLD: /* incorrect "slave" ID may not have mattered */ .slave = 3, .pullups = BIT(3) | BIT(1) | BIT(0), NEW: /* slave address _must_ match chip's wiring */ .chip[3] = { .is_present = true, .pullups = BIT(3) | BIT(1) | BIT(0), }, There's no change in how things _behave_ for spi_device nodes with a single mcp23s08 chip. New multi-chip configurations assign GPIOs in sequence, without holes. The spi_device just resembles a bigger controller, but internally it has multiple gpio_chip instances. Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/mcp23s08.c | 133 +++++++++++++++++++++++++++++++++---------- include/linux/spi/mcp23s08.h | 25 +++++--- 2 files changed, 118 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/mcp23s08.c b/drivers/gpio/mcp23s08.c index 7efd7d3a81f9..8a1b405fefda 100644 --- a/drivers/gpio/mcp23s08.c +++ b/drivers/gpio/mcp23s08.c @@ -40,15 +40,26 @@ struct mcp23s08 { struct spi_device *spi; u8 addr; + u8 cache[11]; /* lock protects the cached values */ struct mutex lock; - u8 cache[11]; struct gpio_chip chip; struct work_struct work; }; +/* A given spi_device can represent up to four mcp23s08 chips + * sharing the same chipselect but using different addresses + * (e.g. chips #0 and #3 might be populated, but not #1 or $2). + * Driver data holds all the per-chip data. + */ +struct mcp23s08_driver_data { + unsigned ngpio; + struct mcp23s08 *mcp[4]; + struct mcp23s08 chip[]; +}; + static int mcp23s08_read(struct mcp23s08 *mcp, unsigned reg) { u8 tx[2], rx[1]; @@ -208,25 +219,18 @@ done: /*----------------------------------------------------------------------*/ -static int mcp23s08_probe(struct spi_device *spi) +static int mcp23s08_probe_one(struct spi_device *spi, unsigned addr, + unsigned base, unsigned pullups) { - struct mcp23s08 *mcp; - struct mcp23s08_platform_data *pdata; + struct mcp23s08_driver_data *data = spi_get_drvdata(spi); + struct mcp23s08 *mcp = data->mcp[addr]; int status; int do_update = 0; - pdata = spi->dev.platform_data; - if (!pdata || pdata->slave > 3 || !pdata->base) - return -ENODEV; - - mcp = kzalloc(sizeof *mcp, GFP_KERNEL); - if (!mcp) - return -ENOMEM; - mutex_init(&mcp->lock); mcp->spi = spi; - mcp->addr = 0x40 | (pdata->slave << 1); + mcp->addr = 0x40 | (addr << 1); mcp->chip.label = "mcp23s08", @@ -236,27 +240,28 @@ static int mcp23s08_probe(struct spi_device *spi) mcp->chip.set = mcp23s08_set; mcp->chip.dbg_show = mcp23s08_dbg_show; - mcp->chip.base = pdata->base; + mcp->chip.base = base; mcp->chip.ngpio = 8; mcp->chip.can_sleep = 1; mcp->chip.dev = &spi->dev; mcp->chip.owner = THIS_MODULE; - spi_set_drvdata(spi, mcp); - - /* verify MCP_IOCON.SEQOP = 0, so sequential reads work */ + /* verify MCP_IOCON.SEQOP = 0, so sequential reads work, + * and MCP_IOCON.HAEN = 1, so we work with all chips. 
+ */ status = mcp23s08_read(mcp, MCP_IOCON); if (status < 0) goto fail; - if (status & IOCON_SEQOP) { + if ((status & IOCON_SEQOP) || !(status & IOCON_HAEN)) { status &= ~IOCON_SEQOP; + status |= IOCON_HAEN; status = mcp23s08_write(mcp, MCP_IOCON, (u8) status); if (status < 0) goto fail; } /* configure ~100K pullups */ - status = mcp23s08_write(mcp, MCP_GPPU, pdata->pullups); + status = mcp23s08_write(mcp, MCP_GPPU, pullups); if (status < 0) goto fail; @@ -283,11 +288,58 @@ static int mcp23s08_probe(struct spi_device *spi) tx[1] = MCP_IPOL; memcpy(&tx[2], &mcp->cache[MCP_IPOL], sizeof(tx) - 2); status = spi_write_then_read(mcp->spi, tx, sizeof tx, NULL, 0); - - /* FIXME check status... */ + if (status < 0) + goto fail; } status = gpiochip_add(&mcp->chip); +fail: + if (status < 0) + dev_dbg(&spi->dev, "can't setup chip %d, --> %d\n", + addr, status); + return status; +} + +static int mcp23s08_probe(struct spi_device *spi) +{ + struct mcp23s08_platform_data *pdata; + unsigned addr; + unsigned chips = 0; + struct mcp23s08_driver_data *data; + int status; + unsigned base; + + pdata = spi->dev.platform_data; + if (!pdata || !gpio_is_valid(pdata->base)) + return -ENODEV; + + for (addr = 0; addr < 4; addr++) { + if (!pdata->chip[addr].is_present) + continue; + chips++; + } + if (!chips) + return -ENODEV; + + data = kzalloc(sizeof *data + chips * sizeof(struct mcp23s08), + GFP_KERNEL); + if (!data) + return -ENOMEM; + spi_set_drvdata(spi, data); + + base = pdata->base; + for (addr = 0; addr < 4; addr++) { + if (!pdata->chip[addr].is_present) + continue; + chips--; + data->mcp[addr] = &data->chip[chips]; + status = mcp23s08_probe_one(spi, addr, base, + pdata->chip[addr].pullups); + if (status < 0) + goto fail; + base += 8; + } + data->ngpio = base - pdata->base; /* NOTE: these chips have a relatively sane IRQ framework, with * per-signal masking and level/edge triggering. 
It's not yet @@ -295,8 +347,9 @@ static int mcp23s08_probe(struct spi_device *spi) */ if (pdata->setup) { - status = pdata->setup(spi, mcp->chip.base, - mcp->chip.ngpio, pdata->context); + status = pdata->setup(spi, + pdata->base, data->ngpio, + pdata->context); if (status < 0) dev_dbg(&spi->dev, "setup --> %d\n", status); } @@ -304,19 +357,29 @@ static int mcp23s08_probe(struct spi_device *spi) return 0; fail: - kfree(mcp); + for (addr = 0; addr < 4; addr++) { + int tmp; + + if (!data->mcp[addr]) + continue; + tmp = gpiochip_remove(&data->mcp[addr]->chip); + if (tmp < 0) + dev_err(&spi->dev, "%s --> %d\n", "remove", tmp); + } + kfree(data); return status; } static int mcp23s08_remove(struct spi_device *spi) { - struct mcp23s08 *mcp = spi_get_drvdata(spi); + struct mcp23s08_driver_data *data = spi_get_drvdata(spi); struct mcp23s08_platform_data *pdata = spi->dev.platform_data; + unsigned addr; int status = 0; if (pdata->teardown) { status = pdata->teardown(spi, - mcp->chip.base, mcp->chip.ngpio, + pdata->base, data->ngpio, pdata->context); if (status < 0) { dev_err(&spi->dev, "%s --> %d\n", "teardown", status); @@ -324,11 +387,20 @@ static int mcp23s08_remove(struct spi_device *spi) } } - status = gpiochip_remove(&mcp->chip); + for (addr = 0; addr < 4; addr++) { + int tmp; + + if (!data->mcp[addr]) + continue; + + tmp = gpiochip_remove(&data->mcp[addr]->chip); + if (tmp < 0) { + dev_err(&spi->dev, "%s --> %d\n", "remove", tmp); + status = tmp; + } + } if (status == 0) - kfree(mcp); - else - dev_err(&spi->dev, "%s --> %d\n", "remove", status); + kfree(data); return status; } @@ -356,4 +428,3 @@ static void __exit mcp23s08_exit(void) module_exit(mcp23s08_exit); MODULE_LICENSE("GPL"); - diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h index 835ddf47d45c..22ef107d7704 100644 --- a/include/linux/spi/mcp23s08.h +++ b/include/linux/spi/mcp23s08.h @@ -1,18 +1,25 @@ -/* FIXME driver should be able to handle all four slaves that - * can be hooked up to each chipselect, as well as IRQs... - */ +/* FIXME driver should be able to handle IRQs... */ + +struct mcp23s08_chip_info { + bool is_present; /* true iff populated */ + u8 pullups; /* BIT(x) means enable pullup x */ +}; struct mcp23s08_platform_data { - /* four slaves can share one SPI chipselect */ - u8 slave; + /* Four slaves (numbered 0..3) can share one SPI chipselect, and + * will provide 8..32 GPIOs using 1..4 gpio_chip instances. + */ + struct mcp23s08_chip_info chip[4]; - /* number assigned to the first GPIO */ + /* "base" is the number of the first GPIO. Dynamic assignment is + * not currently supported, and even if there are gaps in chip + * addressing the GPIO numbers are sequential .. so for example + * if only slaves 0 and 3 are present, their GPIOs range from + * base to base+15. + */ unsigned base; - /* pins with pullups */ - u8 pullups; - void *context; /* param to setup/teardown */ int (*setup)(struct spi_device *spi, -- cgit v1.2.3 From bbcd6d543de335bf81e96477f46a60a8bf51039c Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Fri, 25 Jul 2008 01:46:14 -0700 Subject: gpio: max732x driver This adds a driver supporting a family of I2C port expanders from Maxim, which includes the MAX7319 and MAX7320-7327 chips. 
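[Editor's note: the following board-code sketch is illustrative and not part of this patch; the I2C bus number, device address, and GPIO base are made-up placeholders. It shows how a MAX7320 might be declared using the max732x_platform_data introduced below.]

	#include <linux/kernel.h>
	#include <linux/i2c.h>
	#include <linux/i2c/max732x.h>

	static struct max732x_platform_data max7320_pdata = {
		.gpio_base	= 224,		/* placeholder: first GPIO number */
	};

	static struct i2c_board_info myboard_i2c_devs[] __initdata = {
		{
			I2C_BOARD_INFO("max7320", 0x58),  /* placeholder group-B address */
			.platform_data = &max7320_pdata,
		},
	};

	static void __init myboard_init_i2c(void)
	{
		i2c_register_board_info(0, myboard_i2c_devs,
					ARRAY_SIZE(myboard_i2c_devs));
	}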
[dbrownell@users.sourceforge.net: minor fixes] Signed-off-by: Jack Ren Signed-off-by: Eric Miao Acked-by: Jean Delvare Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/Kconfig | 19 +++ drivers/gpio/Makefile | 1 + drivers/gpio/max732x.c | 385 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/i2c/max732x.h | 19 +++ 4 files changed, 424 insertions(+) create mode 100644 drivers/gpio/max732x.c create mode 100644 include/linux/i2c/max732x.h (limited to 'include/linux') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 5a355f829167..dbd42d6c93a7 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -67,6 +67,25 @@ config GPIO_SYSFS comment "I2C GPIO expanders:" +config GPIO_MAX732X + tristate "MAX7319, MAX7320-7327 I2C Port Expanders" + depends on I2C + help + Say yes here to support the MAX7319, MAX7320-7327 series of I2C + Port Expanders. Each IO port on these chips has a fixed role of + Input (designated by 'I'), Push-Pull Output ('O'), or Open-Drain + Input and Output (designed by 'P'). The combinations are listed + below: + + 8 bits: max7319 (8I), max7320 (8O), max7321 (8P), + max7322 (4I4O), max7323 (4P4O) + + 16 bits: max7324 (8I8O), max7325 (8P8O), + max7326 (4I12O), max7327 (4P12O) + + Board setup code must specify the model to use, and the start + number for these GPIOs. + config GPIO_PCA953X tristate "PCA953x, PCA955x, and MAX7310 I/O ports" depends on I2C diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 8c45948d1fe7..01b4bbde1956 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -5,6 +5,7 @@ ccflags-$(CONFIG_DEBUG_GPIO) += -DDEBUG obj-$(CONFIG_GPIOLIB) += gpiolib.o obj-$(CONFIG_GPIO_MAX7301) += max7301.o +obj-$(CONFIG_GPIO_MAX732X) += max732x.o obj-$(CONFIG_GPIO_MCP23S08) += mcp23s08.o obj-$(CONFIG_GPIO_PCA953X) += pca953x.o obj-$(CONFIG_GPIO_PCF857X) += pcf857x.o diff --git a/drivers/gpio/max732x.c b/drivers/gpio/max732x.c new file mode 100644 index 000000000000..b51c8135ca28 --- /dev/null +++ b/drivers/gpio/max732x.c @@ -0,0 +1,385 @@ +/* + * max732x.c - I2C Port Expander with 8/16 I/O + * + * Copyright (C) 2007 Marvell International Ltd. + * Copyright (C) 2008 Jack Ren + * Copyright (C) 2008 Eric Miao + * + * Derived from drivers/gpio/pca953x.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + */ + +#include +#include +#include +#include +#include + +#include +#include + + +/* + * Each port of MAX732x (including MAX7319) falls into one of the + * following three types: + * + * - Push Pull Output + * - Input + * - Open Drain I/O + * + * designated by 'O', 'I' and 'P' individually according to MAXIM's + * datasheets. + * + * There are two groups of I/O ports, each group usually includes + * up to 8 I/O ports, and is accessed by a specific I2C address: + * + * - Group A : by I2C address 0b'110xxxx + * - Group B : by I2C address 0b'101xxxx + * + * where 'xxxx' is decided by the connections of pin AD2/AD0. The + * address used also affects the initial state of output signals. + * + * Within each group of ports, there are five known combinations of + * I/O ports: 4I4O, 4P4O, 8I, 8P, 8O, see the definitions below for + * the detailed organization of these ports. 
+ * + * GPIO numbers start from 'gpio_base + 0' to 'gpio_base + 8/16', + * and GPIOs from GROUP_A are numbered before those from GROUP_B + * (if there are two groups). + * + * NOTE: MAX7328/MAX7329 are drop-in replacements for PCF8574/a, so + * they are not supported by this driver. + */ + +#define PORT_NONE 0x0 /* '/' No Port */ +#define PORT_OUTPUT 0x1 /* 'O' Push-Pull, Output Only */ +#define PORT_INPUT 0x2 /* 'I' Input Only */ +#define PORT_OPENDRAIN 0x3 /* 'P' Open-Drain, I/O */ + +#define IO_4I4O 0x5AA5 /* O7 O6 I5 I4 I3 I2 O1 O0 */ +#define IO_4P4O 0x5FF5 /* O7 O6 P5 P4 P3 P2 O1 O0 */ +#define IO_8I 0xAAAA /* I7 I6 I5 I4 I3 I2 I1 I0 */ +#define IO_8P 0xFFFF /* P7 P6 P5 P4 P3 P2 P1 P0 */ +#define IO_8O 0x5555 /* O7 O6 O5 O4 O3 O2 O1 O0 */ + +#define GROUP_A(x) ((x) & 0xffff) /* I2C Addr: 0b'110xxxx */ +#define GROUP_B(x) ((x) << 16) /* I2C Addr: 0b'101xxxx */ + +static const struct i2c_device_id max732x_id[] = { + { "max7319", GROUP_A(IO_8I) }, + { "max7320", GROUP_B(IO_8O) }, + { "max7321", GROUP_A(IO_8P) }, + { "max7322", GROUP_A(IO_4I4O) }, + { "max7323", GROUP_A(IO_4P4O) }, + { "max7324", GROUP_A(IO_8I) | GROUP_B(IO_8O) }, + { "max7325", GROUP_A(IO_8P) | GROUP_B(IO_8O) }, + { "max7326", GROUP_A(IO_4I4O) | GROUP_B(IO_8O) }, + { "max7327", GROUP_A(IO_4P4O) | GROUP_B(IO_8O) }, + { }, +}; +MODULE_DEVICE_TABLE(i2c, max732x_id); + +struct max732x_chip { + struct gpio_chip gpio_chip; + + struct i2c_client *client; /* "main" client */ + struct i2c_client *client_dummy; + struct i2c_client *client_group_a; + struct i2c_client *client_group_b; + + unsigned int mask_group_a; + unsigned int dir_input; + unsigned int dir_output; + + struct mutex lock; + uint8_t reg_out[2]; +}; + +static int max732x_write(struct max732x_chip *chip, int group_a, uint8_t val) +{ + struct i2c_client *client; + int ret; + + client = group_a ? chip->client_group_a : chip->client_group_b; + ret = i2c_smbus_write_byte(client, val); + if (ret < 0) { + dev_err(&client->dev, "failed writing\n"); + return ret; + } + + return 0; +} + +static int max732x_read(struct max732x_chip *chip, int group_a, uint8_t *val) +{ + struct i2c_client *client; + int ret; + + client = group_a ? chip->client_group_a : chip->client_group_b; + ret = i2c_smbus_read_byte(client); + if (ret < 0) { + dev_err(&client->dev, "failed reading\n"); + return ret; + } + + *val = (uint8_t)ret; + return 0; +} + +static inline int is_group_a(struct max732x_chip *chip, unsigned off) +{ + return (1u << off) & chip->mask_group_a; +} + +static int max732x_gpio_get_value(struct gpio_chip *gc, unsigned off) +{ + struct max732x_chip *chip; + uint8_t reg_val; + int ret; + + chip = container_of(gc, struct max732x_chip, gpio_chip); + + ret = max732x_read(chip, is_group_a(chip, off), ®_val); + if (ret < 0) + return 0; + + return reg_val & (1u << (off & 0x7)); +} + +static void max732x_gpio_set_value(struct gpio_chip *gc, unsigned off, int val) +{ + struct max732x_chip *chip; + uint8_t reg_out, mask = 1u << (off & 0x7); + int ret; + + chip = container_of(gc, struct max732x_chip, gpio_chip); + + mutex_lock(&chip->lock); + + reg_out = (off > 7) ? chip->reg_out[1] : chip->reg_out[0]; + reg_out = (val) ? 
reg_out | mask : reg_out & ~mask; + + ret = max732x_write(chip, is_group_a(chip, off), reg_out); + if (ret < 0) + goto out; + + /* update the shadow register then */ + if (off > 7) + chip->reg_out[1] = reg_out; + else + chip->reg_out[0] = reg_out; +out: + mutex_unlock(&chip->lock); +} + +static int max732x_gpio_direction_input(struct gpio_chip *gc, unsigned off) +{ + struct max732x_chip *chip; + unsigned int mask = 1u << off; + + chip = container_of(gc, struct max732x_chip, gpio_chip); + + if ((mask & chip->dir_input) == 0) { + dev_dbg(&chip->client->dev, "%s port %d is output only\n", + chip->client->name, off); + return -EACCES; + } + + return 0; +} + +static int max732x_gpio_direction_output(struct gpio_chip *gc, + unsigned off, int val) +{ + struct max732x_chip *chip; + unsigned int mask = 1u << off; + + chip = container_of(gc, struct max732x_chip, gpio_chip); + + if ((mask & chip->dir_output) == 0) { + dev_dbg(&chip->client->dev, "%s port %d is input only\n", + chip->client->name, off); + return -EACCES; + } + + max732x_gpio_set_value(gc, off, val); + return 0; +} + +static int __devinit max732x_setup_gpio(struct max732x_chip *chip, + const struct i2c_device_id *id, + unsigned gpio_start) +{ + struct gpio_chip *gc = &chip->gpio_chip; + uint32_t id_data = id->driver_data; + int i, port = 0; + + for (i = 0; i < 16; i++, id_data >>= 2) { + unsigned int mask = 1 << port; + + switch (id_data & 0x3) { + case PORT_OUTPUT: + chip->dir_output |= mask; + break; + case PORT_INPUT: + chip->dir_input |= mask; + break; + case PORT_OPENDRAIN: + chip->dir_output |= mask; + chip->dir_input |= mask; + break; + default: + continue; + } + + if (i < 8) + chip->mask_group_a |= mask; + port++; + } + + if (chip->dir_input) + gc->direction_input = max732x_gpio_direction_input; + if (chip->dir_output) { + gc->direction_output = max732x_gpio_direction_output; + gc->set = max732x_gpio_set_value; + } + gc->get = max732x_gpio_get_value; + gc->can_sleep = 1; + + gc->base = gpio_start; + gc->ngpio = port; + gc->label = chip->client->name; + gc->owner = THIS_MODULE; + + return port; +} + +static int __devinit max732x_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct max732x_platform_data *pdata; + struct max732x_chip *chip; + struct i2c_client *c; + uint16_t addr_a, addr_b; + int ret, nr_port; + + pdata = client->dev.platform_data; + if (pdata == NULL) + return -ENODEV; + + chip = kzalloc(sizeof(struct max732x_chip), GFP_KERNEL); + if (chip == NULL) + return -ENOMEM; + chip->client = client; + + nr_port = max732x_setup_gpio(chip, id, pdata->gpio_base); + + addr_a = (client->addr & 0x0f) | 0x60; + addr_b = (client->addr & 0x0f) | 0x50; + + switch (client->addr & 0x70) { + case 0x60: + chip->client_group_a = client; + if (nr_port > 7) { + c = i2c_new_dummy(client->adapter, addr_b); + chip->client_group_b = chip->client_dummy = c; + } + break; + case 0x50: + chip->client_group_b = client; + if (nr_port > 7) { + c = i2c_new_dummy(client->adapter, addr_a); + chip->client_group_a = chip->client_dummy = c; + } + break; + default: + dev_err(&client->dev, "invalid I2C address specified %02x\n", + client->addr); + ret = -EINVAL; + goto out_failed; + } + + mutex_init(&chip->lock); + + max732x_read(chip, is_group_a(chip, 0), &chip->reg_out[0]); + if (nr_port > 7) + max732x_read(chip, is_group_a(chip, 8), &chip->reg_out[1]); + + ret = gpiochip_add(&chip->gpio_chip); + if (ret) + goto out_failed; + + if (pdata->setup) { + ret = pdata->setup(client, chip->gpio_chip.base, + chip->gpio_chip.ngpio, 
pdata->context); + if (ret < 0) + dev_warn(&client->dev, "setup failed, %d\n", ret); + } + + i2c_set_clientdata(client, chip); + return 0; + +out_failed: + kfree(chip); + return ret; +} + +static int __devexit max732x_remove(struct i2c_client *client) +{ + struct max732x_platform_data *pdata = client->dev.platform_data; + struct max732x_chip *chip = i2c_get_clientdata(client); + int ret; + + if (pdata->teardown) { + ret = pdata->teardown(client, chip->gpio_chip.base, + chip->gpio_chip.ngpio, pdata->context); + if (ret < 0) { + dev_err(&client->dev, "%s failed, %d\n", + "teardown", ret); + return ret; + } + } + + ret = gpiochip_remove(&chip->gpio_chip); + if (ret) { + dev_err(&client->dev, "%s failed, %d\n", + "gpiochip_remove()", ret); + return ret; + } + + /* unregister any dummy i2c_client */ + if (chip->client_dummy) + i2c_unregister_device(chip->client_dummy); + + kfree(chip); + return 0; +} + +static struct i2c_driver max732x_driver = { + .driver = { + .name = "max732x", + .owner = THIS_MODULE, + }, + .probe = max732x_probe, + .remove = __devexit_p(max732x_remove), + .id_table = max732x_id, +}; + +static int __init max732x_init(void) +{ + return i2c_add_driver(&max732x_driver); +} +module_init(max732x_init); + +static void __exit max732x_exit(void) +{ + i2c_del_driver(&max732x_driver); +} +module_exit(max732x_exit); + +MODULE_AUTHOR("Eric Miao "); +MODULE_DESCRIPTION("GPIO expander driver for MAX732X"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/i2c/max732x.h b/include/linux/i2c/max732x.h new file mode 100644 index 000000000000..e10336631c62 --- /dev/null +++ b/include/linux/i2c/max732x.h @@ -0,0 +1,19 @@ +#ifndef __LINUX_I2C_MAX732X_H +#define __LINUX_I2C_MAX732X_H + +/* platform data for the MAX732x 8/16-bit I/O expander driver */ + +struct max732x_platform_data { + /* number of the first GPIO */ + unsigned gpio_base; + + void *context; /* param to setup/teardown */ + + int (*setup)(struct i2c_client *client, + unsigned gpio, unsigned ngpio, + void *context); + int (*teardown)(struct i2c_client *client, + unsigned gpio, unsigned ngpio, + void *context); +}; +#endif /* __LINUX_I2C_MAX732X_H */ -- cgit v1.2.3 From 50c33a84db4aa5082e3af8d873b22344ae2ebea8 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Fri, 25 Jul 2008 01:46:16 -0700 Subject: ext2: fix typo in Hurd part of include/linux/ext2_fs.h Fix typo in Hurd part of include/linux/ext2_fs.h The ';' here is redundant or can even pose problem. This is actually not used by the Linux kernel, but it is exposed in GNU/Hurd. 
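To see why the stray ';' is more than redundant, here is a stand-alone illustration; the struct layout below is a mock-up invented for this sketch, and only the macro style matches the header being fixed:

#include <stdio.h>

struct fake_inode {
	struct { struct { int h_i_frag; } hurd2; } osd2;
};

#define i_frag_old	osd2.hurd2.h_i_frag;	/* old style, stray ';' */
#define i_frag		osd2.hurd2.h_i_frag	/* style after this fix */

int main(void)
{
	struct fake_inode inode;

	inode.osd2.hurd2.h_i_frag = 3;
	printf("%d\n", inode.i_frag);		/* fine, prints 3 */

	/*
	 * With the old definition, any use where the macro is not the
	 * last token of the statement breaks:
	 *
	 *	int x = inode.i_frag_old + 1;
	 *
	 * expands to "int x = inode.osd2.hurd2.h_i_frag; + 1;" and does
	 * not compile; in the harmless cases the extra ';' is merely an
	 * empty statement.
	 */
	return 0;
}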
Signed-off-by: Samuel Thibault Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext2_fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index 84cec2aa9f1e..2efe7b863cff 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -284,8 +284,8 @@ struct ext2_inode { #ifdef __hurd__ #define i_translator osd1.hurd1.h_i_translator -#define i_frag osd2.hurd2.h_i_frag; -#define i_fsize osd2.hurd2.h_i_fsize; +#define i_frag osd2.hurd2.h_i_frag +#define i_fsize osd2.hurd2.h_i_fsize #define i_uid_high osd2.hurd2.h_i_uid_high #define i_gid_high osd2.hurd2.h_i_gid_high #define i_author osd2.hurd2.h_i_author -- cgit v1.2.3 From ae76dd9a6b5bbe5315fb7028e03f68f75b8538f3 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Fri, 25 Jul 2008 01:46:23 -0700 Subject: ext3: handle corrupted orphan list at mount If the orphan node list includes valid, untruncatable nodes with nlink > 0 the ext3_orphan_cleanup loop which attempts to delete them will not do so, causing it to loop forever. Fix by checking for such nodes in the ext3_orphan_get function. This patch fixes the second case (image hdb.20000009.softlockup.gz) reported in http://bugzilla.kernel.org/show_bug.cgi?id=10882. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: printk warning fix] Signed-off-by: Duane Griffin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/ialloc.c | 9 +++++++++ fs/ext3/inode.c | 20 ++++++++++++++------ include/linux/ext3_fs.h | 1 + 3 files changed, 24 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 77126821b2e9..47b678d73e7a 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -669,6 +669,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) if (IS_ERR(inode)) goto iget_failed; + /* + * If the orphans has i_nlinks > 0 then it should be able to be + * truncated, otherwise it won't be removed from the orphan list + * during processing and an infinite loop will result. 
+ */ + if (inode->i_nlink && !ext3_can_truncate(inode)) + goto bad_orphan; + if (NEXT_ORPHAN(inode) > max_ino) goto bad_orphan; brelse(bitmap_bh); @@ -690,6 +698,7 @@ bad_orphan: printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", NEXT_ORPHAN(inode)); printk(KERN_NOTICE "max_ino=%lu\n", max_ino); + printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink); /* Avoid freeing blocks if we got a bad deleted inode */ if (inode->i_nlink == 0) inode->i_blocks = 0; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 6ae4ecf3ce40..74b432fa166b 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2253,6 +2253,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, } } +int ext3_can_truncate(struct inode *inode) +{ + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return 0; + if (S_ISREG(inode->i_mode)) + return 1; + if (S_ISDIR(inode->i_mode)) + return 1; + if (S_ISLNK(inode->i_mode)) + return !ext3_inode_is_fast_symlink(inode); + return 0; +} + /* * ext3_truncate() * @@ -2297,12 +2310,7 @@ void ext3_truncate(struct inode *inode) unsigned blocksize = inode->i_sb->s_blocksize; struct page *page; - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; - if (ext3_inode_is_fast_symlink(inode)) - return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (!ext3_can_truncate(inode)) return; /* diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 36c540396377..80171ee89a22 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -832,6 +832,7 @@ extern void ext3_discard_reservation (struct inode *); extern void ext3_dirty_inode(struct inode *); extern int ext3_change_inode_journal_flag(struct inode *, int); extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *); +extern int ext3_can_truncate(struct inode *inode); extern void ext3_truncate (struct inode *); extern void ext3_set_inode_flags(struct inode *); extern void ext3_get_inode_flags(struct ext3_inode_info *); -- cgit v1.2.3 From de0ca06a99c33df8333955642843331ab6b6e7ff Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 25 Jul 2008 01:46:34 -0700 Subject: coda: remove CODA_FS_OLD_API While fixing CONFIG_ leakages to the userspace kernel headers I ran into CODA_FS_OLD_API. After five years, are there still people using the old API left? Especially considering that you have to choose at compile time which API to support in the kernel (and distributions tend to offer the new API for some time). Jan: "The old API can definitely go. Around the time the new interface went in there were some non-Coda userspace file system implementations that took a while longer to convert to the new API, but by now they all switched to the new interface or in some cases to a FUSE-based solution." Signed-off-by: Adrian Bunk Acked-by: Jan Harkes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig | 14 -------------- fs/coda/coda_linux.c | 6 ++---- fs/coda/psdev.c | 4 ---- fs/coda/upcall.c | 15 +-------------- include/linux/coda.h | 43 ------------------------------------------- 5 files changed, 3 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/fs/Kconfig b/fs/Kconfig index 37db79a2ff95..ed563b9e352a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -2093,20 +2093,6 @@ config CODA_FS To compile the coda client support as a module, choose M here: the module will be called coda. 
-config CODA_FS_OLD_API - bool "Use 96-bit Coda file identifiers" - depends on CODA_FS - help - A new kernel-userspace API had to be introduced for Coda v6.0 - to support larger 128-bit file identifiers as needed by the - new realms implementation. - - However this new API is not backward compatible with older - clients. If you really need to run the old Coda userspace - cache manager then say Y. - - For most cases you probably want to say N. - config AFS_FS tristate "Andrew File System support (AFS) (EXPERIMENTAL)" depends on INET && EXPERIMENTAL diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index e1c854890f94..bf4a3fd3c8e3 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -28,11 +28,9 @@ int coda_fake_statfs; char * coda_f2s(struct CodaFid *f) { static char s[60]; -#ifdef CONFIG_CODA_FS_OLD_API - sprintf(s, "(%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2]); -#else + sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]); -#endif + return s; } diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 40c36f7352a6..0d9b80ec689c 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -378,11 +378,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam"); MODULE_DESCRIPTION("Coda Distributed File System VFS interface"); MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR); MODULE_LICENSE("GPL"); -#ifdef CONFIG_CODA_FS_OLD_API -MODULE_VERSION("5.3.21"); -#else MODULE_VERSION("6.6"); -#endif static int __init init_coda(void) { diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index 359e531094dd..ce432bca95d1 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -52,12 +52,8 @@ static void *alloc_upcall(int opcode, int size) inp->ih.opcode = opcode; inp->ih.pid = current->pid; inp->ih.pgid = task_pgrp_nr(current); -#ifdef CONFIG_CODA_FS_OLD_API - memset(&inp->ih.cred, 0, sizeof(struct coda_cred)); - inp->ih.cred.cr_fsuid = current->fsuid; -#else inp->ih.uid = current->fsuid; -#endif + return (void*)inp; } @@ -166,20 +162,11 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags, union inputArgs *inp; union outputArgs *outp; int insize, outsize, error; -#ifdef CONFIG_CODA_FS_OLD_API - struct coda_cred cred = { 0, }; - cred.cr_fsuid = uid; -#endif insize = SIZE(release); UPARG(CODA_CLOSE); -#ifdef CONFIG_CODA_FS_OLD_API - memcpy(&(inp->ih.cred), &cred, sizeof(cred)); -#else inp->ih.uid = uid; -#endif - inp->coda_close.VFid = *fid; inp->coda_close.flags = flags; diff --git a/include/linux/coda.h b/include/linux/coda.h index b5cf0780c51a..96c87693800b 100644 --- a/include/linux/coda.h +++ b/include/linux/coda.h @@ -199,28 +199,6 @@ typedef u_int32_t vuid_t; typedef u_int32_t vgid_t; #endif /*_VUID_T_ */ -#ifdef CONFIG_CODA_FS_OLD_API -struct CodaFid { - u_int32_t opaque[3]; -}; - -static __inline__ ino_t coda_f2i(struct CodaFid *fid) -{ - if ( ! fid ) - return 0; - if (fid->opaque[1] == 0xfffffffe || fid->opaque[1] == 0xffffffff) - return ((fid->opaque[0] << 20) | (fid->opaque[2] & 0xfffff)); - else - return (fid->opaque[2] + (fid->opaque[1]<<10) + (fid->opaque[0]<<20)); -} - -struct coda_cred { - vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, efftve, set, fs uid*/ - vgid_t cr_groupid, cr_egid, cr_sgid, cr_fsgid; /* same for groups */ -}; - -#else /* not defined(CONFIG_CODA_FS_OLD_API) */ - struct CodaFid { u_int32_t opaque[4]; }; @@ -228,8 +206,6 @@ struct CodaFid { #define coda_f2i(fid)\ (fid ? 
(fid->opaque[3] ^ (fid->opaque[2]<<10) ^ (fid->opaque[1]<<20) ^ fid->opaque[0]) : 0) -#endif - #ifndef _VENUS_VATTR_T_ #define _VENUS_VATTR_T_ /* @@ -313,15 +289,7 @@ struct coda_statfs { #define CIOC_KERNEL_VERSION _IOWR('c', 10, size_t) -#if 0 -#define CODA_KERNEL_VERSION 0 /* don't care about kernel version number */ -#define CODA_KERNEL_VERSION 1 /* The old venus 4.6 compatible interface */ -#endif -#ifdef CONFIG_CODA_FS_OLD_API -#define CODA_KERNEL_VERSION 2 /* venus_lookup got an extra parameter */ -#else #define CODA_KERNEL_VERSION 3 /* 128-bit file identifiers */ -#endif /* * Venus <-> Coda RPC arguments @@ -329,16 +297,9 @@ struct coda_statfs { struct coda_in_hdr { u_int32_t opcode; u_int32_t unique; /* Keep multiple outstanding msgs distinct */ -#ifdef CONFIG_CODA_FS_OLD_API - u_int16_t pid; /* Common to all */ - u_int16_t pgid; /* Common to all */ - u_int16_t sid; /* Common to all */ - struct coda_cred cred; /* Common to all */ -#else pid_t pid; pid_t pgid; vuid_t uid; -#endif }; /* Really important that opcode and unique are 1st two fields! */ @@ -613,11 +574,7 @@ struct coda_vget_out { /* CODA_PURGEUSER is a venus->kernel call */ struct coda_purgeuser_out { struct coda_out_hdr oh; -#ifdef CONFIG_CODA_FS_OLD_API - struct coda_cred cred; -#else vuid_t uid; -#endif }; /* coda_zapfile: */ -- cgit v1.2.3 From f68215c4640a38d66429014e524a627bf572d26a Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 25 Jul 2008 01:46:38 -0700 Subject: reiserfs: convert j_lock to mutex j_lock is a semaphore but uses it as if it were a mutex. This patch converts it to a mutex. Signed-off-by: Jeff Mahoney Cc: Matthew Wilcox Cc: Chris Mason Cc: Edward Shishkin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/journal.c | 6 +++--- include/linux/reiserfs_fs_sb.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index e396b2fa4743..0f7b1e807e60 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -558,13 +558,13 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, static inline void lock_journal(struct super_block *p_s_sb) { PROC_INFO_INC(p_s_sb, journal.lock_journal); - down(&SB_JOURNAL(p_s_sb)->j_lock); + mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex); } /* unlock the current transaction */ static inline void unlock_journal(struct super_block *p_s_sb) { - up(&SB_JOURNAL(p_s_sb)->j_lock); + mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex); } static inline void get_journal_list(struct reiserfs_journal_list *jl) @@ -2837,7 +2837,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name, journal->j_last = NULL; journal->j_first = NULL; init_waitqueue_head(&(journal->j_join_wait)); - sema_init(&journal->j_lock, 1); + mutex_init(&journal->j_mutex); sema_init(&journal->j_flush_sem, 1); journal->j_trans_id = 10; diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 336ee43ed7d8..49b639b88bac 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -193,7 +193,7 @@ struct reiserfs_journal { struct buffer_head *j_header_bh; time_t j_trans_start_time; /* time this transaction started */ - struct semaphore j_lock; + struct mutex j_mutex; struct semaphore j_flush_sem; wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */ atomic_t j_jlock; /* lock for j_join_wait */ -- cgit v1.2.3 From afe70259076fff0446001eaa1a287f615241a357 Mon Sep 17 00:00:00 2001 
From: Jeff Mahoney Date: Fri, 25 Jul 2008 01:46:39 -0700 Subject: reiserfs: convert j_flush_sem to mutex j_flush_sem is a semaphore but uses it as if it were a mutex. This patch converts it to a mutex. [akpm@linux-foundation.org: fix mutex_trylock retval treatment] Signed-off-by: Jeff Mahoney Cc: Matthew Wilcox Cc: Chris Mason Cc: Edward Shishkin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/journal.c | 14 +++++++------- include/linux/reiserfs_fs_sb.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 0f7b1e807e60..3cb4a562030e 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1411,8 +1411,8 @@ static int flush_journal_list(struct super_block *s, /* if flushall == 0, the lock is already held */ if (flushall) { - down(&journal->j_flush_sem); - } else if (!down_trylock(&journal->j_flush_sem)) { + mutex_lock(&journal->j_flush_mutex); + } else if (mutex_trylock(&journal->j_flush_mutex)) { BUG(); } @@ -1642,7 +1642,7 @@ static int flush_journal_list(struct super_block *s, jl->j_state = 0; put_journal_list(s, jl); if (flushall) - up(&journal->j_flush_sem); + mutex_unlock(&journal->j_flush_mutex); put_fs_excl(); return err; } @@ -1772,12 +1772,12 @@ static int kupdate_transactions(struct super_block *s, struct reiserfs_journal *journal = SB_JOURNAL(s); chunk.nr = 0; - down(&journal->j_flush_sem); + mutex_lock(&journal->j_flush_mutex); if (!journal_list_still_alive(s, orig_trans_id)) { goto done; } - /* we've got j_flush_sem held, nobody is going to delete any + /* we've got j_flush_mutex held, nobody is going to delete any * of these lists out from underneath us */ while ((num_trans && transactions_flushed < num_trans) || @@ -1812,7 +1812,7 @@ static int kupdate_transactions(struct super_block *s, } done: - up(&journal->j_flush_sem); + mutex_unlock(&journal->j_flush_mutex); return ret; } @@ -2838,7 +2838,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name, journal->j_first = NULL; init_waitqueue_head(&(journal->j_join_wait)); mutex_init(&journal->j_mutex); - sema_init(&journal->j_flush_sem, 1); + mutex_init(&journal->j_flush_mutex); journal->j_trans_id = 10; journal->j_mount_id = 10; diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 49b639b88bac..c0751724ee64 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -194,7 +194,7 @@ struct reiserfs_journal { time_t j_trans_start_time; /* time this transaction started */ struct mutex j_mutex; - struct semaphore j_flush_sem; + struct mutex j_flush_mutex; wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */ atomic_t j_jlock; /* lock for j_join_wait */ int j_list_bitmap_index; /* number of next list bitmap to use */ -- cgit v1.2.3 From 90415deac75a761a25239af6f56381546f8d2201 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 25 Jul 2008 01:46:40 -0700 Subject: reiserfs: convert j_commit_lock to mutex j_commit_lock is a semaphore but uses it as if it were a mutex. This patch converts it to a mutex. 
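All three of these reiserfs conversions follow the same mechanical pattern; a minimal sketch, with placeholder names rather than the real journal fields, is:

#include <linux/mutex.h>

struct demo_journal {
	struct mutex lock;		/* was: struct semaphore lock; */
};

static void demo_init(struct demo_journal *j)
{
	mutex_init(&j->lock);		/* was: sema_init(&j->lock, 1); */
}

static void demo_enter(struct demo_journal *j)
{
	mutex_lock(&j->lock);		/* was: down(&j->lock); */
}

static void demo_leave(struct demo_journal *j)
{
	mutex_unlock(&j->lock);		/* was: up(&j->lock); */
}

The one subtlety is trylock, as seen in the j_flush_mutex hunk above: down_trylock() returns 0 on success while mutex_trylock() returns nonzero on success, so the sense of any trylock test has to be inverted during the conversion (that is what the "fix mutex_trylock retval treatment" note refers to).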
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Jeff Mahoney Cc: Matthew Wilcox Cc: Chris Mason Cc: Edward Shishkin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/journal.c | 22 ++++++++++------------ include/linux/reiserfs_fs_sb.h | 2 +- 2 files changed, 11 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 3cb4a562030e..c8f60ee183b5 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -34,15 +34,10 @@ ** from within kupdate, it will ignore the immediate flag */ -#include -#include - #include #include - #include #include - #include #include #include @@ -54,6 +49,9 @@ #include #include #include +#include + +#include /* gets a struct reiserfs_journal_list * from a list head */ #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ @@ -1045,9 +1043,9 @@ static int flush_commit_list(struct super_block *s, } /* make sure nobody is trying to flush this one at the same time */ - down(&jl->j_commit_lock); + mutex_lock(&jl->j_commit_mutex); if (!journal_list_still_alive(s, trans_id)) { - up(&jl->j_commit_lock); + mutex_unlock(&jl->j_commit_mutex); goto put_jl; } BUG_ON(jl->j_trans_id == 0); @@ -1057,7 +1055,7 @@ static int flush_commit_list(struct super_block *s, if (flushall) { atomic_set(&(jl->j_older_commits_done), 1); } - up(&jl->j_commit_lock); + mutex_unlock(&jl->j_commit_mutex); goto put_jl; } @@ -1181,7 +1179,7 @@ static int flush_commit_list(struct super_block *s, if (flushall) { atomic_set(&(jl->j_older_commits_done), 1); } - up(&jl->j_commit_lock); + mutex_unlock(&jl->j_commit_mutex); put_jl: put_journal_list(s, jl); @@ -2556,7 +2554,7 @@ static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) INIT_LIST_HEAD(&jl->j_working_list); INIT_LIST_HEAD(&jl->j_tail_bh_list); INIT_LIST_HEAD(&jl->j_bh_list); - sema_init(&jl->j_commit_lock, 1); + mutex_init(&jl->j_commit_mutex); SB_JOURNAL(s)->j_num_lists++; get_journal_list(jl); return jl; @@ -4030,7 +4028,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, * the new transaction is fully setup, and we've already flushed the * ordered bh list */ - down(&jl->j_commit_lock); + mutex_lock(&jl->j_commit_mutex); /* save the transaction id in case we need to commit it later */ commit_trans_id = jl->j_trans_id; @@ -4196,7 +4194,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, lock_kernel(); } BUG_ON(!list_empty(&jl->j_tail_bh_list)); - up(&jl->j_commit_lock); + mutex_unlock(&jl->j_commit_mutex); /* honor the flush wishes from the caller, simple commits can ** be done outside the journal lock, they are done below diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index c0751724ee64..315517e8bfa1 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -152,7 +152,7 @@ struct reiserfs_journal_list { atomic_t j_nonzerolen; atomic_t j_commit_left; atomic_t j_older_commits_done; /* all commits older than this on disk */ - struct semaphore j_commit_lock; + struct mutex j_commit_mutex; unsigned long j_trans_id; time_t j_timestamp; struct reiserfs_list_bitmap *j_list_bitmap; -- cgit v1.2.3 From 4596c8aaf96e8634ca755c9f34b91420a39bebd4 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Fri, 25 Jul 2008 01:46:42 -0700 Subject: fat: fix VFAT_IOCTL_READDIR_xxx and cleanup for userland "struct dirent" is a kernel type here, but is a **different type** in userspace! 
This means both the structure and the IOCTL number is wrong! So, this adds new "struct __fat_dirent" to generate correct IOCTL number. And kernel stuff moves to under __KERNEL__. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msdos_fs.h | 47 +++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 81cd36b735b0..5161394c7894 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -2,11 +2,11 @@ #define _LINUX_MSDOS_FS_H #include +#include /* * The MS-DOS filesystem constants/structures */ -#include #define SECTOR_SIZE 512 /* sector size (bytes) */ #define SECTOR_BITS 9 /* log2(SECTOR_SIZE) */ @@ -89,24 +89,22 @@ #define IS_FSINFO(x) (le32_to_cpu((x)->signature1) == FAT_FSINFO_SIG1 \ && le32_to_cpu((x)->signature2) == FAT_FSINFO_SIG2) +struct __fat_dirent { + long d_ino; + __kernel_off_t d_off; + unsigned short d_reclen; + char d_name[256]; /* We must not include limits.h! */ +}; + /* * ioctl commands */ -#define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct dirent [2]) -#define VFAT_IOCTL_READDIR_SHORT _IOR('r', 2, struct dirent [2]) +#define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct __fat_dirent[2]) +#define VFAT_IOCTL_READDIR_SHORT _IOR('r', 2, struct __fat_dirent[2]) /* has used 0x72 ('r') in collision, so skip a few */ #define FAT_IOCTL_GET_ATTRIBUTES _IOR('r', 0x10, __u32) #define FAT_IOCTL_SET_ATTRIBUTES _IOW('r', 0x11, __u32) -/* - * vfat shortname flags - */ -#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */ -#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */ -#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */ -#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ -#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ - struct fat_boot_sector { __u8 ignored[3]; /* Boot strap short or near jump */ __u8 system_id[8]; /* Name - can be used to special case @@ -168,14 +166,6 @@ struct msdos_dir_slot { __u8 name11_12[4]; /* last 2 characters in name */ }; -struct fat_slot_info { - loff_t i_pos; /* on-disk position of directory entry */ - loff_t slot_off; /* offset for slot or de start */ - int nr_slots; /* number of slots + 1(de) in filename */ - struct msdos_dir_entry *de; - struct buffer_head *bh; -}; - #ifdef __KERNEL__ #include @@ -184,6 +174,15 @@ struct fat_slot_info { #include #include +/* + * vfat shortname flags + */ +#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */ +#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */ +#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */ +#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ +#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ + struct fat_mount_options { uid_t fs_uid; gid_t fs_gid; @@ -267,6 +266,14 @@ struct msdos_inode_info { struct inode vfs_inode; }; +struct fat_slot_info { + loff_t i_pos; /* on-disk position of directory entry */ + loff_t slot_off; /* offset for slot or de start */ + int nr_slots; /* number of slots + 1(de) in filename */ + struct msdos_dir_entry *de; + struct buffer_head *bh; +}; + static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) { return sb->s_fs_info; -- cgit v1.2.3 From 7557bc66be629d19a402e752673708bfbb8b5e86 Mon Sep 17 00:00:00 2001 From: Rene 
Scharfe Date: Fri, 25 Jul 2008 01:46:45 -0700 Subject: msdos fs: remove unsettable atari option It has been impossible to set the option 'atari' of the MSDOS filesystem for several years. Since nobody seems to have missed it, let's remove its remains. Signed-off-by: Rene Scharfe Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/msdos/namei.c | 18 ++++++------------ include/linux/msdos_fs.h | 1 - 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index 1f7f2956412a..e4ad6c6b753e 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -14,12 +14,7 @@ /* Characters that are undesirable in an MS-DOS file name */ static unsigned char bad_chars[] = "*?<>|\""; -static unsigned char bad_if_strict_pc[] = "+=,; "; -/* GEMDOS is less restrictive */ -static unsigned char bad_if_strict_atari[] = " "; - -#define bad_if_strict(opts) \ - ((opts)->atari ? bad_if_strict_atari : bad_if_strict_pc) +static unsigned char bad_if_strict[] = "+=,; "; /***** Formats an MS-DOS file name. Rejects invalid names. */ static int msdos_format_name(const unsigned char *name, int len, @@ -40,21 +35,20 @@ static int msdos_format_name(const unsigned char *name, int len, /* Get rid of dot - test for it elsewhere */ name++; len--; - } else if (!opts->atari) + } else return -EINVAL; } /* - * disallow names that _really_ start with a dot for MS-DOS, - * GEMDOS does not care + * disallow names that _really_ start with a dot */ - space = !opts->atari; + space = 1; c = 0; for (walk = res; len && walk - res < 8; walk++) { c = *name++; len--; if (opts->name_check != 'r' && strchr(bad_chars, c)) return -EINVAL; - if (opts->name_check == 's' && strchr(bad_if_strict(opts), c)) + if (opts->name_check == 's' && strchr(bad_if_strict, c)) return -EINVAL; if (c >= 'A' && c <= 'Z' && opts->name_check == 's') return -EINVAL; @@ -94,7 +88,7 @@ static int msdos_format_name(const unsigned char *name, int len, if (opts->name_check != 'r' && strchr(bad_chars, c)) return -EINVAL; if (opts->name_check == 's' && - strchr(bad_if_strict(opts), c)) + strchr(bad_if_strict, c)) return -EINVAL; if (c < ' ' || c == ':' || c == '\\') return -EINVAL; diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 5161394c7894..3346c9c8f17a 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -201,7 +201,6 @@ struct fat_mount_options { utf8:1, /* Use of UTF-8 character set (Default) */ unicode_xlate:1, /* create escape sequences for unhandled Unicode */ numtail:1, /* Does first alias have a numeric '~1' type tail? */ - atari:1, /* Use Atari GEMDOS variation of MS-DOS fs */ flush:1, /* write things quickly */ nocase:1, /* Does this need case conversion? 0=need case conversion*/ usefree:1; /* Use free_clusters for FAT32 */ -- cgit v1.2.3 From cf6ae8b50e0ee3f764392dadd1970e3f03c40773 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 25 Jul 2008 01:46:46 -0700 Subject: remove the in-kernel struct dirent{,64} The kernel struct dirent{,64} were different from the ones in userspace. Even worse, we exported the kernel ones to userspace. But after the fat usages are fixed we can remove the conflicting kernel versions. Reviewed-by: H. 
Peter Anvin Signed-off-by: Adrian Bunk Cc: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/Kbuild | 1 - include/linux/dirent.h | 20 -------------------- 2 files changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 71d70d1fbce2..a18008ce7aba 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -189,7 +189,6 @@ unifdef-y += connector.h unifdef-y += cuda.h unifdef-y += cyclades.h unifdef-y += dccp.h -unifdef-y += dirent.h unifdef-y += dlm.h unifdef-y += dlm_plock.h unifdef-y += edd.h diff --git a/include/linux/dirent.h b/include/linux/dirent.h index 5d6023b87800..f072fb8d10a3 100644 --- a/include/linux/dirent.h +++ b/include/linux/dirent.h @@ -1,23 +1,6 @@ #ifndef _LINUX_DIRENT_H #define _LINUX_DIRENT_H -struct dirent { - long d_ino; - __kernel_off_t d_off; - unsigned short d_reclen; - char d_name[256]; /* We must not include limits.h! */ -}; - -struct dirent64 { - __u64 d_ino; - __s64 d_off; - unsigned short d_reclen; - unsigned char d_type; - char d_name[256]; -}; - -#ifdef __KERNEL__ - struct linux_dirent64 { u64 d_ino; s64 d_off; @@ -26,7 +9,4 @@ struct linux_dirent64 { char d_name[0]; }; -#endif /* __KERNEL__ */ - - #endif -- cgit v1.2.3 From e8938a62a85d1f487e02c3b01955b47c9598f6d2 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 25 Jul 2008 01:46:46 -0700 Subject: remove unused #include 's Remove some unused #include 's. Signed-off-by: Adrian Bunk Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/kernel/linux32.c | 1 - fs/compat_ioctl.c | 1 - fs/smbfs/cache.c | 1 - fs/smbfs/proc.c | 1 - include/linux/nfsd/nfsd.h | 1 - 5 files changed, 5 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index c266211ed653..2fefb14414b7 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 18e2c548161d..5235c67e7594 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c index 8182f0542a21..8c177eb7e344 100644 --- a/fs/smbfs/cache.c +++ b/fs/smbfs/cache.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index d517a27b7f4b..ee536e8a649a 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index a2861d95ecc3..108f47e5fd95 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -12,7 +12,6 @@ #include #include -#include #include #include #include -- cgit v1.2.3 From b271e067c896ad4082b15e96077675d08db40625 Mon Sep 17 00:00:00 2001 From: Joe Peterson Date: Fri, 25 Jul 2008 01:46:47 -0700 Subject: fatfs: add UTC timestamp option Provide a new mount option ("tz=UTC") for DOS (vfat/msdos) filesystems, allowing timestamps to be in coordinated universal time (UTC) rather than local time in applications where doing this is advantageous. In particular, portable devices that use fat/vfat (such as digital cameras) can benefit from using UTC in their internal clocks, thus avoiding daylight saving time errors and general time ambiguity issues. 
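As a usage illustration, a camera card whose clock is kept in UTC could then be mounted with something like

	mount -t vfat -o tz=UTC /dev/sdb1 /mnt/camera

where the device node and mount point are, of course, arbitrary examples.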
The user of the device does not have to worry about changing the time when moving from place or when daylight saving changes. The new mount option, when set, disables the counter-adjustment that Linux currently makes to FAT timestamp info in anticipation of the normal userspace time zone correction. When used in this new mode, all daylight saving time and time zone handling is done in userspace as is normal for many other filesystems (like ext3). The default mode, which remains unchanged, is still appropriate when mounting volumes written in Windows (because of its use of local time). I originally based this patch on one submitted last year by Paul Collins, but I updated it to work with current source and changed variable/option naming. Ogawa Hirofumi (who maintains these filesystems) and I discussed this patch at length on lkml, and he suggested using the option name in the attached version of the patch. Barry Bouwsma pointed out a good addition to the patch as well. Signed-off-by: Joe Peterson Signed-off-by: Paul Collins Acked-by: OGAWA Hirofumi Cc: Barry Bouwsma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 2 +- fs/fat/inode.c | 27 ++++++++++++++++++++------- fs/fat/misc.c | 10 ++++++---- fs/msdos/namei.c | 3 ++- fs/vfat/namei.c | 2 +- include/linux/msdos_fs.h | 8 +++++--- 6 files changed, 35 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 4c35477bc94c..cd4a0162e10d 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -1101,7 +1101,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) goto error_free; } - fat_date_unix2dos(ts->tv_sec, &time, &date); + fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); de = (struct msdos_dir_entry *)bhs[0]->b_data; /* filling the new directory slots ("." and ".." 
entries) */ diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 60deb5fd1188..23676f9d79ce 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -382,17 +382,20 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; inode->i_mtime.tv_sec = - date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date)); + date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date), + sbi->options.tz_utc); inode->i_mtime.tv_nsec = 0; if (sbi->options.isvfat) { int secs = de->ctime_cs / 100; int csecs = de->ctime_cs % 100; inode->i_ctime.tv_sec = date_dos2unix(le16_to_cpu(de->ctime), - le16_to_cpu(de->cdate)) + secs; + le16_to_cpu(de->cdate), + sbi->options.tz_utc) + secs; inode->i_ctime.tv_nsec = csecs * 10000000; inode->i_atime.tv_sec = - date_dos2unix(0, le16_to_cpu(de->adate)); + date_dos2unix(0, le16_to_cpu(de->adate), + sbi->options.tz_utc); inode->i_atime.tv_nsec = 0; } else inode->i_ctime = inode->i_atime = inode->i_mtime; @@ -591,11 +594,14 @@ retry: raw_entry->attr = fat_attr(inode); raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); - fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date); + fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, + &raw_entry->date, sbi->options.tz_utc); if (sbi->options.isvfat) { __le16 atime; - fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate); - fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate); + fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime, + &raw_entry->cdate, sbi->options.tz_utc); + fat_date_unix2dos(inode->i_atime.tv_sec, &atime, + &raw_entry->adate, sbi->options.tz_utc); raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 + inode->i_ctime.tv_nsec / 10000000; } @@ -836,6 +842,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt) } if (sbi->options.flush) seq_puts(m, ",flush"); + if (opts->tz_utc) + seq_puts(m, ",tz=UTC"); return 0; } @@ -848,7 +856,7 @@ enum { Opt_charset, Opt_shortname_lower, Opt_shortname_win95, Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, - Opt_obsolate, Opt_flush, Opt_err, + Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, }; static match_table_t fat_tokens = { @@ -883,6 +891,7 @@ static match_table_t fat_tokens = { {Opt_obsolate, "cvf_options=%100s"}, {Opt_obsolate, "posix"}, {Opt_flush, "flush"}, + {Opt_tz_utc, "tz=UTC"}, {Opt_err, NULL}, }; static match_table_t msdos_tokens = { @@ -947,6 +956,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, opts->utf8 = opts->unicode_xlate = 0; opts->numtail = 1; opts->usefree = opts->nocase = 0; + opts->tz_utc = 0; *debug = 0; if (!options) @@ -1036,6 +1046,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, case Opt_flush: opts->flush = 1; break; + case Opt_tz_utc: + opts->tz_utc = 1; + break; /* msdos specific */ case Opt_dots: diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 61f23511eacf..79fb98ad36d4 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -142,7 +142,7 @@ static int day_n[] = { }; /* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). 
*/ -int date_dos2unix(unsigned short time, unsigned short date) +int date_dos2unix(unsigned short time, unsigned short date, int tz_utc) { int month, year, secs; @@ -156,16 +156,18 @@ int date_dos2unix(unsigned short time, unsigned short date) ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 && month < 2 ? 1 : 0)+3653); /* days since 1.1.70 plus 80's leap day */ - secs += sys_tz.tz_minuteswest*60; + if (!tz_utc) + secs += sys_tz.tz_minuteswest*60; return secs; } /* Convert linear UNIX date to a MS-DOS time/date pair. */ -void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date) +void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc) { int day, year, nl_day, month; - unix_date -= sys_tz.tz_minuteswest*60; + if (!tz_utc) + unix_date -= sys_tz.tz_minuteswest*60; /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */ if (unix_date < 315532800) diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index e4ad6c6b753e..e844b9809d27 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -237,6 +237,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name, int is_dir, int is_hid, int cluster, struct timespec *ts, struct fat_slot_info *sinfo) { + struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb); struct msdos_dir_entry de; __le16 time, date; int err; @@ -246,7 +247,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name, if (is_hid) de.attr |= ATTR_HIDDEN; de.lcase = 0; - fat_date_unix2dos(ts->tv_sec, &time, &date); + fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); de.cdate = de.adate = 0; de.ctime = 0; de.ctime_cs = 0; diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index b546ba69be82..155c10b4adbd 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -621,7 +621,7 @@ shortname: memcpy(de->name, msdos_name, MSDOS_NAME); de->attr = is_dir ? ATTR_DIR : ATTR_ARCH; de->lcase = lcase; - fat_date_unix2dos(ts->tv_sec, &time, &date); + fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); de->time = de->ctime = time; de->date = de->cdate = de->adate = date; de->ctime_cs = 0; diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 3346c9c8f17a..ba63858056c7 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -203,7 +203,8 @@ struct fat_mount_options { numtail:1, /* Does first alias have a numeric '~1' type tail? */ flush:1, /* write things quickly */ nocase:1, /* Does this need case conversion? 
0=need case conversion*/ - usefree:1; /* Use free_clusters for FAT32 */ + usefree:1, /* Use free_clusters for FAT32 */ + tz_utc:1; /* Filesystem timestamps are in UTC */ }; #define FAT_HASH_BITS 8 @@ -434,8 +435,9 @@ extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); extern void fat_clusters_flush(struct super_block *sb); extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); -extern int date_dos2unix(unsigned short time, unsigned short date); -extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date); +extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc); +extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, + int tz_utc); extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); int fat_cache_init(void); -- cgit v1.2.3 From b85f4b87a511bea86dac68c4f0fabaee2cac6c4c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 25 Jul 2008 01:46:50 -0700 Subject: quota: rename quota functions from upper case, make bigger ones non-inline Cleanup quotaops.h: Rename functions from uppercase to lowercase (and define backward compatibility macros), move larger functions to dquot.c and make them non-inline. Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dquot.c | 53 +++++++++++ include/linux/quotaops.h | 226 ++++++++++++++++++++++------------------------- 2 files changed, 160 insertions(+), 119 deletions(-) (limited to 'include/linux') diff --git a/fs/dquot.c b/fs/dquot.c index ad88cf6fcbaf..0bcaf970bbb4 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1153,6 +1153,28 @@ int dquot_drop(struct inode *inode) return 0; } +/* Wrapper to remove references to quota structures from inode */ +void vfs_dq_drop(struct inode *inode) +{ + /* Here we can get arbitrary inode from clear_inode() so we have + * to be careful. OTOH we don't need locking as quota operations + * are allowed to change only at mount time */ + if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op + && inode->i_sb->dq_op->drop) { + int cnt; + /* Test before calling to rule out calls from proc and such + * where we are not allowed to block. 
Note that this is + * actually reliable test even without the lock - the caller + * must assure that nobody can come after the DQUOT_DROP and + * add quota pointers back anyway */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (inode->i_dquot[cnt] != NODQUOT) + break; + if (cnt < MAXQUOTAS) + inode->i_sb->dq_op->drop(inode); + } +} + /* * Following four functions update i_blocks+i_bytes fields and * quota information (together with appropriate checks) @@ -1426,6 +1448,18 @@ warn_put_all: return ret; } +/* Wrapper for transferring ownership of an inode */ +int vfs_dq_transfer(struct inode *inode, struct iattr *iattr) +{ + if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) { + vfs_dq_init(inode); + if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA) + return 1; + } + return 0; +} + + /* * Write info of quota file to disk */ @@ -1766,6 +1800,22 @@ out: return error; } +/* Wrapper to turn on quotas when remounting rw */ +int vfs_dq_quota_on_remount(struct super_block *sb) +{ + int cnt; + int ret = 0, err; + + if (!sb->s_qcop || !sb->s_qcop->quota_on) + return -ENOSYS; + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1); + if (err < 0 && !ret) + ret = err; + } + return ret; +} + /* Generic routine for getting common part of quota structure */ static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) { @@ -2101,8 +2151,11 @@ EXPORT_SYMBOL(dquot_release); EXPORT_SYMBOL(dquot_mark_dquot_dirty); EXPORT_SYMBOL(dquot_initialize); EXPORT_SYMBOL(dquot_drop); +EXPORT_SYMBOL(vfs_dq_drop); EXPORT_SYMBOL(dquot_alloc_space); EXPORT_SYMBOL(dquot_alloc_inode); EXPORT_SYMBOL(dquot_free_space); EXPORT_SYMBOL(dquot_free_inode); EXPORT_SYMBOL(dquot_transfer); +EXPORT_SYMBOL(vfs_dq_transfer); +EXPORT_SYMBOL(vfs_dq_quota_on_remount); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index f86702053853..0c8f9fe462af 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -19,34 +19,38 @@ /* * declaration of quota_function calls in kernel. 
*/ -extern void sync_dquots(struct super_block *sb, int type); - -extern int dquot_initialize(struct inode *inode, int type); -extern int dquot_drop(struct inode *inode); - -extern int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); -extern int dquot_alloc_inode(const struct inode *inode, unsigned long number); - -extern int dquot_free_space(struct inode *inode, qsize_t number); -extern int dquot_free_inode(const struct inode *inode, unsigned long number); - -extern int dquot_transfer(struct inode *inode, struct iattr *iattr); -extern int dquot_commit(struct dquot *dquot); -extern int dquot_acquire(struct dquot *dquot); -extern int dquot_release(struct dquot *dquot); -extern int dquot_commit_info(struct super_block *sb, int type); -extern int dquot_mark_dquot_dirty(struct dquot *dquot); - -extern int vfs_quota_on(struct super_block *sb, int type, int format_id, - char *path, int remount); -extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name, - int format_id, int type); -extern int vfs_quota_off(struct super_block *sb, int type, int remount); -extern int vfs_quota_sync(struct super_block *sb, int type); -extern int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); -extern int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); -extern int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); -extern int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); +void sync_dquots(struct super_block *sb, int type); + +int dquot_initialize(struct inode *inode, int type); +int dquot_drop(struct inode *inode); + +int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); +int dquot_alloc_inode(const struct inode *inode, unsigned long number); + +int dquot_free_space(struct inode *inode, qsize_t number); +int dquot_free_inode(const struct inode *inode, unsigned long number); + +int dquot_transfer(struct inode *inode, struct iattr *iattr); +int dquot_commit(struct dquot *dquot); +int dquot_acquire(struct dquot *dquot); +int dquot_release(struct dquot *dquot); +int dquot_commit_info(struct super_block *sb, int type); +int dquot_mark_dquot_dirty(struct dquot *dquot); + +int vfs_quota_on(struct super_block *sb, int type, int format_id, + char *path, int remount); +int vfs_quota_on_mount(struct super_block *sb, char *qf_name, + int format_id, int type); +int vfs_quota_off(struct super_block *sb, int type, int remount); +int vfs_quota_sync(struct super_block *sb, int type); +int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); +int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); +int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); +int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); + +void vfs_dq_drop(struct inode *inode); +int vfs_dq_transfer(struct inode *inode, struct iattr *iattr); +int vfs_dq_quota_on_remount(struct super_block *sb); /* * Operations supported for diskquotas. @@ -59,38 +63,16 @@ extern struct quotactl_ops vfs_quotactl_ops; /* It is better to call this function outside of any transaction as it might * need a lot of space in journal for dquot structure allocation. 
*/ -static inline void DQUOT_INIT(struct inode *inode) +static inline void vfs_dq_init(struct inode *inode) { BUG_ON(!inode->i_sb); if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) inode->i_sb->dq_op->initialize(inode, -1); } -/* The same as with DQUOT_INIT */ -static inline void DQUOT_DROP(struct inode *inode) -{ - /* Here we can get arbitrary inode from clear_inode() so we have - * to be careful. OTOH we don't need locking as quota operations - * are allowed to change only at mount time */ - if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op - && inode->i_sb->dq_op->drop) { - int cnt; - /* Test before calling to rule out calls from proc and such - * where we are not allowed to block. Note that this is - * actually reliable test even without the lock - the caller - * must assure that nobody can come after the DQUOT_DROP and - * add quota pointers back anyway */ - for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if (inode->i_dquot[cnt] != NODQUOT) - break; - if (cnt < MAXQUOTAS) - inode->i_sb->dq_op->drop(inode); - } -} - /* The following allocation/freeing/transfer functions *must* be called inside * a transaction (deadlocks possible otherwise) */ -static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) +static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr) { if (sb_any_quota_enabled(inode->i_sb)) { /* Used space is updated in alloc_space() */ @@ -102,15 +84,15 @@ static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) return 0; } -static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr) +static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr) { int ret; - if (!(ret = DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr))) + if (!(ret = vfs_dq_prealloc_space_nodirty(inode, nr))) mark_inode_dirty(inode); return ret; } -static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) +static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr) { if (sb_any_quota_enabled(inode->i_sb)) { /* Used space is updated in alloc_space() */ @@ -122,25 +104,25 @@ static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) return 0; } -static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr) +static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr) { int ret; - if (!(ret = DQUOT_ALLOC_SPACE_NODIRTY(inode, nr))) + if (!(ret = vfs_dq_alloc_space_nodirty(inode, nr))) mark_inode_dirty(inode); return ret; } -static inline int DQUOT_ALLOC_INODE(struct inode *inode) +static inline int vfs_dq_alloc_inode(struct inode *inode) { if (sb_any_quota_enabled(inode->i_sb)) { - DQUOT_INIT(inode); + vfs_dq_init(inode); if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) return 1; } return 0; } -static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) +static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr) { if (sb_any_quota_enabled(inode->i_sb)) inode->i_sb->dq_op->free_space(inode, nr); @@ -148,35 +130,25 @@ static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) inode_sub_bytes(inode, nr); } -static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr) +static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr) { - DQUOT_FREE_SPACE_NODIRTY(inode, nr); + vfs_dq_free_space_nodirty(inode, nr); mark_inode_dirty(inode); } -static inline void DQUOT_FREE_INODE(struct inode *inode) +static inline void vfs_dq_free_inode(struct inode *inode) { if 
(sb_any_quota_enabled(inode->i_sb)) inode->i_sb->dq_op->free_inode(inode, 1); } -static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr) -{ - if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) { - DQUOT_INIT(inode); - if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA) - return 1; - } - return 0; -} - /* The following two functions cannot be called inside a transaction */ -static inline void DQUOT_SYNC(struct super_block *sb) +static inline void vfs_dq_sync(struct super_block *sb) { sync_dquots(sb, -1); } -static inline int DQUOT_OFF(struct super_block *sb, int remount) +static inline int vfs_dq_off(struct super_block *sb, int remount) { int ret = -ENOSYS; @@ -185,21 +157,6 @@ static inline int DQUOT_OFF(struct super_block *sb, int remount) return ret; } -static inline int DQUOT_ON_REMOUNT(struct super_block *sb) -{ - int cnt; - int ret = 0, err; - - if (!sb->s_qcop || !sb->s_qcop->quota_on) - return -ENOSYS; - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1); - if (err < 0 && !ret) - ret = err; - } - return ret; -} - #else /* @@ -208,113 +165,144 @@ static inline int DQUOT_ON_REMOUNT(struct super_block *sb) #define sb_dquot_ops (NULL) #define sb_quotactl_ops (NULL) -static inline void DQUOT_INIT(struct inode *inode) +static inline void vfs_dq_init(struct inode *inode) { } -static inline void DQUOT_DROP(struct inode *inode) +static inline void vfs_dq_drop(struct inode *inode) { } -static inline int DQUOT_ALLOC_INODE(struct inode *inode) +static inline int vfs_dq_alloc_inode(struct inode *inode) { return 0; } -static inline void DQUOT_FREE_INODE(struct inode *inode) +static inline void vfs_dq_free_inode(struct inode *inode) { } -static inline void DQUOT_SYNC(struct super_block *sb) +static inline void vfs_dq_sync(struct super_block *sb) { } -static inline int DQUOT_OFF(struct super_block *sb, int remount) +static inline int vfs_dq_off(struct super_block *sb, int remount) { return 0; } -static inline int DQUOT_ON_REMOUNT(struct super_block *sb) +static inline int vfs_dq_quota_on_remount(struct super_block *sb) { return 0; } -static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr) +static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr) { return 0; } -static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) +static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr) { inode_add_bytes(inode, nr); return 0; } -static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr) +static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr) { - DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr); + vfs_dq_prealloc_space_nodirty(inode, nr); mark_inode_dirty(inode); return 0; } -static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) +static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr) { inode_add_bytes(inode, nr); return 0; } -static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr) +static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr) { - DQUOT_ALLOC_SPACE_NODIRTY(inode, nr); + vfs_dq_alloc_space_nodirty(inode, nr); mark_inode_dirty(inode); return 0; } -static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr) +static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr) { inode_sub_bytes(inode, nr); } -static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr) +static inline void vfs_dq_free_space(struct inode 
*inode, qsize_t nr) { - DQUOT_FREE_SPACE_NODIRTY(inode, nr); + vfs_dq_free_space_nodirty(inode, nr); mark_inode_dirty(inode); } #endif /* CONFIG_QUOTA */ -static inline int DQUOT_PREALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr) +static inline int vfs_dq_prealloc_block_nodirty(struct inode *inode, qsize_t nr) { - return DQUOT_PREALLOC_SPACE_NODIRTY(inode, + return vfs_dq_prealloc_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits); } -static inline int DQUOT_PREALLOC_BLOCK(struct inode *inode, qsize_t nr) +static inline int vfs_dq_prealloc_block(struct inode *inode, qsize_t nr) { - return DQUOT_PREALLOC_SPACE(inode, + return vfs_dq_prealloc_space(inode, nr << inode->i_sb->s_blocksize_bits); } -static inline int DQUOT_ALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr) +static inline int vfs_dq_alloc_block_nodirty(struct inode *inode, qsize_t nr) { - return DQUOT_ALLOC_SPACE_NODIRTY(inode, + return vfs_dq_alloc_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits); } -static inline int DQUOT_ALLOC_BLOCK(struct inode *inode, qsize_t nr) +static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr) { - return DQUOT_ALLOC_SPACE(inode, + return vfs_dq_alloc_space(inode, nr << inode->i_sb->s_blocksize_bits); } -static inline void DQUOT_FREE_BLOCK_NODIRTY(struct inode *inode, qsize_t nr) +static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr) { - DQUOT_FREE_SPACE_NODIRTY(inode, nr << inode->i_sb->s_blocksize_bits); + vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits); } -static inline void DQUOT_FREE_BLOCK(struct inode *inode, qsize_t nr) +static inline void vfs_dq_free_block(struct inode *inode, qsize_t nr) { - DQUOT_FREE_SPACE(inode, nr << inode->i_sb->s_blocksize_bits); + vfs_dq_free_space(inode, nr << inode->i_sb->s_blocksize_bits); } +/* + * Define uppercase equivalents for compatibility with old function names + * Can go away when we think all users have been converted (15/04/2008) + */ +#define DQUOT_INIT(inode) vfs_dq_init(inode) +#define DQUOT_DROP(inode) vfs_dq_drop(inode) +#define DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr) \ + vfs_dq_prealloc_space_nodirty(inode, nr) +#define DQUOT_PREALLOC_SPACE(inode, nr) vfs_dq_prealloc_space(inode, nr) +#define DQUOT_ALLOC_SPACE_NODIRTY(inode, nr) \ + vfs_dq_alloc_space_nodirty(inode, nr) +#define DQUOT_ALLOC_SPACE(inode, nr) vfs_dq_alloc_space(inode, nr) +#define DQUOT_PREALLOC_BLOCK_NODIRTY(inode, nr) \ + vfs_dq_prealloc_block_nodirty(inode, nr) +#define DQUOT_PREALLOC_BLOCK(inode, nr) vfs_dq_prealloc_block(inode, nr) +#define DQUOT_ALLOC_BLOCK_NODIRTY(inode, nr) \ + vfs_dq_alloc_block_nodirty(inode, nr) +#define DQUOT_ALLOC_BLOCK(inode, nr) vfs_dq_alloc_block(inode, nr) +#define DQUOT_ALLOC_INODE(inode) vfs_dq_alloc_inode(inode) +#define DQUOT_FREE_SPACE_NODIRTY(inode, nr) \ + vfs_dq_free_space_nodirty(inode, nr) +#define DQUOT_FREE_SPACE(inode, nr) vfs_dq_free_space(inode, nr) +#define DQUOT_FREE_BLOCK_NODIRTY(inode, nr) \ + vfs_dq_free_block_nodirty(inode, nr) +#define DQUOT_FREE_BLOCK(inode, nr) vfs_dq_free_block(inode, nr) +#define DQUOT_FREE_INODE(inode) vfs_dq_free_inode(inode) +#define DQUOT_TRANSFER(inode, iattr) vfs_dq_transfer(inode, iattr) +#define DQUOT_SYNC(sb) vfs_dq_sync(sb) +#define DQUOT_OFF(sb, remount) vfs_dq_off(sb, remount) +#define DQUOT_ON_REMOUNT(sb) vfs_dq_quota_on_remount(sb) + #endif /* _LINUX_QUOTAOPS_ */ -- cgit v1.2.3 From 02a55ca87185e114e5d298a8d00608501dbabf67 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 25 Jul 2008 01:46:50 -0700 Subject: 
quota: cleanup loop in sync_dquots() Make loop in sync_dquots() checking whether there's something to write more readable, remove useless variable and macro info_any_dirty() which is used only in this place. Signed-off-by: Jan Kara Cc: "Vegard Nossum" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/quota.c | 18 ++++++++++++------ include/linux/quota.h | 2 -- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/quota.c b/fs/quota.c index db1cc9f3c7aa..7f4386ebc23a 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -186,7 +186,7 @@ static void quota_sync_sb(struct super_block *sb, int type) void sync_dquots(struct super_block *sb, int type) { - int cnt, dirty; + int cnt; if (sb) { if (sb->s_qcop->quota_sync) @@ -198,11 +198,17 @@ void sync_dquots(struct super_block *sb, int type) restart: list_for_each_entry(sb, &super_blocks, s_list) { /* This test just improves performance so it needn't be reliable... */ - for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) - if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) - && info_any_dirty(&sb_dqopt(sb)->info[cnt])) - dirty = 1; - if (!dirty) + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (type != -1 && type != cnt) + continue; + if (!sb_has_quota_enabled(sb, cnt)) + continue; + if (!info_dirty(&sb_dqopt(sb)->info[cnt]) && + list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list)) + continue; + break; + } + if (cnt == MAXQUOTAS) continue; sb->s_count++; spin_unlock(&sb_lock); diff --git a/include/linux/quota.h b/include/linux/quota.h index dcddfb200947..6f1d97ddf828 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -224,8 +224,6 @@ struct super_block; extern void mark_info_dirty(struct super_block *sb, int type); #define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags) -#define info_any_dquot_dirty(info) (!list_empty(&(info)->dqi_dirty_list)) -#define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info)) #define sb_dqopt(sb) (&(sb)->s_dquot) #define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type)) -- cgit v1.2.3 From 74abb9890dafb12a50dc140de215ed477beb1b88 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 25 Jul 2008 01:46:51 -0700 Subject: quota: move function-macros from quota.h to quotaops.h Move declarations of some macros, which should be in fact functions to quotaops.h. This way they can be later converted to inline functions because we can now use declarations from quota.h. Also add necessary includes of quotaops.h to a few files. 
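As a hedged illustration (editor's sketch, not part of this patch): after the move, a filesystem that wants the quota status helpers pulls them in from quotaops.h rather than relying on quota.h alone. The filesystem name and function below are made up purely for illustration:

#include <linux/quota.h>
#include <linux/quotaops.h>	/* sb_any_quota_enabled(), sb_dqinfo(), ... */

/* "examplefs" is a hypothetical filesystem, shown only to illustrate the include */
static int examplefs_quota_info_dirty(struct super_block *sb, int type)
{
	if (!sb_any_quota_enabled(sb))
		return 0;
	return info_dirty(sb_dqinfo(sb, type));
}

This mirrors the quotaops.h includes added to ext2, JFS, reiserfs, UFS and the quota format code in the hunks below.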
[akpm@linux-foundation.org: fix JFS build] [akpm@linux-foundation.org: fix UFS build] [vegard.nossum@gmail.com: fix QUOTA=n build] Signed-off-by: Jan Kara Cc: Vegard Nossum Cc: Arjen Pool Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/super.c | 1 + fs/jfs/super.c | 1 + fs/quota_v1.c | 1 + fs/quota_v2.c | 1 + fs/reiserfs/super.c | 1 + fs/ufs/super.c | 1 + include/linux/quota.h | 22 +++------------------- include/linux/quotaops.h | 26 ++++++++++++++++++++++++++ 8 files changed, 35 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/fs/ext2/super.c b/fs/ext2/super.c index ef50cbc792db..31308a3b0b8b 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "ext2.h" #include "xattr.h" diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 0288e6d7936a..359c091d8965 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/quota_v1.c b/fs/quota_v1.c index a6cf9269105c..5ae15b13eeb0 100644 --- a/fs/quota_v1.c +++ b/fs/quota_v1.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/quota_v2.c b/fs/quota_v2.c index 234ada903633..b53827dc02d9 100644 --- a/fs/quota_v2.c +++ b/fs/quota_v2.c @@ -11,6 +11,7 @@ #include #include #include +#include #include diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index a10a6d2a8870..2ec748ba0bd3 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 506f724055c2..227c9d700040 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -76,6 +76,7 @@ #include #include +#include #include #include #include diff --git a/include/linux/quota.h b/include/linux/quota.h index 6f1d97ddf828..f9983ea0ff88 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -41,9 +41,6 @@ #define __DQUOT_VERSION__ "dquot_6.5.1" #define __DQUOT_NUM_VERSION__ 6*10000+5*100+1 -typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */ -typedef __u64 qsize_t; /* Type in which we store sizes */ - /* Size of blocks in which are counted size limits */ #define QUOTABLOCK_BITS 10 #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS) @@ -172,6 +169,9 @@ enum { #include +typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */ +typedef __u64 qsize_t; /* Type in which we store sizes */ + extern spinlock_t dq_data_lock; /* Maximal numbers of writes for quota operation (insert/delete/update) @@ -225,9 +225,6 @@ struct super_block; extern void mark_info_dirty(struct super_block *sb, int type); #define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags) -#define sb_dqopt(sb) (&(sb)->s_dquot) -#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type)) - struct dqstats { int lookups; int drops; @@ -335,19 +332,6 @@ struct quota_info { struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ }; -#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \ - (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED)) - -#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \ - sb_has_quota_enabled(sb, GRPQUOTA)) - -#define sb_has_quota_suspended(sb, type) \ - ((type) == USRQUOTA ? 
(sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED) : \ - (sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED)) - -#define sb_any_quota_suspended(sb) (sb_has_quota_suspended(sb, USRQUOTA) | \ - sb_has_quota_suspended(sb, GRPQUOTA)) - int register_quota_format(struct quota_format_type *fmt); void unregister_quota_format(struct quota_format_type *fmt); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 0c8f9fe462af..38218c1334b1 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -14,6 +14,8 @@ #include +#define sb_dqopt(sb) (&(sb)->s_dquot) + #if defined(CONFIG_QUOTA) /* @@ -52,6 +54,25 @@ void vfs_dq_drop(struct inode *inode); int vfs_dq_transfer(struct inode *inode, struct iattr *iattr); int vfs_dq_quota_on_remount(struct super_block *sb); +#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type)) + +/* + * Functions for checking status of quota + */ + +#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \ + (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED)) + +#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \ + sb_has_quota_enabled(sb, GRPQUOTA)) + +#define sb_has_quota_suspended(sb, type) \ + ((type) == USRQUOTA ? (sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED) : \ + (sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED)) + +#define sb_any_quota_suspended(sb) (sb_has_quota_suspended(sb, USRQUOTA) | \ + sb_has_quota_suspended(sb, GRPQUOTA)) + /* * Operations supported for diskquotas. */ @@ -159,6 +180,11 @@ static inline int vfs_dq_off(struct super_block *sb, int remount) #else +#define sb_has_quota_enabled(sb, type) 0 +#define sb_any_quota_enabled(sb) 0 +#define sb_has_quota_suspended(sb, type) 0 +#define sb_any_quota_suspended(sb) 0 + /* * NO-OP when quota not configured. */ -- cgit v1.2.3 From 03b063436ca1076301de58d9d628f610ab5404ad Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 25 Jul 2008 01:46:52 -0700 Subject: quota: convert macros to inline functions Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/quota.h | 5 +++- include/linux/quotaops.h | 65 ++++++++++++++++++++++++++++++++++++------------ 2 files changed, 53 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index f9983ea0ff88..4e004fef8134 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -223,7 +223,10 @@ struct super_block; #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? 
*/ extern void mark_info_dirty(struct super_block *sb, int type); -#define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags) +static inline int info_dirty(struct mem_dqinfo *info) +{ + return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags); +} struct dqstats { int lookups; diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 38218c1334b1..742187f7a05c 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -11,10 +11,12 @@ #define _LINUX_QUOTAOPS_ #include - #include -#define sb_dqopt(sb) (&(sb)->s_dquot) +static inline struct quota_info *sb_dqopt(struct super_block *sb) +{ + return &sb->s_dquot; +} #if defined(CONFIG_QUOTA) @@ -54,24 +56,40 @@ void vfs_dq_drop(struct inode *inode); int vfs_dq_transfer(struct inode *inode, struct iattr *iattr); int vfs_dq_quota_on_remount(struct super_block *sb); -#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type)) +static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type) +{ + return sb_dqopt(sb)->info + type; +} /* * Functions for checking status of quota */ -#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \ - (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED)) +static inline int sb_has_quota_enabled(struct super_block *sb, int type) +{ + if (type == USRQUOTA) + return sb_dqopt(sb)->flags & DQUOT_USR_ENABLED; + return sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED; +} -#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \ - sb_has_quota_enabled(sb, GRPQUOTA)) +static inline int sb_any_quota_enabled(struct super_block *sb) +{ + return sb_has_quota_enabled(sb, USRQUOTA) || + sb_has_quota_enabled(sb, GRPQUOTA); +} -#define sb_has_quota_suspended(sb, type) \ - ((type) == USRQUOTA ? (sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED) : \ - (sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED)) +static inline int sb_has_quota_suspended(struct super_block *sb, int type) +{ + if (type == USRQUOTA) + return sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED; + return sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED; +} -#define sb_any_quota_suspended(sb) (sb_has_quota_suspended(sb, USRQUOTA) | \ - sb_has_quota_suspended(sb, GRPQUOTA)) +static inline int sb_any_quota_suspended(struct super_block *sb) +{ + return sb_has_quota_suspended(sb, USRQUOTA) || + sb_has_quota_suspended(sb, GRPQUOTA); +} /* * Operations supported for diskquotas. @@ -180,10 +198,25 @@ static inline int vfs_dq_off(struct super_block *sb, int remount) #else -#define sb_has_quota_enabled(sb, type) 0 -#define sb_any_quota_enabled(sb) 0 -#define sb_has_quota_suspended(sb, type) 0 -#define sb_any_quota_suspended(sb) 0 +static inline int sb_has_quota_enabled(struct super_block *sb, int type) +{ + return 0; +} + +static inline int sb_any_quota_enabled(struct super_block *sb) +{ + return 0; +} + +static inline int sb_has_quota_suspended(struct super_block *sb, int type) +{ + return 0; +} + +static inline int sb_any_quota_suspended(struct super_block *sb) +{ + return 0; +} /* * NO-OP when quota not configured. -- cgit v1.2.3 From 657d3bfa98e542271b449f8cd84c7501ae2b2255 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 25 Jul 2008 01:46:52 -0700 Subject: quota: implement sending information via netlink about user below quota Sometimes it may be useful for userspace to know (e.g. for some hosting guys) that some user stopped exceeding his hardlimit or softlimit in quotas. Implement sending of such events to userspace via quota netlink protocol so that they don't have to poll for such events. 
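As a hedged sketch of the idea (editor's illustration, not code from the patch): a "below" event fires when releasing usage crosses a limit downwards. The helper here is a simplified stand-in for the info_idq_free()/info_bdq_free() logic added further down; its name and parameters are illustrative only:

/* Illustrative only: detect a downward crossing of the block soft limit.
 * 'cur' is the current usage, 'freed' the amount about to be released. */
static int example_bsoft_warntype(unsigned long long cur,
				  unsigned long long freed,
				  unsigned long long bsoftlimit)
{
	if (cur <= bsoftlimit)
		return QUOTA_NL_NOWARN;		/* was not over the soft limit */
	if (cur - freed <= bsoftlimit)
		return QUOTA_NL_BSOFTBELOW;	/* usage just dropped below it */
	return QUOTA_NL_NOWARN;			/* still over the soft limit */
}

The real helpers below also skip DQ_FAKE_B dquots and handle the hard limits, and print_warning() is changed so the new *BELOW warntypes are never printed to the user's tty, only delivered over the quota netlink interface.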
Based on idea and initial implementation by Vladislav Bogdanov. Cc: Vladislav Bogdanov Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dquot.c | 60 +++++++++++++++++++++++++++++++++++++++++++++------ include/linux/quota.h | 4 ++++ 2 files changed, 58 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/dquot.c b/fs/dquot.c index 0bcaf970bbb4..1346eebe74ce 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -889,7 +889,10 @@ static void print_warning(struct dquot *dquot, const int warntype) char *msg = NULL; struct tty_struct *tty; - if (!need_print_warning(dquot)) + if (warntype == QUOTA_NL_IHARDBELOW || + warntype == QUOTA_NL_ISOFTBELOW || + warntype == QUOTA_NL_BHARDBELOW || + warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot)) return; mutex_lock(&tty_mutex); @@ -1097,6 +1100,35 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war return QUOTA_OK; } +static int info_idq_free(struct dquot *dquot, ulong inodes) +{ + if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || + dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit) + return QUOTA_NL_NOWARN; + + if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit) + return QUOTA_NL_ISOFTBELOW; + if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit && + dquot->dq_dqb.dqb_curinodes - inodes < dquot->dq_dqb.dqb_ihardlimit) + return QUOTA_NL_IHARDBELOW; + return QUOTA_NL_NOWARN; +} + +static int info_bdq_free(struct dquot *dquot, qsize_t space) +{ + if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || + toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit) + return QUOTA_NL_NOWARN; + + if (toqb(dquot->dq_dqb.dqb_curspace - space) <= + dquot->dq_dqb.dqb_bsoftlimit) + return QUOTA_NL_BSOFTBELOW; + if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit && + toqb(dquot->dq_dqb.dqb_curspace - space) < + dquot->dq_dqb.dqb_bhardlimit) + return QUOTA_NL_BHARDBELOW; + return QUOTA_NL_NOWARN; +} /* * Initialize quota pointers in inode * Transaction must be started at entry @@ -1284,6 +1316,7 @@ warn_put_all: int dquot_free_space(struct inode *inode, qsize_t number) { unsigned int cnt; + char warntype[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ @@ -1292,6 +1325,7 @@ out_sub: inode_sub_bytes(inode, number); return QUOTA_OK; } + down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { @@ -1302,6 +1336,7 @@ out_sub: for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (inode->i_dquot[cnt] == NODQUOT) continue; + warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number); dquot_decr_space(inode->i_dquot[cnt], number); } inode_sub_bytes(inode, number); @@ -1310,6 +1345,7 @@ out_sub: for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); + flush_warnings(inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return QUOTA_OK; } @@ -1320,11 +1356,13 @@ out_sub: int dquot_free_inode(const struct inode *inode, unsigned long number) { unsigned int cnt; + char warntype[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) return QUOTA_OK; + down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { @@ -1335,6 +1373,7 @@ int dquot_free_inode(const struct 
inode *inode, unsigned long number) for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (inode->i_dquot[cnt] == NODQUOT) continue; + warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number); dquot_decr_inodes(inode->i_dquot[cnt], number); } spin_unlock(&dq_data_lock); @@ -1342,6 +1381,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number) for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (inode->i_dquot[cnt]) mark_dquot_dirty(inode->i_dquot[cnt]); + flush_warnings(inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return QUOTA_OK; } @@ -1359,7 +1399,8 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) struct dquot *transfer_to[MAXQUOTAS]; int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid, chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid; - char warntype[MAXQUOTAS]; + char warntype_to[MAXQUOTAS]; + char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ @@ -1368,7 +1409,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) /* Clear the arrays */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { transfer_to[cnt] = transfer_from[cnt] = NODQUOT; - warntype[cnt] = QUOTA_NL_NOWARN; + warntype_to[cnt] = QUOTA_NL_NOWARN; } down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ @@ -1400,8 +1441,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) if (transfer_to[cnt] == NODQUOT) continue; transfer_from[cnt] = inode->i_dquot[cnt]; - if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA || - check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA) + if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) == + NO_QUOTA || check_bdq(transfer_to[cnt], space, 0, + warntype_to + cnt) == NO_QUOTA) goto warn_put_all; } @@ -1417,6 +1459,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) /* Due to IO error we might not have transfer_from[] structure */ if (transfer_from[cnt]) { + warntype_from_inodes[cnt] = + info_idq_free(transfer_from[cnt], 1); + warntype_from_space[cnt] = + info_bdq_free(transfer_from[cnt], space); dquot_decr_inodes(transfer_from[cnt], 1); dquot_decr_space(transfer_from[cnt], space); } @@ -1436,7 +1482,9 @@ warn_put_all: if (transfer_to[cnt]) mark_dquot_dirty(transfer_to[cnt]); } - flush_warnings(transfer_to, warntype); + flush_warnings(transfer_to, warntype_to); + flush_warnings(transfer_from, warntype_from_inodes); + flush_warnings(transfer_from, warntype_from_space); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT) diff --git a/include/linux/quota.h b/include/linux/quota.h index 4e004fef8134..376a05048bc5 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -135,6 +135,10 @@ struct if_dqinfo { #define QUOTA_NL_BHARDWARN 4 /* Block hardlimit reached */ #define QUOTA_NL_BSOFTLONGWARN 5 /* Block grace time expired */ #define QUOTA_NL_BSOFTWARN 6 /* Block softlimit reached */ +#define QUOTA_NL_IHARDBELOW 7 /* Usage got below inode hardlimit */ +#define QUOTA_NL_ISOFTBELOW 8 /* Usage got below inode softlimit */ +#define QUOTA_NL_BHARDBELOW 9 /* Usage got below block hardlimit */ +#define QUOTA_NL_BSOFTBELOW 10 /* Usage got below block softlimit */ enum { QUOTA_NL_C_UNSPEC, -- cgit v1.2.3 From f2992db2a4f7ae10f61d5bc68c7c1528cec639e2 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 25 Jul 2008 01:46:55 
-0700 Subject: Mark res_counter_charge(_locked) with __must_check Ignoring their return values may result in counter underflow in the future - when the value charged will be uncharged (or in "leaks" - when the value is not uncharged). This also prevents from using charging routines to decrement the counter value (i.e. uncharge it) ;) (Current code works OK with res_counter, however :) ) Signed-off-by: Pavel Emelyanov Cc: Balbir Singh Cc: Paul Menage Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 6d9e1fca098c..125660e7793f 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -95,8 +95,10 @@ void res_counter_init(struct res_counter *counter); * counter->limit _locked call expects the counter->lock to be taken */ -int res_counter_charge_locked(struct res_counter *counter, unsigned long val); -int res_counter_charge(struct res_counter *counter, unsigned long val); +int __must_check res_counter_charge_locked(struct res_counter *counter, + unsigned long val); +int __must_check res_counter_charge(struct res_counter *counter, + unsigned long val); /* * uncharge - tell that some portion of the resource is released -- cgit v1.2.3 From ce16b49d37e748574f7fabc2726268d542d0aa1a Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Fri, 25 Jul 2008 01:46:57 -0700 Subject: cgroup files: clean up whitespace in struct cftype This patch removes some extraneous spaces from method declarations in struct cftype, to fit in with conventional kernel style. Signed-off-by: Paul Menage Cc: Paul Jackson Cc: Pavel Emelyanov Cc: Balbir Singh Cc: Serge Hallyn Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e155aa78d859..88a734edccbc 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -205,48 +205,48 @@ struct cftype { * subsystem, followed by a period */ char name[MAX_CFTYPE_NAME]; int private; - int (*open) (struct inode *inode, struct file *file); - ssize_t (*read) (struct cgroup *cgrp, struct cftype *cft, - struct file *file, - char __user *buf, size_t nbytes, loff_t *ppos); + int (*open)(struct inode *inode, struct file *file); + ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, + struct file *file, + char __user *buf, size_t nbytes, loff_t *ppos); /* * read_u64() is a shortcut for the common case of returning a * single integer. Use it in place of read() */ - u64 (*read_u64) (struct cgroup *cgrp, struct cftype *cft); + u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft); /* * read_s64() is a signed version of read_u64() */ - s64 (*read_s64) (struct cgroup *cgrp, struct cftype *cft); + s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft); /* * read_map() is used for defining a map of key/value * pairs. It should call cb->fill(cb, key, value) for each * entry. The key/value pairs (and their ordering) should not * change between reboots. */ - int (*read_map) (struct cgroup *cont, struct cftype *cft, - struct cgroup_map_cb *cb); + int (*read_map)(struct cgroup *cont, struct cftype *cft, + struct cgroup_map_cb *cb); /* * read_seq_string() is used for outputting a simple sequence * using seqfile. 
*/ - int (*read_seq_string) (struct cgroup *cont, struct cftype *cft, - struct seq_file *m); + int (*read_seq_string)(struct cgroup *cont, struct cftype *cft, + struct seq_file *m); - ssize_t (*write) (struct cgroup *cgrp, struct cftype *cft, - struct file *file, - const char __user *buf, size_t nbytes, loff_t *ppos); + ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft, + struct file *file, + const char __user *buf, size_t nbytes, loff_t *ppos); /* * write_u64() is a shortcut for the common case of accepting * a single integer (as parsed by simple_strtoull) from * userspace. Use in place of write(); return 0 or error. */ - int (*write_u64) (struct cgroup *cgrp, struct cftype *cft, u64 val); + int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val); /* * write_s64() is a signed version of write_u64() */ - int (*write_s64) (struct cgroup *cgrp, struct cftype *cft, s64 val); + int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val); /* * trigger() callback can be used to get some kick from the @@ -256,7 +256,7 @@ struct cftype { */ int (*trigger)(struct cgroup *cgrp, unsigned int event); - int (*release) (struct inode *inode, struct file *file); + int (*release)(struct inode *inode, struct file *file); }; struct cgroup_scanner { -- cgit v1.2.3 From db3b14978abc02041046ed8353f0899cb58ffffc Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Fri, 25 Jul 2008 01:46:58 -0700 Subject: cgroup files: add write_string cgroup control file method This patch adds a write_string() method for cgroups control files. The semantics are that a buffer is copied from userspace to kernelspace and the handler function invoked on that buffer. The buffer is guaranteed to be nul-terminated, and no longer than max_write_len (defaulting to 64 bytes if unspecified). Later patches will convert existing raw file write handlers in control group subsystems to use this method. Signed-off-by: Paul Menage Cc: Paul Jackson Cc: Pavel Emelyanov Acked-by: Balbir Singh Acked-by: Serge Hallyn Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 14 ++++++++++++++ kernel/cgroup.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 88a734edccbc..f5379455bb59 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -205,6 +205,13 @@ struct cftype { * subsystem, followed by a period */ char name[MAX_CFTYPE_NAME]; int private; + + /* + * If non-zero, defines the maximum length of string that can + * be passed to write_string; defaults to 64 + */ + size_t max_write_len; + int (*open)(struct inode *inode, struct file *file); ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, struct file *file, @@ -248,6 +255,13 @@ struct cftype { */ int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val); + /* + * write_string() is passed a nul-terminated kernelspace + * buffer of maximum length determined by max_write_len. + * Returns 0 or -ve error code. 
+ */ + int (*write_string)(struct cgroup *cgrp, struct cftype *cft, + const char *buffer); /* * trigger() callback can be used to get some kick from the * userspace, when the actual string written is not important diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 70d083c6fb6b..3a99cc2df860 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1363,6 +1363,39 @@ static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft, return retval; } +static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft, + struct file *file, + const char __user *userbuf, + size_t nbytes, loff_t *unused_ppos) +{ + char local_buffer[64]; + int retval = 0; + size_t max_bytes = cft->max_write_len; + char *buffer = local_buffer; + + if (!max_bytes) + max_bytes = sizeof(local_buffer) - 1; + if (nbytes >= max_bytes) + return -E2BIG; + /* Allocate a dynamic buffer if we need one */ + if (nbytes >= sizeof(local_buffer)) { + buffer = kmalloc(nbytes + 1, GFP_KERNEL); + if (buffer == NULL) + return -ENOMEM; + } + if (nbytes && copy_from_user(buffer, userbuf, nbytes)) + return -EFAULT; + + buffer[nbytes] = 0; /* nul-terminate */ + strstrip(buffer); + retval = cft->write_string(cgrp, cft, buffer); + if (!retval) + retval = nbytes; + if (buffer != local_buffer) + kfree(buffer); + return retval; +} + static ssize_t cgroup_common_file_write(struct cgroup *cgrp, struct cftype *cft, struct file *file, @@ -1440,6 +1473,8 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf, return cft->write(cgrp, cft, file, buf, nbytes, ppos); if (cft->write_u64 || cft->write_s64) return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos); + if (cft->write_string) + return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos); if (cft->trigger) { int ret = cft->trigger(cgrp, (unsigned int)cft->private); return ret ? ret : nbytes; -- cgit v1.2.3 From e788e066c651b1bbf4a927dc95395c1aa13be436 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Fri, 25 Jul 2008 01:46:59 -0700 Subject: cgroup files: move the release_agent file to use typed handlers Adds cgroup_release_agent_write() and cgroup_release_agent_show() methods to handle writing/reading the path to a cgroup hierarchy's release agent. As a result, cgroup_common_file_read() is now unnecessary. As part of the change, a previously-tolerated race in cgroup_release_agent() is avoided by copying the current release_agent_path prior to calling call_usermode_helper(). Signed-off-by: Paul Menage Cc: Paul Jackson Cc: Pavel Emelyanov Cc: Balbir Singh Acked-by: Serge Hallyn Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 2 + kernel/cgroup.c | 125 ++++++++++++++++++++++--------------------------- 2 files changed, 59 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f5379455bb59..e78377a91a74 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -295,6 +295,8 @@ int cgroup_add_files(struct cgroup *cgrp, int cgroup_is_removed(const struct cgroup *cgrp); +int cgroup_lock_live_group(struct cgroup *cgrp); + int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); int cgroup_task_count(const struct cgroup *cgrp); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3a99cc2df860..0120b5d67a73 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -89,11 +89,7 @@ struct cgroupfs_root { /* Hierarchy-specific flags */ unsigned long flags; - /* The path to use for release notifications. 
No locking - * between setting and use - so if userspace updates this - * while child cgroups exist, you could miss a - * notification. We ensure that it's always a valid - * NUL-terminated string */ + /* The path to use for release notifications. */ char release_agent_path[PATH_MAX]; }; @@ -1329,6 +1325,45 @@ enum cgroup_filetype { FILE_RELEASE_AGENT, }; +/** + * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive. + * @cgrp: the cgroup to be checked for liveness + * + * Returns true (with lock held) on success, or false (with no lock + * held) on failure. + */ +int cgroup_lock_live_group(struct cgroup *cgrp) +{ + mutex_lock(&cgroup_mutex); + if (cgroup_is_removed(cgrp)) { + mutex_unlock(&cgroup_mutex); + return false; + } + return true; +} + +static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, + const char *buffer) +{ + BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); + if (!cgroup_lock_live_group(cgrp)) + return -ENODEV; + strcpy(cgrp->root->release_agent_path, buffer); + mutex_unlock(&cgroup_mutex); + return 0; +} + +static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *seq) +{ + if (!cgroup_lock_live_group(cgrp)) + return -ENODEV; + seq_puts(seq, cgrp->root->release_agent_path); + seq_putc(seq, '\n'); + mutex_unlock(&cgroup_mutex); + return 0; +} + static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft, struct file *file, const char __user *userbuf, @@ -1443,10 +1478,6 @@ static ssize_t cgroup_common_file_write(struct cgroup *cgrp, else clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); break; - case FILE_RELEASE_AGENT: - BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); - strcpy(cgrp->root->release_agent_path, buffer); - break; default: retval = -EINVAL; goto out2; @@ -1506,49 +1537,6 @@ static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft, return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); } -static ssize_t cgroup_common_file_read(struct cgroup *cgrp, - struct cftype *cft, - struct file *file, - char __user *buf, - size_t nbytes, loff_t *ppos) -{ - enum cgroup_filetype type = cft->private; - char *page; - ssize_t retval = 0; - char *s; - - if (!(page = (char *)__get_free_page(GFP_KERNEL))) - return -ENOMEM; - - s = page; - - switch (type) { - case FILE_RELEASE_AGENT: - { - struct cgroupfs_root *root; - size_t n; - mutex_lock(&cgroup_mutex); - root = cgrp->root; - n = strnlen(root->release_agent_path, - sizeof(root->release_agent_path)); - n = min(n, (size_t) PAGE_SIZE); - strncpy(s, root->release_agent_path, n); - mutex_unlock(&cgroup_mutex); - s += n; - break; - } - default: - retval = -EINVAL; - goto out; - } - *s++ = '\n'; - - retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page); -out: - free_page((unsigned long)page); - return retval; -} - static ssize_t cgroup_file_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { @@ -1606,6 +1594,7 @@ int cgroup_seqfile_release(struct inode *inode, struct file *file) static struct file_operations cgroup_seqfile_operations = { .read = seq_read, + .write = cgroup_file_write, .llseek = seq_lseek, .release = cgroup_seqfile_release, }; @@ -2283,8 +2272,9 @@ static struct cftype files[] = { static struct cftype cft_release_agent = { .name = "release_agent", - .read = cgroup_common_file_read, - .write = cgroup_common_file_write, + .read_seq_string = cgroup_release_agent_show, + .write_string = cgroup_release_agent_write, + .max_write_len = PATH_MAX, 
.private = FILE_RELEASE_AGENT, }; @@ -3111,27 +3101,24 @@ static void cgroup_release_agent(struct work_struct *work) while (!list_empty(&release_list)) { char *argv[3], *envp[3]; int i; - char *pathbuf; + char *pathbuf = NULL, *agentbuf = NULL; struct cgroup *cgrp = list_entry(release_list.next, struct cgroup, release_list); list_del_init(&cgrp->release_list); spin_unlock(&release_list_lock); pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!pathbuf) { - spin_lock(&release_list_lock); - continue; - } - - if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) { - kfree(pathbuf); - spin_lock(&release_list_lock); - continue; - } + if (!pathbuf) + goto continue_free; + if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) + goto continue_free; + agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); + if (!agentbuf) + goto continue_free; i = 0; - argv[i++] = cgrp->root->release_agent_path; - argv[i++] = (char *)pathbuf; + argv[i++] = agentbuf; + argv[i++] = pathbuf; argv[i] = NULL; i = 0; @@ -3145,8 +3132,10 @@ static void cgroup_release_agent(struct work_struct *work) * be a slow process */ mutex_unlock(&cgroup_mutex); call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); - kfree(pathbuf); mutex_lock(&cgroup_mutex); + continue_free: + kfree(pathbuf); + kfree(agentbuf); spin_lock(&release_list_lock); } spin_unlock(&release_list_lock); -- cgit v1.2.3 From 84eea842886ac35020be6043e04748ed22014359 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Fri, 25 Jul 2008 01:47:00 -0700 Subject: cgroups: misc cleanups to write_string patchset This patch contains cleanups suggested by reviewers for the recent write_string() patchset: - pair cgroup_lock_live_group() with cgroup_unlock() in cgroup.c for clarity, rather than directly unlocking cgroup_mutex. - make the return type of cgroup_lock_live_group() a bool - use a #define'd constant for the local buffer size in read/write functions Signed-off-by: Paul Menage Cc: Paul Jackson Cc: Pavel Emelyanov Cc: Balbir Singh Acked-by: Serge Hallyn Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 4 ++-- kernel/cgroup.c | 21 ++++++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e78377a91a74..cc59d3a21d87 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -21,11 +21,13 @@ struct cgroupfs_root; struct cgroup_subsys; struct inode; +struct cgroup; extern int cgroup_init_early(void); extern int cgroup_init(void); extern void cgroup_init_smp(void); extern void cgroup_lock(void); +extern bool cgroup_lock_live_group(struct cgroup *cgrp); extern void cgroup_unlock(void); extern void cgroup_fork(struct task_struct *p); extern void cgroup_fork_callbacks(struct task_struct *p); @@ -295,8 +297,6 @@ int cgroup_add_files(struct cgroup *cgrp, int cgroup_is_removed(const struct cgroup *cgrp); -int cgroup_lock_live_group(struct cgroup *cgrp); - int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); int cgroup_task_count(const struct cgroup *cgrp); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0120b5d67a73..a14122ecaa5e 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1329,10 +1329,10 @@ enum cgroup_filetype { * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive. * @cgrp: the cgroup to be checked for liveness * - * Returns true (with lock held) on success, or false (with no lock - * held) on failure. 
+ * On success, returns true; the lock should be later released with + * cgroup_unlock(). On failure returns false with no lock held. */ -int cgroup_lock_live_group(struct cgroup *cgrp) +bool cgroup_lock_live_group(struct cgroup *cgrp) { mutex_lock(&cgroup_mutex); if (cgroup_is_removed(cgrp)) { @@ -1349,7 +1349,7 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, if (!cgroup_lock_live_group(cgrp)) return -ENODEV; strcpy(cgrp->root->release_agent_path, buffer); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); return 0; } @@ -1360,16 +1360,19 @@ static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft, return -ENODEV; seq_puts(seq, cgrp->root->release_agent_path); seq_putc(seq, '\n'); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); return 0; } +/* A buffer size big enough for numbers or short strings */ +#define CGROUP_LOCAL_BUFFER_SIZE 64 + static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft, struct file *file, const char __user *userbuf, size_t nbytes, loff_t *unused_ppos) { - char buffer[64]; + char buffer[CGROUP_LOCAL_BUFFER_SIZE]; int retval = 0; char *end; @@ -1403,7 +1406,7 @@ static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft, const char __user *userbuf, size_t nbytes, loff_t *unused_ppos) { - char local_buffer[64]; + char local_buffer[CGROUP_LOCAL_BUFFER_SIZE]; int retval = 0; size_t max_bytes = cft->max_write_len; char *buffer = local_buffer; @@ -1518,7 +1521,7 @@ static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft, char __user *buf, size_t nbytes, loff_t *ppos) { - char tmp[64]; + char tmp[CGROUP_LOCAL_BUFFER_SIZE]; u64 val = cft->read_u64(cgrp, cft); int len = sprintf(tmp, "%llu\n", (unsigned long long) val); @@ -1530,7 +1533,7 @@ static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft, char __user *buf, size_t nbytes, loff_t *ppos) { - char tmp[64]; + char tmp[CGROUP_LOCAL_BUFFER_SIZE]; s64 val = cft->read_s64(cgrp, cft); int len = sprintf(tmp, "%lld\n", (long long) val); -- cgit v1.2.3 From 856c13aa1ff6136c1968414fdea5938ea9d5ebf2 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Fri, 25 Jul 2008 01:47:04 -0700 Subject: cgroup files: convert res_counter_write() to be a cgroups write_string() handler Currently res_counter_write() is a raw file handler even though it's ultimately taking a number, since in some cases it wants to pre-process the string when converting it to a number. This patch converts res_counter_write() from a raw file handler to a write_string() handler; this allows some of the boilerplate copying/locking/checking to be removed, and simplies the cleanup path, since these functions are now performed by the cgroups framework. 
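As a hedged sketch (editor's illustration, not from the patch), a converted control file ends up looking roughly like this; example_group_from_cgroup() and EXAMPLE_RES_LIMIT are hypothetical stand-ins for a controller's own helpers:

static int example_limit_write(struct cgroup *cgrp, struct cftype *cft,
			       const char *buffer)
{
	/* 'buffer' arrives already copied from userspace, nul-terminated
	 * and stripped by the cgroups core */
	return res_counter_write(&example_group_from_cgroup(cgrp)->res,
				 cft->private, buffer,
				 res_counter_memparse_write_strategy);
}

static struct cftype example_limit_file = {
	.name		= "limit_in_bytes",
	.private	= EXAMPLE_RES_LIMIT,
	.write_string	= example_limit_write,
	/* .max_write_len left at 0: the default local buffer is enough */
};

Compare with the mem_cgroup_write() and mem_cgroup_files[] hunks below, which do exactly this for memory.limit_in_bytes.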
[lizf@cn.fujitsu.com: build fix] Signed-off-by: Paul Menage Cc: Paul Jackson Cc: Pavel Emelyanov Cc: Balbir Singh Cc: Serge Hallyn Cc: KAMEZAWA Hiroyuki Signed-off-by: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 11 ++++++++--- kernel/res_counter.c | 48 ++++++++++++++++++++------------------------- mm/memcontrol.c | 24 +++++------------------ 3 files changed, 34 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 125660e7793f..290205dfe094 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -63,9 +63,14 @@ u64 res_counter_read_u64(struct res_counter *counter, int member); ssize_t res_counter_read(struct res_counter *counter, int member, const char __user *buf, size_t nbytes, loff_t *pos, int (*read_strategy)(unsigned long long val, char *s)); -ssize_t res_counter_write(struct res_counter *counter, int member, - const char __user *buf, size_t nbytes, loff_t *pos, - int (*write_strategy)(char *buf, unsigned long long *val)); + +typedef int (*write_strategy_fn)(const char *buf, unsigned long long *val); + +int res_counter_memparse_write_strategy(const char *buf, + unsigned long long *res); + +int res_counter_write(struct res_counter *counter, int member, + const char *buffer, write_strategy_fn write_strategy); /* * the field descriptors. one for each member of res_counter diff --git a/kernel/res_counter.c b/kernel/res_counter.c index d3c61b4ebef2..f275c8eca772 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -13,6 +13,7 @@ #include #include #include +#include void res_counter_init(struct res_counter *counter) { @@ -102,44 +103,37 @@ u64 res_counter_read_u64(struct res_counter *counter, int member) return *res_counter_member(counter, member); } -ssize_t res_counter_write(struct res_counter *counter, int member, - const char __user *userbuf, size_t nbytes, loff_t *pos, - int (*write_strategy)(char *st_buf, unsigned long long *val)) +int res_counter_memparse_write_strategy(const char *buf, + unsigned long long *res) { - int ret; - char *buf, *end; - unsigned long flags; - unsigned long long tmp, *val; - - buf = kmalloc(nbytes + 1, GFP_KERNEL); - ret = -ENOMEM; - if (buf == NULL) - goto out; + char *end; + /* FIXME - make memparse() take const char* args */ + *res = memparse((char *)buf, &end); + if (*end != '\0') + return -EINVAL; - buf[nbytes] = '\0'; - ret = -EFAULT; - if (copy_from_user(buf, userbuf, nbytes)) - goto out_free; + *res = PAGE_ALIGN(*res); + return 0; +} - ret = -EINVAL; +int res_counter_write(struct res_counter *counter, int member, + const char *buf, write_strategy_fn write_strategy) +{ + char *end; + unsigned long flags; + unsigned long long tmp, *val; - strstrip(buf); if (write_strategy) { - if (write_strategy(buf, &tmp)) { - goto out_free; - } + if (write_strategy(buf, &tmp)) + return -EINVAL; } else { tmp = simple_strtoull(buf, &end, 10); if (*end != '\0') - goto out_free; + return -EINVAL; } spin_lock_irqsave(&counter->lock, flags); val = res_counter_member(counter, member); *val = tmp; spin_unlock_irqrestore(&counter->lock, flags); - ret = nbytes; -out_free: - kfree(buf); -out: - return ret; + return 0; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e46451e1d9b7..7385d58fb061 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -838,32 +838,18 @@ out: return ret; } -static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp) -{ - *tmp = memparse(buf, &buf); - if (*buf != 
'\0') - return -EINVAL; - - /* - * Round up the value to the closest page size - */ - *tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT; - return 0; -} - static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) { return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res, cft->private); } -static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, - struct file *file, const char __user *userbuf, - size_t nbytes, loff_t *ppos) +static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, + const char *buffer) { return res_counter_write(&mem_cgroup_from_cont(cont)->res, - cft->private, userbuf, nbytes, ppos, - mem_cgroup_write_strategy); + cft->private, buffer, + res_counter_memparse_write_strategy); } static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) @@ -940,7 +926,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "limit_in_bytes", .private = RES_LIMIT, - .write = mem_cgroup_write, + .write_string = mem_cgroup_write, .read_u64 = mem_cgroup_read, }, { -- cgit v1.2.3 From e885dcde75685e09f23cffae1f6d5169c105b8a0 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Fri, 25 Jul 2008 01:47:06 -0700 Subject: cgroup_clone: use pid of newly created task for new cgroup cgroup_clone creates a new cgroup with the pid of the task. This works correctly for unshare, but for clone cgroup_clone is called from copy_namespaces inside copy_process, which happens before the new pid is created. As a result, the new cgroup was created with current's pid. This patch: 1. Moves the call inside copy_process to after the new pid is created 2. Passes the struct pid into ns_cgroup_clone (as it is not yet attached to the task) 3. Passes a name from ns_cgroup_clone() into cgroup_clone() so as to keep cgroup_clone() itself simpler 4. Uses pid_vnr() to get the process id value, so that the pid used to name the new cgroup is always the pid as it would be known to the task which did the cloning or unsharing. I think that is the most intuitive thing to do. This way, task t1 does clone(CLONE_NEWPID) to get t2, which does clone(CLONE_NEWPID) to get t3, then the cgroup for t3 will be named for the pid by which t2 knows t3. (Thanks to Dan Smith for finding the main bug) Changelog: June 11: Incorporate Paul Menage's feedback: don't pass NULL to ns_cgroup_clone from unshare, and reduce patch size by using 'nodename' in cgroup_clone. 
June 10: Original version [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Serge Hallyn Acked-by: Paul Menage Tested-by: Dan Smith Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 3 ++- include/linux/nsproxy.h | 7 +++++-- kernel/cgroup.c | 7 +++---- kernel/fork.c | 6 ++++++ kernel/ns_cgroup.c | 8 ++++++-- kernel/nsproxy.c | 8 +------- 6 files changed, 23 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index cc59d3a21d87..c98dd7cb7076 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -364,7 +364,8 @@ static inline struct cgroup* task_cgroup(struct task_struct *task, return task_subsys_state(task, subsys_id)->cgroup; } -int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss); +int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss, + char *nodename); /* A cgroup_iter should be treated as an opaque object */ struct cgroup_iter { diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 0e66b57631fc..c8a768e59640 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -82,9 +82,12 @@ static inline void get_nsproxy(struct nsproxy *ns) } #ifdef CONFIG_CGROUP_NS -int ns_cgroup_clone(struct task_struct *tsk); +int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid); #else -static inline int ns_cgroup_clone(struct task_struct *tsk) { return 0; } +static inline int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid) +{ + return 0; +} #endif #endif diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 86b71e714e13..66ec9fd21e0c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2848,16 +2848,17 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) * cgroup_clone - clone the cgroup the given subsystem is attached to * @tsk: the task to be moved * @subsys: the given subsystem + * @nodename: the name for the new cgroup * * Duplicate the current cgroup in the hierarchy that the given * subsystem is attached to, and move this task into the new * child. */ -int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) +int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, + char *nodename) { struct dentry *dentry; int ret = 0; - char nodename[MAX_CGROUP_TYPE_NAMELEN]; struct cgroup *parent, *child; struct inode *inode; struct css_set *cg; @@ -2882,8 +2883,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) cg = tsk->cgroups; parent = task_cgroup(tsk, subsys->subsys_id); - snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "%d", tsk->pid); - /* Pin the hierarchy */ atomic_inc(&parent->root->sb->s_active); diff --git a/kernel/fork.c b/kernel/fork.c index 5a5d6fef341d..228f80c9155a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1107,6 +1107,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; + if (current->nsproxy != p->nsproxy) { + retval = ns_cgroup_clone(p, pid); + if (retval) + goto bad_fork_free_pid; + } + p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? 
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c index 48d7ed6fc3a4..43c2111cd54d 100644 --- a/kernel/ns_cgroup.c +++ b/kernel/ns_cgroup.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -24,9 +25,12 @@ static inline struct ns_cgroup *cgroup_to_ns( struct ns_cgroup, css); } -int ns_cgroup_clone(struct task_struct *task) +int ns_cgroup_clone(struct task_struct *task, struct pid *pid) { - return cgroup_clone(task, &ns_subsys); + char name[PROC_NUMBUF]; + + snprintf(name, PROC_NUMBUF, "%d", pid_vnr(pid)); + return cgroup_clone(task, &ns_subsys, name); } /* diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index adc785146a1c..21575fc46d05 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -157,12 +157,6 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) goto out; } - err = ns_cgroup_clone(tsk); - if (err) { - put_nsproxy(new_ns); - goto out; - } - tsk->nsproxy = new_ns; out: @@ -209,7 +203,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, goto out; } - err = ns_cgroup_clone(current); + err = ns_cgroup_clone(current, task_pid(current)); if (err) put_nsproxy(*new_nsp); -- cgit v1.2.3 From e8589cc189f96b87348ae83ea4db38eaac624135 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 25 Jul 2008 01:47:10 -0700 Subject: memcg: better migration handling This patch changes page migration under memory controller to use a different algorithm. (thanks to Christoph for new idea.) Before: - page_cgroup is migrated from an old page to a new page. After: - a new page is accounted , no reuse of page_cgroup. Pros: - We can avoid compliated lock depndencies and races in migration. Cons: - new param to mem_cgroup_charge_common(). - mem_cgroup_getref() is added for handling ref_cnt ping-pong. This version simplifies complicated lock dependency in page migraiton under memory resource controller. new refcnt sequence is following. a mapped page: prepage_migration() ..... +1 to NEW page try_to_unmap() ..... all refs to OLD page is gone. move_pages() ..... +1 to NEW page if page cache. remap... ..... all refs from *map* is added to NEW one. end_migration() ..... -1 to New page. page's mapcount + (page_is_cache) refs are added to NEW one. Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Pavel Emelyanov Cc: Li Zefan Cc: YAMAMOTO Takashi Cc: Hugh Dickins Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 ++-- mm/memcontrol.c | 128 +++++++++++++++++++++++---------------------- mm/migrate.c | 22 +++++--- 3 files changed, 86 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e6608776bc96..84ead2aa6f18 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -50,9 +50,10 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); #define mm_match_cgroup(mm, cgroup) \ ((cgroup) == mem_cgroup_from_task((mm)->owner)) -extern int mem_cgroup_prepare_migration(struct page *page); +extern int +mem_cgroup_prepare_migration(struct page *page, struct page *newpage); extern void mem_cgroup_end_migration(struct page *page); -extern void mem_cgroup_page_migration(struct page *page, struct page *newpage); +extern int mem_cgroup_getref(struct page *page); /* * For memory reclaim. 
@@ -112,7 +113,8 @@ static inline int task_in_mem_cgroup(struct task_struct *task, return 1; } -static inline int mem_cgroup_prepare_migration(struct page *page) +static inline int +mem_cgroup_prepare_migration(struct page *page, struct page *newpage) { return 0; } @@ -121,8 +123,7 @@ static inline void mem_cgroup_end_migration(struct page *page) { } -static inline void -mem_cgroup_page_migration(struct page *page, struct page *newpage) +static inline void mem_cgroup_getref(struct page *page) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 90ccc1326356..da5912b84551 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -524,7 +524,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, * < 0 if the cgroup is over its limit */ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask, enum charge_type ctype) + gfp_t gfp_mask, enum charge_type ctype, + struct mem_cgroup *memcg) { struct mem_cgroup *mem; struct page_cgroup *pc; @@ -569,16 +570,21 @@ retry: * thread group leader migrates. It's possible that mm is not * set, if so charge the init_mm (happens for pagecache usage). */ - if (!mm) - mm = &init_mm; + if (!memcg) { + if (!mm) + mm = &init_mm; - rcu_read_lock(); - mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); - /* - * For every charge from the cgroup, increment reference count - */ - css_get(&mem->css); - rcu_read_unlock(); + rcu_read_lock(); + mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); + /* + * For every charge from the cgroup, increment reference count + */ + css_get(&mem->css); + rcu_read_unlock(); + } else { + mem = memcg; + css_get(&memcg->css); + } while (res_counter_charge(&mem->res, PAGE_SIZE)) { if (!(gfp_mask & __GFP_WAIT)) @@ -648,7 +654,7 @@ err: int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { return mem_cgroup_charge_common(page, mm, gfp_mask, - MEM_CGROUP_CHARGE_TYPE_MAPPED); + MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL); } int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, @@ -657,7 +663,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, if (!mm) mm = &init_mm; return mem_cgroup_charge_common(page, mm, gfp_mask, - MEM_CGROUP_CHARGE_TYPE_CACHE); + MEM_CGROUP_CHARGE_TYPE_CACHE, NULL); +} + +int mem_cgroup_getref(struct page *page) +{ + struct page_cgroup *pc; + + if (mem_cgroup_subsys.disabled) + return 0; + + lock_page_cgroup(page); + pc = page_get_page_cgroup(page); + VM_BUG_ON(!pc); + pc->ref_cnt++; + unlock_page_cgroup(page); + return 0; } /* @@ -707,65 +728,39 @@ unlock: } /* - * Returns non-zero if a page (under migration) has valid page_cgroup member. - * Refcnt of page_cgroup is incremented. + * Before starting migration, account against new page. 
*/ -int mem_cgroup_prepare_migration(struct page *page) +int mem_cgroup_prepare_migration(struct page *page, struct page *newpage) { struct page_cgroup *pc; + struct mem_cgroup *mem = NULL; + enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED; + int ret = 0; if (mem_cgroup_subsys.disabled) return 0; lock_page_cgroup(page); pc = page_get_page_cgroup(page); - if (pc) - pc->ref_cnt++; + if (pc) { + mem = pc->mem_cgroup; + css_get(&mem->css); + if (pc->flags & PAGE_CGROUP_FLAG_CACHE) + ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; + } unlock_page_cgroup(page); - return pc != NULL; -} - -void mem_cgroup_end_migration(struct page *page) -{ - mem_cgroup_uncharge_page(page); + if (mem) { + ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL, + ctype, mem); + css_put(&mem->css); + } + return ret; } -/* - * We know both *page* and *newpage* are now not-on-LRU and PG_locked. - * And no race with uncharge() routines because page_cgroup for *page* - * has extra one reference by mem_cgroup_prepare_migration. - */ -void mem_cgroup_page_migration(struct page *page, struct page *newpage) +/* remove redundant charge */ +void mem_cgroup_end_migration(struct page *newpage) { - struct page_cgroup *pc; - struct mem_cgroup_per_zone *mz; - unsigned long flags; - - lock_page_cgroup(page); - pc = page_get_page_cgroup(page); - if (!pc) { - unlock_page_cgroup(page); - return; - } - - mz = page_cgroup_zoneinfo(pc); - spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_remove_list(mz, pc); - spin_unlock_irqrestore(&mz->lru_lock, flags); - - page_assign_page_cgroup(page, NULL); - unlock_page_cgroup(page); - - pc->page = newpage; - lock_page_cgroup(newpage); - page_assign_page_cgroup(newpage, pc); - - mz = page_cgroup_zoneinfo(pc); - spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_add_list(mz, pc); - spin_unlock_irqrestore(&mz->lru_lock, flags); - - unlock_page_cgroup(newpage); + mem_cgroup_uncharge_page(newpage); } /* @@ -795,12 +790,19 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem, page = pc->page; get_page(page); spin_unlock_irqrestore(&mz->lru_lock, flags); - mem_cgroup_uncharge_page(page); - put_page(page); - if (--count <= 0) { - count = FORCE_UNCHARGE_BATCH; + /* + * Check if this page is on LRU. !LRU page can be found + * if it's under page migration. + */ + if (PageLRU(page)) { + mem_cgroup_uncharge_page(page); + put_page(page); + if (--count <= 0) { + count = FORCE_UNCHARGE_BATCH; + cond_resched(); + } + } else cond_resched(); - } spin_lock_irqsave(&mz->lru_lock, flags); } spin_unlock_irqrestore(&mz->lru_lock, flags); diff --git a/mm/migrate.c b/mm/migrate.c index 376cceba82f9..f6d7f8efd1a8 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -358,6 +358,10 @@ static int migrate_page_move_mapping(struct address_space *mapping, __inc_zone_page_state(newpage, NR_FILE_PAGES); write_unlock_irq(&mapping->tree_lock); + if (!PageSwapCache(newpage)) { + mem_cgroup_uncharge_page(page); + mem_cgroup_getref(newpage); + } return 0; } @@ -611,7 +615,6 @@ static int move_to_new_page(struct page *newpage, struct page *page) rc = fallback_migrate_page(mapping, newpage, page); if (!rc) { - mem_cgroup_page_migration(page, newpage); remove_migration_ptes(page, newpage); } else newpage->mapping = NULL; @@ -641,6 +644,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, /* page was freed from under us. So we are done. 
*/ goto move_newpage; + charge = mem_cgroup_prepare_migration(page, newpage); + if (charge == -ENOMEM) { + rc = -ENOMEM; + goto move_newpage; + } + /* prepare cgroup just returns 0 or -ENOMEM */ + BUG_ON(charge); + rc = -EAGAIN; if (TestSetPageLocked(page)) { if (!force) @@ -692,19 +703,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, goto rcu_unlock; } - charge = mem_cgroup_prepare_migration(page); /* Establish migration ptes or remove ptes */ try_to_unmap(page, 1); if (!page_mapped(page)) rc = move_to_new_page(newpage, page); - if (rc) { + if (rc) remove_migration_ptes(page, page); - if (charge) - mem_cgroup_end_migration(page); - } else if (charge) - mem_cgroup_end_migration(newpage); rcu_unlock: if (rcu_locked) rcu_read_unlock(); @@ -725,6 +731,8 @@ unlock: } move_newpage: + if (!charge) + mem_cgroup_end_migration(newpage); /* * Move the new page to the LRU. If migration was not successful * then this will free the page. -- cgit v1.2.3 From 69029cd550284e32de13d6dd2f77b723c8a0e444 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 25 Jul 2008 01:47:14 -0700 Subject: memcg: remove refcnt from page_cgroup memcg: performance improvements Patch Description 1/5 ... remove refcnt fron page_cgroup patch (shmem handling is fixed) 2/5 ... swapcache handling patch 3/5 ... add helper function for shmem's memory reclaim patch 4/5 ... optimize by likely/unlikely ppatch 5/5 ... remove redundunt check patch (shmem handling is fixed.) Unix bench result. == 2.6.26-rc2-mm1 + memory resource controller Execl Throughput 2915.4 lps (29.6 secs, 3 samples) C Compiler Throughput 1019.3 lpm (60.0 secs, 3 samples) Shell Scripts (1 concurrent) 5796.0 lpm (60.0 secs, 3 samples) Shell Scripts (8 concurrent) 1097.7 lpm (60.0 secs, 3 samples) Shell Scripts (16 concurrent) 565.3 lpm (60.0 secs, 3 samples) File Read 1024 bufsize 2000 maxblocks 1022128.0 KBps (30.0 secs, 3 samples) File Write 1024 bufsize 2000 maxblocks 544057.0 KBps (30.0 secs, 3 samples) File Copy 1024 bufsize 2000 maxblocks 346481.0 KBps (30.0 secs, 3 samples) File Read 256 bufsize 500 maxblocks 319325.0 KBps (30.0 secs, 3 samples) File Write 256 bufsize 500 maxblocks 148788.0 KBps (30.0 secs, 3 samples) File Copy 256 bufsize 500 maxblocks 99051.0 KBps (30.0 secs, 3 samples) File Read 4096 bufsize 8000 maxblocks 2058917.0 KBps (30.0 secs, 3 samples) File Write 4096 bufsize 8000 maxblocks 1606109.0 KBps (30.0 secs, 3 samples) File Copy 4096 bufsize 8000 maxblocks 854789.0 KBps (30.0 secs, 3 samples) Dc: sqrt(2) to 99 decimal places 126145.2 lpm (30.0 secs, 3 samples) INDEX VALUES TEST BASELINE RESULT INDEX Execl Throughput 43.0 2915.4 678.0 File Copy 1024 bufsize 2000 maxblocks 3960.0 346481.0 875.0 File Copy 256 bufsize 500 maxblocks 1655.0 99051.0 598.5 File Copy 4096 bufsize 8000 maxblocks 5800.0 854789.0 1473.8 Shell Scripts (8 concurrent) 6.0 1097.7 1829.5 ========= FINAL SCORE 991.3 == 2.6.26-rc2-mm1 + this set == Execl Throughput 3012.9 lps (29.9 secs, 3 samples) C Compiler Throughput 981.0 lpm (60.0 secs, 3 samples) Shell Scripts (1 concurrent) 5872.0 lpm (60.0 secs, 3 samples) Shell Scripts (8 concurrent) 1120.3 lpm (60.0 secs, 3 samples) Shell Scripts (16 concurrent) 578.0 lpm (60.0 secs, 3 samples) File Read 1024 bufsize 2000 maxblocks 1003993.0 KBps (30.0 secs, 3 samples) File Write 1024 bufsize 2000 maxblocks 550452.0 KBps (30.0 secs, 3 samples) File Copy 1024 bufsize 2000 maxblocks 347159.0 KBps (30.0 secs, 3 samples) File Read 256 bufsize 500 maxblocks 314644.0 KBps (30.0 secs, 3 samples) File 
Write 256 bufsize 500 maxblocks 151852.0 KBps (30.0 secs, 3 samples) File Copy 256 bufsize 500 maxblocks 101000.0 KBps (30.0 secs, 3 samples) File Read 4096 bufsize 8000 maxblocks 2033256.0 KBps (30.0 secs, 3 samples) File Write 4096 bufsize 8000 maxblocks 1611814.0 KBps (30.0 secs, 3 samples) File Copy 4096 bufsize 8000 maxblocks 847979.0 KBps (30.0 secs, 3 samples) Dc: sqrt(2) to 99 decimal places 128148.7 lpm (30.0 secs, 3 samples) INDEX VALUES TEST BASELINE RESULT INDEX Execl Throughput 43.0 3012.9 700.7 File Copy 1024 bufsize 2000 maxblocks 3960.0 347159.0 876.7 File Copy 256 bufsize 500 maxblocks 1655.0 101000.0 610.3 File Copy 4096 bufsize 8000 maxblocks 5800.0 847979.0 1462.0 Shell Scripts (8 concurrent) 6.0 1120.3 1867.2 ========= FINAL SCORE 1004.6 This patch: Remove refcnt from page_cgroup(). After this, * A page is charged only when !page_mapped() && no page_cgroup is assigned. * Anon page is newly mapped. * File page is added to mapping->tree. * A page is uncharged only when * Anon page is fully unmapped. * File page is removed from LRU. There is no change in behavior from user's view. This patch also removes unnecessary calls in rmap.c which was used only for refcnt mangement. [akpm@linux-foundation.org: fix warning] [hugh@veritas.com: fix shmem_unuse_inode charging] Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Li Zefan Cc: Hugh Dickins Cc: YAMAMOTO Takashi Cc: Paul Menage Cc: David Rientjes Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 ++--- mm/filemap.c | 6 +-- mm/memcontrol.c | 109 ++++++++++++++++++++++++++------------------- mm/migrate.c | 3 +- mm/rmap.c | 14 +----- mm/shmem.c | 35 ++++++++++----- 6 files changed, 97 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 84ead2aa6f18..b4980b8f048e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -35,6 +35,7 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); extern void mem_cgroup_uncharge_page(struct page *page); +extern void mem_cgroup_uncharge_cache_page(struct page *page); extern void mem_cgroup_move_lists(struct page *page, bool active); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, @@ -53,7 +54,6 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); extern int mem_cgroup_prepare_migration(struct page *page, struct page *newpage); extern void mem_cgroup_end_migration(struct page *page); -extern int mem_cgroup_getref(struct page *page); /* * For memory reclaim. 
@@ -98,6 +98,10 @@ static inline void mem_cgroup_uncharge_page(struct page *page) { } +static inline void mem_cgroup_uncharge_cache_page(struct page *page) +{ +} + static inline void mem_cgroup_move_lists(struct page *page, bool active) { } @@ -123,10 +127,6 @@ static inline void mem_cgroup_end_migration(struct page *page) { } -static inline void mem_cgroup_getref(struct page *page) -{ -} - static inline int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem) { return 0; diff --git a/mm/filemap.c b/mm/filemap.c index 5d4c880d7cd9..2d3ec1ffc66e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -115,7 +115,7 @@ void __remove_from_page_cache(struct page *page) { struct address_space *mapping = page->mapping; - mem_cgroup_uncharge_page(page); + mem_cgroup_uncharge_cache_page(page); radix_tree_delete(&mapping->page_tree, page->index); page->mapping = NULL; mapping->nrpages--; @@ -474,12 +474,12 @@ int add_to_page_cache(struct page *page, struct address_space *mapping, mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); } else - mem_cgroup_uncharge_page(page); + mem_cgroup_uncharge_cache_page(page); write_unlock_irq(&mapping->tree_lock); radix_tree_preload_end(); } else - mem_cgroup_uncharge_page(page); + mem_cgroup_uncharge_cache_page(page); out: return error; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index da5912b84551..a61706193c31 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -166,7 +166,6 @@ struct page_cgroup { struct list_head lru; /* per cgroup LRU list */ struct page *page; struct mem_cgroup *mem_cgroup; - int ref_cnt; /* cached, mapped, migrating */ int flags; }; #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ @@ -185,6 +184,7 @@ static enum zone_type page_cgroup_zid(struct page_cgroup *pc) enum charge_type { MEM_CGROUP_CHARGE_TYPE_CACHE = 0, MEM_CGROUP_CHARGE_TYPE_MAPPED, + MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */ }; /* @@ -552,9 +552,7 @@ retry: */ if (pc) { VM_BUG_ON(pc->page != page); - VM_BUG_ON(pc->ref_cnt <= 0); - - pc->ref_cnt++; + VM_BUG_ON(!pc->mem_cgroup); unlock_page_cgroup(page); goto done; } @@ -570,10 +568,7 @@ retry: * thread group leader migrates. It's possible that mm is not * set, if so charge the init_mm (happens for pagecache usage). */ - if (!memcg) { - if (!mm) - mm = &init_mm; - + if (likely(!memcg)) { rcu_read_lock(); mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); /* @@ -609,7 +604,6 @@ retry: } } - pc->ref_cnt = 1; pc->mem_cgroup = mem; pc->page = page; /* @@ -653,6 +647,17 @@ err: int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { + /* + * If already mapped, we don't have to account. + * If page cache, page->mapping has address_space. + * But page->mapping may have out-of-use anon_vma pointer, + * detecit it by PageAnon() check. newly-mapped-anon's page->mapping + * is NULL. 
+ */ + if (page_mapped(page) || (page->mapping && !PageAnon(page))) + return 0; + if (unlikely(!mm)) + mm = &init_mm; return mem_cgroup_charge_common(page, mm, gfp_mask, MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL); } @@ -660,32 +665,17 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { - if (!mm) + if (unlikely(!mm)) mm = &init_mm; return mem_cgroup_charge_common(page, mm, gfp_mask, MEM_CGROUP_CHARGE_TYPE_CACHE, NULL); } -int mem_cgroup_getref(struct page *page) -{ - struct page_cgroup *pc; - - if (mem_cgroup_subsys.disabled) - return 0; - - lock_page_cgroup(page); - pc = page_get_page_cgroup(page); - VM_BUG_ON(!pc); - pc->ref_cnt++; - unlock_page_cgroup(page); - return 0; -} - /* - * Uncharging is always a welcome operation, we never complain, simply - * uncharge. + * uncharge if !page_mapped(page) */ -void mem_cgroup_uncharge_page(struct page *page) +static void +__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) { struct page_cgroup *pc; struct mem_cgroup *mem; @@ -704,29 +694,41 @@ void mem_cgroup_uncharge_page(struct page *page) goto unlock; VM_BUG_ON(pc->page != page); - VM_BUG_ON(pc->ref_cnt <= 0); - if (--(pc->ref_cnt) == 0) { - mz = page_cgroup_zoneinfo(pc); - spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_remove_list(mz, pc); - spin_unlock_irqrestore(&mz->lru_lock, flags); + if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) + && ((pc->flags & PAGE_CGROUP_FLAG_CACHE) + || page_mapped(page))) + goto unlock; - page_assign_page_cgroup(page, NULL); - unlock_page_cgroup(page); + mz = page_cgroup_zoneinfo(pc); + spin_lock_irqsave(&mz->lru_lock, flags); + __mem_cgroup_remove_list(mz, pc); + spin_unlock_irqrestore(&mz->lru_lock, flags); - mem = pc->mem_cgroup; - res_counter_uncharge(&mem->res, PAGE_SIZE); - css_put(&mem->css); + page_assign_page_cgroup(page, NULL); + unlock_page_cgroup(page); - kmem_cache_free(page_cgroup_cache, pc); - return; - } + mem = pc->mem_cgroup; + res_counter_uncharge(&mem->res, PAGE_SIZE); + css_put(&mem->css); + kmem_cache_free(page_cgroup_cache, pc); + return; unlock: unlock_page_cgroup(page); } +void mem_cgroup_uncharge_page(struct page *page) +{ + __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED); +} + +void mem_cgroup_uncharge_cache_page(struct page *page) +{ + VM_BUG_ON(page_mapped(page)); + __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); +} + /* * Before starting migration, account against new page. */ @@ -757,15 +759,29 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage) return ret; } -/* remove redundant charge */ +/* remove redundant charge if migration failed*/ void mem_cgroup_end_migration(struct page *newpage) { - mem_cgroup_uncharge_page(newpage); + /* + * At success, page->mapping is not NULL. + * special rollback care is necessary when + * 1. at migration failure. (newpage->mapping is cleared in this case) + * 2. the newpage was moved but not remapped again because the task + * exits and the newpage is obsolete. In this case, the new page + * may be a swapcache. So, we just call mem_cgroup_uncharge_page() + * always for avoiding mess. The page_cgroup will be removed if + * unnecessary. File cache pages is still on radix-tree. Don't + * care it. 
+ */ + if (!newpage->mapping) + __mem_cgroup_uncharge_common(newpage, + MEM_CGROUP_CHARGE_TYPE_FORCE); + else if (PageAnon(newpage)) + mem_cgroup_uncharge_page(newpage); } /* * This routine traverse page_cgroup in given list and drop them all. - * This routine ignores page_cgroup->ref_cnt. * *And* this routine doesn't reclaim page itself, just removes page_cgroup. */ #define FORCE_UNCHARGE_BATCH (128) @@ -795,7 +811,8 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem, * if it's under page migration. */ if (PageLRU(page)) { - mem_cgroup_uncharge_page(page); + __mem_cgroup_uncharge_common(page, + MEM_CGROUP_CHARGE_TYPE_FORCE); put_page(page); if (--count <= 0) { count = FORCE_UNCHARGE_BATCH; diff --git a/mm/migrate.c b/mm/migrate.c index f6d7f8efd1a8..d8c65a65c61d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -359,8 +359,7 @@ static int migrate_page_move_mapping(struct address_space *mapping, write_unlock_irq(&mapping->tree_lock); if (!PageSwapCache(newpage)) { - mem_cgroup_uncharge_page(page); - mem_cgroup_getref(newpage); + mem_cgroup_uncharge_cache_page(page); } return 0; diff --git a/mm/rmap.c b/mm/rmap.c index bf0a5b7cfb8e..abbd29f7c43f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -576,14 +576,8 @@ void page_add_anon_rmap(struct page *page, VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); if (atomic_inc_and_test(&page->_mapcount)) __page_set_anon_rmap(page, vma, address); - else { + else __page_check_anon_rmap(page, vma, address); - /* - * We unconditionally charged during prepare, we uncharge here - * This takes care of balancing the reference counts - */ - mem_cgroup_uncharge_page(page); - } } /** @@ -614,12 +608,6 @@ void page_add_file_rmap(struct page *page) { if (atomic_inc_and_test(&page->_mapcount)) __inc_zone_page_state(page, NR_FILE_MAPPED); - else - /* - * We unconditionally charged during prepare, we uncharge here - * This takes care of balancing the reference counts - */ - mem_cgroup_uncharge_page(page); } #ifdef CONFIG_DEBUG_VM diff --git a/mm/shmem.c b/mm/shmem.c index 9ffbea9b79e1..d58305e8a484 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -922,20 +922,26 @@ found: error = 1; if (!inode) goto out; - /* Precharge page while we can wait, compensate afterwards */ + /* Precharge page using GFP_KERNEL while we can wait */ error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); if (error) goto out; error = radix_tree_preload(GFP_KERNEL); - if (error) - goto uncharge; + if (error) { + mem_cgroup_uncharge_cache_page(page); + goto out; + } error = 1; spin_lock(&info->lock); ptr = shmem_swp_entry(info, idx, NULL); - if (ptr && ptr->val == entry.val) + if (ptr && ptr->val == entry.val) { error = add_to_page_cache(page, inode->i_mapping, idx, GFP_NOWAIT); + /* does mem_cgroup_uncharge_cache_page on error */ + } else /* we must compensate for our precharge above */ + mem_cgroup_uncharge_cache_page(page); + if (error == -EEXIST) { struct page *filepage = find_get_page(inode->i_mapping, idx); error = 1; @@ -961,8 +967,6 @@ found: shmem_swp_unmap(ptr); spin_unlock(&info->lock); radix_tree_preload_end(); -uncharge: - mem_cgroup_uncharge_page(page); out: unlock_page(page); page_cache_release(page); @@ -1319,7 +1323,7 @@ repeat: page_cache_release(swappage); goto failed; } - mem_cgroup_uncharge_page(swappage); + mem_cgroup_uncharge_cache_page(swappage); } page_cache_release(swappage); goto repeat; @@ -1358,6 +1362,8 @@ repeat: } if (!filepage) { + int ret; + spin_unlock(&info->lock); filepage = shmem_alloc_page(gfp, info, idx); if (!filepage) { @@ -1386,10 
+1392,18 @@ repeat: swap = *entry; shmem_swp_unmap(entry); } - if (error || swap.val || 0 != add_to_page_cache_lru( - filepage, mapping, idx, GFP_NOWAIT)) { + ret = error || swap.val; + if (ret) + mem_cgroup_uncharge_cache_page(filepage); + else + ret = add_to_page_cache_lru(filepage, mapping, + idx, GFP_NOWAIT); + /* + * At add_to_page_cache_lru() failure, uncharge will + * be done automatically. + */ + if (ret) { spin_unlock(&info->lock); - mem_cgroup_uncharge_page(filepage); page_cache_release(filepage); shmem_unacct_blocks(info->flags, 1); shmem_free_blocks(inode, 1); @@ -1398,7 +1412,6 @@ repeat: goto failed; goto repeat; } - mem_cgroup_uncharge_page(filepage); info->flags |= SHMEM_PAGEIN; } -- cgit v1.2.3 From c9b0ed51483cc2fc42bb801b6675c4231b0e4634 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 25 Jul 2008 01:47:15 -0700 Subject: memcg: helper function for relcaim from shmem. A new call, mem_cgroup_shrink_usage() is added for shmem handling and relacing non-standard usage of mem_cgroup_charge/uncharge. Now, shmem calls mem_cgroup_charge() just for reclaim some pages from mem_cgroup. In general, shmem is used by some process group and not for global resource (like file caches). So, it's reasonable to reclaim pages from mem_cgroup where shmem is mainly used. [hugh@veritas.com: shmem_getpage release page sooner] [hugh@veritas.com: mem_cgroup_shrink_usage css_put] Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Li Zefan Cc: YAMAMOTO Takashi Cc: Paul Menage Cc: David Rientjes Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 7 +++++++ mm/memcontrol.c | 26 ++++++++++++++++++++++++++ mm/shmem.c | 11 ++++------- 3 files changed, 37 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b4980b8f048e..fdf3967e1397 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -37,6 +37,8 @@ extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); extern void mem_cgroup_move_lists(struct page *page, bool active); +extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); + extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, unsigned long *scanned, int order, @@ -102,6 +104,11 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page) { } +static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) +{ + return 0; +} + static inline void mem_cgroup_move_lists(struct page *page, bool active) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a61706193c31..f46b8615de6c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -780,6 +780,32 @@ void mem_cgroup_end_migration(struct page *newpage) mem_cgroup_uncharge_page(newpage); } +/* + * A call to try to shrink memory usage under specified resource controller. + * This is typically used for page reclaiming for shmem for reducing side + * effect of page allocation from shmem, which is used by some mem_cgroup. 
+ */ +int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) +{ + struct mem_cgroup *mem; + int progress = 0; + int retry = MEM_CGROUP_RECLAIM_RETRIES; + + rcu_read_lock(); + mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); + css_get(&mem->css); + rcu_read_unlock(); + + do { + progress = try_to_free_mem_cgroup_pages(mem, gfp_mask); + } while (!progress && --retry); + + css_put(&mem->css); + if (!retry) + return -ENOMEM; + return 0; +} + /* * This routine traverse page_cgroup in given list and drop them all. * *And* this routine doesn't reclaim page itself, just removes page_cgroup. diff --git a/mm/shmem.c b/mm/shmem.c index d58305e8a484..f92fea94d037 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1315,17 +1315,14 @@ repeat: shmem_swp_unmap(entry); spin_unlock(&info->lock); unlock_page(swappage); + page_cache_release(swappage); if (error == -ENOMEM) { /* allow reclaim from this memory cgroup */ - error = mem_cgroup_cache_charge(swappage, - current->mm, gfp & ~__GFP_HIGHMEM); - if (error) { - page_cache_release(swappage); + error = mem_cgroup_shrink_usage(current->mm, + gfp); + if (error) goto failed; - } - mem_cgroup_uncharge_cache_page(swappage); } - page_cache_release(swappage); goto repeat; } } else if (sgp == SGP_READ && !filepage) { -- cgit v1.2.3 From 12b9804419cfb1c1bdac413f6c373af3b88d154b Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 25 Jul 2008 01:47:19 -0700 Subject: res_counter: limit change support ebusy Add an interface to set limit. This is necessary to memory resource controller because it shrinks usage at set limit. Other controllers may not need this interface to shrink usage because shrinking is not necessary or impossible. Acked-by: Balbir Singh Acked-by: Pavel Emelyanov Signed-off-by: KAMEZAWA Hiroyuki Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 290205dfe094..fdeadd9740dc 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -158,4 +158,20 @@ static inline void res_counter_reset_failcnt(struct res_counter *cnt) cnt->failcnt = 0; spin_unlock_irqrestore(&cnt->lock, flags); } + +static inline int res_counter_set_limit(struct res_counter *cnt, + unsigned long long limit) +{ + unsigned long flags; + int ret = -EBUSY; + + spin_lock_irqsave(&cnt->lock, flags); + if (cnt->usage < limit) { + cnt->limit = limit; + ret = 0; + } + spin_unlock_irqrestore(&cnt->lock, flags); + return ret; +} + #endif -- cgit v1.2.3 From 364d3c13c17f45da6d638011078d4c4d3070d719 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:36 -0700 Subject: ptrace: give more respect to SIGKILL ptrace_stop() has some complicated checks to prevent the scheduling in the TASK_TRACED state with the pending SIGKILL, but these checks are racy, and they depend on arch_ptrace_stop_needed(). This patch assumes that the traced task should die asap if it was killed by SIGKILL, in that case schedule()->signal_pending_state() has no reason to ignore the TASK_WAKEKILL part of TASK_TRACED, and we can kill this nasty special case. Note: do_exit()->ptrace_notify() is special, the killed task can already dequeue SIGKILL at this point. Another indication that fatal_signal_pending() is not exactly right. 
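For readers without the tree at hand, here is what signal_pending_state() reduces to once the hunk below drops the __TASK_STOPPED/__TASK_TRACED early return. This is a sketch reconstructed from the 2.6.26-era helper, so the surrounding checks are an approximation rather than verbatim source:

    static inline int signal_pending_state(long state, struct task_struct *p)
    {
            if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
                    return 0;
            if (!signal_pending(p))
                    return 0;

            /*
             * TASK_TRACED is TASK_WAKEKILL | __TASK_TRACED, so with the
             * special case gone a pending SIGKILL keeps a traced task
             * runnable instead of letting it schedule in TASK_TRACED.
             */
            return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
    }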
Signed-off-by: Oleg Nesterov Cc: Ingo Molnar Cc: Matthew Wilcox Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6aca4a16e377..79e749dbf81e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2054,9 +2054,6 @@ static inline int signal_pending_state(long state, struct task_struct *p) if (!signal_pending(p)) return 0; - if (state & (__TASK_STOPPED | __TASK_TRACED)) - return 0; - return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); } -- cgit v1.2.3 From 7b34e4283c685f5cc6ba6d30e939906eee0d4bcf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:37 -0700 Subject: introduce PF_KTHREAD flag Introduce the new PF_KTHREAD flag to mark the kernel threads. It is set by INIT_TASK() and copied to the forked childs (we could set it in kthreadd() along with PF_NOFREEZE instead). daemonize() was changed as well. In that case testing of PF_KTHREAD is racy, but daemonize() is hopeless anyway. This flag is cleared in do_execve(), before search_binary_handler(). Probably not the best place, we can do this in exec_mmap() or in start_thread(), or clear it along with PF_FORKNOEXEC. But I think this doesn't matter in practice, and if do_execve() fails kthread should die soon. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 1 + include/linux/init_task.h | 2 +- include/linux/sched.h | 1 + kernel/exit.c | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index af249af4ccab..cd2e8c9b1249 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1326,6 +1326,7 @@ int do_execve(char * filename, if (retval < 0) goto out; + current->flags &= ~PF_KTHREAD; retval = search_binary_handler(bprm,regs); if (retval >= 0) { /* execve success */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 93c45acf249a..021d8e720c79 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -122,7 +122,7 @@ extern struct group_info init_groups; .state = 0, \ .stack = &init_thread_info, \ .usage = ATOMIC_INIT(2), \ - .flags = 0, \ + .flags = PF_KTHREAD, \ .lock_depth = -1, \ .prio = MAX_PRIO-20, \ .static_prio = MAX_PRIO-20, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 79e749dbf81e..eec64a4adb9d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1483,6 +1483,7 @@ static inline void put_task_struct(struct task_struct *t) #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ +#define PF_KTHREAD 0x00000020 /* I am a kernel thread */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* dumped core */ diff --git a/kernel/exit.c b/kernel/exit.c index a7799d8a6404..28a44a2612dc 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -430,7 +430,7 @@ void daemonize(const char *name, ...) * We don't want to have TIF_FREEZE set if the system-wide hibernation * or suspend transition begins right now. 
*/ - current->flags |= PF_NOFREEZE; + current->flags |= (PF_NOFREEZE | PF_KTHREAD); if (current->nsproxy != &init_nsproxy) { get_nsproxy(&init_nsproxy); -- cgit v1.2.3 From 246bb0b1deb29726990620d8b5e55ca29f331362 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:38 -0700 Subject: kill PF_BORROWED_MM in favour of PF_KTHREAD Kill PF_BORROWED_MM. Change use_mm/unuse_mm to not play with ->flags, and do s/PF_BORROWED_MM/PF_KTHREAD/ for a couple of other users. No functional changes yet. But this allows us to do further fixes/cleanups. oom_kill/ptrace/etc often check "p->mm != NULL" to filter out the kthreads, this is wrong because of use_mm(). The problem with PF_BORROWED_MM is that we need task_lock() to avoid races. With this patch we can check PF_KTHREAD directly, or use a simple lockless helper: /* The result must not be dereferenced !!! */ struct mm_struct *__get_task_mm(struct task_struct *tsk) { if (tsk->flags & PF_KTHREAD) return NULL; return tsk->mm; } Note also ecard_task(). It runs with ->mm != NULL, but it's the kernel thread without PF_BORROWED_MM. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 2 -- include/linux/sched.h | 3 +-- kernel/fork.c | 4 ++-- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index 0fb3117ddd93..0051fd94b44e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -586,7 +586,6 @@ static void use_mm(struct mm_struct *mm) struct task_struct *tsk = current; task_lock(tsk); - tsk->flags |= PF_BORROWED_MM; active_mm = tsk->active_mm; atomic_inc(&mm->mm_count); tsk->mm = mm; @@ -610,7 +609,6 @@ static void unuse_mm(struct mm_struct *mm) struct task_struct *tsk = current; task_lock(tsk); - tsk->flags &= ~PF_BORROWED_MM; tsk->mm = NULL; /* active_mm is still 'mm' */ enter_lazy_tlb(mm, tsk); diff --git a/include/linux/sched.h b/include/linux/sched.h index eec64a4adb9d..0560999eb1db 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1483,7 +1483,6 @@ static inline void put_task_struct(struct task_struct *t) #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ -#define PF_KTHREAD 0x00000020 /* I am a kernel thread */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* dumped core */ @@ -1497,7 +1496,7 @@ static inline void put_task_struct(struct task_struct *t) #define PF_KSWAPD 0x00040000 /* I am kswapd */ #define PF_SWAPOFF 0x00080000 /* I am in swapoff */ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ -#define PF_BORROWED_MM 0x00200000 /* I am a kthread doing use_mm */ +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ diff --git a/kernel/fork.c b/kernel/fork.c index 228f80c9155a..eeaec6893b0d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -474,7 +474,7 @@ EXPORT_SYMBOL_GPL(mmput); /** * get_task_mm - acquire a reference to the task's mm * - * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning + * Returns %NULL if the task has no mm. 
Checks PF_KTHREAD (meaning * this kernel workthread has transiently adopted a user mm with use_mm, * to do its AIO) is not set and if so returns a reference to it, after * bumping up the use count. User must release the mm via mmput() @@ -487,7 +487,7 @@ struct mm_struct *get_task_mm(struct task_struct *task) task_lock(task); mm = task->mm; if (mm) { - if (task->flags & PF_BORROWED_MM) + if (task->flags & PF_KTHREAD) mm = NULL; else atomic_inc(&mm->mm_users); -- cgit v1.2.3 From 32ecb1f26dd50eeaac4e3f4dea4541c97848e459 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:41 -0700 Subject: coredump: turn mm->core_startup_done into the pointer to struct core_state mm->core_startup_done points to "struct completion startup_done" allocated on the coredump_wait()'s stack. Introduce the new structure, core_state, which holds this "struct completion". This way we can add more info visible to the threads participating in coredump without enlarging mm_struct. No changes in affected .o files. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 8 ++++---- include/linux/mm_types.h | 7 ++++++- kernel/exit.c | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index e347e6ed1617..71734568f018 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1597,13 +1597,13 @@ static int coredump_wait(int exit_code) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - struct completion startup_done; + struct core_state core_state; struct completion *vfork_done; int core_waiters; init_completion(&mm->core_done); - init_completion(&startup_done); - mm->core_startup_done = &startup_done; + init_completion(&core_state.startup); + mm->core_state = &core_state; core_waiters = zap_threads(tsk, mm, exit_code); up_write(&mm->mmap_sem); @@ -1622,7 +1622,7 @@ static int coredump_wait(int exit_code) } if (core_waiters) - wait_for_completion(&startup_done); + wait_for_completion(&core_state.startup); fail: BUG_ON(mm->core_waiters); return core_waiters; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 02a27ae78539..97819efd2333 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -159,6 +159,10 @@ struct vm_area_struct { #endif }; +struct core_state { + struct completion startup; +}; + struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ struct rb_root mm_rb; @@ -220,7 +224,8 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access the bits */ /* coredumping support */ - struct completion *core_startup_done, core_done; + struct core_state *core_state; + struct completion core_done; /* aio bits */ rwlock_t ioctx_list_lock; /* aio lock */ diff --git a/kernel/exit.c b/kernel/exit.c index 28a44a2612dc..f7fa21dbced4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -680,7 +680,7 @@ static void exit_mm(struct task_struct * tsk) up_read(&mm->mmap_sem); down_write(&mm->mmap_sem); if (!--mm->core_waiters) - complete(mm->core_startup_done); + complete(&mm->core_state->startup); up_write(&mm->mmap_sem); wait_for_completion(&mm->core_done); -- cgit v1.2.3 From 999d9fc1670bc082928b93b11d1f2e0e417d973c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:41 -0700 Subject: coredump: move mm->core_waiters into struct core_state Move mm->core_waiters into "struct core_state" allocated on stack. This shrinks mm_struct a little bit and allows further changes. 
This patch mostly does s/core_waiters/core_state. The only essential change is that coredump_wait() must clear mm->core_state before return. The coredump_wait()'s path is uglified and .text grows by 30 bytes, this is fixed by the next patch. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 21 +++++++++++---------- include/linux/mm_types.h | 2 +- kernel/exit.c | 8 ++++---- kernel/fork.c | 2 +- kernel/signal.c | 4 ++-- 5 files changed, 19 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 71734568f018..50de3aaff4d0 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -722,12 +722,10 @@ static int exec_mmap(struct mm_struct *mm) * Make sure that if there is a core dump in progress * for the old mm, we get out and die instead of going * through with the exec. We must hold mmap_sem around - * checking core_waiters and changing tsk->mm. The - * core-inducing thread will increment core_waiters for - * each thread whose ->mm == old_mm. + * checking core_state and changing tsk->mm. */ down_read(&old_mm->mmap_sem); - if (unlikely(old_mm->core_waiters)) { + if (unlikely(old_mm->core_state)) { up_read(&old_mm->mmap_sem); return -EINTR; } @@ -1514,7 +1512,7 @@ static void zap_process(struct task_struct *start) t = start; do { if (t != current && t->mm) { - t->mm->core_waiters++; + t->mm->core_state->nr_threads++; sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); } @@ -1538,11 +1536,11 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, if (err) return err; - if (atomic_read(&mm->mm_users) == mm->core_waiters + 1) + if (atomic_read(&mm->mm_users) == mm->core_state->nr_threads + 1) goto done; /* * We should find and kill all tasks which use this mm, and we should - * count them correctly into mm->core_waiters. We don't take tasklist + * count them correctly into ->nr_threads. We don't take tasklist * lock, but this is safe wrt: * * fork: @@ -1590,7 +1588,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, } rcu_read_unlock(); done: - return mm->core_waiters; + return mm->core_state->nr_threads; } static int coredump_wait(int exit_code) @@ -1603,9 +1601,12 @@ static int coredump_wait(int exit_code) init_completion(&mm->core_done); init_completion(&core_state.startup); + core_state.nr_threads = 0; mm->core_state = &core_state; core_waiters = zap_threads(tsk, mm, exit_code); + if (core_waiters < 0) + mm->core_state = NULL; up_write(&mm->mmap_sem); if (unlikely(core_waiters < 0)) @@ -1623,8 +1624,8 @@ static int coredump_wait(int exit_code) if (core_waiters) wait_for_completion(&core_state.startup); + mm->core_state = NULL; fail: - BUG_ON(mm->core_waiters); return core_waiters; } @@ -1702,7 +1703,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) /* * If another thread got here first, or we are not dumpable, bail out. */ - if (mm->core_waiters || !get_dumpable(mm)) { + if (mm->core_state || !get_dumpable(mm)) { up_write(&mm->mmap_sem); goto fail; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 97819efd2333..c0b1747b61a5 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -160,6 +160,7 @@ struct vm_area_struct { }; struct core_state { + int nr_threads; struct completion startup; }; @@ -179,7 +180,6 @@ struct mm_struct { atomic_t mm_users; /* How many users with user space? 
*/ atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ int map_count; /* number of VMAs */ - int core_waiters; struct rw_semaphore mmap_sem; spinlock_t page_table_lock; /* Protects page tables and some counters */ diff --git a/kernel/exit.c b/kernel/exit.c index f7fa21dbced4..988e232254e9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -670,16 +670,16 @@ static void exit_mm(struct task_struct * tsk) return; /* * Serialize with any possible pending coredump. - * We must hold mmap_sem around checking core_waiters + * We must hold mmap_sem around checking core_state * and clearing tsk->mm. The core-inducing thread - * will increment core_waiters for each thread in the + * will increment ->nr_threads for each thread in the * group with ->mm != NULL. */ down_read(&mm->mmap_sem); - if (mm->core_waiters) { + if (mm->core_state) { up_read(&mm->mmap_sem); down_write(&mm->mmap_sem); - if (!--mm->core_waiters) + if (!--mm->core_state->nr_threads) complete(&mm->core_state->startup); up_write(&mm->mmap_sem); diff --git a/kernel/fork.c b/kernel/fork.c index eeaec6893b0d..813d5c89b9d5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -400,7 +400,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) INIT_LIST_HEAD(&mm->mmlist); mm->flags = (current->mm) ? current->mm->flags : MMF_DUMP_FILTER_DEFAULT; - mm->core_waiters = 0; + mm->core_state = NULL; mm->nr_ptes = 0; set_mm_counter(mm, file_rss, 0); set_mm_counter(mm, anon_rss, 0); diff --git a/kernel/signal.c b/kernel/signal.c index 39c1706edf03..5c7b7eaa0dc6 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1480,10 +1480,10 @@ static inline int may_ptrace_stop(void) * is a deadlock situation, and pointless because our tracer * is dead so don't allow us to stop. * If SIGKILL was already sent before the caller unlocked - * ->siglock we must see ->core_waiters != 0. Otherwise it + * ->siglock we must see ->core_state != NULL. Otherwise it * is safe to enter schedule(). */ - if (unlikely(current->mm->core_waiters) && + if (unlikely(current->mm->core_state) && unlikely(current->mm == current->parent->mm)) return 0; -- cgit v1.2.3 From c5f1cc8c1828486a61ab3e575da6e2c62b34d399 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:42 -0700 Subject: coredump: turn core_state->nr_threads into atomic_t Turn core_state->nr_threads into atomic_t and kill now unneeded down_write(&mm->mmap_sem) in exit_mm(). 
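The exit path can then rely on the usual lockless last-one-out idiom instead of serializing on mmap_sem. A minimal self-contained sketch of that idiom (the names threads_left, all_gone and thread_is_gone() are illustrative, not taken from the patch):

    static atomic_t threads_left;           /* set to the thread count up front */
    static DECLARE_COMPLETION(all_gone);

    static void thread_is_gone(void)
    {
            /* atomic_dec_and_test() is true only for the decrement that reaches zero */
            if (atomic_dec_and_test(&threads_left))
                    complete(&all_gone);
    }

    /* the waiting side simply calls wait_for_completion(&all_gone) */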
Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- include/linux/mm_types.h | 2 +- kernel/exit.c | 5 ++--- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index c74bb34eeeff..15d493fe8aa3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1591,7 +1591,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, } rcu_read_unlock(); done: - core_state->nr_threads = nr; + atomic_set(&core_state->nr_threads, nr); return nr; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c0b1747b61a5..ae99a28ba6ae 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -160,7 +160,7 @@ struct vm_area_struct { }; struct core_state { - int nr_threads; + atomic_t nr_threads; struct completion startup; }; diff --git a/kernel/exit.c b/kernel/exit.c index 988e232254e9..63d82957baae 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -678,10 +678,9 @@ static void exit_mm(struct task_struct * tsk) down_read(&mm->mmap_sem); if (mm->core_state) { up_read(&mm->mmap_sem); - down_write(&mm->mmap_sem); - if (!--mm->core_state->nr_threads) + + if (atomic_dec_and_test(&mm->core_state->nr_threads)) complete(&mm->core_state->startup); - up_write(&mm->mmap_sem); wait_for_completion(&mm->core_done); down_read(&mm->mmap_sem); -- cgit v1.2.3 From b564daf806d492dd4f7afe9b6c83b8d35d137669 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:44 -0700 Subject: coredump: construct the list of coredumping threads at startup time binfmt->core_dump() has to iterate over the all threads in system in order to find the coredumping threads and construct the list using the GFP_ATOMIC allocations. With this patch each thread allocates the list node on exit_mm()'s stack and adds itself to the list. This allows us to do further changes: - simplify ->core_dump() - change exit_mm() to clear ->mm first, then wait for ->core_done. 
this makes the coredumping process visible to oom_kill - kill mm->core_done Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 ++ include/linux/mm_types.h | 6 ++++++ kernel/exit.c | 15 ++++++++++++--- 3 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index b8ee842d93cd..fe2873b8037f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1604,6 +1604,8 @@ static int coredump_wait(int exit_code, struct core_state *core_state) init_completion(&mm->core_done); init_completion(&core_state->startup); + core_state->dumper.task = tsk; + core_state->dumper.next = NULL; core_waiters = zap_threads(tsk, mm, core_state, exit_code); up_write(&mm->mmap_sem); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ae99a28ba6ae..4d0d0abc79fe 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -159,8 +159,14 @@ struct vm_area_struct { #endif }; +struct core_thread { + struct task_struct *task; + struct core_thread *next; +}; + struct core_state { atomic_t nr_threads; + struct core_thread dumper; struct completion startup; }; diff --git a/kernel/exit.c b/kernel/exit.c index 63d82957baae..b66f0d55c791 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -664,6 +664,7 @@ assign_new_owner: static void exit_mm(struct task_struct * tsk) { struct mm_struct *mm = tsk->mm; + struct core_state *core_state; mm_release(tsk, mm); if (!mm) @@ -676,11 +677,19 @@ static void exit_mm(struct task_struct * tsk) * group with ->mm != NULL. */ down_read(&mm->mmap_sem); - if (mm->core_state) { + core_state = mm->core_state; + if (core_state) { + struct core_thread self; up_read(&mm->mmap_sem); - if (atomic_dec_and_test(&mm->core_state->nr_threads)) - complete(&mm->core_state->startup); + self.task = tsk; + self.next = xchg(&core_state->dumper.next, &self); + /* + * Implies mb(), the result of xchg() must be visible + * to core_state->dumper. + */ + if (atomic_dec_and_test(&core_state->nr_threads)) + complete(&core_state->startup); wait_for_completion(&mm->core_done); down_read(&mm->mmap_sem); -- cgit v1.2.3 From a94e2d408eaedbd85aae259621d46fafc10479a2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:46 -0700 Subject: coredump: kill mm->core_done Now that we have core_state->dumper list we can use it to wake up the sub-threads waiting for the coredump completion. This uglifies the code and .text grows by 47 bytes, but otoh mm_struct lessens by sizeof(struct completion). Also, with this change we can decouple exit_mm() from the coredumping code. 
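Condensed from the hunks that follow, the per-mm completion is replaced by a hand-rolled sleep/wake handshake over the on-stack core_thread nodes (simplified here, with error handling and the list walk omitted):

    /* exiting sub-thread, in exit_mm(): self.task doubles as the wait condition */
    for (;;) {
            set_task_state(current, TASK_UNINTERRUPTIBLE);
            if (!self.task)                 /* cleared by coredump_finish() */
                    break;
            schedule();
    }
    __set_task_state(current, TASK_RUNNING);

    /* dumper, in coredump_finish(): ->next must be read before ->task is
       cleared, because clearing ->task lets the sleeper's stack node vanish */
    next = curr->next;
    task = curr->task;
    smp_mb();
    curr->task = NULL;
    wake_up_process(task);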
Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 25 ++++++++++++++++++++++--- include/linux/mm_types.h | 4 +--- kernel/exit.c | 8 +++++++- 3 files changed, 30 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index fe2873b8037f..bff43aeb235e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1602,7 +1602,6 @@ static int coredump_wait(int exit_code, struct core_state *core_state) struct completion *vfork_done; int core_waiters; - init_completion(&mm->core_done); init_completion(&core_state->startup); core_state->dumper.task = tsk; core_state->dumper.next = NULL; @@ -1628,6 +1627,27 @@ fail: return core_waiters; } +static void coredump_finish(struct mm_struct *mm) +{ + struct core_thread *curr, *next; + struct task_struct *task; + + next = mm->core_state->dumper.next; + while ((curr = next) != NULL) { + next = curr->next; + task = curr->task; + /* + * see exit_mm(), curr->task must not see + * ->task == NULL before we read ->next. + */ + smp_mb(); + curr->task = NULL; + wake_up_process(task); + } + + mm->core_state = NULL; +} + /* * set_dumpable converts traditional three-value dumpable to two flags and * stores them into mm->flags. It modifies lower two bits of mm->flags, but @@ -1812,8 +1832,7 @@ fail_unlock: argv_free(helper_argv); current->fsuid = fsuid; - complete_all(&mm->core_done); - mm->core_state = NULL; + coredump_finish(mm); fail: return retval; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4d0d0abc79fe..746f975b58ef 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -229,9 +229,7 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access the bits */ - /* coredumping support */ - struct core_state *core_state; - struct completion core_done; + struct core_state *core_state; /* coredumping support */ /* aio bits */ rwlock_t ioctx_list_lock; /* aio lock */ diff --git a/kernel/exit.c b/kernel/exit.c index b66f0d55c791..8a4d4d12e294 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -691,7 +691,13 @@ static void exit_mm(struct task_struct * tsk) if (atomic_dec_and_test(&core_state->nr_threads)) complete(&core_state->startup); - wait_for_completion(&mm->core_done); + for (;;) { + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (!self.task) /* see coredump_finish() */ + break; + schedule(); + } + __set_task_state(tsk, TASK_RUNNING); down_read(&mm->mmap_sem); } atomic_inc(&mm->mm_count); -- cgit v1.2.3 From db700897224b5ebdf852f2d38920ce428940d059 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:49 -0700 Subject: workqueues: implement flush_work() Most of users of flush_workqueue() can be changed to use cancel_work_sync(), but sometimes we really need to wait for the completion and cancelling is not an option. schedule_on_each_cpu() is good example. Add the new helper, flush_work(work), which waits for the completion of the specific work_struct. More precisely, it "flushes" the result of of the last queue_work() which is visible to the caller. For example, this code queue_work(wq, work); /* WINDOW */ queue_work(wq, work); flush_work(work); doesn't necessary work "as expected". What can happen in the WINDOW above is - wq starts the execution of work->func() - the caller migrates to another CPU now, after the 2nd queue_work() this work is active on the previous CPU, and at the same time it is queued on another. In this case flush_work(work) may return before the first work->func() completes. 
It is trivial to add another helper int flush_work_sync(struct work_struct *work) { return flush_work(work) || wait_on_work(work); } which works "more correctly", but it has to iterate over all CPUs and thus it much slower than flush_work(). Signed-off-by: Oleg Nesterov Acked-by: Max Krasnyansky Acked-by: Jarek Poplawski Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 2 ++ kernel/workqueue.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 14d47120682b..5c158c477ac7 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -201,6 +201,8 @@ extern int keventd_up(void); extern void init_workqueues(void); int execute_in_process_context(work_func_t fn, struct execute_work *); +extern int flush_work(struct work_struct *work); + extern int cancel_work_sync(struct work_struct *work); /* diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d9a2d65cc63e..ee41cf857d55 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -423,6 +423,52 @@ void flush_workqueue(struct workqueue_struct *wq) } EXPORT_SYMBOL_GPL(flush_workqueue); +/** + * flush_work - block until a work_struct's callback has terminated + * @work: the work which is to be flushed + * + * It is expected that, prior to calling flush_work(), the caller has + * arranged for the work to not be requeued, otherwise it doesn't make + * sense to use this function. + */ +int flush_work(struct work_struct *work) +{ + struct cpu_workqueue_struct *cwq; + struct list_head *prev; + struct wq_barrier barr; + + might_sleep(); + cwq = get_wq_data(work); + if (!cwq) + return 0; + + prev = NULL; + spin_lock_irq(&cwq->lock); + if (!list_empty(&work->entry)) { + /* + * See the comment near try_to_grab_pending()->smp_rmb(). + * If it was re-queued under us we are not going to wait. + */ + smp_rmb(); + if (unlikely(cwq != get_wq_data(work))) + goto out; + prev = &work->entry; + } else { + if (cwq->current_work != work) + goto out; + prev = &cwq->worklist; + } + insert_wq_barrier(cwq, &barr, prev->next); +out: + spin_unlock_irq(&cwq->lock); + if (!prev) + return 0; + + wait_for_completion(&barr.done); + return 1; +} +EXPORT_SYMBOL_GPL(flush_work); + /* * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, * so this work can't be re-armed in any way. -- cgit v1.2.3 From 3da1c84c00c7e5fa8348336bd8c342f9128b0f14 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:50 -0700 Subject: workqueues: make get_online_cpus() useable for work->func() workqueue_cpu_callback(CPU_DEAD) flushes cwq->thread under cpu_maps_update_begin(). This means that the multithreaded workqueues can't use get_online_cpus() due to the possible deadlock, very bad and very old problem. Introduce the new state, CPU_POST_DEAD, which is called after cpu_hotplug_done() but before cpu_maps_update_done(). Change workqueue_cpu_callback() to use CPU_POST_DEAD instead of CPU_DEAD. This means that create/destroy functions can't rely on get_online_cpus() any longer and should take cpu_add_remove_lock instead. 
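A hedged sketch of what a hotplug callback keyed on the new phase could look like; the callback itself and flush_my_cpu_worker() are illustrative, not part of this patch:

    static int __cpuinit my_cpu_callback(struct notifier_block *nfb,
                                         unsigned long action, void *hcpu)
    {
            unsigned int cpu = (unsigned long)hcpu;

            switch (action & ~CPU_TASKS_FROZEN) {
            case CPU_POST_DEAD:
                    /*
                     * Called after cpu_hotplug_done(), so a work->func() that
                     * calls get_online_cpus() can no longer deadlock against
                     * us; cpu_add_remove_lock is still held at this point.
                     */
                    flush_my_cpu_worker(cpu);       /* illustrative helper */
                    break;
            }
            return NOTIFY_OK;
    }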
[akpm@linux-foundation.org: fix CONFIG_SMP=n] Signed-off-by: Oleg Nesterov Acked-by: Gautham R Shenoy Cc: Heiko Carstens Cc: Max Krasnyansky Cc: Paul Jackson Cc: Paul Menage Cc: Peter Zijlstra Cc: Vegard Nossum Cc: Martin Schwidefsky Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpu.h | 15 +++++++++++---- include/linux/notifier.h | 2 ++ kernel/cpu.c | 5 +++++ kernel/workqueue.c | 18 +++++++++--------- 4 files changed, 27 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 7464ba3b4333..d7faf8808497 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -69,10 +69,11 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb) #endif int cpu_up(unsigned int cpu); - extern void cpu_hotplug_init(void); +extern void cpu_maps_update_begin(void); +extern void cpu_maps_update_done(void); -#else +#else /* CONFIG_SMP */ static inline int register_cpu_notifier(struct notifier_block *nb) { @@ -87,10 +88,16 @@ static inline void cpu_hotplug_init(void) { } +static inline void cpu_maps_update_begin(void) +{ +} + +static inline void cpu_maps_update_done(void) +{ +} + #endif /* CONFIG_SMP */ extern struct sysdev_class cpu_sysdev_class; -extern void cpu_maps_update_begin(void); -extern void cpu_maps_update_done(void); #ifdef CONFIG_HOTPLUG_CPU /* Stop CPUs going up and down. */ diff --git a/include/linux/notifier.h b/include/linux/notifier.h index bd3d72ddf333..da2698b0fdd1 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -214,6 +214,8 @@ static inline int notifier_to_errno(int ret) #define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ #define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task, * not handling interrupts, soon dead */ +#define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug + * lock is dropped */ /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend * operation in progress diff --git a/kernel/cpu.c b/kernel/cpu.c index 2cc409ce0a8f..10ba5f1004a5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -285,6 +285,11 @@ out_allowed: set_cpus_allowed_ptr(current, &old_allowed); out_release: cpu_hotplug_done(); + if (!err) { + if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod, + hcpu) == NOTIFY_BAD) + BUG(); + } return err; } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5fbffd302eb5..828e58230cbc 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -828,7 +828,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, err = create_workqueue_thread(cwq, singlethread_cpu); start_workqueue_thread(cwq, -1); } else { - get_online_cpus(); + cpu_maps_update_begin(); spin_lock(&workqueue_lock); list_add(&wq->list, &workqueues); spin_unlock(&workqueue_lock); @@ -840,7 +840,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, err = create_workqueue_thread(cwq, cpu); start_workqueue_thread(cwq, cpu); } - put_online_cpus(); + cpu_maps_update_done(); } if (err) { @@ -854,8 +854,8 @@ EXPORT_SYMBOL_GPL(__create_workqueue_key); static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) { /* - * Our caller is either destroy_workqueue() or CPU_DEAD, - * get_online_cpus() protects cwq->thread. + * Our caller is either destroy_workqueue() or CPU_POST_DEAD, + * cpu_add_remove_lock protects cwq->thread. 
*/ if (cwq->thread == NULL) return; @@ -865,7 +865,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) flush_cpu_workqueue(cwq); /* - * If the caller is CPU_DEAD and cwq->worklist was not empty, + * If the caller is CPU_POST_DEAD and cwq->worklist was not empty, * a concurrent flush_workqueue() can insert a barrier after us. * However, in that case run_workqueue() won't return and check * kthread_should_stop() until it flushes all work_struct's. @@ -889,14 +889,14 @@ void destroy_workqueue(struct workqueue_struct *wq) const cpumask_t *cpu_map = wq_cpu_map(wq); int cpu; - get_online_cpus(); + cpu_maps_update_begin(); spin_lock(&workqueue_lock); list_del(&wq->list); spin_unlock(&workqueue_lock); for_each_cpu_mask_nr(cpu, *cpu_map) cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu)); - put_online_cpus(); + cpu_maps_update_done(); free_percpu(wq->cpu_wq); kfree(wq); @@ -935,7 +935,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, case CPU_UP_CANCELED: start_workqueue_thread(cwq, -1); - case CPU_DEAD: + case CPU_POST_DEAD: cleanup_workqueue_thread(cwq); break; } @@ -943,7 +943,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_CANCELED: - case CPU_DEAD: + case CPU_POST_DEAD: cpu_clear(cpu, cpu_populated_map); } -- cgit v1.2.3 From 95b68dec0d52c7b8fea3698b3938cf3ab936436b Mon Sep 17 00:00:00 2001 From: Chandru Date: Fri, 25 Jul 2008 01:47:55 -0700 Subject: calgary iommu: use the first kernels TCE tables in kdump kdump kernel fails to boot with calgary iommu and aacraid driver on a x366 box. The ongoing dma's of aacraid from the first kernel continue to exist until the driver is loaded in the kdump kernel. Calgary is initialized prior to aacraid and creation of new tce tables causes wrong dma's to occur. Here we try to get the tce tables of the first kernel in kdump kernel and use them. While in the kdump kernel we do not allocate new tce tables but instead read the base address register contents of calgary iommu and use the tables that the registers point to. With these changes the kdump kernel and hence aacraid now boots normally. 
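For reference, the is_kdump_kernel() helper that this series adds to crash_dump.h turns the "reuse the first kernel's state" decision into a one-liner in a driver's init path. A minimal sketch follows; the two demo_* helpers are hypothetical placeholders for driver-specific table handling, not code from this patch.

#include <linux/init.h>
#include <linux/crash_dump.h>

/* Hypothetical placeholders for driver-specific table management. */
static void demo_reuse_first_kernel_tables(void) { }
static void demo_alloc_fresh_tables(void) { }

static int __init demo_iommu_init(void)
{
        /*
         * In a kdump kernel the device may still be doing DMA that the
         * crashed kernel programmed, so adopt its translation tables
         * instead of allocating and clearing new ones.
         */
        if (is_kdump_kernel())
                demo_reuse_first_kernel_tables();
        else
                demo_alloc_fresh_tables();
        return 0;
}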
Signed-off-by: Chandru Siddalingappa Acked-by: Muli Ben-Yehuda Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/pci-calgary_64.c | 85 +++++++++++++++++++++++++++++++++++++--- include/linux/crash_dump.h | 8 ++++ 2 files changed, 87 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 151f2d171f7c..19e7fc7c2c4f 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -167,6 +168,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl); static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); static void calioc2_tce_cache_blast(struct iommu_table *tbl); static void calioc2_dump_error_regs(struct iommu_table *tbl); +static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl); +static void get_tce_space_from_tar(void); static struct cal_chipset_ops calgary_chip_ops = { .handle_quirks = calgary_handle_quirks, @@ -830,7 +833,11 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar) tbl = pci_iommu(dev->bus); tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; - tce_free(tbl, 0, tbl->it_size); + + if (is_kdump_kernel()) + calgary_init_bitmap_from_tce_table(tbl); + else + tce_free(tbl, 0, tbl->it_size); if (is_calgary(dev->device)) tbl->chip_ops = &calgary_chip_ops; @@ -1209,6 +1216,10 @@ static int __init calgary_init(void) if (ret) return ret; + /* Purely for kdump kernel case */ + if (is_kdump_kernel()) + get_tce_space_from_tar(); + do { dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); if (!dev) @@ -1339,6 +1350,61 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev) return (val != 0xffffffff); } +/* + * calgary_init_bitmap_from_tce_table(): + * Funtion for kdump case. In the second/kdump kernel initialize + * the bitmap based on the tce table entries obtained from first kernel + */ +static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) +{ + u64 *tp; + unsigned int index; + tp = ((u64 *)tbl->it_base); + for (index = 0 ; index < tbl->it_size; index++) { + if (*tp != 0x0) + set_bit(index, tbl->it_map); + tp++; + } +} + +/* + * get_tce_space_from_tar(): + * Function for kdump case. Get the tce tables from first kernel + * by reading the contents of the base adress register of calgary iommu + */ +static void get_tce_space_from_tar() +{ + int bus; + void __iomem *target; + unsigned long tce_space; + + for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { + struct calgary_bus_info *info = &bus_info[bus]; + unsigned short pci_device; + u32 val; + + val = read_pci_config(bus, 0, 0, 0); + pci_device = (val & 0xFFFF0000) >> 16; + + if (!is_cal_pci_dev(pci_device)) + continue; + if (info->translation_disabled) + continue; + + if (calgary_bus_has_devices(bus, pci_device) || + translate_empty_slots) { + target = calgary_reg(bus_info[bus].bbar, + tar_offset(bus)); + tce_space = be64_to_cpu(readq(target)); + tce_space = tce_space & TAR_SW_BITS; + + tce_space = tce_space & (~specified_table_size); + info->tce_space = (u64 *)__va(tce_space); + } + } + return; +} + void __init detect_calgary(void) { int bus; @@ -1394,7 +1460,8 @@ void __init detect_calgary(void) return; } - specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE); + specified_table_size = determine_tce_table_size((is_kdump_kernel() ? 
+ saved_max_pfn : max_pfn) * PAGE_SIZE); for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { struct calgary_bus_info *info = &bus_info[bus]; @@ -1412,10 +1479,16 @@ void __init detect_calgary(void) if (calgary_bus_has_devices(bus, pci_device) || translate_empty_slots) { - tbl = alloc_tce_table(); - if (!tbl) - goto cleanup; - info->tce_space = tbl; + /* + * If it is kdump kernel, find and use tce tables + * from first kernel, else allocate tce tables here + */ + if (!is_kdump_kernel()) { + tbl = alloc_tce_table(); + if (!tbl) + goto cleanup; + info->tce_space = tbl; + } calgary_found = 1; } } diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 22c7ac5cd80c..6cd39a927e1f 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -22,5 +22,13 @@ extern struct proc_dir_entry *proc_vmcore; #define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) +static inline int is_kdump_kernel(void) +{ + return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0; +} +#else /* !CONFIG_CRASH_DUMP */ +static inline int is_kdump_kernel(void) { return 0; } #endif /* CONFIG_CRASH_DUMP */ + +extern unsigned long saved_max_pfn; #endif /* LINUX_CRASHDUMP_H */ -- cgit v1.2.3 From 2027d1abc25ff770cc3bc936abd33570ce85d85a Mon Sep 17 00:00:00 2001 From: Nadia Derbey Date: Fri, 25 Jul 2008 01:47:57 -0700 Subject: idr: change the idr structure After scalability problems have been detected when using the sysV ipcs, I have proposed to use an RCU based implementation of the IDR api instead (see threads http://lkml.org/lkml/2008/4/11/212 and http://lkml.org/lkml/2008/4/29/295). This resulted in many people asking to convert the idr API and make it rcu safe (because most of the code was duplicated and thus unmaintanable and unreviewable). So here is a first attempt. The important change wrt to the idr API itself is during idr removes: idr layers are freed after a grace period, instead of being moved to the free list. The important change wrt to ipcs, is that idr_find() can now be called locklessly inside a rcu read critical section. Here are the results I've got for the pmsg test sent by Manfred: 2.6.25-rc3-mm1 2.6.25-rc3-mm1+ 2.6.25-mm1 Patched 2.6.25-mm1 1 1168441 1064021 876000 947488 2 1094264 921059 1549592 1730685 3 2082520 1738165 1694370 2324880 4 2079929 1695521 404553 2400408 5 2898758 406566 391283 3246580 6 2921417 261275 263249 3752148 7 3308761 126056 191742 4243142 8 3329456 100129 141722 4275780 1st column: stock 2.6.25-rc3-mm1 2nd column: 2.6.25-rc3-mm1 + ipc patches (store ipcs into idrs) 3nd column: stock 2.6.25-mm1 4th column: 2.6.25-mm1 + this pacth series. This patch: Add an rcu_head to the idr_layer structure in order to free it after a grace period. Signed-off-by: Nadia Derbey Reviewed-by: "Paul E. McKenney" Cc: Manfred Spraul Cc: Jim Houston Cc: Pierre Peiffer Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 9a2d762124de..1af61d23be36 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -15,6 +15,7 @@ #include #include #include +#include #if BITS_PER_LONG == 32 # define IDR_BITS 5 @@ -51,6 +52,7 @@ struct idr_layer { unsigned long bitmap; /* A zero bit means "space here" */ struct idr_layer *ary[1< Date: Fri, 25 Jul 2008 01:47:59 -0700 Subject: idr: error checking factorization Do some code factorization in the return code analysis. 
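The return codes being factored here only change how the errors are produced internally; callers keep the documented idr semantics, where -EAGAIN means "preallocate again and retry" and -ENOSPC means the id space is exhausted. As a reminder, a minimal sketch of the usual caller loop built around those errors; the idr instance and the wrapper function are hypothetical.

#include <linux/idr.h>
#include <linux/errno.h>
#include <linux/gfp.h>

static DEFINE_IDR(demo_idr);    /* hypothetical idr instance */

/* Store ptr and return its new id in *id, retrying on -EAGAIN. */
static int demo_store(void *ptr, int *id)
{
        int err;

        do {
                if (!idr_pre_get(&demo_idr, GFP_KERNEL))
                        return -ENOMEM;
                err = idr_get_new(&demo_idr, ptr, id);
        } while (err == -EAGAIN);

        return err;     /* 0 on success, -ENOSPC if no ids are left */
}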
Signed-off-by: Nadia Derbey Cc: "Paul E. McKenney" Cc: Manfred Spraul Cc: Jim Houston Cc: Pierre Peiffer Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 6 ++++++ lib/idr.c | 30 +++++++++--------------------- 2 files changed, 15 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 1af61d23be36..762c3f2c631d 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -73,6 +73,12 @@ struct idr { } #define DEFINE_IDR(name) struct idr name = IDR_INIT(name) +/* Actions to be taken after a call to _idr_sub_alloc */ +#define IDR_NEED_TO_GROW -2 +#define IDR_NOMORE_SPACE -3 + +#define _idr_rc_to_errno(rc) ((rc) == -1 ? -EAGAIN : -ENOSPC) + /* * This is what we export. */ diff --git a/lib/idr.c b/lib/idr.c index 9d905b131ecb..80ba06f29d36 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -143,7 +143,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) /* if already at the top layer, we need to grow */ if (!(p = pa[l])) { *starting_id = id; - return -2; + return IDR_NEED_TO_GROW; } /* If we need to go up one layer, continue the @@ -160,7 +160,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) id = ((id >> sh) ^ n ^ m) << sh; } if ((id >= MAX_ID_BIT) || (id < 0)) - return -3; + return IDR_NOMORE_SPACE; if (l == 0) break; /* @@ -229,7 +229,7 @@ build_up: idp->top = p; idp->layers = layers; v = sub_alloc(idp, &id, pa); - if (v == -2) + if (v == IDR_NEED_TO_GROW) goto build_up; return(v); } @@ -278,12 +278,8 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) * This is a cheap hack until the IDR code can be fixed to * return proper error values. */ - if (rv < 0) { - if (rv == -1) - return -EAGAIN; - else /* Will be -3 */ - return -ENOSPC; - } + if (rv < 0) + return _idr_rc_to_errno(rv); *id = rv; return 0; } @@ -313,12 +309,8 @@ int idr_get_new(struct idr *idp, void *ptr, int *id) * This is a cheap hack until the IDR code can be fixed to * return proper error values. */ - if (rv < 0) { - if (rv == -1) - return -EAGAIN; - else /* Will be -3 */ - return -ENOSPC; - } + if (rv < 0) + return _idr_rc_to_errno(rv); *id = rv; return 0; } @@ -696,12 +688,8 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) restart: /* get vacant slot */ t = idr_get_empty_slot(&ida->idr, idr_id, pa); - if (t < 0) { - if (t == -1) - return -EAGAIN; - else /* will be -3 */ - return -ENOSPC; - } + if (t < 0) + return _idr_rc_to_errno(t); if (t * IDA_BITMAP_BITS >= MAX_ID_BIT) return -ENOSPC; -- cgit v1.2.3 From f9c46d6ea5ce138a886c3a0f10a46130afab75f5 Mon Sep 17 00:00:00 2001 From: Nadia Derbey Date: Fri, 25 Jul 2008 01:48:01 -0700 Subject: idr: make idr_find rcu-safe Make idr_find rcu-safe: it can now be called inside an rcu_read critical section. Signed-off-by: Nadia Derbey Reviewed-by: "Paul E. McKenney" Cc: Manfred Spraul Cc: Jim Houston Cc: Pierre Peiffer Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 16 ++++++++++++++++ lib/idr.c | 11 ++++++----- 2 files changed, 22 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 762c3f2c631d..fa035f96f2a3 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -79,6 +79,22 @@ struct idr { #define _idr_rc_to_errno(rc) ((rc) == -1 ? 
-EAGAIN : -ENOSPC) +/** + * idr synchronization (stolen from radix-tree.h) + * + * idr_find() is able to be called locklessly, using RCU. The caller must + * ensure calls to this function are made within rcu_read_lock() regions. + * Other readers (lock-free or otherwise) and modifications may be running + * concurrently. + * + * It is still required that the caller manage the synchronization and + * lifetimes of the items. So if RCU lock-free lookups are used, typically + * this would mean that the items have their own locks, or are amenable to + * lock-free access; and that the items are freed by RCU (or only freed after + * having been deleted from the idr tree *and* a synchronize_rcu() grace + * period). + */ + /* * This is what we export. */ diff --git a/lib/idr.c b/lib/idr.c index 44ab3b2a4eba..21e12af1f231 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -456,7 +456,8 @@ EXPORT_SYMBOL(idr_destroy); * return indicates that @id is not valid or you passed %NULL in * idr_get_new(). * - * The caller must serialize idr_find() vs idr_get_new() and idr_remove(). + * This function can be called under rcu_read_lock(), given that the leaf + * pointers lifetimes are correctly managed. */ void *idr_find(struct idr *idp, int id) { @@ -464,7 +465,7 @@ void *idr_find(struct idr *idp, int id) struct idr_layer *p; n = idp->layers * IDR_BITS; - p = idp->top; + p = rcu_dereference(idp->top); /* Mask off upper bits we don't use for the search. */ id &= MAX_ID_MASK; @@ -474,7 +475,7 @@ void *idr_find(struct idr *idp, int id) while (n > 0 && p) { n -= IDR_BITS; - p = p->ary[(id >> n) & IDR_MASK]; + p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); } return((void *)p); } @@ -507,7 +508,7 @@ int idr_for_each(struct idr *idp, struct idr_layer **paa = &pa[0]; n = idp->layers * IDR_BITS; - p = idp->top; + p = rcu_dereference(idp->top); max = 1 << n; id = 0; @@ -515,7 +516,7 @@ int idr_for_each(struct idr *idp, while (n > 0 && p) { n -= IDR_BITS; *paa++ = p; - p = p->ary[(id >> n) & IDR_MASK]; + p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); } if (p) { -- cgit v1.2.3 From 4daa28f6d8f5cda8ea0f55048e3c8811c384cbdd Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Fri, 25 Jul 2008 01:48:04 -0700 Subject: ipc/sem.c: convert undo structures to struct list_head The undo structures contain two linked lists, the attached patch replaces them with generic struct list_head lists. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Manfred Spraul Cc: Nadia Derbey Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sem.h | 12 ++-- ipc/sem.c | 163 ++++++++++++++++++++++++++++------------------------ 2 files changed, 95 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sem.h b/include/linux/sem.h index c8eaad9e4b72..6a1af1b49a13 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -95,7 +95,7 @@ struct sem_array { struct sem *sem_base; /* ptr to first semaphore in array */ struct sem_queue *sem_pending; /* pending operations to be processed */ struct sem_queue **sem_pending_last; /* last pending operation */ - struct sem_undo *undo; /* undo requests on this array */ + struct list_head list_id; /* undo requests on this array */ unsigned long sem_nsems; /* no. of semaphores in array */ }; @@ -118,8 +118,8 @@ struct sem_queue { * when the process exits. 
*/ struct sem_undo { - struct sem_undo * proc_next; /* next entry on this process */ - struct sem_undo * id_next; /* next entry on this semaphore set */ + struct list_head list_proc; /* per-process list: all undos from one process */ + struct list_head list_id; /* per semaphore array list: all undos for one array */ int semid; /* semaphore set identifier */ short * semadj; /* array of adjustments, one per semaphore */ }; @@ -128,9 +128,9 @@ struct sem_undo { * that may be shared among all a CLONE_SYSVSEM task group. */ struct sem_undo_list { - atomic_t refcnt; - spinlock_t lock; - struct sem_undo *proc_list; + atomic_t refcnt; + spinlock_t lock; + struct list_head list_proc; }; struct sysv_sem { diff --git a/ipc/sem.c b/ipc/sem.c index e9418df5ff3e..4f26c7157356 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -274,7 +274,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) sma->sem_base = (struct sem *) &sma[1]; /* sma->sem_pending = NULL; */ sma->sem_pending_last = &sma->sem_pending; - /* sma->undo = NULL; */ + INIT_LIST_HEAD(&sma->list_id); sma->sem_nsems = nsems; sma->sem_ctime = get_seconds(); sem_unlock(sma); @@ -536,7 +536,8 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) * (They will be freed without any further action in exit_sem() * or during the next semop.) */ - for (un = sma->undo; un; un = un->id_next) + assert_spin_locked(&sma->sem_perm.lock); + list_for_each_entry(un, &sma->list_id, list_id) un->semid = -1; /* Wake up all pending processes and let them fail with EIDRM. */ @@ -763,9 +764,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, for (i = 0; i < nsems; i++) sma->sem_base[i].semval = sem_io[i]; - for (un = sma->undo; un; un = un->id_next) + + assert_spin_locked(&sma->sem_perm.lock); + list_for_each_entry(un, &sma->list_id, list_id) { for (i = 0; i < nsems; i++) un->semadj[i] = 0; + } sma->sem_ctime = get_seconds(); /* maybe some queued-up processes were waiting for this */ update_queue(sma); @@ -797,12 +801,15 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, { int val = arg.val; struct sem_undo *un; + err = -ERANGE; if (val > SEMVMX || val < 0) goto out_unlock; - for (un = sma->undo; un; un = un->id_next) + assert_spin_locked(&sma->sem_perm.lock); + list_for_each_entry(un, &sma->list_id, list_id) un->semadj[semnum] = 0; + curr->semval = val; curr->sempid = task_tgid_vnr(current); sma->sem_ctime = get_seconds(); @@ -952,6 +959,8 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp) return -ENOMEM; spin_lock_init(&undo_list->lock); atomic_set(&undo_list->refcnt, 1); + INIT_LIST_HEAD(&undo_list->list_proc); + current->sysvsem.undo_list = undo_list; } *undo_listp = undo_list; @@ -960,25 +969,30 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp) static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) { - struct sem_undo **last, *un; - - last = &ulp->proc_list; - un = *last; - while(un != NULL) { - if(un->semid==semid) - break; - if(un->semid==-1) { - *last=un->proc_next; - kfree(un); - } else { - last=&un->proc_next; + struct sem_undo *walk, *tmp; + + assert_spin_locked(&ulp->lock); + list_for_each_entry_safe(walk, tmp, &ulp->list_proc, list_proc) { + if (walk->semid == semid) + return walk; + if (walk->semid == -1) { + list_del(&walk->list_proc); + kfree(walk); } - un=*last; } - return un; + return NULL; } -static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid) +/** + * find_alloc_undo - Lookup (and if not 
present create) undo array + * @ns: namespace + * @semid: semaphore array id + * + * The function looks up (and if not present creates) the undo structure. + * The size of the undo structure depends on the size of the semaphore + * array, thus the alloc path is not that straightforward. + */ +static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) { struct sem_array *sma; struct sem_undo_list *ulp; @@ -997,6 +1011,7 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid) goto out; /* no undo structure around - allocate one. */ + /* step 1: figure out the size of the semaphore array */ sma = sem_lock_check(ns, semid); if (IS_ERR(sma)) return ERR_PTR(PTR_ERR(sma)); @@ -1004,15 +1019,19 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid) nsems = sma->sem_nsems; sem_getref_and_unlock(sma); + /* step 2: allocate new undo structure */ new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); if (!new) { sem_putref(sma); return ERR_PTR(-ENOMEM); } - new->semadj = (short *) &new[1]; - new->semid = semid; + /* step 3: Acquire the lock on the undo list pointer */ spin_lock(&ulp->lock); + + /* step 4: check for races: someone else allocated the undo struct, + * semaphore array was destroyed. + */ un = lookup_undo(ulp, semid); if (un) { spin_unlock(&ulp->lock); @@ -1028,13 +1047,17 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid) un = ERR_PTR(-EIDRM); goto out; } - new->proc_next = ulp->proc_list; - ulp->proc_list = new; - new->id_next = sma->undo; - sma->undo = new; + /* step 5: initialize & link new undo structure */ + new->semadj = (short *) &new[1]; + new->semid = semid; + assert_spin_locked(&ulp->lock); + list_add(&new->list_proc, &ulp->list_proc); + assert_spin_locked(&sma->sem_perm.lock); + list_add(&new->list_id, &sma->list_id); + sem_unlock(sma); - un = new; spin_unlock(&ulp->lock); + un = new; out: return un; } @@ -1090,9 +1113,8 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops, alter = 1; } -retry_undos: if (undos) { - un = find_undo(ns, semid); + un = find_alloc_undo(ns, semid); if (IS_ERR(un)) { error = PTR_ERR(un); goto out_free; @@ -1107,14 +1129,14 @@ retry_undos: } /* - * semid identifiers are not unique - find_undo may have + * semid identifiers are not unique - find_alloc_undo may have * allocated an undo structure, it was invalidated by an RMID - * and now a new array with received the same id. Check and retry. + * and now a new array with received the same id. Check and fail. */ - if (un && un->semid == -1) { - sem_unlock(sma); - goto retry_undos; - } + error = -EIDRM; + if (un && un->semid == -1) + goto out_unlock_free; + error = -EFBIG; if (max >= sma->sem_nsems) goto out_unlock_free; @@ -1243,56 +1265,44 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) */ void exit_sem(struct task_struct *tsk) { - struct sem_undo_list *undo_list; - struct sem_undo *u, **up; - struct ipc_namespace *ns; + struct sem_undo_list *ulp; + struct sem_undo *un, *tmp; - undo_list = tsk->sysvsem.undo_list; - if (!undo_list) + ulp = tsk->sysvsem.undo_list; + if (!ulp) return; tsk->sysvsem.undo_list = NULL; - if (!atomic_dec_and_test(&undo_list->refcnt)) + if (!atomic_dec_and_test(&ulp->refcnt)) return; - ns = tsk->nsproxy->ipc_ns; - /* There's no need to hold the semundo list lock, as current - * is the last task exiting for this undo list. 
- */ - for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) { + spin_lock(&ulp->lock); + + list_for_each_entry_safe(un, tmp, &ulp->list_proc, list_proc) { struct sem_array *sma; - int nsems, i; - struct sem_undo *un, **unp; - int semid; - - semid = u->semid; - - if(semid == -1) - continue; - sma = sem_lock(ns, semid); + int i; + + if (un->semid == -1) + goto free; + + sma = sem_lock(tsk->nsproxy->ipc_ns, un->semid); if (IS_ERR(sma)) - continue; + goto free; - if (u->semid == -1) - goto next_entry; + if (un->semid == -1) + goto unlock_free; - BUG_ON(sem_checkid(sma, u->semid)); + BUG_ON(sem_checkid(sma, un->semid)); - /* remove u from the sma->undo list */ - for (unp = &sma->undo; (un = *unp); unp = &un->id_next) { - if (u == un) - goto found; - } - printk ("exit_sem undo list error id=%d\n", u->semid); - goto next_entry; -found: - *unp = un->id_next; - /* perform adjustments registered in u */ - nsems = sma->sem_nsems; - for (i = 0; i < nsems; i++) { + /* remove un from sma->list_id */ + assert_spin_locked(&sma->sem_perm.lock); + list_del(&un->list_id); + + /* perform adjustments registered in un */ + for (i = 0; i < sma->sem_nsems; i++) { struct sem * semaphore = &sma->sem_base[i]; - if (u->semadj[i]) { - semaphore->semval += u->semadj[i]; + if (un->semadj[i]) { + semaphore->semval += un->semadj[i]; /* * Range checks of the new semaphore value, * not defined by sus: @@ -1316,10 +1326,15 @@ found: sma->sem_otime = get_seconds(); /* maybe some queued-up processes were waiting for this */ update_queue(sma); -next_entry: +unlock_free: sem_unlock(sma); +free: + assert_spin_locked(&ulp->lock); + list_del(&un->list_proc); + kfree(un); } - kfree(undo_list); + spin_unlock(&ulp->lock); + kfree(ulp); } #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From 2c0c29d414087f3b021059673c20a7088f5f1fff Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Fri, 25 Jul 2008 01:48:05 -0700 Subject: ipc/sem.c: remove unused entries from struct sem_queue sem_queue.sma and sem_queue.id were never used, the attached patch removes them. Signed-off-by: Manfred Spraul Reviewed-by: Nadia Derbey Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sem.h | 2 -- ipc/sem.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sem.h b/include/linux/sem.h index 6a1af1b49a13..87756ef1198e 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -107,8 +107,6 @@ struct sem_queue { struct sem_undo * undo; /* undo structure */ int pid; /* process id of requesting process */ int status; /* completion status of operation */ - struct sem_array * sma; /* semaphore array for operations */ - int id; /* internal sem id */ struct sembuf * sops; /* array of pending operations */ int nsops; /* number of operations */ int alter; /* does the operation alter the array? */ diff --git a/ipc/sem.c b/ipc/sem.c index 4f26c7157356..d5ce4000ca17 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1160,12 +1160,10 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops, * task into the pending queue and go to sleep. 
*/ - queue.sma = sma; queue.sops = sops; queue.nsops = nsops; queue.undo = un; queue.pid = task_tgid_vnr(current); - queue.id = semid; queue.alter = alter; if (alter) append_to_queue(sma ,&queue); -- cgit v1.2.3 From a1193f8ec091cd8fd309cc2982abe4499f6f2b4d Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Fri, 25 Jul 2008 01:48:06 -0700 Subject: ipc/sem.c: convert sem_array.sem_pending to struct list_head sem_array.sem_pending is a double linked list, the attached patch converts it to struct list_head. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Manfred Spraul Reviewed-by: Nadia Derbey Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sem.h | 12 +++---- ipc/sem.c | 92 ++++++++++++++++++++--------------------------------- 2 files changed, 40 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sem.h b/include/linux/sem.h index 87756ef1198e..d42599395d79 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -93,21 +93,19 @@ struct sem_array { time_t sem_otime; /* last semop time */ time_t sem_ctime; /* last change time */ struct sem *sem_base; /* ptr to first semaphore in array */ - struct sem_queue *sem_pending; /* pending operations to be processed */ - struct sem_queue **sem_pending_last; /* last pending operation */ + struct list_head sem_pending; /* pending operations to be processed */ struct list_head list_id; /* undo requests on this array */ unsigned long sem_nsems; /* no. of semaphores in array */ }; /* One queue for each sleeping process in the system. */ struct sem_queue { - struct sem_queue * next; /* next entry in the queue */ - struct sem_queue ** prev; /* previous entry in the queue, *(q->prev) == q */ - struct task_struct* sleeper; /* this process */ - struct sem_undo * undo; /* undo structure */ + struct list_head list; /* queue of pending operations */ + struct task_struct *sleeper; /* this process */ + struct sem_undo *undo; /* undo structure */ int pid; /* process id of requesting process */ int status; /* completion status of operation */ - struct sembuf * sops; /* array of pending operations */ + struct sembuf *sops; /* array of pending operations */ int nsops; /* number of operations */ int alter; /* does the operation alter the array? */ }; diff --git a/ipc/sem.c b/ipc/sem.c index d5ce4000ca17..3ca232736b31 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -272,8 +272,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) ns->used_sems += nsems; sma->sem_base = (struct sem *) &sma[1]; - /* sma->sem_pending = NULL; */ - sma->sem_pending_last = &sma->sem_pending; + INIT_LIST_HEAD(&sma->sem_pending); INIT_LIST_HEAD(&sma->list_id); sma->sem_nsems = nsems; sma->sem_ctime = get_seconds(); @@ -331,38 +330,6 @@ asmlinkage long sys_semget(key_t key, int nsems, int semflg) return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); } -/* Manage the doubly linked list sma->sem_pending as a FIFO: - * insert new queue elements at the tail sma->sem_pending_last. 
- */ -static inline void append_to_queue (struct sem_array * sma, - struct sem_queue * q) -{ - *(q->prev = sma->sem_pending_last) = q; - *(sma->sem_pending_last = &q->next) = NULL; -} - -static inline void prepend_to_queue (struct sem_array * sma, - struct sem_queue * q) -{ - q->next = sma->sem_pending; - *(q->prev = &sma->sem_pending) = q; - if (q->next) - q->next->prev = &q->next; - else /* sma->sem_pending_last == &sma->sem_pending */ - sma->sem_pending_last = &q->next; -} - -static inline void remove_from_queue (struct sem_array * sma, - struct sem_queue * q) -{ - *(q->prev) = q->next; - if (q->next) - q->next->prev = q->prev; - else /* sma->sem_pending_last == &q->next */ - sma->sem_pending_last = q->prev; - q->prev = NULL; /* mark as removed */ -} - /* * Determine whether a sequence of semaphore operations would succeed * all at once. Return 0 if yes, 1 if need to sleep, else return error code. @@ -438,16 +405,15 @@ static void update_queue (struct sem_array * sma) int error; struct sem_queue * q; - q = sma->sem_pending; - while(q) { + q = list_entry(sma->sem_pending.next, struct sem_queue, list); + while (&q->list != &sma->sem_pending) { error = try_atomic_semop(sma, q->sops, q->nsops, q->undo, q->pid); /* Does q->sleeper still need to sleep? */ if (error <= 0) { struct sem_queue *n; - remove_from_queue(sma,q); - q->status = IN_WAKEUP; + /* * Continue scanning. The next operation * that must be checked depends on the type of the @@ -458,11 +424,26 @@ static void update_queue (struct sem_array * sma) * for semaphore values to become 0. * - if the operation didn't modify the array, * then just continue. + * The order of list_del() and reading ->next + * is crucial: In the former case, the list_del() + * must be done first [because we might be the + * first entry in ->sem_pending], in the latter + * case the list_del() must be done last + * [because the list is invalid after the list_del()] */ - if (q->alter) - n = sma->sem_pending; - else - n = q->next; + if (q->alter) { + list_del(&q->list); + n = list_entry(sma->sem_pending.next, + struct sem_queue, list); + } else { + n = list_entry(q->list.next, struct sem_queue, + list); + list_del(&q->list); + } + + /* wake up the waiting thread */ + q->status = IN_WAKEUP; + wake_up_process(q->sleeper); /* hands-off: q will disappear immediately after * writing q->status. 
@@ -471,7 +452,7 @@ static void update_queue (struct sem_array * sma) q->status = error; q = n; } else { - q = q->next; + q = list_entry(q->list.next, struct sem_queue, list); } } } @@ -491,7 +472,7 @@ static int count_semncnt (struct sem_array * sma, ushort semnum) struct sem_queue * q; semncnt = 0; - for (q = sma->sem_pending; q; q = q->next) { + list_for_each_entry(q, &sma->sem_pending, list) { struct sembuf * sops = q->sops; int nsops = q->nsops; int i; @@ -503,13 +484,14 @@ static int count_semncnt (struct sem_array * sma, ushort semnum) } return semncnt; } + static int count_semzcnt (struct sem_array * sma, ushort semnum) { int semzcnt; struct sem_queue * q; semzcnt = 0; - for (q = sma->sem_pending; q; q = q->next) { + list_for_each_entry(q, &sma->sem_pending, list) { struct sembuf * sops = q->sops; int nsops = q->nsops; int i; @@ -529,7 +511,7 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum) static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) { struct sem_undo *un; - struct sem_queue *q; + struct sem_queue *q, *t; struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); /* Invalidate the existing undo structures for this semaphore set. @@ -541,17 +523,14 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) un->semid = -1; /* Wake up all pending processes and let them fail with EIDRM. */ - q = sma->sem_pending; - while(q) { - struct sem_queue *n; - /* lazy remove_from_queue: we are killing the whole queue */ - q->prev = NULL; - n = q->next; + + list_for_each_entry_safe(q, t, &sma->sem_pending, list) { + list_del(&q->list); + q->status = IN_WAKEUP; wake_up_process(q->sleeper); /* doesn't sleep */ smp_wmb(); q->status = -EIDRM; /* hands-off q */ - q = n; } /* Remove the semaphore set from the IDR */ @@ -1166,9 +1145,9 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops, queue.pid = task_tgid_vnr(current); queue.alter = alter; if (alter) - append_to_queue(sma ,&queue); + list_add_tail(&queue.list, &sma->sem_pending); else - prepend_to_queue(sma ,&queue); + list_add(&queue.list, &sma->sem_pending); queue.status = -EINTR; queue.sleeper = current; @@ -1194,7 +1173,6 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops, sma = sem_lock(ns, semid); if (IS_ERR(sma)) { - BUG_ON(queue.prev != NULL); error = -EIDRM; goto out_free; } @@ -1212,7 +1190,7 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops, */ if (timeout && jiffies_left == 0) error = -EAGAIN; - remove_from_queue(sma,&queue); + list_del(&queue.list); goto out_unlock_free; out_unlock_free: -- cgit v1.2.3 From 380af1b33b3ff92df5cda96329b58f5d1b6b5a53 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Fri, 25 Jul 2008 01:48:06 -0700 Subject: ipc/sem.c: rewrite undo list locking The attached patch: - reverses the locking order of ulp->lock and sem_lock: Previously, it was first ulp->lock, then inside sem_lock. Now it's the other way around. - converts the undo structure to rcu. Benefits: - With the old locking order, IPC_RMID could not kfree the undo structures. The stale entries remained in the linked lists and were released later. - The patch fixes a a race in semtimedop(): if both IPC_RMID and a semget() that recreates exactly the same id happen between find_alloc_undo() and sem_lock, then semtimedop() would access already kfree'd memory. 
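As background for the new scheme, a minimal sketch of the reader side of an RCU-protected list of the kind list_proc becomes here: lookups run under rcu_read_lock() only, while writers serialize on a spinlock and free entries through call_rcu(). The structure and function names below are hypothetical analogues, not the patched ipc/sem.c code.

#include <linux/rculist.h>
#include <linux/rcupdate.h>

struct demo_undo {                      /* hypothetical analogue of sem_undo */
        struct list_head list_proc;
        int semid;
        struct rcu_head rcu;
};

/* Reader: safe against concurrent list_del_rcu()/call_rcu() by writers. */
static struct demo_undo *demo_lookup(struct list_head *head, int semid)
{
        struct demo_undo *un;

        list_for_each_entry_rcu(un, head, list_proc) {
                if (un->semid == semid)
                        return un;      /* valid while rcu_read_lock() is held */
        }
        return NULL;
}

The caller is expected to hold rcu_read_lock() across the lookup and for as long as it uses the returned entry.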
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Manfred Spraul Reviewed-by: Nadia Derbey Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sem.h | 6 ++- ipc/sem.c | 147 +++++++++++++++++++++++++++++++++------------------- 2 files changed, 98 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sem.h b/include/linux/sem.h index d42599395d79..1b191c176bcd 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -78,6 +78,7 @@ struct seminfo { #ifdef __KERNEL__ #include +#include struct task_struct; @@ -114,7 +115,10 @@ struct sem_queue { * when the process exits. */ struct sem_undo { - struct list_head list_proc; /* per-process list: all undos from one process */ + struct list_head list_proc; /* per-process list: all undos from one process. */ + /* rcu protected */ + struct rcu_head rcu; /* rcu struct for sem_undo() */ + struct sem_undo_list *ulp; /* sem_undo_list for the process */ struct list_head list_id; /* per semaphore array list: all undos for one array */ int semid; /* semaphore set identifier */ short * semadj; /* array of adjustments, one per semaphore */ diff --git a/ipc/sem.c b/ipc/sem.c index 3ca232736b31..bf1bc36cb7ee 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -504,27 +504,35 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum) return semzcnt; } +void free_un(struct rcu_head *head) +{ + struct sem_undo *un = container_of(head, struct sem_undo, rcu); + kfree(un); +} + /* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex * remains locked on exit. */ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) { - struct sem_undo *un; - struct sem_queue *q, *t; + struct sem_undo *un, *tu; + struct sem_queue *q, *tq; struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); - /* Invalidate the existing undo structures for this semaphore set. - * (They will be freed without any further action in exit_sem() - * or during the next semop.) - */ + /* Free the existing undo structures for this semaphore set. */ assert_spin_locked(&sma->sem_perm.lock); - list_for_each_entry(un, &sma->list_id, list_id) + list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { + list_del(&un->list_id); + spin_lock(&un->ulp->lock); un->semid = -1; + list_del_rcu(&un->list_proc); + spin_unlock(&un->ulp->lock); + call_rcu(&un->rcu, free_un); + } /* Wake up all pending processes and let them fail with EIDRM. */ - - list_for_each_entry_safe(q, t, &sma->sem_pending, list) { + list_for_each_entry_safe(q, tq, &sma->sem_pending, list) { list_del(&q->list); q->status = IN_WAKEUP; @@ -948,16 +956,11 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp) static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) { - struct sem_undo *walk, *tmp; + struct sem_undo *walk; - assert_spin_locked(&ulp->lock); - list_for_each_entry_safe(walk, tmp, &ulp->list_proc, list_proc) { + list_for_each_entry_rcu(walk, &ulp->list_proc, list_proc) { if (walk->semid == semid) return walk; - if (walk->semid == -1) { - list_del(&walk->list_proc); - kfree(walk); - } } return NULL; } @@ -970,6 +973,8 @@ static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) * The function looks up (and if not present creates) the undo structure. * The size of the undo structure depends on the size of the semaphore * array, thus the alloc path is not that straightforward. 
+ * Lifetime-rules: sem_undo is rcu-protected, on success, the function + * performs a rcu_read_lock(). */ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) { @@ -983,11 +988,13 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) if (error) return ERR_PTR(error); + rcu_read_lock(); spin_lock(&ulp->lock); un = lookup_undo(ulp, semid); spin_unlock(&ulp->lock); if (likely(un!=NULL)) goto out; + rcu_read_unlock(); /* no undo structure around - allocate one. */ /* step 1: figure out the size of the semaphore array */ @@ -1005,38 +1012,38 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) return ERR_PTR(-ENOMEM); } - /* step 3: Acquire the lock on the undo list pointer */ - spin_lock(&ulp->lock); - - /* step 4: check for races: someone else allocated the undo struct, - * semaphore array was destroyed. - */ - un = lookup_undo(ulp, semid); - if (un) { - spin_unlock(&ulp->lock); - kfree(new); - sem_putref(sma); - goto out; - } + /* step 3: Acquire the lock on semaphore array */ sem_lock_and_putref(sma); if (sma->sem_perm.deleted) { sem_unlock(sma); - spin_unlock(&ulp->lock); kfree(new); un = ERR_PTR(-EIDRM); goto out; } + spin_lock(&ulp->lock); + + /* + * step 4: check for races: did someone else allocate the undo struct? + */ + un = lookup_undo(ulp, semid); + if (un) { + kfree(new); + goto success; + } /* step 5: initialize & link new undo structure */ new->semadj = (short *) &new[1]; + new->ulp = ulp; new->semid = semid; assert_spin_locked(&ulp->lock); - list_add(&new->list_proc, &ulp->list_proc); + list_add_rcu(&new->list_proc, &ulp->list_proc); assert_spin_locked(&sma->sem_perm.lock); list_add(&new->list_id, &sma->list_id); + un = new; - sem_unlock(sma); +success: spin_unlock(&ulp->lock); - un = new; + rcu_read_lock(); + sem_unlock(sma); out: return un; } @@ -1103,6 +1110,8 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops, sma = sem_lock_check(ns, semid); if (IS_ERR(sma)) { + if (un) + rcu_read_unlock(); error = PTR_ERR(sma); goto out_free; } @@ -1111,10 +1120,26 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops, * semid identifiers are not unique - find_alloc_undo may have * allocated an undo structure, it was invalidated by an RMID * and now a new array with received the same id. Check and fail. + * This case can be detected checking un->semid. The existance of + * "un" itself is guaranteed by rcu. */ error = -EIDRM; - if (un && un->semid == -1) - goto out_unlock_free; + if (un) { + if (un->semid == -1) { + rcu_read_unlock(); + goto out_unlock_free; + } else { + /* + * rcu lock can be released, "un" cannot disappear: + * - sem_lock is acquired, thus IPC_RMID is + * impossible. + * - exit_sem is impossible, it always operates on + * current (or a dead task). 
+ */ + + rcu_read_unlock(); + } + } error = -EFBIG; if (max >= sma->sem_nsems) @@ -1242,7 +1267,6 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) void exit_sem(struct task_struct *tsk) { struct sem_undo_list *ulp; - struct sem_undo *un, *tmp; ulp = tsk->sysvsem.undo_list; if (!ulp) @@ -1252,28 +1276,47 @@ void exit_sem(struct task_struct *tsk) if (!atomic_dec_and_test(&ulp->refcnt)) return; - spin_lock(&ulp->lock); - - list_for_each_entry_safe(un, tmp, &ulp->list_proc, list_proc) { + for (;;) { struct sem_array *sma; + struct sem_undo *un; + int semid; int i; - if (un->semid == -1) - goto free; + rcu_read_lock(); + un = list_entry(rcu_dereference(ulp->list_proc.next), + struct sem_undo, list_proc); + if (&un->list_proc == &ulp->list_proc) + semid = -1; + else + semid = un->semid; + rcu_read_unlock(); - sma = sem_lock(tsk->nsproxy->ipc_ns, un->semid); - if (IS_ERR(sma)) - goto free; + if (semid == -1) + break; - if (un->semid == -1) - goto unlock_free; + sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid); - BUG_ON(sem_checkid(sma, un->semid)); + /* exit_sem raced with IPC_RMID, nothing to do */ + if (IS_ERR(sma)) + continue; - /* remove un from sma->list_id */ + un = lookup_undo(ulp, semid); + if (un == NULL) { + /* exit_sem raced with IPC_RMID+semget() that created + * exactly the same semid. Nothing to do. + */ + sem_unlock(sma); + continue; + } + + /* remove un from the linked lists */ assert_spin_locked(&sma->sem_perm.lock); list_del(&un->list_id); + spin_lock(&ulp->lock); + list_del_rcu(&un->list_proc); + spin_unlock(&ulp->lock); + /* perform adjustments registered in un */ for (i = 0; i < sma->sem_nsems; i++) { struct sem * semaphore = &sma->sem_base[i]; @@ -1302,14 +1345,10 @@ void exit_sem(struct task_struct *tsk) sma->sem_otime = get_seconds(); /* maybe some queued-up processes were waiting for this */ update_queue(sma); -unlock_free: sem_unlock(sma); -free: - assert_spin_locked(&ulp->lock); - list_del(&un->list_proc); - kfree(un); + + call_rcu(&un->rcu, free_un); } - spin_unlock(&ulp->lock); kfree(ulp); } -- cgit v1.2.3 From 9eefe520c814f6f62c5d36a2ddcd3fb99dfdb30e Mon Sep 17 00:00:00 2001 From: Nadia Derbey Date: Fri, 25 Jul 2008 01:48:08 -0700 Subject: ipc: do not use a negative value to re-enable msgmni automatic recomputing This patch proposes an alternative to the "magical positive-versus-negative number trick" Andrew complained about last week in http://lkml.org/lkml/2008/6/24/418. This had been introduced with the patches that scale msgmni to the amount of lowmem. With these patches, msgmni has a registered notification routine that recomputes msgmni value upon memory add/remove or ipc namespace creation/ removal. When msgmni is changed from user space (i.e. value written to the proc file), that notification routine is unregistered, and the way to make it registered back is to write a negative value into the proc file. This is the "magical positive-versus-negative number trick". To fix this, a new proc file is introduced: /proc/sys/kernel/auto_msgmni. This file acts as ON/OFF for msgmni automatic recomputing. With this patch, the process is the following: 1) kernel boots in "automatic recomputing mode" /proc/sys/kernel/msgmni contains the value that has been computed (depends on lowmem) /proc/sys/kernel/automatic_msgmni contains "1" 2) echo > /proc/sys/kernel/msgmni . sets msg_ctlmni to . de-activates automatic recomputing (i.e. if, say, some memory is added msgmni won't be recomputed anymore) . 
/proc/sys/kernel/automatic_msgmni now contains "0" 3) echo "0" > /proc/sys/kernel/automatic_msgmni . de-activates msgmni automatic recomputing this has the same effect as 2) except that msg_ctlmni's value stays blocked at its current value) 3) echo "1" > /proc/sys/kernel/automatic_msgmni . recomputes msgmni's value based on the current available memory size and number of ipc namespaces . re-activates automatic recomputing for msgmni. Signed-off-by: Nadia Derbey Cc: Solofo Ramangalahy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 3 +- ipc/ipc_sysctl.c | 72 +++++++++++++++++++++++++++++++++++-------- ipc/ipcns_notifier.c | 20 +++++++++--- 3 files changed, 76 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index ea6c18a8b0d4..ea330f9e7100 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -36,6 +36,7 @@ struct ipc_namespace { int msg_ctlmni; atomic_t msg_bytes; atomic_t msg_hdrs; + int auto_msgmni; size_t shm_ctlmax; size_t shm_ctlall; @@ -53,7 +54,7 @@ extern atomic_t nr_ipc_ns; extern int register_ipcns_notifier(struct ipc_namespace *); extern int cond_register_ipcns_notifier(struct ipc_namespace *); -extern int unregister_ipcns_notifier(struct ipc_namespace *); +extern void unregister_ipcns_notifier(struct ipc_namespace *); extern int ipcns_notify(unsigned long); #else /* CONFIG_SYSVIPC */ diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index d3497465cc0a..69bc85978ba0 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -27,15 +27,17 @@ static void *get_ipc(ctl_table *table) } /* - * Routine that is called when a tunable has successfully been changed by - * hand and it has a callback routine registered on the ipc namespace notifier - * chain: we don't want such tunables to be recomputed anymore upon memory - * add/remove or ipc namespace creation/removal. - * They can come back to a recomputable state by being set to a <0 value. + * Routine that is called when the file "auto_msgmni" has successfully been + * written. + * Two values are allowed: + * 0: unregister msgmni's callback routine from the ipc namespace notifier + * chain. This means that msgmni won't be recomputed anymore upon memory + * add/remove or ipc namespace creation/removal. + * 1: register back the callback routine. */ -static void tunable_set_callback(int val) +static void ipc_auto_callback(int val) { - if (val >= 0) + if (!val) unregister_ipcns_notifier(current->nsproxy->ipc_ns); else { /* @@ -71,7 +73,12 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); if (write && !rc && lenp_bef == *lenp) - tunable_set_callback(*((int *)(ipc_table.data))); + /* + * Tunable has successfully been changed by hand. Disable its + * automatic adjustment. This simply requires unregistering + * the notifiers that trigger recalculation. 
+ */ + unregister_ipcns_notifier(current->nsproxy->ipc_ns); return rc; } @@ -87,10 +94,39 @@ static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, lenp, ppos); } +static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table ipc_table; + size_t lenp_bef = *lenp; + int oldval; + int rc; + + memcpy(&ipc_table, table, sizeof(ipc_table)); + ipc_table.data = get_ipc(table); + oldval = *((int *)(ipc_table.data)); + + rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos); + + if (write && !rc && lenp_bef == *lenp) { + int newval = *((int *)(ipc_table.data)); + /* + * The file "auto_msgmni" has correctly been set. + * React by (un)registering the corresponding tunable, if the + * value has changed. + */ + if (newval != oldval) + ipc_auto_callback(newval); + } + + return rc; +} + #else #define proc_ipc_doulongvec_minmax NULL #define proc_ipc_dointvec NULL #define proc_ipc_callback_dointvec NULL +#define proc_ipcauto_dointvec_minmax NULL #endif #ifdef CONFIG_SYSCTL_SYSCALL @@ -142,14 +178,11 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name, rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval, newlen); - if (newval && newlen && rc > 0) { + if (newval && newlen && rc > 0) /* * Tunable has successfully been changed from userland */ - int *data = get_ipc(table); - - tunable_set_callback(*data); - } + unregister_ipcns_notifier(current->nsproxy->ipc_ns); return rc; } @@ -158,6 +191,9 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name, #define sysctl_ipc_registered_data NULL #endif +static int zero; +static int one = 1; + static struct ctl_table ipc_kern_table[] = { { .ctl_name = KERN_SHMMAX, @@ -222,6 +258,16 @@ static struct ctl_table ipc_kern_table[] = { .proc_handler = proc_ipc_dointvec, .strategy = sysctl_ipc_data, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "auto_msgmni", + .data = &init_ipc_ns.auto_msgmni, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_ipcauto_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, {} }; diff --git a/ipc/ipcns_notifier.c b/ipc/ipcns_notifier.c index 70ff09183f7b..b9b31a4f77e1 100644 --- a/ipc/ipcns_notifier.c +++ b/ipc/ipcns_notifier.c @@ -55,25 +55,35 @@ static int ipcns_callback(struct notifier_block *self, int register_ipcns_notifier(struct ipc_namespace *ns) { + int rc; + memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); ns->ipcns_nb.notifier_call = ipcns_callback; ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; - return blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb); + rc = blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb); + if (!rc) + ns->auto_msgmni = 1; + return rc; } int cond_register_ipcns_notifier(struct ipc_namespace *ns) { + int rc; + memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); ns->ipcns_nb.notifier_call = ipcns_callback; ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; - return blocking_notifier_chain_cond_register(&ipcns_chain, + rc = blocking_notifier_chain_cond_register(&ipcns_chain, &ns->ipcns_nb); + if (!rc) + ns->auto_msgmni = 1; + return rc; } -int unregister_ipcns_notifier(struct ipc_namespace *ns) +void unregister_ipcns_notifier(struct ipc_namespace *ns) { - return blocking_notifier_chain_unregister(&ipcns_chain, - &ns->ipcns_nb); + blocking_notifier_chain_unregister(&ipcns_chain, &ns->ipcns_nb); + ns->auto_msgmni = 0; } int ipcns_notify(unsigned long val) -- cgit v1.2.3 From 
d805dda412346225a50af2d399d958a4bc676c38 Mon Sep 17 00:00:00 2001 From: Abdel Benamrouche Date: Fri, 25 Jul 2008 01:48:25 -0700 Subject: fs/partition/check.c: fix return value warning fs/partitions/check.c:381: warning: ignoring return value of ___device_add___, declared with attribute warn_unused_result [akpm@linux-foundation.org: multiple-return-statements-per-function are evil] Signed-off-by: Abdel Benamrouche Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/partitions/check.c | 28 ++++++++++++++++++++++------ include/linux/genhd.h | 2 +- 2 files changed, 23 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/partitions/check.c b/fs/partitions/check.c index efef715135d3..2e6413fbd2d8 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -344,18 +344,18 @@ static ssize_t whole_disk_show(struct device *dev, static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, whole_disk_show, NULL); -void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) +int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) { struct hd_struct *p; int err; p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) - return; + return -ENOMEM; if (!init_part_stats(p)) { - kfree(p); - return; + err = -ENOMEM; + goto out0; } p->start_sect = start; p->nr_sects = len; @@ -378,15 +378,31 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, /* delay uevent until 'holders' subdir is created */ p->dev.uevent_suppress = 1; - device_add(&p->dev); + err = device_add(&p->dev); + if (err) + goto out1; partition_sysfs_add_subdir(p); p->dev.uevent_suppress = 0; - if (flags & ADDPART_FLAG_WHOLEDISK) + if (flags & ADDPART_FLAG_WHOLEDISK) { err = device_create_file(&p->dev, &dev_attr_whole_disk); + if (err) + goto out2; + } /* suppress uevent if the disk supresses it */ if (!disk->dev.uevent_suppress) kobject_uevent(&p->dev.kobj, KOBJ_ADD); + + return 0; + +out2: + device_del(&p->dev); +out1: + put_device(&p->dev); + free_part_stats(p); +out0: + kfree(p); + return err; } /* Not exported, helper to add_disk(). */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e8787417f65a..118216f1bd3c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -541,7 +541,7 @@ extern dev_t blk_lookup_devt(const char *name, int part); extern char *disk_name (struct gendisk *hd, int part, char *buf); extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); -extern void add_partition(struct gendisk *, int, sector_t, sector_t, int); +extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int); extern void delete_partition(struct gendisk *, int); extern void printk_all_partitions(void); -- cgit v1.2.3 From 6e644c3126149b65460610fe5a00d8a162092abe Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 25 Jul 2008 01:48:28 -0700 Subject: move proc_kmsg_operations to fs/proc/internal.h This patch moves the extern of struct proc_kmsg_operations to fs/proc/internal.h and adds an #include "internal.h" to fs/proc/kmsg.c so that the latter sees the former. 
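The point is simply that a declaration and its definition should meet in one translation unit, so the compiler can verify they agree. A hypothetical miniature of the same pattern (file names and identifiers are illustrative only):

/* fs/foo/internal.h (hypothetical) */
extern const struct file_operations foo_file_ops;

/* fs/foo/foo.c (hypothetical) */
#include <linux/fs.h>
#include "internal.h"

const struct file_operations foo_file_ops = {
        /* methods omitted in this sketch; the include above lets the
         * compiler check the definition against the declaration */
};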
Signed-off-by: Adrian Bunk Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/internal.h | 1 + fs/proc/kmsg.c | 2 ++ include/linux/proc_fs.h | 1 - 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 28cbca805905..8d67616e7bb0 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -63,6 +63,7 @@ extern const struct file_operations proc_smaps_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; extern const struct file_operations proc_net_operations; +extern const struct file_operations proc_kmsg_operations; extern const struct inode_operations proc_net_inode_operations; void free_proc_entry(struct proc_dir_entry *de); diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index ff3b90b56e9d..9fd5df3f40ce 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -15,6 +15,8 @@ #include #include +#include "internal.h" + extern wait_queue_head_t log_wait; extern int do_syslog(int type, char __user *bug, int count); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 15a9eaf4a802..cdabc2fc02f7 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -138,7 +138,6 @@ extern int proc_readdir(struct file *, void *, filldir_t); extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); extern const struct file_operations proc_kcore_operations; -extern const struct file_operations proc_kmsg_operations; extern const struct file_operations ppc_htab_operations; extern int pid_ns_prepare_proc(struct pid_namespace *ns); -- cgit v1.2.3 From 881adb85358309ea9c6f707394002719982ec607 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 25 Jul 2008 01:48:29 -0700 Subject: proc: always do ->release Current two-stage scheme of removing PDE emphasizes one bug in proc: open rmmod remove_proc_entry close ->release won't be called because ->proc_fops were cleared. In simple cases it's small memory leak. For every ->open, ->release has to be done. List of openers is introduced which is traversed at remove_proc_entry() if neeeded. Discussions with Al long ago (sigh). 
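The guarantee being added is easiest to see from any module that pairs a resource acquisition in ->open with its release in ->release. A minimal, hypothetical module sketch (names and the buffer size are illustrative only):

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/fs.h>
#include <linux/slab.h>

static int demo_open(struct inode *inode, struct file *file)
{
        file->private_data = kmalloc(128, GFP_KERNEL);
        return file->private_data ? 0 : -ENOMEM;
}

static int demo_release(struct inode *inode, struct file *file)
{
        kfree(file->private_data);      /* leaked if ->release were skipped */
        return 0;
}

static const struct file_operations demo_fops = {
        .owner   = THIS_MODULE,
        .open    = demo_open,
        .release = demo_release,
};

static int __init demo_init(void)
{
        proc_create("demo", 0, NULL, &demo_fops);
        return 0;
}

static void __exit demo_exit(void)
{
        remove_proc_entry("demo", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Before this change, a file kept open across remove_proc_entry() would never see demo_release() run; with the openers list it is invoked from remove_proc_entry() itself, so every ->open still gets its ->release.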
Signed-off-by: Alexey Dobriyan Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 14 ++++++++++ fs/proc/inode.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++--- fs/proc/internal.h | 7 +++++ include/linux/proc_fs.h | 1 + 4 files changed, 92 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 43e54e86cefd..bc0a0dd2d844 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -597,6 +597,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, ent->pde_users = 0; spin_lock_init(&ent->pde_unload_lock); ent->pde_unload_completion = NULL; + INIT_LIST_HEAD(&ent->pde_openers); out: return ent; } @@ -789,6 +790,19 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) spin_unlock(&de->pde_unload_lock); continue_removing: + spin_lock(&de->pde_unload_lock); + while (!list_empty(&de->pde_openers)) { + struct pde_opener *pdeo; + + pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); + list_del(&pdeo->lh); + spin_unlock(&de->pde_unload_lock); + pdeo->release(pdeo->inode, pdeo->file); + kfree(pdeo); + spin_lock(&de->pde_unload_lock); + } + spin_unlock(&de->pde_unload_lock); + if (S_ISDIR(de->mode)) parent->nlink--; de->nlink = 0; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index b08d10017911..354c08485825 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -126,12 +126,17 @@ static const struct super_operations proc_sops = { .remount_fs = proc_remount, }; -static void pde_users_dec(struct proc_dir_entry *pde) +static void __pde_users_dec(struct proc_dir_entry *pde) { - spin_lock(&pde->pde_unload_lock); pde->pde_users--; if (pde->pde_unload_completion && pde->pde_users == 0) complete(pde->pde_unload_completion); +} + +static void pde_users_dec(struct proc_dir_entry *pde) +{ + spin_lock(&pde->pde_unload_lock); + __pde_users_dec(pde); spin_unlock(&pde->pde_unload_lock); } @@ -318,36 +323,97 @@ static int proc_reg_open(struct inode *inode, struct file *file) struct proc_dir_entry *pde = PDE(inode); int rv = 0; int (*open)(struct inode *, struct file *); + int (*release)(struct inode *, struct file *); + struct pde_opener *pdeo; + + /* + * What for, you ask? Well, we can have open, rmmod, remove_proc_entry + * sequence. ->release won't be called because ->proc_fops will be + * cleared. Depending on complexity of ->release, consequences vary. + * + * We can't wait for mercy when close will be done for real, it's + * deadlockable: rmmod foo release + * by hand in remove_proc_entry(). For this, save opener's credentials + * for later. + */ + pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); + if (!pdeo) + return -ENOMEM; spin_lock(&pde->pde_unload_lock); if (!pde->proc_fops) { spin_unlock(&pde->pde_unload_lock); + kfree(pdeo); return rv; } pde->pde_users++; open = pde->proc_fops->open; + release = pde->proc_fops->release; spin_unlock(&pde->pde_unload_lock); if (open) rv = open(inode, file); - pde_users_dec(pde); + spin_lock(&pde->pde_unload_lock); + if (rv == 0 && release) { + /* To know what to release. */ + pdeo->inode = inode; + pdeo->file = file; + /* Strictly for "too late" ->release in proc_reg_release(). 
*/ + pdeo->release = release; + list_add(&pdeo->lh, &pde->pde_openers); + } else + kfree(pdeo); + __pde_users_dec(pde); + spin_unlock(&pde->pde_unload_lock); return rv; } +static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde, + struct inode *inode, struct file *file) +{ + struct pde_opener *pdeo; + + list_for_each_entry(pdeo, &pde->pde_openers, lh) { + if (pdeo->inode == inode && pdeo->file == file) + return pdeo; + } + return NULL; +} + static int proc_reg_release(struct inode *inode, struct file *file) { struct proc_dir_entry *pde = PDE(inode); int rv = 0; int (*release)(struct inode *, struct file *); + struct pde_opener *pdeo; spin_lock(&pde->pde_unload_lock); + pdeo = find_pde_opener(pde, inode, file); if (!pde->proc_fops) { - spin_unlock(&pde->pde_unload_lock); + /* + * Can't simply exit, __fput() will think that everything is OK, + * and move on to freeing struct file. remove_proc_entry() will + * find slacker in opener's list and will try to do non-trivial + * things with struct file. Therefore, remove opener from list. + * + * But if opener is removed from list, who will ->release it? + */ + if (pdeo) { + list_del(&pdeo->lh); + spin_unlock(&pde->pde_unload_lock); + rv = pdeo->release(inode, file); + kfree(pdeo); + } else + spin_unlock(&pde->pde_unload_lock); return rv; } pde->pde_users++; release = pde->proc_fops->release; + if (pdeo) { + list_del(&pdeo->lh); + kfree(pdeo); + } spin_unlock(&pde->pde_unload_lock); if (release) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 8d67616e7bb0..442202314d53 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -89,3 +89,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, struct dentry *dentry); int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, filldir_t filldir); + +struct pde_opener { + struct inode *inode; + struct file *file; + int (*release)(struct inode *, struct file *); + struct list_head lh; +}; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index cdabc2fc02f7..f560d1705afe 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -79,6 +79,7 @@ struct proc_dir_entry { int pde_users; /* number of callers into module in progress */ spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ struct completion *pde_unload_completion; + struct list_head pde_openers; /* who did ->open, but not ->release */ }; struct kcore_list { -- cgit v1.2.3 From 3ae4eed34be0177a8e003411a84e4ee212adbced Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 25 Jul 2008 01:48:34 -0700 Subject: proper pid{hash,map}_init() prototypes This patch adds proper prototypes for pid{hash,map}_init() in include/linux/pid_namespace.h Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid_namespace.h | 3 +++ init/main.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index caff5283d15c..1a49ab5ec7b9 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -85,4 +85,7 @@ static inline struct task_struct *task_child_reaper(struct task_struct *tsk) return tsk->nsproxy->pid_ns->child_reaper; } +void pidhash_init(void); +void pidmap_init(void); + #endif /* _LINUX_PID_NS_H */ diff --git a/init/main.c b/init/main.c index 2769dc031c62..0604cbcaf1e4 100644 --- a/init/main.c +++ b/init/main.c @@ -87,8 +87,6 @@ extern void init_IRQ(void); extern void 
fork_init(unsigned long); extern void mca_init(void); extern void sbus_init(void); -extern void pidhash_init(void); -extern void pidmap_init(void); extern void prio_tree_init(void); extern void radix_tree_init(void); extern void free_initmem(void); -- cgit v1.2.3 From 33166b1ffca5e1945246bcaa77d72a22b0d3e531 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Fri, 25 Jul 2008 01:48:35 -0700 Subject: shrink struct pid by removing padding on 64 bit builds When struct pid is built on a 64 bit platform gcc has to insert padding to maintain the correct alignment, by simply reordering its members the memory usage shrinks from 88 bytes to 80. I've successfully run with this patch on my desktop AMD64 machine. There are no significant kernel size changes to a default config.X86_64 on the latest git v2.6.26-rc1 text data bss dec hex filename 5404828 976760 734280 7115868 6c945c vmlinux 5404811 976760 734280 7115851 6c944b vmlinux.pid-patch Acked-by: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index c21c7e8124a7..6f084b9e2c40 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -57,10 +57,10 @@ struct upid { struct pid { atomic_t count; + unsigned int level; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct rcu_head rcu; - unsigned int level; struct upid numbers[1]; }; -- cgit v1.2.3 From 19b0cfcca41dd772065671ad0584e1cea0f3fd13 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 25 Jul 2008 01:48:35 -0700 Subject: pidns: remove now unused kill_proc function This function operated on a pid_t to kill a task, which is no longer valid in a containerized system. It has finally lost all its users and we can safely remove it from the tree. Signed-off-by: Pavel Emelyanov Cc: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - kernel/signal.c | 12 ------------ 2 files changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0560999eb1db..134cb5cb506c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1800,7 +1800,6 @@ extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); extern int send_sig(int, struct task_struct *, int); extern void zap_other_threads(struct task_struct *p); -extern int kill_proc(pid_t, int, int); extern struct sigqueue *sigqueue_alloc(void); extern void sigqueue_free(struct sigqueue *); extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); diff --git a/kernel/signal.c b/kernel/signal.c index 5c7b7eaa0dc6..82c3545596c5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1228,17 +1228,6 @@ int kill_pid(struct pid *pid, int sig, int priv) } EXPORT_SYMBOL(kill_pid); -int -kill_proc(pid_t pid, int sig, int priv) -{ - int ret; - - rcu_read_lock(); - ret = kill_pid_info(sig, __si_special(priv), find_pid(pid)); - rcu_read_unlock(); - return ret; -} - /* * These functions support sending signals using preallocated sigqueue * structures. 
This is needed "because realtime applications cannot @@ -1906,7 +1895,6 @@ EXPORT_SYMBOL(recalc_sigpending); EXPORT_SYMBOL_GPL(dequeue_signal); EXPORT_SYMBOL(flush_signals); EXPORT_SYMBOL(force_sig); -EXPORT_SYMBOL(kill_proc); EXPORT_SYMBOL(ptrace_notify); EXPORT_SYMBOL(send_sig); EXPORT_SYMBOL(send_sig_info); -- cgit v1.2.3 From e49859e71e0318b564de1546bdc30fab738f9deb Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 25 Jul 2008 01:48:36 -0700 Subject: pidns: remove now unused find_pid function. This one had the only users so far - the kill_proc, which is removed, so drop this (invalid in namespaced world) call too. And of course - erase all references on it from comments. Signed-off-by: Pavel Emelyanov Cc: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid.h | 4 +--- include/linux/sched.h | 2 +- kernel/pid.c | 8 +------- 3 files changed, 3 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index 6f084b9e2c40..ff1b2a5814d4 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -48,7 +48,7 @@ enum pid_type */ struct upid { - /* Try to keep pid_chain in the same cacheline as nr for find_pid */ + /* Try to keep pid_chain in the same cacheline as nr for find_vpid */ int nr; struct pid_namespace *ns; struct hlist_node pid_chain; @@ -105,14 +105,12 @@ extern struct pid_namespace init_pid_ns; * or rcu_read_lock() held. * * find_pid_ns() finds the pid in the namespace specified - * find_pid() find the pid by its global id, i.e. in the init namespace * find_vpid() finr the pid by its virtual id, i.e. in the current namespace * * see also find_task_by_pid() set in include/linux/sched.h */ extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns); extern struct pid *find_vpid(int nr); -extern struct pid *find_pid(int nr); /* * Lookup a PID in the hash table, and return with it's count elevated. diff --git a/include/linux/sched.h b/include/linux/sched.h index 134cb5cb506c..182da1550fad 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1718,7 +1718,7 @@ extern struct pid_namespace init_pid_ns; * find_task_by_pid(): * finds a task by its global pid * - * see also find_pid() etc in include/linux/pid.h + * see also find_vpid() etc in include/linux/pid.h */ extern struct task_struct *find_task_by_pid_type_ns(int type, int pid, diff --git a/kernel/pid.c b/kernel/pid.c index 753fd90d9ec1..064e76afa507 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -309,12 +309,6 @@ struct pid *find_vpid(int nr) } EXPORT_SYMBOL_GPL(find_vpid); -struct pid *find_pid(int nr) -{ - return find_pid_ns(nr, &init_pid_ns); -} -EXPORT_SYMBOL_GPL(find_pid); - /* * attach_pid() must be called with the tasklist_lock write-held. */ @@ -483,7 +477,7 @@ EXPORT_SYMBOL(task_session_nr_ns); /* * Used by proc to find the first pid that is greater then or equal to nr. * - * If there is a pid at nr this function is exactly the same as find_pid. + * If there is a pid at nr this function is exactly the same as find_pid_ns. */ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) { -- cgit v1.2.3 From dbda0de52618d13d1b927c7ba7bb839cfddc4e8c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 25 Jul 2008 01:48:37 -0700 Subject: pidns: remove find_task_by_pid, unused for a long time It seems to me that it was a mistake marking this function as deprecated and scheduling it for removal, rather than resolutely removing it after the last caller's death. 
Anyway - better late, then never. Signed-off-by: Pavel Emelyanov Cc: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/feature-removal-schedule.txt | 18 ------------------ include/linux/pid.h | 2 +- include/linux/sched.h | 6 ------ 3 files changed, 1 insertion(+), 25 deletions(-) (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 09c4a1efb8e3..721c71b86e06 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -138,24 +138,6 @@ Who: Kay Sievers --------------------------- -What: find_task_by_pid -When: 2.6.26 -Why: With pid namespaces, calling this funciton will return the - wrong task when called from inside a namespace. - - The best way to save a task pid and find a task by this - pid later, is to find this task's struct pid pointer (or get - it directly from the task) and call pid_task() later. - - If someone really needs to get a task by its pid_t, then - he most likely needs the find_task_by_vpid() to get the - task from the same namespace as the current task is in, but - this may be not so in general. - -Who: Pavel Emelyanov - ---------------------------- - What: ACPI procfs interface When: July 2008 Why: ACPI sysfs conversion should be finished by January 2008. diff --git a/include/linux/pid.h b/include/linux/pid.h index ff1b2a5814d4..22921ac4cfd9 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -107,7 +107,7 @@ extern struct pid_namespace init_pid_ns; * find_pid_ns() finds the pid in the namespace specified * find_vpid() finr the pid by its virtual id, i.e. in the current namespace * - * see also find_task_by_pid() set in include/linux/sched.h + * see also find_task_by_vpid() set in include/linux/sched.h */ extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns); extern struct pid *find_vpid(int nr); diff --git a/include/linux/sched.h b/include/linux/sched.h index 182da1550fad..354ef478a80d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1715,8 +1715,6 @@ extern struct pid_namespace init_pid_ns; * finds a task by its pid in the specified namespace * find_task_by_vpid(): * finds a task by its virtual pid - * find_task_by_pid(): - * finds a task by its global pid * * see also find_vpid() etc in include/linux/pid.h */ @@ -1724,10 +1722,6 @@ extern struct pid_namespace init_pid_ns; extern struct task_struct *find_task_by_pid_type_ns(int type, int pid, struct pid_namespace *ns); -static inline struct task_struct *__deprecated find_task_by_pid(pid_t nr) -{ - return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns); -} extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); -- cgit v1.2.3 From 49b5cf34727a6c1be1568ab28e89a2d9a6bf51e0 Mon Sep 17 00:00:00 2001 From: Jonathan Lim Date: Fri, 25 Jul 2008 01:48:40 -0700 Subject: accounting: account for user time when updating memory integrals Adapt acct_update_integrals() to include user time when calculating the time difference. The units of acct_rss_mem1 and acct_vm_mem1 are also changed from pages-jiffies to pages-usecs to avoid calling jiffies_to_usecs() in xacct_add_tsk() which might overflow. 
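The unit change is easy to model outside the kernel. The sketch below is not the kernel code: it uses plain 64-bit microsecond counters instead of cputime_t and invents its own names, purely to show how a (stime + utime) delta in usecs accumulates into pages-usec integrals that can later be converted with PAGE_SIZE / MB and no jiffies_to_usecs() call:

#include <stdint.h>

struct acct_model {
	uint64_t timexpd_usec;	/* (stime + utime) already accounted, usecs */
	uint64_t rss_mem1;	/* accumulated rss usage, pages-usec */
	uint64_t vm_mem1;	/* accumulated vm usage, pages-usec */
};

static void model_update_integrals(struct acct_model *a,
				   uint64_t stime_usec, uint64_t utime_usec,
				   uint64_t rss_pages, uint64_t vm_pages)
{
	uint64_t now = stime_usec + utime_usec;
	uint64_t delta = now - a->timexpd_usec;

	if (delta == 0)
		return;

	a->timexpd_usec = now;
	a->rss_mem1 += delta * rss_pages;	/* pages-usec */
	a->vm_mem1 += delta * vm_pages;		/* pages-usec */
}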
Signed-off-by: Jonathan Lim Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- kernel/sched.c | 2 ++ kernel/tsacct.c | 21 ++++++++++++++------- 3 files changed, 17 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 354ef478a80d..af780f299c7c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1257,7 +1257,7 @@ struct task_struct { #if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ - cputime_t acct_stimexpd;/* stime since last update */ + cputime_t acct_timexpd; /* stime + utime since last update */ #endif #ifdef CONFIG_CPUSETS nodemask_t mems_allowed; diff --git a/kernel/sched.c b/kernel/sched.c index 6acf749d3336..0047bd9b96aa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4046,6 +4046,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime) cpustat->nice = cputime64_add(cpustat->nice, tmp); else cpustat->user = cputime64_add(cpustat->user, tmp); + /* Account for user time used */ + acct_update_integrals(p); } /* diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 4ab1b584961b..1da6990af8e0 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -84,9 +84,9 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) { struct mm_struct *mm; - /* convert pages-jiffies to Mbyte-usec */ - stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB; - stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB; + /* convert pages-usec to Mbyte-usec */ + stats->coremem = p->acct_rss_mem1 * PAGE_SIZE / MB; + stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE / MB; mm = get_task_mm(p); if (mm) { /* adjust to KB unit */ @@ -118,12 +118,19 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) void acct_update_integrals(struct task_struct *tsk) { if (likely(tsk->mm)) { - long delta = cputime_to_jiffies( - cputime_sub(tsk->stime, tsk->acct_stimexpd)); + cputime_t time, dtime; + struct timeval value; + u64 delta; + + time = tsk->stime + tsk->utime; + dtime = cputime_sub(time, tsk->acct_timexpd); + jiffies_to_timeval(cputime_to_jiffies(dtime), &value); + delta = value.tv_sec; + delta = delta * USEC_PER_SEC + value.tv_usec; if (delta == 0) return; - tsk->acct_stimexpd = tsk->stime; + tsk->acct_timexpd = time; tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; } @@ -135,7 +142,7 @@ void acct_update_integrals(struct task_struct *tsk) */ void acct_clear_integrals(struct task_struct *tsk) { - tsk->acct_stimexpd = 0; + tsk->acct_timexpd = 0; tsk->acct_rss_mem1 = 0; tsk->acct_vm_mem1 = 0; } -- cgit v1.2.3 From 20fad13ac66ac001c19220d3d08b4de5b6cca6e1 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 25 Jul 2008 01:48:43 -0700 Subject: pidns: add the struct bsd_acct_struct pointer on pid_namespace struct All the bsdacct-related info will be stored in the area, pointer by this one. It will be NULL automatically for all new namespaces. Signed-off-by: Pavel Emelyanov Cc: Balbir Singh Cc: "Eric W. 
Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid_namespace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 1a49ab5ec7b9..1af82c4e17d4 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -14,6 +14,8 @@ struct pidmap { #define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8) +struct bsd_acct_struct; + struct pid_namespace { struct kref kref; struct pidmap pidmap[PIDMAP_ENTRIES]; @@ -25,6 +27,9 @@ struct pid_namespace { #ifdef CONFIG_PROC_FS struct vfsmount *proc_mnt; #endif +#ifdef CONFIG_BSD_PROCESS_ACCT + struct bsd_acct_struct *bacct; +#endif }; extern struct pid_namespace init_pid_ns; -- cgit v1.2.3 From 0b6b030fc30d169bb406b34b4fc60d99dde4a9c6 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 25 Jul 2008 01:48:47 -0700 Subject: bsdacct: switch from global bsd_acct_struct instance to per-pidns one Allocate the structure on the first call to sys_acct(). After this each namespace, that ordered the accounting, will live with this structure till its own death. Two notes - routines, that close the accounting on fs umount time use the init_pid_ns's acct by now; - accounting routine accounts to dying task's namespace (also by now). Signed-off-by: Pavel Emelyanov Cc: Balbir Singh Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/acct.h | 3 ++ kernel/acct.c | 84 +++++++++++++++++++++++++++++++++++++++----------- kernel/pid_namespace.c | 2 ++ 3 files changed, 71 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acct.h b/include/linux/acct.h index e8cae54e8d88..882dc7248766 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -120,17 +120,20 @@ struct acct_v3 struct vfsmount; struct super_block; struct pacct_struct; +struct pid_namespace; extern void acct_auto_close_mnt(struct vfsmount *m); extern void acct_auto_close(struct super_block *sb); extern void acct_init_pacct(struct pacct_struct *pacct); extern void acct_collect(long exitcode, int group_dead); extern void acct_process(void); +extern void acct_exit_ns(struct pid_namespace *); #else #define acct_auto_close_mnt(x) do { } while (0) #define acct_auto_close(x) do { } while (0) #define acct_init_pacct(x) do { } while (0) #define acct_collect(x,y) do { } while (0) #define acct_process() do { } while (0) +#define acct_exit_ns(ns) do { } while (0) #endif /* diff --git a/kernel/acct.c b/kernel/acct.c index 72d4760c8da8..febbbc67157e 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -93,8 +93,6 @@ struct bsd_acct_struct { static DEFINE_SPINLOCK(acct_lock); -static struct bsd_acct_struct acct_globals __cacheline_aligned; - /* * Called whenever the timer says to check the free space. */ @@ -176,7 +174,8 @@ out: * * NOTE: acct_lock MUST be held on entry and exit. 
*/ -static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file) +static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file, + struct pid_namespace *ns) { struct file *old_acct = NULL; struct pid_namespace *old_ns = NULL; @@ -188,10 +187,11 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file) acct->active = 0; acct->needcheck = 0; acct->file = NULL; + acct->ns = NULL; } if (file) { acct->file = file; - acct->ns = get_pid_ns(task_active_pid_ns(current)); + acct->ns = ns; acct->needcheck = 0; acct->active = 1; /* It's been deleted if it was used before so this is safe */ @@ -204,7 +204,6 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file) spin_unlock(&acct_lock); do_acct_process(acct, old_ns, old_acct); filp_close(old_acct, NULL); - put_pid_ns(old_ns); spin_lock(&acct_lock); } } @@ -213,6 +212,8 @@ static int acct_on(char *name) { struct file *file; int error; + struct pid_namespace *ns; + struct bsd_acct_struct *acct = NULL; /* Difference from BSD - they don't do O_APPEND */ file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0); @@ -229,18 +230,34 @@ static int acct_on(char *name) return -EIO; } + ns = task_active_pid_ns(current); + if (ns->bacct == NULL) { + acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); + if (acct == NULL) { + filp_close(file, NULL); + return -ENOMEM; + } + } + error = security_acct(file); if (error) { + kfree(acct); filp_close(file, NULL); return error; } spin_lock(&acct_lock); + if (ns->bacct == NULL) { + ns->bacct = acct; + acct = NULL; + } + mnt_pin(file->f_path.mnt); - acct_file_reopen(&acct_globals, file); + acct_file_reopen(ns->bacct, file, ns); spin_unlock(&acct_lock); mntput(file->f_path.mnt); /* it's pinned, now give up active reference */ + kfree(acct); return 0; } @@ -270,10 +287,16 @@ asmlinkage long sys_acct(const char __user *name) error = acct_on(tmp); putname(tmp); } else { + struct bsd_acct_struct *acct; + + acct = task_active_pid_ns(current)->bacct; + if (acct == NULL) + return 0; + error = security_acct(NULL); if (!error) { spin_lock(&acct_lock); - acct_file_reopen(&acct_globals, NULL); + acct_file_reopen(acct, NULL, NULL); spin_unlock(&acct_lock); } } @@ -289,9 +312,15 @@ asmlinkage long sys_acct(const char __user *name) */ void acct_auto_close_mnt(struct vfsmount *m) { + struct bsd_acct_struct *acct; + + acct = init_pid_ns.bacct; + if (acct == NULL) + return; + spin_lock(&acct_lock); - if (acct_globals.file && acct_globals.file->f_path.mnt == m) - acct_file_reopen(&acct_globals, NULL); + if (acct->file && acct->file->f_path.mnt == m) + acct_file_reopen(acct, NULL, NULL); spin_unlock(&acct_lock); } @@ -304,10 +333,29 @@ void acct_auto_close_mnt(struct vfsmount *m) */ void acct_auto_close(struct super_block *sb) { + struct bsd_acct_struct *acct; + + acct = init_pid_ns.bacct; + if (acct == NULL) + return; + spin_lock(&acct_lock); - if (acct_globals.file && - acct_globals.file->f_path.mnt->mnt_sb == sb) { - acct_file_reopen(&acct_globals, NULL); + if (acct->file && acct->file->f_path.mnt->mnt_sb == sb) + acct_file_reopen(acct, NULL, NULL); + spin_unlock(&acct_lock); +} + +void acct_exit_ns(struct pid_namespace *ns) +{ + struct bsd_acct_struct *acct; + + spin_lock(&acct_lock); + acct = ns->bacct; + if (acct != NULL) { + if (acct->file != NULL) + acct_file_reopen(acct, NULL, NULL); + + kfree(acct); } spin_unlock(&acct_lock); } @@ -587,25 +635,25 @@ void acct_collect(long exitcode, int group_dead) void acct_process(void) { struct file *file = NULL; - 
struct pid_namespace *ns; + struct pid_namespace *ns = task_active_pid_ns(current); + struct bsd_acct_struct *acct; + acct = ns->bacct; /* * accelerate the common fastpath: */ - if (!acct_globals.file) + if (!acct || !acct->file) return; spin_lock(&acct_lock); - file = acct_globals.file; + file = acct->file; if (unlikely(!file)) { spin_unlock(&acct_lock); return; } get_file(file); - ns = get_pid_ns(acct_globals.ns); spin_unlock(&acct_lock); - do_acct_process(&acct_globals, ns, file); + do_acct_process(acct, ns, file); fput(file); - put_pid_ns(ns); } diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 06331cc1c3f5..ea567b78d1aa 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -12,6 +12,7 @@ #include #include #include +#include #define BITS_PER_PAGE (PAGE_SIZE*8) @@ -181,6 +182,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) /* Child reaper for the pid namespace is going away */ pid_ns->child_reaper = NULL; + acct_exit_ns(pid_ns); return; } -- cgit v1.2.3 From 297c5d92634c809cef23d73e7b2556f2528ff7e2 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Fri, 25 Jul 2008 01:48:49 -0700 Subject: task IO accounting: provide distinct tgid/tid I/O statistics Report per-thread I/O statistics in /proc/pid/task/tid/io and aggregate parent I/O statistics in /proc/pid/io. This approach follows the same model used to account per-process and per-thread CPU times. As a practial application, this allows for example to quickly find the top I/O consumer when a process spawns many child threads that perform the actual I/O work, because the aggregated I/O statistics can always be found in /proc/pid/io. [ Oleg Nesterov points out that we should check that the task is still alive before we iterate over the threads, but also says that we can do that fixup on top of this later. 
- Linus ] Acked-by: Balbir Singh Signed-off-by: Andrea Righi Cc: Matt Heaton Cc: Shailabh Nagar Acked-by-with-comments: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 86 ++++++++++++++++++++++++++++++++++++++++++--------- include/linux/sched.h | 4 +++ kernel/exit.c | 27 ++++++++++++++++ kernel/fork.c | 6 ++++ 4 files changed, 108 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/base.c b/fs/proc/base.c index 58c3e6a8e15e..a891fe4cb43b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2376,29 +2376,82 @@ static int proc_base_fill_cache(struct file *filp, void *dirent, } #ifdef CONFIG_TASK_IO_ACCOUNTING -static int proc_pid_io_accounting(struct task_struct *task, char *buffer) -{ +static int do_io_accounting(struct task_struct *task, char *buffer, int whole) +{ + u64 rchar, wchar, syscr, syscw; + struct task_io_accounting ioac; + + if (!whole) { + rchar = task->rchar; + wchar = task->wchar; + syscr = task->syscr; + syscw = task->syscw; + memcpy(&ioac, &task->ioac, sizeof(ioac)); + } else { + unsigned long flags; + struct task_struct *t = task; + rchar = wchar = syscr = syscw = 0; + memset(&ioac, 0, sizeof(ioac)); + + rcu_read_lock(); + do { + rchar += t->rchar; + wchar += t->wchar; + syscr += t->syscr; + syscw += t->syscw; + + ioac.read_bytes += t->ioac.read_bytes; + ioac.write_bytes += t->ioac.write_bytes; + ioac.cancelled_write_bytes += + t->ioac.cancelled_write_bytes; + t = next_thread(t); + } while (t != task); + rcu_read_unlock(); + + if (lock_task_sighand(task, &flags)) { + struct signal_struct *sig = task->signal; + + rchar += sig->rchar; + wchar += sig->wchar; + syscr += sig->syscr; + syscw += sig->syscw; + + ioac.read_bytes += sig->ioac.read_bytes; + ioac.write_bytes += sig->ioac.write_bytes; + ioac.cancelled_write_bytes += + sig->ioac.cancelled_write_bytes; + + unlock_task_sighand(task, &flags); + } + } + return sprintf(buffer, -#ifdef CONFIG_TASK_XACCT "rchar: %llu\n" "wchar: %llu\n" "syscr: %llu\n" "syscw: %llu\n" -#endif "read_bytes: %llu\n" "write_bytes: %llu\n" "cancelled_write_bytes: %llu\n", -#ifdef CONFIG_TASK_XACCT - (unsigned long long)task->rchar, - (unsigned long long)task->wchar, - (unsigned long long)task->syscr, - (unsigned long long)task->syscw, -#endif - (unsigned long long)task->ioac.read_bytes, - (unsigned long long)task->ioac.write_bytes, - (unsigned long long)task->ioac.cancelled_write_bytes); + (unsigned long long)rchar, + (unsigned long long)wchar, + (unsigned long long)syscr, + (unsigned long long)syscw, + (unsigned long long)ioac.read_bytes, + (unsigned long long)ioac.write_bytes, + (unsigned long long)ioac.cancelled_write_bytes); +} + +static int proc_tid_io_accounting(struct task_struct *task, char *buffer) +{ + return do_io_accounting(task, buffer, 0); } -#endif + +static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) +{ + return do_io_accounting(task, buffer, 1); +} +#endif /* CONFIG_TASK_IO_ACCOUNTING */ /* * Thread groups @@ -2470,7 +2523,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING - INF("io", S_IRUGO, pid_io_accounting), + INF("io", S_IRUGO, tgid_io_accounting), #endif }; @@ -2797,6 +2850,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), #endif +#ifdef CONFIG_TASK_IO_ACCOUNTING + INF("io", S_IRUGO, tid_io_accounting), +#endif }; static int 
proc_tid_base_readdir(struct file * filp, diff --git a/include/linux/sched.h b/include/linux/sched.h index af780f299c7c..d22ffe06d0eb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -506,6 +506,10 @@ struct signal_struct { unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; +#ifdef CONFIG_TASK_XACCT + u64 rchar, wchar, syscr, syscw; +#endif + struct task_io_accounting ioac; /* * Cumulative ns of scheduled CPU time for dead threads in the diff --git a/kernel/exit.c b/kernel/exit.c index 8a4d4d12e294..ad933bb29ec7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -120,6 +120,18 @@ static void __exit_signal(struct task_struct *tsk) sig->nivcsw += tsk->nivcsw; sig->inblock += task_io_get_inblock(tsk); sig->oublock += task_io_get_oublock(tsk); +#ifdef CONFIG_TASK_XACCT + sig->rchar += tsk->rchar; + sig->wchar += tsk->wchar; + sig->syscr += tsk->syscr; + sig->syscw += tsk->syscw; +#endif /* CONFIG_TASK_XACCT */ +#ifdef CONFIG_TASK_IO_ACCOUNTING + sig->ioac.read_bytes += tsk->ioac.read_bytes; + sig->ioac.write_bytes += tsk->ioac.write_bytes; + sig->ioac.cancelled_write_bytes += + tsk->ioac.cancelled_write_bytes; +#endif /* CONFIG_TASK_IO_ACCOUNTING */ sig->sum_sched_runtime += tsk->se.sum_exec_runtime; sig = NULL; /* Marker for below. */ } @@ -1366,6 +1378,21 @@ static int wait_task_zombie(struct task_struct *p, int options, psig->coublock += task_io_get_oublock(p) + sig->oublock + sig->coublock; +#ifdef CONFIG_TASK_XACCT + psig->rchar += p->rchar + sig->rchar; + psig->wchar += p->wchar + sig->wchar; + psig->syscr += p->syscr + sig->syscr; + psig->syscw += p->syscw + sig->syscw; +#endif /* CONFIG_TASK_XACCT */ +#ifdef CONFIG_TASK_IO_ACCOUNTING + psig->ioac.read_bytes += + p->ioac.read_bytes + sig->ioac.read_bytes; + psig->ioac.write_bytes += + p->ioac.write_bytes + sig->ioac.write_bytes; + psig->ioac.cancelled_write_bytes += + p->ioac.cancelled_write_bytes + + sig->ioac.cancelled_write_bytes; +#endif /* CONFIG_TASK_IO_ACCOUNTING */ spin_unlock_irq(&p->parent->sighand->siglock); } diff --git a/kernel/fork.c b/kernel/fork.c index 813d5c89b9d5..b99d73e971a4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -812,6 +812,12 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; +#ifdef CONFIG_TASK_XACCT + sig->rchar = sig->wchar = sig->syscr = sig->syscw = 0; +#endif +#ifdef CONFIG_TASK_IO_ACCOUNTING + memset(&sig->ioac, 0, sizeof(sig->ioac)); +#endif sig->sum_sched_runtime = 0; INIT_LIST_HEAD(&sig->cpu_timers[0]); INIT_LIST_HEAD(&sig->cpu_timers[1]); -- cgit v1.2.3 From 873b47717732c2f33a4b14de02571a4295a02f0c Mon Sep 17 00:00:00 2001 From: Keika Kobayashi Date: Fri, 25 Jul 2008 01:48:52 -0700 Subject: per-task-delay-accounting: add memory reclaim delay Sometimes, application responses become bad under heavy memory load. Applications take a bit time to reclaim memory. The statistics, how long memory reclaim takes, will be useful to measure memory usage. This patch adds accounting memory reclaim to per-task-delay-accounting for accounting the time of do_try_to_free_pages(). - When System is under low memory load, memory reclaim may not occur. 
$ free total used free shared buffers cached Mem: 8197800 1577300 6620500 0 4808 1516724 -/+ buffers/cache: 55768 8142032 Swap: 16386292 0 16386292 $ vmstat 1 procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu---- r b swpd free buff cache si so bi bo in cs us sy id wa 0 0 0 5069748 10612 3014060 0 0 0 0 3 26 0 0 100 0 0 0 0 5069748 10612 3014060 0 0 0 0 4 22 0 0 100 0 0 0 0 5069748 10612 3014060 0 0 0 0 3 18 0 0 100 0 Measure the time of tar command. $ ls -s test.dat 1501472 test.dat $ time tar cvf test.tar test.dat real 0m13.388s user 0m0.116s sys 0m5.304s $ ./delayget -d -p CPU count real total virtual total delay total 428 5528345500 5477116080 62749891 IO count delay total 338 8078977189 SWAP count delay total 0 0 RECLAIM count delay total 0 0 - When system is under heavy memory load memory reclaim may occur. $ vmstat 1 procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu---- r b swpd free buff cache si so bi bo in cs us sy id wa 0 0 7159032 49724 1812 3012 0 0 0 0 3 24 0 0 100 0 0 0 7159032 49724 1812 3012 0 0 0 0 4 24 0 0 100 0 0 0 7159032 49848 1812 3012 0 0 0 0 3 22 0 0 100 0 In this case, one process uses more 8G memory by execution of malloc() and memset(). $ time tar cvf test.tar test.dat real 1m38.563s <- increased by 85 sec user 0m0.140s sys 0m7.060s $ ./delayget -d -p CPU count real total virtual total delay total 9021 7140446250 7315277975 923201824 IO count delay total 8965 90466349669 SWAP count delay total 3 21036367 RECLAIM count delay total 740 61011951153 In the later case, the value of RECLAIM is increasing. So, taskstats can show how much memory reclaim influences TAT. Signed-off-by: Keika Kobayashi Acked-by: Balbir Singh Acked-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/delayacct.h | 19 +++++++++++++++++++ include/linux/sched.h | 4 ++++ kernel/delayacct.c | 13 +++++++++++++ mm/vmscan.c | 5 +++++ 4 files changed, 41 insertions(+) (limited to 'include/linux') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index ab94bc083558..f352f06fa063 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -39,6 +39,8 @@ extern void __delayacct_blkio_start(void); extern void __delayacct_blkio_end(void); extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); extern __u64 __delayacct_blkio_ticks(struct task_struct *); +extern void __delayacct_freepages_start(void); +extern void __delayacct_freepages_end(void); static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) { @@ -107,6 +109,18 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) return 0; } +static inline void delayacct_freepages_start(void) +{ + if (current->delays) + __delayacct_freepages_start(); +} + +static inline void delayacct_freepages_end(void) +{ + if (current->delays) + __delayacct_freepages_end(); +} + #else static inline void delayacct_set_flag(int flag) {} @@ -129,6 +143,11 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) { return 0; } static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) { return 0; } +static inline void delayacct_freepages_start(void) +{} +static inline void delayacct_freepages_end(void) +{} + #endif /* CONFIG_TASK_DELAY_ACCT */ #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index d22ffe06d0eb..42036ffe6b00 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -672,6 +672,10 @@ struct task_delay_info { /* io operations performed */ u32 
swapin_count; /* total count of the number of swapin block */ /* io operations performed */ + + struct timespec freepages_start, freepages_end; + u64 freepages_delay; /* wait for memory reclaim */ + u32 freepages_count; /* total count of memory reclaim */ }; #endif /* CONFIG_TASK_DELAY_ACCT */ diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 10e43fd8b721..84b6782a2ce4 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -165,3 +165,16 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk) return ret; } +void __delayacct_freepages_start(void) +{ + delayacct_start(¤t->delays->freepages_start); +} + +void __delayacct_freepages_end(void) +{ + delayacct_end(¤t->delays->freepages_start, + ¤t->delays->freepages_end, + ¤t->delays->freepages_delay, + ¤t->delays->freepages_count); +} + diff --git a/mm/vmscan.c b/mm/vmscan.c index 967d30ccd92b..26672c6cd3ce 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -1316,6 +1317,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct zone *zone; enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask); + delayacct_freepages_start(); + if (scan_global_lru(sc)) count_vm_event(ALLOCSTALL); /* @@ -1396,6 +1399,8 @@ out: } else mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority); + delayacct_freepages_end(); + return ret; } -- cgit v1.2.3 From 016ae219b920c4e606088761d3d6070cdf8ba706 Mon Sep 17 00:00:00 2001 From: Keika Kobayashi Date: Fri, 25 Jul 2008 01:48:53 -0700 Subject: per-task-delay-accounting: update taskstats for memory reclaim delay Add members for memory reclaim delay to taskstats, and accumulate them in __delayacct_add_tsk() . Signed-off-by: Keika Kobayashi Cc: Hiroshi Shimamoto Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/accounting/taskstats-struct.txt | 7 +++++++ include/linux/taskstats.h | 6 +++++- kernel/delayacct.c | 3 +++ 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt index cd784f46bf8a..b988d110db59 100644 --- a/Documentation/accounting/taskstats-struct.txt +++ b/Documentation/accounting/taskstats-struct.txt @@ -26,6 +26,8 @@ There are three different groups of fields in the struct taskstats: 5) Time accounting for SMT machines +6) Extended delay accounting fields for memory reclaim + Future extension should add fields to the end of the taskstats struct, and should not change the relative position of each field within the struct. 
@@ -170,4 +172,9 @@ struct taskstats { __u64 ac_utimescaled; /* utime scaled on frequency etc */ __u64 ac_stimescaled; /* stime scaled on frequency etc */ __u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */ + +6) Extended delay accounting fields for memory reclaim + /* Delay waiting for memory reclaim */ + __u64 freepages_count; + __u64 freepages_delay_total; } diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index 5d69c0744fff..18269e956a71 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h @@ -31,7 +31,7 @@ */ -#define TASKSTATS_VERSION 6 +#define TASKSTATS_VERSION 7 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -157,6 +157,10 @@ struct taskstats { __u64 ac_utimescaled; /* utime scaled on frequency etc */ __u64 ac_stimescaled; /* stime scaled on frequency etc */ __u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */ + + /* Delay waiting for memory reclaim */ + __u64 freepages_count; + __u64 freepages_delay_total; }; diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 84b6782a2ce4..b3179dad71be 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -145,8 +145,11 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; tmp = d->swapin_delay_total + tsk->delays->swapin_delay; d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp; + tmp = d->freepages_delay_total + tsk->delays->freepages_delay; + d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp; d->blkio_count += tsk->delays->blkio_count; d->swapin_count += tsk->delays->swapin_count; + d->freepages_count += tsk->delays->freepages_count; spin_unlock_irqrestore(&tsk->delays->lock, flags); done: -- cgit v1.2.3 From bde74e4bc64415b142e556a34d295a52a1b7da9d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 25 Jul 2008 01:48:57 -0700 Subject: locks: add special return value for asynchronous locks Use a special error value FILE_LOCK_DEFERRED to mean that a locking operation returned asynchronously. This is returned by posix_lock_file() for sleeping locks to mean that the lock has been queued on the block list, and will be woken up when it might become available and needs to be retried (either fl_lmops->fl_notify() is called or fl_wait is woken up). f_op->lock() to mean either the above, or that the filesystem will call back with fl_lmops->fl_grant() when the result of the locking operation is known. The filesystem can do this for sleeping as well as non-sleeping locks. This is to make sure, that return values of -EAGAIN and -EINPROGRESS by filesystems are not mistaken to mean an asynchronous locking. This also makes error handling in fs/locks.c and lockd/svclock.c slightly cleaner. Signed-off-by: Miklos Szeredi Cc: Trond Myklebust Cc: "J. 
Bruce Fields" Cc: Matthew Wilcox Cc: David Teigland Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dlm/plock.c | 2 +- fs/lockd/svclock.c | 13 ++++--------- fs/locks.c | 28 ++++++++++++++-------------- include/linux/fs.h | 6 ++++++ 4 files changed, 25 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index 78878c5781ca..eba87ff3177b 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -116,7 +116,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, if (xop->callback == NULL) wait_event(recv_wq, (op->done != 0)); else { - rv = -EINPROGRESS; + rv = FILE_LOCK_DEFERRED; goto out; } diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 821b9acdfb66..cf0d5c2c318d 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -418,8 +418,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; case -EAGAIN: ret = nlm_lck_denied; - break; - case -EINPROGRESS: + goto out; + case FILE_LOCK_DEFERRED: if (wait) break; /* Filesystem lock operation is in progress @@ -434,10 +434,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } - ret = nlm_lck_denied; - if (!wait) - goto out; - ret = nlm_lck_blocked; /* Append to list of blocked */ @@ -507,7 +503,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, } error = vfs_test_lock(file->f_file, &lock->fl); - if (error == -EINPROGRESS) { + if (error == FILE_LOCK_DEFERRED) { ret = nlmsvc_defer_lock_rqst(rqstp, block); goto out; } @@ -731,8 +727,7 @@ nlmsvc_grant_blocked(struct nlm_block *block) switch (error) { case 0: break; - case -EAGAIN: - case -EINPROGRESS: + case FILE_LOCK_DEFERRED: dprintk("lockd: lock still blocked error %d\n", error); nlmsvc_insert_block(block, NLM_NEVER); nlmsvc_release_block(block); diff --git a/fs/locks.c b/fs/locks.c index dce8c747371c..1ce57b4b362c 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -779,8 +779,10 @@ find_conflict: if (!flock_locks_conflict(request, fl)) continue; error = -EAGAIN; - if (request->fl_flags & FL_SLEEP) - locks_insert_block(fl, request); + if (!(request->fl_flags & FL_SLEEP)) + goto out; + error = FILE_LOCK_DEFERRED; + locks_insert_block(fl, request); goto out; } if (request->fl_flags & FL_ACCESS) @@ -836,7 +838,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str error = -EDEADLK; if (posix_locks_deadlock(request, fl)) goto out; - error = -EAGAIN; + error = FILE_LOCK_DEFERRED; locks_insert_block(fl, request); goto out; } @@ -1035,7 +1037,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl) might_sleep (); for (;;) { error = posix_lock_file(filp, fl, NULL); - if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) + if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); if (!error) @@ -1107,9 +1109,7 @@ int locks_mandatory_area(int read_write, struct inode *inode, for (;;) { error = __posix_lock_file(inode, &fl, NULL); - if (error != -EAGAIN) - break; - if (!(fl.fl_flags & FL_SLEEP)) + if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); if (!error) { @@ -1531,7 +1531,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl) might_sleep(); for (;;) { error = flock_lock_file(filp, fl); - if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) + if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); if (!error) @@ -1716,17 +1716,17 @@ out: 
* fl_grant is set. Callers expecting ->lock() to return asynchronously * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if) * the request is for a blocking lock. When ->lock() does return asynchronously, - * it must return -EINPROGRESS, and call ->fl_grant() when the lock + * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock * request completes. * If the request is for non-blocking lock the file system should return - * -EINPROGRESS then try to get the lock and call the callback routine with - * the result. If the request timed out the callback routine will return a + * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine + * with the result. If the request timed out the callback routine will return a * nonzero return code and the file system should release the lock. The file * system is also responsible to keep a corresponding posix lock when it * grants a lock so the VFS can find out which locks are locally held and do * the correct lock cleanup when required. * The underlying filesystem must not drop the kernel lock or call - * ->fl_grant() before returning to the caller with a -EINPROGRESS + * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED * return code. */ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) @@ -1804,7 +1804,7 @@ again: else { for (;;) { error = posix_lock_file(filp, file_lock, NULL); - if (error != -EAGAIN || cmd == F_SETLK) + if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(file_lock->fl_wait, !file_lock->fl_next); @@ -1941,7 +1941,7 @@ again: else { for (;;) { error = posix_lock_file(filp, file_lock, NULL); - if (error != -EAGAIN || cmd == F_SETLK64) + if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(file_lock->fl_wait, !file_lock->fl_next); diff --git a/include/linux/fs.h b/include/linux/fs.h index 4b86f806014c..49d8eb7a71be 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -885,6 +885,12 @@ static inline int file_check_writeable(struct file *filp) #define FL_CLOSE 64 /* unlock on close */ #define FL_SLEEP 128 /* A blocking lock */ +/* + * Special return value from posix_lock_file() and vfs_lock_file() for + * asynchronous locking. + */ +#define FILE_LOCK_DEFERRED 1 + /* * The POSIX file lock owner is determined by * the "struct files_struct" in the thread group -- cgit v1.2.3 From 33670fa296860283f04a7975b8c790f101e43a6e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 25 Jul 2008 01:49:02 -0700 Subject: fuse: nfs export special lookups Implement the get_parent export operation by sending a LOOKUP request with ".." as the name. Implement looking up an inode by node ID after it has been evicted from the cache. This is done by seding a LOOKUP request with "." as the name (for all file types, not just directories). The filesystem can set the FUSE_EXPORT_SUPPORT flag in the INIT reply, to indicate that it supports these special lookups. Thanks to John Muir for the original implementation of this feature. Signed-off-by: Miklos Szeredi Cc: "J. 
Bruce Fields" Cc: Trond Myklebust Cc: Matthew Wilcox Cc: David Teigland Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/fuse_i.h | 6 +++++ fs/fuse/inode.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++--- include/linux/fuse.h | 3 +++ 3 files changed, 72 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 5d3146da64e6..3a876076bdd1 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -363,6 +363,9 @@ struct fuse_conn { /** Do not send separate SETATTR request before open(O_TRUNC) */ unsigned atomic_o_trunc : 1; + /** Filesystem supports NFS exporting. Only set in INIT */ + unsigned export_support : 1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction @@ -473,6 +476,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int generation, struct fuse_attr *attr, u64 attr_valid, u64 attr_version); +int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, + struct fuse_entry_out *outarg, struct inode **inode); + /** * Send FORGET command */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 71fa76a48a31..7d2f7d6e22e2 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -562,6 +562,7 @@ struct fuse_inode_handle static struct dentry *fuse_get_dentry(struct super_block *sb, struct fuse_inode_handle *handle) { + struct fuse_conn *fc = get_fuse_conn_super(sb); struct inode *inode; struct dentry *entry; int err = -ESTALE; @@ -570,8 +571,27 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, goto out_err; inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid); - if (!inode) - goto out_err; + if (!inode) { + struct fuse_entry_out outarg; + struct qstr name; + + if (!fc->export_support) + goto out_err; + + name.len = 1; + name.name = "."; + err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg, + &inode); + if (err && err != -ENOENT) + goto out_err; + if (err || !inode) { + err = -ESTALE; + goto out_err; + } + err = -EIO; + if (get_node_id(inode) != handle->nodeid) + goto out_iput; + } err = -ESTALE; if (inode->i_generation != handle->generation) goto out_iput; @@ -659,11 +679,46 @@ static struct dentry *fuse_fh_to_parent(struct super_block *sb, return fuse_get_dentry(sb, &parent); } +static struct dentry *fuse_get_parent(struct dentry *child) +{ + struct inode *child_inode = child->d_inode; + struct fuse_conn *fc = get_fuse_conn(child_inode); + struct inode *inode; + struct dentry *parent; + struct fuse_entry_out outarg; + struct qstr name; + int err; + + if (!fc->export_support) + return ERR_PTR(-ESTALE); + + name.len = 2; + name.name = ".."; + err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode), + &name, &outarg, &inode); + if (err && err != -ENOENT) + return ERR_PTR(err); + if (err || !inode) + return ERR_PTR(-ESTALE); + + parent = d_alloc_anon(inode); + if (!parent) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + if (get_node_id(inode) != FUSE_ROOT_ID) { + parent->d_op = &fuse_dentry_operations; + fuse_invalidate_entry_cache(parent); + } + + return parent; +} static const struct export_operations fuse_export_operations = { .fh_to_dentry = fuse_fh_to_dentry, .fh_to_parent = fuse_fh_to_parent, .encode_fh = fuse_encode_fh, + .get_parent = fuse_get_parent, }; static const struct super_operations fuse_super_operations = { @@ -695,6 +750,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->no_lock = 1; if 
(arg->flags & FUSE_ATOMIC_O_TRUNC) fc->atomic_o_trunc = 1; + if (arg->minor >= 9) { + /* LOOKUP has dependency on proto version */ + if (arg->flags & FUSE_EXPORT_SUPPORT) + fc->export_support = 1; + } if (arg->flags & FUSE_BIG_WRITES) fc->big_writes = 1; } else { @@ -721,7 +781,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | - FUSE_BIG_WRITES; + FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/linux/fuse.h b/include/linux/fuse.h index d48282197696..265635dc9908 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -104,11 +104,14 @@ struct fuse_file_lock { /** * INIT request/reply flags + * + * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) #define FUSE_FILE_OPS (1 << 2) #define FUSE_ATOMIC_O_TRUNC (1 << 3) +#define FUSE_EXPORT_SUPPORT (1 << 4) #define FUSE_BIG_WRITES (1 << 5) /** -- cgit v1.2.3 From 8f421c595a9145959d8aab09172743132abdffdb Mon Sep 17 00:00:00 2001 From: Arthur Jones Date: Fri, 25 Jul 2008 01:49:04 -0700 Subject: edac: i5100 new intel chipset driver Preliminary support for the Intel 5100 MCH. CE and UE errors are reported along with the current DIMM label information and other memory parameters. Reasons why this is preliminary: 1) This chip has 2 independent memory controllers which, for best perforance, use interleaved accesses to the DDR2 memory. This architecture does not map very well to the current edac data structures which depend on symmetric channel access to the interleaved data. Without core changes, the best I could do for now is to map both memory controllers to different csrows (first all ranks of controller 0, then all ranks of controller 1). Someone much more familiar with the edac core than I will probably need to come up with a more general data structure to handle the interleaving and de-interleaving of the two memory controllers. 2) I have not yet tackled the de-interleaving of the rank/controller address space into the physical address space of the CPU. There is nothing fundamentally missing, it is just ending up to be a lot of code, and I'd rather keep it separate for now, esp since it doesn't work yet... 3) The code depends on a particular i5100 chip select to DIMM mainboard chip select mapping. This mapping seems obvious to me in order to support dual and single ranked memory, but it is not unique and DIMM labels could be wrong on other mainboards. There is no way to query this mapping that I know of. 4) The code requires that the i5100 is in 32GB mode. Only 4 ranks per controller, 2 ranks per DIMM are supported. I do not have hardware (nor do I expect to have hardware anytime soon) for the 48GB (6 ranks per controller) mode. 5) The serial presence detect code should be broken out into a "real" i2c driver so that decode-dimms.pl can work. 
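For point 1, the interim controller-to-csrow layout is a plain linear mapping. A standalone sketch, with RANKS_PER_CTLR standing in for the driver's per-instance ranksperctlr value (4 in the supported 32GB mode):

enum { RANKS_PER_CTLR = 4 };	/* 32GB mode: 4 ranks per controller */

/* csrows 0..3 belong to controller 0, csrows 4..7 to controller 1 */
static int rank_to_csrow(int ctlr, int rank)
{
	return ctlr * RANKS_PER_CTLR + rank;
}

static int csrow_to_cntlr(int csrow)
{
	return csrow / RANKS_PER_CTLR;
}

static int csrow_to_rank(int csrow)
{
	return csrow % RANKS_PER_CTLR;
}

This is the same arithmetic the driver uses in its i5100_rank_to_csrow(), i5100_csrow_to_cntlr() and i5100_csrow_to_rank() helpers; a future core change would be needed to represent the interleaved channels directly instead.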
Signed-off-by: Arthur Jones Signed-off-by: Doug Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/edac/Kconfig | 7 + drivers/edac/Makefile | 1 + drivers/edac/i5100_edac.c | 827 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 3 + 4 files changed, 838 insertions(+) create mode 100644 drivers/edac/i5100_edac.c (limited to 'include/linux') diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 6e6c3c4aea6b..5a11e3cbcae2 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -123,6 +123,13 @@ config EDAC_I5000 Support for error detection and correction the Intel Greekcreek/Blackford chipsets. +config EDAC_I5100 + tristate "Intel San Clemente MCH" + depends on EDAC_MM_EDAC && X86 && PCI + help + Support for error detection and correction the Intel + San Clemente MCH. + config EDAC_MPC85XX tristate "Freescale MPC85xx" depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 83807731d4a9..e5e9104b5520 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -19,6 +19,7 @@ endif obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o obj-$(CONFIG_EDAC_I5000) += i5000_edac.o +obj-$(CONFIG_EDAC_I5100) += i5100_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c new file mode 100644 index 000000000000..43430bf70181 --- /dev/null +++ b/drivers/edac/i5100_edac.c @@ -0,0 +1,827 @@ +/* + * Intel 5100 Memory Controllers kernel module + * + * This file may be distributed under the terms of the + * GNU General Public License. + * + * This module is based on the following document: + * + * Intel 5100X Chipset Memory Controller Hub (MCH) - Datasheet + * http://download.intel.com/design/chipsets/datashts/318378.pdf + * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "edac_core.h" + +/* register addresses and bit field accessors... 
*/ + +/* device 16, func 1 */ +#define I5100_MS 0x44 /* Memory Status Register */ +#define I5100_SPDDATA 0x48 /* Serial Presence Detect Status Reg */ +#define I5100_SPDDATA_RDO(a) ((a) >> 15 & 1) +#define I5100_SPDDATA_SBE(a) ((a) >> 13 & 1) +#define I5100_SPDDATA_BUSY(a) ((a) >> 12 & 1) +#define I5100_SPDDATA_DATA(a) ((a) & ((1 << 8) - 1)) +#define I5100_SPDCMD 0x4c /* Serial Presence Detect Command Reg */ +#define I5100_SPDCMD_DTI(a) (((a) & ((1 << 4) - 1)) << 28) +#define I5100_SPDCMD_CKOVRD(a) (((a) & 1) << 27) +#define I5100_SPDCMD_SA(a) (((a) & ((1 << 3) - 1)) << 24) +#define I5100_SPDCMD_BA(a) (((a) & ((1 << 8) - 1)) << 16) +#define I5100_SPDCMD_DATA(a) (((a) & ((1 << 8) - 1)) << 8) +#define I5100_SPDCMD_CMD(a) ((a) & 1) +#define I5100_TOLM 0x6c /* Top of Low Memory */ +#define I5100_TOLM_TOLM(a) ((a) >> 12 & ((1 << 4) - 1)) +#define I5100_MIR0 0x80 /* Memory Interleave Range 0 */ +#define I5100_MIR1 0x84 /* Memory Interleave Range 1 */ +#define I5100_AMIR_0 0x8c /* Adjusted Memory Interleave Range 0 */ +#define I5100_AMIR_1 0x90 /* Adjusted Memory Interleave Range 1 */ +#define I5100_MIR_LIMIT(a) ((a) >> 4 & ((1 << 12) - 1)) +#define I5100_MIR_WAY1(a) ((a) >> 1 & 1) +#define I5100_MIR_WAY0(a) ((a) & 1) +#define I5100_FERR_NF_MEM 0xa0 /* MC First Non Fatal Errors */ +#define I5100_FERR_NF_MEM_CHAN_INDX(a) ((a) >> 28 & 1) +#define I5100_FERR_NF_MEM_SPD_MASK (1 << 18) +#define I5100_FERR_NF_MEM_M16ERR_MASK (1 << 16) +#define I5100_FERR_NF_MEM_M15ERR_MASK (1 << 15) +#define I5100_FERR_NF_MEM_M14ERR_MASK (1 << 14) +#define I5100_FERR_NF_MEM_ +#define I5100_FERR_NF_MEM_ +#define I5100_FERR_NF_MEM_ANY_MASK \ + (I5100_FERR_NF_MEM_M16ERR_MASK | \ + I5100_FERR_NF_MEM_M15ERR_MASK | \ + I5100_FERR_NF_MEM_M14ERR_MASK) +#define I5100_FERR_NF_MEM_ANY(a) ((a) & I5100_FERR_NF_MEM_ANY_MASK) +#define I5100_NERR_NF_MEM 0xa4 /* MC Next Non-Fatal Errors */ +#define I5100_NERR_NF_MEM_ANY(a) I5100_FERR_NF_MEM_ANY(a) + +/* device 21 and 22, func 0 */ +#define I5100_MTR_0 0x154 /* Memory Technology Registers 0-3 */ +#define I5100_DMIR 0x15c /* DIMM Interleave Range */ +#define I5100_DMIR_LIMIT(a) ((a) >> 16 & ((1 << 11) - 1)) +#define I5100_DMIR_RANK(a, i) ((a) >> (4 * i) & ((1 << 2) - 1)) +#define I5100_MTR_4 0x1b0 /* Memory Technology Registers 4,5 */ +#define I5100_MTR_PRESENT(a) ((a) >> 10 & 1) +#define I5100_MTR_ETHROTTLE(a) ((a) >> 9 & 1) +#define I5100_MTR_WIDTH(a) ((a) >> 8 & 1) +#define I5100_MTR_NUMBANK(a) ((a) >> 6 & 1) +#define I5100_MTR_NUMROW(a) ((a) >> 2 & ((1 << 2) - 1)) +#define I5100_MTR_NUMCOL(a) ((a) & ((1 << 2) - 1)) +#define I5100_VALIDLOG 0x18c /* Valid Log Markers */ +#define I5100_VALIDLOG_REDMEMVALID(a) ((a) >> 2 & 1) +#define I5100_VALIDLOG_RECMEMVALID(a) ((a) >> 1 & 1) +#define I5100_VALIDLOG_NRECMEMVALID(a) ((a) & 1) +#define I5100_NRECMEMA 0x190 /* Non-Recoverable Memory Error Log Reg A */ +#define I5100_NRECMEMA_MERR(a) ((a) >> 15 & ((1 << 5) - 1)) +#define I5100_NRECMEMA_BANK(a) ((a) >> 12 & ((1 << 3) - 1)) +#define I5100_NRECMEMA_RANK(a) ((a) >> 8 & ((1 << 3) - 1)) +#define I5100_NRECMEMA_DM_BUF_ID(a) ((a) & ((1 << 8) - 1)) +#define I5100_NRECMEMB 0x194 /* Non-Recoverable Memory Error Log Reg B */ +#define I5100_NRECMEMB_CAS(a) ((a) >> 16 & ((1 << 13) - 1)) +#define I5100_NRECMEMB_RAS(a) ((a) & ((1 << 16) - 1)) +#define I5100_REDMEMA 0x198 /* Recoverable Memory Data Error Log Reg A */ +#define I5100_REDMEMA_SYNDROME(a) (a) +#define I5100_REDMEMB 0x19c /* Recoverable Memory Data Error Log Reg B */ +#define I5100_REDMEMB_ECC_LOCATOR(a) ((a) & ((1 << 18) - 1)) +#define 
I5100_RECMEMA 0x1a0 /* Recoverable Memory Error Log Reg A */ +#define I5100_RECMEMA_MERR(a) I5100_NRECMEMA_MERR(a) +#define I5100_RECMEMA_BANK(a) I5100_NRECMEMA_BANK(a) +#define I5100_RECMEMA_RANK(a) I5100_NRECMEMA_RANK(a) +#define I5100_RECMEMA_DM_BUF_ID(a) I5100_NRECMEMA_DM_BUF_ID(a) +#define I5100_RECMEMB 0x1a4 /* Recoverable Memory Error Log Reg B */ +#define I5100_RECMEMB_CAS(a) I5100_NRECMEMB_CAS(a) +#define I5100_RECMEMB_RAS(a) I5100_NRECMEMB_RAS(a) + +/* some generic limits */ +#define I5100_MAX_RANKS_PER_CTLR 6 +#define I5100_MAX_CTLRS 2 +#define I5100_MAX_RANKS_PER_DIMM 4 +#define I5100_DIMM_ADDR_LINES (6 - 3) /* 64 bits / 8 bits per byte */ +#define I5100_MAX_DIMM_SLOTS_PER_CTLR 4 +#define I5100_MAX_RANK_INTERLEAVE 4 +#define I5100_MAX_DMIRS 5 + +struct i5100_priv { + /* ranks on each dimm -- 0 maps to not present -- obtained via SPD */ + int dimm_numrank[I5100_MAX_CTLRS][I5100_MAX_DIMM_SLOTS_PER_CTLR]; + + /* + * mainboard chip select map -- maps i5100 chip selects to + * DIMM slot chip selects. In the case of only 4 ranks per + * controller, the mapping is fairly obvious but not unique. + * we map -1 -> NC and assume both controllers use the same + * map... + * + */ + int dimm_csmap[I5100_MAX_DIMM_SLOTS_PER_CTLR][I5100_MAX_RANKS_PER_DIMM]; + + /* memory interleave range */ + struct { + u64 limit; + unsigned way[2]; + } mir[I5100_MAX_CTLRS]; + + /* adjusted memory interleave range register */ + unsigned amir[I5100_MAX_CTLRS]; + + /* dimm interleave range */ + struct { + unsigned rank[I5100_MAX_RANK_INTERLEAVE]; + u64 limit; + } dmir[I5100_MAX_CTLRS][I5100_MAX_DMIRS]; + + /* memory technology registers... */ + struct { + unsigned present; /* 0 or 1 */ + unsigned ethrottle; /* 0 or 1 */ + unsigned width; /* 4 or 8 bits */ + unsigned numbank; /* 2 or 3 lines */ + unsigned numrow; /* 13 .. 16 lines */ + unsigned numcol; /* 11 .. 12 lines */ + } mtr[I5100_MAX_CTLRS][I5100_MAX_RANKS_PER_CTLR]; + + u64 tolm; /* top of low memory in bytes */ + unsigned ranksperctlr; /* number of ranks per controller */ + + struct pci_dev *mc; /* device 16 func 1 */ + struct pci_dev *ch0mm; /* device 21 func 0 */ + struct pci_dev *ch1mm; /* device 22 func 0 */ +}; + +/* map a rank/ctlr to a slot number on the mainboard */ +static int i5100_rank_to_slot(const struct mem_ctl_info *mci, + int ctlr, int rank) +{ + const struct i5100_priv *priv = mci->pvt_info; + int i; + + for (i = 0; i < I5100_MAX_DIMM_SLOTS_PER_CTLR; i++) { + int j; + const int numrank = priv->dimm_numrank[ctlr][i]; + + for (j = 0; j < numrank; j++) + if (priv->dimm_csmap[i][j] == rank) + return i * 2 + ctlr; + } + + return -1; +} + +/* + * The processor bus memory addresses are broken into three + * pieces, whereas the controller addresses are contiguous. 
+ * + * here we map from the controller address space to the + * processor address space: + * + * Processor Address Space + * +-----------------------------+ + * | | + * | "high" memory addresses | + * | | + * +-----------------------------+ <- 4GB on the i5100 + * | | + * | other non-memory addresses | + * | | + * +-----------------------------+ <- top of low memory + * | | + * | "low" memory addresses | + * | | + * +-----------------------------+ + */ +static unsigned long i5100_ctl_page_to_phys(struct mem_ctl_info *mci, + unsigned long cntlr_addr) +{ + const struct i5100_priv *priv = mci->pvt_info; + + if (cntlr_addr < priv->tolm) + return cntlr_addr; + + return (1ULL << 32) + (cntlr_addr - priv->tolm); +} + +static const char *i5100_err_msg(unsigned err) +{ + const char *merrs[] = { + "unknown", /* 0 */ + "uncorrectable data ECC on replay", /* 1 */ + "unknown", /* 2 */ + "unknown", /* 3 */ + "aliased uncorrectable demand data ECC", /* 4 */ + "aliased uncorrectable spare-copy data ECC", /* 5 */ + "aliased uncorrectable patrol data ECC", /* 6 */ + "unknown", /* 7 */ + "unknown", /* 8 */ + "unknown", /* 9 */ + "non-aliased uncorrectable demand data ECC", /* 10 */ + "non-aliased uncorrectable spare-copy data ECC", /* 11 */ + "non-aliased uncorrectable patrol data ECC", /* 12 */ + "unknown", /* 13 */ + "correctable demand data ECC", /* 14 */ + "correctable spare-copy data ECC", /* 15 */ + "correctable patrol data ECC", /* 16 */ + "unknown", /* 17 */ + "SPD protocol error", /* 18 */ + "unknown", /* 19 */ + "spare copy initiated", /* 20 */ + "spare copy completed", /* 21 */ + }; + unsigned i; + + for (i = 0; i < ARRAY_SIZE(merrs); i++) + if (1 << i & err) + return merrs[i]; + + return "none"; +} + +/* convert csrow index into a rank (per controller -- 0..5) */ +static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow) +{ + const struct i5100_priv *priv = mci->pvt_info; + + return csrow % priv->ranksperctlr; +} + +/* convert csrow index into a controller (0..1) */ +static int i5100_csrow_to_cntlr(const struct mem_ctl_info *mci, int csrow) +{ + const struct i5100_priv *priv = mci->pvt_info; + + return csrow / priv->ranksperctlr; +} + +static unsigned i5100_rank_to_csrow(const struct mem_ctl_info *mci, + int ctlr, int rank) +{ + const struct i5100_priv *priv = mci->pvt_info; + + return ctlr * priv->ranksperctlr + rank; +} + +static void i5100_handle_ce(struct mem_ctl_info *mci, + int ctlr, + unsigned bank, + unsigned rank, + unsigned long syndrome, + unsigned cas, + unsigned ras, + const char *msg) +{ + const int csrow = i5100_rank_to_csrow(mci, ctlr, rank); + + printk(KERN_ERR + "CE ctlr %d, bank %u, rank %u, syndrome 0x%lx, " + "cas %u, ras %u, csrow %u, label \"%s\": %s\n", + ctlr, bank, rank, syndrome, cas, ras, + csrow, mci->csrows[csrow].channels[0].label, msg); + + mci->ce_count++; + mci->csrows[csrow].ce_count++; + mci->csrows[csrow].channels[0].ce_count++; +} + +static void i5100_handle_ue(struct mem_ctl_info *mci, + int ctlr, + unsigned bank, + unsigned rank, + unsigned long syndrome, + unsigned cas, + unsigned ras, + const char *msg) +{ + const int csrow = i5100_rank_to_csrow(mci, ctlr, rank); + + printk(KERN_ERR + "UE ctlr %d, bank %u, rank %u, syndrome 0x%lx, " + "cas %u, ras %u, csrow %u, label \"%s\": %s\n", + ctlr, bank, rank, syndrome, cas, ras, + csrow, mci->csrows[csrow].channels[0].label, msg); + + mci->ue_count++; + mci->csrows[csrow].ue_count++; +} + +static void i5100_read_log(struct mem_ctl_info *mci, int ctlr, + u32 ferr, u32 nerr) +{ + struct i5100_priv 
*priv = mci->pvt_info; + struct pci_dev *pdev = (ctlr) ? priv->ch1mm : priv->ch0mm; + u32 dw; + u32 dw2; + unsigned syndrome = 0; + unsigned ecc_loc = 0; + unsigned merr; + unsigned bank; + unsigned rank; + unsigned cas; + unsigned ras; + + pci_read_config_dword(pdev, I5100_VALIDLOG, &dw); + + if (I5100_VALIDLOG_REDMEMVALID(dw)) { + pci_read_config_dword(pdev, I5100_REDMEMA, &dw2); + syndrome = I5100_REDMEMA_SYNDROME(dw2); + pci_read_config_dword(pdev, I5100_REDMEMB, &dw2); + ecc_loc = I5100_REDMEMB_ECC_LOCATOR(dw2); + } + + if (I5100_VALIDLOG_RECMEMVALID(dw)) { + const char *msg; + + pci_read_config_dword(pdev, I5100_RECMEMA, &dw2); + merr = I5100_RECMEMA_MERR(dw2); + bank = I5100_RECMEMA_BANK(dw2); + rank = I5100_RECMEMA_RANK(dw2); + + pci_read_config_dword(pdev, I5100_RECMEMB, &dw2); + cas = I5100_RECMEMB_CAS(dw2); + ras = I5100_RECMEMB_RAS(dw2); + + /* FIXME: not really sure if this is what merr is... + */ + if (!merr) + msg = i5100_err_msg(ferr); + else + msg = i5100_err_msg(nerr); + + i5100_handle_ce(mci, ctlr, bank, rank, syndrome, cas, ras, msg); + } + + if (I5100_VALIDLOG_NRECMEMVALID(dw)) { + const char *msg; + + pci_read_config_dword(pdev, I5100_NRECMEMA, &dw2); + merr = I5100_NRECMEMA_MERR(dw2); + bank = I5100_NRECMEMA_BANK(dw2); + rank = I5100_NRECMEMA_RANK(dw2); + + pci_read_config_dword(pdev, I5100_NRECMEMB, &dw2); + cas = I5100_NRECMEMB_CAS(dw2); + ras = I5100_NRECMEMB_RAS(dw2); + + /* FIXME: not really sure if this is what merr is... + */ + if (!merr) + msg = i5100_err_msg(ferr); + else + msg = i5100_err_msg(nerr); + + i5100_handle_ue(mci, ctlr, bank, rank, syndrome, cas, ras, msg); + } + + pci_write_config_dword(pdev, I5100_VALIDLOG, dw); +} + +static void i5100_check_error(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + u32 dw; + + + pci_read_config_dword(priv->mc, I5100_FERR_NF_MEM, &dw); + if (I5100_FERR_NF_MEM_ANY(dw)) { + u32 dw2; + + pci_read_config_dword(priv->mc, I5100_NERR_NF_MEM, &dw2); + if (dw2) + pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM, + dw2); + pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw); + + i5100_read_log(mci, I5100_FERR_NF_MEM_CHAN_INDX(dw), + I5100_FERR_NF_MEM_ANY(dw), + I5100_NERR_NF_MEM_ANY(dw2)); + } +} + +static struct pci_dev *pci_get_device_func(unsigned vendor, + unsigned device, + unsigned func) +{ + struct pci_dev *ret = NULL; + + while (1) { + ret = pci_get_device(vendor, device, ret); + + if (!ret) + break; + + if (PCI_FUNC(ret->devfn) == func) + break; + } + + return ret; +} + +static unsigned long __devinit i5100_npages(struct mem_ctl_info *mci, + int csrow) +{ + struct i5100_priv *priv = mci->pvt_info; + const unsigned ctlr_rank = i5100_csrow_to_rank(mci, csrow); + const unsigned ctlr = i5100_csrow_to_cntlr(mci, csrow); + unsigned addr_lines; + + /* dimm present? */ + if (!priv->mtr[ctlr][ctlr_rank].present) + return 0ULL; + + addr_lines = + I5100_DIMM_ADDR_LINES + + priv->mtr[ctlr][ctlr_rank].numcol + + priv->mtr[ctlr][ctlr_rank].numrow + + priv->mtr[ctlr][ctlr_rank].numbank; + + return (unsigned long) + ((unsigned long long) (1ULL << addr_lines) / PAGE_SIZE); +} + +static void __devinit i5100_init_mtr(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm }; + int i; + + for (i = 0; i < I5100_MAX_CTLRS; i++) { + int j; + struct pci_dev *pdev = mms[i]; + + for (j = 0; j < I5100_MAX_RANKS_PER_CTLR; j++) { + const unsigned addr = + (j < 4) ? 
I5100_MTR_0 + j * 2 : + I5100_MTR_4 + (j - 4) * 2; + u16 w; + + pci_read_config_word(pdev, addr, &w); + + priv->mtr[i][j].present = I5100_MTR_PRESENT(w); + priv->mtr[i][j].ethrottle = I5100_MTR_ETHROTTLE(w); + priv->mtr[i][j].width = 4 + 4 * I5100_MTR_WIDTH(w); + priv->mtr[i][j].numbank = 2 + I5100_MTR_NUMBANK(w); + priv->mtr[i][j].numrow = 13 + I5100_MTR_NUMROW(w); + priv->mtr[i][j].numcol = 10 + I5100_MTR_NUMCOL(w); + } + } +} + +/* + * FIXME: make this into a real i2c adapter (so that dimm-decode + * will work)? + */ +static int i5100_read_spd_byte(const struct mem_ctl_info *mci, + u8 ch, u8 slot, u8 addr, u8 *byte) +{ + struct i5100_priv *priv = mci->pvt_info; + u16 w; + u32 dw; + unsigned long et; + + pci_read_config_word(priv->mc, I5100_SPDDATA, &w); + if (I5100_SPDDATA_BUSY(w)) + return -1; + + dw = I5100_SPDCMD_DTI(0xa) | + I5100_SPDCMD_CKOVRD(1) | + I5100_SPDCMD_SA(ch * 4 + slot) | + I5100_SPDCMD_BA(addr) | + I5100_SPDCMD_DATA(0) | + I5100_SPDCMD_CMD(0); + pci_write_config_dword(priv->mc, I5100_SPDCMD, dw); + + /* wait up to 100ms */ + et = jiffies + HZ / 10; + udelay(100); + while (1) { + pci_read_config_word(priv->mc, I5100_SPDDATA, &w); + if (!I5100_SPDDATA_BUSY(w)) + break; + udelay(100); + } + + if (!I5100_SPDDATA_RDO(w) || I5100_SPDDATA_SBE(w)) + return -1; + + *byte = I5100_SPDDATA_DATA(w); + + return 0; +} + +/* + * fill dimm chip select map + * + * FIXME: + * o only valid for 4 ranks per controller + * o not the only way to may chip selects to dimm slots + * o investigate if there is some way to obtain this map from the bios + */ +static void __devinit i5100_init_dimm_csmap(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + int i; + + WARN_ON(priv->ranksperctlr != 4); + + for (i = 0; i < I5100_MAX_DIMM_SLOTS_PER_CTLR; i++) { + int j; + + for (j = 0; j < I5100_MAX_RANKS_PER_DIMM; j++) + priv->dimm_csmap[i][j] = -1; /* default NC */ + } + + /* only 2 chip selects per slot... 
*/ + priv->dimm_csmap[0][0] = 0; + priv->dimm_csmap[0][1] = 3; + priv->dimm_csmap[1][0] = 1; + priv->dimm_csmap[1][1] = 2; + priv->dimm_csmap[2][0] = 2; + priv->dimm_csmap[3][0] = 3; +} + +static void __devinit i5100_init_dimm_layout(struct pci_dev *pdev, + struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + int i; + + for (i = 0; i < I5100_MAX_CTLRS; i++) { + int j; + + for (j = 0; j < I5100_MAX_DIMM_SLOTS_PER_CTLR; j++) { + u8 rank; + + if (i5100_read_spd_byte(mci, i, j, 5, &rank) < 0) + priv->dimm_numrank[i][j] = 0; + else + priv->dimm_numrank[i][j] = (rank & 3) + 1; + } + } + + i5100_init_dimm_csmap(mci); +} + +static void __devinit i5100_init_interleaving(struct pci_dev *pdev, + struct mem_ctl_info *mci) +{ + u16 w; + u32 dw; + struct i5100_priv *priv = mci->pvt_info; + struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm }; + int i; + + pci_read_config_word(pdev, I5100_TOLM, &w); + priv->tolm = (u64) I5100_TOLM_TOLM(w) * 256 * 1024 * 1024; + + pci_read_config_word(pdev, I5100_MIR0, &w); + priv->mir[0].limit = (u64) I5100_MIR_LIMIT(w) << 28; + priv->mir[0].way[1] = I5100_MIR_WAY1(w); + priv->mir[0].way[0] = I5100_MIR_WAY0(w); + + pci_read_config_word(pdev, I5100_MIR1, &w); + priv->mir[1].limit = (u64) I5100_MIR_LIMIT(w) << 28; + priv->mir[1].way[1] = I5100_MIR_WAY1(w); + priv->mir[1].way[0] = I5100_MIR_WAY0(w); + + pci_read_config_word(pdev, I5100_AMIR_0, &w); + priv->amir[0] = w; + pci_read_config_word(pdev, I5100_AMIR_1, &w); + priv->amir[1] = w; + + for (i = 0; i < I5100_MAX_CTLRS; i++) { + int j; + + for (j = 0; j < 5; j++) { + int k; + + pci_read_config_dword(mms[i], I5100_DMIR + j * 4, &dw); + + priv->dmir[i][j].limit = + (u64) I5100_DMIR_LIMIT(dw) << 28; + for (k = 0; k < I5100_MAX_RANKS_PER_DIMM; k++) + priv->dmir[i][j].rank[k] = + I5100_DMIR_RANK(dw, k); + } + } + + i5100_init_mtr(mci); +} + +static void __devinit i5100_init_csrows(struct mem_ctl_info *mci) +{ + int i; + unsigned long total_pages = 0UL; + struct i5100_priv *priv = mci->pvt_info; + + for (i = 0; i < mci->nr_csrows; i++) { + const unsigned long npages = i5100_npages(mci, i); + const unsigned cntlr = i5100_csrow_to_cntlr(mci, i); + const unsigned rank = i5100_csrow_to_rank(mci, i); + + if (!npages) + continue; + + /* + * FIXME: these two are totally bogus -- I don't see how to + * map them correctly to this structure... + */ + mci->csrows[i].first_page = total_pages; + mci->csrows[i].last_page = total_pages + npages - 1; + mci->csrows[i].page_mask = 0UL; + + mci->csrows[i].nr_pages = npages; + mci->csrows[i].grain = 32; + mci->csrows[i].csrow_idx = i; + mci->csrows[i].dtype = + (priv->mtr[cntlr][rank].width == 4) ? 
DEV_X4 : DEV_X8; + mci->csrows[i].ue_count = 0; + mci->csrows[i].ce_count = 0; + mci->csrows[i].mtype = MEM_RDDR2; + mci->csrows[i].edac_mode = EDAC_SECDED; + mci->csrows[i].mci = mci; + mci->csrows[i].nr_channels = 1; + mci->csrows[i].channels[0].chan_idx = 0; + mci->csrows[i].channels[0].ce_count = 0; + mci->csrows[i].channels[0].csrow = mci->csrows + i; + snprintf(mci->csrows[i].channels[0].label, + sizeof(mci->csrows[i].channels[0].label), + "DIMM%u", i5100_rank_to_slot(mci, cntlr, rank)); + + total_pages += npages; + } +} + +static int __devinit i5100_init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int rc; + struct mem_ctl_info *mci; + struct i5100_priv *priv; + struct pci_dev *ch0mm, *ch1mm; + int ret = 0; + u32 dw; + int ranksperch; + + if (PCI_FUNC(pdev->devfn) != 1) + return -ENODEV; + + rc = pci_enable_device(pdev); + if (rc < 0) { + ret = rc; + goto bail; + } + + /* figure out how many ranks, from strapped state of 48GB_Mode input */ + pci_read_config_dword(pdev, I5100_MS, &dw); + ranksperch = !!(dw & (1 << 8)) * 2 + 4; + + if (ranksperch != 4) { + /* FIXME: get 6 ranks / controller to work - need hw... */ + printk(KERN_INFO "i5100_edac: unsupported configuration.\n"); + ret = -ENODEV; + goto bail; + } + + /* device 21, func 0, Channel 0 Memory Map, Error Flag/Mask, etc... */ + ch0mm = pci_get_device_func(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5100_21, 0); + if (!ch0mm) + return -ENODEV; + + rc = pci_enable_device(ch0mm); + if (rc < 0) { + ret = rc; + goto bail_ch0; + } + + /* device 22, func 0, Channel 1 Memory Map, Error Flag/Mask, etc... */ + ch1mm = pci_get_device_func(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5100_22, 0); + if (!ch1mm) { + ret = -ENODEV; + goto bail_ch0; + } + + rc = pci_enable_device(ch1mm); + if (rc < 0) { + ret = rc; + goto bail_ch1; + } + + mci = edac_mc_alloc(sizeof(*priv), ranksperch * 2, 1, 0); + if (!mci) { + ret = -ENOMEM; + goto bail_ch1; + } + + mci->dev = &pdev->dev; + + priv = mci->pvt_info; + priv->ranksperctlr = ranksperch; + priv->mc = pdev; + priv->ch0mm = ch0mm; + priv->ch1mm = ch1mm; + + i5100_init_dimm_layout(pdev, mci); + i5100_init_interleaving(pdev, mci); + + mci->mtype_cap = MEM_FLAG_FB_DDR2; + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->mod_name = "i5100_edac.c"; + mci->mod_ver = "not versioned"; + mci->ctl_name = "i5100"; + mci->dev_name = pci_name(pdev); + mci->ctl_page_to_phys = i5100_ctl_page_to_phys; + + mci->edac_check = i5100_check_error; + + i5100_init_csrows(mci); + + /* this strange construction seems to be in every driver, dunno why */ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_NMI: + break; + default: + edac_op_state = EDAC_OPSTATE_POLL; + break; + } + + if (edac_mc_add_mc(mci)) { + ret = -ENODEV; + goto bail_mc; + } + + goto bail; + +bail_mc: + edac_mc_free(mci); + +bail_ch1: + pci_dev_put(ch1mm); + +bail_ch0: + pci_dev_put(ch0mm); + +bail: + return ret; +} + +static void __devexit i5100_remove_one(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci; + struct i5100_priv *priv; + + mci = edac_mc_del_mc(&pdev->dev); + + if (!mci) + return; + + priv = mci->pvt_info; + pci_dev_put(priv->ch0mm); + pci_dev_put(priv->ch1mm); + + edac_mc_free(mci); +} + +static const struct pci_device_id i5100_pci_tbl[] __devinitdata = { + /* Device 16, Function 0, Channel 0 Memory Map, Error Flag/Mask, ... 
*/ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5100_16) }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, i5100_pci_tbl); + +static struct pci_driver i5100_driver = { + .name = KBUILD_BASENAME, + .probe = i5100_init_one, + .remove = __devexit_p(i5100_remove_one), + .id_table = i5100_pci_tbl, +}; + +static int __init i5100_init(void) +{ + int pci_rc; + + pci_rc = pci_register_driver(&i5100_driver); + + return (pci_rc < 0) ? pci_rc : 0; +} + +static void __exit i5100_exit(void) +{ + pci_unregister_driver(&i5100_driver); +} + +module_init(i5100_init); +module_exit(i5100_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR + ("Arthur Jones "); +MODULE_DESCRIPTION("MC Driver for Intel I5100 memory controllers"); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 119ae7b8f028..c3b1761aba26 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2400,6 +2400,9 @@ #define PCI_DEVICE_ID_INTEL_ICH10_4 0x3a30 #define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60 #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f +#define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 +#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 +#define PCI_DEVICE_ID_INTEL_5100_22 0x65f6 #define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030 #define PCI_DEVICE_ID_INTEL_5400_FBD0 0x4035 #define PCI_DEVICE_ID_INTEL_5400_FBD1 0x4036 -- cgit v1.2.3 From 7dcf2a9fced59e58e4694cdcf15850c01fdba89b Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 1 Jul 2008 19:27:16 +0300 Subject: remove dummy asm/kvm.h files This patch removes the dummy asm/kvm.h files on architectures not (yet) supporting KVM and uses the same conditional headers installation as already used for a.out.h . Also removed are superfluous install rules in the s390 and x86 Kbuild files (they are already in Kbuild.asm). Signed-off-by: Adrian Bunk Acked-by: Sam Ravnborg Signed-off-by: David Woodhouse --- include/asm-alpha/kvm.h | 6 ------ include/asm-arm/kvm.h | 6 ------ include/asm-avr32/kvm.h | 6 ------ include/asm-blackfin/kvm.h | 6 ------ include/asm-cris/kvm.h | 6 ------ include/asm-frv/kvm.h | 6 ------ include/asm-generic/Kbuild.asm | 2 ++ include/asm-h8300/kvm.h | 6 ------ include/asm-m32r/kvm.h | 6 ------ include/asm-m68k/kvm.h | 6 ------ include/asm-m68knommu/kvm.h | 6 ------ include/asm-mips/kvm.h | 6 ------ include/asm-mn10300/kvm.h | 6 ------ include/asm-parisc/kvm.h | 6 ------ include/asm-s390/Kbuild | 1 - include/asm-sh/kvm.h | 6 ------ include/asm-sparc/kvm.h | 6 ------ include/asm-sparc64/kvm.h | 1 - include/asm-um/kvm.h | 6 ------ include/asm-x86/Kbuild | 1 - include/asm-xtensa/kvm.h | 6 ------ include/linux/Kbuild | 2 ++ 22 files changed, 4 insertions(+), 105 deletions(-) delete mode 100644 include/asm-alpha/kvm.h delete mode 100644 include/asm-arm/kvm.h delete mode 100644 include/asm-avr32/kvm.h delete mode 100644 include/asm-blackfin/kvm.h delete mode 100644 include/asm-cris/kvm.h delete mode 100644 include/asm-frv/kvm.h delete mode 100644 include/asm-h8300/kvm.h delete mode 100644 include/asm-m32r/kvm.h delete mode 100644 include/asm-m68k/kvm.h delete mode 100644 include/asm-m68knommu/kvm.h delete mode 100644 include/asm-mips/kvm.h delete mode 100644 include/asm-mn10300/kvm.h delete mode 100644 include/asm-parisc/kvm.h delete mode 100644 include/asm-sh/kvm.h delete mode 100644 include/asm-sparc/kvm.h delete mode 100644 include/asm-sparc64/kvm.h delete mode 100644 include/asm-um/kvm.h delete mode 100644 include/asm-xtensa/kvm.h (limited to 'include/linux') diff --git a/include/asm-alpha/kvm.h b/include/asm-alpha/kvm.h deleted file mode 100644 index 
b9daec429689..000000000000 --- a/include/asm-alpha/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_ALPHA_H -#define __LINUX_KVM_ALPHA_H - -/* alpha does not support KVM */ - -#endif diff --git a/include/asm-arm/kvm.h b/include/asm-arm/kvm.h deleted file mode 100644 index cb3c08cbcb9e..000000000000 --- a/include/asm-arm/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_ARM_H -#define __LINUX_KVM_ARM_H - -/* arm does not support KVM */ - -#endif diff --git a/include/asm-avr32/kvm.h b/include/asm-avr32/kvm.h deleted file mode 100644 index 8c5777020e2c..000000000000 --- a/include/asm-avr32/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_AVR32_H -#define __LINUX_KVM_AVR32_H - -/* avr32 does not support KVM */ - -#endif diff --git a/include/asm-blackfin/kvm.h b/include/asm-blackfin/kvm.h deleted file mode 100644 index e3477d77c014..000000000000 --- a/include/asm-blackfin/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_BLACKFIN_H -#define __LINUX_KVM_BLACKFIN_H - -/* blackfin does not support KVM */ - -#endif diff --git a/include/asm-cris/kvm.h b/include/asm-cris/kvm.h deleted file mode 100644 index c860f51149f0..000000000000 --- a/include/asm-cris/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_CRIS_H -#define __LINUX_KVM_CRIS_H - -/* cris does not support KVM */ - -#endif diff --git a/include/asm-frv/kvm.h b/include/asm-frv/kvm.h deleted file mode 100644 index 9c8a4f08d0a9..000000000000 --- a/include/asm-frv/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_FRV_H -#define __LINUX_KVM_FRV_H - -/* frv does not support KVM */ - -#endif diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm index 7cd25b8e7c9a..1170dc60e638 100644 --- a/include/asm-generic/Kbuild.asm +++ b/include/asm-generic/Kbuild.asm @@ -1,4 +1,6 @@ +ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/kvm.h),) header-y += kvm.h +endif ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/a.out.h),) unifdef-y += a.out.h diff --git a/include/asm-h8300/kvm.h b/include/asm-h8300/kvm.h deleted file mode 100644 index bdbed7b987e1..000000000000 --- a/include/asm-h8300/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_H8300_H -#define __LINUX_KVM_H8300_H - -/* h8300 does not support KVM */ - -#endif diff --git a/include/asm-m32r/kvm.h b/include/asm-m32r/kvm.h deleted file mode 100644 index 99a40515b77e..000000000000 --- a/include/asm-m32r/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_M32R_H -#define __LINUX_KVM_M32R_H - -/* m32r does not support KVM */ - -#endif diff --git a/include/asm-m68k/kvm.h b/include/asm-m68k/kvm.h deleted file mode 100644 index 7ed27fce5240..000000000000 --- a/include/asm-m68k/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_M68K_H -#define __LINUX_KVM_M68K_H - -/* m68k does not support KVM */ - -#endif diff --git a/include/asm-m68knommu/kvm.h b/include/asm-m68knommu/kvm.h deleted file mode 100644 index b49d4258dabb..000000000000 --- a/include/asm-m68knommu/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_M68KNOMMU_H -#define __LINUX_KVM_M68KNOMMU_H - -/* m68knommu does not support KVM */ - -#endif diff --git a/include/asm-mips/kvm.h b/include/asm-mips/kvm.h deleted file mode 100644 index 093a5b7f796b..000000000000 --- a/include/asm-mips/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_MIPS_H -#define __LINUX_KVM_MIPS_H - -/* mips does not support KVM */ - -#endif diff --git a/include/asm-mn10300/kvm.h b/include/asm-mn10300/kvm.h deleted file mode 100644 index f6b609ff4a57..000000000000 --- 
a/include/asm-mn10300/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_MN10300_H -#define __LINUX_KVM_MN10300_H - -/* mn10300 does not support KVM */ - -#endif diff --git a/include/asm-parisc/kvm.h b/include/asm-parisc/kvm.h deleted file mode 100644 index 00cc45812547..000000000000 --- a/include/asm-parisc/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_PARISC_H -#define __LINUX_KVM_PARISC_H - -/* parisc does not support KVM */ - -#endif diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild index bb5e9edb9825..63a23415fba6 100644 --- a/include/asm-s390/Kbuild +++ b/include/asm-s390/Kbuild @@ -7,7 +7,6 @@ header-y += tape390.h header-y += ucontext.h header-y += vtoc.h header-y += zcrypt.h -header-y += kvm.h header-y += chsc.h unifdef-y += cmb.h diff --git a/include/asm-sh/kvm.h b/include/asm-sh/kvm.h deleted file mode 100644 index 6af51dbab2d0..000000000000 --- a/include/asm-sh/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_SH_H -#define __LINUX_KVM_SH_H - -/* sh does not support KVM */ - -#endif diff --git a/include/asm-sparc/kvm.h b/include/asm-sparc/kvm.h deleted file mode 100644 index 2e5478da3819..000000000000 --- a/include/asm-sparc/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_SPARC_H -#define __LINUX_KVM_SPARC_H - -/* sparc does not support KVM */ - -#endif diff --git a/include/asm-sparc64/kvm.h b/include/asm-sparc64/kvm.h deleted file mode 100644 index 53564ad86b15..000000000000 --- a/include/asm-sparc64/kvm.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-um/kvm.h b/include/asm-um/kvm.h deleted file mode 100644 index 66aa77094551..000000000000 --- a/include/asm-um/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_UM_H -#define __LINUX_KVM_UM_H - -/* um does not support KVM */ - -#endif diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild index 1e3554596f72..811e9828ccb3 100644 --- a/include/asm-x86/Kbuild +++ b/include/asm-x86/Kbuild @@ -3,7 +3,6 @@ include include/asm-generic/Kbuild.asm header-y += boot.h header-y += bootparam.h header-y += debugreg.h -header-y += kvm.h header-y += ldt.h header-y += msr-index.h header-y += prctl.h diff --git a/include/asm-xtensa/kvm.h b/include/asm-xtensa/kvm.h deleted file mode 100644 index bda4e331e98c..000000000000 --- a/include/asm-xtensa/kvm.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __LINUX_KVM_XTENSA_H -#define __LINUX_KVM_XTENSA_H - -/* xtensa does not support KVM */ - -#endif diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 71d70d1fbce2..402c8f55d713 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -256,7 +256,9 @@ unifdef-y += kd.h unifdef-y += kernelcapi.h unifdef-y += kernel.h unifdef-y += keyboard.h +ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/kvm.h),) unifdef-y += kvm.h +endif unifdef-y += llc.h unifdef-y += loop.h unifdef-y += lp.h -- cgit v1.2.3 From c6af5e9f8a57467df2e55e428316a43480174521 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 25 Jul 2008 15:48:04 +0200 Subject: bootmem: Move node allocation macros back to !HAVE_ARCH_BOOTMEM_NODE These got unintentionally moved, put them back as x86 provides its own versions. 
Signed-off-by: Johannes Weiner Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 4ddf2922fc8d..652470b687c9 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -103,17 +103,16 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages(x) \ __alloc_bootmem_low(x, PAGE_SIZE, 0) -#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ - -extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, - int flags); - #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) +#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ + +extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, + int flags); extern void *alloc_bootmem_section(unsigned long size, unsigned long section_nr); -- cgit v1.2.3 From b4615e69b6c6353878b734a8202b65efbc554df4 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 25 Jul 2008 13:19:22 -0700 Subject: sys_paccept definition missing __user annotation Introduced by commit aaca0bdca573f3f51ea03139f9c7289541e7bca3 ("flag parameters: paccept"): net/socket.c:1515:17: error: symbol 'sys_paccept' redeclared with different type (originally declared at include/linux/syscalls.h:413) - incompatible argument 4 (different address spaces) Signed-off-by: Harvey Harrison Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 06f2bf76c030..d6ff145919ca 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -411,7 +411,7 @@ asmlinkage long sys_bind(int, struct sockaddr __user *, int); asmlinkage long sys_connect(int, struct sockaddr __user *, int); asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *); asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *, - const sigset_t *, size_t, int); + const __user sigset_t *, size_t, int); asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *); asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *); asmlinkage long sys_send(int, void __user *, size_t, unsigned); -- cgit v1.2.3 From 3f07af494dfa6de43137dae430431c9fbf929c0c Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 25 Jul 2008 22:25:13 -0400 Subject: of: adapt of_find_i2c_driver() to be usable by SPI also SPI has a similar problem as I2C in that it needs to determine an appropriate modalias value for each device node. This patch adapts the of_i2c of_find_i2c_driver() function to be usable by of_spi also. 
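A minimal sketch of the resulting calling pattern for a bus helper (illustrative only, not code from this patch; the device_node pointer "np" and the buffer size are assumed):

	char modalias[32];

	/* derive a driver/module name from the node's compatible property */
	if (of_modalias_node(np, modalias, sizeof(modalias)) < 0)
		return;	/* no usable compatible entry */

	/*
	 * e.g. a hypothetical compatible = "acme,foo", with no table entry and
	 * no "linux," prefix, yields modalias "foo" via the strip-manufacturer rule
	 */
	request_module(modalias);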
Signed-off-by: Grant Likely --- drivers/of/base.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/of/of_i2c.c | 64 ++------------------------------------ include/linux/of.h | 1 + 3 files changed, 92 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/base.c b/drivers/of/base.c index 23ffb7c0caf2..ad8ac1a8af28 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -385,3 +385,91 @@ struct device_node *of_find_matching_node(struct device_node *from, return np; } EXPORT_SYMBOL(of_find_matching_node); + +/** + * of_modalias_table: Table of explicit compatible ==> modalias mappings + * + * This table allows particulare compatible property values to be mapped + * to modalias strings. This is useful for busses which do not directly + * understand the OF device tree but are populated based on data contained + * within the device tree. SPI and I2C are the two current users of this + * table. + * + * In most cases, devices do not need to be listed in this table because + * the modalias value can be derived directly from the compatible table. + * However, if for any reason a value cannot be derived, then this table + * provides a method to override the implicit derivation. + * + * At the moment, a single table is used for all bus types because it is + * assumed that the data size is small and that the compatible values + * should already be distinct enough to differentiate between SPI, I2C + * and other devices. + */ +struct of_modalias_table { + char *of_device; + char *modalias; +}; +static struct of_modalias_table of_modalias_table[] = { + /* Empty for now; add entries as needed */ +}; + +/** + * of_modalias_node - Lookup appropriate modalias for a device node + * @node: pointer to a device tree node + * @modalias: Pointer to buffer that modalias value will be copied into + * @len: Length of modalias value + * + * Based on the value of the compatible property, this routine will determine + * an appropriate modalias value for a particular device tree node. Three + * separate methods are used to derive a modalias value. + * + * First method is to lookup the compatible value in of_modalias_table. + * Second is to look for a "linux," entry in the compatible list + * and used that for modalias. Third is to strip off the manufacturer + * prefix from the first compatible entry and use the remainder as modalias + * + * This routine returns 0 on success + */ +int of_modalias_node(struct device_node *node, char *modalias, int len) +{ + int i, cplen; + const char *compatible; + const char *p; + + /* 1. search for exception list entry */ + for (i = 0; i < ARRAY_SIZE(of_modalias_table); i++) { + compatible = of_modalias_table[i].of_device; + if (!of_device_is_compatible(node, compatible)) + continue; + strlcpy(modalias, of_modalias_table[i].modalias, len); + return 0; + } + + compatible = of_get_property(node, "compatible", &cplen); + if (!compatible) + return -ENODEV; + + /* 2. search for linux, entry */ + p = compatible; + while (cplen > 0) { + if (!strncmp(p, "linux,", 6)) { + p += 6; + strlcpy(modalias, p, len); + return 0; + } + + i = strlen(p) + 1; + p += i; + cplen -= i; + } + + /* 3. 
take first compatible entry and strip manufacturer */ + p = strchr(compatible, ','); + if (!p) + return -ENODEV; + p++; + strlcpy(modalias, p, len); + return 0; +} +EXPORT_SYMBOL_GPL(of_modalias_node); + diff --git a/drivers/of/of_i2c.c b/drivers/of/of_i2c.c index 344e1b03dd8b..6a98dc8aa30b 100644 --- a/drivers/of/of_i2c.c +++ b/drivers/of/of_i2c.c @@ -16,62 +16,6 @@ #include #include -struct i2c_driver_device { - char *of_device; - char *i2c_type; -}; - -static struct i2c_driver_device i2c_devices[] = { -}; - -static int of_find_i2c_driver(struct device_node *node, - struct i2c_board_info *info) -{ - int i, cplen; - const char *compatible; - const char *p; - - /* 1. search for exception list entry */ - for (i = 0; i < ARRAY_SIZE(i2c_devices); i++) { - if (!of_device_is_compatible(node, i2c_devices[i].of_device)) - continue; - if (strlcpy(info->type, i2c_devices[i].i2c_type, - I2C_NAME_SIZE) >= I2C_NAME_SIZE) - return -ENOMEM; - - return 0; - } - - compatible = of_get_property(node, "compatible", &cplen); - if (!compatible) - return -ENODEV; - - /* 2. search for linux, entry */ - p = compatible; - while (cplen > 0) { - if (!strncmp(p, "linux,", 6)) { - p += 6; - if (strlcpy(info->type, p, - I2C_NAME_SIZE) >= I2C_NAME_SIZE) - return -ENOMEM; - return 0; - } - - i = strlen(p) + 1; - p += i; - cplen -= i; - } - - /* 3. take fist compatible entry and strip manufacturer */ - p = strchr(compatible, ','); - if (!p) - return -ENODEV; - p++; - if (strlcpy(info->type, p, I2C_NAME_SIZE) >= I2C_NAME_SIZE) - return -ENOMEM; - return 0; -} - void of_register_i2c_devices(struct i2c_adapter *adap, struct device_node *adap_node) { @@ -83,6 +27,9 @@ void of_register_i2c_devices(struct i2c_adapter *adap, const u32 *addr; int len; + if (of_modalias_node(node, info.type, sizeof(info.type)) < 0) + continue; + addr = of_get_property(node, "reg", &len); if (!addr || len < sizeof(int) || *addr > (1 << 10) - 1) { printk(KERN_ERR @@ -92,11 +39,6 @@ void of_register_i2c_devices(struct i2c_adapter *adap, info.irq = irq_of_parse_and_map(node, 0); - if (of_find_i2c_driver(node, &info) < 0) { - irq_dispose_mapping(info.irq); - continue; - } - info.addr = *addr; request_module(info.type); diff --git a/include/linux/of.h b/include/linux/of.h index 59a61bdc98b6..79886ade070f 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -70,5 +70,6 @@ extern int of_n_addr_cells(struct device_node *np); extern int of_n_size_cells(struct device_node *np); extern const struct of_device_id *of_match_node( const struct of_device_id *matches, const struct device_node *node); +extern int of_modalias_node(struct device_node *node, char *modalias, int len); #endif /* _LINUX_OF_H */ -- cgit v1.2.3 From dc87c98e8f635a718f1abb2c3e15fc77c0001651 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 15 May 2008 16:50:22 -0600 Subject: spi: split up spi_new_device() to allow two stage registration. spi_new_device() allocates and registers an spi device all in one swoop. If the driver needs to add extra data to the spi_device before it is registered, then this causes problems. This is needed for OF device tree support so that the SPI device tree helper can add a pointer to the device node after the device is allocated, but before the device is registered. OF aware SPI devices can then retrieve data out of the device node to populate a platform data structure. This patch splits the allocation and registration portions of code out of spi_new_device() and creates two new functions; spi_alloc_device() and spi_register_device(). 
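As a rough sketch of the intended calling sequence (illustrative values, not code taken from this patch):

	struct spi_device *spi;

	spi = spi_alloc_device(master);	/* allocate, do not register yet */
	if (!spi)
		return -ENOMEM;

	/* fill in device parameters before registration */
	strlcpy(spi->modalias, "example-dev", sizeof(spi->modalias));	/* hypothetical name */
	spi->chip_select = 0;
	spi->max_speed_hz = 1000000;

	if (spi_add_device(spi)) {	/* validates chip_select, calls setup(), then device_add() */
		spi_dev_put(spi);	/* discard without registering */
		return -ENODEV;
	}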
spi_new_device() is modified to use the new functions for allocation and registration. None of the existing users of spi_new_device() should be affected by this change. Drivers using the new API can forego the use of spi_board_info structure to describe the device layout and populate data into the spi_device structure directly. This change is in preparation for adding an OF device tree parser to generate spi_devices based on data in the device tree. Signed-off-by: Grant Likely Acked-by: David Brownell --- drivers/spi/spi.c | 139 +++++++++++++++++++++++++++++++++--------------- include/linux/spi/spi.h | 12 +++++ 2 files changed, 107 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index ecca4a6a6f94..964124b60db2 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -178,6 +178,96 @@ struct boardinfo { static LIST_HEAD(board_list); static DEFINE_MUTEX(board_lock); +/** + * spi_alloc_device - Allocate a new SPI device + * @master: Controller to which device is connected + * Context: can sleep + * + * Allows a driver to allocate and initialize a spi_device without + * registering it immediately. This allows a driver to directly + * fill the spi_device with device parameters before calling + * spi_add_device() on it. + * + * Caller is responsible to call spi_add_device() on the returned + * spi_device structure to add it to the SPI master. If the caller + * needs to discard the spi_device without adding it, then it should + * call spi_dev_put() on it. + * + * Returns a pointer to the new device, or NULL. + */ +struct spi_device *spi_alloc_device(struct spi_master *master) +{ + struct spi_device *spi; + struct device *dev = master->dev.parent; + + if (!spi_master_get(master)) + return NULL; + + spi = kzalloc(sizeof *spi, GFP_KERNEL); + if (!spi) { + dev_err(dev, "cannot alloc spi_device\n"); + spi_master_put(master); + return NULL; + } + + spi->master = master; + spi->dev.parent = dev; + spi->dev.bus = &spi_bus_type; + spi->dev.release = spidev_release; + device_initialize(&spi->dev); + return spi; +} +EXPORT_SYMBOL_GPL(spi_alloc_device); + +/** + * spi_add_device - Add spi_device allocated with spi_alloc_device + * @spi: spi_device to register + * + * Companion function to spi_alloc_device. Devices allocated with + * spi_alloc_device can be added onto the spi bus with this function. + * + * Returns 0 on success; non-zero on failure + */ +int spi_add_device(struct spi_device *spi) +{ + struct device *dev = spi->master->dev.parent; + int status; + + /* Chipselects are numbered 0..max; validate. */ + if (spi->chip_select >= spi->master->num_chipselect) { + dev_err(dev, "cs%d >= max %d\n", + spi->chip_select, + spi->master->num_chipselect); + return -EINVAL; + } + + /* Set the bus ID string */ + snprintf(spi->dev.bus_id, sizeof spi->dev.bus_id, + "%s.%u", spi->master->dev.bus_id, + spi->chip_select); + + /* drivers may modify this initial i/o setup */ + status = spi->master->setup(spi); + if (status < 0) { + dev_err(dev, "can't %s %s, status %d\n", + "setup", spi->dev.bus_id, status); + return status; + } + + /* driver core catches callers that misbehave by defining + * devices that already exist. 
+ */ + status = device_add(&spi->dev); + if (status < 0) { + dev_err(dev, "can't %s %s, status %d\n", + "add", spi->dev.bus_id, status); + return status; + } + + dev_dbg(dev, "registered child %s\n", spi->dev.bus_id); + return 0; +} +EXPORT_SYMBOL_GPL(spi_add_device); /** * spi_new_device - instantiate one new SPI device @@ -197,7 +287,6 @@ struct spi_device *spi_new_device(struct spi_master *master, struct spi_board_info *chip) { struct spi_device *proxy; - struct device *dev = master->dev.parent; int status; /* NOTE: caller did any chip->bus_num checks necessary. @@ -207,66 +296,28 @@ struct spi_device *spi_new_device(struct spi_master *master, * suggests syslogged diagnostics are best here (ugh). */ - /* Chipselects are numbered 0..max; validate. */ - if (chip->chip_select >= master->num_chipselect) { - dev_err(dev, "cs%d > max %d\n", - chip->chip_select, - master->num_chipselect); - return NULL; - } - - if (!spi_master_get(master)) + proxy = spi_alloc_device(master); + if (!proxy) return NULL; WARN_ON(strlen(chip->modalias) >= sizeof(proxy->modalias)); - proxy = kzalloc(sizeof *proxy, GFP_KERNEL); - if (!proxy) { - dev_err(dev, "can't alloc dev for cs%d\n", - chip->chip_select); - goto fail; - } - proxy->master = master; proxy->chip_select = chip->chip_select; proxy->max_speed_hz = chip->max_speed_hz; proxy->mode = chip->mode; proxy->irq = chip->irq; strlcpy(proxy->modalias, chip->modalias, sizeof(proxy->modalias)); - - snprintf(proxy->dev.bus_id, sizeof proxy->dev.bus_id, - "%s.%u", master->dev.bus_id, - chip->chip_select); - proxy->dev.parent = dev; - proxy->dev.bus = &spi_bus_type; proxy->dev.platform_data = (void *) chip->platform_data; proxy->controller_data = chip->controller_data; proxy->controller_state = NULL; - proxy->dev.release = spidev_release; - /* drivers may modify this initial i/o setup */ - status = master->setup(proxy); + status = spi_add_device(proxy); if (status < 0) { - dev_err(dev, "can't %s %s, status %d\n", - "setup", proxy->dev.bus_id, status); - goto fail; + spi_dev_put(proxy); + return NULL; } - /* driver core catches callers that misbehave by defining - * devices that already exist. - */ - status = device_register(&proxy->dev); - if (status < 0) { - dev_err(dev, "can't %s %s, status %d\n", - "add", proxy->dev.bus_id, status); - goto fail; - } - dev_dbg(dev, "registered child %s\n", proxy->dev.bus_id); return proxy; - -fail: - spi_master_put(master); - kfree(proxy); - return NULL; } EXPORT_SYMBOL_GPL(spi_new_device); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index a9cc29d46653..4be01bb44377 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -778,7 +778,19 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n) * use spi_new_device() to describe each device. You can also call * spi_unregister_device() to start making that device vanish, but * normally that would be handled by spi_unregister_master(). + * + * You can also use spi_alloc_device() and spi_add_device() to use a two + * stage registration sequence for each spi_device. This gives the caller + * some more control over the spi_device structure before it is registered, + * but requires that caller to initialize fields that would otherwise + * be defined using the board info. 
*/ +extern struct spi_device * +spi_alloc_device(struct spi_master *master); + +extern int +spi_add_device(struct spi_device *spi); + extern struct spi_device * spi_new_device(struct spi_master *, struct spi_board_info *); -- cgit v1.2.3 From 284b01897340974000bcc84de87a4e1becc8a83d Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 16 May 2008 11:37:09 -0600 Subject: spi: Add OF binding support for SPI busses This patch adds support for populating an SPI bus based on data in the OF device tree. This is useful for powerpc platforms which use the device tree instead of discrete code for describing platform layout. Signed-off-by: Grant Likely --- drivers/of/Kconfig | 6 ++++ drivers/of/Makefile | 1 + drivers/of/of_spi.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/of_spi.h | 18 ++++++++++ 4 files changed, 118 insertions(+) create mode 100644 drivers/of/of_spi.c create mode 100644 include/linux/of_spi.h (limited to 'include/linux') diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index 1d7ec3129349..f821dbc952a4 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -13,3 +13,9 @@ config OF_I2C depends on PPC_OF && I2C help OpenFirmware I2C accessors + +config OF_SPI + def_tristate SPI + depends on OF && PPC_OF && SPI + help + OpenFirmware SPI accessors diff --git a/drivers/of/Makefile b/drivers/of/Makefile index 548772e871fd..4c3c6f8e36f5 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -2,3 +2,4 @@ obj-y = base.o obj-$(CONFIG_OF_DEVICE) += device.o platform.o obj-$(CONFIG_OF_GPIO) += gpio.o obj-$(CONFIG_OF_I2C) += of_i2c.o +obj-$(CONFIG_OF_SPI) += of_spi.o diff --git a/drivers/of/of_spi.c b/drivers/of/of_spi.c new file mode 100644 index 000000000000..b01eec026f68 --- /dev/null +++ b/drivers/of/of_spi.c @@ -0,0 +1,93 @@ +/* + * SPI OF support routines + * Copyright (C) 2008 Secret Lab Technologies Ltd. + * + * Support routines for deriving SPI device attachments from the device + * tree. + */ + +#include +#include +#include +#include + +/** + * of_register_spi_devices - Register child devices onto the SPI bus + * @master: Pointer to spi_master device + * @np: parent node of SPI device nodes + * + * Registers an spi_device for each child node of 'np' which has a 'reg' + * property. + */ +void of_register_spi_devices(struct spi_master *master, struct device_node *np) +{ + struct spi_device *spi; + struct device_node *nc; + const u32 *prop; + int rc; + int len; + + for_each_child_of_node(np, nc) { + /* Alloc an spi_device */ + spi = spi_alloc_device(master); + if (!spi) { + dev_err(&master->dev, "spi_device alloc error for %s\n", + nc->full_name); + spi_dev_put(spi); + continue; + } + + /* Select device driver */ + if (of_modalias_node(nc, spi->modalias, + sizeof(spi->modalias)) < 0) { + dev_err(&master->dev, "cannot find modalias for %s\n", + nc->full_name); + spi_dev_put(spi); + continue; + } + + /* Device address */ + prop = of_get_property(nc, "reg", &len); + if (!prop || len < sizeof(*prop)) { + dev_err(&master->dev, "%s has no 'reg' property\n", + nc->full_name); + spi_dev_put(spi); + continue; + } + spi->chip_select = *prop; + + /* Mode (clock phase/polarity/etc.) 
*/ + if (of_find_property(nc, "spi-cpha", NULL)) + spi->mode |= SPI_CPHA; + if (of_find_property(nc, "spi-cpol", NULL)) + spi->mode |= SPI_CPOL; + + /* Device speed */ + prop = of_get_property(nc, "spi-max-frequency", &len); + if (!prop || len < sizeof(*prop)) { + dev_err(&master->dev, "%s has no 'spi-max-frequency' property\n", + nc->full_name); + spi_dev_put(spi); + continue; + } + spi->max_speed_hz = *prop; + + /* IRQ */ + spi->irq = irq_of_parse_and_map(nc, 0); + + /* Store a pointer to the node in the device structure */ + of_node_get(nc); + spi->dev.archdata.of_node = nc; + + /* Register the new device */ + request_module(spi->modalias); + rc = spi_add_device(spi); + if (rc) { + dev_err(&master->dev, "spi_device register error %s\n", + nc->full_name); + spi_dev_put(spi); + } + + } +} +EXPORT_SYMBOL(of_register_spi_devices); diff --git a/include/linux/of_spi.h b/include/linux/of_spi.h new file mode 100644 index 000000000000..5f71ee8c0868 --- /dev/null +++ b/include/linux/of_spi.h @@ -0,0 +1,18 @@ +/* + * OpenFirmware SPI support routines + * Copyright (C) 2008 Secret Lab Technologies Ltd. + * + * Support routines for deriving SPI device attachments from the device + * tree. + */ + +#ifndef __LINUX_OF_SPI_H +#define __LINUX_OF_SPI_H + +#include +#include + +extern void of_register_spi_devices(struct spi_master *master, + struct device_node *np); + +#endif /* __LINUX_OF_SPI */ -- cgit v1.2.3 From ec34c702ca8b7d6f0aa54379c3b0d0ec10b8ff23 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 25 Jul 2008 21:45:49 -0700 Subject: net: drop unused BUG_TRAP() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index f4d386c191f5..ca643b13b026 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -755,13 +755,6 @@ extern void __rtnl_unlock(void); } \ } while(0) -#define BUG_TRAP(x) do { \ - if (unlikely(!(x))) { \ - printk(KERN_ERR "KERNEL: assertion (%s) failed at %s (%d)\n", \ - #x, __FILE__ , __LINE__); \ - } \ -} while(0) - static inline u32 rtm_get_table(struct rtattr **rta, u8 table) { return RTA_GET_U32(rta[RTA_TABLE-1]); -- cgit v1.2.3 From 36ac26171afa8dbf29226199699fe955d4a0b6f6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 26 Jul 2008 11:22:33 +0200 Subject: crashdump: fix undefined reference to `elfcorehdr_addr' fix build bug introduced by 95b68dec0d5 "calgary iommu: use the first kernels TCE tables in kdump": arch/x86/kernel/built-in.o: In function `calgary_iommu_init': (.init.text+0x8399): undefined reference to `elfcorehdr_addr' arch/x86/kernel/built-in.o: In function `calgary_iommu_init': (.init.text+0x856c): undefined reference to `elfcorehdr_addr' arch/x86/kernel/built-in.o: In function `detect_calgary': (.init.text+0x8c68): undefined reference to `elfcorehdr_addr' arch/x86/kernel/built-in.o: In function `detect_calgary': (.init.text+0x8d0c): undefined reference to `elfcorehdr_addr' make elfcorehdr_addr a generally available symbol. 
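The effect on callers, sketched (illustrative, not taken from the patch): with the read-only fallback definition, the usual "are we the kdump capture kernel" test compiles unconditionally and can be folded away when CONFIG_PROC_VMCORE is off:

	if (elfcorehdr_addr != ELFCORE_ADDR_MAX)
		/* capture kernel: an ELF core header was passed via elfcorehdr= */
		reuse_first_kernel_tables();	/* hypothetical helper */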
Signed-off-by: Ingo Molnar --- include/linux/crash_dump.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 6cd39a927e1f..025e4f575103 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -8,7 +8,13 @@ #include #define ELFCORE_ADDR_MAX (-1ULL) + +#ifdef CONFIG_PROC_VMCORE extern unsigned long long elfcorehdr_addr; +#else +static const unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; +#endif + extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, unsigned long, int); extern const struct file_operations proc_vmcore_operations; -- cgit v1.2.3 From 3bc9f79ee1ddc913be0a6d3592036683ef8a3148 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Jul 2008 14:57:58 +0200 Subject: iommu: add iommu_num_pages helper function Calculating the number of pages from given address and length numbers is a task required in multiple IOMMU implementations. So implement this as a generic function into the IOMMU helper code. Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Cc: FUJITA Tomonori Signed-off-by: Ingo Molnar --- include/linux/iommu-helper.h | 1 + lib/iommu-helper.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index c975caf75385..f8598f583944 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -8,3 +8,4 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long align_mask); extern void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr); +extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c index a3b8d4c3f77a..889ddce2021e 100644 --- a/lib/iommu-helper.c +++ b/lib/iommu-helper.c @@ -80,3 +80,11 @@ void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr) } } EXPORT_SYMBOL(iommu_area_free); + +unsigned long iommu_num_pages(unsigned long addr, unsigned long len) +{ + unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); + + return size >> PAGE_SHIFT; +} +EXPORT_SYMBOL(iommu_num_pages); -- cgit v1.2.3 From b8d317d10cca76cabe6b03ebfeb23cc99118b731 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 24 Jul 2008 18:21:29 -0700 Subject: cpumask: make cpumask_of_cpu_map generic If an arch doesn't define cpumask_of_cpu_map, create a generic statically-initialized one for them. This allows removal of the buggy cpumask_of_cpu() macro (&cpumask_of_cpu() gives address of out-of-scope var). An arch with NR_CPUS of 4096 probably wants to allocate this itself based on the actual number of CPUs, since otherwise they're using 2MB of rodata (1024 cpus means 128k). That's what CONFIG_HAVE_CPUMASK_OF_CPU_MAP is for (only x86/64 does so at the moment). In future as we support more CPUs, we'll need to resort to a get_cpu_map()/put_cpu_map() allocation scheme. 
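For context, an illustrative fragment (not part of the patch) of what the static map makes safe; "cpu" is assumed to be a valid CPU number:

	/* each cpumask_of_cpu_map[] entry has exactly one bit set */
	const cpumask_t *mask = &cpumask_of_cpu(cpu);	/* points into the static map */

	/*
	 * with the old fallback macro this expression built a temporary,
	 * so taking its address could hand out an out-of-scope variable
	 */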
Signed-off-by: Mike Travis Signed-off-by: Rusty Russell Cc: Andrew Morton Cc: Jack Steiner Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 41 ++---------------- kernel/cpu.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 1b5c98e7fef7..8fa3b6d4a320 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -62,15 +62,7 @@ * int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids * * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set - *ifdef CONFIG_HAS_CPUMASK_OF_CPU - * cpumask_of_cpu_ptr_declare(v) Declares cpumask_t *v - * cpumask_of_cpu_ptr_next(v, cpu) Sets v = &cpumask_of_cpu_map[cpu] - * cpumask_of_cpu_ptr(v, cpu) Combines above two operations - *else - * cpumask_of_cpu_ptr_declare(v) Declares cpumask_t _v and *v = &_v - * cpumask_of_cpu_ptr_next(v, cpu) Sets _v = cpumask_of_cpu(cpu) - * cpumask_of_cpu_ptr(v, cpu) Combines above two operations - *endif + * (can be used as an lvalue) * CPU_MASK_ALL Initializer - all bits set * CPU_MASK_NONE Initializer - no bits set * unsigned long *cpus_addr(mask) Array of unsigned long's in mask @@ -274,36 +266,9 @@ static inline void __cpus_shift_left(cpumask_t *dstp, } -#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP -extern cpumask_t *cpumask_of_cpu_map; +/* cpumask_of_cpu_map[] is in kernel/cpu.c */ +extern const cpumask_t *cpumask_of_cpu_map; #define cpumask_of_cpu(cpu) (cpumask_of_cpu_map[cpu]) -#define cpumask_of_cpu_ptr(v, cpu) \ - const cpumask_t *v = &cpumask_of_cpu(cpu) -#define cpumask_of_cpu_ptr_declare(v) \ - const cpumask_t *v -#define cpumask_of_cpu_ptr_next(v, cpu) \ - v = &cpumask_of_cpu(cpu) -#else -#define cpumask_of_cpu(cpu) \ -({ \ - typeof(_unused_cpumask_arg_) m; \ - if (sizeof(m) == sizeof(unsigned long)) { \ - m.bits[0] = 1UL<<(cpu); \ - } else { \ - cpus_clear(m); \ - cpu_set((cpu), m); \ - } \ - m; \ -}) -#define cpumask_of_cpu_ptr(v, cpu) \ - cpumask_t _##v = cpumask_of_cpu(cpu); \ - const cpumask_t *v = &_##v -#define cpumask_of_cpu_ptr_declare(v) \ - cpumask_t _##v; \ - const cpumask_t *v = &_##v -#define cpumask_of_cpu_ptr_next(v, cpu) \ - _##v = cpumask_of_cpu(cpu) -#endif #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) diff --git a/kernel/cpu.c b/kernel/cpu.c index 10ba5f1004a5..fe31ff3d3809 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -461,3 +461,112 @@ out: #endif /* CONFIG_PM_SLEEP_SMP */ #endif /* CONFIG_SMP */ + +#ifndef CONFIG_HAVE_CPUMASK_OF_CPU_MAP +/* 64 bits of zeros, for initializers. */ +#if BITS_PER_LONG == 32 +#define Z64 0, 0 +#else +#define Z64 0 +#endif + +/* Initializer macros. */ +#define CMI0(n) { .bits = { 1UL << (n) } } +#define CMI(n, ...) { .bits = { __VA_ARGS__, 1UL << ((n) % BITS_PER_LONG) } } + +#define CMI8(n, ...) \ + CMI((n), __VA_ARGS__), CMI((n)+1, __VA_ARGS__), \ + CMI((n)+2, __VA_ARGS__), CMI((n)+3, __VA_ARGS__), \ + CMI((n)+4, __VA_ARGS__), CMI((n)+5, __VA_ARGS__), \ + CMI((n)+6, __VA_ARGS__), CMI((n)+7, __VA_ARGS__) + +#if BITS_PER_LONG == 32 +#define CMI64(n, ...) \ + CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__), \ + CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__), \ + CMI8((n)+32, 0, __VA_ARGS__), CMI8((n)+40, 0, __VA_ARGS__), \ + CMI8((n)+48, 0, __VA_ARGS__), CMI8((n)+56, 0, __VA_ARGS__) +#else +#define CMI64(n, ...) 
\ + CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__), \ + CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__), \ + CMI8((n)+32, __VA_ARGS__), CMI8((n)+40, __VA_ARGS__), \ + CMI8((n)+48, __VA_ARGS__), CMI8((n)+56, __VA_ARGS__) +#endif + +#define CMI256(n, ...) \ + CMI64((n), __VA_ARGS__), CMI64((n)+64, Z64, __VA_ARGS__), \ + CMI64((n)+128, Z64, Z64, __VA_ARGS__), \ + CMI64((n)+192, Z64, Z64, Z64, __VA_ARGS__) +#define Z256 Z64, Z64, Z64, Z64 + +#define CMI1024(n, ...) \ + CMI256((n), __VA_ARGS__), \ + CMI256((n)+256, Z256, __VA_ARGS__), \ + CMI256((n)+512, Z256, Z256, __VA_ARGS__), \ + CMI256((n)+768, Z256, Z256, Z256, __VA_ARGS__) +#define Z1024 Z256, Z256, Z256, Z256 + +/* We want this statically initialized, just to be safe. We try not + * to waste too much space, either. */ +static const cpumask_t cpumask_map[] = { + CMI0(0), CMI0(1), CMI0(2), CMI0(3), +#if NR_CPUS > 4 + CMI0(4), CMI0(5), CMI0(6), CMI0(7), +#endif +#if NR_CPUS > 8 + CMI0(8), CMI0(9), CMI0(10), CMI0(11), + CMI0(12), CMI0(13), CMI0(14), CMI0(15), +#endif +#if NR_CPUS > 16 + CMI0(16), CMI0(17), CMI0(18), CMI0(19), + CMI0(20), CMI0(21), CMI0(22), CMI0(23), + CMI0(24), CMI0(25), CMI0(26), CMI0(27), + CMI0(28), CMI0(29), CMI0(30), CMI0(31), +#endif +#if NR_CPUS > 32 +#if BITS_PER_LONG == 32 + CMI(32, 0), CMI(33, 0), CMI(34, 0), CMI(35, 0), + CMI(36, 0), CMI(37, 0), CMI(38, 0), CMI(39, 0), + CMI(40, 0), CMI(41, 0), CMI(42, 0), CMI(43, 0), + CMI(44, 0), CMI(45, 0), CMI(46, 0), CMI(47, 0), + CMI(48, 0), CMI(49, 0), CMI(50, 0), CMI(51, 0), + CMI(52, 0), CMI(53, 0), CMI(54, 0), CMI(55, 0), + CMI(56, 0), CMI(57, 0), CMI(58, 0), CMI(59, 0), + CMI(60, 0), CMI(61, 0), CMI(62, 0), CMI(63, 0), +#else + CMI0(32), CMI0(33), CMI0(34), CMI0(35), + CMI0(36), CMI0(37), CMI0(38), CMI0(39), + CMI0(40), CMI0(41), CMI0(42), CMI0(43), + CMI0(44), CMI0(45), CMI0(46), CMI0(47), + CMI0(48), CMI0(49), CMI0(50), CMI0(51), + CMI0(52), CMI0(53), CMI0(54), CMI0(55), + CMI0(56), CMI0(57), CMI0(58), CMI0(59), + CMI0(60), CMI0(61), CMI0(62), CMI0(63), +#endif /* BITS_PER_LONG == 64 */ +#endif +#if NR_CPUS > 64 + CMI64(64, Z64), +#endif +#if NR_CPUS > 128 + CMI64(128, Z64, Z64), CMI64(192, Z64, Z64, Z64), +#endif +#if NR_CPUS > 256 + CMI256(256, Z256), +#endif +#if NR_CPUS > 512 + CMI256(512, Z256, Z256), CMI256(768, Z256, Z256, Z256), +#endif +#if NR_CPUS > 1024 + CMI1024(1024, Z1024), +#endif +#if NR_CPUS > 2048 + CMI1024(2048, Z1024, Z1024), CMI1024(3072, Z1024, Z1024, Z1024), +#endif +#if NR_CPUS > 4096 +#error NR_CPUS too big. Fix initializers or set CONFIG_HAVE_CPUMASK_OF_CPU_MAP +#endif +}; + +const cpumask_t *cpumask_of_cpu_map = cpumask_map; +#endif /* !CONFIG_HAVE_CPUMASK_OF_CPU_MAP */ -- cgit v1.2.3 From fdd2a7e2dac56a3384068802be46b822f2aed703 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 26 Jul 2008 13:25:25 -0300 Subject: V4L/DVB (8500a): videotext.h: whitespace cleanup Signed-off-by: Mauro Carvalho Chehab --- include/linux/videotext.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/videotext.h b/include/linux/videotext.h index 018f92047ff8..3e68c8d1c7f7 100644 --- a/include/linux/videotext.h +++ b/include/linux/videotext.h @@ -45,10 +45,10 @@ #define VTXIOCCLRCACHE_OLD 0x710b /* clear cache on VTX-interface (if avail.) 
*/ #define VTXIOCSETVIRT_OLD 0x710c /* turn on virtual mode (this disables TV-display) */ -/* +/* * Definitions for VTXIOCGETINFO */ - + #define SAA5243 0 #define SAA5246 1 #define SAA5249 2 @@ -57,10 +57,10 @@ typedef struct { int version_major, version_minor; /* version of driver; if version_major changes, driver */ - /* is not backward compatible!!! CHECK THIS!!! */ + /* is not backward compatible!!! CHECK THIS!!! */ int numpages; /* number of page-buffers of vtx-chipset */ int cct_type; /* type of vtx-chipset (SAA5243, SAA5246, SAA5248 or - * SAA5249) */ + * SAA5249) */ } vtx_info_t; @@ -81,7 +81,7 @@ vtx_info_t; #define PGMASK_HOUR (HR_TEN | HR_UNIT) #define PGMASK_MINUTE (MIN_TEN | MIN_UNIT) -typedef struct +typedef struct { int page; /* number of requested page (hexadecimal) */ int hour; /* requested hour (hexadecimal) */ @@ -98,11 +98,11 @@ vtx_pagereq_t; /* * Definitions for VTXIOC{GETSTAT,PUTSTAT} */ - + #define VTX_PAGESIZE (40 * 24) #define VTX_VIRTUALSIZE (40 * 49) -typedef struct +typedef struct { int pagenum; /* number of page (hexadecimal) */ int hour; /* hour (hexadecimal) */ @@ -121,5 +121,5 @@ typedef struct unsigned hamming : 1; /* hamming-error occurred */ } vtx_pageinfo_t; - + #endif /* _VTX_H */ -- cgit v1.2.3 From 16d69265b930f7e2fa9eea381715696f780718f4 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 25 Jul 2008 19:44:36 -0700 Subject: uninline arch_pick_mmap_layout() Fix this, on avr32: include/linux/utsname.h:35, from init/main.c:20: include/linux/sched.h: In function 'arch_pick_mmap_layout': include/linux/sched.h:2149: error: implicit declaration of function 'PAGE_ALIGN' Reported-by: Adrian Bunk Cc: Haavard Skinnemoen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 9 --------- mm/util.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 42036ffe6b00..3260a5c42b91 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2139,16 +2139,7 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ -#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT extern void arch_pick_mmap_layout(struct mm_struct *mm); -#else -static inline void arch_pick_mmap_layout(struct mm_struct *mm) -{ - mm->mmap_base = TASK_UNMAPPED_BASE; - mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; -} -#endif #ifdef CONFIG_TRACING extern void diff --git a/mm/util.c b/mm/util.c index 8f18683825bc..0efd83097ecf 100644 --- a/mm/util.c +++ b/mm/util.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -136,3 +137,12 @@ char *strndup_user(const char __user *s, long n) return p; } EXPORT_SYMBOL(strndup_user); + +#ifndef HAVE_ARCH_PICK_MMAP_LAYOUT +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + mm->mmap_base = TASK_UNMAPPED_BASE; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; +} +#endif -- cgit v1.2.3 From 8d8bb39b9eba32dd70e87fd5ad5c5dd4ba118e06 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 25 Jul 2008 19:44:49 -0700 Subject: dma-mapping: add the device argument to dma_mapping_error() Add per-device dma_mapping_ops support for CONFIG_X86_64 as POWER architecture does: This enables us to cleanly fix the Calgary IOMMU issue that some devices are not behind the IOMMU (http://lkml.org/lkml/2008/5/8/423). 
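The per-driver conversion is mechanical: the struct device (or pci_dev) that performed the mapping is now also passed to the error check, so a device-specific mapping_error hook can be consulted. A minimal sketch under that assumption; example_map_tx_buf() is a made-up helper, while dma_map_single() and dma_mapping_error() are the real interfaces after this patch:

#include <linux/dma-mapping.h>
#include <linux/errno.h>

/* Map a driver-owned buffer for transmit and check the result against
 * the device that created the mapping (old form: dma_mapping_error(addr)). */
static int example_map_tx_buf(struct device *dev, void *buf, size_t len,
			      dma_addr_t *handle)
{
	*handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, *handle))
		return -ENOMEM;

	return 0;
}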
I think that per-device dma_mapping_ops support would be also helpful for KVM people to support PCI passthrough but Andi thinks that this makes it difficult to support the PCI passthrough (see the above thread). So I CC'ed this to KVM camp. Comments are appreciated. A pointer to dma_mapping_ops to struct dev_archdata is added. If the pointer is non NULL, DMA operations in asm/dma-mapping.h use it. If it's NULL, the system-wide dma_ops pointer is used as before. If it's useful for KVM people, I plan to implement a mechanism to register a hook called when a new pci (or dma capable) device is created (it works with hot plugging). It enables IOMMUs to set up an appropriate dma_mapping_ops per device. The major obstacle is that dma_mapping_error doesn't take a pointer to the device unlike other DMA operations. So x86 can't have dma_mapping_ops per device. Note all the POWER IOMMUs use the same dma_mapping_error function so this is not a problem for POWER but x86 IOMMUs use different dma_mapping_error functions. The first patch adds the device argument to dma_mapping_error. The patch is trivial but large since it touches lots of drivers and dma-mapping.h in all the architecture. This patch: dma_mapping_error() doesn't take a pointer to the device unlike other DMA operations. So we can't have dma_mapping_ops per device. Note that POWER already has dma_mapping_ops per device but all the POWER IOMMUs use the same dma_mapping_error function. x86 IOMMUs use device argument. [akpm@linux-foundation.org: fix sge] [akpm@linux-foundation.org: fix svc_rdma] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix bnx2x] [akpm@linux-foundation.org: fix s2io] [akpm@linux-foundation.org: fix pasemi_mac] [akpm@linux-foundation.org: fix sdhci] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix sparc] [akpm@linux-foundation.org: fix ibmvscsi] Signed-off-by: FUJITA Tomonori Cc: Muli Ben-Yehuda Cc: Andi Kleen Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Avi Kivity Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/DMA-API.txt | 4 +- arch/arm/common/dmabounce.c | 2 +- arch/ia64/hp/common/hwsw_iommu.c | 5 +- arch/ia64/hp/common/sba_iommu.c | 2 +- arch/ia64/sn/pci/pci_dma.c | 2 +- arch/mips/mm/dma-default.c | 2 +- arch/powerpc/platforms/cell/celleb_scc_pciex.c | 2 +- arch/powerpc/platforms/cell/spider-pci.c | 2 +- arch/powerpc/platforms/iseries/mf.c | 2 +- arch/x86/kernel/pci-calgary_64.c | 2 +- arch/x86/kernel/pci-dma.c | 27 ++++--- arch/x86/kernel/pci-gart_64.c | 3 +- arch/x86/kernel/pci-nommu.c | 14 +--- arch/x86/kernel/pci-swiotlb_64.c | 2 +- drivers/firewire/fw-iso.c | 2 +- drivers/firewire/fw-ohci.c | 2 +- drivers/firewire/fw-sbp2.c | 8 +-- drivers/infiniband/hw/ipath/ipath_sdma.c | 2 +- drivers/infiniband/hw/ipath/ipath_user_sdma.c | 6 +- drivers/infiniband/hw/mthca/mthca_eq.c | 2 +- drivers/media/dvb/pluto2/pluto2.c | 2 +- drivers/mmc/host/sdhci.c | 4 +- drivers/net/arm/ep93xx_eth.c | 4 +- drivers/net/bnx2x_main.c | 4 +- drivers/net/cxgb3/sge.c | 2 +- drivers/net/e100.c | 2 +- drivers/net/e1000e/ethtool.c | 4 +- drivers/net/e1000e/netdev.c | 11 +-- drivers/net/ibmveth.c | 38 +++++----- drivers/net/iseries_veth.c | 4 +- drivers/net/mlx4/eq.c | 2 +- drivers/net/pasemi_mac.c | 6 +- drivers/net/qla3xxx.c | 12 ++-- drivers/net/s2io.c | 48 +++++++------ drivers/net/sfc/rx.c | 4 +- drivers/net/sfc/tx.c | 7 +- drivers/net/spider_net.c | 4 +- drivers/net/tc35815.c | 4 +- drivers/net/wireless/ath5k/base.c | 4 +- drivers/scsi/ibmvscsi/ibmvfc.c | 4 +- 
drivers/scsi/ibmvscsi/ibmvscsi.c | 4 +- drivers/scsi/ibmvscsi/ibmvstgt.c | 2 +- drivers/scsi/ibmvscsi/rpa_vscsi.c | 2 +- drivers/spi/atmel_spi.c | 4 +- drivers/spi/au1550_spi.c | 6 +- drivers/spi/omap2_mcspi.c | 4 +- drivers/spi/pxa2xx_spi.c | 4 +- drivers/spi/spi_imx.c | 6 +- include/asm-alpha/dma-mapping.h | 6 +- include/asm-alpha/pci.h | 2 +- include/asm-arm/dma-mapping.h | 2 +- include/asm-avr32/dma-mapping.h | 2 +- include/asm-cris/dma-mapping.h | 2 +- include/asm-frv/dma-mapping.h | 2 +- include/asm-generic/dma-mapping-broken.h | 2 +- include/asm-generic/dma-mapping.h | 4 +- include/asm-generic/pci-dma-compat.h | 4 +- include/asm-ia64/machvec.h | 2 +- include/asm-m68k/dma-mapping.h | 2 +- include/asm-mips/dma-mapping.h | 2 +- include/asm-mn10300/dma-mapping.h | 2 +- include/asm-parisc/dma-mapping.h | 2 +- include/asm-powerpc/dma-mapping.h | 2 +- include/asm-sh/dma-mapping.h | 2 +- include/asm-sparc/dma-mapping_64.h | 2 +- include/asm-sparc/pci_32.h | 3 +- include/asm-sparc/pci_64.h | 5 +- include/asm-x86/device.h | 3 + include/asm-x86/dma-mapping.h | 99 ++++++++++++++++++-------- include/asm-x86/swiotlb.h | 2 +- include/asm-xtensa/dma-mapping.h | 2 +- include/linux/i2o.h | 2 +- include/linux/ssb/ssb.h | 4 +- include/rdma/ib_verbs.h | 2 +- lib/swiotlb.c | 4 +- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 3 +- 76 files changed, 256 insertions(+), 210 deletions(-) (limited to 'include/linux') diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 80d150458c80..d8b63d164e41 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -298,10 +298,10 @@ recommended that you never use these unless you really know what the cache width is. int -dma_mapping_error(dma_addr_t dma_addr) +dma_mapping_error(struct device *dev, dma_addr_t dma_addr) int -pci_dma_mapping_error(dma_addr_t dma_addr) +pci_dma_mapping_error(struct pci_dev *hwdev, dma_addr_t dma_addr) In some circumstances dma_map_single and dma_map_page will fail to create a mapping. 
A driver can check for these errors by testing the returned diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index dd2947342604..69130f365904 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -280,7 +280,7 @@ unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, /* * Trying to unmap an invalid mapping */ - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(dev, dma_addr)) { dev_err(dev, "Trying to unmap invalid mapping\n"); return; } diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c index 1c44ec2a1d58..88b6e6f3fd88 100644 --- a/arch/ia64/hp/common/hwsw_iommu.c +++ b/arch/ia64/hp/common/hwsw_iommu.c @@ -186,9 +186,10 @@ hwsw_dma_supported (struct device *dev, u64 mask) } int -hwsw_dma_mapping_error (dma_addr_t dma_addr) +hwsw_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - return hwiommu_dma_mapping_error (dma_addr) || swiotlb_dma_mapping_error(dma_addr); + return hwiommu_dma_mapping_error(dev, dma_addr) || + swiotlb_dma_mapping_error(dev, dma_addr); } EXPORT_SYMBOL(hwsw_dma_mapping_error); diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 34421aed1e2a..4956be40d7b5 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -2147,7 +2147,7 @@ sba_dma_supported (struct device *dev, u64 mask) } int -sba_dma_mapping_error (dma_addr_t dma_addr) +sba_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 52175af299a0..53ebb6484495 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -350,7 +350,7 @@ void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, } EXPORT_SYMBOL(sn_dma_sync_sg_for_device); -int sn_dma_mapping_error(dma_addr_t dma_addr) +int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index ae39dd88b9aa..891312f8e5a6 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -348,7 +348,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nele EXPORT_SYMBOL(dma_sync_sg_for_device); -int dma_mapping_error(dma_addr_t dma_addr) +int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c index 0e04f8fb152a..3e7e0f1568ef 100644 --- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c +++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c @@ -281,7 +281,7 @@ static int __init scc_pciex_iowa_init(struct iowa_bus *bus, void *data) dummy_page_da = dma_map_single(bus->phb->parent, dummy_page_va, PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(dummy_page_da)) { + if (dma_mapping_error(bus->phb->parent, dummy_page_da)) { pr_err("PCIEX:Map dummy page failed.\n"); kfree(dummy_page_va); return -1; diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c index 418b605ac35a..5122ec145271 100644 --- a/arch/powerpc/platforms/cell/spider-pci.c +++ b/arch/powerpc/platforms/cell/spider-pci.c @@ -111,7 +111,7 @@ static int __init spiderpci_pci_setup_chip(struct pci_controller *phb, dummy_page_da = dma_map_single(phb->parent, dummy_page_va, PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(dummy_page_da)) { + if (dma_mapping_error(phb->parent, dummy_page_da)) { pr_err("SPIDER-IOWA:Map dummy page 
filed.\n"); kfree(dummy_page_va); return -1; diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c index 1dc7295746da..731d7b157749 100644 --- a/arch/powerpc/platforms/iseries/mf.c +++ b/arch/powerpc/platforms/iseries/mf.c @@ -871,7 +871,7 @@ static int proc_mf_dump_cmdline(char *page, char **start, off_t off, count = 256 - off; dma_addr = iseries_hv_map(page, off + count, DMA_FROM_DEVICE); - if (dma_mapping_error(dma_addr)) + if (dma_mapping_error(NULL, dma_addr)) return -ENOMEM; memset(page, 0, off + count); memset(&vsp_cmd, 0, sizeof(vsp_cmd)); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 19e7fc7c2c4f..1eb86be93d7a 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -544,7 +544,7 @@ error: return ret; } -static const struct dma_mapping_ops calgary_dma_ops = { +static struct dma_mapping_ops calgary_dma_ops = { .alloc_coherent = calgary_alloc_coherent, .map_single = calgary_map_single, .unmap_single = calgary_unmap_single, diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index cbecb05551bb..37544123896d 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -11,7 +11,7 @@ static int forbid_dac __read_mostly; -const struct dma_mapping_ops *dma_ops; +struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -312,6 +312,8 @@ static int dma_release_coherent(struct device *dev, int order, void *vaddr) int dma_supported(struct device *dev, u64 mask) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + #ifdef CONFIG_PCI if (mask > 0xffffffff && forbid_dac > 0) { dev_info(dev, "PCI: Disallowing DAC for device\n"); @@ -319,8 +321,8 @@ int dma_supported(struct device *dev, u64 mask) } #endif - if (dma_ops->dma_supported) - return dma_ops->dma_supported(dev, mask); + if (ops->dma_supported) + return ops->dma_supported(dev, mask); /* Copied from i386. Doesn't make much sense, because it will only work for pci_alloc_coherent. 
@@ -367,6 +369,7 @@ void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { + struct dma_mapping_ops *ops = get_dma_ops(dev); void *memory = NULL; struct page *page; unsigned long dma_mask = 0; @@ -435,8 +438,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, /* Let low level make its own zone decisions */ gfp &= ~(GFP_DMA32|GFP_DMA); - if (dma_ops->alloc_coherent) - return dma_ops->alloc_coherent(dev, size, + if (ops->alloc_coherent) + return ops->alloc_coherent(dev, size, dma_handle, gfp); return NULL; } @@ -448,14 +451,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, } } - if (dma_ops->alloc_coherent) { + if (ops->alloc_coherent) { free_pages((unsigned long)memory, get_order(size)); gfp &= ~(GFP_DMA|GFP_DMA32); - return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); + return ops->alloc_coherent(dev, size, dma_handle, gfp); } - if (dma_ops->map_simple) { - *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), + if (ops->map_simple) { + *dma_handle = ops->map_simple(dev, virt_to_phys(memory), size, PCI_DMA_BIDIRECTIONAL); if (*dma_handle != bad_dma_address) @@ -477,12 +480,14 @@ EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t bus) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + int order = get_order(size); WARN_ON(irqs_disabled()); /* for portability */ if (dma_release_coherent(dev, order, vaddr)) return; - if (dma_ops->unmap_single) - dma_ops->unmap_single(dev, bus, size, 0); + if (ops->unmap_single) + ops->unmap_single(dev, bus, size, 0); free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL(dma_free_coherent); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index df5f142657d2..744126e64950 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -692,8 +692,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) extern int agp_amd64_init(void); -static const struct dma_mapping_ops gart_dma_ops = { - .mapping_error = NULL, +static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, .map_simple = gart_map_simple, .unmap_single = gart_unmap_single, diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 792b9179eff3..3f91f71cdc3e 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -72,21 +72,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } -/* Make sure we keep the same behaviour */ -static int nommu_mapping_error(dma_addr_t dma_addr) -{ -#ifdef CONFIG_X86_32 - return 0; -#else - return (dma_addr == bad_dma_address); -#endif -} - - -const struct dma_mapping_ops nommu_dma_ops = { +struct dma_mapping_ops nommu_dma_ops = { .map_single = nommu_map_single, .map_sg = nommu_map_sg, - .mapping_error = nommu_mapping_error, .is_phys = 1, }; diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 20df839b9c20..c4ce0332759e 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -18,7 +18,7 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); } -const struct dma_mapping_ops swiotlb_dma_ops = { +struct dma_mapping_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, .alloc_coherent = swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, diff --git 
a/drivers/firewire/fw-iso.c b/drivers/firewire/fw-iso.c index bcbe794a3ea5..e14c03dc0065 100644 --- a/drivers/firewire/fw-iso.c +++ b/drivers/firewire/fw-iso.c @@ -50,7 +50,7 @@ fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card, address = dma_map_page(card->device, buffer->pages[i], 0, PAGE_SIZE, direction); - if (dma_mapping_error(address)) { + if (dma_mapping_error(card->device, address)) { __free_page(buffer->pages[i]); goto out_pages; } diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c index 333b12544dd1..566672e0bcff 100644 --- a/drivers/firewire/fw-ohci.c +++ b/drivers/firewire/fw-ohci.c @@ -953,7 +953,7 @@ at_context_queue_packet(struct context *ctx, struct fw_packet *packet) payload_bus = dma_map_single(ohci->card.device, packet->payload, packet->payload_length, DMA_TO_DEVICE); - if (dma_mapping_error(payload_bus)) { + if (dma_mapping_error(ohci->card.device, payload_bus)) { packet->ack = RCODE_SEND_ERROR; return -1; } diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 53fc5a641e6d..aaff50ebba1d 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -543,7 +543,7 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id, orb->response_bus = dma_map_single(device->card->device, &orb->response, sizeof(orb->response), DMA_FROM_DEVICE); - if (dma_mapping_error(orb->response_bus)) + if (dma_mapping_error(device->card->device, orb->response_bus)) goto fail_mapping_response; orb->request.response.high = 0; @@ -577,7 +577,7 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id, orb->base.request_bus = dma_map_single(device->card->device, &orb->request, sizeof(orb->request), DMA_TO_DEVICE); - if (dma_mapping_error(orb->base.request_bus)) + if (dma_mapping_error(device->card->device, orb->base.request_bus)) goto fail_mapping_request; sbp2_send_orb(&orb->base, lu, node_id, generation, @@ -1424,7 +1424,7 @@ sbp2_map_scatterlist(struct sbp2_command_orb *orb, struct fw_device *device, orb->page_table_bus = dma_map_single(device->card->device, orb->page_table, sizeof(orb->page_table), DMA_TO_DEVICE); - if (dma_mapping_error(orb->page_table_bus)) + if (dma_mapping_error(device->card->device, orb->page_table_bus)) goto fail_page_table; /* @@ -1509,7 +1509,7 @@ static int sbp2_scsi_queuecommand(struct scsi_cmnd *cmd, scsi_done_fn_t done) orb->base.request_bus = dma_map_single(device->card->device, &orb->request, sizeof(orb->request), DMA_TO_DEVICE); - if (dma_mapping_error(orb->base.request_bus)) + if (dma_mapping_error(device->card->device, orb->base.request_bus)) goto out; sbp2_send_orb(&orb->base, lu, lu->tgt->node_id, lu->generation, diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index eaba03273e4f..284c9bca517e 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -698,7 +698,7 @@ retry: addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr, tx->map_len, DMA_TO_DEVICE); - if (dma_mapping_error(addr)) { + if (dma_mapping_error(&dd->pcidev->dev, addr)) { ret = -EIO; goto unlock; } diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c index 86e016916cd1..82d9a0b5ca2f 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c @@ -206,7 +206,7 @@ static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd, dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, DMA_TO_DEVICE); 
- if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto free_unmap; } @@ -301,7 +301,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd, pages[j], 0, flen, DMA_TO_DEVICE); unsigned long fofs = addr & ~PAGE_MASK; - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto done; } @@ -508,7 +508,7 @@ static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd, if (page) { dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto free_pbc; } diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 4e36aa7cb3d2..cc6858f0b65b 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -780,7 +780,7 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) return -ENOMEM; dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(dev->eq_table.icm_dma)) { + if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) { __free_page(dev->eq_table.icm_page); return -ENOMEM; } diff --git a/drivers/media/dvb/pluto2/pluto2.c b/drivers/media/dvb/pluto2/pluto2.c index 1360403b88b6..a9653c63f4db 100644 --- a/drivers/media/dvb/pluto2/pluto2.c +++ b/drivers/media/dvb/pluto2/pluto2.c @@ -242,7 +242,7 @@ static int __devinit pluto_dma_map(struct pluto *pluto) pluto->dma_addr = pci_map_single(pluto->pdev, pluto->dma_buf, TS_DMA_BYTES, PCI_DMA_FROMDEVICE); - return pci_dma_mapping_error(pluto->dma_addr); + return pci_dma_mapping_error(pluto->pdev, pluto->dma_addr); } static void pluto_dma_unmap(struct pluto *pluto) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index c3a5db72ddd7..5f95e10229b5 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -337,7 +337,7 @@ static int sdhci_adma_table_pre(struct sdhci_host *host, host->align_addr = dma_map_single(mmc_dev(host->mmc), host->align_buffer, 128 * 4, direction); - if (dma_mapping_error(host->align_addr)) + if (dma_mapping_error(mmc_dev(host->mmc), host->align_addr)) goto fail; BUG_ON(host->align_addr & 0x3); @@ -439,7 +439,7 @@ static int sdhci_adma_table_pre(struct sdhci_host *host, host->adma_addr = dma_map_single(mmc_dev(host->mmc), host->adma_desc, (128 * 2 + 1) * 4, DMA_TO_DEVICE); - if (dma_mapping_error(host->align_addr)) + if (dma_mapping_error(mmc_dev(host->mmc), host->align_addr)) goto unmap_entries; BUG_ON(host->adma_addr & 0x3); diff --git a/drivers/net/arm/ep93xx_eth.c b/drivers/net/arm/ep93xx_eth.c index 7a14980f3472..18d3eeb7eab2 100644 --- a/drivers/net/arm/ep93xx_eth.c +++ b/drivers/net/arm/ep93xx_eth.c @@ -482,7 +482,7 @@ static int ep93xx_alloc_buffers(struct ep93xx_priv *ep) goto err; d = dma_map_single(NULL, page, PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(d)) { + if (dma_mapping_error(NULL, d)) { free_page((unsigned long)page); goto err; } @@ -505,7 +505,7 @@ static int ep93xx_alloc_buffers(struct ep93xx_priv *ep) goto err; d = dma_map_single(NULL, page, PAGE_SIZE, DMA_TO_DEVICE); - if (dma_mapping_error(d)) { + if (dma_mapping_error(NULL, d)) { free_page((unsigned long)page); goto err; } diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index 0263bef9cc6d..c7cc760a1777 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -1020,7 +1020,7 
@@ static inline int bnx2x_alloc_rx_sge(struct bnx2x *bp, mapping = pci_map_page(bp->pdev, page, 0, BCM_PAGE_SIZE*PAGES_PER_SGE, PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(mapping))) { + if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) { __free_pages(page, PAGES_PER_SGE_SHIFT); return -ENOMEM; } @@ -1048,7 +1048,7 @@ static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp, mapping = pci_map_single(bp->pdev, skb->data, bp->rx_buf_use_size, PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(mapping))) { + if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) { dev_kfree_skb(skb); return -ENOMEM; } diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index a96331c875e6..1b0861d73ab7 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -386,7 +386,7 @@ static inline int add_one_rx_buf(void *va, unsigned int len, dma_addr_t mapping; mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE); - if (unlikely(pci_dma_mapping_error(mapping))) + if (unlikely(pci_dma_mapping_error(pdev, mapping))) return -ENOMEM; pci_unmap_addr_set(sd, dma_addr, mapping); diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 1037b1332312..19d32a227be1 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1790,7 +1790,7 @@ static int e100_rx_alloc_skb(struct nic *nic, struct rx *rx) rx->dma_addr = pci_map_single(nic->pdev, rx->skb->data, RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(rx->dma_addr)) { + if (pci_dma_mapping_error(nic->pdev, rx->dma_addr)) { dev_kfree_skb_any(rx->skb); rx->skb = NULL; rx->dma_addr = 0; diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c index a14561f40db0..9350564065e7 100644 --- a/drivers/net/e1000e/ethtool.c +++ b/drivers/net/e1000e/ethtool.c @@ -1090,7 +1090,7 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) tx_ring->buffer_info[i].dma = pci_map_single(pdev, skb->data, skb->len, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(tx_ring->buffer_info[i].dma)) { + if (pci_dma_mapping_error(pdev, tx_ring->buffer_info[i].dma)) { ret_val = 4; goto err_nomem; } @@ -1153,7 +1153,7 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter) rx_ring->buffer_info[i].dma = pci_map_single(pdev, skb->data, 2048, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rx_ring->buffer_info[i].dma)) { + if (pci_dma_mapping_error(pdev, rx_ring->buffer_info[i].dma)) { ret_val = 8; goto err_nomem; } diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 9c0f56b3c518..d13677899767 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -195,7 +195,7 @@ map_skb: buffer_info->dma = pci_map_single(pdev, skb->data, adapter->rx_buffer_len, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(buffer_info->dma)) { + if (pci_dma_mapping_error(pdev, buffer_info->dma)) { dev_err(&pdev->dev, "RX DMA map failed\n"); adapter->rx_dma_failed++; break; @@ -265,7 +265,7 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, ps_page->page, 0, PAGE_SIZE, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(ps_page->dma)) { + if (pci_dma_mapping_error(pdev, ps_page->dma)) { dev_err(&adapter->pdev->dev, "RX DMA page map failed\n"); adapter->rx_dma_failed++; @@ -300,7 +300,7 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, buffer_info->dma = pci_map_single(pdev, skb->data, adapter->rx_ps_bsize0, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(buffer_info->dma)) { + if (pci_dma_mapping_error(pdev, buffer_info->dma)) { 
dev_err(&pdev->dev, "RX DMA map failed\n"); adapter->rx_dma_failed++; /* cleanup skb */ @@ -3344,7 +3344,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter, skb->data + offset, size, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(buffer_info->dma)) { + if (pci_dma_mapping_error(adapter->pdev, buffer_info->dma)) { dev_err(&adapter->pdev->dev, "TX DMA map failed\n"); adapter->tx_dma_failed++; return -1; @@ -3382,7 +3382,8 @@ static int e1000_tx_map(struct e1000_adapter *adapter, offset, size, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(buffer_info->dma)) { + if (pci_dma_mapping_error(adapter->pdev, + buffer_info->dma)) { dev_err(&adapter->pdev->dev, "TX DMA page map failed\n"); adapter->tx_dma_failed++; diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index e5a6e2e84540..91ec9fdc7184 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -260,7 +260,7 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, pool->buff_size, DMA_FROM_DEVICE); - if (dma_mapping_error(dma_addr)) + if (dma_mapping_error((&adapter->vdev->dev, dma_addr)) goto failure; pool->free_map[free_index] = IBM_VETH_INVALID_MAP; @@ -294,7 +294,7 @@ failure: pool->consumer_index = pool->size - 1; else pool->consumer_index--; - if (!dma_mapping_error(dma_addr)) + if (!dma_mapping_error((&adapter->vdev->dev, dma_addr)) dma_unmap_single(&adapter->vdev->dev, pool->dma_addr[index], pool->buff_size, DMA_FROM_DEVICE); @@ -448,11 +448,11 @@ static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter) static void ibmveth_cleanup(struct ibmveth_adapter *adapter) { int i; + struct device *dev = &adapter->vdev->dev; if(adapter->buffer_list_addr != NULL) { - if(!dma_mapping_error(adapter->buffer_list_dma)) { - dma_unmap_single(&adapter->vdev->dev, - adapter->buffer_list_dma, 4096, + if (!dma_mapping_error(dev, adapter->buffer_list_dma)) { + dma_unmap_single(dev, adapter->buffer_list_dma, 4096, DMA_BIDIRECTIONAL); adapter->buffer_list_dma = DMA_ERROR_CODE; } @@ -461,9 +461,8 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter) } if(adapter->filter_list_addr != NULL) { - if(!dma_mapping_error(adapter->filter_list_dma)) { - dma_unmap_single(&adapter->vdev->dev, - adapter->filter_list_dma, 4096, + if (!dma_mapping_error(dev, adapter->filter_list_dma)) { + dma_unmap_single(dev, adapter->filter_list_dma, 4096, DMA_BIDIRECTIONAL); adapter->filter_list_dma = DMA_ERROR_CODE; } @@ -472,8 +471,8 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter) } if(adapter->rx_queue.queue_addr != NULL) { - if(!dma_mapping_error(adapter->rx_queue.queue_dma)) { - dma_unmap_single(&adapter->vdev->dev, + if (!dma_mapping_error(dev, adapter->rx_queue.queue_dma)) { + dma_unmap_single(dev, adapter->rx_queue.queue_dma, adapter->rx_queue.queue_len, DMA_BIDIRECTIONAL); @@ -535,6 +534,7 @@ static int ibmveth_open(struct net_device *netdev) int rc; union ibmveth_buf_desc rxq_desc; int i; + struct device *dev; ibmveth_debug_printk("open starting\n"); @@ -563,17 +563,19 @@ static int ibmveth_open(struct net_device *netdev) return -ENOMEM; } - adapter->buffer_list_dma = dma_map_single(&adapter->vdev->dev, + dev = &adapter->vdev->dev; + + adapter->buffer_list_dma = dma_map_single(dev, adapter->buffer_list_addr, 4096, DMA_BIDIRECTIONAL); - adapter->filter_list_dma = dma_map_single(&adapter->vdev->dev, + adapter->filter_list_dma = dma_map_single(dev, adapter->filter_list_addr, 4096, DMA_BIDIRECTIONAL); - adapter->rx_queue.queue_dma 
= dma_map_single(&adapter->vdev->dev, + adapter->rx_queue.queue_dma = dma_map_single(dev, adapter->rx_queue.queue_addr, adapter->rx_queue.queue_len, DMA_BIDIRECTIONAL); - if((dma_mapping_error(adapter->buffer_list_dma) ) || - (dma_mapping_error(adapter->filter_list_dma)) || - (dma_mapping_error(adapter->rx_queue.queue_dma))) { + if ((dma_mapping_error(dev, adapter->buffer_list_dma)) || + (dma_mapping_error(dev, adapter->filter_list_dma)) || + (dma_mapping_error(dev, adapter->rx_queue.queue_dma))) { ibmveth_error_printk("unable to map filter or buffer list pages\n"); ibmveth_cleanup(adapter); napi_disable(&adapter->napi); @@ -645,7 +647,7 @@ static int ibmveth_open(struct net_device *netdev) adapter->bounce_buffer_dma = dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer, netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL); - if (dma_mapping_error(adapter->bounce_buffer_dma)) { + if (dma_mapping_error(dev, adapter->bounce_buffer_dma)) { ibmveth_error_printk("unable to map bounce buffer\n"); ibmveth_cleanup(adapter); napi_disable(&adapter->napi); @@ -922,7 +924,7 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) buf[1] = 0; } - if (dma_mapping_error(data_dma_addr)) { + if (dma_mapping_error((&adapter->vdev->dev, data_dma_addr)) { if (!firmware_has_feature(FW_FEATURE_CMO)) ibmveth_error_printk("tx: unable to map xmit buffer\n"); skb_copy_from_linear_data(skb, adapter->bounce_buffer, diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index b8d0639c1cdf..c46864d626b2 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -1128,7 +1128,7 @@ static int veth_transmit_to_one(struct sk_buff *skb, HvLpIndex rlp, msg->data.addr[0] = dma_map_single(port->dev, skb->data, skb->len, DMA_TO_DEVICE); - if (dma_mapping_error(msg->data.addr[0])) + if (dma_mapping_error(port->dev, msg->data.addr[0])) goto recycle_and_drop; msg->dev = port->dev; @@ -1226,7 +1226,7 @@ static void veth_recycle_msg(struct veth_lpar_connection *cnx, dma_address = msg->data.addr[0]; dma_length = msg->data.len[0]; - if (!dma_mapping_error(dma_address)) + if (!dma_mapping_error(msg->dev, dma_address)) dma_unmap_single(msg->dev, dma_address, dma_length, DMA_TO_DEVICE); diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index ea3a09aaa844..7df928d3a3d8 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -526,7 +526,7 @@ int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt) return -ENOMEM; priv->eq_table.icm_dma = pci_map_page(dev->pdev, priv->eq_table.icm_page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(priv->eq_table.icm_dma)) { + if (pci_dma_mapping_error(dev->pdev, priv->eq_table.icm_dma)) { __free_page(priv->eq_table.icm_page); return -ENOMEM; } diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c index 993d87c9296f..edc0fd588985 100644 --- a/drivers/net/pasemi_mac.c +++ b/drivers/net/pasemi_mac.c @@ -650,7 +650,7 @@ static void pasemi_mac_replenish_rx_ring(const struct net_device *dev, mac->bufsz - LOCAL_SKB_ALIGN, PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(dma))) { + if (unlikely(pci_dma_mapping_error(mac->dma_pdev, dma))) { dev_kfree_skb_irq(info->skb); break; } @@ -1519,7 +1519,7 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev) map[0] = pci_map_single(mac->dma_pdev, skb->data, skb_headlen(skb), PCI_DMA_TODEVICE); map_size[0] = skb_headlen(skb); - if (dma_mapping_error(map[0])) + if (pci_dma_mapping_error(mac->dma_pdev, map[0])) goto out_err_nolock; for (i = 
0; i < nfrags; i++) { @@ -1529,7 +1529,7 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev) frag->page_offset, frag->size, PCI_DMA_TODEVICE); map_size[i+1] = frag->size; - if (dma_mapping_error(map[i+1])) { + if (pci_dma_mapping_error(mac->dma_pdev, map[i+1])) { nfrags = i; goto out_err_nolock; } diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c index e7d48a352beb..e82b37bbd6c3 100644 --- a/drivers/net/qla3xxx.c +++ b/drivers/net/qla3xxx.c @@ -328,7 +328,7 @@ static void ql_release_to_lrg_buf_free_list(struct ql3_adapter *qdev, qdev->lrg_buffer_len - QL_HEADER_SPACE, PCI_DMA_FROMDEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping failed with error: %d\n", qdev->ndev->name, err); @@ -1919,7 +1919,7 @@ static int ql_populate_free_queue(struct ql3_adapter *qdev) QL_HEADER_SPACE, PCI_DMA_FROMDEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping failed with error: %d\n", qdev->ndev->name, err); @@ -2454,7 +2454,7 @@ static int ql_send_map(struct ql3_adapter *qdev, */ map = pci_map_single(qdev->pdev, skb->data, len, PCI_DMA_TODEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping failed with error: %d\n", qdev->ndev->name, err); @@ -2487,7 +2487,7 @@ static int ql_send_map(struct ql3_adapter *qdev, sizeof(struct oal), PCI_DMA_TODEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping outbound address list with error: %d\n", @@ -2514,7 +2514,7 @@ static int ql_send_map(struct ql3_adapter *qdev, frag->page_offset, frag->size, PCI_DMA_TODEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping frags failed with error: %d\n", qdev->ndev->name, err); @@ -2916,7 +2916,7 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev) QL_HEADER_SPACE, PCI_DMA_FROMDEVICE); - err = pci_dma_mapping_error(map); + err = pci_dma_mapping_error(qdev->pdev, map); if(err) { printk(KERN_ERR "%s: PCI mapping failed with error: %d\n", qdev->ndev->name, err); diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 9dae40ccf048..86d77d05190a 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -2512,8 +2512,8 @@ static void stop_nic(struct s2io_nic *nic) * Return Value: * SUCCESS on success or an appropriate -ve value on failure. 
*/ - -static int fill_rx_buffers(struct ring_info *ring, int from_card_up) +static int fill_rx_buffers(struct s2io_nic *nic, struct ring_info *ring, + int from_card_up) { struct sk_buff *skb; struct RxD_t *rxdp; @@ -2602,7 +2602,8 @@ static int fill_rx_buffers(struct ring_info *ring, int from_card_up) rxdp1->Buffer0_ptr = pci_map_single (ring->pdev, skb->data, size - NET_IP_ALIGN, PCI_DMA_FROMDEVICE); - if(pci_dma_mapping_error(rxdp1->Buffer0_ptr)) + if (pci_dma_mapping_error(nic->pdev, + rxdp1->Buffer0_ptr)) goto pci_map_failed; rxdp->Control_2 = @@ -2636,7 +2637,8 @@ static int fill_rx_buffers(struct ring_info *ring, int from_card_up) rxdp3->Buffer0_ptr = pci_map_single(ring->pdev, ba->ba_0, BUF0_LEN, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp3->Buffer0_ptr)) + if (pci_dma_mapping_error(nic->pdev, + rxdp3->Buffer0_ptr)) goto pci_map_failed; } else pci_dma_sync_single_for_device(ring->pdev, @@ -2655,7 +2657,8 @@ static int fill_rx_buffers(struct ring_info *ring, int from_card_up) (ring->pdev, skb->data, ring->mtu + 4, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp3->Buffer2_ptr)) + if (pci_dma_mapping_error(nic->pdev, + rxdp3->Buffer2_ptr)) goto pci_map_failed; if (from_card_up) { @@ -2664,8 +2667,8 @@ static int fill_rx_buffers(struct ring_info *ring, int from_card_up) ba->ba_1, BUF1_LEN, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error - (rxdp3->Buffer1_ptr)) { + if (pci_dma_mapping_error(nic->pdev, + rxdp3->Buffer1_ptr)) { pci_unmap_single (ring->pdev, (dma_addr_t)(unsigned long) @@ -2806,9 +2809,9 @@ static void free_rx_buffers(struct s2io_nic *sp) } } -static int s2io_chk_rx_buffers(struct ring_info *ring) +static int s2io_chk_rx_buffers(struct s2io_nic *nic, struct ring_info *ring) { - if (fill_rx_buffers(ring, 0) == -ENOMEM) { + if (fill_rx_buffers(nic, ring, 0) == -ENOMEM) { DBG_PRINT(INFO_DBG, "%s:Out of memory", ring->dev->name); DBG_PRINT(INFO_DBG, " in Rx Intr!!\n"); } @@ -2848,7 +2851,7 @@ static int s2io_poll_msix(struct napi_struct *napi, int budget) return 0; pkts_processed = rx_intr_handler(ring, budget); - s2io_chk_rx_buffers(ring); + s2io_chk_rx_buffers(nic, ring); if (pkts_processed < budget_org) { netif_rx_complete(dev, napi); @@ -2882,7 +2885,7 @@ static int s2io_poll_inta(struct napi_struct *napi, int budget) for (i = 0; i < config->rx_ring_num; i++) { ring = &mac_control->rings[i]; ring_pkts_processed = rx_intr_handler(ring, budget); - s2io_chk_rx_buffers(ring); + s2io_chk_rx_buffers(nic, ring); pkts_processed += ring_pkts_processed; budget -= ring_pkts_processed; if (budget <= 0) @@ -2939,7 +2942,8 @@ static void s2io_netpoll(struct net_device *dev) rx_intr_handler(&mac_control->rings[i], 0); for (i = 0; i < config->rx_ring_num; i++) { - if (fill_rx_buffers(&mac_control->rings[i], 0) == -ENOMEM) { + if (fill_rx_buffers(nic, &mac_control->rings[i], 0) == + -ENOMEM) { DBG_PRINT(INFO_DBG, "%s:Out of memory", dev->name); DBG_PRINT(INFO_DBG, " in Rx Netpoll!!\n"); break; @@ -4235,14 +4239,14 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) txdp->Buffer_Pointer = pci_map_single(sp->pdev, fifo->ufo_in_band_v, sizeof(u64), PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(txdp->Buffer_Pointer)) + if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer)) goto pci_map_failed; txdp++; } txdp->Buffer_Pointer = pci_map_single (sp->pdev, skb->data, frg_len, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(txdp->Buffer_Pointer)) + if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer)) goto pci_map_failed; txdp->Host_Control = (unsigned long) 
skb; @@ -4345,7 +4349,7 @@ static irqreturn_t s2io_msix_ring_handle(int irq, void *dev_id) netif_rx_schedule(dev, &ring->napi); } else { rx_intr_handler(ring, 0); - s2io_chk_rx_buffers(ring); + s2io_chk_rx_buffers(sp, ring); } return IRQ_HANDLED; @@ -4826,7 +4830,7 @@ static irqreturn_t s2io_isr(int irq, void *dev_id) */ if (!config->napi) { for (i = 0; i < config->rx_ring_num; i++) - s2io_chk_rx_buffers(&mac_control->rings[i]); + s2io_chk_rx_buffers(sp, &mac_control->rings[i]); } writeq(sp->general_int_mask, &bar0->general_int_mask); readl(&bar0->general_int_status); @@ -6859,7 +6863,7 @@ static int set_rxd_buffer_pointer(struct s2io_nic *sp, struct RxD_t *rxdp, pci_map_single( sp->pdev, (*skb)->data, size - NET_IP_ALIGN, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp1->Buffer0_ptr)) + if (pci_dma_mapping_error(sp->pdev, rxdp1->Buffer0_ptr)) goto memalloc_failed; rxdp->Host_Control = (unsigned long) (*skb); } @@ -6886,12 +6890,13 @@ static int set_rxd_buffer_pointer(struct s2io_nic *sp, struct RxD_t *rxdp, pci_map_single(sp->pdev, (*skb)->data, dev->mtu + 4, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp3->Buffer2_ptr)) + if (pci_dma_mapping_error(sp->pdev, rxdp3->Buffer2_ptr)) goto memalloc_failed; rxdp3->Buffer0_ptr = *temp0 = pci_map_single( sp->pdev, ba->ba_0, BUF0_LEN, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp3->Buffer0_ptr)) { + if (pci_dma_mapping_error(sp->pdev, + rxdp3->Buffer0_ptr)) { pci_unmap_single (sp->pdev, (dma_addr_t)rxdp3->Buffer2_ptr, dev->mtu + 4, PCI_DMA_FROMDEVICE); @@ -6903,7 +6908,8 @@ static int set_rxd_buffer_pointer(struct s2io_nic *sp, struct RxD_t *rxdp, rxdp3->Buffer1_ptr = *temp1 = pci_map_single(sp->pdev, ba->ba_1, BUF1_LEN, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(rxdp3->Buffer1_ptr)) { + if (pci_dma_mapping_error(sp->pdev, + rxdp3->Buffer1_ptr)) { pci_unmap_single (sp->pdev, (dma_addr_t)rxdp3->Buffer0_ptr, BUF0_LEN, PCI_DMA_FROMDEVICE); @@ -7187,7 +7193,7 @@ static int s2io_card_up(struct s2io_nic * sp) for (i = 0; i < config->rx_ring_num; i++) { mac_control->rings[i].mtu = dev->mtu; - ret = fill_rx_buffers(&mac_control->rings[i], 1); + ret = fill_rx_buffers(sp, &mac_control->rings[i], 1); if (ret) { DBG_PRINT(ERR_DBG, "%s: Out of memory in Open\n", dev->name); diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c index 601b001437c0..0d27dd39bc09 100644 --- a/drivers/net/sfc/rx.c +++ b/drivers/net/sfc/rx.c @@ -233,7 +233,7 @@ static inline int efx_init_rx_buffer_skb(struct efx_rx_queue *rx_queue, rx_buf->data, rx_buf->len, PCI_DMA_FROMDEVICE); - if (unlikely(pci_dma_mapping_error(rx_buf->dma_addr))) { + if (unlikely(pci_dma_mapping_error(efx->pci_dev, rx_buf->dma_addr))) { dev_kfree_skb_any(rx_buf->skb); rx_buf->skb = NULL; return -EIO; @@ -275,7 +275,7 @@ static inline int efx_init_rx_buffer_page(struct efx_rx_queue *rx_queue, 0, efx_rx_buf_size(efx), PCI_DMA_FROMDEVICE); - if (unlikely(pci_dma_mapping_error(dma_addr))) { + if (unlikely(pci_dma_mapping_error(efx->pci_dev, dma_addr))) { __free_pages(rx_buf->page, efx->rx_buffer_order); rx_buf->page = NULL; return -EIO; diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c index 5cdd082ab8f6..5e8374ab28ee 100644 --- a/drivers/net/sfc/tx.c +++ b/drivers/net/sfc/tx.c @@ -172,7 +172,7 @@ static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue, /* Process all fragments */ while (1) { - if (unlikely(pci_dma_mapping_error(dma_addr))) + if (unlikely(pci_dma_mapping_error(pci_dev, dma_addr))) goto pci_err; /* Store fields for marking in the per-fragment final @@ 
-661,7 +661,8 @@ efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len) tsoh->dma_addr = pci_map_single(tx_queue->efx->pci_dev, TSOH_BUFFER(tsoh), header_len, PCI_DMA_TODEVICE); - if (unlikely(pci_dma_mapping_error(tsoh->dma_addr))) { + if (unlikely(pci_dma_mapping_error(tx_queue->efx->pci_dev, + tsoh->dma_addr))) { kfree(tsoh); return NULL; } @@ -863,7 +864,7 @@ static inline int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, st->ifc.unmap_addr = pci_map_page(efx->pci_dev, page, page_off, len, PCI_DMA_TODEVICE); - if (likely(!pci_dma_mapping_error(st->ifc.unmap_addr))) { + if (likely(!pci_dma_mapping_error(efx->pci_dev, st->ifc.unmap_addr))) { st->ifc.unmap_len = len; st->ifc.len = len; st->ifc.dma_addr = st->ifc.unmap_addr; diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c index 00aa0b108cb9..b6435d0d71f9 100644 --- a/drivers/net/spider_net.c +++ b/drivers/net/spider_net.c @@ -452,7 +452,7 @@ spider_net_prepare_rx_descr(struct spider_net_card *card, /* iommu-map the skb */ buf = pci_map_single(card->pdev, descr->skb->data, SPIDER_NET_MAX_FRAME, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(buf)) { + if (pci_dma_mapping_error(card->pdev, buf)) { dev_kfree_skb_any(descr->skb); descr->skb = NULL; if (netif_msg_rx_err(card) && net_ratelimit()) @@ -691,7 +691,7 @@ spider_net_prepare_tx_descr(struct spider_net_card *card, unsigned long flags; buf = pci_map_single(card->pdev, skb->data, skb->len, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(buf)) { + if (pci_dma_mapping_error(card->pdev, buf)) { if (netif_msg_tx_err(card) && net_ratelimit()) dev_err(&card->netdev->dev, "could not iommu-map packet (%p, %i). " "Dropping packet\n", skb->data, skb->len); diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c index a645e5028c14..8487ace9d2e3 100644 --- a/drivers/net/tc35815.c +++ b/drivers/net/tc35815.c @@ -506,7 +506,7 @@ static void *alloc_rxbuf_page(struct pci_dev *hwdev, dma_addr_t *dma_handle) return NULL; *dma_handle = pci_map_single(hwdev, buf, PAGE_SIZE, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(*dma_handle)) { + if (pci_dma_mapping_error(hwdev, *dma_handle)) { free_page((unsigned long)buf); return NULL; } @@ -536,7 +536,7 @@ static struct sk_buff *alloc_rxbuf_skb(struct net_device *dev, return NULL; *dma_handle = pci_map_single(hwdev, skb->data, RX_BUF_SIZE, PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(*dma_handle)) { + if (pci_dma_mapping_error(hwdev, *dma_handle)) { dev_kfree_skb_any(skb); return NULL; } diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c index 217d506527a9..d9769c527346 100644 --- a/drivers/net/wireless/ath5k/base.c +++ b/drivers/net/wireless/ath5k/base.c @@ -1166,7 +1166,7 @@ ath5k_rxbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf) bf->skb = skb; bf->skbaddr = pci_map_single(sc->pdev, skb->data, sc->rxbufsize, PCI_DMA_FROMDEVICE); - if (unlikely(pci_dma_mapping_error(bf->skbaddr))) { + if (unlikely(pci_dma_mapping_error(sc->pdev, bf->skbaddr))) { ATH5K_ERR(sc, "%s: DMA mapping failed\n", __func__); dev_kfree_skb(skb); bf->skb = NULL; @@ -1918,7 +1918,7 @@ ath5k_beacon_setup(struct ath5k_softc *sc, struct ath5k_buf *bf) ATH5K_DBG(sc, ATH5K_DEBUG_BEACON, "skb %p [data %p len %u] " "skbaddr %llx\n", skb, skb->data, skb->len, (unsigned long long)bf->skbaddr); - if (pci_dma_mapping_error(bf->skbaddr)) { + if (pci_dma_mapping_error(sc->pdev, bf->skbaddr)) { ATH5K_ERR(sc, "beacon DMA mapping failed\n"); return -EIO; } diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c 
b/drivers/scsi/ibmvscsi/ibmvfc.c index c4a7c06793c5..61f8fdea2d96 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -3525,7 +3525,7 @@ static int ibmvfc_init_crq(struct ibmvfc_host *vhost) crq->msg_token = dma_map_single(dev, crq->msgs, PAGE_SIZE, DMA_BIDIRECTIONAL); - if (dma_mapping_error(crq->msg_token)) + if (dma_mapping_error(dev, crq->msg_token)) goto map_failed; retrc = rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address, @@ -3618,7 +3618,7 @@ static int ibmvfc_alloc_mem(struct ibmvfc_host *vhost) async_q->size * sizeof(*async_q->msgs), DMA_BIDIRECTIONAL); - if (dma_mapping_error(async_q->msg_token)) { + if (dma_mapping_error(dev, async_q->msg_token)) { dev_err(dev, "Failed to map async queue\n"); goto free_async_crq; } diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 20000ec79b04..6b24b9cdb04c 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -859,7 +859,7 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata) sizeof(hostdata->madapter_info), DMA_BIDIRECTIONAL); - if (dma_mapping_error(req->buffer)) { + if (dma_mapping_error(hostdata->dev, req->buffer)) { if (!firmware_has_feature(FW_FEATURE_CMO)) dev_err(hostdata->dev, "Unable to map request_buffer for " @@ -1407,7 +1407,7 @@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata, length, DMA_BIDIRECTIONAL); - if (dma_mapping_error(host_config->buffer)) { + if (dma_mapping_error(hostdata->dev, host_config->buffer)) { if (!firmware_has_feature(FW_FEATURE_CMO)) dev_err(hostdata->dev, "dma_mapping error getting host config\n"); diff --git a/drivers/scsi/ibmvscsi/ibmvstgt.c b/drivers/scsi/ibmvscsi/ibmvstgt.c index 3b9514c8f1f1..2e13ec00172a 100644 --- a/drivers/scsi/ibmvscsi/ibmvstgt.c +++ b/drivers/scsi/ibmvscsi/ibmvstgt.c @@ -564,7 +564,7 @@ static int crq_queue_create(struct crq_queue *queue, struct srp_target *target) queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL); - if (dma_mapping_error(queue->msg_token)) + if (dma_mapping_error(target->dev, queue->msg_token)) goto map_failed; err = h_reg_crq(vport->dma_dev->unit_address, queue->msg_token, diff --git a/drivers/scsi/ibmvscsi/rpa_vscsi.c b/drivers/scsi/ibmvscsi/rpa_vscsi.c index 182146100dc1..462a8574dad9 100644 --- a/drivers/scsi/ibmvscsi/rpa_vscsi.c +++ b/drivers/scsi/ibmvscsi/rpa_vscsi.c @@ -253,7 +253,7 @@ static int rpavscsi_init_crq_queue(struct crq_queue *queue, queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL); - if (dma_mapping_error(queue->msg_token)) + if (dma_mapping_error(hostdata->dev, queue->msg_token)) goto map_failed; gather_partition_info(); diff --git a/drivers/spi/atmel_spi.c b/drivers/spi/atmel_spi.c index e81d59d78910..0c7165660853 100644 --- a/drivers/spi/atmel_spi.c +++ b/drivers/spi/atmel_spi.c @@ -313,14 +313,14 @@ atmel_spi_dma_map_xfer(struct atmel_spi *as, struct spi_transfer *xfer) xfer->tx_dma = dma_map_single(dev, (void *) xfer->tx_buf, xfer->len, DMA_TO_DEVICE); - if (dma_mapping_error(xfer->tx_dma)) + if (dma_mapping_error(dev, xfer->tx_dma)) return -ENOMEM; } if (xfer->rx_buf) { xfer->rx_dma = dma_map_single(dev, xfer->rx_buf, xfer->len, DMA_FROM_DEVICE); - if (dma_mapping_error(xfer->rx_dma)) { + if (dma_mapping_error(dev, xfer->rx_dma)) { if (xfer->tx_buf) dma_unmap_single(dev, xfer->tx_dma, xfer->len, diff --git a/drivers/spi/au1550_spi.c b/drivers/spi/au1550_spi.c index 9149689c79d9..87b73e0169c5 100644 --- a/drivers/spi/au1550_spi.c +++ b/drivers/spi/au1550_spi.c @@ -334,7 +334,7 @@ 
static int au1550_spi_dma_rxtmp_alloc(struct au1550_spi *hw, unsigned size) hw->dma_rx_tmpbuf_size = size; hw->dma_rx_tmpbuf_addr = dma_map_single(hw->dev, hw->dma_rx_tmpbuf, size, DMA_FROM_DEVICE); - if (dma_mapping_error(hw->dma_rx_tmpbuf_addr)) { + if (dma_mapping_error(hw->dev, hw->dma_rx_tmpbuf_addr)) { kfree(hw->dma_rx_tmpbuf); hw->dma_rx_tmpbuf = 0; hw->dma_rx_tmpbuf_size = 0; @@ -378,7 +378,7 @@ static int au1550_spi_dma_txrxb(struct spi_device *spi, struct spi_transfer *t) dma_rx_addr = dma_map_single(hw->dev, (void *)t->rx_buf, t->len, DMA_FROM_DEVICE); - if (dma_mapping_error(dma_rx_addr)) + if (dma_mapping_error(hw->dev, dma_rx_addr)) dev_err(hw->dev, "rx dma map error\n"); } } else { @@ -401,7 +401,7 @@ static int au1550_spi_dma_txrxb(struct spi_device *spi, struct spi_transfer *t) dma_tx_addr = dma_map_single(hw->dev, (void *)t->tx_buf, t->len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_tx_addr)) + if (dma_mapping_error(hw->dev, dma_tx_addr)) dev_err(hw->dev, "tx dma map error\n"); } } else { diff --git a/drivers/spi/omap2_mcspi.c b/drivers/spi/omap2_mcspi.c index b1cc148036c1..f6f987bb71ca 100644 --- a/drivers/spi/omap2_mcspi.c +++ b/drivers/spi/omap2_mcspi.c @@ -836,7 +836,7 @@ static int omap2_mcspi_transfer(struct spi_device *spi, struct spi_message *m) if (tx_buf != NULL) { t->tx_dma = dma_map_single(&spi->dev, (void *) tx_buf, len, DMA_TO_DEVICE); - if (dma_mapping_error(t->tx_dma)) { + if (dma_mapping_error(&spi->dev, t->tx_dma)) { dev_dbg(&spi->dev, "dma %cX %d bytes error\n", 'T', len); return -EINVAL; @@ -845,7 +845,7 @@ static int omap2_mcspi_transfer(struct spi_device *spi, struct spi_message *m) if (rx_buf != NULL) { t->rx_dma = dma_map_single(&spi->dev, rx_buf, t->len, DMA_FROM_DEVICE); - if (dma_mapping_error(t->rx_dma)) { + if (dma_mapping_error(&spi->dev, t->rx_dma)) { dev_dbg(&spi->dev, "dma %cX %d bytes error\n", 'R', len); if (tx_buf != NULL) diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c index 0c452c46ab07..067299d6d192 100644 --- a/drivers/spi/pxa2xx_spi.c +++ b/drivers/spi/pxa2xx_spi.c @@ -353,7 +353,7 @@ static int map_dma_buffers(struct driver_data *drv_data) drv_data->rx_dma = dma_map_single(dev, drv_data->rx, drv_data->rx_map_len, DMA_FROM_DEVICE); - if (dma_mapping_error(drv_data->rx_dma)) + if (dma_mapping_error(dev, drv_data->rx_dma)) return 0; /* Stream map the tx buffer */ @@ -361,7 +361,7 @@ static int map_dma_buffers(struct driver_data *drv_data) drv_data->tx_map_len, DMA_TO_DEVICE); - if (dma_mapping_error(drv_data->tx_dma)) { + if (dma_mapping_error(dev, drv_data->tx_dma)) { dma_unmap_single(dev, drv_data->rx_dma, drv_data->rx_map_len, DMA_FROM_DEVICE); return 0; diff --git a/drivers/spi/spi_imx.c b/drivers/spi/spi_imx.c index 54ac7bea5f8c..6fb77fcc4971 100644 --- a/drivers/spi/spi_imx.c +++ b/drivers/spi/spi_imx.c @@ -491,7 +491,7 @@ static int map_dma_buffers(struct driver_data *drv_data) buf, drv_data->tx_map_len, DMA_TO_DEVICE); - if (dma_mapping_error(drv_data->tx_dma)) + if (dma_mapping_error(dev, drv_data->tx_dma)) return -1; drv_data->tx_dma_needs_unmap = 1; @@ -516,7 +516,7 @@ static int map_dma_buffers(struct driver_data *drv_data) buf, drv_data->len, DMA_FROM_DEVICE); - if (dma_mapping_error(drv_data->rx_dma)) + if (dma_mapping_error(dev, drv_data->rx_dma)) return -1; drv_data->rx_dma_needs_unmap = 1; } @@ -534,7 +534,7 @@ static int map_dma_buffers(struct driver_data *drv_data) buf, drv_data->tx_map_len, DMA_TO_DEVICE); - if (dma_mapping_error(drv_data->tx_dma)) { + if (dma_mapping_error(dev, 
drv_data->tx_dma)) { if (drv_data->rx_dma) { dma_unmap_single(dev, drv_data->rx_dma, diff --git a/include/asm-alpha/dma-mapping.h b/include/asm-alpha/dma-mapping.h index db351d1296f4..a5801ae02e4b 100644 --- a/include/asm-alpha/dma-mapping.h +++ b/include/asm-alpha/dma-mapping.h @@ -24,8 +24,8 @@ pci_unmap_sg(alpha_gendev_to_pci(dev), sg, nents, dir) #define dma_supported(dev, mask) \ pci_dma_supported(alpha_gendev_to_pci(dev), mask) -#define dma_mapping_error(addr) \ - pci_dma_mapping_error(addr) +#define dma_mapping_error(dev, addr) \ + pci_dma_mapping_error(alpha_gendev_to_pci(dev), addr) #else /* no PCI - no IOMMU. */ @@ -45,7 +45,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, #define dma_unmap_page(dev, addr, size, dir) ((void)0) #define dma_unmap_sg(dev, sg, nents, dir) ((void)0) -#define dma_mapping_error(addr) (0) +#define dma_mapping_error(dev, addr) (0) #endif /* !CONFIG_PCI */ diff --git a/include/asm-alpha/pci.h b/include/asm-alpha/pci.h index d31fd49ff79a..2a14302c17a3 100644 --- a/include/asm-alpha/pci.h +++ b/include/asm-alpha/pci.h @@ -106,7 +106,7 @@ extern dma_addr_t pci_map_page(struct pci_dev *, struct page *, /* Test for pci_map_single or pci_map_page having generated an error. */ static inline int -pci_dma_mapping_error(dma_addr_t dma_addr) +pci_dma_mapping_error(struct pci_dev *pdev, dma_addr_t dma_addr) { return dma_addr == 0; } diff --git a/include/asm-arm/dma-mapping.h b/include/asm-arm/dma-mapping.h index e99406a7bece..f41335ba6337 100644 --- a/include/asm-arm/dma-mapping.h +++ b/include/asm-arm/dma-mapping.h @@ -56,7 +56,7 @@ static inline int dma_is_consistent(struct device *dev, dma_addr_t handle) /* * DMA errors are defined by all-bits-set in the DMA address. */ -static inline int dma_mapping_error(dma_addr_t dma_addr) +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return dma_addr == ~0; } diff --git a/include/asm-avr32/dma-mapping.h b/include/asm-avr32/dma-mapping.h index 57dc672bab8e..0399359ab5d8 100644 --- a/include/asm-avr32/dma-mapping.h +++ b/include/asm-avr32/dma-mapping.h @@ -35,7 +35,7 @@ static inline int dma_set_mask(struct device *dev, u64 dma_mask) /* * dma_map_single can't fail as it is implemented now. 
*/ -static inline int dma_mapping_error(dma_addr_t addr) +static inline int dma_mapping_error(struct device *dev, dma_addr_t addr) { return 0; } diff --git a/include/asm-cris/dma-mapping.h b/include/asm-cris/dma-mapping.h index edc8d1bfaae2..cb2fb25ff8d9 100644 --- a/include/asm-cris/dma-mapping.h +++ b/include/asm-cris/dma-mapping.h @@ -120,7 +120,7 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, } static inline int -dma_mapping_error(dma_addr_t dma_addr) +dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/include/asm-frv/dma-mapping.h b/include/asm-frv/dma-mapping.h index 2e8966ca030d..b2898877c07b 100644 --- a/include/asm-frv/dma-mapping.h +++ b/include/asm-frv/dma-mapping.h @@ -126,7 +126,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nele } static inline -int dma_mapping_error(dma_addr_t dma_addr) +int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/include/asm-generic/dma-mapping-broken.h b/include/asm-generic/dma-mapping-broken.h index e2468f894d2a..82cd0cb1c3fe 100644 --- a/include/asm-generic/dma-mapping-broken.h +++ b/include/asm-generic/dma-mapping-broken.h @@ -61,7 +61,7 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, #define dma_sync_sg_for_device dma_sync_sg_for_cpu extern int -dma_mapping_error(dma_addr_t dma_addr); +dma_mapping_error(struct device *dev, dma_addr_t dma_addr); extern int dma_supported(struct device *dev, u64 mask); diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h index 783ab9944d70..189486c3f92e 100644 --- a/include/asm-generic/dma-mapping.h +++ b/include/asm-generic/dma-mapping.h @@ -144,9 +144,9 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, } static inline int -dma_mapping_error(dma_addr_t dma_addr) +dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - return pci_dma_mapping_error(dma_addr); + return pci_dma_mapping_error(to_pci_dev(dev), dma_addr); } diff --git a/include/asm-generic/pci-dma-compat.h b/include/asm-generic/pci-dma-compat.h index 25c10e96b2b7..37b3706226e7 100644 --- a/include/asm-generic/pci-dma-compat.h +++ b/include/asm-generic/pci-dma-compat.h @@ -99,9 +99,9 @@ pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, } static inline int -pci_dma_mapping_error(dma_addr_t dma_addr) +pci_dma_mapping_error(struct pci_dev *pdev, dma_addr_t dma_addr) { - return dma_mapping_error(dma_addr); + return dma_mapping_error(&pdev->dev, dma_addr); } #endif diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h index 0721a5e8271e..a6d50c77b6bf 100644 --- a/include/asm-ia64/machvec.h +++ b/include/asm-ia64/machvec.h @@ -54,7 +54,7 @@ typedef void ia64_mv_dma_sync_single_for_cpu (struct device *, dma_addr_t, size_ typedef void ia64_mv_dma_sync_sg_for_cpu (struct device *, struct scatterlist *, int, int); typedef void ia64_mv_dma_sync_single_for_device (struct device *, dma_addr_t, size_t, int); typedef void ia64_mv_dma_sync_sg_for_device (struct device *, struct scatterlist *, int, int); -typedef int ia64_mv_dma_mapping_error (dma_addr_t dma_addr); +typedef int ia64_mv_dma_mapping_error(struct device *, dma_addr_t dma_addr); typedef int ia64_mv_dma_supported (struct device *, u64); typedef dma_addr_t ia64_mv_dma_map_single_attrs (struct device *, void *, size_t, int, struct dma_attrs *); diff --git a/include/asm-m68k/dma-mapping.h b/include/asm-m68k/dma-mapping.h index 
a26cdeb46a57..91f7944333d4 100644 --- a/include/asm-m68k/dma-mapping.h +++ b/include/asm-m68k/dma-mapping.h @@ -84,7 +84,7 @@ static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *s { } -static inline int dma_mapping_error(dma_addr_t handle) +static inline int dma_mapping_error(struct device *dev, dma_addr_t handle) { return 0; } diff --git a/include/asm-mips/dma-mapping.h b/include/asm-mips/dma-mapping.h index 230b3f1b69b1..c64afb40cd06 100644 --- a/include/asm-mips/dma-mapping.h +++ b/include/asm-mips/dma-mapping.h @@ -42,7 +42,7 @@ extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction); extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction); -extern int dma_mapping_error(dma_addr_t dma_addr); +extern int dma_mapping_error(struct device *dev, dma_addr_t dma_addr); extern int dma_supported(struct device *dev, u64 mask); static inline int diff --git a/include/asm-mn10300/dma-mapping.h b/include/asm-mn10300/dma-mapping.h index 7c882fca9ec8..ccae8f6c6326 100644 --- a/include/asm-mn10300/dma-mapping.h +++ b/include/asm-mn10300/dma-mapping.h @@ -182,7 +182,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, } static inline -int dma_mapping_error(dma_addr_t dma_addr) +int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/include/asm-parisc/dma-mapping.h b/include/asm-parisc/dma-mapping.h index c6c0e9ff6bde..53af696f23d2 100644 --- a/include/asm-parisc/dma-mapping.h +++ b/include/asm-parisc/dma-mapping.h @@ -248,6 +248,6 @@ void * sba_get_iommu(struct parisc_device *dev); #endif /* At the moment, we panic on error for IOMMU resource exaustion */ -#define dma_mapping_error(x) 0 +#define dma_mapping_error(dev, x) 0 #endif diff --git a/include/asm-powerpc/dma-mapping.h b/include/asm-powerpc/dma-mapping.h index 74c549780987..c7ca45f97dd2 100644 --- a/include/asm-powerpc/dma-mapping.h +++ b/include/asm-powerpc/dma-mapping.h @@ -415,7 +415,7 @@ static inline void dma_sync_sg_for_device(struct device *dev, __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); } -static inline int dma_mapping_error(dma_addr_t dma_addr) +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { #ifdef CONFIG_PPC64 return (dma_addr == DMA_ERROR_CODE); diff --git a/include/asm-sh/dma-mapping.h b/include/asm-sh/dma-mapping.h index 22cc419389fe..6c0b8a2de143 100644 --- a/include/asm-sh/dma-mapping.h +++ b/include/asm-sh/dma-mapping.h @@ -171,7 +171,7 @@ static inline int dma_get_cache_alignment(void) return L1_CACHE_BYTES; } -static inline int dma_mapping_error(dma_addr_t dma_addr) +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return dma_addr == 0; } diff --git a/include/asm-sparc/dma-mapping_64.h b/include/asm-sparc/dma-mapping_64.h index 38cbec76a33f..bfa64f9702d5 100644 --- a/include/asm-sparc/dma-mapping_64.h +++ b/include/asm-sparc/dma-mapping_64.h @@ -135,7 +135,7 @@ static inline void dma_sync_sg_for_device(struct device *dev, /* No flushing needed to sync cpu writes to the device. 
*/ } -static inline int dma_mapping_error(dma_addr_t dma_addr) +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return (dma_addr == DMA_ERROR_CODE); } diff --git a/include/asm-sparc/pci_32.h b/include/asm-sparc/pci_32.h index b93b6c79e08f..0ee949d220c0 100644 --- a/include/asm-sparc/pci_32.h +++ b/include/asm-sparc/pci_32.h @@ -154,7 +154,8 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, #define PCI_DMA_ERROR_CODE (~(dma_addr_t)0x0) -static inline int pci_dma_mapping_error(dma_addr_t dma_addr) +static inline int pci_dma_mapping_error(struct pci_dev *pdev, + dma_addr_t dma_addr) { return (dma_addr == PCI_DMA_ERROR_CODE); } diff --git a/include/asm-sparc/pci_64.h b/include/asm-sparc/pci_64.h index f59f2571295b..4f79a54948f6 100644 --- a/include/asm-sparc/pci_64.h +++ b/include/asm-sparc/pci_64.h @@ -140,9 +140,10 @@ extern int pci_dma_supported(struct pci_dev *hwdev, u64 mask); #define PCI64_REQUIRED_MASK (~(dma64_addr_t)0) #define PCI64_ADDR_BASE 0xfffc000000000000UL -static inline int pci_dma_mapping_error(dma_addr_t dma_addr) +static inline int pci_dma_mapping_error(struct pci_dev *pdev, + dma_addr_t dma_addr) { - return dma_mapping_error(dma_addr); + return dma_mapping_error(&pdev->dev, dma_addr); } #ifdef CONFIG_PCI diff --git a/include/asm-x86/device.h b/include/asm-x86/device.h index 87a715367a1b..3c034f48fdb0 100644 --- a/include/asm-x86/device.h +++ b/include/asm-x86/device.h @@ -5,6 +5,9 @@ struct dev_archdata { #ifdef CONFIG_ACPI void *acpi_handle; #endif +#ifdef CONFIG_X86_64 +struct dma_mapping_ops *dma_ops; +#endif #ifdef CONFIG_DMAR void *iommu; /* hook for IOMMU specific extension */ #endif diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index c2ddd3d1b883..0eaa9bf6011f 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -17,7 +17,8 @@ extern int panic_on_overflow; extern int force_iommu; struct dma_mapping_ops { - int (*mapping_error)(dma_addr_t dma_addr); + int (*mapping_error)(struct device *dev, + dma_addr_t dma_addr); void* (*alloc_coherent)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp); void (*free_coherent)(struct device *dev, size_t size, @@ -56,14 +57,32 @@ struct dma_mapping_ops { int is_phys; }; -extern const struct dma_mapping_ops *dma_ops; +extern struct dma_mapping_ops *dma_ops; -static inline int dma_mapping_error(dma_addr_t dma_addr) +static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) { - if (dma_ops->mapping_error) - return dma_ops->mapping_error(dma_addr); +#ifdef CONFIG_X86_32 + return dma_ops; +#else + if (unlikely(!dev) || !dev->archdata.dma_ops) + return dma_ops; + else + return dev->archdata.dma_ops; +#endif +} + +/* Make sure we keep the same behaviour */ +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ +#ifdef CONFIG_X86_32 + return 0; +#else + struct dma_mapping_ops *ops = get_dma_ops(dev); + if (ops->mapping_error) + return ops->mapping_error(dev, dma_addr); return (dma_addr == bad_dma_address); +#endif } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) @@ -83,44 +102,53 @@ static inline dma_addr_t dma_map_single(struct device *hwdev, void *ptr, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - return dma_ops->map_single(hwdev, virt_to_phys(ptr), size, direction); + return ops->map_single(hwdev, virt_to_phys(ptr), size, direction); } static inline void dma_unmap_single(struct 
device *dev, dma_addr_t addr, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->unmap_single) - dma_ops->unmap_single(dev, addr, size, direction); + if (ops->unmap_single) + ops->unmap_single(dev, addr, size, direction); } static inline int dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - return dma_ops->map_sg(hwdev, sg, nents, direction); + return ops->map_sg(hwdev, sg, nents, direction); } static inline void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->unmap_sg) - dma_ops->unmap_sg(hwdev, sg, nents, direction); + if (ops->unmap_sg) + ops->unmap_sg(hwdev, sg, nents, direction); } static inline void dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_for_cpu) - dma_ops->sync_single_for_cpu(hwdev, dma_handle, size, - direction); + if (ops->sync_single_for_cpu) + ops->sync_single_for_cpu(hwdev, dma_handle, size, direction); flush_write_buffers(); } @@ -128,10 +156,11 @@ static inline void dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_for_device) - dma_ops->sync_single_for_device(hwdev, dma_handle, size, - direction); + if (ops->sync_single_for_device) + ops->sync_single_for_device(hwdev, dma_handle, size, direction); flush_write_buffers(); } @@ -139,11 +168,12 @@ static inline void dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, unsigned long offset, size_t size, int direction) { - BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_range_for_cpu) - dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, - size, direction); + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); + if (ops->sync_single_range_for_cpu) + ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, + size, direction); flush_write_buffers(); } @@ -152,11 +182,12 @@ dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, unsigned long offset, size_t size, int direction) { - BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_range_for_device) - dma_ops->sync_single_range_for_device(hwdev, dma_handle, - offset, size, direction); + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); + if (ops->sync_single_range_for_device) + ops->sync_single_range_for_device(hwdev, dma_handle, + offset, size, direction); flush_write_buffers(); } @@ -164,9 +195,11 @@ static inline void dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, int nelems, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_sg_for_cpu) - dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); + if (ops->sync_sg_for_cpu) + ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); flush_write_buffers(); } @@ -174,9 +207,11 @@ static inline void dma_sync_sg_for_device(struct device *hwdev, struct 
scatterlist *sg, int nelems, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_sg_for_device) - dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction); + if (ops->sync_sg_for_device) + ops->sync_sg_for_device(hwdev, sg, nelems, direction); flush_write_buffers(); } @@ -185,9 +220,11 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + BUG_ON(!valid_dma_direction(direction)); - return dma_ops->map_single(dev, page_to_phys(page)+offset, - size, direction); + return ops->map_single(dev, page_to_phys(page) + offset, + size, direction); } static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, diff --git a/include/asm-x86/swiotlb.h b/include/asm-x86/swiotlb.h index c706a7442633..2730b351afcf 100644 --- a/include/asm-x86/swiotlb.h +++ b/include/asm-x86/swiotlb.h @@ -35,7 +35,7 @@ extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction); extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction); -extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr); +extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); extern void swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle); extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); diff --git a/include/asm-xtensa/dma-mapping.h b/include/asm-xtensa/dma-mapping.h index 3c7d537dd15d..51882ae3db4d 100644 --- a/include/asm-xtensa/dma-mapping.h +++ b/include/asm-xtensa/dma-mapping.h @@ -139,7 +139,7 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, consistent_sync(sg_virt(sg), sg->length, dir); } static inline int -dma_mapping_error(dma_addr_t dma_addr) +dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/include/linux/i2o.h b/include/linux/i2o.h index 7d51cbca49ab..75ae6d8aba4f 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -758,7 +758,7 @@ static inline dma_addr_t i2o_dma_map_single(struct i2o_controller *c, void *ptr, } dma_addr = dma_map_single(&c->pdev->dev, ptr, size, direction); - if (!dma_mapping_error(dma_addr)) { + if (!dma_mapping_error(&c->pdev->dev, dma_addr)) { #ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64 if ((sizeof(dma_addr_t) > 4) && c->pae_support) { *mptr++ = cpu_to_le32(0x7C020002); diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 4bf8cade9dbc..e530026eedf7 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -427,9 +427,9 @@ static inline int ssb_dma_mapping_error(struct ssb_device *dev, dma_addr_t addr) { switch (dev->bus->bustype) { case SSB_BUSTYPE_PCI: - return pci_dma_mapping_error(addr); + return pci_dma_mapping_error(dev->bus->host_pci, addr); case SSB_BUSTYPE_SSB: - return dma_mapping_error(addr); + return dma_mapping_error(dev->dev, addr); default: __ssb_dma_not_implemented(dev); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 90b529f7a154..936e333e7ce5 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1590,7 +1590,7 @@ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { if (dev->dma_ops) return dev->dma_ops->mapping_error(dev, dma_addr); - return dma_mapping_error(dma_addr); + return dma_mapping_error(dev->dma_device, dma_addr); } /** diff --git a/lib/swiotlb.c 
b/lib/swiotlb.c index d568894df8cc..977edbdbc1de 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -492,7 +492,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, */ dma_addr_t handle; handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE); - if (swiotlb_dma_mapping_error(handle)) + if (swiotlb_dma_mapping_error(hwdev, handle)) return NULL; ret = bus_to_virt(handle); @@ -824,7 +824,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, } int -swiotlb_dma_mapping_error(dma_addr_t dma_addr) +swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) { return (dma_addr == virt_to_bus(io_tlb_overflow_buffer)); } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index a19b22b452a3..84d328329d98 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -169,7 +169,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, (void *) vec->sge[xdr_sge_no].iov_base + sge_off, sge_bytes, DMA_TO_DEVICE); - if (dma_mapping_error(sge[sge_no].addr)) + if (dma_mapping_error(xprt->sc_cm_id->device->dma_device, + sge[sge_no].addr)) goto err; sge_off = 0; sge_no++; -- cgit v1.2.3 From 929dfb24fbcd60e2544b2de7bfb4a68da4dfc747 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 25 Jul 2008 19:44:54 -0700 Subject: parport/share.c: proper externs This patch adds proper externs for parport_default_timeslice and parport_default_spintime in include/linux/parport.h Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/parport/procfs.c | 3 --- include/linux/parport.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c index d950fc34320a..554e11f9e1ce 100644 --- a/drivers/parport/procfs.c +++ b/drivers/parport/procfs.c @@ -429,9 +429,6 @@ struct parport_default_sysctl_table ctl_table dev_dir[2]; }; -extern unsigned long parport_default_timeslice; -extern int parport_default_spintime; - static struct parport_default_sysctl_table parport_default_sysctl_table = { .sysctl_header = NULL, diff --git a/include/linux/parport.h b/include/linux/parport.h index dcb9e01a69ca..6a0d7cdb5774 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -560,5 +560,8 @@ extern int parport_device_proc_unregister(struct pardevice *device); #endif /* !CONFIG_PARPORT_NOT_PC */ +extern unsigned long parport_default_timeslice; +extern int parport_default_spintime; + #endif /* __KERNEL__ */ #endif /* _PARPORT_H_ */ -- cgit v1.2.3 From b77899985bdfd85a8e5a6e485033a9b4713d2471 Mon Sep 17 00:00:00 2001 From: Alex Dubov Date: Fri, 25 Jul 2008 19:45:00 -0700 Subject: memstick: allow "set_param" method to return an error code Some controllers (Jmicron, for instance) can report temporal failure condition during power-on. It is desirable to account for this using a return value of "set_param" device method. The return value can also be handy to distinguish between supported and unsupported device parameters in run time. 
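The return-value change described above means callers can no longer treat set_param() as fire-and-forget: a transient power-on failure or an unsupported parameter now surfaces as an error that must be checked. As a rough caller-side sketch only (the helper name below is hypothetical and not part of this patch; it simply mirrors the memstick_power_on() rework shown further down):

	static int example_power_on(struct memstick_host *host)
	{
		/* Sketch of the new contract: check every set_param() result. */
		int rc = host->set_param(host, MEMSTICK_POWER, MEMSTICK_POWER_ON);

		if (rc)
			return rc;	/* e.g. a transient controller failure at power-on */

		/* Unsupported parameter values may now be reported as -EINVAL. */
		return host->set_param(host, MEMSTICK_INTERFACE, MEMSTICK_SERIAL);
	}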
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Alex Dubov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/memstick/core/memstick.c | 18 ++++++++--- drivers/memstick/host/jmb38x_ms.c | 67 ++++++++++++++++++++++++++++----------- drivers/memstick/host/tifm_ms.c | 17 +++++----- include/linux/memstick.h | 2 +- 4 files changed, 71 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index 61b98c333cb0..3c7d9a79c1ea 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -415,10 +415,14 @@ err_out: return NULL; } -static void memstick_power_on(struct memstick_host *host) +static int memstick_power_on(struct memstick_host *host) { - host->set_param(host, MEMSTICK_POWER, MEMSTICK_POWER_ON); - host->set_param(host, MEMSTICK_INTERFACE, MEMSTICK_SERIAL); + int rc = host->set_param(host, MEMSTICK_POWER, MEMSTICK_POWER_ON); + + if (!rc) + rc = host->set_param(host, MEMSTICK_INTERFACE, MEMSTICK_SERIAL); + + return rc; } static void memstick_check(struct work_struct *work) @@ -573,11 +577,15 @@ EXPORT_SYMBOL(memstick_suspend_host); */ void memstick_resume_host(struct memstick_host *host) { + int rc = 0; + mutex_lock(&host->lock); if (host->card) - memstick_power_on(host); + rc = memstick_power_on(host); mutex_unlock(&host->lock); - memstick_detect_change(host); + + if (!rc) + memstick_detect_change(host); } EXPORT_SYMBOL(memstick_resume_host); diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c index 4e3bfbcdf155..9d82e67737db 100644 --- a/drivers/memstick/host/jmb38x_ms.c +++ b/drivers/memstick/host/jmb38x_ms.c @@ -609,36 +609,68 @@ static void jmb38x_ms_request(struct memstick_host *msh) spin_unlock_irqrestore(&host->lock, flags); } -static void jmb38x_ms_reset(struct jmb38x_ms_host *host) +static int jmb38x_ms_reset(struct jmb38x_ms_host *host) { - unsigned int host_ctl = readl(host->addr + HOST_CONTROL); + int cnt; - writel(HOST_CONTROL_RESET_REQ, host->addr + HOST_CONTROL); + writel(HOST_CONTROL_RESET_REQ | HOST_CONTROL_CLOCK_EN + | readl(host->addr + HOST_CONTROL), + host->addr + HOST_CONTROL); + mmiowb(); + + for (cnt = 0; cnt < 20; ++cnt) { + if (!(HOST_CONTROL_RESET_REQ + & readl(host->addr + HOST_CONTROL))) + goto reset_next; - while (HOST_CONTROL_RESET_REQ - & (host_ctl = readl(host->addr + HOST_CONTROL))) { ndelay(20); - dev_dbg(&host->chip->pdev->dev, "reset %08x\n", host_ctl); } + dev_dbg(&host->chip->pdev->dev, "reset_req timeout\n"); + return -EIO; - writel(HOST_CONTROL_RESET, host->addr + HOST_CONTROL); +reset_next: + writel(HOST_CONTROL_RESET | HOST_CONTROL_CLOCK_EN + | readl(host->addr + HOST_CONTROL), + host->addr + HOST_CONTROL); + mmiowb(); + + for (cnt = 0; cnt < 20; ++cnt) { + if (!(HOST_CONTROL_RESET + & readl(host->addr + HOST_CONTROL))) + goto reset_ok; + + ndelay(20); + } + dev_dbg(&host->chip->pdev->dev, "reset timeout\n"); + return -EIO; + +reset_ok: mmiowb(); writel(INT_STATUS_ALL, host->addr + INT_SIGNAL_ENABLE); writel(INT_STATUS_ALL, host->addr + INT_STATUS_ENABLE); + return 0; } -static void jmb38x_ms_set_param(struct memstick_host *msh, - enum memstick_param param, - int value) +static int jmb38x_ms_set_param(struct memstick_host *msh, + enum memstick_param param, + int value) { struct jmb38x_ms_host *host = memstick_priv(msh); unsigned int host_ctl = readl(host->addr + HOST_CONTROL); unsigned int clock_ctl = CLOCK_CONTROL_40MHZ, clock_delay = 0; + int rc = 0; switch (param) { case 
MEMSTICK_POWER: if (value == MEMSTICK_POWER_ON) { - jmb38x_ms_reset(host); + rc = jmb38x_ms_reset(host); + if (rc) + return rc; + + host_ctl = 7; + host_ctl |= HOST_CONTROL_POWER_EN + | HOST_CONTROL_CLOCK_EN; + writel(host_ctl, host->addr + HOST_CONTROL); writel(host->id ? PAD_PU_PD_ON_MS_SOCK1 : PAD_PU_PD_ON_MS_SOCK0, @@ -647,11 +679,7 @@ static void jmb38x_ms_set_param(struct memstick_host *msh, writel(PAD_OUTPUT_ENABLE_MS, host->addr + PAD_OUTPUT_ENABLE); - host_ctl = 7; - host_ctl |= HOST_CONTROL_POWER_EN - | HOST_CONTROL_CLOCK_EN; - writel(host_ctl, host->addr + HOST_CONTROL); - + msleep(10); dev_dbg(&host->chip->pdev->dev, "power on\n"); } else if (value == MEMSTICK_POWER_OFF) { host_ctl &= ~(HOST_CONTROL_POWER_EN @@ -660,7 +688,8 @@ static void jmb38x_ms_set_param(struct memstick_host *msh, writel(0, host->addr + PAD_OUTPUT_ENABLE); writel(PAD_PU_PD_OFF, host->addr + PAD_PU_PD); dev_dbg(&host->chip->pdev->dev, "power off\n"); - } + } else + return -EINVAL; break; case MEMSTICK_INTERFACE: host_ctl &= ~(3 << HOST_CONTROL_IF_SHIFT); @@ -686,12 +715,14 @@ static void jmb38x_ms_set_param(struct memstick_host *msh, host_ctl &= ~HOST_CONTROL_REI; clock_ctl = CLOCK_CONTROL_60MHZ; clock_delay = 0; - } + } else + return -EINVAL; writel(host_ctl, host->addr + HOST_CONTROL); writel(clock_ctl, host->addr + CLOCK_CONTROL); writel(clock_delay, host->addr + CLOCK_DELAY); break; }; + return 0; } #ifdef CONFIG_PM diff --git a/drivers/memstick/host/tifm_ms.c b/drivers/memstick/host/tifm_ms.c index 8577de4ebb0e..14458764588c 100644 --- a/drivers/memstick/host/tifm_ms.c +++ b/drivers/memstick/host/tifm_ms.c @@ -489,15 +489,12 @@ static void tifm_ms_request(struct memstick_host *msh) return; } -static void tifm_ms_set_param(struct memstick_host *msh, - enum memstick_param param, - int value) +static int tifm_ms_set_param(struct memstick_host *msh, + enum memstick_param param, + int value) { struct tifm_ms *host = memstick_priv(msh); struct tifm_dev *sock = host->dev; - unsigned long flags; - - spin_lock_irqsave(&sock->lock, flags); switch (param) { case MEMSTICK_POWER: @@ -512,7 +509,8 @@ static void tifm_ms_set_param(struct memstick_host *msh, writel(TIFM_MS_SYS_FCLR | TIFM_MS_SYS_INTCLR, sock->addr + SOCK_MS_SYSTEM); writel(0xffffffff, sock->addr + SOCK_MS_STATUS); - } + } else + return -EINVAL; break; case MEMSTICK_INTERFACE: if (value == MEMSTICK_SERIAL) { @@ -525,11 +523,12 @@ static void tifm_ms_set_param(struct memstick_host *msh, writel(TIFM_CTRL_FAST_CLK | readl(sock->addr + SOCK_CONTROL), sock->addr + SOCK_CONTROL); - } + } else + return -EINVAL; break; }; - spin_unlock_irqrestore(&sock->lock, flags); + return 0; } static void tifm_ms_abort(unsigned long data) diff --git a/include/linux/memstick.h b/include/linux/memstick.h index 37a5cdb03918..2fe599c66d52 100644 --- a/include/linux/memstick.h +++ b/include/linux/memstick.h @@ -284,7 +284,7 @@ struct memstick_host { /* Notify the host that some requests are pending. */ void (*request)(struct memstick_host *host); /* Set host IO parameters (power, clock, etc). 
*/ - void (*set_param)(struct memstick_host *host, + int (*set_param)(struct memstick_host *host, enum memstick_param param, int value); unsigned long private[0] ____cacheline_aligned; -- cgit v1.2.3 From 17017d8d2c005734d7088d8281ce2daab8fcb097 Mon Sep 17 00:00:00 2001 From: Alex Dubov Date: Fri, 25 Jul 2008 19:45:01 -0700 Subject: memstick: add "start" and "stop" methods to memstick device In some cases it may be desirable to ensure that associated driver is not going to access the media in some period of time. "start" and "stop" methods are provided therefore to allow it. Signed-off-by: Alex Dubov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/memstick/core/memstick.c | 11 ++++++++--- drivers/memstick/core/mspro_block.c | 33 +++++++++++++++++++++++++++++++++ include/linux/memstick.h | 4 ++++ 3 files changed, 45 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index 3c7d9a79c1ea..7162f772bbfb 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -433,8 +433,11 @@ static void memstick_check(struct work_struct *work) dev_dbg(&host->dev, "memstick_check started\n"); mutex_lock(&host->lock); - if (!host->card) - memstick_power_on(host); + if (!host->card) { + if (memstick_power_on(host)) + goto out_power_off; + } else + host->card->stop(host->card); card = memstick_alloc_card(host); @@ -452,7 +455,8 @@ static void memstick_check(struct work_struct *work) || !(host->card->check(host->card))) { device_unregister(&host->card->dev); host->card = NULL; - } + } else + host->card->start(host->card); } if (!host->card) { @@ -465,6 +469,7 @@ static void memstick_check(struct work_struct *work) kfree(card); } +out_power_off: if (!host->card) host->set_param(host, MEMSTICK_POWER, MEMSTICK_POWER_OFF); diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 477d0fb6e588..004ac4d176d9 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -752,6 +752,37 @@ static int mspro_block_has_request(struct mspro_block_data *msb) return rc; } +static void mspro_block_stop(struct memstick_dev *card) +{ + struct mspro_block_data *msb = memstick_get_drvdata(card); + int rc = 0; + unsigned long flags; + + while (1) { + spin_lock_irqsave(&msb->q_lock, flags); + if (!msb->has_request) { + blk_stop_queue(msb->queue); + rc = 1; + } + spin_unlock_irqrestore(&msb->q_lock, flags); + + if (rc) + break; + + wait_for_completion(&card->mrq_complete); + } +} + +static void mspro_block_start(struct memstick_dev *card) +{ + struct mspro_block_data *msb = memstick_get_drvdata(card); + unsigned long flags; + + spin_lock_irqsave(&msb->q_lock, flags); + blk_start_queue(msb->queue); + spin_unlock_irqrestore(&msb->q_lock, flags); +} + static int mspro_block_queue_thread(void *data) { struct memstick_dev *card = data; @@ -1272,6 +1303,8 @@ static int mspro_block_probe(struct memstick_dev *card) rc = mspro_block_init_disk(card); if (!rc) { card->check = mspro_block_check_card; + card->stop = mspro_block_stop; + card->start = mspro_block_start; return 0; } diff --git a/include/linux/memstick.h b/include/linux/memstick.h index 2fe599c66d52..a9f998a3f48b 100644 --- a/include/linux/memstick.h +++ b/include/linux/memstick.h @@ -263,6 +263,10 @@ struct memstick_dev { /* Get next request from the media driver. 
*/ int (*next_request)(struct memstick_dev *card, struct memstick_request **mrq); + /* Tell the media driver to stop doing things */ + void (*stop)(struct memstick_dev *card); + /* Allow the media driver to continue */ + void (*start)(struct memstick_dev *card); struct device dev; }; -- cgit v1.2.3 From 3ab83521378268044a448113c6aa9a9e245f4d2f Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 25 Jul 2008 19:45:07 -0700 Subject: kexec jump This patch provides an enhancement to kexec/kdump. It implements the following features: - Backup/restore memory used by the original kernel before/after kexec. - Save/restore CPU state before/after kexec. The features of this patch can be used as a general method to call program in physical mode (paging turning off). This can be used to call BIOS code under Linux. kexec-tools needs to be patched to support kexec jump. The patches and the precompiled kexec can be download from the following URL: source: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-src_git_kh10.tar.bz2 patches: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-patches_git_kh10.tar.bz2 binary: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec_git_kh10 Usage example of calling some physical mode code and return: 1. Compile and install patched kernel with following options selected: CONFIG_X86_32=y CONFIG_KEXEC=y CONFIG_PM=y CONFIG_KEXEC_JUMP=y 2. Build patched kexec-tool or download the pre-built one. 3. Build some physical mode executable named such as "phy_mode" 4. Boot kernel compiled in step 1. 5. Load physical mode executable with /sbin/kexec. The shell command line can be as follow: /sbin/kexec --load-preserve-context --args-none phy_mode 6. Call physical mode executable with following shell command line: /sbin/kexec -e Implementation point: To support jumping without reserving memory. One shadow backup page (source page) is allocated for each page used by kexeced code image (destination page). When do kexec_load, the image of kexeced code is loaded into source pages, and before executing, the destination pages and the source pages are swapped, so the contents of destination pages are backupped. Before jumping to the kexeced code image and after jumping back to the original kernel, the destination pages and the source pages are swapped too. C ABI (calling convention) is used as communication protocol between kernel and called code. A flag named KEXEC_PRESERVE_CONTEXT for sys_kexec_load is added to indicate that the loaded kernel image is used for jumping back. Now, only the i386 architecture is supported. Signed-off-by: Huang Ying Acked-by: Vivek Goyal Cc: "Eric W. Biederman" Cc: Pavel Machek Cc: Nigel Cunningham Cc: "Rafael J. 
Wysocki" Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/machine_kexec.c | 2 +- arch/sh/kernel/machine_kexec.c | 2 +- arch/x86/Kconfig | 7 ++ arch/x86/kernel/machine_kexec_32.c | 27 ++++-- arch/x86/kernel/machine_kexec_64.c | 2 +- arch/x86/kernel/relocate_kernel_32.S | 174 ++++++++++++++++++++++++++++++----- include/asm-x86/kexec.h | 18 ++-- include/linux/kexec.h | 17 +++- kernel/kexec.c | 57 ++++++++++++ kernel/sys.c | 31 ++----- 10 files changed, 269 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 29a0e039d436..aab76887a842 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -48,7 +48,7 @@ void machine_kexec_cleanup(struct kimage *image) * Do not allocate memory (or fail in any way) in machine_kexec(). * We are past the point of no return, committed to rebooting now. */ -NORET_TYPE void machine_kexec(struct kimage *image) +void machine_kexec(struct kimage *image) { if (ppc_md.machine_kexec) ppc_md.machine_kexec(image); diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c index 5c17de51987e..ec1eadce4aaa 100644 --- a/arch/sh/kernel/machine_kexec.c +++ b/arch/sh/kernel/machine_kexec.c @@ -70,7 +70,7 @@ static void kexec_info(struct kimage *image) * Do not allocate memory (or fail in any way) in machine_kexec(). * We are past the point of no return, committed to rebooting now. */ -NORET_TYPE void machine_kexec(struct kimage *image) +void machine_kexec(struct kimage *image) { unsigned long page_list; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e3cba0b45600..7ecb679f0130 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1279,6 +1279,13 @@ config CRASH_DUMP (CONFIG_RELOCATABLE=y). For more details see Documentation/kdump/kdump.txt +config KEXEC_JUMP + bool "kexec jump (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on KEXEC && PM_SLEEP && X86_32 + help + Invoke code in physical address mode via KEXEC + config PHYSICAL_START hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) default "0x1000000" if X86_NUMAQ diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 8864230d55af..2b67609d0a1c 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -22,6 +22,7 @@ #include #include #include +#include #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) static u32 kexec_pgd[1024] PAGE_ALIGNED; @@ -85,10 +86,12 @@ static void load_segments(void) * reboot code buffer to allow us to avoid allocations * later. * - * Currently nothing. + * Make control page executable. */ int machine_kexec_prepare(struct kimage *image) { + if (nx_enabled) + set_pages_x(image->control_code_page, 1); return 0; } @@ -98,16 +101,24 @@ int machine_kexec_prepare(struct kimage *image) */ void machine_kexec_cleanup(struct kimage *image) { + if (nx_enabled) + set_pages_nx(image->control_code_page, 1); } /* * Do not allocate memory (or fail in any way) in machine_kexec(). * We are past the point of no return, committed to rebooting now. 
*/ -NORET_TYPE void machine_kexec(struct kimage *image) +void machine_kexec(struct kimage *image) { unsigned long page_list[PAGES_NR]; void *control_page; + asmlinkage unsigned long + (*relocate_kernel_ptr)(unsigned long indirection_page, + unsigned long control_page, + unsigned long start_address, + unsigned int has_pae, + unsigned int preserve_context); tracer_disable(); @@ -115,10 +126,11 @@ NORET_TYPE void machine_kexec(struct kimage *image) local_irq_disable(); control_page = page_address(image->control_code_page); - memcpy(control_page, relocate_kernel, PAGE_SIZE); + memcpy(control_page, relocate_kernel, PAGE_SIZE/2); + relocate_kernel_ptr = control_page; page_list[PA_CONTROL_PAGE] = __pa(control_page); - page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; + page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; page_list[PA_PGD] = __pa(kexec_pgd); page_list[VA_PGD] = (unsigned long)kexec_pgd; #ifdef CONFIG_X86_PAE @@ -131,6 +143,7 @@ NORET_TYPE void machine_kexec(struct kimage *image) page_list[VA_PTE_0] = (unsigned long)kexec_pte0; page_list[PA_PTE_1] = __pa(kexec_pte1); page_list[VA_PTE_1] = (unsigned long)kexec_pte1; + page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); /* The segment registers are funny things, they have both a * visible and an invisible part. Whenever the visible part is @@ -149,8 +162,10 @@ NORET_TYPE void machine_kexec(struct kimage *image) set_idt(phys_to_virt(0),0); /* now call it */ - relocate_kernel((unsigned long)image->head, (unsigned long)page_list, - image->start, cpu_has_pae); + image->start = relocate_kernel_ptr((unsigned long)image->head, + (unsigned long)page_list, + image->start, cpu_has_pae, + image->preserve_context); } void arch_crash_save_vmcoreinfo(void) diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 9dd9262693a3..c43caa3a91f3 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -181,7 +181,7 @@ void machine_kexec_cleanup(struct kimage *image) * Do not allocate memory (or fail in any way) in machine_kexec(). * We are past the point of no return, committed to rebooting now. 
*/ -NORET_TYPE void machine_kexec(struct kimage *image) +void machine_kexec(struct kimage *image) { unsigned long page_list[PAGES_NR]; void *control_page; diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index c30fe25d470d..703310a99023 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -20,11 +20,44 @@ #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) #define PAE_PGD_ATTR (_PAGE_PRESENT) +/* control_page + PAGE_SIZE/2 ~ control_page + PAGE_SIZE * 3/4 are + * used to save some data for jumping back + */ +#define DATA(offset) (PAGE_SIZE/2+(offset)) + +/* Minimal CPU state */ +#define ESP DATA(0x0) +#define CR0 DATA(0x4) +#define CR3 DATA(0x8) +#define CR4 DATA(0xc) + +/* other data */ +#define CP_VA_CONTROL_PAGE DATA(0x10) +#define CP_PA_PGD DATA(0x14) +#define CP_PA_SWAP_PAGE DATA(0x18) +#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) + .text .align PAGE_SIZE .globl relocate_kernel relocate_kernel: - movl 8(%esp), %ebp /* list of pages */ + /* Save the CPU context, used for jumping back */ + + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + pushf + + movl 20+8(%esp), %ebp /* list of pages */ + movl PTR(VA_CONTROL_PAGE)(%ebp), %edi + movl %esp, ESP(%edi) + movl %cr0, %eax + movl %eax, CR0(%edi) + movl %cr3, %eax + movl %eax, CR3(%edi) + movl %cr4, %eax + movl %eax, CR4(%edi) #ifdef CONFIG_X86_PAE /* map the control page at its virtual address */ @@ -138,15 +171,25 @@ relocate_kernel: relocate_new_kernel: /* read the arguments and say goodbye to the stack */ - movl 4(%esp), %ebx /* page_list */ - movl 8(%esp), %ebp /* list of pages */ - movl 12(%esp), %edx /* start address */ - movl 16(%esp), %ecx /* cpu_has_pae */ + movl 20+4(%esp), %ebx /* page_list */ + movl 20+8(%esp), %ebp /* list of pages */ + movl 20+12(%esp), %edx /* start address */ + movl 20+16(%esp), %ecx /* cpu_has_pae */ + movl 20+20(%esp), %esi /* preserve_context */ /* zero out flags, and disable interrupts */ pushl $0 popfl + /* save some information for jumping back */ + movl PTR(VA_CONTROL_PAGE)(%ebp), %edi + movl %edi, CP_VA_CONTROL_PAGE(%edi) + movl PTR(PA_PGD)(%ebp), %eax + movl %eax, CP_PA_PGD(%edi) + movl PTR(PA_SWAP_PAGE)(%ebp), %eax + movl %eax, CP_PA_SWAP_PAGE(%edi) + movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) + /* get physical address of control page now */ /* this is impossible after page table switch */ movl PTR(PA_CONTROL_PAGE)(%ebp), %edi @@ -197,8 +240,90 @@ identity_mapped: xorl %eax, %eax movl %eax, %cr3 + movl CP_PA_SWAP_PAGE(%edi), %eax + pushl %eax + pushl %ebx + call swap_pages + addl $8, %esp + + /* To be certain of avoiding problems with self-modifying code + * I need to execute a serializing instruction here. + * So I flush the TLB, it's handy, and not processor dependent. 
+ */ + xorl %eax, %eax + movl %eax, %cr3 + + /* set all of the registers to known values */ + /* leave %esp alone */ + + testl %esi, %esi + jnz 1f + xorl %edi, %edi + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %esi, %esi + xorl %ebp, %ebp + ret +1: + popl %edx + movl CP_PA_SWAP_PAGE(%edi), %esp + addl $PAGE_SIZE, %esp +2: + call *%edx + + /* get the re-entry point of the peer system */ + movl 0(%esp), %ebp + call 1f +1: + popl %ebx + subl $(1b - relocate_kernel), %ebx + movl CP_VA_CONTROL_PAGE(%ebx), %edi + lea PAGE_SIZE(%ebx), %esp + movl CP_PA_SWAP_PAGE(%ebx), %eax + movl CP_PA_BACKUP_PAGES_MAP(%ebx), %edx + pushl %eax + pushl %edx + call swap_pages + addl $8, %esp + movl CP_PA_PGD(%ebx), %eax + movl %eax, %cr3 + movl %cr0, %eax + orl $(1<<31), %eax + movl %eax, %cr0 + lea PAGE_SIZE(%edi), %esp + movl %edi, %eax + addl $(virtual_mapped - relocate_kernel), %eax + pushl %eax + ret + +virtual_mapped: + movl CR4(%edi), %eax + movl %eax, %cr4 + movl CR3(%edi), %eax + movl %eax, %cr3 + movl CR0(%edi), %eax + movl %eax, %cr0 + movl ESP(%edi), %esp + movl %ebp, %eax + + popf + popl %ebp + popl %edi + popl %esi + popl %ebx + ret + /* Do the copies */ - movl %ebx, %ecx +swap_pages: + movl 8(%esp), %edx + movl 4(%esp), %ecx + pushl %ebp + pushl %ebx + pushl %edi + pushl %esi + movl %ecx, %ebx jmp 1f 0: /* top, read another word from the indirection page */ @@ -226,27 +351,28 @@ identity_mapped: movl %ecx, %esi /* For every source page do a copy */ andl $0xfffff000, %esi + movl %edi, %eax + movl %esi, %ebp + + movl %edx, %edi movl $1024, %ecx rep ; movsl - jmp 0b -3: - - /* To be certain of avoiding problems with self-modifying code - * I need to execute a serializing instruction here. - * So I flush the TLB, it's handy, and not processor dependent. 
- */ - xorl %eax, %eax - movl %eax, %cr3 + movl %ebp, %edi + movl %eax, %esi + movl $1024, %ecx + rep ; movsl - /* set all of the registers to known values */ - /* leave %esp alone */ + movl %eax, %edi + movl %edx, %esi + movl $1024, %ecx + rep ; movsl - xorl %eax, %eax - xorl %ebx, %ebx - xorl %ecx, %ecx - xorl %edx, %edx - xorl %esi, %esi - xorl %edi, %edi - xorl %ebp, %ebp + lea PAGE_SIZE(%ebp), %esi + jmp 0b +3: + popl %esi + popl %edi + popl %ebx + popl %ebp ret diff --git a/include/asm-x86/kexec.h b/include/asm-x86/kexec.h index 8f855a15f64d..c0e52a14fd4d 100644 --- a/include/asm-x86/kexec.h +++ b/include/asm-x86/kexec.h @@ -10,14 +10,15 @@ # define VA_PTE_0 5 # define PA_PTE_1 6 # define VA_PTE_1 7 +# define PA_SWAP_PAGE 8 # ifdef CONFIG_X86_PAE -# define PA_PMD_0 8 -# define VA_PMD_0 9 -# define PA_PMD_1 10 -# define VA_PMD_1 11 -# define PAGES_NR 12 +# define PA_PMD_0 9 +# define VA_PMD_0 10 +# define PA_PMD_1 11 +# define VA_PMD_1 12 +# define PAGES_NR 13 # else -# define PAGES_NR 8 +# define PAGES_NR 9 # endif #else # define PA_CONTROL_PAGE 0 @@ -152,11 +153,12 @@ static inline void crash_setup_regs(struct pt_regs *newregs, } #ifdef CONFIG_X86_32 -asmlinkage NORET_TYPE void +asmlinkage unsigned long relocate_kernel(unsigned long indirection_page, unsigned long control_page, unsigned long start_address, - unsigned int has_pae) ATTRIB_NORET; + unsigned int has_pae, + unsigned int preserve_context); #else NORET_TYPE void relocate_kernel(unsigned long indirection_page, diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 3265968cd2cd..82f88a8a827b 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -83,6 +83,7 @@ struct kimage { unsigned long start; struct page *control_code_page; + struct page *swap_page; unsigned long nr_segments; struct kexec_segment segment[KEXEC_SEGMENT_MAX]; @@ -98,18 +99,20 @@ struct kimage { unsigned int type : 1; #define KEXEC_TYPE_DEFAULT 0 #define KEXEC_TYPE_CRASH 1 + unsigned int preserve_context : 1; }; /* kexec interface functions */ -extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET; +extern void machine_kexec(struct kimage *image); extern int machine_kexec_prepare(struct kimage *image); extern void machine_kexec_cleanup(struct kimage *image); extern asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, struct kexec_segment __user *segments, unsigned long flags); +extern int kernel_kexec(void); #ifdef CONFIG_COMPAT extern asmlinkage long compat_sys_kexec_load(unsigned long entry, unsigned long nr_segments, @@ -156,8 +159,9 @@ extern struct kimage *kexec_crash_image; #define kexec_flush_icache_page(page) #endif -#define KEXEC_ON_CRASH 0x00000001 -#define KEXEC_ARCH_MASK 0xffff0000 +#define KEXEC_ON_CRASH 0x00000001 +#define KEXEC_PRESERVE_CONTEXT 0x00000002 +#define KEXEC_ARCH_MASK 0xffff0000 /* These values match the ELF architecture values. * Unless there is a good reason that should continue to be the case. 
@@ -174,7 +178,12 @@ extern struct kimage *kexec_crash_image; #define KEXEC_ARCH_MIPS_LE (10 << 16) #define KEXEC_ARCH_MIPS ( 8 << 16) -#define KEXEC_FLAGS (KEXEC_ON_CRASH) /* List of defined/legal kexec flags */ +/* List of defined/legal kexec flags */ +#ifndef CONFIG_KEXEC_JUMP +#define KEXEC_FLAGS KEXEC_ON_CRASH +#else +#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT) +#endif #define VMCOREINFO_BYTES (4096) #define VMCOREINFO_NOTE_NAME "VMCOREINFO" diff --git a/kernel/kexec.c b/kernel/kexec.c index 6db42ff8d520..a0d920915b38 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include #include @@ -242,6 +244,12 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, goto out; } + image->swap_page = kimage_alloc_control_pages(image, 0); + if (!image->swap_page) { + printk(KERN_ERR "Could not allocate swap buffer\n"); + goto out; + } + result = 0; out: if (result == 0) @@ -986,6 +994,8 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, if (result) goto out; + if (flags & KEXEC_PRESERVE_CONTEXT) + image->preserve_context = 1; result = machine_kexec_prepare(image); if (result) goto out; @@ -1411,3 +1421,50 @@ static int __init crash_save_vmcoreinfo_init(void) } module_init(crash_save_vmcoreinfo_init) + +/** + * kernel_kexec - reboot the system + * + * Move into place and start executing a preloaded standalone + * executable. If nothing was preloaded return an error. + */ +int kernel_kexec(void) +{ + int error = 0; + + if (xchg(&kexec_lock, 1)) + return -EBUSY; + if (!kexec_image) { + error = -EINVAL; + goto Unlock; + } + + if (kexec_image->preserve_context) { +#ifdef CONFIG_KEXEC_JUMP + local_irq_disable(); + save_processor_state(); +#endif + } else { + blocking_notifier_call_chain(&reboot_notifier_list, + SYS_RESTART, NULL); + system_state = SYSTEM_RESTART; + device_shutdown(); + sysdev_shutdown(); + printk(KERN_EMERG "Starting new kernel\n"); + machine_shutdown(); + } + + machine_kexec(kexec_image); + + if (kexec_image->preserve_context) { +#ifdef CONFIG_KEXEC_JUMP + restore_processor_state(); + local_irq_enable(); +#endif + } + + Unlock: + xchg(&kexec_lock, 0); + + return error; +} diff --git a/kernel/sys.c b/kernel/sys.c index 0c9d3fa1f5ff..c01858090a98 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -301,26 +301,6 @@ void kernel_restart(char *cmd) } EXPORT_SYMBOL_GPL(kernel_restart); -/** - * kernel_kexec - reboot the system - * - * Move into place and start executing a preloaded standalone - * executable. If nothing was preloaded return an error. 
- */ -static void kernel_kexec(void) -{ -#ifdef CONFIG_KEXEC - struct kimage *image; - image = xchg(&kexec_image, NULL); - if (!image) - return; - kernel_restart_prepare(NULL); - printk(KERN_EMERG "Starting new kernel\n"); - machine_shutdown(); - machine_kexec(image); -#endif -} - static void kernel_shutdown_prepare(enum system_states state) { blocking_notifier_call_chain(&reboot_notifier_list, @@ -425,10 +405,15 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user kernel_restart(buffer); break; +#ifdef CONFIG_KEXEC case LINUX_REBOOT_CMD_KEXEC: - kernel_kexec(); - unlock_kernel(); - return -EINVAL; + { + int ret; + ret = kernel_kexec(); + unlock_kernel(); + return ret; + } +#endif #ifdef CONFIG_HIBERNATION case LINUX_REBOOT_CMD_SW_SUSPEND: -- cgit v1.2.3 From 89081d17f7bb81d89fa1aa9b70f821c5cf4d39e9 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 25 Jul 2008 19:45:10 -0700 Subject: kexec jump: save/restore device state This patch implements devices state save/restore before after kexec. This patch together with features in kexec_jump patch can be used for following: - A simple hibernation implementation without ACPI support. You can kexec a hibernating kernel, save the memory image of original system and shutdown the system. When resuming, you restore the memory image of original system via ordinary kexec load then jump back. - Kernel/system debug through making system snapshot. You can make system snapshot, jump back, do some thing and make another system snapshot. - Cooperative multi-kernel/system. With kexec jump, you can switch between several kernels/systems quickly without boot process except the first time. This appears like swap a whole kernel/system out/in. - A general method to call program in physical mode (paging turning off). This can be used to invoke BIOS code under Linux. The following user-space tools can be used with kexec jump: - kexec-tools needs to be patched to support kexec jump. The patches and the precompiled kexec can be download from the following URL: source: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-src_git_kh10.tar.bz2 patches: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-patches_git_kh10.tar.bz2 binary: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec_git_kh10 - makedumpfile with patches are used as memory image saving tool, it can exclude free pages from original kernel memory image file. The patches and the precompiled makedumpfile can be download from the following URL: source: http://khibernation.sourceforge.net/download/release_v10/makedumpfile/makedumpfile-src_cvs_kh10.tar.bz2 patches: http://khibernation.sourceforge.net/download/release_v10/makedumpfile/makedumpfile-patches_cvs_kh10.tar.bz2 binary: http://khibernation.sourceforge.net/download/release_v10/makedumpfile/makedumpfile_cvs_kh10 - An initramfs image can be used as the root file system of kexeced kernel. An initramfs image built with "BuildRoot" can be downloaded from the following URL: initramfs image: http://khibernation.sourceforge.net/download/release_v10/initramfs/rootfs_cvs_kh10.gz All user space tools above are included in the initramfs image. Usage example of simple hibernation: 1. Compile and install patched kernel with following options selected: CONFIG_X86_32=y CONFIG_RELOCATABLE=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y CONFIG_PM=y CONFIG_HIBERNATION=y CONFIG_KEXEC_JUMP=y 2. 
Build an initramfs image contains kexec-tool and makedumpfile, or download the pre-built initramfs image, called rootfs.gz in following text. 3. Prepare a partition to save memory image of original kernel, called hibernating partition in following text. 4. Boot kernel compiled in step 1 (kernel A). 5. In the kernel A, load kernel compiled in step 1 (kernel B) with /sbin/kexec. The shell command line can be as follow: /sbin/kexec --load-preserve-context /boot/bzImage --mem-min=0x100000 --mem-max=0xffffff --initrd=rootfs.gz 6. Boot the kernel B with following shell command line: /sbin/kexec -e 7. The kernel B will boot as normal kexec. In kernel B the memory image of kernel A can be saved into hibernating partition as follow: jump_back_entry=`cat /proc/cmdline | tr ' ' '\n' | grep kexec_jump_back_entry | cut -d '='` echo $jump_back_entry > kexec_jump_back_entry cp /proc/vmcore dump.elf Then you can shutdown the machine as normal. 8. Boot kernel compiled in step 1 (kernel C). Use the rootfs.gz as root file system. 9. In kernel C, load the memory image of kernel A as follow: /sbin/kexec -l --args-none --entry=`cat kexec_jump_back_entry` dump.elf 10. Jump back to the kernel A as follow: /sbin/kexec -e Then, kernel A is resumed. Implementation point: To support jumping between two kernels, before jumping to (executing) the new kernel and jumping back to the original kernel, the devices are put into quiescent state, and the state of devices and CPU is saved. After jumping back from kexeced kernel and jumping to the new kernel, the state of devices and CPU are restored accordingly. The devices/CPU state save/restore code of software suspend is called to implement corresponding function. Known issues: - Because the segment number supported by sys_kexec_load is limited, hibernation image with many segments may not be load. This is planned to be eliminated by adding a new flag to sys_kexec_load to make a image can be loaded with multiple sys_kexec_load invoking. Now, only the i386 architecture is supported. Signed-off-by: Huang Ying Acked-by: Vivek Goyal Cc: "Eric W. Biederman" Cc: Pavel Machek Cc: Nigel Cunningham Cc: "Rafael J. 
Wysocki" Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 5 +++-- arch/x86/kernel/machine_kexec_32.c | 12 ++++++++++++ include/linux/suspend.h | 2 ++ kernel/kexec.c | 39 ++++++++++++++++++++++++++++++++++++++ kernel/power/power.h | 2 -- 5 files changed, 56 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7ecb679f0130..6b2debfabddc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1282,9 +1282,10 @@ config CRASH_DUMP config KEXEC_JUMP bool "kexec jump (EXPERIMENTAL)" depends on EXPERIMENTAL - depends on KEXEC && PM_SLEEP && X86_32 + depends on KEXEC && HIBERNATION && X86_32 help - Invoke code in physical address mode via KEXEC + Jump between original kernel and kexeced kernel and invoke + code in physical address mode via KEXEC config PHYSICAL_START hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 2b67609d0a1c..9fe478d98406 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -125,6 +125,18 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + if (image->preserve_context) { +#ifdef CONFIG_X86_IO_APIC + /* We need to put APICs in legacy mode so that we can + * get timer interrupts in second kernel. kexec/kdump + * paths already have calls to disable_IO_APIC() in + * one form or other. kexec jump path also need + * one. + */ + disable_IO_APIC(); +#endif + } + control_page = page_address(image->control_code_page); memcpy(control_page, relocate_kernel, PAGE_SIZE/2); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index e8e69159af71..c63435095970 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -278,4 +278,6 @@ static inline void register_nosave_region_late(unsigned long b, unsigned long e) } #endif +extern struct mutex pm_mutex; + #endif /* _LINUX_SUSPEND_H */ diff --git a/kernel/kexec.c b/kernel/kexec.c index a0d920915b38..c8a4370e2a34 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -26,6 +26,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -1441,7 +1445,31 @@ int kernel_kexec(void) if (kexec_image->preserve_context) { #ifdef CONFIG_KEXEC_JUMP + mutex_lock(&pm_mutex); + pm_prepare_console(); + error = freeze_processes(); + if (error) { + error = -EBUSY; + goto Restore_console; + } + suspend_console(); + error = device_suspend(PMSG_FREEZE); + if (error) + goto Resume_console; + error = disable_nonboot_cpus(); + if (error) + goto Resume_devices; local_irq_disable(); + /* At this point, device_suspend() has been called, + * but *not* device_power_down(). We *must* + * device_power_down() now. Otherwise, drivers for + * some devices (e.g. interrupt controllers) become + * desynchronized with the actual state of the + * hardware at resume time, and evil weirdness ensues. 
+ */ + error = device_power_down(PMSG_FREEZE); + if (error) + goto Enable_irqs; save_processor_state(); #endif } else { @@ -1459,7 +1487,18 @@ int kernel_kexec(void) if (kexec_image->preserve_context) { #ifdef CONFIG_KEXEC_JUMP restore_processor_state(); + device_power_up(PMSG_RESTORE); + Enable_irqs: local_irq_enable(); + enable_nonboot_cpus(); + Resume_devices: + device_resume(PMSG_RESTORE); + Resume_console: + resume_console(); + thaw_processes(); + Restore_console: + pm_restore_console(); + mutex_unlock(&pm_mutex); #endif } diff --git a/kernel/power/power.h b/kernel/power/power.h index 700f44ec8406..acc0c101dbd5 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -53,8 +53,6 @@ extern int hibernation_platform_enter(void); extern int pfn_is_nosave(unsigned long); -extern struct mutex pm_mutex; - #define power_attr(_name) \ static struct kobj_attribute _name##_attr = { \ .attr = { \ -- cgit v1.2.3 From c2147a5092cfe13dbf3210e54e8a622015edeecc Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Fri, 25 Jul 2008 19:45:11 -0700 Subject: Better interface for hooking early initcalls Added early initcall (pre-SMP) support, using an identical interface to that of regular initcalls. Functions called from do_pre_smp_initcalls() could be converted to use this cleaner interface. This is required by CPU hotplug, because early users have to register notifiers before going SMP. One such CPU hotplug user is the relay interface with buffer-only channels, which needs to register such a notifier, to be usable in early code. This in turn is used by kmemtrace. Signed-off-by: Eduard - Gabriel Munteanu Cc: Tom Zanussi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/vmlinux.lds.h | 2 ++ include/linux/init.h | 7 +++++++ init/main.c | 13 +++++++++++-- 3 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 729f6b0a60e9..9cd44b162ba1 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -359,6 +359,8 @@ } #define INITCALLS \ + *(.initcallearly.init) \ + __early_initcall_end = .; \ *(.initcall0.init) \ *(.initcall0s.init) \ *(.initcall1.init) \ diff --git a/include/linux/init.h b/include/linux/init.h index 42ae95411a93..11b84e106053 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -169,6 +169,13 @@ extern void (*late_time_init)(void); static initcall_t __initcall_##fn##id __used \ __attribute__((__section__(".initcall" level ".init"))) = fn +/* + * Early initcalls run before initializing SMP. + * + * Only for built-in code, not modules. + */ +#define early_initcall(fn) __define_initcall("early",fn,early) + /* * A "pure" initcall has no dependencies on anything else, and purely * initializes variables that couldn't be statically initialized. 
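For illustration only (not part of the patch): a minimal sketch of how built-in code would register a pre-SMP initcall with the new macro; the function name is hypothetical.

static int __init my_early_setup(void)
{
	/* Runs from do_pre_smp_initcalls(), before secondary CPUs are brought up. */
	return 0;
}
early_initcall(my_early_setup);

As the comment added above notes, this is only for built-in code, not modules.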
diff --git a/init/main.c b/init/main.c index 0604cbcaf1e4..b6fec08dbbef 100644 --- a/init/main.c +++ b/init/main.c @@ -743,13 +743,13 @@ static void __init do_one_initcall(initcall_t fn) } -extern initcall_t __initcall_start[], __initcall_end[]; +extern initcall_t __initcall_start[], __initcall_end[], __early_initcall_end[]; static void __init do_initcalls(void) { initcall_t *call; - for (call = __initcall_start; call < __initcall_end; call++) + for (call = __early_initcall_end; call < __initcall_end; call++) do_one_initcall(*call); /* Make sure there is no pending stuff from the initcall sequence */ @@ -783,6 +783,14 @@ static int __init nosoftlockup_setup(char *str) } __setup("nosoftlockup", nosoftlockup_setup); +static void __init __do_pre_smp_initcalls(void) +{ + initcall_t *call; + + for (call = __initcall_start; call < __early_initcall_end; call++) + do_one_initcall(*call); +} + static void __init do_pre_smp_initcalls(void) { extern int spawn_ksoftirqd(void); @@ -865,6 +873,7 @@ static int __init kernel_init(void * unused) smp_prepare_cpus(setup_max_cpus); + __do_pre_smp_initcalls(); do_pre_smp_initcalls(); smp_init(); -- cgit v1.2.3 From 7babe8db99d305340cf4828ce1f5a1481d5622ef Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Fri, 25 Jul 2008 19:45:11 -0700 Subject: Full conversion to early_initcall() interface, remove old interface A previous patch added the early_initcall(), to allow a cleaner hooking of pre-SMP initcalls. Now we remove the older interface, converting all existing users to the new one. [akpm@linux-foundation.org: cleanups] [akpm@linux-foundation.org: build fix] [kosaki.motohiro@jp.fujitsu.com: warning fix] [kosaki.motohiro@jp.fujitsu.com: warning fix] Signed-off-by: Eduard - Gabriel Munteanu Cc: Tom Zanussi Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 9 --------- include/linux/smp.h | 5 ----- init/main.c | 23 +---------------------- kernel/sched.c | 5 ++++- kernel/smp.c | 4 +++- kernel/softirq.c | 3 ++- kernel/softlockup.c | 25 ++++++++++++++++++++++--- 7 files changed, 32 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3260a5c42b91..adb8077dc463 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -292,7 +292,6 @@ extern void sched_show_task(struct task_struct *p); #ifdef CONFIG_DETECT_SOFTLOCKUP extern void softlockup_tick(void); -extern void spawn_softlockup_task(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); extern unsigned int softlockup_panic; @@ -2222,14 +2221,6 @@ static inline void inc_syscw(struct task_struct *tsk) } #endif -#ifdef CONFIG_SMP -void migration_init(void); -#else -static inline void migration_init(void) -{ -} -#endif - #ifndef TASK_SIZE_OF #define TASK_SIZE_OF(tsk) TASK_SIZE #endif diff --git a/include/linux/smp.h b/include/linux/smp.h index 48262f86c969..66484d4a8459 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -74,15 +74,10 @@ void __smp_call_function_single(int cpuid, struct call_single_data *data); #ifdef CONFIG_USE_GENERIC_SMP_HELPERS void generic_smp_call_function_single_interrupt(void); void generic_smp_call_function_interrupt(void); -void init_call_single_data(void); void ipi_call_lock(void); void ipi_call_unlock(void); void ipi_call_lock_irq(void); void ipi_call_unlock_irq(void); -#else -static inline void init_call_single_data(void) -{ -} #endif /* diff --git a/init/main.c b/init/main.c 
index b6fec08dbbef..20fdc9884b77 100644 --- a/init/main.c +++ b/init/main.c @@ -774,16 +774,7 @@ static void __init do_basic_setup(void) do_initcalls(); } -static int __initdata nosoftlockup; - -static int __init nosoftlockup_setup(char *str) -{ - nosoftlockup = 1; - return 1; -} -__setup("nosoftlockup", nosoftlockup_setup); - -static void __init __do_pre_smp_initcalls(void) +static void __init do_pre_smp_initcalls(void) { initcall_t *call; @@ -791,17 +782,6 @@ static void __init __do_pre_smp_initcalls(void) do_one_initcall(*call); } -static void __init do_pre_smp_initcalls(void) -{ - extern int spawn_ksoftirqd(void); - - init_call_single_data(); - migration_init(); - spawn_ksoftirqd(); - if (!nosoftlockup) - spawn_softlockup_task(); -} - static void run_init_process(char *init_filename) { argv_init[0] = init_filename; @@ -873,7 +853,6 @@ static int __init kernel_init(void * unused) smp_prepare_cpus(setup_max_cpus); - __do_pre_smp_initcalls(); do_pre_smp_initcalls(); smp_init(); diff --git a/kernel/sched.c b/kernel/sched.c index 0047bd9b96aa..fde1a1026359 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6389,7 +6389,7 @@ static struct notifier_block __cpuinitdata migration_notifier = { .priority = 10 }; -void __init migration_init(void) +static int __init migration_init(void) { void *cpu = (void *)(long)smp_processor_id(); int err; @@ -6399,7 +6399,10 @@ void __init migration_init(void) BUG_ON(err == NOTIFY_BAD); migration_call(&migration_notifier, CPU_ONLINE, cpu); register_cpu_notifier(&migration_notifier); + + return err; } +early_initcall(migration_init); #endif #ifdef CONFIG_SMP diff --git a/kernel/smp.c b/kernel/smp.c index 462c785ca1ee..96fc7c0edc59 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -33,7 +33,7 @@ struct call_single_queue { spinlock_t lock; }; -void __cpuinit init_call_single_data(void) +static int __cpuinit init_call_single_data(void) { int i; @@ -43,7 +43,9 @@ void __cpuinit init_call_single_data(void) spin_lock_init(&q->lock); INIT_LIST_HEAD(&q->list); } + return 0; } +early_initcall(init_call_single_data); static void csd_flag_wait(struct call_single_data *data) { diff --git a/kernel/softirq.c b/kernel/softirq.c index f6b03d56c2bf..c506f266a6b9 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -630,7 +630,7 @@ static struct notifier_block __cpuinitdata cpu_nfb = { .notifier_call = cpu_callback }; -__init int spawn_ksoftirqd(void) +static __init int spawn_ksoftirqd(void) { void *cpu = (void *)(long)smp_processor_id(); int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); @@ -640,6 +640,7 @@ __init int spawn_ksoftirqd(void) register_cpu_notifier(&cpu_nfb); return 0; } +early_initcall(spawn_ksoftirqd); #ifdef CONFIG_SMP /* diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 7bd8d1aadd5d..b75b492fbfcf 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -338,14 +338,33 @@ static struct notifier_block __cpuinitdata cpu_nfb = { .notifier_call = cpu_callback }; -__init void spawn_softlockup_task(void) +static int __initdata nosoftlockup; + +static int __init nosoftlockup_setup(char *str) +{ + nosoftlockup = 1; + return 1; +} +__setup("nosoftlockup", nosoftlockup_setup); + +static int __init spawn_softlockup_task(void) { void *cpu = (void *)(long)smp_processor_id(); - int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); + int err; - BUG_ON(err == NOTIFY_BAD); + if (nosoftlockup) + return 0; + + err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); + if (err == NOTIFY_BAD) { + BUG(); + return 1; + } cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); 
register_cpu_notifier(&cpu_nfb); atomic_notifier_chain_register(&panic_notifier_list, &panic_block); + + return 0; } +early_initcall(spawn_softlockup_task); -- cgit v1.2.3 From 20d8b67c06fa5e74f44e80b0a0fd68c8327f7c6a Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Fri, 25 Jul 2008 19:45:12 -0700 Subject: relay: add buffer-only channels; useful for early logging Allows one to create and use a channel with no associated files. Files can be initialized later. This is useful in scenarios such as logging in early code, before VFS is up. Therefore, such channels can be created and used as soon as kmem_cache_init() completed. This is needed by kmemtrace to do tracing in early kernel code. [kosaki.motohiro@jp.fujitsu.com: build fix] Signed-off-by: Eduard - Gabriel Munteanu Cc: Tom Zanussi Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/relay.txt | 10 +++ include/linux/relay.h | 5 ++ kernel/relay.c | 170 ++++++++++++++++++++++++++++++------ 3 files changed, 156 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.txt index 094f2d2f38b1..510b722667ac 100644 --- a/Documentation/filesystems/relay.txt +++ b/Documentation/filesystems/relay.txt @@ -294,6 +294,16 @@ user-defined data with a channel, and is immediately available (including in create_buf_file()) via chan->private_data or buf->chan->private_data. +Buffer-only channels +-------------------- + +These channels have no files associated and can be created with +relay_open(NULL, NULL, ...). Such channels are useful in scenarios such +as when doing early tracing in the kernel, before the VFS is up. In these +cases, one may open a buffer-only channel and then call +relay_late_setup_files() when the kernel is ready to handle files, +to expose the buffered data to the userspace. + Channel 'modes' --------------- diff --git a/include/linux/relay.h b/include/linux/relay.h index 6cd8c4425fc7..953fc055e875 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -48,6 +48,7 @@ struct rchan_buf size_t *padding; /* padding counts per sub-buffer */ size_t prev_padding; /* temporary variable */ size_t bytes_consumed; /* bytes consumed in cur read subbuf */ + size_t early_bytes; /* bytes consumed before VFS inited */ unsigned int cpu; /* this buf's cpu */ } ____cacheline_aligned; @@ -68,6 +69,7 @@ struct rchan int is_global; /* One global buffer ? */ struct list_head list; /* for channel list */ struct dentry *parent; /* parent dentry passed to open */ + int has_base_filename; /* has a filename associated? 
*/ char base_filename[NAME_MAX]; /* saved base filename */ }; @@ -169,6 +171,9 @@ struct rchan *relay_open(const char *base_filename, size_t n_subbufs, struct rchan_callbacks *cb, void *private_data); +extern int relay_late_setup_files(struct rchan *chan, + const char *base_filename, + struct dentry *parent); extern void relay_close(struct rchan *chan); extern void relay_flush(struct rchan *chan); extern void relay_subbufs_consumed(struct rchan *chan, diff --git a/kernel/relay.c b/kernel/relay.c index 7de644cdec43..04006ef970b8 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -407,6 +407,35 @@ void relay_reset(struct rchan *chan) } EXPORT_SYMBOL_GPL(relay_reset); +static inline void relay_set_buf_dentry(struct rchan_buf *buf, + struct dentry *dentry) +{ + buf->dentry = dentry; + buf->dentry->d_inode->i_size = buf->early_bytes; +} + +static struct dentry *relay_create_buf_file(struct rchan *chan, + struct rchan_buf *buf, + unsigned int cpu) +{ + struct dentry *dentry; + char *tmpname; + + tmpname = kzalloc(NAME_MAX + 1, GFP_KERNEL); + if (!tmpname) + return NULL; + snprintf(tmpname, NAME_MAX, "%s%d", chan->base_filename, cpu); + + /* Create file in fs */ + dentry = chan->cb->create_buf_file(tmpname, chan->parent, + S_IRUSR, buf, + &chan->is_global); + + kfree(tmpname); + + return dentry; +} + /* * relay_open_buf - create a new relay channel buffer * @@ -416,45 +445,34 @@ static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu) { struct rchan_buf *buf = NULL; struct dentry *dentry; - char *tmpname; if (chan->is_global) return chan->buf[0]; - tmpname = kzalloc(NAME_MAX + 1, GFP_KERNEL); - if (!tmpname) - goto end; - snprintf(tmpname, NAME_MAX, "%s%d", chan->base_filename, cpu); - buf = relay_create_buf(chan); if (!buf) - goto free_name; + return NULL; + + if (chan->has_base_filename) { + dentry = relay_create_buf_file(chan, buf, cpu); + if (!dentry) + goto free_buf; + relay_set_buf_dentry(buf, dentry); + } buf->cpu = cpu; __relay_reset(buf, 1); - /* Create file in fs */ - dentry = chan->cb->create_buf_file(tmpname, chan->parent, S_IRUSR, - buf, &chan->is_global); - if (!dentry) - goto free_buf; - - buf->dentry = dentry; - if(chan->is_global) { chan->buf[0] = buf; buf->cpu = 0; } - goto free_name; + return buf; free_buf: relay_destroy_buf(buf); - buf = NULL; -free_name: - kfree(tmpname); -end: - return buf; + return NULL; } /** @@ -537,8 +555,8 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, /** * relay_open - create a new relay channel - * @base_filename: base name of files to create - * @parent: dentry of parent directory, %NULL for root directory + * @base_filename: base name of files to create, %NULL for buffering only + * @parent: dentry of parent directory, %NULL for root directory or buffer * @subbuf_size: size of sub-buffers * @n_subbufs: number of sub-buffers * @cb: client callback functions @@ -560,8 +578,6 @@ struct rchan *relay_open(const char *base_filename, { unsigned int i; struct rchan *chan; - if (!base_filename) - return NULL; if (!(subbuf_size && n_subbufs)) return NULL; @@ -576,7 +592,10 @@ struct rchan *relay_open(const char *base_filename, chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs); chan->parent = parent; chan->private_data = private_data; - strlcpy(chan->base_filename, base_filename, NAME_MAX); + if (base_filename) { + chan->has_base_filename = 1; + strlcpy(chan->base_filename, base_filename, NAME_MAX); + } setup_callbacks(chan, cb); kref_init(&chan->kref); @@ -604,6 +623,94 @@ free_bufs: } 
EXPORT_SYMBOL_GPL(relay_open); +struct rchan_percpu_buf_dispatcher { + struct rchan_buf *buf; + struct dentry *dentry; +}; + +/* Called in atomic context. */ +static void __relay_set_buf_dentry(void *info) +{ + struct rchan_percpu_buf_dispatcher *p = info; + + relay_set_buf_dentry(p->buf, p->dentry); +} + +/** + * relay_late_setup_files - triggers file creation + * @chan: channel to operate on + * @base_filename: base name of files to create + * @parent: dentry of parent directory, %NULL for root directory + * + * Returns 0 if successful, non-zero otherwise. + * + * Use to setup files for a previously buffer-only channel. + * Useful to do early tracing in kernel, before VFS is up, for example. + */ +int relay_late_setup_files(struct rchan *chan, + const char *base_filename, + struct dentry *parent) +{ + int err = 0; + unsigned int i, curr_cpu; + unsigned long flags; + struct dentry *dentry; + struct rchan_percpu_buf_dispatcher disp; + + if (!chan || !base_filename) + return -EINVAL; + + strlcpy(chan->base_filename, base_filename, NAME_MAX); + + mutex_lock(&relay_channels_mutex); + /* Is chan already set up? */ + if (unlikely(chan->has_base_filename)) + return -EEXIST; + chan->has_base_filename = 1; + chan->parent = parent; + curr_cpu = get_cpu(); + /* + * The CPU hotplug notifier ran before us and created buffers with + * no files associated. So it's safe to call relay_setup_buf_file() + * on all currently online CPUs. + */ + for_each_online_cpu(i) { + if (unlikely(!chan->buf[i])) { + printk(KERN_ERR "relay_late_setup_files: CPU %u " + "has no buffer, it must have!\n", i); + BUG(); + err = -EINVAL; + break; + } + + dentry = relay_create_buf_file(chan, chan->buf[i], i); + if (unlikely(!dentry)) { + err = -EINVAL; + break; + } + + if (curr_cpu == i) { + local_irq_save(flags); + relay_set_buf_dentry(chan->buf[i], dentry); + local_irq_restore(flags); + } else { + disp.buf = chan->buf[i]; + disp.dentry = dentry; + smp_mb(); + /* relay_channels_mutex must be held, so wait. 
*/ + err = smp_call_function_single(i, + __relay_set_buf_dentry, + &disp, 1); + } + if (unlikely(err)) + break; + } + put_cpu(); + mutex_unlock(&relay_channels_mutex); + + return err; +} + /** * relay_switch_subbuf - switch to a new sub-buffer * @buf: channel buffer @@ -627,8 +734,13 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; buf->padding[old_subbuf] = buf->prev_padding; buf->subbufs_produced++; - buf->dentry->d_inode->i_size += buf->chan->subbuf_size - - buf->padding[old_subbuf]; + if (buf->dentry) + buf->dentry->d_inode->i_size += + buf->chan->subbuf_size - + buf->padding[old_subbuf]; + else + buf->early_bytes += buf->chan->subbuf_size - + buf->padding[old_subbuf]; smp_mb(); if (waitqueue_active(&buf->read_wait)) /* @@ -1237,4 +1349,4 @@ static __init int relay_init(void) return 0; } -module_init(relay_init); +early_initcall(relay_init); -- cgit v1.2.3 From 080ccd4573607a930367c2128fc709814b2ade5d Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Fri, 25 Jul 2008 19:45:13 -0700 Subject: include/linux/aio.h: removed duplicated include Removed duplicated include in include/linux/aio.h Signed-off-by: Huang Weiyi Signed-off-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/aio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index b51ddd28444e..09b276c35227 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -7,7 +7,6 @@ #include #include -#include #define AIO_MAXSEGS 4 #define AIO_KIOGRP_NR_ATOMIC 8 -- cgit v1.2.3 From 21cc199baa815d7b3f1ace4be20b9558cbddc00f Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 25 Jul 2008 19:45:22 -0700 Subject: mm: introduce get_user_pages_fast Introduce a new get_user_pages_fast mm API, which is basically a get_user_pages with a less general API (but still tends to be suited to the common case): - task and mm are always current and current->mm - force is always 0 - pages is always non-NULL - don't pass back vmas This restricted API can be implemented in a much more scalable way on many architectures when the ptes are present, by walking the page tables locklessly (no mmap_sem or page table locks). When the ptes are not populated, get_user_pages_fast() could be slower. This is implemented locklessly on x86, and used in some key direct IO call sites, in later patches, which provides nearly 10% performance improvement on a threaded database workload. Lots of other code could use this too, depending on use cases (eg. grep drivers/). And it might inspire some new and clever ways to use it. 
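For illustration only (not part of the patch): a rough sketch of how a direct-IO style caller might pin a user buffer with the new call, based on the declaration added below; the helper name and error handling are hypothetical.

#include <linux/mm.h>

/* Pin the pages backing [uaddr, uaddr + len); assumes len > 0. */
static int pin_user_buffer(unsigned long uaddr, size_t len, struct page **pages)
{
	unsigned long first = uaddr >> PAGE_SHIFT;
	unsigned long last = (uaddr + len - 1) >> PAGE_SHIFT;
	int nr_pages = last - first + 1;
	int pinned;

	/* Always operates on current->mm, force=0, and returns no vmas. */
	pinned = get_user_pages_fast(uaddr & PAGE_MASK, nr_pages, 1 /* write */, pages);
	if (pinned < 0)
		return pinned;
	if (pinned < nr_pages) {
		/* Partial pin: drop what we got and let the caller fall back. */
		while (pinned--)
			put_page(pages[pinned]);
		return -EFAULT;
	}
	return nr_pages;
}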
[akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Nick Piggin Cc: Dave Kleikamp Cc: Andy Whitcroft Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Andi Kleen Cc: Dave Kleikamp Cc: Badari Pulavarty Cc: Zach Brown Cc: Jens Axboe Reviewed-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index d87a5a5fe87d..f3fd70d6029f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -833,6 +833,39 @@ extern int mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long start, unsigned long end, unsigned long newflags); +#ifdef CONFIG_HAVE_GET_USER_PAGES_FAST +/* + * get_user_pages_fast provides equivalent functionality to get_user_pages, + * operating on current and current->mm (force=0 and doesn't return any vmas). + * + * get_user_pages_fast may take mmap_sem and page tables, so no assumptions + * can be made about locking. get_user_pages_fast is to be implemented in a + * way that is advantageous (vs get_user_pages()) when the user memory area is + * already faulted in and present in ptes. However if the pages have to be + * faulted in, it may turn out to be slightly slower). + */ +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages); + +#else +/* + * Should probably be moved to asm-generic, and architectures can include it if + * they don't implement their own get_user_pages_fast. + */ +#define get_user_pages_fast(start, nr_pages, write, pages) \ +({ \ + struct mm_struct *mm = current->mm; \ + int ret; \ + \ + down_read(&mm->mmap_sem); \ + ret = get_user_pages(current, mm, start, nr_pages, \ + write, 0, pages, NULL); \ + up_read(&mm->mmap_sem); \ + \ + ret; \ +}) +#endif + /* * A callback you can register to apply pressure to ageable caches. * -- cgit v1.2.3 From 47feff2c8eefe85099f87c43d3096855f0085ca0 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 25 Jul 2008 19:45:29 -0700 Subject: radix-tree: add gang_lookup_slot, gang_lookup_slot_tag Introduce gang_lookup_slot() and gang_lookup_slot_tag() functions, which are used by lockless pagecache. Signed-off-by: Nick Piggin Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Hugh Dickins Cc: "Paul E. McKenney" Reviewed-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 12 ++- lib/radix-tree.c | 178 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 166 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index b8ce2b444bb5..a916c6660dfa 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -99,12 +99,15 @@ do { \ * * The notable exceptions to this rule are the following functions: * radix_tree_lookup + * radix_tree_lookup_slot * radix_tree_tag_get * radix_tree_gang_lookup + * radix_tree_gang_lookup_slot * radix_tree_gang_lookup_tag + * radix_tree_gang_lookup_tag_slot * radix_tree_tagged * - * The first 4 functions are able to be called locklessly, using RCU. The + * The first 7 functions are able to be called locklessly, using RCU. The * caller must ensure calls to these functions are made within rcu_read_lock() * regions. Other readers (lock-free or otherwise) and modifications may be * running concurrently. 
@@ -159,6 +162,9 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long); unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items); +unsigned int +radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, + unsigned long first_index, unsigned int max_items); unsigned long radix_tree_next_hole(struct radix_tree_root *root, unsigned long index, unsigned long max_scan); int radix_tree_preload(gfp_t gfp_mask); @@ -173,6 +179,10 @@ unsigned int radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items, unsigned int tag); +unsigned int +radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, + unsigned long first_index, unsigned int max_items, + unsigned int tag); int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); static inline void radix_tree_preload_end(void) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 56ec21a7f73d..9c4f1ffa2864 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -359,18 +359,17 @@ EXPORT_SYMBOL(radix_tree_insert); * Returns: the slot corresponding to the position @index in the * radix tree @root. This is useful for update-if-exists operations. * - * This function cannot be called under rcu_read_lock, it must be - * excluded from writers, as must the returned slot for subsequent - * use by radix_tree_deref_slot() and radix_tree_replace slot. - * Caller must hold tree write locked across slot lookup and - * replace. + * This function can be called under rcu_read_lock iff the slot is not + * modified by radix_tree_replace_slot, otherwise it must be called + * exclusive from other writers. Any dereference of the slot must be done + * using radix_tree_deref_slot. 
*/ void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) { unsigned int height, shift; struct radix_tree_node *node, **slot; - node = root->rnode; + node = rcu_dereference(root->rnode); if (node == NULL) return NULL; @@ -390,7 +389,7 @@ void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) do { slot = (struct radix_tree_node **) (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); - node = *slot; + node = rcu_dereference(*slot); if (node == NULL) return NULL; @@ -667,7 +666,7 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root, EXPORT_SYMBOL(radix_tree_next_hole); static unsigned int -__lookup(struct radix_tree_node *slot, void **results, unsigned long index, +__lookup(struct radix_tree_node *slot, void ***results, unsigned long index, unsigned int max_items, unsigned long *next_index) { unsigned int nr_found = 0; @@ -701,11 +700,9 @@ __lookup(struct radix_tree_node *slot, void **results, unsigned long index, /* Bottom level: grab some items */ for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { - struct radix_tree_node *node; index++; - node = slot->slots[i]; - if (node) { - results[nr_found++] = rcu_dereference(node); + if (slot->slots[i]) { + results[nr_found++] = &(slot->slots[i]); if (nr_found == max_items) goto out; } @@ -759,13 +756,22 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, ret = 0; while (ret < max_items) { - unsigned int nr_found; + unsigned int nr_found, slots_found, i; unsigned long next_index; /* Index of next search */ if (cur_index > max_index) break; - nr_found = __lookup(node, results + ret, cur_index, + slots_found = __lookup(node, (void ***)results + ret, cur_index, max_items - ret, &next_index); + nr_found = 0; + for (i = 0; i < slots_found; i++) { + struct radix_tree_node *slot; + slot = *(((void ***)results)[ret + i]); + if (!slot) + continue; + results[ret + nr_found] = rcu_dereference(slot); + nr_found++; + } ret += nr_found; if (next_index == 0) break; @@ -776,12 +782,71 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, } EXPORT_SYMBOL(radix_tree_gang_lookup); +/** + * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree + * @root: radix tree root + * @results: where the results of the lookup are placed + * @first_index: start the lookup from this key + * @max_items: place up to this many items at *results + * + * Performs an index-ascending scan of the tree for present items. Places + * their slots at *@results and returns the number of items which were + * placed at *@results. + * + * The implementation is naive. + * + * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must + * be dereferenced with radix_tree_deref_slot, and if using only RCU + * protection, radix_tree_deref_slot may fail requiring a retry. 
+ */ +unsigned int +radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, + unsigned long first_index, unsigned int max_items) +{ + unsigned long max_index; + struct radix_tree_node *node; + unsigned long cur_index = first_index; + unsigned int ret; + + node = rcu_dereference(root->rnode); + if (!node) + return 0; + + if (!radix_tree_is_indirect_ptr(node)) { + if (first_index > 0) + return 0; + results[0] = (void **)&root->rnode; + return 1; + } + node = radix_tree_indirect_to_ptr(node); + + max_index = radix_tree_maxindex(node->height); + + ret = 0; + while (ret < max_items) { + unsigned int slots_found; + unsigned long next_index; /* Index of next search */ + + if (cur_index > max_index) + break; + slots_found = __lookup(node, results + ret, cur_index, + max_items - ret, &next_index); + ret += slots_found; + if (next_index == 0) + break; + cur_index = next_index; + } + + return ret; +} +EXPORT_SYMBOL(radix_tree_gang_lookup_slot); + /* * FIXME: the two tag_get()s here should use find_next_bit() instead of * open-coding the search. */ static unsigned int -__lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, +__lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index, unsigned int max_items, unsigned long *next_index, unsigned int tag) { unsigned int nr_found = 0; @@ -811,11 +876,9 @@ __lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, unsigned long j = index & RADIX_TREE_MAP_MASK; for ( ; j < RADIX_TREE_MAP_SIZE; j++) { - struct radix_tree_node *node; index++; if (!tag_get(slot, tag, j)) continue; - node = slot->slots[j]; /* * Even though the tag was found set, we need to * recheck that we have a non-NULL node, because @@ -826,9 +889,8 @@ __lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, * lookup ->slots[x] without a lock (ie. can't * rely on its value remaining the same). */ - if (node) { - node = rcu_dereference(node); - results[nr_found++] = node; + if (slot->slots[j]) { + results[nr_found++] = &(slot->slots[j]); if (nr_found == max_items) goto out; } @@ -887,13 +949,22 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, ret = 0; while (ret < max_items) { - unsigned int nr_found; + unsigned int nr_found, slots_found, i; unsigned long next_index; /* Index of next search */ if (cur_index > max_index) break; - nr_found = __lookup_tag(node, results + ret, cur_index, - max_items - ret, &next_index, tag); + slots_found = __lookup_tag(node, (void ***)results + ret, + cur_index, max_items - ret, &next_index, tag); + nr_found = 0; + for (i = 0; i < slots_found; i++) { + struct radix_tree_node *slot; + slot = *(((void ***)results)[ret + i]); + if (!slot) + continue; + results[ret + nr_found] = rcu_dereference(slot); + nr_found++; + } ret += nr_found; if (next_index == 0) break; @@ -904,6 +975,67 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, } EXPORT_SYMBOL(radix_tree_gang_lookup_tag); +/** + * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a + * radix tree based on a tag + * @root: radix tree root + * @results: where the results of the lookup are placed + * @first_index: start the lookup from this key + * @max_items: place up to this many items at *results + * @tag: the tag index (< RADIX_TREE_MAX_TAGS) + * + * Performs an index-ascending scan of the tree for present items which + * have the tag indexed by @tag set. 
Places the slots at *@results and + * returns the number of slots which were placed at *@results. + */ +unsigned int +radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, + unsigned long first_index, unsigned int max_items, + unsigned int tag) +{ + struct radix_tree_node *node; + unsigned long max_index; + unsigned long cur_index = first_index; + unsigned int ret; + + /* check the root's tag bit */ + if (!root_tag_get(root, tag)) + return 0; + + node = rcu_dereference(root->rnode); + if (!node) + return 0; + + if (!radix_tree_is_indirect_ptr(node)) { + if (first_index > 0) + return 0; + results[0] = (void **)&root->rnode; + return 1; + } + node = radix_tree_indirect_to_ptr(node); + + max_index = radix_tree_maxindex(node->height); + + ret = 0; + while (ret < max_items) { + unsigned int slots_found; + unsigned long next_index; /* Index of next search */ + + if (cur_index > max_index) + break; + slots_found = __lookup_tag(node, results + ret, + cur_index, max_items - ret, &next_index, tag); + ret += slots_found; + if (next_index == 0) + break; + cur_index = next_index; + } + + return ret; +} +EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); + + /** * radix_tree_shrink - shrink height of a radix tree to minimal * @root radix tree root -- cgit v1.2.3 From e286781d5f2e9c846e012a39653a166e9d31777d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 25 Jul 2008 19:45:30 -0700 Subject: mm: speculative page references If we can be sure that elevating the page_count on a pagecache page will pin it, we can speculatively run this operation, and subsequently check to see if we hit the right page rather than relying on holding a lock or otherwise pinning a reference to the page. This can be done if get_page/put_page behaves consistently throughout the whole tree (ie. if we "get" the page after it has been used for something else, we must be able to free it with a put_page). Actually, there is a period where the count behaves differently: when the page is free or if it is a constituent page of a compound page. We need an atomic_inc_not_zero operation to ensure we don't try to grab the page in either case. This patch introduces the core locking protocol to the pagecache (ie. adds page_cache_get_speculative, and tweaks some update-side code to make it work). Thanks to Hugh for pointing out an improvement to the algorithm setting page_count to zero when we have control of all references, in order to hold off speculative getters. [kamezawa.hiroyu@jp.fujitsu.com: fix migration_entry_wait()] [hugh@veritas.com: fix add_to_page_cache] [akpm@linux-foundation.org: repair a comment] Signed-off-by: Nick Piggin Cc: Jeff Garzik Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Hugh Dickins Cc: "Paul E. 
McKenney" Reviewed-by: Peter Zijlstra Signed-off-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: KOSAKI Motohiro Signed-off-by: Hugh Dickins Acked-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/net/cassini.c | 12 ++++++ include/linux/pagemap.h | 111 +++++++++++++++++++++++++++++++++++++++++++++++- mm/filemap.c | 32 ++++++++------ mm/migrate.c | 20 ++++++++- mm/shmem.c | 6 +-- mm/swap_state.c | 17 +++++--- mm/vmscan.c | 74 +++++++++++++++++++++++--------- 7 files changed, 227 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c index 83768df27806..f1936d51b458 100644 --- a/drivers/net/cassini.c +++ b/drivers/net/cassini.c @@ -576,6 +576,18 @@ static void cas_spare_recover(struct cas *cp, const gfp_t flags) list_for_each_safe(elem, tmp, &list) { cas_page_t *page = list_entry(elem, cas_page_t, list); + /* + * With the lockless pagecache, cassini buffering scheme gets + * slightly less accurate: we might find that a page has an + * elevated reference count here, due to a speculative ref, + * and skip it as in-use. Ideally we would be able to reclaim + * it. However this would be such a rare case, it doesn't + * matter too much as we should pick it up the next time round. + * + * Importantly, if we find that the page has a refcount of 1 + * here (our refcount), then we know it is definitely not inuse + * so we can reuse it. + */ if (page_count(page->buffer) > 1) continue; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ee1ec2c7723c..a81d81890422 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -12,6 +12,7 @@ #include #include #include +#include /* for in_interrupt() */ /* * Bits in mapping->flags. The lower __GFP_BITS_SHIFT bits are the page @@ -62,6 +63,98 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) #define page_cache_release(page) put_page(page) void release_pages(struct page **pages, int nr, int cold); +/* + * speculatively take a reference to a page. + * If the page is free (_count == 0), then _count is untouched, and 0 + * is returned. Otherwise, _count is incremented by 1 and 1 is returned. + * + * This function must be called inside the same rcu_read_lock() section as has + * been used to lookup the page in the pagecache radix-tree (or page table): + * this allows allocators to use a synchronize_rcu() to stabilize _count. + * + * Unless an RCU grace period has passed, the count of all pages coming out + * of the allocator must be considered unstable. page_count may return higher + * than expected, and put_page must be able to do the right thing when the + * page has been finished with, no matter what it is subsequently allocated + * for (because put_page is what is used here to drop an invalid speculative + * reference). + * + * This is the interesting part of the lockless pagecache (and lockless + * get_user_pages) locking protocol, where the lookup-side (eg. find_get_page) + * has the following pattern: + * 1. find page in radix tree + * 2. conditionally increment refcount + * 3. check the page is still in pagecache (if no, goto 1) + * + * Remove-side that cares about stability of _count (eg. reclaim) has the + * following (with tree_lock held for write): + * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg) + * B. remove page from pagecache + * C. 
free the page + * + * There are 2 critical interleavings that matter: + * - 2 runs before A: in this case, A sees elevated refcount and bails out + * - A runs before 2: in this case, 2 sees zero refcount and retries; + * subsequently, B will complete and 1 will find no page, causing the + * lookup to return NULL. + * + * It is possible that between 1 and 2, the page is removed then the exact same + * page is inserted into the same position in pagecache. That's OK: the + * old find_get_page using tree_lock could equally have run before or after + * such a re-insertion, depending on order that locks are granted. + * + * Lookups racing against pagecache insertion isn't a big problem: either 1 + * will find the page or it will not. Likewise, the old find_get_page could run + * either before the insertion or afterwards, depending on timing. + */ +static inline int page_cache_get_speculative(struct page *page) +{ + VM_BUG_ON(in_interrupt()); + +#if !defined(CONFIG_SMP) && defined(CONFIG_CLASSIC_RCU) +# ifdef CONFIG_PREEMPT + VM_BUG_ON(!in_atomic()); +# endif + /* + * Preempt must be disabled here - we rely on rcu_read_lock doing + * this for us. + * + * Pagecache won't be truncated from interrupt context, so if we have + * found a page in the radix tree here, we have pinned its refcount by + * disabling preempt, and hence no need for the "speculative get" that + * SMP requires. + */ + VM_BUG_ON(page_count(page) == 0); + atomic_inc(&page->_count); + +#else + if (unlikely(!get_page_unless_zero(page))) { + /* + * Either the page has been freed, or will be freed. + * In either case, retry here and the caller should + * do the right thing (see comments above). + */ + return 0; + } +#endif + VM_BUG_ON(PageTail(page)); + + return 1; +} + +static inline int page_freeze_refs(struct page *page, int count) +{ + return likely(atomic_cmpxchg(&page->_count, count, 0) == count); +} + +static inline void page_unfreeze_refs(struct page *page, int count) +{ + VM_BUG_ON(page_count(page) != 0); + VM_BUG_ON(count == 0); + + atomic_set(&page->_count, count); +} + #ifdef CONFIG_NUMA extern struct page *__page_cache_alloc(gfp_t gfp); #else @@ -133,13 +226,29 @@ static inline struct page *read_mapping_page(struct address_space *mapping, return read_cache_page(mapping, index, filler, data); } -int add_to_page_cache(struct page *page, struct address_space *mapping, +int add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); int add_to_page_cache_lru(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern void remove_from_page_cache(struct page *page); extern void __remove_from_page_cache(struct page *page); +/* + * Like add_to_page_cache_locked, but used to add newly allocated pages: + * the page is new, so we can just run SetPageLocked() against it. + */ +static inline int add_to_page_cache(struct page *page, + struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) +{ + int error; + + SetPageLocked(page); + error = add_to_page_cache_locked(page, mapping, offset, gfp_mask); + if (unlikely(error)) + ClearPageLocked(page); + return error; +} + /* * Return byte-offset into filesystem object for page. 
*/ diff --git a/mm/filemap.c b/mm/filemap.c index 2d3ec1ffc66e..4e182a9a14c0 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -442,39 +442,43 @@ int filemap_write_and_wait_range(struct address_space *mapping, } /** - * add_to_page_cache - add newly allocated pagecache pages + * add_to_page_cache_locked - add a locked page to the pagecache * @page: page to add * @mapping: the page's address_space * @offset: page index * @gfp_mask: page allocation mode * - * This function is used to add newly allocated pagecache pages; - * the page is new, so we can just run SetPageLocked() against it. - * The other page state flags were set by rmqueue(). - * + * This function is used to add a page to the pagecache. It must be locked. * This function does not add the page to the LRU. The caller must do that. */ -int add_to_page_cache(struct page *page, struct address_space *mapping, +int add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) { - int error = mem_cgroup_cache_charge(page, current->mm, + int error; + + VM_BUG_ON(!PageLocked(page)); + + error = mem_cgroup_cache_charge(page, current->mm, gfp_mask & ~__GFP_HIGHMEM); if (error) goto out; error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); if (error == 0) { + page_cache_get(page); + page->mapping = mapping; + page->index = offset; + write_lock_irq(&mapping->tree_lock); error = radix_tree_insert(&mapping->page_tree, offset, page); - if (!error) { - page_cache_get(page); - SetPageLocked(page); - page->mapping = mapping; - page->index = offset; + if (likely(!error)) { mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); - } else + } else { + page->mapping = NULL; mem_cgroup_uncharge_cache_page(page); + page_cache_release(page); + } write_unlock_irq(&mapping->tree_lock); radix_tree_preload_end(); @@ -483,7 +487,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping, out: return error; } -EXPORT_SYMBOL(add_to_page_cache); +EXPORT_SYMBOL(add_to_page_cache_locked); int add_to_page_cache_lru(struct page *page, struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) diff --git a/mm/migrate.c b/mm/migrate.c index d8c65a65c61d..3ca6392e82cc 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -285,7 +285,15 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, page = migration_entry_to_page(entry); - get_page(page); + /* + * Once radix-tree replacement of page migration started, page_count + * *must* be zero. And, we don't want to call wait_on_page_locked() + * against a page without get_page(). + * So, we use get_page_unless_zero(), here. Even failed, page fault + * will occur again. + */ + if (!get_page_unless_zero(page)) + goto out; pte_unmap_unlock(ptep, ptl); wait_on_page_locked(page); put_page(page); @@ -305,6 +313,7 @@ out: static int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page) { + int expected_count; void **pslot; if (!mapping) { @@ -319,12 +328,18 @@ static int migrate_page_move_mapping(struct address_space *mapping, pslot = radix_tree_lookup_slot(&mapping->page_tree, page_index(page)); - if (page_count(page) != 2 + !!PagePrivate(page) || + expected_count = 2 + !!PagePrivate(page); + if (page_count(page) != expected_count || (struct page *)radix_tree_deref_slot(pslot) != page) { write_unlock_irq(&mapping->tree_lock); return -EAGAIN; } + if (!page_freeze_refs(page, expected_count)) { + write_unlock_irq(&mapping->tree_lock); + return -EAGAIN; + } + /* * Now we know that no one else is looking at the page. 
*/ @@ -338,6 +353,7 @@ static int migrate_page_move_mapping(struct address_space *mapping, radix_tree_replace_slot(pslot, newpage); + page_unfreeze_refs(page, expected_count); /* * Drop cache reference from old page. * We know this isn't the last reference. diff --git a/mm/shmem.c b/mm/shmem.c index f92fea94d037..1089092aecaf 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -936,7 +936,7 @@ found: spin_lock(&info->lock); ptr = shmem_swp_entry(info, idx, NULL); if (ptr && ptr->val == entry.val) { - error = add_to_page_cache(page, inode->i_mapping, + error = add_to_page_cache_locked(page, inode->i_mapping, idx, GFP_NOWAIT); /* does mem_cgroup_uncharge_cache_page on error */ } else /* we must compensate for our precharge above */ @@ -1301,8 +1301,8 @@ repeat: SetPageUptodate(filepage); set_page_dirty(filepage); swap_free(swap); - } else if (!(error = add_to_page_cache( - swappage, mapping, idx, GFP_NOWAIT))) { + } else if (!(error = add_to_page_cache_locked(swappage, mapping, + idx, GFP_NOWAIT))) { info->flags |= SHMEM_PAGEIN; shmem_swp_set(info, entry, 0); shmem_swp_unmap(entry); diff --git a/mm/swap_state.c b/mm/swap_state.c index d8aadaf2a0ba..3e3381d6c7ee 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -64,7 +64,7 @@ void show_swap_cache_info(void) } /* - * add_to_swap_cache resembles add_to_page_cache on swapper_space, + * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space, * but sets SwapCache flag and private instead of mapping and index. */ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) @@ -76,19 +76,26 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) BUG_ON(PagePrivate(page)); error = radix_tree_preload(gfp_mask); if (!error) { + page_cache_get(page); + SetPageSwapCache(page); + set_page_private(page, entry.val); + write_lock_irq(&swapper_space.tree_lock); error = radix_tree_insert(&swapper_space.page_tree, entry.val, page); - if (!error) { - page_cache_get(page); - SetPageSwapCache(page); - set_page_private(page, entry.val); + if (likely(!error)) { total_swapcache_pages++; __inc_zone_page_state(page, NR_FILE_PAGES); INC_CACHE_INFO(add_total); } write_unlock_irq(&swapper_space.tree_lock); radix_tree_preload_end(); + + if (unlikely(error)) { + set_page_private(page, 0UL); + ClearPageSwapCache(page); + page_cache_release(page); + } } return error; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 26672c6cd3ce..0075eac1cd04 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -391,12 +391,10 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, } /* - * Attempt to detach a locked page from its ->mapping. If it is dirty or if - * someone else has a ref on the page, abort and return 0. If it was - * successfully detached, return 1. Assumes the caller has a single ref on - * this page. + * Same as remove_mapping, but if the page is removed from the mapping, it + * gets returned with a refcount of 0. */ -int remove_mapping(struct address_space *mapping, struct page *page) +static int __remove_mapping(struct address_space *mapping, struct page *page) { BUG_ON(!PageLocked(page)); BUG_ON(mapping != page_mapping(page)); @@ -427,24 +425,24 @@ int remove_mapping(struct address_space *mapping, struct page *page) * Note that if SetPageDirty is always performed via set_page_dirty, * and thus under tree_lock, then this ordering is not required. 
*/ - if (unlikely(page_count(page) != 2)) + if (!page_freeze_refs(page, 2)) goto cannot_free; - smp_rmb(); - if (unlikely(PageDirty(page))) + /* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */ + if (unlikely(PageDirty(page))) { + page_unfreeze_refs(page, 2); goto cannot_free; + } if (PageSwapCache(page)) { swp_entry_t swap = { .val = page_private(page) }; __delete_from_swap_cache(page); write_unlock_irq(&mapping->tree_lock); swap_free(swap); - __put_page(page); /* The pagecache ref */ - return 1; + } else { + __remove_from_page_cache(page); + write_unlock_irq(&mapping->tree_lock); } - __remove_from_page_cache(page); - write_unlock_irq(&mapping->tree_lock); - __put_page(page); return 1; cannot_free: @@ -452,6 +450,26 @@ cannot_free: return 0; } +/* + * Attempt to detach a locked page from its ->mapping. If it is dirty or if + * someone else has a ref on the page, abort and return 0. If it was + * successfully detached, return 1. Assumes the caller has a single ref on + * this page. + */ +int remove_mapping(struct address_space *mapping, struct page *page) +{ + if (__remove_mapping(mapping, page)) { + /* + * Unfreezing the refcount with 1 rather than 2 effectively + * drops the pagecache ref for us without requiring another + * atomic operation. + */ + page_unfreeze_refs(page, 1); + return 1; + } + return 0; +} + /* * shrink_page_list() returns the number of reclaimed pages */ @@ -598,18 +616,34 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (PagePrivate(page)) { if (!try_to_release_page(page, sc->gfp_mask)) goto activate_locked; - if (!mapping && page_count(page) == 1) - goto free_it; + if (!mapping && page_count(page) == 1) { + unlock_page(page); + if (put_page_testzero(page)) + goto free_it; + else { + /* + * rare race with speculative reference. + * the speculative reference will free + * this page shortly, so we may + * increment nr_reclaimed here (and + * leave it off the LRU). + */ + nr_reclaimed++; + continue; + } + } } - if (!mapping || !remove_mapping(mapping, page)) + if (!mapping || !__remove_mapping(mapping, page)) goto keep_locked; -free_it: unlock_page(page); +free_it: nr_reclaimed++; - if (!pagevec_add(&freed_pvec, page)) - __pagevec_release_nonlru(&freed_pvec); + if (!pagevec_add(&freed_pvec, page)) { + __pagevec_free(&freed_pvec); + pagevec_reinit(&freed_pvec); + } continue; activate_locked: @@ -623,7 +657,7 @@ keep: } list_splice(&ret_pages, page_list); if (pagevec_count(&freed_pvec)) - __pagevec_release_nonlru(&freed_pvec); + __pagevec_free(&freed_pvec); count_vm_events(PGACTIVATE, pgactivate); return nr_reclaimed; } -- cgit v1.2.3 From 19fd6231279be3c3bdd02ed99f9b0eb195978064 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 25 Jul 2008 19:45:32 -0700 Subject: mm: spinlock tree_lock mapping->tree_lock has no read lockers. convert the lock from an rwlock to a spinlock. Signed-off-by: Nick Piggin Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Hugh Dickins Cc: "Paul E. 
McKenney" Reviewed-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 4 ++-- fs/inode.c | 2 +- include/asm-arm/cacheflush.h | 4 ++-- include/asm-parisc/cacheflush.h | 4 ++-- include/linux/fs.h | 2 +- mm/filemap.c | 10 +++++----- mm/migrate.c | 11 +++++------ mm/page-writeback.c | 12 ++++++------ mm/swap_state.c | 10 +++++----- mm/swapfile.c | 4 ++-- mm/truncate.c | 6 +++--- mm/vmscan.c | 8 ++++---- 12 files changed, 38 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index d48caee12e2a..109b261192d9 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -706,7 +706,7 @@ static int __set_page_dirty(struct page *page, if (TestSetPageDirty(page)) return 0; - write_lock_irq(&mapping->tree_lock); + spin_lock_irq(&mapping->tree_lock); if (page->mapping) { /* Race with truncate? */ WARN_ON_ONCE(warn && !PageUptodate(page)); @@ -719,7 +719,7 @@ static int __set_page_dirty(struct page *page, radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); return 1; diff --git a/fs/inode.c b/fs/inode.c index c36d9480335c..35b6414522ea 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -209,7 +209,7 @@ void inode_init_once(struct inode *inode) INIT_LIST_HEAD(&inode->i_dentry); INIT_LIST_HEAD(&inode->i_devices); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); - rwlock_init(&inode->i_data.tree_lock); + spin_lock_init(&inode->i_data.tree_lock); spin_lock_init(&inode->i_data.i_mmap_lock); INIT_LIST_HEAD(&inode->i_data.private_list); spin_lock_init(&inode->i_data.private_lock); diff --git a/include/asm-arm/cacheflush.h b/include/asm-arm/cacheflush.h index 70b0fe724b62..03cf1ee977b7 100644 --- a/include/asm-arm/cacheflush.h +++ b/include/asm-arm/cacheflush.h @@ -424,9 +424,9 @@ static inline void flush_anon_page(struct vm_area_struct *vma, } #define flush_dcache_mmap_lock(mapping) \ - write_lock_irq(&(mapping)->tree_lock) + spin_lock_irq(&(mapping)->tree_lock) #define flush_dcache_mmap_unlock(mapping) \ - write_unlock_irq(&(mapping)->tree_lock) + spin_unlock_irq(&(mapping)->tree_lock) #define flush_icache_user_range(vma,page,addr,len) \ flush_dcache_page(page) diff --git a/include/asm-parisc/cacheflush.h b/include/asm-parisc/cacheflush.h index 2f1e1b05440a..b7ca6dc7fddc 100644 --- a/include/asm-parisc/cacheflush.h +++ b/include/asm-parisc/cacheflush.h @@ -45,9 +45,9 @@ void flush_cache_mm(struct mm_struct *mm); extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping) \ - write_lock_irq(&(mapping)->tree_lock) + spin_lock_irq(&(mapping)->tree_lock) #define flush_dcache_mmap_unlock(mapping) \ - write_unlock_irq(&(mapping)->tree_lock) + spin_unlock_irq(&(mapping)->tree_lock) #define flush_icache_page(vma,page) do { \ flush_kernel_dcache_page(page); \ diff --git a/include/linux/fs.h b/include/linux/fs.h index 49d8eb7a71be..53d2edb709b3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -499,7 +499,7 @@ struct backing_dev_info; struct address_space { struct inode *host; /* owner: inode, block_device */ struct radix_tree_root page_tree; /* radix tree of all pages */ - rwlock_t tree_lock; /* and rwlock protecting it */ + spinlock_t tree_lock; /* and lock protecting it */ unsigned int i_mmap_writable;/* count VM_SHARED mappings */ struct prio_tree_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list 
VM_NONLINEAR mappings */ diff --git a/mm/filemap.c b/mm/filemap.c index feb8448d8618..2ed8b0389c51 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -109,7 +109,7 @@ /* * Remove a page from the page cache and free it. Caller has to make * sure the page is locked and that nobody else uses it - or that usage - * is safe. The caller must hold a write_lock on the mapping's tree_lock. + * is safe. The caller must hold the mapping's tree_lock. */ void __remove_from_page_cache(struct page *page) { @@ -141,9 +141,9 @@ void remove_from_page_cache(struct page *page) BUG_ON(!PageLocked(page)); - write_lock_irq(&mapping->tree_lock); + spin_lock_irq(&mapping->tree_lock); __remove_from_page_cache(page); - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); } static int sync_page(void *word) @@ -469,7 +469,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, page->mapping = mapping; page->index = offset; - write_lock_irq(&mapping->tree_lock); + spin_lock_irq(&mapping->tree_lock); error = radix_tree_insert(&mapping->page_tree, offset, page); if (likely(!error)) { mapping->nrpages++; @@ -480,7 +480,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, page_cache_release(page); } - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); radix_tree_preload_end(); } else mem_cgroup_uncharge_cache_page(page); diff --git a/mm/migrate.c b/mm/migrate.c index 3ca6392e82cc..153572fb60b8 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -323,7 +323,7 @@ static int migrate_page_move_mapping(struct address_space *mapping, return 0; } - write_lock_irq(&mapping->tree_lock); + spin_lock_irq(&mapping->tree_lock); pslot = radix_tree_lookup_slot(&mapping->page_tree, page_index(page)); @@ -331,12 +331,12 @@ static int migrate_page_move_mapping(struct address_space *mapping, expected_count = 2 + !!PagePrivate(page); if (page_count(page) != expected_count || (struct page *)radix_tree_deref_slot(pslot) != page) { - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); return -EAGAIN; } if (!page_freeze_refs(page, expected_count)) { - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); return -EAGAIN; } @@ -373,10 +373,9 @@ static int migrate_page_move_mapping(struct address_space *mapping, __dec_zone_page_state(page, NR_FILE_PAGES); __inc_zone_page_state(newpage, NR_FILE_PAGES); - write_unlock_irq(&mapping->tree_lock); - if (!PageSwapCache(newpage)) { + spin_unlock_irq(&mapping->tree_lock); + if (!PageSwapCache(newpage)) mem_cgroup_uncharge_cache_page(page); - } return 0; } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 94c6d8988ab3..24de8b65fdbd 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1088,7 +1088,7 @@ int __set_page_dirty_nobuffers(struct page *page) if (!mapping) return 1; - write_lock_irq(&mapping->tree_lock); + spin_lock_irq(&mapping->tree_lock); mapping2 = page_mapping(page); if (mapping2) { /* Race with truncate? 
*/ BUG_ON(mapping2 != mapping); @@ -1102,7 +1102,7 @@ int __set_page_dirty_nobuffers(struct page *page) radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); if (mapping->host) { /* !PageAnon && !swapper_space */ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); @@ -1258,7 +1258,7 @@ int test_clear_page_writeback(struct page *page) struct backing_dev_info *bdi = mapping->backing_dev_info; unsigned long flags; - write_lock_irqsave(&mapping->tree_lock, flags); + spin_lock_irqsave(&mapping->tree_lock, flags); ret = TestClearPageWriteback(page); if (ret) { radix_tree_tag_clear(&mapping->page_tree, @@ -1269,7 +1269,7 @@ int test_clear_page_writeback(struct page *page) __bdi_writeout_inc(bdi); } } - write_unlock_irqrestore(&mapping->tree_lock, flags); + spin_unlock_irqrestore(&mapping->tree_lock, flags); } else { ret = TestClearPageWriteback(page); } @@ -1287,7 +1287,7 @@ int test_set_page_writeback(struct page *page) struct backing_dev_info *bdi = mapping->backing_dev_info; unsigned long flags; - write_lock_irqsave(&mapping->tree_lock, flags); + spin_lock_irqsave(&mapping->tree_lock, flags); ret = TestSetPageWriteback(page); if (!ret) { radix_tree_tag_set(&mapping->page_tree, @@ -1300,7 +1300,7 @@ int test_set_page_writeback(struct page *page) radix_tree_tag_clear(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); - write_unlock_irqrestore(&mapping->tree_lock, flags); + spin_unlock_irqrestore(&mapping->tree_lock, flags); } else { ret = TestSetPageWriteback(page); } diff --git a/mm/swap_state.c b/mm/swap_state.c index 3e3381d6c7ee..2c217e33d497 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -39,7 +39,7 @@ static struct backing_dev_info swap_backing_dev_info = { struct address_space swapper_space = { .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), - .tree_lock = __RW_LOCK_UNLOCKED(swapper_space.tree_lock), + .tree_lock = __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock), .a_ops = &swap_aops, .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), .backing_dev_info = &swap_backing_dev_info, @@ -80,7 +80,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) SetPageSwapCache(page); set_page_private(page, entry.val); - write_lock_irq(&swapper_space.tree_lock); + spin_lock_irq(&swapper_space.tree_lock); error = radix_tree_insert(&swapper_space.page_tree, entry.val, page); if (likely(!error)) { @@ -88,7 +88,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) __inc_zone_page_state(page, NR_FILE_PAGES); INC_CACHE_INFO(add_total); } - write_unlock_irq(&swapper_space.tree_lock); + spin_unlock_irq(&swapper_space.tree_lock); radix_tree_preload_end(); if (unlikely(error)) { @@ -182,9 +182,9 @@ void delete_from_swap_cache(struct page *page) entry.val = page_private(page); - write_lock_irq(&swapper_space.tree_lock); + spin_lock_irq(&swapper_space.tree_lock); __delete_from_swap_cache(page); - write_unlock_irq(&swapper_space.tree_lock); + spin_unlock_irq(&swapper_space.tree_lock); swap_free(entry); page_cache_release(page); diff --git a/mm/swapfile.c b/mm/swapfile.c index 2f33edb8bee9..af283933c14e 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -369,13 +369,13 @@ int remove_exclusive_swap_page(struct page *page) retval = 0; if (p->swap_map[swp_offset(entry)] == 1) { /* Recheck the page count with the swapcache lock held.. 
*/ - write_lock_irq(&swapper_space.tree_lock); + spin_lock_irq(&swapper_space.tree_lock); if ((page_count(page) == 2) && !PageWriteback(page)) { __delete_from_swap_cache(page); SetPageDirty(page); retval = 1; } - write_unlock_irq(&swapper_space.tree_lock); + spin_unlock_irq(&swapper_space.tree_lock); } spin_unlock(&swap_lock); diff --git a/mm/truncate.c b/mm/truncate.c index b8961cb63414..e68443d74567 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -349,18 +349,18 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) return 0; - write_lock_irq(&mapping->tree_lock); + spin_lock_irq(&mapping->tree_lock); if (PageDirty(page)) goto failed; BUG_ON(PagePrivate(page)); __remove_from_page_cache(page); - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); ClearPageUptodate(page); page_cache_release(page); /* pagecache ref */ return 1; failed: - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); return 0; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 0075eac1cd04..8f71761bc4b7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -399,7 +399,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page) BUG_ON(!PageLocked(page)); BUG_ON(mapping != page_mapping(page)); - write_lock_irq(&mapping->tree_lock); + spin_lock_irq(&mapping->tree_lock); /* * The non racy check for a busy page. * @@ -436,17 +436,17 @@ static int __remove_mapping(struct address_space *mapping, struct page *page) if (PageSwapCache(page)) { swp_entry_t swap = { .val = page_private(page) }; __delete_from_swap_cache(page); - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); swap_free(swap); } else { __remove_from_page_cache(page); - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); } return 1; cannot_free: - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); return 0; } -- cgit v1.2.3 From 51cc50685a4275c6a02653670af9f108a64e01cf Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 25 Jul 2008 19:45:34 -0700 Subject: SL*B: drop kmem cache argument from constructor Kmem cache passed to constructor is only needed for constructors that are themselves multiplexeres. Nobody uses this "feature", nor does anybody uses passed kmem cache in non-trivial way, so pass only pointer to object. Non-trivial places are: arch/powerpc/mm/init_64.c arch/powerpc/mm/hugetlbpage.c This is flag day, yes. 
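For reference, the shape of the conversion is the same in nearly every caller touched below. The following sketch is illustrative only — the my_inode_info and my_inode_cachep names are made up, not taken from any file in this patch — and shows what a typical inode-cache constructor and its kmem_cache_create() call look like after this series:

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/slab.h>

/* Hypothetical per-filesystem inode wrapper, for illustration only. */
struct my_inode_info {
	struct inode vfs_inode;
};

static struct kmem_cache *my_inode_cachep;

/*
 * Constructor after this series: only the object pointer is passed.
 * Before the series the signature was
 *	void (*ctor)(struct kmem_cache *cachep, void *foo);
 * and the cachep argument was almost never used.
 */
static void init_once(void *foo)
{
	struct my_inode_info *ei = foo;

	inode_init_once(&ei->vfs_inode);
}

static int __init my_init_inodecache(void)
{
	my_inode_cachep = kmem_cache_create("my_inode_cache",
					sizeof(struct my_inode_info), 0,
					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
					init_once);
	return my_inode_cachep ? 0 : -ENOMEM;
}
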
Signed-off-by: Alexey Dobriyan Acked-by: Pekka Enberg Acked-by: Christoph Lameter Cc: Jon Tollefson Cc: Nick Piggin Cc: Matt Mackall [akpm@linux-foundation.org: fix arch/powerpc/mm/hugetlbpage.c] [akpm@linux-foundation.org: fix mm/slab.c] [akpm@linux-foundation.org: fix ubifs] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/plat-s3c24xx/dma.c | 2 +- arch/powerpc/kernel/rtas_flash.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 9 ++------- arch/powerpc/mm/init_64.c | 24 +++++++++--------------- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- arch/sh/mm/pmb.c | 2 +- arch/xtensa/mm/init.c | 2 +- drivers/usb/mon/mon_text.c | 4 ++-- fs/adfs/super.c | 2 +- fs/affs/super.c | 2 +- fs/afs/super.c | 4 ++-- fs/befs/linuxvfs.c | 2 +- fs/bfs/inode.c | 2 +- fs/block_dev.c | 2 +- fs/buffer.c | 2 +- fs/cifs/cifsfs.c | 2 +- fs/coda/inode.c | 2 +- fs/ecryptfs/main.c | 4 ++-- fs/efs/super.c | 2 +- fs/ext2/super.c | 2 +- fs/ext3/super.c | 2 +- fs/ext4/super.c | 2 +- fs/fat/cache.c | 2 +- fs/fat/inode.c | 2 +- fs/fuse/inode.c | 2 +- fs/gfs2/main.c | 4 ++-- fs/hfs/super.c | 2 +- fs/hfsplus/super.c | 2 +- fs/hpfs/super.c | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/inode.c | 2 +- fs/isofs/inode.c | 2 +- fs/jffs2/super.c | 2 +- fs/jfs/jfs_metapage.c | 2 +- fs/jfs/super.c | 2 +- fs/locks.c | 2 +- fs/minix/inode.c | 2 +- fs/ncpfs/inode.c | 2 +- fs/nfs/inode.c | 2 +- fs/ntfs/super.c | 2 +- fs/ocfs2/dlm/dlmfs.c | 3 +-- fs/ocfs2/super.c | 2 +- fs/openpromfs/inode.c | 2 +- fs/proc/inode.c | 2 +- fs/qnx4/inode.c | 2 +- fs/reiserfs/super.c | 2 +- fs/romfs/inode.c | 2 +- fs/smbfs/inode.c | 2 +- fs/sysv/inode.c | 2 +- fs/ubifs/super.c | 2 +- fs/udf/super.c | 2 +- fs/ufs/super.c | 2 +- fs/xfs/linux-2.6/kmem.h | 2 +- fs/xfs/linux-2.6/xfs_super.c | 1 - include/linux/slab.h | 2 +- include/linux/slub_def.h | 2 +- ipc/mqueue.c | 2 +- kernel/fork.c | 2 +- lib/idr.c | 2 +- lib/radix-tree.c | 2 +- mm/rmap.c | 2 +- mm/shmem.c | 2 +- mm/slab.c | 11 +++++------ mm/slob.c | 7 +++---- mm/slub.c | 13 ++++++------- net/socket.c | 2 +- net/sunrpc/rpc_pipe.c | 2 +- 67 files changed, 90 insertions(+), 106 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c index 60f162dc4fad..8c5e656d5d8c 100644 --- a/arch/arm/plat-s3c24xx/dma.c +++ b/arch/arm/plat-s3c24xx/dma.c @@ -1304,7 +1304,7 @@ struct sysdev_class dma_sysclass = { /* kmem cache implementation */ -static void s3c2410_dma_cache_ctor(struct kmem_cache *c, void *p) +static void s3c2410_dma_cache_ctor(void *p) { memset(p, 0, sizeof(struct s3c2410_dma_buf)); } diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 09ded5c424a9..149cb112cd1a 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -286,7 +286,7 @@ static ssize_t rtas_flash_read(struct file *file, char __user *buf, } /* constructor for flash_block_cache */ -void rtas_block_ctor(struct kmem_cache *cache, void *ptr) +void rtas_block_ctor(void *ptr) { memset(ptr, 0, RTAS_BLK_SIZE); } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index fb42c4dd3217..ed0aab0208a6 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -113,7 +113,7 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, unsigned long address, unsigned int psize) { - pte_t *new = kmem_cache_alloc(huge_pgtable_cache(psize), + pte_t *new = kmem_cache_zalloc(huge_pgtable_cache(psize), 
GFP_KERNEL|__GFP_REPEAT); if (! new) @@ -730,11 +730,6 @@ static int __init hugepage_setup_sz(char *str) } __setup("hugepagesz=", hugepage_setup_sz); -static void zero_ctor(struct kmem_cache *cache, void *addr) -{ - memset(addr, 0, kmem_cache_size(cache)); -} - static int __init hugetlbpage_init(void) { unsigned int psize; @@ -756,7 +751,7 @@ static int __init hugetlbpage_init(void) HUGEPTE_TABLE_SIZE(psize), HUGEPTE_TABLE_SIZE(psize), 0, - zero_ctor); + NULL); if (!huge_pgtable_cache(psize)) panic("hugetlbpage_init(): could not create %s"\ "\n", HUGEPTE_CACHE_NAME(psize)); diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a41bc5aa2043..4f7df85129d8 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -136,9 +136,14 @@ static int __init setup_kcore(void) module_init(setup_kcore); #endif -static void zero_ctor(struct kmem_cache *cache, void *addr) +static void pgd_ctor(void *addr) { - memset(addr, 0, kmem_cache_size(cache)); + memset(addr, 0, PGD_TABLE_SIZE); +} + +static void pmd_ctor(void *addr) +{ + memset(addr, 0, PMD_TABLE_SIZE); } static const unsigned int pgtable_cache_size[2] = { @@ -163,19 +168,8 @@ struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; void pgtable_cache_init(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { - int size = pgtable_cache_size[i]; - const char *name = pgtable_cache_name[i]; - - pr_debug("Allocating page table cache %s (#%d) " - "for size: %08x...\n", name, i, size); - pgtable_cache[i] = kmem_cache_create(name, - size, size, - SLAB_PANIC, - zero_ctor); - } + pgtable_cache[0] = kmem_cache_create(pgtable_cache_name[0], PGD_TABLE_SIZE, PGD_TABLE_SIZE, SLAB_PANIC, pgd_ctor); + pgtable_cache[1] = kmem_cache_create(pgtable_cache_name[1], PMD_TABLE_SIZE, PMD_TABLE_SIZE, SLAB_PANIC, pmd_ctor); } #ifdef CONFIG_SPARSEMEM_VMEMMAP diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 7123472801d9..690ca7b0dcf6 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -78,7 +78,7 @@ spufs_destroy_inode(struct inode *inode) } static void -spufs_init_once(struct kmem_cache *cachep, void *p) +spufs_init_once(void *p) { struct spufs_inode_info *ei = p; diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c index 0b0ec6e04753..46911bcbf17b 100644 --- a/arch/sh/mm/pmb.c +++ b/arch/sh/mm/pmb.c @@ -293,7 +293,7 @@ void pmb_unmap(unsigned long addr) } while (pmbe); } -static void pmb_cache_ctor(struct kmem_cache *cachep, void *pmb) +static void pmb_cache_ctor(void *pmb) { struct pmb_entry *pmbe = pmb; diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 81d0560eaea2..ee261005b363 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -309,7 +309,7 @@ void show_mem(void) struct kmem_cache *pgtable_cache __read_mostly; -static void pgd_ctor(struct kmem_cache *cache, void* addr) +static void pgd_ctor(void* addr) { pte_t* ptep = (pte_t*)addr; int i; diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c index 5e3e4e9b6c77..1f715436d6d3 100644 --- a/drivers/usb/mon/mon_text.c +++ b/drivers/usb/mon/mon_text.c @@ -87,7 +87,7 @@ struct mon_reader_text { static struct dentry *mon_dir; /* Usually /sys/kernel/debug/usbmon */ -static void mon_text_ctor(struct kmem_cache *, void *); +static void mon_text_ctor(void *); struct mon_text_ptr { int cnt, limit; @@ -720,7 +720,7 @@ void mon_text_del(struct mon_bus *mbus) /* * Slab interface: constructor. 
*/ -static void mon_text_ctor(struct kmem_cache *slab, void *mem) +static void mon_text_ctor(void *mem) { /* * Nothing to initialize. No, really! diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 9e421eeb672b..26f3b43726bb 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -249,7 +249,7 @@ static void adfs_destroy_inode(struct inode *inode) kmem_cache_free(adfs_inode_cachep, ADFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; diff --git a/fs/affs/super.c b/fs/affs/super.c index 4e0309566406..3a89094f93d0 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -90,7 +90,7 @@ static void affs_destroy_inode(struct inode *inode) kmem_cache_free(affs_inode_cachep, AFFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct affs_inode_info *ei = (struct affs_inode_info *) foo; diff --git a/fs/afs/super.c b/fs/afs/super.c index 7e3faeef6818..250d8c4d66e4 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -27,7 +27,7 @@ #define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ -static void afs_i_init_once(struct kmem_cache *cachep, void *foo); +static void afs_i_init_once(void *foo); static int afs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt); @@ -449,7 +449,7 @@ static void afs_put_super(struct super_block *sb) /* * initialise an inode cache slab element prior to any use */ -static void afs_i_init_once(struct kmem_cache *cachep, void *_vnode) +static void afs_i_init_once(void *_vnode) { struct afs_vnode *vnode = _vnode; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index e8717de3bab3..02c6e62b72f8 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -289,7 +289,7 @@ befs_destroy_inode(struct inode *inode) kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct befs_inode_info *bi = (struct befs_inode_info *) foo; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 053e690ec9ed..0ed57b5ee012 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -264,7 +264,7 @@ static void bfs_destroy_inode(struct inode *inode) kmem_cache_free(bfs_inode_cachep, BFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct bfs_inode_info *bi = foo; diff --git a/fs/block_dev.c b/fs/block_dev.c index 10d8a0aa871a..dcf37cada369 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -271,7 +271,7 @@ static void bdev_destroy_inode(struct inode *inode) kmem_cache_free(bdev_cachep, bdi); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct bdev_inode *ei = (struct bdev_inode *) foo; struct block_device *bdev = &ei->bdev; diff --git a/fs/buffer.c b/fs/buffer.c index 109b261192d9..5fd497cdd6f3 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3272,7 +3272,7 @@ int bh_submit_read(struct buffer_head *bh) EXPORT_SYMBOL(bh_submit_read); static void -init_buffer_head(struct kmem_cache *cachep, void *data) +init_buffer_head(void *data) { struct buffer_head *bh = data; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 22857c639df5..fe5f6809cba6 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -766,7 +766,7 @@ const struct file_operations cifs_dir_ops = { }; static void -cifs_init_once(struct kmem_cache *cachep, void *inode) +cifs_init_once(void *inode) { struct 
cifsInodeInfo *cifsi = inode; diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 2f58dfc70083..830f51abb971 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -58,7 +58,7 @@ static void coda_destroy_inode(struct inode *inode) kmem_cache_free(coda_inode_cachep, ITOC(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct coda_inode_info *ei = (struct coda_inode_info *) foo; diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 6f403cfba14f..448dfd597b5f 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -578,7 +578,7 @@ static struct file_system_type ecryptfs_fs_type = { * Initializes the ecryptfs_inode_info_cache when it is created */ static void -inode_info_init_once(struct kmem_cache *cachep, void *vptr) +inode_info_init_once(void *vptr) { struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr; @@ -589,7 +589,7 @@ static struct ecryptfs_cache_info { struct kmem_cache **cache; const char *name; size_t size; - void (*ctor)(struct kmem_cache *cache, void *obj); + void (*ctor)(void *obj); } ecryptfs_cache_infos[] = { { .cache = &ecryptfs_auth_tok_list_item_cache, diff --git a/fs/efs/super.c b/fs/efs/super.c index d733531b55e2..567b134fa1f1 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -70,7 +70,7 @@ static void efs_destroy_inode(struct inode *inode) kmem_cache_free(efs_inode_cachep, INODE_INFO(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct efs_inode_info *ei = (struct efs_inode_info *) foo; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 31308a3b0b8b..fd88c7b43e66 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -159,7 +159,7 @@ static void ext2_destroy_inode(struct inode *inode) kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 615788c6843a..8ddced384674 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -472,7 +472,7 @@ static void ext3_destroy_inode(struct inode *inode) kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1cb371dcd609..b5479b1dff14 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -595,7 +595,7 @@ static void ext4_destroy_inode(struct inode *inode) kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 3a9ecac8d61f..3222f51c41cf 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -36,7 +36,7 @@ static inline int fat_max_cache(struct inode *inode) static struct kmem_cache *fat_cache_cachep; -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct fat_cache *cache = (struct fat_cache *)foo; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 23676f9d79ce..6d266d793e2c 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -498,7 +498,7 @@ static void fat_destroy_inode(struct inode *inode) kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void 
*foo) { struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 7d2f7d6e22e2..d2249f174e20 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -956,7 +956,7 @@ static inline void unregister_fuseblk(void) } #endif -static void fuse_inode_init_once(struct kmem_cache *cachep, void *foo) +static void fuse_inode_init_once(void *foo) { struct inode * inode = foo; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index bcc668d0fadd..bb2cc303ac29 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -24,7 +24,7 @@ #include "util.h" #include "glock.h" -static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo) +static void gfs2_init_inode_once(void *foo) { struct gfs2_inode *ip = foo; @@ -33,7 +33,7 @@ static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo) ip->i_alloc = NULL; } -static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo) +static void gfs2_init_glock_once(void *foo) { struct gfs2_glock *gl = foo; diff --git a/fs/hfs/super.c b/fs/hfs/super.c index ac2ec5ef66e4..4abb1047c689 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -432,7 +432,7 @@ static struct file_system_type hfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -static void hfs_init_once(struct kmem_cache *cachep, void *p) +static void hfs_init_once(void *p) { struct hfs_inode_info *i = p; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 3859118531c7..e834e578c93f 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -485,7 +485,7 @@ static struct file_system_type hfsplus_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -static void hfsplus_init_once(struct kmem_cache *cachep, void *p) +static void hfsplus_init_once(void *p) { struct hfsplus_inode_info *i = p; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index f63a699ec659..b8ae9c90ada0 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -173,7 +173,7 @@ static void hpfs_destroy_inode(struct inode *inode) kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index dbd01d262ca4..3f58923fb39b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -705,7 +705,7 @@ static const struct address_space_operations hugetlbfs_aops = { }; -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; diff --git a/fs/inode.c b/fs/inode.c index 35b6414522ea..b6726f644530 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -224,7 +224,7 @@ void inode_init_once(struct inode *inode) EXPORT_SYMBOL(inode_init_once); -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct inode * inode = (struct inode *) foo; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 044a254d526b..26948a6033b6 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -73,7 +73,7 @@ static void isofs_destroy_inode(struct inode *inode) kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct iso_inode_info *ei = foo; diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 7da69eae49e4..efd401257ed9 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -44,7 +44,7 @@ static void jffs2_destroy_inode(struct inode *inode) 
kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); } -static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo) +static void jffs2_i_init_once(void *foo) { struct jffs2_inode_info *f = foo; diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 854ff0ec574f..c350057087dd 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -182,7 +182,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp) #endif -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct metapage *mp = (struct metapage *)foo; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 359c091d8965..3630718be395 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -760,7 +760,7 @@ static struct file_system_type jfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; diff --git a/fs/locks.c b/fs/locks.c index 01490300f7cb..5eb259e3cd38 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -201,7 +201,7 @@ EXPORT_SYMBOL(locks_init_lock); * Initialises the fields of the file lock which are invariant for * free file_locks. */ -static void init_once(struct kmem_cache *cache, void *foo) +static void init_once(void *foo) { struct file_lock *lock = (struct file_lock *) foo; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 523d73713418..d1d1eb84679d 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -68,7 +68,7 @@ static void minix_destroy_inode(struct inode *inode) kmem_cache_free(minix_inode_cachep, minix_i(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct minix_inode_info *ei = (struct minix_inode_info *) foo; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 2e5ab1204dec..d642f0e5b365 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -64,7 +64,7 @@ static void ncp_destroy_inode(struct inode *inode) kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index df23f987da6b..52daefa2f521 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1242,7 +1242,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) #endif } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct nfs_inode *nfsi = (struct nfs_inode *) foo; diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 3e76f3b216bc..4a46743b5077 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3080,7 +3080,7 @@ struct kmem_cache *ntfs_inode_cache; struct kmem_cache *ntfs_big_inode_cache; /* Init once constructor for the inode slab cache. 
*/ -static void ntfs_big_inode_init_once(struct kmem_cache *cachep, void *foo) +static void ntfs_big_inode_init_once(void *foo) { ntfs_inode *ni = (ntfs_inode *)foo; diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index e48aba698b77..533a789c3ef8 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -267,8 +267,7 @@ static ssize_t dlmfs_file_write(struct file *filp, return writelen; } -static void dlmfs_init_once(struct kmem_cache *cachep, - void *foo) +static void dlmfs_init_once(void *foo) { struct dlmfs_inode_private *ip = (struct dlmfs_inode_private *) foo; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index ccecfe5094fa..2560b33889aa 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1118,7 +1118,7 @@ bail: return status; } -static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data) +static void ocfs2_inode_init_once(void *data) { struct ocfs2_inode_info *oi = data; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index d17b4fd204e1..9f5b054f06b9 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -430,7 +430,7 @@ static struct file_system_type openprom_fs_type = { .kill_sb = kill_anon_super, }; -static void op_inode_init_once(struct kmem_cache * cachep, void *data) +static void op_inode_init_once(void *data) { struct op_inode_info *oi = (struct op_inode_info *) data; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 02eca2ed9dd7..b37f25dc45a5 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -94,7 +94,7 @@ static void proc_destroy_inode(struct inode *inode) kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct proc_inode *ei = (struct proc_inode *) foo; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index b31ab78052b3..2aad1044b84c 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -553,7 +553,7 @@ static void qnx4_destroy_inode(struct inode *inode) kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 2ec748ba0bd3..879e54d35c2d 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -521,7 +521,7 @@ static void reiserfs_destroy_inode(struct inode *inode) kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 3f13d491c7c7..8e51a2aaa977 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -577,7 +577,7 @@ static void romfs_destroy_inode(struct inode *inode) kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct romfs_inode_info *ei = foo; diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 376ef3ee6ed7..3528f40ffb0f 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -67,7 +67,7 @@ static void smb_destroy_inode(struct inode *inode) kmem_cache_free(smb_inode_cachep, SMB_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct smb_inode_info *ei = (struct smb_inode_info *) foo; diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index c5d60de0658f..df0d435baa48 100644 --- a/fs/sysv/inode.c +++ 
b/fs/sysv/inode.c @@ -326,7 +326,7 @@ static void sysv_destroy_inode(struct inode *inode) kmem_cache_free(sysv_inode_cachep, SYSV_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *p) +static void init_once(void *p) { struct sysv_inode_info *si = (struct sysv_inode_info *)p; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 00eb9c68ad03..ca1e2d4e03cc 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1841,7 +1841,7 @@ static struct file_system_type ubifs_fs_type = { /* * Inode slab cache constructor. */ -static void inode_slab_ctor(struct kmem_cache *cachep, void *obj) +static void inode_slab_ctor(void *obj) { struct ubifs_inode *ui = obj; inode_init_once(&ui->vfs_inode); diff --git a/fs/udf/super.c b/fs/udf/super.c index 44cc702f96cc..5698bbf83bbf 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -148,7 +148,7 @@ static void udf_destroy_inode(struct inode *inode) kmem_cache_free(udf_inode_cachep, UDF_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct udf_inode_info *ei = (struct udf_inode_info *)foo; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 227c9d700040..3e30e40aa24d 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1302,7 +1302,7 @@ static void ufs_destroy_inode(struct inode *inode) kmem_cache_free(ufs_inode_cachep, UFS_I(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 5e9564902976..a20683cf74dd 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -79,7 +79,7 @@ kmem_zone_init(int size, char *zone_name) static inline kmem_zone_t * kmem_zone_init_flags(int size, char *zone_name, unsigned long flags, - void (*construct)(kmem_zone_t *, void *)) + void (*construct)(void *)) { return kmem_cache_create(zone_name, size, 0, flags, construct); } diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 742b2c7852c1..943381284e2e 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -843,7 +843,6 @@ xfs_fs_destroy_inode( STATIC void xfs_fs_inode_init_once( - kmem_zone_t *zonep, void *vnode) { inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); diff --git a/include/linux/slab.h b/include/linux/slab.h index 41103910f8a2..9ff8e8499403 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -58,7 +58,7 @@ int slab_is_available(void); struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, unsigned long, - void (*)(struct kmem_cache *, void *)); + void (*)(void *)); void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); void kmem_cache_free(struct kmem_cache *, void *); diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index d117ea2825a9..5bad61a93f65 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -85,7 +85,7 @@ struct kmem_cache { struct kmem_cache_order_objects min; gfp_t allocflags; /* gfp flags to use on each alloc */ int refcount; /* Refcount for slab cache destroy */ - void (*ctor)(struct kmem_cache *, void *); + void (*ctor)(void *); int inuse; /* Offset to metadata */ int align; /* Alignment */ const char *name; /* Name (only for display!) 
*/ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 1fdc2eb2f6d8..474984f9e032 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -207,7 +207,7 @@ static int mqueue_get_sb(struct file_system_type *fs_type, return get_sb_single(fs_type, flags, data, mqueue_fill_super, mnt); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo; diff --git a/kernel/fork.c b/kernel/fork.c index b99d73e971a4..80e83e459b17 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1442,7 +1442,7 @@ long do_fork(unsigned long clone_flags, #define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif -static void sighand_ctor(struct kmem_cache *cachep, void *data) +static void sighand_ctor(void *data) { struct sighand_struct *sighand = data; diff --git a/lib/idr.c b/lib/idr.c index 3476f8203e97..e728c7fccc4d 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -607,7 +607,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id) } EXPORT_SYMBOL(idr_replace); -static void idr_cache_ctor(struct kmem_cache *idr_layer_cache, void *idr_layer) +static void idr_cache_ctor(void *idr_layer) { memset(idr_layer, 0, sizeof(struct idr_layer)); } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 9c4f1ffa2864..be86b32bc874 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1183,7 +1183,7 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) EXPORT_SYMBOL(radix_tree_tagged); static void -radix_tree_node_ctor(struct kmem_cache *cachep, void *node) +radix_tree_node_ctor(void *node) { memset(node, 0, sizeof(struct radix_tree_node)); } diff --git a/mm/rmap.c b/mm/rmap.c index abbd29f7c43f..39ae5a9bf382 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -138,7 +138,7 @@ void anon_vma_unlink(struct vm_area_struct *vma) anon_vma_free(anon_vma); } -static void anon_vma_ctor(struct kmem_cache *cachep, void *data) +static void anon_vma_ctor(void *data) { struct anon_vma *anon_vma = data; diff --git a/mm/shmem.c b/mm/shmem.c index 1089092aecaf..952d361774bb 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2352,7 +2352,7 @@ static void shmem_destroy_inode(struct inode *inode) kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct shmem_inode_info *p = (struct shmem_inode_info *) foo; diff --git a/mm/slab.c b/mm/slab.c index 052e7d64537e..918f04f7fef1 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -406,7 +406,7 @@ struct kmem_cache { unsigned int dflags; /* dynamic flags */ /* constructor func */ - void (*ctor)(struct kmem_cache *, void *); + void (*ctor)(void *obj); /* 5) cache creation/removal */ const char *name; @@ -2137,8 +2137,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) */ struct kmem_cache * kmem_cache_create (const char *name, size_t size, size_t align, - unsigned long flags, - void (*ctor)(struct kmem_cache *, void *)) + unsigned long flags, void (*ctor)(void *)) { size_t left_over, slab_size, ralign; struct kmem_cache *cachep = NULL, *pc; @@ -2653,7 +2652,7 @@ static void cache_init_objs(struct kmem_cache *cachep, * They must also be threaded. 
*/ if (cachep->ctor && !(cachep->flags & SLAB_POISON)) - cachep->ctor(cachep, objp + obj_offset(cachep)); + cachep->ctor(objp + obj_offset(cachep)); if (cachep->flags & SLAB_RED_ZONE) { if (*dbg_redzone2(cachep, objp) != RED_INACTIVE) @@ -2669,7 +2668,7 @@ static void cache_init_objs(struct kmem_cache *cachep, cachep->buffer_size / PAGE_SIZE, 0); #else if (cachep->ctor) - cachep->ctor(cachep, objp); + cachep->ctor(objp); #endif slab_bufctl(slabp)[i] = i + 1; } @@ -3093,7 +3092,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, #endif objp += obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) - cachep->ctor(cachep, objp); + cachep->ctor(objp); #if ARCH_SLAB_MINALIGN if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) { printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", diff --git a/mm/slob.c b/mm/slob.c index de268eb7ac70..d8fbd4d1bfa7 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -525,12 +525,11 @@ struct kmem_cache { unsigned int size, align; unsigned long flags; const char *name; - void (*ctor)(struct kmem_cache *, void *); + void (*ctor)(void *); }; struct kmem_cache *kmem_cache_create(const char *name, size_t size, - size_t align, unsigned long flags, - void (*ctor)(struct kmem_cache *, void *)) + size_t align, unsigned long flags, void (*ctor)(void *)) { struct kmem_cache *c; @@ -575,7 +574,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node) b = slob_new_page(flags, get_order(c->size), node); if (c->ctor) - c->ctor(c, b); + c->ctor(b); return b; } diff --git a/mm/slub.c b/mm/slub.c index 77c21cf53ff9..b7e2cd5d82db 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1012,7 +1012,7 @@ __setup("slub_debug", setup_slub_debug); static unsigned long kmem_cache_flags(unsigned long objsize, unsigned long flags, const char *name, - void (*ctor)(struct kmem_cache *, void *)) + void (*ctor)(void *)) { /* * Enable debugging if selected on the kernel commandline. 
@@ -1040,7 +1040,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page, static inline void add_full(struct kmem_cache_node *n, struct page *page) {} static inline unsigned long kmem_cache_flags(unsigned long objsize, unsigned long flags, const char *name, - void (*ctor)(struct kmem_cache *, void *)) + void (*ctor)(void *)) { return flags; } @@ -1103,7 +1103,7 @@ static void setup_object(struct kmem_cache *s, struct page *page, { setup_object_debug(s, page, object); if (unlikely(s->ctor)) - s->ctor(s, object); + s->ctor(object); } static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) @@ -2286,7 +2286,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, const char *name, size_t size, size_t align, unsigned long flags, - void (*ctor)(struct kmem_cache *, void *)) + void (*ctor)(void *)) { memset(s, 0, kmem_size); s->name = name; @@ -3042,7 +3042,7 @@ static int slab_unmergeable(struct kmem_cache *s) static struct kmem_cache *find_mergeable(size_t size, size_t align, unsigned long flags, const char *name, - void (*ctor)(struct kmem_cache *, void *)) + void (*ctor)(void *)) { struct kmem_cache *s; @@ -3082,8 +3082,7 @@ static struct kmem_cache *find_mergeable(size_t size, } struct kmem_cache *kmem_cache_create(const char *name, size_t size, - size_t align, unsigned long flags, - void (*ctor)(struct kmem_cache *, void *)) + size_t align, unsigned long flags, void (*ctor)(void *)) { struct kmem_cache *s; diff --git a/net/socket.c b/net/socket.c index 1310a82cbba7..8ef8ba81b9e2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -265,7 +265,7 @@ static void sock_destroy_inode(struct inode *inode) container_of(inode, struct socket_alloc, vfs_inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct socket_alloc *ei = (struct socket_alloc *)foo; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 5a9b0e7828cd..23a2b8f6dc49 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -897,7 +897,7 @@ static struct file_system_type rpc_pipe_fs_type = { }; static void -init_once(struct kmem_cache * cachep, void *foo) +init_once(void *foo) { struct rpc_inode *rpci = (struct rpc_inode *) foo; -- cgit v1.2.3 From 88ac2921a71f788ed693bcd44731dd6bc1994640 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:43 -0700 Subject: tracehook: add linux/tracehook.h This patch series introduces the "tracehook" interface layer of inlines in . There are more details in the log entry for patch 01/23 and in the header file comments inside that patch. Most of these changes move code around with little or no change, and they should not break anything or change any behavior. This sets a new standard for uniform arch support to enable clean arch-independent implementations of new debugging and tracing stuff, denoted by CONFIG_HAVE_ARCH_TRACEHOOK. Patch 20/23 adds that symbol to arch/Kconfig, with comments listing everything an arch has to do before setting "select HAVE_ARCH_TRACEHOOK". These are elaborted a bit at: http://sourceware.org/systemtap/wiki/utrace/arch/HowTo The new inlines that arch code must define or call have detailed kerneldoc comments in the generic header files that say what is required. No arch is obligated to do any work, and no arch's build should be broken by these changes. There are several steps that each arch should take so it can set HAVE_ARCH_TRACEHOOK. Most of these are simple. 
Providing this support will let new things people add for doing debugging and tracing of user-level threads "just work" for your arch in the future. For an arch that does not provide HAVE_ARCH_TRACEHOOK, some new options for such features will not be available for config. I have done some arch work and will submit this to the arch maintainers after the generic tracehook series settles in. For now, that work is available in my GIT repositories, and in patch and mbox-of-patches form at http://people.redhat.com/roland/utrace/2.6-current/ This paves the way for my "utrace" work, to be submitted later. But it is not innately tied to that. I hope that the tracehook series can go in soon regardless of what eventually does or doesn't go on top of it. For anyone implementing any kind of new tracing/debugging plan, or just understanding all the context of the existing ptrace implementation, having tracehook.h makes things much easier to find and understand. This patch: This adds the new kernel-internal header file . This is not yet used at all. The comments in the header introduce what the following series of patches is about. The aim is to formalize and consolidate all the places that the core kernel code and the arch code now ties into the ptrace implementation. These patches mostly don't cause any functional change. They just move the details of ptrace logic out of core code into tracehook.h inlines, where they are mostly compiled away to the same as before. All that changes is that everything is thoroughly documented and any future reworking of ptrace, or addition of something new, would not have to touch core code all over, just change the tracehook.h inlines. The new linux/ptrace.h inlines are used by the following patches in the new tracehook_*() inlines. Using these helpers for the ptrace event stops makes it simple to change or disable the old ptrace implementation of these stops conditionally later. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 33 ++++++++++++++++++++++++++++++ include/linux/tracehook.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 include/linux/tracehook.h (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index c6f5f9dd0cee..c74abfc4c7e8 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -121,6 +121,39 @@ static inline void ptrace_unlink(struct task_struct *child) int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data); int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data); +/** + * task_ptrace - return %PT_* flags that apply to a task + * @task: pointer to &task_struct in question + * + * Returns the %PT_* flags that apply to @task. + */ +static inline int task_ptrace(struct task_struct *task) +{ + return task->ptrace; +} + +/** + * ptrace_event - possibly stop for a ptrace event notification + * @mask: %PT_* bit to check in @current->ptrace + * @event: %PTRACE_EVENT_* value to report if @mask is set + * @message: value for %PTRACE_GETEVENTMSG to return + * + * This checks the @mask bit to see if ptrace wants stops for this event. + * If so we stop, reporting @event and @message to the ptrace parent. + * + * Returns nonzero if we did a ptrace notification, zero if not. + * + * Called without locks. 
+ */ +static inline int ptrace_event(int mask, int event, unsigned long message) +{ + if (mask && likely(!(current->ptrace & mask))) + return 0; + current->ptrace_message = message; + ptrace_notify((event << 8) | SIGTRAP); + return 1; +} + #ifndef force_successful_syscall_return /* * System call handlers that, upon successful completion, need to return a diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h new file mode 100644 index 000000000000..bea0f3eeff54 --- /dev/null +++ b/include/linux/tracehook.h @@ -0,0 +1,52 @@ +/* + * Tracing hooks + * + * Copyright (C) 2008 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * This file defines hook entry points called by core code where + * user tracing/debugging support might need to do something. These + * entry points are called tracehook_*(). Each hook declared below + * has a detailed kerneldoc comment giving the context (locking et + * al) from which it is called, and the meaning of its return value. + * + * Each function here typically has only one call site, so it is ok + * to have some nontrivial tracehook_*() inlines. In all cases, the + * fast path when no tracing is enabled should be very short. + * + * The purpose of this file and the tracehook_* layer is to consolidate + * the interface that the kernel core and arch code uses to enable any + * user debugging or tracing facility (such as ptrace). The interfaces + * here are carefully documented so that maintainers of core and arch + * code do not need to think about the implementation details of the + * tracing facilities. Likewise, maintainers of the tracing code do not + * need to understand all the calling core or arch code in detail, just + * documented circumstances of each call, such as locking conditions. + * + * If the calling core code changes so that locking is different, then + * it is ok to change the interface documented here. The maintainer of + * core code changing should notify the maintainers of the tracing code + * that they need to work out the change. + * + * Some tracehook_*() inlines take arguments that the current tracing + * implementations might not necessarily use. These function signatures + * are chosen to pass in all the information that is on hand in the + * caller and might conceivably be relevant to a tracer, so that the + * core code won't have to be updated when tracing adds more features. + * If a call site changes so that some of those parameters are no longer + * already on hand without extra work, then the tracehook_* interface + * can change so there is no make-work burden on the core code. The + * maintainer of core code changing should notify the maintainers of the + * tracing code that they need to work out the change. + */ + +#ifndef _LINUX_TRACEHOOK_H +#define _LINUX_TRACEHOOK_H 1 + +#include +#include + +#endif /* */ -- cgit v1.2.3 From 6341c393fcc37d58727865f1ee2f65e632e9d4f0 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:44 -0700 Subject: tracehook: exec This moves all the ptrace hooks related to exec into tracehook.h inlines. This also lifts the calls for tracing out of the binfmt load_binary hooks into search_binary_handler() after it calls into the binfmt module. 
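Condensed from the hunks below (no new code), the per-binfmt boilerplate that used to end every successful load_binary(),

	if (unlikely(current->ptrace & PT_PTRACED)) {
		if (current->ptrace & PT_TRACE_EXEC)
			ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
		else
			send_sig(SIGTRAP, current, 0);
	}

collapses into a single call made by search_binary_handler() in fs/exec.c:

	retval = fn(bprm, regs);
	if (retval >= 0) {
		tracehook_report_exec(fmt, bprm, regs);
		put_binfmt(fmt);
		...
	}
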
This change has no effect, since all the binfmt modules' load_binary functions did the call at the end on success, and now search_binary_handler() does it immediately after return if successful. We consolidate the repeated code, and binfmt modules no longer need to import ptrace_notify(). Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/ia32/ia32_aout.c | 6 ------ fs/binfmt_aout.c | 6 ------ fs/binfmt_elf.c | 6 ------ fs/binfmt_elf_fdpic.c | 7 ------- fs/binfmt_flat.c | 3 --- fs/binfmt_som.c | 2 -- fs/exec.c | 12 ++++-------- include/linux/tracehook.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 50 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 58cccb6483b0..a0e1dbe67dc1 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -441,12 +441,6 @@ beyond_if: regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; set_fs(USER_DS); - if (unlikely(current->ptrace & PT_PTRACED)) { - if (current->ptrace & PT_TRACE_EXEC) - ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP); - else - send_sig(SIGTRAP, current, 0); - } return 0; } diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index ba4cddb92f1d..204cfd1d7676 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -444,12 +444,6 @@ beyond_if: regs->gp = ex.a_gpvalue; #endif start_thread(regs, ex.a_entry, current->mm->start_stack); - if (unlikely(current->ptrace & PT_PTRACED)) { - if (current->ptrace & PT_TRACE_EXEC) - ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); - else - send_sig(SIGTRAP, current, 0); - } return 0; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 3b6ff854d983..655ed8d30a86 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1003,12 +1003,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) #endif start_thread(regs, elf_entry, bprm->p); - if (unlikely(current->ptrace & PT_PTRACED)) { - if (current->ptrace & PT_TRACE_EXEC) - ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); - else - send_sig(SIGTRAP, current, 0); - } retval = 0; out: kfree(loc); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 1b59b1edf26d..fdeadab2f18b 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -433,13 +433,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, entryaddr = interp_params.entry_addr ?: exec_params.entry_addr; start_thread(regs, entryaddr, current->mm->start_stack); - if (unlikely(current->ptrace & PT_PTRACED)) { - if (current->ptrace & PT_TRACE_EXEC) - ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP); - else - send_sig(SIGTRAP, current, 0); - } - retval = 0; error: diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 2cb1acda3a82..56372ecf1690 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -920,9 +920,6 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) start_thread(regs, start_addr, current->mm->start_stack); - if (current->ptrace & PT_PTRACED) - send_sig(SIGTRAP, current, 0); - return 0; } diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index fdc36bfd6a7b..68be580ba289 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -274,8 +274,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) map_hpux_gateway_page(current,current->mm); start_thread_som(regs, som_entry, bprm->p); - if (current->ptrace & PT_PTRACED) - send_sig(SIGTRAP, current, 
0); return 0; /* error cleanup */ diff --git a/fs/exec.c b/fs/exec.c index 5e559013e303..b8792a131533 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -42,13 +42,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include #include #include @@ -1071,13 +1071,8 @@ EXPORT_SYMBOL(prepare_binprm); static int unsafe_exec(struct task_struct *p) { - int unsafe = 0; - if (p->ptrace & PT_PTRACED) { - if (p->ptrace & PT_PTRACE_CAP) - unsafe |= LSM_UNSAFE_PTRACE_CAP; - else - unsafe |= LSM_UNSAFE_PTRACE; - } + int unsafe = tracehook_unsafe_exec(p); + if (atomic_read(&p->fs->count) > 1 || atomic_read(&p->files->count) > 1 || atomic_read(&p->sighand->count) > 1) @@ -1214,6 +1209,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) read_unlock(&binfmt_lock); retval = fn(bprm, regs); if (retval >= 0) { + tracehook_report_exec(fmt, bprm, regs); put_binfmt(fmt); allow_write_access(bprm->file); if (bprm->file) diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index bea0f3eeff54..6276353709c1 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -48,5 +48,51 @@ #include #include +#include +struct linux_binprm; + +/** + * tracehook_unsafe_exec - check for exec declared unsafe due to tracing + * @task: current task doing exec + * + * Return %LSM_UNSAFE_* bits applied to an exec because of tracing. + * + * Called with task_lock() held on @task. + */ +static inline int tracehook_unsafe_exec(struct task_struct *task) +{ + int unsafe = 0; + int ptrace = task_ptrace(task); + if (ptrace & PT_PTRACED) { + if (ptrace & PT_PTRACE_CAP) + unsafe |= LSM_UNSAFE_PTRACE_CAP; + else + unsafe |= LSM_UNSAFE_PTRACE; + } + return unsafe; +} + +/** + * tracehook_report_exec - a successful exec was completed + * @fmt: &struct linux_binfmt that performed the exec + * @bprm: &struct linux_binprm containing exec details + * @regs: user-mode register state + * + * An exec just completed, we are shortly going to return to user mode. + * The freshly initialized register state can be seen and changed in @regs. + * The name, file and other pointers in @bprm are still on hand to be + * inspected, but will be freed as soon as this returns. + * + * Called with no locks, but with some kernel resources held live + * and a reference on @fmt->module. + */ +static inline void tracehook_report_exec(struct linux_binfmt *fmt, + struct linux_binprm *bprm, + struct pt_regs *regs) +{ + if (!ptrace_event(PT_TRACE_EXEC, PTRACE_EVENT_EXEC, 0) && + unlikely(task_ptrace(current) & PT_PTRACED)) + send_sig(SIGTRAP, current, 0); +} #endif /* */ -- cgit v1.2.3 From 30199f5a46aee204bf437a4f5b0740f3efe448b7 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:46 -0700 Subject: tracehook: exit This moves the PTRACE_EVENT_EXIT tracing into a tracehook.h inline, tracehook_report_exec(). The change has no effect, just clean-up. 
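Concretely — restating the hunks below rather than adding anything new — the inline is tracehook_report_exit(), and it reduces the open-coded check at the top of do_exit(),

	if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
		current->ptrace_message = code;
		ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
	}

to a single line,

	tracehook_report_exit(&code);

with the inline itself just wrapping ptrace_event(PT_TRACE_EXIT, PTRACE_EVENT_EXIT, *exit_code).
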
Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 15 +++++++++++++++ kernel/exit.c | 6 ++---- 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 6276353709c1..967ab473afbc 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -95,4 +95,19 @@ static inline void tracehook_report_exec(struct linux_binfmt *fmt, send_sig(SIGTRAP, current, 0); } +/** + * tracehook_report_exit - task has begun to exit + * @exit_code: pointer to value destined for @current->exit_code + * + * @exit_code points to the value passed to do_exit(), which tracing + * might change here. This is almost the first thing in do_exit(), + * before freeing any resources or setting the %PF_EXITING flag. + * + * Called with no locks held. + */ +static inline void tracehook_report_exit(long *exit_code) +{ + ptrace_event(PT_TRACE_EXIT, PTRACE_EVENT_EXIT, *exit_code); +} + #endif /* */ diff --git a/kernel/exit.c b/kernel/exit.c index ad933bb29ec7..c3691cbc220a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -1029,10 +1030,7 @@ NORET_TYPE void do_exit(long code) if (unlikely(!tsk->pid)) panic("Attempted to kill the idle task!"); - if (unlikely(current->ptrace & PT_TRACE_EXIT)) { - current->ptrace_message = code; - ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP); - } + tracehook_report_exit(&code); /* * We're taking recursive faults here in do_exit. Safest is to just -- cgit v1.2.3 From 09a05394fe2448a4139b014936330af23fa7ec83 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:47 -0700 Subject: tracehook: clone This moves all the ptrace initialization and tracing logic for task creation into tracehook.h and ptrace.h inlines. It reorganizes the code slightly, but should not change any behavior. There are four tracehook entry points, at each important stage of task creation. This keeps the interface from the core fork.c code fairly clean, while supporting the complex setup required for ptrace or something like it. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 22 ++++++++++ include/linux/tracehook.h | 100 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/fork.c | 69 +++++++++++++------------------- 3 files changed, 150 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index c74abfc4c7e8..dae6d85520fb 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -154,6 +154,28 @@ static inline int ptrace_event(int mask, int event, unsigned long message) return 1; } +/** + * ptrace_init_task - initialize ptrace state for a new child + * @child: new child task + * @ptrace: true if child should be ptrace'd by parent's tracer + * + * This is called immediately after adding @child to its parent's children + * list. @ptrace is false in the normal case, and true to ptrace @child. + * + * Called with current's siglock and write_lock_irq(&tasklist_lock) held. 
+ */ +static inline void ptrace_init_task(struct task_struct *child, bool ptrace) +{ + INIT_LIST_HEAD(&child->ptrace_entry); + INIT_LIST_HEAD(&child->ptraced); + child->parent = child->real_parent; + child->ptrace = 0; + if (unlikely(ptrace)) { + child->ptrace = current->ptrace; + __ptrace_link(child, current->parent); + } +} + #ifndef force_successful_syscall_return /* * System call handlers that, upon successful completion, need to return a diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 967ab473afbc..3ebc58b59766 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -110,4 +110,104 @@ static inline void tracehook_report_exit(long *exit_code) ptrace_event(PT_TRACE_EXIT, PTRACE_EVENT_EXIT, *exit_code); } +/** + * tracehook_prepare_clone - prepare for new child to be cloned + * @clone_flags: %CLONE_* flags from clone/fork/vfork system call + * + * This is called before a new user task is to be cloned. + * Its return value will be passed to tracehook_finish_clone(). + * + * Called with no locks held. + */ +static inline int tracehook_prepare_clone(unsigned clone_flags) +{ + if (clone_flags & CLONE_UNTRACED) + return 0; + + if (clone_flags & CLONE_VFORK) { + if (current->ptrace & PT_TRACE_VFORK) + return PTRACE_EVENT_VFORK; + } else if ((clone_flags & CSIGNAL) != SIGCHLD) { + if (current->ptrace & PT_TRACE_CLONE) + return PTRACE_EVENT_CLONE; + } else if (current->ptrace & PT_TRACE_FORK) + return PTRACE_EVENT_FORK; + + return 0; +} + +/** + * tracehook_finish_clone - new child created and being attached + * @child: new child task + * @clone_flags: %CLONE_* flags from clone/fork/vfork system call + * @trace: return value from tracehook_clone_prepare() + * + * This is called immediately after adding @child to its parent's children list. + * The @trace value is that returned by tracehook_prepare_clone(). + * + * Called with current's siglock and write_lock_irq(&tasklist_lock) held. + */ +static inline void tracehook_finish_clone(struct task_struct *child, + unsigned long clone_flags, int trace) +{ + ptrace_init_task(child, (clone_flags & CLONE_PTRACE) || trace); +} + +/** + * tracehook_report_clone - in parent, new child is about to start running + * @trace: return value from tracehook_clone_prepare() + * @regs: parent's user register state + * @clone_flags: flags from parent's system call + * @pid: new child's PID in the parent's namespace + * @child: new child task + * + * Called after a child is set up, but before it has been started running. + * The @trace value is that returned by tracehook_clone_prepare(). + * This is not a good place to block, because the child has not started yet. + * Suspend the child here if desired, and block in tracehook_clone_complete(). + * This must prevent the child from self-reaping if tracehook_clone_complete() + * uses the @child pointer; otherwise it might have died and been released by + * the time tracehook_report_clone_complete() is called. + * + * Called with no locks held, but the child cannot run until this returns. + */ +static inline void tracehook_report_clone(int trace, struct pt_regs *regs, + unsigned long clone_flags, + pid_t pid, struct task_struct *child) +{ + if (unlikely(trace)) { + /* + * The child starts up with an immediate SIGSTOP. 
+ */ + sigaddset(&child->pending.signal, SIGSTOP); + set_tsk_thread_flag(child, TIF_SIGPENDING); + } +} + +/** + * tracehook_report_clone_complete - new child is running + * @trace: return value from tracehook_clone_prepare() + * @regs: parent's user register state + * @clone_flags: flags from parent's system call + * @pid: new child's PID in the parent's namespace + * @child: child task, already running + * + * This is called just after the child has started running. This is + * just before the clone/fork syscall returns, or blocks for vfork + * child completion if @clone_flags has the %CLONE_VFORK bit set. + * The @child pointer may be invalid if a self-reaping child died and + * tracehook_report_clone() took no action to prevent it from self-reaping. + * + * Called with no locks held. + */ +static inline void tracehook_report_clone_complete(int trace, + struct pt_regs *regs, + unsigned long clone_flags, + pid_t pid, + struct task_struct *child) +{ + if (unlikely(trace)) + ptrace_event(0, trace, pid); +} + #endif /* */ diff --git a/kernel/fork.c b/kernel/fork.c index 80e83e459b17..b42f8ed23611 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -865,8 +866,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) new_flags &= ~PF_SUPERPRIV; new_flags |= PF_FORKNOEXEC; - if (!(clone_flags & CLONE_PTRACE)) - p->ptrace = 0; + new_flags |= PF_STARTING; p->flags = new_flags; clear_freeze_flag(p); } @@ -907,7 +907,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, struct pt_regs *regs, unsigned long stack_size, int __user *child_tidptr, - struct pid *pid) + struct pid *pid, + int trace) { int retval; struct task_struct *p; @@ -1163,8 +1164,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); - INIT_LIST_HEAD(&p->ptrace_entry); - INIT_LIST_HEAD(&p->ptraced); /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible @@ -1195,7 +1194,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->real_parent = current->real_parent; else p->real_parent = current; - p->parent = p->real_parent; spin_lock(¤t->sighand->siglock); @@ -1237,8 +1235,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (likely(p->pid)) { list_add_tail(&p->sibling, &p->real_parent->children); - if (unlikely(p->ptrace & PT_PTRACED)) - __ptrace_link(p, current->parent); + tracehook_finish_clone(p, clone_flags, trace); if (thread_group_leader(p)) { if (clone_flags & CLONE_NEWPID) @@ -1323,29 +1320,13 @@ struct task_struct * __cpuinit fork_idle(int cpu) struct pt_regs regs; task = copy_process(CLONE_VM, 0, idle_regs(®s), 0, NULL, - &init_struct_pid); + &init_struct_pid, 0); if (!IS_ERR(task)) init_idle(task, cpu); return task; } -static int fork_traceflag(unsigned clone_flags) -{ - if (clone_flags & CLONE_UNTRACED) - return 0; - else if (clone_flags & CLONE_VFORK) { - if (current->ptrace & PT_TRACE_VFORK) - return PTRACE_EVENT_VFORK; - } else if ((clone_flags & CSIGNAL) != SIGCHLD) { - if (current->ptrace & PT_TRACE_CLONE) - return PTRACE_EVENT_CLONE; - } else if (current->ptrace & PT_TRACE_FORK) - return PTRACE_EVENT_FORK; - - return 0; -} - /* * Ok, this is the main fork-routine. 
* @@ -1380,14 +1361,14 @@ long do_fork(unsigned long clone_flags, } } - if (unlikely(current->ptrace)) { - trace = fork_traceflag (clone_flags); - if (trace) - clone_flags |= CLONE_PTRACE; - } + /* + * When called from kernel_thread, don't do user tracing stuff. + */ + if (likely(user_mode(regs))) + trace = tracehook_prepare_clone(clone_flags); p = copy_process(clone_flags, stack_start, regs, stack_size, - child_tidptr, NULL); + child_tidptr, NULL, trace); /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. @@ -1405,24 +1386,30 @@ long do_fork(unsigned long clone_flags, init_completion(&vfork); } - if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) { + tracehook_report_clone(trace, regs, clone_flags, nr, p); + + /* + * We set PF_STARTING at creation in case tracing wants to + * use this to distinguish a fully live task from one that + * hasn't gotten to tracehook_report_clone() yet. Now we + * clear it and set the child going. + */ + p->flags &= ~PF_STARTING; + + if (unlikely(clone_flags & CLONE_STOPPED)) { /* * We'll start up with an immediate SIGSTOP. */ sigaddset(&p->pending.signal, SIGSTOP); set_tsk_thread_flag(p, TIF_SIGPENDING); - } - - if (!(clone_flags & CLONE_STOPPED)) - wake_up_new_task(p, clone_flags); - else __set_task_state(p, TASK_STOPPED); - - if (unlikely (trace)) { - current->ptrace_message = nr; - ptrace_notify ((trace << 8) | SIGTRAP); + } else { + wake_up_new_task(p, clone_flags); } + tracehook_report_clone_complete(trace, regs, + clone_flags, nr, p); + if (clone_flags & CLONE_VFORK) { freezer_do_not_count(); wait_for_completion(&vfork); -- cgit v1.2.3 From daded34be96b1975ff8539ff62ad8b158ce7d842 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:47 -0700 Subject: tracehook: vfork-done This moves the PTRACE_EVENT_VFORK_DONE tracing into a tracehook.h inline, tracehook_report_vfork_done(). The change has no effect, just clean-up. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 18 ++++++++++++++++++ kernel/fork.c | 5 +---- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 3ebc58b59766..830e6e16097d 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -210,4 +210,22 @@ static inline void tracehook_report_clone_complete(int trace, ptrace_event(0, trace, pid); } +/** + * tracehook_report_vfork_done - vfork parent's child has exited or exec'd + * @child: child task, already running + * @pid: new child's PID in the parent's namespace + * + * Called after a %CLONE_VFORK parent has waited for the child to complete. + * The clone/vfork system call will return immediately after this. + * The @child pointer may be invalid if a self-reaping child died and + * tracehook_report_clone() took no action to prevent it from self-reaping. + * + * Called with no locks held. 
+ */ +static inline void tracehook_report_vfork_done(struct task_struct *child, + pid_t pid) +{ + ptrace_event(PT_TRACE_VFORK_DONE, PTRACE_EVENT_VFORK_DONE, pid); +} + #endif /* */ diff --git a/kernel/fork.c b/kernel/fork.c index b42f8ed23611..abb3ed6298f6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1414,10 +1414,7 @@ long do_fork(unsigned long clone_flags, freezer_do_not_count(); wait_for_completion(&vfork); freezer_count(); - if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) { - current->ptrace_message = nr; - ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); - } + tracehook_report_vfork_done(p, nr); } } else { nr = PTR_ERR(p); -- cgit v1.2.3 From dae33574dcf5211e1f43c7e45fa29f73ba3e00cb Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:48 -0700 Subject: tracehook: release_task This moves the ptrace-related logic from release_task into tracehook.h and ptrace.h inlines. It provides clean hooks both before and after locking tasklist_lock, for future tracing logic to do more cleanup without the lock. This also changes release_task() itself in the rare "zap_leader" case to set the leader to EXIT_DEAD before iterating. This maintains the invariant that release_task() only ever handles a task in EXIT_DEAD. This is a common-sense invariant that is already always true except in this one arcane case of zombie leader whose parent ignores SIGCHLD. This change is harmless and only costs one store in this one rare case. It keeps the expected state more consisently sane, which is nicer when debugging weirdness in release_task(). It also lets some future code in the tracehook entry points rely on this invariant for bookkeeping. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 13 +++++++++++++ include/linux/tracehook.h | 28 ++++++++++++++++++++++++++++ kernel/exit.c | 21 +++++++++------------ 3 files changed, 50 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index dae6d85520fb..ed69c03692d9 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -176,6 +176,19 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) } } +/** + * ptrace_release_task - final ptrace-related cleanup of a zombie being reaped + * @task: task in %EXIT_DEAD state + * + * Called with write_lock(&tasklist_lock) held. + */ +static inline void ptrace_release_task(struct task_struct *task) +{ + BUG_ON(!list_empty(&task->ptraced)); + ptrace_unlink(task); + BUG_ON(!list_empty(&task->ptrace_entry)); +} + #ifndef force_successful_syscall_return /* * System call handlers that, upon successful completion, need to return a diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 830e6e16097d..9a5b3be2503a 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -228,4 +228,32 @@ static inline void tracehook_report_vfork_done(struct task_struct *child, ptrace_event(PT_TRACE_VFORK_DONE, PTRACE_EVENT_VFORK_DONE, pid); } +/** + * tracehook_prepare_release_task - task is being reaped, clean up tracing + * @task: task in %EXIT_DEAD state + * + * This is called in release_task() just before @task gets finally reaped + * and freed. This would be the ideal place to remove and clean up any + * tracing-related state for @task. + * + * Called with no locks held. 
+ */ +static inline void tracehook_prepare_release_task(struct task_struct *task) +{ +} + +/** + * tracehook_finish_release_task - task is being reaped, clean up tracing + * @task: task in %EXIT_DEAD state + * + * This is called in release_task() when @task is being in the middle of + * being reaped. After this, there must be no tracing entanglements. + * + * Called with write_lock_irq(&tasklist_lock) held. + */ +static inline void tracehook_finish_release_task(struct task_struct *task) +{ + ptrace_release_task(task); +} + #endif /* */ diff --git a/kernel/exit.c b/kernel/exit.c index c3691cbc220a..da28745f7c38 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -163,27 +163,17 @@ static void delayed_put_task_struct(struct rcu_head *rhp) put_task_struct(container_of(rhp, struct task_struct, rcu)); } -/* - * Do final ptrace-related cleanup of a zombie being reaped. - * - * Called with write_lock(&tasklist_lock) held. - */ -static void ptrace_release_task(struct task_struct *p) -{ - BUG_ON(!list_empty(&p->ptraced)); - ptrace_unlink(p); - BUG_ON(!list_empty(&p->ptrace_entry)); -} void release_task(struct task_struct * p) { struct task_struct *leader; int zap_leader; repeat: + tracehook_prepare_release_task(p); atomic_dec(&p->user->processes); proc_flush_task(p); write_lock_irq(&tasklist_lock); - ptrace_release_task(p); + tracehook_finish_release_task(p); __exit_signal(p); /* @@ -205,6 +195,13 @@ repeat: * that case. */ zap_leader = task_detached(leader); + + /* + * This maintains the invariant that release_task() + * only runs on a task in EXIT_DEAD, just for sanity. + */ + if (zap_leader) + leader->exit_state = EXIT_DEAD; } write_unlock_irq(&tasklist_lock); -- cgit v1.2.3 From 0d094efeb1e98010c6b99923f1eb7e17bf1e3a74 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:49 -0700 Subject: tracehook: tracehook_tracer_task This adds the tracehook_tracer_task() hook to consolidate all forms of "Who is using ptrace on me?" logic. This is used for "TracerPid:" in /proc and for permission checks. We also clean up the selinux code the called an identical accessor. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 9 +++++++-- fs/proc/base.c | 13 +++++++++---- include/linux/tracehook.h | 18 ++++++++++++++++++ security/selinux/hooks.c | 22 +++------------------- 4 files changed, 37 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/array.c b/fs/proc/array.c index 797d775e0354..0d6eb33597c6 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include @@ -168,8 +169,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, rcu_read_lock(); ppid = pid_alive(p) ? task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; - tpid = pid_alive(p) && p->ptrace ? 
- task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0; + tpid = 0; + if (pid_alive(p)) { + struct task_struct *tracer = tracehook_tracer_task(p); + if (tracer) + tpid = task_pid_nr_ns(tracer, ns); + } seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" diff --git a/fs/proc/base.c b/fs/proc/base.c index a891fe4cb43b..4b74dba69a6d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -69,6 +69,7 @@ #include #include #include +#include #include #include #include @@ -231,10 +232,14 @@ static int check_mem_permission(struct task_struct *task) * If current is actively ptrace'ing, and would also be * permitted to freshly attach with ptrace now, permit it. */ - if (task->parent == current && (task->ptrace & PT_PTRACED) && - task_is_stopped_or_traced(task) && - ptrace_may_access(task, PTRACE_MODE_ATTACH)) - return 0; + if (task_is_stopped_or_traced(task)) { + int match; + rcu_read_lock(); + match = (tracehook_tracer_task(task) == current); + rcu_read_unlock(); + if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) + return 0; + } /* * Noone else is allowed. diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 9a5b3be2503a..6468ca0fe69b 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -72,6 +72,24 @@ static inline int tracehook_unsafe_exec(struct task_struct *task) return unsafe; } +/** + * tracehook_tracer_task - return the task that is tracing the given task + * @tsk: task to consider + * + * Returns NULL if noone is tracing @task, or the &struct task_struct + * pointer to its tracer. + * + * Must called under rcu_read_lock(). The pointer returned might be kept + * live only by RCU. During exec, this may be called with task_lock() + * held on @task, still held from when tracehook_unsafe_exec() was called. + */ +static inline struct task_struct *tracehook_tracer_task(struct task_struct *tsk) +{ + if (task_ptrace(tsk) & PT_PTRACED) + return rcu_dereference(tsk->parent); + return NULL; +} + /** * tracehook_report_exec - a successful exec was completed * @fmt: &struct linux_binfmt that performed the exec diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 63f131fc42e4..3481cde5bf15 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include @@ -1971,22 +1971,6 @@ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages) return __vm_enough_memory(mm, pages, cap_sys_admin); } -/** - * task_tracer_task - return the task that is tracing the given task - * @task: task to consider - * - * Returns NULL if noone is tracing @task, or the &struct task_struct - * pointer to its tracer. - * - * Must be called under rcu_read_lock(). - */ -static struct task_struct *task_tracer_task(struct task_struct *task) -{ - if (task->ptrace & PT_PTRACED) - return rcu_dereference(task->parent); - return NULL; -} - /* binprm security operations */ static int selinux_bprm_alloc_security(struct linux_binprm *bprm) @@ -2238,7 +2222,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) u32 ptsid = 0; rcu_read_lock(); - tracer = task_tracer_task(current); + tracer = tracehook_tracer_task(current); if (likely(tracer != NULL)) { sec = tracer->security; ptsid = sec->sid; @@ -5247,7 +5231,7 @@ static int selinux_setprocattr(struct task_struct *p, Otherwise, leave SID unchanged and fail. 
*/ task_lock(p); rcu_read_lock(); - tracer = task_tracer_task(p); + tracer = tracehook_tracer_task(p); if (tracer != NULL) { struct task_security_struct *ptsec = tracer->security; u32 ptsid = ptsec->sid; -- cgit v1.2.3 From fa8e26ccd485216fc45c8c2dd1ec3b7ef1a0a2f8 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:50 -0700 Subject: tracehook: tracehook_expect_breakpoints This adds tracehook_expect_breakpoints() as a formal hook for the nommu code to use for its, "Is text-poking likely?" check at mmap time. This names the actual semantics the code means to test, and documents it. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 15 +++++++++++++++ mm/nommu.c | 4 ++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 6468ca0fe69b..e113e09b0341 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -51,6 +51,21 @@ #include struct linux_binprm; +/** + * tracehook_expect_breakpoints - guess if task memory might be touched + * @task: current task, making a new mapping + * + * Return nonzero if @task is expected to want breakpoint insertion in + * its memory at some point. A zero return is no guarantee it won't + * be done, but this is a hint that it's known to be likely. + * + * May be called with @task->mm->mmap_sem held for writing. + */ +static inline int tracehook_expect_breakpoints(struct task_struct *task) +{ + return (task_ptrace(task) & PT_PTRACED) != 0; +} + /** * tracehook_unsafe_exec - check for exec declared unsafe due to tracing * @task: current task doing exec diff --git a/mm/nommu.c b/mm/nommu.c index 4462b6a3fcb9..5edccd9c9218 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include @@ -745,7 +745,7 @@ static unsigned long determine_vm_flags(struct file *file, * it's being traced - otherwise breakpoints set in it may interfere * with another untraced process */ - if ((flags & MAP_PRIVATE) && (current->ptrace & PT_PTRACED)) + if ((flags & MAP_PRIVATE) && tracehook_expect_breakpoints(current)) vm_flags &= ~VM_MAYSHARE; return vm_flags; -- cgit v1.2.3 From c45aea27617d6a1e0aacddc3b0233f704222fcbd Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:50 -0700 Subject: tracehook: tracehook_signal_handler This defines tracehook_signal_handler() as a hook for the arch signal handling code to call. It gives ptrace the opportunity to stop for a pseudo-single-step trap immediately after signal handler setup is done. 
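A sketch of the intended call site in an architecture's signal delivery code is shown below. It is illustrative only: my_arch_handle_signal() and my_arch_setup_rt_frame() are stand-ins for whatever the architecture actually uses, and TIF_SINGLESTEP is assumed to be how that architecture tracks user single-stepping.

    static int my_arch_handle_signal(int sig, siginfo_t *info,
                                     struct k_sigaction *ka, sigset_t *oldset,
                                     struct pt_regs *regs)
    {
            int ret;

            /* Build the user-mode handler frame (hypothetical arch helper). */
            ret = my_arch_setup_rt_frame(sig, ka, info, oldset, regs);
            if (ret)
                    return ret;

            /* (signal blocking / sa_mask bookkeeping omitted for brevity) */

            /*
             * Register and stack state now reflect the handler about to run;
             * give the tracer its pseudo-single-step stop if it was stepping.
             * TIF_SINGLESTEP is the arch-specific single-step flag assumed here.
             */
            tracehook_signal_handler(sig, info, ka, regs,
                                     test_thread_flag(TIF_SINGLESTEP));
            return 0;
    }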
Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index e113e09b0341..2d1426f8e33b 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -289,4 +289,27 @@ static inline void tracehook_finish_release_task(struct task_struct *task) ptrace_release_task(task); } +/** + * tracehook_signal_handler - signal handler setup is complete + * @sig: number of signal being delivered + * @info: siginfo_t of signal being delivered + * @ka: sigaction setting that chose the handler + * @regs: user register state + * @stepping: nonzero if debugger single-step or block-step in use + * + * Called by the arch code after a signal handler has been set up. + * Register and stack state reflects the user handler about to run. + * Signal mask changes have already been made. + * + * Called without locks, shortly before returning to user mode + * (or handling more signals). + */ +static inline void tracehook_signal_handler(int sig, siginfo_t *info, + const struct k_sigaction *ka, + struct pt_regs *regs, int stepping) +{ + if (stepping) + ptrace_notify(SIGTRAP); +} + #endif /* */ -- cgit v1.2.3 From 35de254dc60f91004b3b5ebb1fc7b2c3093d6032 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:51 -0700 Subject: tracehook: tracehook_consider_ignored_signal This defines tracehook_consider_ignored_signal() has a fine-grained hook for deciding to prevent the normal short-circuit of sending an ignored signal, as ptrace does. There is no change, only cleanup. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 19 +++++++++++++++++++ kernel/signal.c | 27 ++++++++++++++++----------- 2 files changed, 35 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 2d1426f8e33b..8cffd34f88d5 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -312,4 +312,23 @@ static inline void tracehook_signal_handler(int sig, siginfo_t *info, ptrace_notify(SIGTRAP); } +/** + * tracehook_consider_ignored_signal - suppress short-circuit of ignored signal + * @task: task receiving the signal + * @sig: signal number being sent + * @handler: %SIG_IGN or %SIG_DFL + * + * Return zero iff tracing doesn't care to examine this ignored signal, + * so it can short-circuit normal delivery and never even get queued. + * Either @handler is %SIG_DFL and @sig's default is ignore, or it's %SIG_IGN. + * + * Called with @task->sighand->siglock held. 
+ */ +static inline int tracehook_consider_ignored_signal(struct task_struct *task, + int sig, + void __user *handler) +{ + return (task_ptrace(task) & PT_PTRACED) != 0; +} + #endif /* */ diff --git a/kernel/signal.c b/kernel/signal.c index 8715c18b27b9..9efd1cee6d0b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -39,24 +40,21 @@ static struct kmem_cache *sigqueue_cachep; -static int __sig_ignored(struct task_struct *t, int sig) +static void __user *sig_handler(struct task_struct *t, int sig) { - void __user *handler; + return t->sighand->action[sig - 1].sa.sa_handler; +} +static int sig_handler_ignored(void __user *handler, int sig) +{ /* Is it explicitly or implicitly ignored? */ - - handler = t->sighand->action[sig - 1].sa.sa_handler; return handler == SIG_IGN || (handler == SIG_DFL && sig_kernel_ignore(sig)); } static int sig_ignored(struct task_struct *t, int sig) { - /* - * Tracers always want to know about signals.. - */ - if (t->ptrace & PT_PTRACED) - return 0; + void __user *handler; /* * Blocked signals are never ignored, since the @@ -66,7 +64,14 @@ static int sig_ignored(struct task_struct *t, int sig) if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) return 0; - return __sig_ignored(t, sig); + handler = sig_handler(t, sig); + if (!sig_handler_ignored(handler, sig)) + return 0; + + /* + * Tracers may want to know about even ignored signals. + */ + return !tracehook_consider_ignored_signal(t, sig, handler); } /* @@ -2298,7 +2303,7 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) * (for example, SIGCHLD), shall cause the pending signal to * be discarded, whether or not it is blocked" */ - if (__sig_ignored(t, sig)) { + if (sig_handler_ignored(sig_handler(t, sig), sig)) { sigemptyset(&mask); sigaddset(&mask, sig); rm_from_queue_full(&mask, &t->signal->shared_pending); -- cgit v1.2.3 From 445a91d2fe3667fb8fc251433645f686933cf56a Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:52 -0700 Subject: tracehook: tracehook_consider_fatal_signal This defines tracehook_consider_fatal_signal() has a fine-grained hook for deciding to skip the special cases for a fatal signal, as ptrace does. There is no change, only cleanup. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 21 +++++++++++++++++++++ kernel/signal.c | 9 +++++---- 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 8cffd34f88d5..8b4c15e208fe 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -331,4 +331,25 @@ static inline int tracehook_consider_ignored_signal(struct task_struct *task, return (task_ptrace(task) & PT_PTRACED) != 0; } +/** + * tracehook_consider_fatal_signal - suppress special handling of fatal signal + * @task: task receiving the signal + * @sig: signal number being sent + * @handler: %SIG_DFL or %SIG_IGN + * + * Return nonzero to prevent special handling of this termination signal. + * Normally @handler is %SIG_DFL. It can be %SIG_IGN if @sig is ignored, + * in which case force_sig() is about to reset it to %SIG_DFL. + * When this returns zero, this signal might cause a quick termination + * that does not give the debugger a chance to intercept the signal. 
+ * + * Called with or without @task->sighand->siglock held. + */ +static inline int tracehook_consider_fatal_signal(struct task_struct *task, + int sig, + void __user *handler) +{ + return (task_ptrace(task) & PT_PTRACED) != 0; +} + #endif /* */ diff --git a/kernel/signal.c b/kernel/signal.c index 9efd1cee6d0b..1a942ce32ba0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -300,12 +300,12 @@ flush_signal_handlers(struct task_struct *t, int force_default) int unhandled_signal(struct task_struct *tsk, int sig) { + void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler; if (is_global_init(tsk)) return 1; - if (tsk->ptrace & PT_PTRACED) + if (handler != SIG_IGN && handler != SIG_DFL) return 0; - return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) || - (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL); + return !tracehook_consider_fatal_signal(tsk, sig, handler); } @@ -761,7 +761,8 @@ static void complete_signal(int sig, struct task_struct *p, int group) if (sig_fatal(p, sig) && !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) && !sigismember(&t->real_blocked, sig) && - (sig == SIGKILL || !(t->ptrace & PT_PTRACED))) { + (sig == SIGKILL || + !tracehook_consider_fatal_signal(t, sig, SIG_DFL))) { /* * This signal will be fatal to the whole group. */ -- cgit v1.2.3 From 283d7559e7712f95a05331eb0a85394c6368101b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:52 -0700 Subject: tracehook: syscall This adds standard tracehook.h inlines for arch code to call when TIF_SYSCALL_TRACE has been set. This replaces having each arch implement the ptrace guts for its syscall tracing support. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 70 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 8b4c15e208fe..3548694a24db 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -66,6 +66,76 @@ static inline int tracehook_expect_breakpoints(struct task_struct *task) return (task_ptrace(task) & PT_PTRACED) != 0; } +/* + * ptrace report for syscall entry and exit looks identical. + */ +static inline void ptrace_report_syscall(struct pt_regs *regs) +{ + int ptrace = task_ptrace(current); + + if (!(ptrace & PT_PTRACED)) + return; + + ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); + + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} + +/** + * tracehook_report_syscall_entry - task is about to attempt a system call + * @regs: user register state of current task + * + * This will be called if %TIF_SYSCALL_TRACE has been set, when the + * current task has just entered the kernel for a system call. + * Full user register state is available here. Changing the values + * in @regs can affect the system call number and arguments to be tried. + * It is safe to block here, preventing the system call from beginning. + * + * Returns zero normally, or nonzero if the calling arch code should abort + * the system call. That must prevent normal entry so no system call is + * made. 
If @task ever returns to user mode after this, its register state + * is unspecified, but should be something harmless like an %ENOSYS error + * return. + * + * Called without locks, just after entering kernel mode. + */ +static inline __must_check int tracehook_report_syscall_entry( + struct pt_regs *regs) +{ + ptrace_report_syscall(regs); + return 0; +} + +/** + * tracehook_report_syscall_exit - task has just finished a system call + * @regs: user register state of current task + * @step: nonzero if simulating single-step or block-step + * + * This will be called if %TIF_SYSCALL_TRACE has been set, when the + * current task has just finished an attempted system call. Full + * user register state is available here. It is safe to block here, + * preventing signals from being processed. + * + * If @step is nonzero, this report is also in lieu of the normal + * trap that would follow the system call instruction because + * user_enable_block_step() or user_enable_single_step() was used. + * In this case, %TIF_SYSCALL_TRACE might not be set. + * + * Called without locks, just before checking for pending signals. + */ +static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) +{ + ptrace_report_syscall(regs); +} + /** * tracehook_unsafe_exec - check for exec declared unsafe due to tracing * @task: current task doing exec -- cgit v1.2.3 From 7bcf6a2ca5f639b038c48711ebe6c4eca2036641 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:53 -0700 Subject: tracehook: get_signal_to_deliver This defines the tracehook_get_signal() hook to allow tracing code to slip in before normal signal dequeuing. This lays the groundwork for new tracing features that can inject synthetic signals outside the normal queue or control the disposition of delivered signals. The calling convention lets tracehook_get_signal() decide both exactly what will happen and what signal number to report in the handler/exit. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 29 +++++++++++++++++++++++++++++ kernel/signal.c | 38 +++++++++++++++++++++++++++----------- 2 files changed, 56 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 3548694a24db..42a0d7b11959 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -422,4 +422,33 @@ static inline int tracehook_consider_fatal_signal(struct task_struct *task, return (task_ptrace(task) & PT_PTRACED) != 0; } +/** + * tracehook_get_signal - deliver synthetic signal to traced task + * @task: @current + * @regs: task_pt_regs(@current) + * @info: details of synthetic signal + * @return_ka: sigaction for synthetic signal + * + * Return zero to check for a real pending signal normally. + * Return -1 after releasing the siglock to repeat the check. + * Return a signal number to induce an artifical signal delivery, + * setting *@info and *@return_ka to specify its details and behavior. + * + * The @return_ka->sa_handler value controls the disposition of the + * signal, no matter the signal number. For %SIG_DFL, the return value + * is a representative signal to indicate the behavior (e.g. %SIGTERM + * for death, %SIGQUIT for core dump, %SIGSTOP for job control stop, + * %SIGTSTP for stop unless in an orphaned pgrp), but the signal number + * reported will be @info->si_signo instead. 
+ * + * Called with @task->sighand->siglock held, before dequeuing pending signals. + */ +static inline int tracehook_get_signal(struct task_struct *task, + struct pt_regs *regs, + siginfo_t *info, + struct k_sigaction *return_ka) +{ + return 0; +} + #endif /* */ diff --git a/kernel/signal.c b/kernel/signal.c index 1a942ce32ba0..10b31ecdd9c8 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1754,17 +1754,33 @@ relock: do_signal_stop(0)) goto relock; - signr = dequeue_signal(current, ¤t->blocked, info); - if (!signr) - break; /* will return 0 */ + /* + * Tracing can induce an artifical signal and choose sigaction. + * The return value in @signr determines the default action, + * but @info->si_signo is the signal number we will report. + */ + signr = tracehook_get_signal(current, regs, info, return_ka); + if (unlikely(signr < 0)) + goto relock; + if (unlikely(signr != 0)) + ka = return_ka; + else { + signr = dequeue_signal(current, ¤t->blocked, + info); - if (signr != SIGKILL) { - signr = ptrace_signal(signr, info, regs, cookie); if (!signr) - continue; + break; /* will return 0 */ + + if (signr != SIGKILL) { + signr = ptrace_signal(signr, info, + regs, cookie); + if (!signr) + continue; + } + + ka = &sighand->action[signr-1]; } - ka = &sighand->action[signr-1]; if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ continue; if (ka->sa.sa_handler != SIG_DFL) { @@ -1812,7 +1828,7 @@ relock: spin_lock_irq(&sighand->siglock); } - if (likely(do_signal_stop(signr))) { + if (likely(do_signal_stop(info->si_signo))) { /* It released the siglock. */ goto relock; } @@ -1833,7 +1849,7 @@ relock: if (sig_kernel_coredump(signr)) { if (print_fatal_signals) - print_fatal_signal(regs, signr); + print_fatal_signal(regs, info->si_signo); /* * If it was able to dump core, this kills all * other threads in the group and synchronizes with @@ -1842,13 +1858,13 @@ relock: * first and our do_group_exit call below will use * that value and ignore the one we pass it. */ - do_coredump((long)signr, signr, regs); + do_coredump(info->si_signo, info->si_signo, regs); } /* * Death signals, no core dump. */ - do_group_exit(signr); + do_group_exit(info->si_signo); /* NOTREACHED */ } spin_unlock_irq(&sighand->siglock); -- cgit v1.2.3 From fa00b80b3c41a845b3d56f866fb40a2e98754c51 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:54 -0700 Subject: tracehook: job control This defines the tracehook_notify_jctl() hook to formalize the ptrace effects on the job control notifications. There is no change, only cleanup. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 20 ++++++++++++++++++++ kernel/signal.c | 10 +++++----- 2 files changed, 25 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 42a0d7b11959..6dc428dd2f38 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -451,4 +451,24 @@ static inline int tracehook_get_signal(struct task_struct *task, return 0; } +/** + * tracehook_notify_jctl - report about job control stop/continue + * @notify: nonzero if this is the last thread in the group to stop + * @why: %CLD_STOPPED or %CLD_CONTINUED + * + * This is called when we might call do_notify_parent_cldstop(). + * It's called when about to stop for job control; we are already in + * %TASK_STOPPED state, about to call schedule(). 
It's also called when + * a delayed %CLD_STOPPED or %CLD_CONTINUED report is ready to be made. + * + * Return nonzero to generate a %SIGCHLD with @why, which is + * normal if @notify is nonzero. + * + * Called with no locks held. + */ +static inline int tracehook_notify_jctl(int notify, int why) +{ + return notify || (current->ptrace & PT_PTRACED); +} + #endif /* */ diff --git a/kernel/signal.c b/kernel/signal.c index 10b31ecdd9c8..e9e699f4b1bd 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -596,9 +596,6 @@ static int check_kill_permission(int sig, struct siginfo *info, return security_task_kill(t, info, sig, 0); } -/* forward decl */ -static void do_notify_parent_cldstop(struct task_struct *tsk, int why); - /* * Handle magic process-wide effects of stop/continue signals. Unlike * the signal actions, these happen immediately at signal-generation @@ -1605,7 +1602,7 @@ finish_stop(int stop_count) * a group stop in progress and we are the last to stop, * report to the parent. When ptraced, every thread reports itself. */ - if (stop_count == 0 || (current->ptrace & PT_PTRACED)) { + if (tracehook_notify_jctl(stop_count == 0, CLD_STOPPED)) { read_lock(&tasklist_lock); do_notify_parent_cldstop(current, CLD_STOPPED); read_unlock(&tasklist_lock); @@ -1741,6 +1738,9 @@ relock: signal->flags &= ~SIGNAL_CLD_MASK; spin_unlock_irq(&sighand->siglock); + if (unlikely(!tracehook_notify_jctl(1, why))) + goto relock; + read_lock(&tasklist_lock); do_notify_parent_cldstop(current->group_leader, why); read_unlock(&tasklist_lock); @@ -1906,7 +1906,7 @@ void exit_signals(struct task_struct *tsk) out: spin_unlock_irq(&tsk->sighand->siglock); - if (unlikely(group_stop)) { + if (unlikely(group_stop) && tracehook_notify_jctl(1, CLD_STOPPED)) { read_lock(&tasklist_lock); do_notify_parent_cldstop(tsk, CLD_STOPPED); read_unlock(&tasklist_lock); -- cgit v1.2.3 From 2b2a1ff64afbadac842bbc58c5166962cf4f7664 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:54 -0700 Subject: tracehook: death This moves the ptrace logic in task death (exit_notify) into tracehook.h inlines. Some code is rearranged slightly to make things nicer. There is no change, only cleanup. There is one hook called with the tasklist_lock write-locked, as ptrace needs. There is also a new hook called after exit_state changes and without locks. This is a better place for tracing work to be in the future, since it doesn't delay the whole system with locking. 
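The -1 ("self-reap") result threaded through these hooks corresponds to the long-standing autoreap behaviour when the parent ignores SIGCHLD; from user space that case looks roughly like the sketch below (illustrative only, error handling omitted).

    #include <stdio.h>
    #include <errno.h>
    #include <signal.h>
    #include <unistd.h>
    #include <sys/types.h>
    #include <sys/wait.h>

    int main(void)
    {
            pid_t pid;

            signal(SIGCHLD, SIG_IGN);       /* children are reaped automatically */
            pid = fork();
            if (pid == 0)
                    _exit(0);               /* do_notify_parent() takes the -1 path */

            sleep(1);                       /* give the child time to exit */
            if (waitpid(pid, NULL, 0) < 0 && errno == ECHILD)
                    printf("no zombie left: the child self-reaped\n");
            return 0;
    }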
Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- include/linux/tracehook.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++ kernel/exit.c | 26 ++++++++---------------- kernel/signal.c | 10 ++++++--- 4 files changed, 69 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index adb8077dc463..a95d84d0da95 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1796,7 +1796,7 @@ extern int kill_pid_info_as_uid(int, struct siginfo *, struct pid *, uid_t, uid_ extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern int kill_proc_info(int, struct siginfo *, pid_t); -extern void do_notify_parent(struct task_struct *, int); +extern int do_notify_parent(struct task_struct *, int); extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); extern int send_sig(int, struct task_struct *, int); diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 6dc428dd2f38..4c50e1b57349 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -471,4 +471,56 @@ static inline int tracehook_notify_jctl(int notify, int why) return notify || (current->ptrace & PT_PTRACED); } +/** + * tracehook_notify_death - task is dead, ready to notify parent + * @task: @current task now exiting + * @death_cookie: value to pass to tracehook_report_death() + * @group_dead: nonzero if this was the last thread in the group to die + * + * Return the signal number to send our parent with do_notify_parent(), or + * zero to send no signal and leave a zombie, or -1 to self-reap right now. + * + * Called with write_lock_irq(&tasklist_lock) held. + */ +static inline int tracehook_notify_death(struct task_struct *task, + void **death_cookie, int group_dead) +{ + if (task->exit_signal == -1) + return task->ptrace ? SIGCHLD : -1; + + /* + * If something other than our normal parent is ptracing us, then + * send it a SIGCHLD instead of honoring exit_signal. exit_signal + * only has special meaning to our real parent. + */ + if (thread_group_empty(task) && !ptrace_reparented(task)) + return task->exit_signal; + + return task->ptrace ? SIGCHLD : 0; +} + +/** + * tracehook_report_death - task is dead and ready to be reaped + * @task: @current task now exiting + * @signal: signal number sent to parent, or 0 or -1 + * @death_cookie: value passed back from tracehook_notify_death() + * @group_dead: nonzero if this was the last thread in the group to die + * + * Thread has just become a zombie or is about to self-reap. If positive, + * @signal is the signal number just sent to the parent (usually %SIGCHLD). + * If @signal is -1, this thread will self-reap. If @signal is 0, this is + * a delayed_group_leader() zombie. The @death_cookie was passed back by + * tracehook_notify_death(). + * + * If normal reaping is not inhibited, @task->exit_state might be changing + * in parallel. + * + * Called without locks. 
+ */ +static inline void tracehook_report_death(struct task_struct *task, + int signal, void *death_cookie, + int group_dead) +{ +} + #endif /* */ diff --git a/kernel/exit.c b/kernel/exit.c index da28745f7c38..6cdf60712bd2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -885,7 +885,8 @@ static void forget_original_parent(struct task_struct *father) */ static void exit_notify(struct task_struct *tsk, int group_dead) { - int state; + int signal; + void *cookie; /* * This does two things: @@ -922,22 +923,11 @@ static void exit_notify(struct task_struct *tsk, int group_dead) !capable(CAP_KILL)) tsk->exit_signal = SIGCHLD; - /* If something other than our normal parent is ptracing us, then - * send it a SIGCHLD instead of honoring exit_signal. exit_signal - * only has special meaning to our real parent. - */ - if (!task_detached(tsk) && thread_group_empty(tsk)) { - int signal = ptrace_reparented(tsk) ? - SIGCHLD : tsk->exit_signal; - do_notify_parent(tsk, signal); - } else if (tsk->ptrace) { - do_notify_parent(tsk, SIGCHLD); - } + signal = tracehook_notify_death(tsk, &cookie, group_dead); + if (signal > 0) + signal = do_notify_parent(tsk, signal); - state = EXIT_ZOMBIE; - if (task_detached(tsk) && likely(!tsk->ptrace)) - state = EXIT_DEAD; - tsk->exit_state = state; + tsk->exit_state = signal < 0 ? EXIT_DEAD : EXIT_ZOMBIE; /* mt-exec, de_thread() is waiting for us */ if (thread_group_leader(tsk) && @@ -947,8 +937,10 @@ static void exit_notify(struct task_struct *tsk, int group_dead) write_unlock_irq(&tasklist_lock); + tracehook_report_death(tsk, signal, cookie, group_dead); + /* If the process is dead, release it - nobody will wait for it */ - if (state == EXIT_DEAD) + if (signal < 0) release_task(tsk); } diff --git a/kernel/signal.c b/kernel/signal.c index e9e699f4b1bd..0e862d3130ff 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1326,9 +1326,11 @@ static inline void __wake_up_parent(struct task_struct *p, /* * Let a parent know about the death of a child. * For a stopped/continued status change, use do_notify_parent_cldstop instead. + * + * Returns -1 if our parent ignored us and so we've switched to + * self-reaping, or else @sig. */ - -void do_notify_parent(struct task_struct *tsk, int sig) +int do_notify_parent(struct task_struct *tsk, int sig) { struct siginfo info; unsigned long flags; @@ -1399,12 +1401,14 @@ void do_notify_parent(struct task_struct *tsk, int sig) */ tsk->exit_signal = -1; if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) - sig = 0; + sig = -1; } if (valid_signal(sig) && sig > 0) __group_send_sig_info(sig, &info, tsk->parent); __wake_up_parent(tsk, tsk->parent); spin_unlock_irqrestore(&psig->siglock, flags); + + return sig; } static void do_notify_parent_cldstop(struct task_struct *tsk, int why) -- cgit v1.2.3 From b787f7ba677840da16a2228c16571ce8a1fcb799 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:55 -0700 Subject: tracehook: force signal_pending() This defines a new hook tracehook_force_sigpending() that lets tracing code decide to force TIF_SIGPENDING on in recalc_sigpending(). This is not used yet, so it compiles away to nothing for now. It lays the groundwork for new tracing code that can interrupt a task synthetically without actually sending a signal. 
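To make the groundwork concrete, a hypothetical tracing engine (nothing in this series does this yet) could wire the hook to a private per-task flag; TIF_ENGINE_WORK and the engine itself are invented purely for illustration.

    /* Hypothetical replacement an engine could provide in tracehook.h: */
    static inline int tracehook_force_sigpending(void)
    {
            /* TIF_ENGINE_WORK is an invented per-task flag owned by the engine. */
            return test_thread_flag(TIF_ENGINE_WORK);
    }

recalc_sigpending() would then keep TIF_SIGPENDING set, so the task drops into the signal delivery path where a matching tracehook_get_signal() implementation can do the engine's work even though no real signal was ever queued.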
Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 14 ++++++++++++++ kernel/signal.c | 4 +++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 4c50e1b57349..43bc51b6bd33 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -422,6 +422,20 @@ static inline int tracehook_consider_fatal_signal(struct task_struct *task, return (task_ptrace(task) & PT_PTRACED) != 0; } +/** + * tracehook_force_sigpending - let tracing force signal_pending(current) on + * + * Called when recomputing our signal_pending() flag. Return nonzero + * to force the signal_pending() flag on, so that tracehook_get_signal() + * will be called before the next return to user mode. + * + * Called with @current->sighand->siglock held. + */ +static inline int tracehook_force_sigpending(void) +{ + return 0; +} + /** * tracehook_get_signal - deliver synthetic signal to traced task * @task: @current diff --git a/kernel/signal.c b/kernel/signal.c index 0e862d3130ff..954f77d7e3bc 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -134,7 +134,9 @@ void recalc_sigpending_and_wake(struct task_struct *t) void recalc_sigpending(void) { - if (!recalc_sigpending_tsk(current) && !freezing(current)) + if (unlikely(tracehook_force_sigpending())) + set_thread_flag(TIF_SIGPENDING); + else if (!recalc_sigpending_tsk(current) && !freezing(current)) clear_thread_flag(TIF_SIGPENDING); } -- cgit v1.2.3 From 64b1208d5b0ef8859fd52ea7ae286a3eb994669b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:56 -0700 Subject: tracehook: TIF_NOTIFY_RESUME This adds tracehook.h inlines to enable a new arch feature in support of user debugging/tracing. This is not used yet, but it lays the groundwork for a debugger to be able to wrangle a task that's possibly running, without interrupting its syscalls in progress. Each arch should define TIF_NOTIFY_RESUME, and in their entry.S code treat it much like TIF_SIGPENDING. That is, it causes you to take the slow path when returning to user mode, where you get the full user-mode state accessible as for signal handling or ptrace. The arch code should check TIF_NOTIFY_RESUME after handling TIF_SIGPENDING. When it's set, clear it and then call tracehook_notify_resume(). In future, tracing code will call set_notify_resume() when it wants to get a callback in tracehook_notify_resume(). Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 43bc51b6bd33..32867ab86c70 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -537,4 +537,38 @@ static inline void tracehook_report_death(struct task_struct *task, { } +#ifdef TIF_NOTIFY_RESUME +/** + * set_notify_resume - cause tracehook_notify_resume() to be called + * @task: task that will call tracehook_notify_resume() + * + * Calling this arranges that @task will call tracehook_notify_resume() + * before returning to user mode. If it's already running in user mode, + * it will enter the kernel and call tracehook_notify_resume() soon. + * If it's blocked, it will not be woken. 
+ */ +static inline void set_notify_resume(struct task_struct *task) +{ + if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME)) + kick_process(task); +} + +/** + * tracehook_notify_resume - report when about to return to user mode + * @regs: user-mode registers of @current task + * + * This is called when %TIF_NOTIFY_RESUME has been set. Now we are + * about to return to user mode, and the user state in @regs can be + * inspected or adjusted. The caller in arch code has cleared + * %TIF_NOTIFY_RESUME before the call. If the flag gets set again + * asynchronously, this will be called again before we return to + * user mode. + * + * Called without locks. + */ +static inline void tracehook_notify_resume(struct pt_regs *regs) +{ +} +#endif /* TIF_NOTIFY_RESUME */ + #endif /* */ -- cgit v1.2.3 From 828c365cc8b8d38c346fccb19fa80d28f2240831 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:57 -0700 Subject: tracehook: asm/syscall.h This adds asm-generic/syscall.h, which documents what a real asm-ARCH/syscall.h file should define. This is not used yet, but will provide all the machine-dependent details of examining a user system call about to begin, in progress, or just ended. Each arch should add an asm-ARCH/syscall.h that defines all the entry points documented in asm-generic/syscall.h, as short inlines if possible. This lets us write new tracing code that understands user system call registers, without any new arch-specific work. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/syscall.h | 141 ++++++++++++++++++++++++++++++++++++++++++ include/linux/tracehook.h | 3 +- 2 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 include/asm-generic/syscall.h (limited to 'include/linux') diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h new file mode 100644 index 000000000000..abcf34c2fdc7 --- /dev/null +++ b/include/asm-generic/syscall.h @@ -0,0 +1,141 @@ +/* + * Access to user system call parameters and results + * + * Copyright (C) 2008 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * This file is a stub providing documentation for what functions + * asm-ARCH/syscall.h files need to define. Most arch definitions + * will be simple inlines. + * + * All of these functions expect to be called with no locks, + * and only when the caller is sure that the task of interest + * cannot return to user mode while we are looking at it. + */ + +#ifndef _ASM_SYSCALL_H +#define _ASM_SYSCALL_H 1 + +struct task_struct; +struct pt_regs; + +/** + * syscall_get_nr - find what system call a task is executing + * @task: task of interest, must be blocked + * @regs: task_pt_regs() of @task + * + * If @task is executing a system call or is at system call + * tracing about to attempt one, returns the system call number. + * If @task is not executing a system call, i.e. it's blocked + * inside the kernel for a fault or signal, returns -1. + * + * It's only valid to call this when @task is known to be blocked. 
+ */ +long syscall_get_nr(struct task_struct *task, struct pt_regs *regs); + +/** + * syscall_rollback - roll back registers after an aborted system call + * @task: task of interest, must be in system call exit tracing + * @regs: task_pt_regs() of @task + * + * It's only valid to call this when @task is stopped for system + * call exit tracing (due to TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT), + * after tracehook_report_syscall_entry() returned nonzero to prevent + * the system call from taking place. + * + * This rolls back the register state in @regs so it's as if the + * system call instruction was a no-op. The registers containing + * the system call number and arguments are as they were before the + * system call instruction. This may not be the same as what the + * register state looked like at system call entry tracing. + */ +void syscall_rollback(struct task_struct *task, struct pt_regs *regs); + +/** + * syscall_get_error - check result of traced system call + * @task: task of interest, must be blocked + * @regs: task_pt_regs() of @task + * + * Returns 0 if the system call succeeded, or -ERRORCODE if it failed. + * + * It's only valid to call this when @task is stopped for tracing on exit + * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. + */ +long syscall_get_error(struct task_struct *task, struct pt_regs *regs); + +/** + * syscall_get_return_value - get the return value of a traced system call + * @task: task of interest, must be blocked + * @regs: task_pt_regs() of @task + * + * Returns the return value of the successful system call. + * This value is meaningless if syscall_get_error() returned nonzero. + * + * It's only valid to call this when @task is stopped for tracing on exit + * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. + */ +long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs); + +/** + * syscall_set_return_value - change the return value of a traced system call + * @task: task of interest, must be blocked + * @regs: task_pt_regs() of @task + * @error: negative error code, or zero to indicate success + * @val: user return value if @error is zero + * + * This changes the results of the system call that user mode will see. + * If @error is zero, the user sees a successful system call with a + * return value of @val. If @error is nonzero, it's a negated errno + * code; the user sees a failed system call with this errno code. + * + * It's only valid to call this when @task is stopped for tracing on exit + * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. + */ +void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, + int error, long val); + +/** + * syscall_get_arguments - extract system call parameter values + * @task: task of interest, must be blocked + * @regs: task_pt_regs() of @task + * @i: argument index [0,5] + * @n: number of arguments; n+i must be [1,6]. + * @args: array filled with argument values + * + * Fetches @n arguments to the system call starting with the @i'th argument + * (from 0 through 5). Argument @i is stored in @args[0], and so on. + * An arch inline version is probably optimal when @i and @n are constants. + * + * It's only valid to call this when @task is stopped for tracing on + * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. + * It's invalid to call this with @i + @n > 6; we only support system calls + * taking up to 6 arguments. 
+ */ +void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, + unsigned int i, unsigned int n, unsigned long *args); + +/** + * syscall_set_arguments - change system call parameter value + * @task: task of interest, must be in system call entry tracing + * @regs: task_pt_regs() of @task + * @i: argument index [0,5] + * @n: number of arguments; n+i must be [1,6]. + * @args: array of argument values to store + * + * Changes @n arguments to the system call starting with the @i'th argument. + * @n'th argument to @val. Argument @i gets value @args[0], and so on. + * An arch inline version is probably optimal when @i and @n are constants. + * + * It's only valid to call this when @task is stopped for tracing on + * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. + * It's invalid to call this with @i + @n > 6; we only support system calls + * taking up to 6 arguments. + */ +void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args); + +#endif /* _ASM_SYSCALL_H */ diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 32867ab86c70..589f429619c9 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -103,7 +103,8 @@ static inline void ptrace_report_syscall(struct pt_regs *regs) * the system call. That must prevent normal entry so no system call is * made. If @task ever returns to user mode after this, its register state * is unspecified, but should be something harmless like an %ENOSYS error - * return. + * return. It should preserve enough information so that syscall_rollback() + * can work (see asm-generic/syscall.h). * * Called without locks, just after entering kernel mode. */ -- cgit v1.2.3 From 85ba2d862e521375a8ee01526c5c46b1f24bb4af Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:58 -0700 Subject: tracehook: wait_task_inactive This extends wait_task_inactive() with a new argument so it can be used in a "soft" mode where it will check for the task changing state unexpectedly and back off. There is no change to existing callers. This lays the groundwork to allow robust, noninvasive tracing that can try to sample a blocked thread but back off safely if it wakes up. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/perfmon.c | 4 ++-- include/linux/sched.h | 8 ++++++-- kernel/kthread.c | 2 +- kernel/ptrace.c | 2 +- kernel/sched.c | 29 +++++++++++++++++++++++++++-- 5 files changed, 37 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 19d4493c6193..fc8f3509df27 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2626,7 +2626,7 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task) /* * make sure the task is off any CPU */ - wait_task_inactive(task); + wait_task_inactive(task, 0); /* more to come... 
*/ @@ -4774,7 +4774,7 @@ recheck: UNPROTECT_CTX(ctx, flags); - wait_task_inactive(task); + wait_task_inactive(task, 0); PROTECT_CTX(ctx, flags); diff --git a/include/linux/sched.h b/include/linux/sched.h index a95d84d0da95..f59318a0099b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1882,9 +1882,13 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP -extern void wait_task_inactive(struct task_struct * p); +extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else -#define wait_task_inactive(p) do { } while (0) +static inline unsigned long wait_task_inactive(struct task_struct *p, + long match_state) +{ + return 1; +} #endif #define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) diff --git a/kernel/kthread.c b/kernel/kthread.c index 6111c27491b1..96cff2f8710b 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -176,7 +176,7 @@ void kthread_bind(struct task_struct *k, unsigned int cpu) return; } /* Must have done schedule() in kthread() before we set_task_cpu */ - wait_task_inactive(k); + wait_task_inactive(k, 0); set_task_cpu(k, cpu); k->cpus_allowed = cpumask_of_cpu(cpu); k->rt.nr_cpus_allowed = 1; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 8392a9da6450..082b3fcb32a0 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -107,7 +107,7 @@ int ptrace_check_attach(struct task_struct *child, int kill) read_unlock(&tasklist_lock); if (!ret && !kill) - wait_task_inactive(child); + ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH; /* All systems go.. */ return ret; diff --git a/kernel/sched.c b/kernel/sched.c index fde1a1026359..0236958addcb 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1867,16 +1867,24 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) /* * wait_task_inactive - wait for a thread to unschedule. * + * If @match_state is nonzero, it's the @p->state value just checked and + * not expected to change. If it changes, i.e. @p might have woken up, + * then return zero. When we succeed in waiting for @p to be off its CPU, + * we return a positive number (its total switch count). If a second call + * a short while later returns the same number, the caller can be sure that + * @p has remained unscheduled the whole time. + * * The caller must ensure that the task *will* unschedule sometime soon, * else this function might spin for a *long* time. This function can't * be called with interrupts off, or it may introduce deadlock with * smp_call_function() if an IPI is sent by the same process we are * waiting to become inactive. */ -void wait_task_inactive(struct task_struct *p) +unsigned long wait_task_inactive(struct task_struct *p, long match_state) { unsigned long flags; int running, on_rq; + unsigned long ncsw; struct rq *rq; for (;;) { @@ -1899,8 +1907,11 @@ void wait_task_inactive(struct task_struct *p) * return false if the runqueue has changed and p * is actually now running somewhere else! */ - while (task_running(rq, p)) + while (task_running(rq, p)) { + if (match_state && unlikely(p->state != match_state)) + return 0; cpu_relax(); + } /* * Ok, time to look more closely! 
We need the rq @@ -1910,8 +1921,20 @@ void wait_task_inactive(struct task_struct *p) rq = task_rq_lock(p, &flags); running = task_running(rq, p); on_rq = p->se.on_rq; + ncsw = 0; + if (!match_state || p->state == match_state) { + ncsw = p->nivcsw + p->nvcsw; + if (unlikely(!ncsw)) + ncsw = 1; + } task_rq_unlock(rq, &flags); + /* + * If it changed from the expected state, bail out now. + */ + if (unlikely(!ncsw)) + break; + /* * Was it really running after all now that we * checked with the proper locks actually held? @@ -1944,6 +1967,8 @@ void wait_task_inactive(struct task_struct *p) */ break; } + + return ncsw; } /*** -- cgit v1.2.3 From bbc698636ed48b6fcd323964e0f847a6a796325d Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:59 -0700 Subject: task_current_syscall This adds the new function task_current_syscall() on machines where the asm/syscall.h interface is supported (CONFIG_HAVE_ARCH_TRACEHOOK). It's exported for modules to use in the future. This function safely samples the state of a blocked thread to collect what system call it is blocked in, and the six system call argument registers. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 4 +++ lib/Makefile | 2 ++ lib/syscall.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 lib/syscall.c (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index ed69c03692d9..fd31756e1a00 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -314,6 +314,10 @@ static inline void user_enable_block_step(struct task_struct *task) #define arch_ptrace_stop(code, info) do { } while (0) #endif +extern int task_current_syscall(struct task_struct *target, long *callno, + unsigned long args[6], unsigned int maxargs, + unsigned long *sp, unsigned long *pc); + #endif #endif diff --git a/lib/Makefile b/lib/Makefile index 9085ad6fa53d..942c7250f603 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -78,6 +78,8 @@ lib-$(CONFIG_GENERIC_BUG) += bug.o obj-$(CONFIG_HAVE_LMB) += lmb.o +obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/syscall.c b/lib/syscall.c new file mode 100644 index 000000000000..a4f7067f72fa --- /dev/null +++ b/lib/syscall.c @@ -0,0 +1,75 @@ +#include +#include +#include +#include + +static int collect_syscall(struct task_struct *target, long *callno, + unsigned long args[6], unsigned int maxargs, + unsigned long *sp, unsigned long *pc) +{ + struct pt_regs *regs = task_pt_regs(target); + if (unlikely(!regs)) + return -EAGAIN; + + *sp = user_stack_pointer(regs); + *pc = instruction_pointer(regs); + + *callno = syscall_get_nr(target, regs); + if (*callno != -1L && maxargs > 0) + syscall_get_arguments(target, regs, 0, maxargs, args); + + return 0; +} + +/** + * task_current_syscall - Discover what a blocked task is doing. + * @target: thread to examine + * @callno: filled with system call number or -1 + * @args: filled with @maxargs system call arguments + * @maxargs: number of elements in @args to fill + * @sp: filled with user stack pointer + * @pc: filled with user PC + * + * If @target is blocked in a system call, returns zero with *@callno + * set to the the call's number and @args filled in with its arguments. 
+ * Registers not used for system call arguments may not be available and + * it is not kosher to use &struct user_regset calls while the system + * call is still in progress. Note we may get this result if @target + * has finished its system call but not yet returned to user mode, such + * as when it's stopped for signal handling or syscall exit tracing. + * + * If @target is blocked in the kernel during a fault or exception, + * returns zero with *@callno set to -1 and does not fill in @args. + * If so, it's now safe to examine @target using &struct user_regset + * get() calls as long as we're sure @target won't return to user mode. + * + * Returns -%EAGAIN if @target does not remain blocked. + * + * Returns -%EINVAL if @maxargs is too large (maximum is six). + */ +int task_current_syscall(struct task_struct *target, long *callno, + unsigned long args[6], unsigned int maxargs, + unsigned long *sp, unsigned long *pc) +{ + long state; + unsigned long ncsw; + + if (unlikely(maxargs > 6)) + return -EINVAL; + + if (target == current) + return collect_syscall(target, callno, args, maxargs, sp, pc); + + state = target->state; + if (unlikely(!state)) + return -EAGAIN; + + ncsw = wait_task_inactive(target, state); + if (unlikely(!ncsw) || + unlikely(collect_syscall(target, callno, args, maxargs, sp, pc)) || + unlikely(wait_task_inactive(target, state) != ncsw)) + return -EAGAIN; + + return 0; +} +EXPORT_SYMBOL_GPL(task_current_syscall); -- cgit v1.2.3 From 9d8fddfb17aaee4ffc5e3d0560620d0fa8b50a42 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 25 Jul 2008 19:46:23 -0700 Subject: mm/allocpercpu.c: make 4 functions static This patch makes the following needlessly global functions static: - percpu_depopulate() - __percpu_depopulate_mask() - percpu_populate() - __percpu_populate_mask() Signed-off-by: Adrian Bunk Acked-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu.h | 29 ----------------------------- mm/allocpercpu.c | 20 +++++++++++--------- 2 files changed, 11 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 4cdd393e71e1..fac3337547eb 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -74,11 +74,6 @@ struct percpu_data { (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) -extern void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu); -extern void percpu_depopulate(void *__pdata, int cpu); -extern int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, - cpumask_t *mask); -extern void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask); extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask); extern void percpu_free(void *__pdata); @@ -86,26 +81,6 @@ extern void percpu_free(void *__pdata); #define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) -static inline void percpu_depopulate(void *__pdata, int cpu) -{ -} - -static inline void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask) -{ -} - -static inline void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, - int cpu) -{ - return percpu_ptr(__pdata, cpu); -} - -static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, - cpumask_t *mask) -{ - return 0; -} - static __always_inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) { return kzalloc(size, gfp); @@ -118,10 +93,6 @@ static inline void percpu_free(void *__pdata) #endif /* CONFIG_SMP */ -#define percpu_populate_mask(__pdata, size, gfp, mask) \ - 
__percpu_populate_mask((__pdata), (size), (gfp), &(mask)) -#define percpu_depopulate_mask(__pdata, mask) \ - __percpu_depopulate_mask((__pdata), &(mask)) #define percpu_alloc_mask(size, gfp, mask) \ __percpu_alloc_mask((size), (gfp), &(mask)) diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index 843364594e23..4297bc41bfd2 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c @@ -18,27 +18,28 @@ * Depopulating per-cpu data for a cpu going offline would be a typical * use case. You need to register a cpu hotplug handler for that purpose. */ -void percpu_depopulate(void *__pdata, int cpu) +static void percpu_depopulate(void *__pdata, int cpu) { struct percpu_data *pdata = __percpu_disguise(__pdata); kfree(pdata->ptrs[cpu]); pdata->ptrs[cpu] = NULL; } -EXPORT_SYMBOL_GPL(percpu_depopulate); /** * percpu_depopulate_mask - depopulate per-cpu data for some cpu's * @__pdata: per-cpu data to depopulate * @mask: depopulate per-cpu data for cpu's selected through mask bits */ -void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask) +static void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask) { int cpu; for_each_cpu_mask_nr(cpu, *mask) percpu_depopulate(__pdata, cpu); } -EXPORT_SYMBOL_GPL(__percpu_depopulate_mask); + +#define percpu_depopulate_mask(__pdata, mask) \ + __percpu_depopulate_mask((__pdata), &(mask)) /** * percpu_populate - populate per-cpu data for given cpu @@ -51,7 +52,7 @@ EXPORT_SYMBOL_GPL(__percpu_depopulate_mask); * use case. You need to register a cpu hotplug handler for that purpose. * Per-cpu object is populated with zeroed buffer. */ -void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu) +static void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu) { struct percpu_data *pdata = __percpu_disguise(__pdata); int node = cpu_to_node(cpu); @@ -68,7 +69,6 @@ void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu) pdata->ptrs[cpu] = kzalloc(size, gfp); return pdata->ptrs[cpu]; } -EXPORT_SYMBOL_GPL(percpu_populate); /** * percpu_populate_mask - populate per-cpu data for more cpu's @@ -79,8 +79,8 @@ EXPORT_SYMBOL_GPL(percpu_populate); * * Per-cpu objects are populated with zeroed buffers. */ -int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, - cpumask_t *mask) +static int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, + cpumask_t *mask) { cpumask_t populated; int cpu; @@ -94,7 +94,9 @@ int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, cpu_set(cpu, populated); return 0; } -EXPORT_SYMBOL_GPL(__percpu_populate_mask); + +#define percpu_populate_mask(__pdata, size, gfp, mask) \ + __percpu_populate_mask((__pdata), (size), (gfp), &(mask)) /** * percpu_alloc_mask - initial setup of per-cpu data -- cgit v1.2.3 From 15f59adae001766a2c7f7fe4f196387bb04bcff5 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 25 Jul 2008 19:46:23 -0700 Subject: make mm/memory.c:print_bad_pte() static This patch makes the needlessly global print_bad_pte() static. 
Signed-off-by: Adrian Bunk Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - mm/memory.c | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f3fd70d6029f..6e695eaab4ce 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -810,7 +810,6 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void * int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); -void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long); extern int try_to_release_page(struct page * page, gfp_t gfp_mask); extern void do_invalidatepage(struct page *page, unsigned long offset); diff --git a/mm/memory.c b/mm/memory.c index 262e3eb6601a..a8ca04faaea6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -374,7 +374,8 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss) * * The calling function must still handle the error. */ -void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr) +static void print_bad_pte(struct vm_area_struct *vma, pte_t pte, + unsigned long vaddr) { printk(KERN_ERR "Bad pte = %08llx, process = %s, " "vm_flags = %lx, vaddr = %lx\n", -- cgit v1.2.3 From 7c363b8c6536f26934172d3c46f0bbec01a97c61 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 25 Jul 2008 19:46:24 -0700 Subject: mm/swapfile.c: make code static This patch makes the following needlessly global code static: - swap_lock - nr_swapfiles - struct swap_list Signed-off-by: Adrian Bunk Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 --- mm/swapfile.c | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 0b3377650c85..de40f169a4e4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -237,7 +237,6 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t, /* linux/mm/swapfile.c */ extern long total_swap_pages; -extern unsigned int nr_swapfiles; extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); extern swp_entry_t get_swap_page_of_type(int); @@ -254,8 +253,6 @@ extern int can_share_swap_page(struct page *); extern int remove_exclusive_swap_page(struct page *); struct backing_dev_info; -extern spinlock_t swap_lock; - /* linux/mm/thrash.c */ extern struct mm_struct * swap_token_mm; extern void grab_swap_token(void); diff --git a/mm/swapfile.c b/mm/swapfile.c index af283933c14e..6beb6251e99d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -33,8 +33,8 @@ #include #include -DEFINE_SPINLOCK(swap_lock); -unsigned int nr_swapfiles; +static DEFINE_SPINLOCK(swap_lock); +static unsigned int nr_swapfiles; long total_swap_pages; static int swap_overflow; static int least_priority; @@ -44,7 +44,7 @@ static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; static const char Unused_offset[] = "Unused swap offset entry "; -struct swap_list_t swap_list = {-1, -1}; +static struct swap_list_t swap_list = {-1, -1}; static struct swap_info_struct swap_info[MAX_SWAPFILES]; -- cgit v1.2.3 From 9580d85f9cdb076c4bfb467bc6c0d3c5e499957a Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 25 Jul 2008 19:46:25 -0700 Subject: drivers/char/rtc.c: make 2 
functions static The following functions can now become static: - rtc_interrupt() - rtc_get_rtc_time() Signed-off-by: Adrian Bunk Acked-by: Bernhard Walle Acked-by: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/rtc.c | 5 +++-- include/linux/rtc.h | 2 -- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index dbefbb30ed44..d9799e2bcfbf 100644 --- a/drivers/char/rtc.c +++ b/drivers/char/rtc.c @@ -144,6 +144,7 @@ static ssize_t rtc_read(struct file *file, char __user *buf, size_t count, loff_t *ppos); static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +static void rtc_get_rtc_time(struct rtc_time *rtc_tm); #ifdef RTC_IRQ static unsigned int rtc_poll(struct file *file, poll_table *wait); @@ -235,7 +236,7 @@ static inline unsigned char rtc_is_updating(void) * (See ./arch/XXXX/kernel/time.c for the set_rtc_mmss() function.) */ -irqreturn_t rtc_interrupt(int irq, void *dev_id) +static irqreturn_t rtc_interrupt(int irq, void *dev_id) { /* * Can be an alarm interrupt, update complete interrupt, @@ -1303,7 +1304,7 @@ static int rtc_proc_open(struct inode *inode, struct file *file) } #endif -void rtc_get_rtc_time(struct rtc_time *rtc_tm) +static void rtc_get_rtc_time(struct rtc_time *rtc_tm) { unsigned long uip_watchdog = jiffies, flags; unsigned char ctrl; diff --git a/include/linux/rtc.h b/include/linux/rtc.h index b01fe004cb5e..91f597ad6acc 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -225,8 +225,6 @@ typedef struct rtc_task { int rtc_register(rtc_task_t *task); int rtc_unregister(rtc_task_t *task); int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg); -void rtc_get_rtc_time(struct rtc_time *rtc_tm); -irqreturn_t rtc_interrupt(int irq, void *dev_id); #endif /* __KERNEL__ */ -- cgit v1.2.3 From a9906a19193db69ad0158f289f839edf8aaf103f Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sat, 26 Jul 2008 14:41:26 -0700 Subject: tracehook: comment fixes This fixes some typos and errors in comments. No code changes. Signed-off-by: Roland McGrath --- include/linux/tracehook.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 589f429619c9..b1875582c1a1 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -244,7 +244,7 @@ static inline int tracehook_prepare_clone(unsigned clone_flags) * tracehook_finish_clone - new child created and being attached * @child: new child task * @clone_flags: %CLONE_* flags from clone/fork/vfork system call - * @trace: return value from tracehook_clone_prepare() + * @trace: return value from tracehook_prepare_clone() * * This is called immediately after adding @child to its parent's children list. * The @trace value is that returned by tracehook_prepare_clone(). @@ -259,19 +259,20 @@ static inline void tracehook_finish_clone(struct task_struct *child, /** * tracehook_report_clone - in parent, new child is about to start running - * @trace: return value from tracehook_clone_prepare() + * @trace: return value from tracehook_prepare_clone() * @regs: parent's user register state * @clone_flags: flags from parent's system call * @pid: new child's PID in the parent's namespace * @child: new child task * - * Called after a child is set up, but before it has been started running. - * The @trace value is that returned by tracehook_clone_prepare(). 
- * This is not a good place to block, because the child has not started yet. - * Suspend the child here if desired, and block in tracehook_clone_complete(). - * This must prevent the child from self-reaping if tracehook_clone_complete() - * uses the @child pointer; otherwise it might have died and been released by - * the time tracehook_report_clone_complete() is called. + * Called after a child is set up, but before it has been started + * running. @trace is the value returned by tracehook_prepare_clone(). + * This is not a good place to block, because the child has not started + * yet. Suspend the child here if desired, and then block in + * tracehook_report_clone_complete(). This must prevent the child from + * self-reaping if tracehook_report_clone_complete() uses the @child + * pointer; otherwise it might have died and been released by the time + * tracehook_report_report_clone_complete() is called. * * Called with no locks held, but the child cannot run until this returns. */ @@ -290,7 +291,7 @@ static inline void tracehook_report_clone(int trace, struct pt_regs *regs, /** * tracehook_report_clone_complete - new child is running - * @trace: return value from tracehook_clone_prepare() + * @trace: return value from tracehook_prepare_clone() * @regs: parent's user register state * @clone_flags: flags from parent's system call * @pid: new child's PID in the parent's namespace @@ -347,7 +348,7 @@ static inline void tracehook_prepare_release_task(struct task_struct *task) } /** - * tracehook_finish_release_task - task is being reaped, clean up tracing + * tracehook_finish_release_task - final tracing clean-up * @task: task in %EXIT_DEAD state * * This is called in release_task() when @task is being in the middle of -- cgit v1.2.3 From 6edd8ee60ac9b974bd6ec3b1bcb2aab02762fa8c Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Thu, 24 Jul 2008 14:18:57 +0200 Subject: mmc: Export internal host state through debugfs When CONFIG_DEBUG_FS is set, create a few files under /sys/kernel/debug containing information about an mmc host's internal state. Currently, just a single file is created, "ios", which contains information about the current operating parameters for the bus (clock speed, bus width, etc.) Host drivers can add additional files and directories under the host's root directory by passing the debugfs_root field in struct mmc_host as the 'parent' parameter to debugfs_create_*. 
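As an illustration of that hook (not part of this patch), a host controller driver could register an extra dump file roughly as sketched below; the myhost_* names are hypothetical, while host->debugfs_root, debugfs_create_file() and the seq_file helpers are the interfaces described above:

#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/mmc/host.h>

static int myhost_regs_show(struct seq_file *s, void *unused)
{
	struct mmc_host *mmc = s->private;

	/* dump whatever controller state is useful while debugging */
	seq_printf(s, "host:\t%s\n", mmc_hostname(mmc));
	return 0;
}

static int myhost_regs_open(struct inode *inode, struct file *file)
{
	return single_open(file, myhost_regs_show, inode->i_private);
}

static const struct file_operations myhost_regs_fops = {
	.open		= myhost_regs_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static void myhost_add_debugfs(struct mmc_host *mmc)
{
	/* mmc_add_host() has already set up mmc->debugfs_root; it is
	 * NULL when debugfs is disabled or directory creation failed. */
	if (!mmc->debugfs_root)
		return;

	debugfs_create_file("regs", S_IRUSR, mmc->debugfs_root,
			    mmc, &myhost_regs_fops);
}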
Signed-off-by: Haavard Skinnemoen Signed-off-by: Pierre Ossman --- drivers/mmc/core/Makefile | 1 + drivers/mmc/core/core.h | 4 ++ drivers/mmc/core/debugfs.c | 164 +++++++++++++++++++++++++++++++++++++++++++++ drivers/mmc/core/host.c | 8 +++ include/linux/mmc/host.h | 2 + 5 files changed, 179 insertions(+) create mode 100644 drivers/mmc/core/debugfs.c (limited to 'include/linux') diff --git a/drivers/mmc/core/Makefile b/drivers/mmc/core/Makefile index 19a1a254a0c5..889e5f898f6f 100644 --- a/drivers/mmc/core/Makefile +++ b/drivers/mmc/core/Makefile @@ -12,3 +12,4 @@ mmc_core-y := core.o bus.o host.o \ sdio.o sdio_ops.o sdio_bus.o \ sdio_cis.o sdio_io.o sdio_irq.o +mmc_core-$(CONFIG_DEBUG_FS) += debugfs.o diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index cdb332b7dedc..745da9881aa7 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -52,5 +52,9 @@ int mmc_attach_sdio(struct mmc_host *host, u32 ocr); extern int use_spi_crc; +/* Debugfs information for hosts and cards */ +void mmc_add_host_debugfs(struct mmc_host *host); +void mmc_remove_host_debugfs(struct mmc_host *host); + #endif diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c new file mode 100644 index 000000000000..133c6e51f26b --- /dev/null +++ b/drivers/mmc/core/debugfs.c @@ -0,0 +1,164 @@ +/* + * Debugfs support for hosts and cards + * + * Copyright (C) 2008 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include + +#include + +#include "core.h" + +/* The debugfs functions are optimized away when CONFIG_DEBUG_FS isn't set. */ +static int mmc_ios_show(struct seq_file *s, void *data) +{ + static const char *vdd_str[] = { + [8] = "2.0", + [9] = "2.1", + [10] = "2.2", + [11] = "2.3", + [12] = "2.4", + [13] = "2.5", + [14] = "2.6", + [15] = "2.7", + [16] = "2.8", + [17] = "2.9", + [18] = "3.0", + [19] = "3.1", + [20] = "3.2", + [21] = "3.3", + [22] = "3.4", + [23] = "3.5", + [24] = "3.6", + }; + struct mmc_host *host = s->private; + struct mmc_ios *ios = &host->ios; + const char *str; + + seq_printf(s, "clock:\t\t%u Hz\n", ios->clock); + seq_printf(s, "vdd:\t\t%u ", ios->vdd); + if ((1 << ios->vdd) & MMC_VDD_165_195) + seq_printf(s, "(1.65 - 1.95 V)\n"); + else if (ios->vdd < (ARRAY_SIZE(vdd_str) - 1) + && vdd_str[ios->vdd] && vdd_str[ios->vdd + 1]) + seq_printf(s, "(%s ~ %s V)\n", vdd_str[ios->vdd], + vdd_str[ios->vdd + 1]); + else + seq_printf(s, "(invalid)\n"); + + switch (ios->bus_mode) { + case MMC_BUSMODE_OPENDRAIN: + str = "open drain"; + break; + case MMC_BUSMODE_PUSHPULL: + str = "push-pull"; + break; + default: + str = "invalid"; + break; + } + seq_printf(s, "bus mode:\t%u (%s)\n", ios->bus_mode, str); + + switch (ios->chip_select) { + case MMC_CS_DONTCARE: + str = "don't care"; + break; + case MMC_CS_HIGH: + str = "active high"; + break; + case MMC_CS_LOW: + str = "active low"; + break; + default: + str = "invalid"; + break; + } + seq_printf(s, "chip select:\t%u (%s)\n", ios->chip_select, str); + + switch (ios->power_mode) { + case MMC_POWER_OFF: + str = "off"; + break; + case MMC_POWER_UP: + str = "up"; + break; + case MMC_POWER_ON: + str = "on"; + break; + default: + str = "invalid"; + break; + } + seq_printf(s, "power mode:\t%u (%s)\n", ios->power_mode, str); + seq_printf(s, "bus width:\t%u (%u bits)\n", + ios->bus_width, 1 << ios->bus_width); + + switch (ios->timing) { + 
case MMC_TIMING_LEGACY: + str = "legacy"; + break; + case MMC_TIMING_MMC_HS: + str = "mmc high-speed"; + break; + case MMC_TIMING_SD_HS: + str = "sd high-speed"; + break; + default: + str = "invalid"; + break; + } + seq_printf(s, "timing spec:\t%u (%s)\n", ios->timing, str); + + return 0; +} + +static int mmc_ios_open(struct inode *inode, struct file *file) +{ + return single_open(file, mmc_ios_show, inode->i_private); +} + +static const struct file_operations mmc_ios_fops = { + .open = mmc_ios_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void mmc_add_host_debugfs(struct mmc_host *host) +{ + struct dentry *root; + + root = debugfs_create_dir(mmc_hostname(host), NULL); + if (IS_ERR(root)) + /* Don't complain -- debugfs just isn't enabled */ + return; + if (!root) + /* Complain -- debugfs is enabled, but it failed to + * create the directory. */ + goto err_root; + + host->debugfs_root = root; + + if (!debugfs_create_file("ios", S_IRUSR, root, host, &mmc_ios_fops)) + goto err_ios; + + return; + +err_ios: + debugfs_remove_recursive(root); + host->debugfs_root = NULL; +err_root: + dev_err(&host->class_dev, "failed to initialize debugfs\n"); +} + +void mmc_remove_host_debugfs(struct mmc_host *host) +{ + debugfs_remove_recursive(host->debugfs_root); +} diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 1d795c5379b5..6da80fd4d974 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -127,6 +127,10 @@ int mmc_add_host(struct mmc_host *host) if (err) return err; +#ifdef CONFIG_DEBUG_FS + mmc_add_host_debugfs(host); +#endif + mmc_start_host(host); return 0; @@ -146,6 +150,10 @@ void mmc_remove_host(struct mmc_host *host) { mmc_stop_host(host); +#ifdef CONFIG_DEBUG_FS + mmc_remove_host_debugfs(host); +#endif + device_del(&host->class_dev); led_trigger_unregister_simple(host->led); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 10a2080086ca..9c288c909878 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -157,6 +157,8 @@ struct mmc_host { struct led_trigger *led; /* activity led */ #endif + struct dentry *debugfs_root; + unsigned long private[0] ____cacheline_aligned; }; -- cgit v1.2.3 From f4b7f927b531ca350cfc4ca1bdc3377dac7f9a32 Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Thu, 24 Jul 2008 14:18:58 +0200 Subject: mmc: Add per-card debugfs support For each card successfully added to the bus, create a subdirectory under the host's debugfs root with information about the card. At the moment, only a single file is added to the card directory for all cards: "state". It reflects the "state" field in struct mmc_card, indicating whether the card is present, readonly, etc. For MMC and SD cards (not SDIO), another file is added: "status". Reading this file will ask the card about its current status and return it. This can be useful if the card just refuses to respond to any commands, which might indicate that the card state is not what the MMC core thinks it is (due to a missing stop command, for example.) 
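Further per-card attributes can follow the same DEFINE_SIMPLE_ATTRIBUTE pattern as the "status" file. A rough sketch, assuming it were added alongside the code in debugfs.c below; the "rca" file and mmc_dbg_card_rca_get() are made up for illustration, only struct mmc_card, DEFINE_SIMPLE_ATTRIBUTE() and debugfs_create_file() are the real interfaces involved:

/* sketch only: "rca" and mmc_dbg_card_rca_get() are hypothetical */
static int mmc_dbg_card_rca_get(void *data, u64 *val)
{
	struct mmc_card *card = data;

	/*
	 * card->rca is cached by the core at initialization, so no
	 * command needs to be sent here.  A getter that really talks
	 * to the card -- like the "status" file -- must bracket the
	 * access with mmc_claim_host()/mmc_release_host().
	 */
	*val = card->rca;
	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(mmc_dbg_card_rca_fops, mmc_dbg_card_rca_get,
			NULL, "0x%04llx\n");

The new file would then be created from mmc_add_card_debugfs() with one more debugfs_create_file("rca", S_IRUSR, card->debugfs_root, card, &mmc_dbg_card_rca_fops) call next to "state" and "status".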
Signed-off-by: Haavard Skinnemoen Signed-off-by: Pierre Ossman --- drivers/mmc/core/bus.c | 8 ++++++ drivers/mmc/core/core.h | 3 +++ drivers/mmc/core/debugfs.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mmc/card.h | 2 ++ 4 files changed, 74 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index fd95b18e988b..0d9b2d6f9ebf 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -252,6 +252,10 @@ int mmc_add_card(struct mmc_card *card) if (ret) return ret; +#ifdef CONFIG_DEBUG_FS + mmc_add_card_debugfs(card); +#endif + mmc_card_set_present(card); return 0; @@ -263,6 +267,10 @@ int mmc_add_card(struct mmc_card *card) */ void mmc_remove_card(struct mmc_card *card) { +#ifdef CONFIG_DEBUG_FS + mmc_remove_card_debugfs(card); +#endif + if (mmc_card_present(card)) { if (mmc_host_is_spi(card->host)) { printk(KERN_INFO "%s: SPI card removed\n", diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 745da9881aa7..c819effa1032 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -56,5 +56,8 @@ extern int use_spi_crc; void mmc_add_host_debugfs(struct mmc_host *host); void mmc_remove_host_debugfs(struct mmc_host *host); +void mmc_add_card_debugfs(struct mmc_card *card); +void mmc_remove_card_debugfs(struct mmc_card *card); + #endif diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c index 133c6e51f26b..1237bb4c722b 100644 --- a/drivers/mmc/core/debugfs.c +++ b/drivers/mmc/core/debugfs.c @@ -12,9 +12,11 @@ #include #include +#include #include #include "core.h" +#include "mmc_ops.h" /* The debugfs functions are optimized away when CONFIG_DEBUG_FS isn't set. */ static int mmc_ios_show(struct seq_file *s, void *data) @@ -162,3 +164,62 @@ void mmc_remove_host_debugfs(struct mmc_host *host) { debugfs_remove_recursive(host->debugfs_root); } + +static int mmc_dbg_card_status_get(void *data, u64 *val) +{ + struct mmc_card *card = data; + u32 status; + int ret; + + mmc_claim_host(card->host); + + ret = mmc_send_status(data, &status); + if (!ret) + *val = status; + + mmc_release_host(card->host); + + return ret; +} +DEFINE_SIMPLE_ATTRIBUTE(mmc_dbg_card_status_fops, mmc_dbg_card_status_get, + NULL, "%08llx\n"); + +void mmc_add_card_debugfs(struct mmc_card *card) +{ + struct mmc_host *host = card->host; + struct dentry *root; + + if (!host->debugfs_root) + return; + + root = debugfs_create_dir(mmc_card_id(card), host->debugfs_root); + if (IS_ERR(root)) + /* Don't complain -- debugfs just isn't enabled */ + return; + if (!root) + /* Complain -- debugfs is enabled, but it failed to + * create the directory. 
*/ + goto err; + + card->debugfs_root = root; + + if (!debugfs_create_x32("state", S_IRUSR, root, &card->state)) + goto err; + + if (mmc_card_mmc(card) || mmc_card_sd(card)) + if (!debugfs_create_file("status", S_IRUSR, root, card, + &mmc_dbg_card_status_fops)) + goto err; + + return; + +err: + debugfs_remove_recursive(root); + card->debugfs_root = NULL; + dev_err(&card->dev, "failed to initialize debugfs\n"); +} + +void mmc_remove_card_debugfs(struct mmc_card *card) +{ + debugfs_remove_recursive(card->debugfs_root); +} diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 0d508ac17d64..ee6e822d5994 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -111,6 +111,8 @@ struct mmc_card { unsigned num_info; /* number of info strings */ const char **info; /* info strings */ struct sdio_func_tuple *tuples; /* unknown common tuples */ + + struct dentry *debugfs_root; }; #define mmc_card_mmc(c) ((c)->type == MMC_TYPE_MMC) -- cgit v1.2.3 From 04578dd330f1ec6bc9c4233833bee0d0ca73ff09 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Sat, 26 Jul 2008 18:52:34 +0200 Subject: Define AF_ISDN and PF_ISDN Define the address and protocol family value for mISDN. Signed-off-by: Karsten Keil --- include/linux/socket.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 950af631e7fb..dc5086fe7736 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -189,7 +189,8 @@ struct ucred { #define AF_BLUETOOTH 31 /* Bluetooth sockets */ #define AF_IUCV 32 /* IUCV sockets */ #define AF_RXRPC 33 /* RxRPC sockets */ -#define AF_MAX 34 /* For now.. */ +#define AF_ISDN 34 /* mISDN sockets */ +#define AF_MAX 35 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -225,6 +226,7 @@ struct ucred { #define PF_BLUETOOTH AF_BLUETOOTH #define PF_IUCV AF_IUCV #define PF_RXRPC AF_RXRPC +#define PF_ISDN AF_ISDN #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. 
*/ -- cgit v1.2.3 From 1b2b03f8e514e4f68e293846ba511a948b80243c Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Sun, 27 Jul 2008 01:54:58 +0200 Subject: Add mISDN core files Add mISDN core files Signed-off-by: Karsten Keil --- drivers/isdn/mISDN/Kconfig | 9 + drivers/isdn/mISDN/Makefile | 9 + drivers/isdn/mISDN/core.c | 244 +++++ drivers/isdn/mISDN/core.h | 77 ++ drivers/isdn/mISDN/fsm.c | 183 ++++ drivers/isdn/mISDN/fsm.h | 67 ++ drivers/isdn/mISDN/hwchannel.c | 365 +++++++ drivers/isdn/mISDN/layer1.c | 403 ++++++++ drivers/isdn/mISDN/layer1.h | 26 + drivers/isdn/mISDN/layer2.c | 2216 ++++++++++++++++++++++++++++++++++++++++ drivers/isdn/mISDN/layer2.h | 140 +++ drivers/isdn/mISDN/socket.c | 781 ++++++++++++++ drivers/isdn/mISDN/stack.c | 674 ++++++++++++ drivers/isdn/mISDN/tei.c | 1340 ++++++++++++++++++++++++ drivers/isdn/mISDN/timerdev.c | 301 ++++++ include/linux/mISDNhw.h | 193 ++++ include/linux/mISDNif.h | 487 +++++++++ 17 files changed, 7515 insertions(+) create mode 100644 drivers/isdn/mISDN/Kconfig create mode 100644 drivers/isdn/mISDN/Makefile create mode 100644 drivers/isdn/mISDN/core.c create mode 100644 drivers/isdn/mISDN/core.h create mode 100644 drivers/isdn/mISDN/fsm.c create mode 100644 drivers/isdn/mISDN/fsm.h create mode 100644 drivers/isdn/mISDN/hwchannel.c create mode 100644 drivers/isdn/mISDN/layer1.c create mode 100644 drivers/isdn/mISDN/layer1.h create mode 100644 drivers/isdn/mISDN/layer2.c create mode 100644 drivers/isdn/mISDN/layer2.h create mode 100644 drivers/isdn/mISDN/socket.c create mode 100644 drivers/isdn/mISDN/stack.c create mode 100644 drivers/isdn/mISDN/tei.c create mode 100644 drivers/isdn/mISDN/timerdev.c create mode 100644 include/linux/mISDNhw.h create mode 100644 include/linux/mISDNif.h (limited to 'include/linux') diff --git a/drivers/isdn/mISDN/Kconfig b/drivers/isdn/mISDN/Kconfig new file mode 100644 index 000000000000..231bd0d08316 --- /dev/null +++ b/drivers/isdn/mISDN/Kconfig @@ -0,0 +1,9 @@ +# +# modularer ISDN driver +# + +menuconfig MISDN + tristate "Modular ISDN driver" + help + Enable support for the modular ISDN driver. + diff --git a/drivers/isdn/mISDN/Makefile b/drivers/isdn/mISDN/Makefile new file mode 100644 index 000000000000..87c563d33612 --- /dev/null +++ b/drivers/isdn/mISDN/Makefile @@ -0,0 +1,9 @@ +# +# Makefile for the modular ISDN driver +# + +obj-$(CONFIG_MISDN) += mISDN_core.o + +# multi objects + +mISDN_core-objs := core.o fsm.o socket.o hwchannel.o stack.o layer1.o layer2.o tei.o timerdev.o diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c new file mode 100644 index 000000000000..33068177b7c9 --- /dev/null +++ b/drivers/isdn/mISDN/core.c @@ -0,0 +1,244 @@ +/* + * Copyright 2008 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include +#include +#include +#include +#include +#include "core.h" + +static u_int debug; + +MODULE_AUTHOR("Karsten Keil"); +MODULE_LICENSE("GPL"); +module_param(debug, uint, S_IRUGO | S_IWUSR); + +static LIST_HEAD(devices); +DEFINE_RWLOCK(device_lock); +static u64 device_ids; +#define MAX_DEVICE_ID 63 + +static LIST_HEAD(Bprotocols); +DEFINE_RWLOCK(bp_lock); + +struct mISDNdevice +*get_mdevice(u_int id) +{ + struct mISDNdevice *dev; + + read_lock(&device_lock); + list_for_each_entry(dev, &devices, D.list) + if (dev->id == id) { + read_unlock(&device_lock); + return dev; + } + read_unlock(&device_lock); + return NULL; +} + +int +get_mdevice_count(void) +{ + struct mISDNdevice *dev; + int cnt = 0; + + read_lock(&device_lock); + list_for_each_entry(dev, &devices, D.list) + cnt++; + read_unlock(&device_lock); + return cnt; +} + +static int +get_free_devid(void) +{ + u_int i; + + for (i = 0; i <= MAX_DEVICE_ID; i++) + if (!test_and_set_bit(i, (u_long *)&device_ids)) + return i; + return -1; +} + +int +mISDN_register_device(struct mISDNdevice *dev, char *name) +{ + u_long flags; + int err; + + dev->id = get_free_devid(); + if (dev->id < 0) + return -EBUSY; + if (name && name[0]) + strcpy(dev->name, name); + else + sprintf(dev->name, "mISDN%d", dev->id); + if (debug & DEBUG_CORE) + printk(KERN_DEBUG "mISDN_register %s %d\n", + dev->name, dev->id); + err = create_stack(dev); + if (err) + return err; + write_lock_irqsave(&device_lock, flags); + list_add_tail(&dev->D.list, &devices); + write_unlock_irqrestore(&device_lock, flags); + return 0; +} +EXPORT_SYMBOL(mISDN_register_device); + +void +mISDN_unregister_device(struct mISDNdevice *dev) { + u_long flags; + + if (debug & DEBUG_CORE) + printk(KERN_DEBUG "mISDN_unregister %s %d\n", + dev->name, dev->id); + write_lock_irqsave(&device_lock, flags); + list_del(&dev->D.list); + write_unlock_irqrestore(&device_lock, flags); + test_and_clear_bit(dev->id, (u_long *)&device_ids); + delete_stack(dev); +} +EXPORT_SYMBOL(mISDN_unregister_device); + +u_int +get_all_Bprotocols(void) +{ + struct Bprotocol *bp; + u_int m = 0; + + read_lock(&bp_lock); + list_for_each_entry(bp, &Bprotocols, list) + m |= bp->Bprotocols; + read_unlock(&bp_lock); + return m; +} + +struct Bprotocol * +get_Bprotocol4mask(u_int m) +{ + struct Bprotocol *bp; + + read_lock(&bp_lock); + list_for_each_entry(bp, &Bprotocols, list) + if (bp->Bprotocols & m) { + read_unlock(&bp_lock); + return bp; + } + read_unlock(&bp_lock); + return NULL; +} + +struct Bprotocol * +get_Bprotocol4id(u_int id) +{ + u_int m; + + if (id < ISDN_P_B_START || id > 63) { + printk(KERN_WARNING "%s id not in range %d\n", + __func__, id); + return NULL; + } + m = 1 << (id & ISDN_P_B_MASK); + return get_Bprotocol4mask(m); +} + +int +mISDN_register_Bprotocol(struct Bprotocol *bp) +{ + u_long flags; + struct Bprotocol *old; + + if (debug & DEBUG_CORE) + printk(KERN_DEBUG "%s: %s/%x\n", __func__, + bp->name, bp->Bprotocols); + old = get_Bprotocol4mask(bp->Bprotocols); + if (old) { + printk(KERN_WARNING + "register duplicate protocol old %s/%x new %s/%x\n", + old->name, old->Bprotocols, bp->name, bp->Bprotocols); + return -EBUSY; + } + write_lock_irqsave(&bp_lock, flags); + list_add_tail(&bp->list, &Bprotocols); + write_unlock_irqrestore(&bp_lock, flags); + return 0; +} +EXPORT_SYMBOL(mISDN_register_Bprotocol); + +void +mISDN_unregister_Bprotocol(struct Bprotocol *bp) +{ + u_long flags; + + if (debug & DEBUG_CORE) + printk(KERN_DEBUG "%s: %s/%x\n", __func__, bp->name, + bp->Bprotocols); + 
write_lock_irqsave(&bp_lock, flags); + list_del(&bp->list); + write_unlock_irqrestore(&bp_lock, flags); +} +EXPORT_SYMBOL(mISDN_unregister_Bprotocol); + +int +mISDNInit(void) +{ + int err; + + printk(KERN_INFO "Modular ISDN core version %d.%d.%d\n", + MISDN_MAJOR_VERSION, MISDN_MINOR_VERSION, MISDN_RELEASE); + mISDN_initstack(&debug); + err = mISDN_inittimer(&debug); + if (err) + goto error; + err = l1_init(&debug); + if (err) { + mISDN_timer_cleanup(); + goto error; + } + err = Isdnl2_Init(&debug); + if (err) { + mISDN_timer_cleanup(); + l1_cleanup(); + goto error; + } + err = misdn_sock_init(&debug); + if (err) { + mISDN_timer_cleanup(); + l1_cleanup(); + Isdnl2_cleanup(); + } +error: + return err; +} + +void mISDN_cleanup(void) +{ + misdn_sock_cleanup(); + mISDN_timer_cleanup(); + l1_cleanup(); + Isdnl2_cleanup(); + + if (!list_empty(&devices)) + printk(KERN_ERR "%s devices still registered\n", __func__); + + if (!list_empty(&Bprotocols)) + printk(KERN_ERR "%s Bprotocols still registered\n", __func__); + printk(KERN_DEBUG "mISDNcore unloaded\n"); +} + +module_init(mISDNInit); +module_exit(mISDN_cleanup); + diff --git a/drivers/isdn/mISDN/core.h b/drivers/isdn/mISDN/core.h new file mode 100644 index 000000000000..7da7233b4c1a --- /dev/null +++ b/drivers/isdn/mISDN/core.h @@ -0,0 +1,77 @@ +/* + * Copyright 2008 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef mISDN_CORE_H +#define mISDN_CORE_H + +extern struct mISDNdevice *get_mdevice(u_int); +extern int get_mdevice_count(void); + +/* stack status flag */ +#define mISDN_STACK_ACTION_MASK 0x0000ffff +#define mISDN_STACK_COMMAND_MASK 0x000f0000 +#define mISDN_STACK_STATUS_MASK 0xfff00000 +/* action bits 0-15 */ +#define mISDN_STACK_WORK 0 +#define mISDN_STACK_SETUP 1 +#define mISDN_STACK_CLEARING 2 +#define mISDN_STACK_RESTART 3 +#define mISDN_STACK_WAKEUP 4 +#define mISDN_STACK_ABORT 15 +/* command bits 16-19 */ +#define mISDN_STACK_STOPPED 16 +#define mISDN_STACK_INIT 17 +#define mISDN_STACK_THREADSTART 18 +/* status bits 20-31 */ +#define mISDN_STACK_BCHANNEL 20 +#define mISDN_STACK_ACTIVE 29 +#define mISDN_STACK_RUNNING 30 +#define mISDN_STACK_KILLED 31 + + +/* manager options */ +#define MGR_OPT_USER 24 +#define MGR_OPT_NETWORK 25 + +extern int connect_Bstack(struct mISDNdevice *, struct mISDNchannel *, + u_int, struct sockaddr_mISDN *); +extern int connect_layer1(struct mISDNdevice *, struct mISDNchannel *, + u_int, struct sockaddr_mISDN *); +extern int create_l2entity(struct mISDNdevice *, struct mISDNchannel *, + u_int, struct sockaddr_mISDN *); + +extern int create_stack(struct mISDNdevice *); +extern int create_teimanager(struct mISDNdevice *); +extern void delete_teimanager(struct mISDNchannel *); +extern void delete_channel(struct mISDNchannel *); +extern void delete_stack(struct mISDNdevice *); +extern void mISDN_initstack(u_int *); +extern int misdn_sock_init(u_int *); +extern void misdn_sock_cleanup(void); +extern void add_layer2(struct mISDNchannel *, struct mISDNstack *); +extern void __add_layer2(struct mISDNchannel *, struct mISDNstack *); + +extern u_int get_all_Bprotocols(void); +struct Bprotocol *get_Bprotocol4mask(u_int); +struct Bprotocol *get_Bprotocol4id(u_int); + +extern int mISDN_inittimer(u_int *); +extern void mISDN_timer_cleanup(void); + +extern int l1_init(u_int *); +extern void l1_cleanup(void); +extern int Isdnl2_Init(u_int *); +extern void Isdnl2_cleanup(void); + +#endif diff --git a/drivers/isdn/mISDN/fsm.c b/drivers/isdn/mISDN/fsm.c new file mode 100644 index 000000000000..b5d6553f2dc8 --- /dev/null +++ b/drivers/isdn/mISDN/fsm.c @@ -0,0 +1,183 @@ +/* + * finite state machine implementation + * + * Author Karsten Keil + * + * Thanks to Jan den Ouden + * Fritz Elfert + * Copyright 2008 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include +#include +#include +#include +#include "fsm.h" + +#define FSM_TIMER_DEBUG 0 + +void +mISDN_FsmNew(struct Fsm *fsm, + struct FsmNode *fnlist, int fncount) +{ + int i; + + fsm->jumpmatrix = kzalloc(sizeof(FSMFNPTR) * fsm->state_count * + fsm->event_count, GFP_KERNEL); + + for (i = 0; i < fncount; i++) + if ((fnlist[i].state >= fsm->state_count) || + (fnlist[i].event >= fsm->event_count)) { + printk(KERN_ERR + "mISDN_FsmNew Error: %d st(%ld/%ld) ev(%ld/%ld)\n", + i, (long)fnlist[i].state, (long)fsm->state_count, + (long)fnlist[i].event, (long)fsm->event_count); + } else + fsm->jumpmatrix[fsm->state_count * fnlist[i].event + + fnlist[i].state] = (FSMFNPTR) fnlist[i].routine; +} +EXPORT_SYMBOL(mISDN_FsmNew); + +void +mISDN_FsmFree(struct Fsm *fsm) +{ + kfree((void *) fsm->jumpmatrix); +} +EXPORT_SYMBOL(mISDN_FsmFree); + +int +mISDN_FsmEvent(struct FsmInst *fi, int event, void *arg) +{ + FSMFNPTR r; + + if ((fi->state >= fi->fsm->state_count) || + (event >= fi->fsm->event_count)) { + printk(KERN_ERR + "mISDN_FsmEvent Error st(%ld/%ld) ev(%d/%ld)\n", + (long)fi->state, (long)fi->fsm->state_count, event, + (long)fi->fsm->event_count); + return 1; + } + r = fi->fsm->jumpmatrix[fi->fsm->state_count * event + fi->state]; + if (r) { + if (fi->debug) + fi->printdebug(fi, "State %s Event %s", + fi->fsm->strState[fi->state], + fi->fsm->strEvent[event]); + r(fi, event, arg); + return 0; + } else { + if (fi->debug) + fi->printdebug(fi, "State %s Event %s no action", + fi->fsm->strState[fi->state], + fi->fsm->strEvent[event]); + return 1; + } +} +EXPORT_SYMBOL(mISDN_FsmEvent); + +void +mISDN_FsmChangeState(struct FsmInst *fi, int newstate) +{ + fi->state = newstate; + if (fi->debug) + fi->printdebug(fi, "ChangeState %s", + fi->fsm->strState[newstate]); +} +EXPORT_SYMBOL(mISDN_FsmChangeState); + +static void +FsmExpireTimer(struct FsmTimer *ft) +{ +#if FSM_TIMER_DEBUG + if (ft->fi->debug) + ft->fi->printdebug(ft->fi, "FsmExpireTimer %lx", (long) ft); +#endif + mISDN_FsmEvent(ft->fi, ft->event, ft->arg); +} + +void +mISDN_FsmInitTimer(struct FsmInst *fi, struct FsmTimer *ft) +{ + ft->fi = fi; + ft->tl.function = (void *) FsmExpireTimer; + ft->tl.data = (long) ft; +#if FSM_TIMER_DEBUG + if (ft->fi->debug) + ft->fi->printdebug(ft->fi, "mISDN_FsmInitTimer %lx", (long) ft); +#endif + init_timer(&ft->tl); +} +EXPORT_SYMBOL(mISDN_FsmInitTimer); + +void +mISDN_FsmDelTimer(struct FsmTimer *ft, int where) +{ +#if FSM_TIMER_DEBUG + if (ft->fi->debug) + ft->fi->printdebug(ft->fi, "mISDN_FsmDelTimer %lx %d", + (long) ft, where); +#endif + del_timer(&ft->tl); +} +EXPORT_SYMBOL(mISDN_FsmDelTimer); + +int +mISDN_FsmAddTimer(struct FsmTimer *ft, + int millisec, int event, void *arg, int where) +{ + +#if FSM_TIMER_DEBUG + if (ft->fi->debug) + ft->fi->printdebug(ft->fi, "mISDN_FsmAddTimer %lx %d %d", + (long) ft, millisec, where); +#endif + + if (timer_pending(&ft->tl)) { + if (ft->fi->debug) { + printk(KERN_WARNING + "mISDN_FsmAddTimer: timer already active!\n"); + ft->fi->printdebug(ft->fi, + "mISDN_FsmAddTimer already active!"); + } + return -1; + } + init_timer(&ft->tl); + ft->event = event; + ft->arg = arg; + ft->tl.expires = jiffies + (millisec * HZ) / 1000; + add_timer(&ft->tl); + return 0; +} +EXPORT_SYMBOL(mISDN_FsmAddTimer); + +void +mISDN_FsmRestartTimer(struct FsmTimer *ft, + int millisec, int event, void *arg, int where) +{ + +#if FSM_TIMER_DEBUG + if (ft->fi->debug) + ft->fi->printdebug(ft->fi, "mISDN_FsmRestartTimer %lx %d %d", + (long) ft, millisec, where); +#endif + + if 
(timer_pending(&ft->tl)) + del_timer(&ft->tl); + init_timer(&ft->tl); + ft->event = event; + ft->arg = arg; + ft->tl.expires = jiffies + (millisec * HZ) / 1000; + add_timer(&ft->tl); +} +EXPORT_SYMBOL(mISDN_FsmRestartTimer); diff --git a/drivers/isdn/mISDN/fsm.h b/drivers/isdn/mISDN/fsm.h new file mode 100644 index 000000000000..928f5be192c1 --- /dev/null +++ b/drivers/isdn/mISDN/fsm.h @@ -0,0 +1,67 @@ +/* + * + * Author Karsten Keil + * + * Thanks to Jan den Ouden + * Fritz Elfert + * Copyright 2008 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _MISDN_FSM_H +#define _MISDN_FSM_H + +#include + +/* Statemachine */ + +struct FsmInst; + +typedef void (*FSMFNPTR)(struct FsmInst *, int, void *); + +struct Fsm { + FSMFNPTR *jumpmatrix; + int state_count, event_count; + char **strEvent, **strState; +}; + +struct FsmInst { + struct Fsm *fsm; + int state; + int debug; + void *userdata; + int userint; + void (*printdebug) (struct FsmInst *, char *, ...); +}; + +struct FsmNode { + int state, event; + void (*routine) (struct FsmInst *, int, void *); +}; + +struct FsmTimer { + struct FsmInst *fi; + struct timer_list tl; + int event; + void *arg; +}; + +extern void mISDN_FsmNew(struct Fsm *, struct FsmNode *, int); +extern void mISDN_FsmFree(struct Fsm *); +extern int mISDN_FsmEvent(struct FsmInst *, int , void *); +extern void mISDN_FsmChangeState(struct FsmInst *, int); +extern void mISDN_FsmInitTimer(struct FsmInst *, struct FsmTimer *); +extern int mISDN_FsmAddTimer(struct FsmTimer *, int, int, void *, int); +extern void mISDN_FsmRestartTimer(struct FsmTimer *, int, int, void *, int); +extern void mISDN_FsmDelTimer(struct FsmTimer *, int); + +#endif diff --git a/drivers/isdn/mISDN/hwchannel.c b/drivers/isdn/mISDN/hwchannel.c new file mode 100644 index 000000000000..2596fba4e614 --- /dev/null +++ b/drivers/isdn/mISDN/hwchannel.c @@ -0,0 +1,365 @@ +/* + * + * Author Karsten Keil + * + * Copyright 2008 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include +#include + +static void +dchannel_bh(struct work_struct *ws) +{ + struct dchannel *dch = container_of(ws, struct dchannel, workq); + struct sk_buff *skb; + int err; + + if (test_and_clear_bit(FLG_RECVQUEUE, &dch->Flags)) { + while ((skb = skb_dequeue(&dch->rqueue))) { + if (likely(dch->dev.D.peer)) { + err = dch->dev.D.recv(dch->dev.D.peer, skb); + if (err) + dev_kfree_skb(skb); + } else + dev_kfree_skb(skb); + } + } + if (test_and_clear_bit(FLG_PHCHANGE, &dch->Flags)) { + if (dch->phfunc) + dch->phfunc(dch); + } +} + +static void +bchannel_bh(struct work_struct *ws) +{ + struct bchannel *bch = container_of(ws, struct bchannel, workq); + struct sk_buff *skb; + int err; + + if (test_and_clear_bit(FLG_RECVQUEUE, &bch->Flags)) { + while ((skb = skb_dequeue(&bch->rqueue))) { + if (bch->rcount >= 64) + printk(KERN_WARNING "B-channel %p receive " + "queue if full, but empties...\n", bch); + bch->rcount--; + if (likely(bch->ch.peer)) { + err = bch->ch.recv(bch->ch.peer, skb); + if (err) + dev_kfree_skb(skb); + } else + dev_kfree_skb(skb); + } + } +} + +int +mISDN_initdchannel(struct dchannel *ch, int maxlen, void *phf) +{ + test_and_set_bit(FLG_HDLC, &ch->Flags); + ch->maxlen = maxlen; + ch->hw = NULL; + ch->rx_skb = NULL; + ch->tx_skb = NULL; + ch->tx_idx = 0; + ch->phfunc = phf; + skb_queue_head_init(&ch->squeue); + skb_queue_head_init(&ch->rqueue); + INIT_LIST_HEAD(&ch->dev.bchannels); + INIT_WORK(&ch->workq, dchannel_bh); + return 0; +} +EXPORT_SYMBOL(mISDN_initdchannel); + +int +mISDN_initbchannel(struct bchannel *ch, int maxlen) +{ + ch->Flags = 0; + ch->maxlen = maxlen; + ch->hw = NULL; + ch->rx_skb = NULL; + ch->tx_skb = NULL; + ch->tx_idx = 0; + skb_queue_head_init(&ch->rqueue); + ch->rcount = 0; + ch->next_skb = NULL; + INIT_WORK(&ch->workq, bchannel_bh); + return 0; +} +EXPORT_SYMBOL(mISDN_initbchannel); + +int +mISDN_freedchannel(struct dchannel *ch) +{ + if (ch->tx_skb) { + dev_kfree_skb(ch->tx_skb); + ch->tx_skb = NULL; + } + if (ch->rx_skb) { + dev_kfree_skb(ch->rx_skb); + ch->rx_skb = NULL; + } + skb_queue_purge(&ch->squeue); + skb_queue_purge(&ch->rqueue); + flush_scheduled_work(); + return 0; +} +EXPORT_SYMBOL(mISDN_freedchannel); + +int +mISDN_freebchannel(struct bchannel *ch) +{ + if (ch->tx_skb) { + dev_kfree_skb(ch->tx_skb); + ch->tx_skb = NULL; + } + if (ch->rx_skb) { + dev_kfree_skb(ch->rx_skb); + ch->rx_skb = NULL; + } + if (ch->next_skb) { + dev_kfree_skb(ch->next_skb); + ch->next_skb = NULL; + } + skb_queue_purge(&ch->rqueue); + ch->rcount = 0; + flush_scheduled_work(); + return 0; +} +EXPORT_SYMBOL(mISDN_freebchannel); + +static inline u_int +get_sapi_tei(u_char *p) +{ + u_int sapi, tei; + + sapi = *p >> 2; + tei = p[1] >> 1; + return sapi | (tei << 8); +} + +void +recv_Dchannel(struct dchannel *dch) +{ + struct mISDNhead *hh; + + if (dch->rx_skb->len < 2) { /* at least 2 for sapi / tei */ + dev_kfree_skb(dch->rx_skb); + dch->rx_skb = NULL; + return; + } + hh = mISDN_HEAD_P(dch->rx_skb); + hh->prim = PH_DATA_IND; + hh->id = get_sapi_tei(dch->rx_skb->data); + skb_queue_tail(&dch->rqueue, dch->rx_skb); + dch->rx_skb = NULL; + schedule_event(dch, FLG_RECVQUEUE); +} +EXPORT_SYMBOL(recv_Dchannel); + +void +recv_Bchannel(struct bchannel *bch) +{ + struct mISDNhead *hh; + + hh = mISDN_HEAD_P(bch->rx_skb); + hh->prim = PH_DATA_IND; + hh->id = MISDN_ID_ANY; + if (bch->rcount >= 64) { + dev_kfree_skb(bch->rx_skb); + bch->rx_skb = NULL; + return; + } + bch->rcount++; + skb_queue_tail(&bch->rqueue, bch->rx_skb); + bch->rx_skb = NULL; + schedule_event(bch, 
FLG_RECVQUEUE); +} +EXPORT_SYMBOL(recv_Bchannel); + +void +recv_Dchannel_skb(struct dchannel *dch, struct sk_buff *skb) +{ + skb_queue_tail(&dch->rqueue, skb); + schedule_event(dch, FLG_RECVQUEUE); +} +EXPORT_SYMBOL(recv_Dchannel_skb); + +void +recv_Bchannel_skb(struct bchannel *bch, struct sk_buff *skb) +{ + if (bch->rcount >= 64) { + dev_kfree_skb(skb); + return; + } + bch->rcount++; + skb_queue_tail(&bch->rqueue, skb); + schedule_event(bch, FLG_RECVQUEUE); +} +EXPORT_SYMBOL(recv_Bchannel_skb); + +static void +confirm_Dsend(struct dchannel *dch) +{ + struct sk_buff *skb; + + skb = _alloc_mISDN_skb(PH_DATA_CNF, mISDN_HEAD_ID(dch->tx_skb), + 0, NULL, GFP_ATOMIC); + if (!skb) { + printk(KERN_ERR "%s: no skb id %x\n", __func__, + mISDN_HEAD_ID(dch->tx_skb)); + return; + } + skb_queue_tail(&dch->rqueue, skb); + schedule_event(dch, FLG_RECVQUEUE); +} + +int +get_next_dframe(struct dchannel *dch) +{ + dch->tx_idx = 0; + dch->tx_skb = skb_dequeue(&dch->squeue); + if (dch->tx_skb) { + confirm_Dsend(dch); + return 1; + } + dch->tx_skb = NULL; + test_and_clear_bit(FLG_TX_BUSY, &dch->Flags); + return 0; +} +EXPORT_SYMBOL(get_next_dframe); + +void +confirm_Bsend(struct bchannel *bch) +{ + struct sk_buff *skb; + + if (bch->rcount >= 64) + return; + skb = _alloc_mISDN_skb(PH_DATA_CNF, mISDN_HEAD_ID(bch->tx_skb), + 0, NULL, GFP_ATOMIC); + if (!skb) { + printk(KERN_ERR "%s: no skb id %x\n", __func__, + mISDN_HEAD_ID(bch->tx_skb)); + return; + } + bch->rcount++; + skb_queue_tail(&bch->rqueue, skb); + schedule_event(bch, FLG_RECVQUEUE); +} +EXPORT_SYMBOL(confirm_Bsend); + +int +get_next_bframe(struct bchannel *bch) +{ + bch->tx_idx = 0; + if (test_bit(FLG_TX_NEXT, &bch->Flags)) { + bch->tx_skb = bch->next_skb; + if (bch->tx_skb) { + bch->next_skb = NULL; + test_and_clear_bit(FLG_TX_NEXT, &bch->Flags); + if (!test_bit(FLG_TRANSPARENT, &bch->Flags)) + confirm_Bsend(bch); /* not for transparent */ + return 1; + } else { + test_and_clear_bit(FLG_TX_NEXT, &bch->Flags); + printk(KERN_WARNING "B TX_NEXT without skb\n"); + } + } + bch->tx_skb = NULL; + test_and_clear_bit(FLG_TX_BUSY, &bch->Flags); + return 0; +} +EXPORT_SYMBOL(get_next_bframe); + +void +queue_ch_frame(struct mISDNchannel *ch, u_int pr, int id, struct sk_buff *skb) +{ + struct mISDNhead *hh; + + if (!skb) { + _queue_data(ch, pr, id, 0, NULL, GFP_ATOMIC); + } else { + if (ch->peer) { + hh = mISDN_HEAD_P(skb); + hh->prim = pr; + hh->id = id; + if (!ch->recv(ch->peer, skb)) + return; + } + dev_kfree_skb(skb); + } +} +EXPORT_SYMBOL(queue_ch_frame); + +int +dchannel_senddata(struct dchannel *ch, struct sk_buff *skb) +{ + /* check oversize */ + if (skb->len <= 0) { + printk(KERN_WARNING "%s: skb too small\n", __func__); + return -EINVAL; + } + if (skb->len > ch->maxlen) { + printk(KERN_WARNING "%s: skb too large(%d/%d)\n", + __func__, skb->len, ch->maxlen); + return -EINVAL; + } + /* HW lock must be obtained */ + if (test_and_set_bit(FLG_TX_BUSY, &ch->Flags)) { + skb_queue_tail(&ch->squeue, skb); + return 0; + } else { + /* write to fifo */ + ch->tx_skb = skb; + ch->tx_idx = 0; + return 1; + } +} +EXPORT_SYMBOL(dchannel_senddata); + +int +bchannel_senddata(struct bchannel *ch, struct sk_buff *skb) +{ + + /* check oversize */ + if (skb->len <= 0) { + printk(KERN_WARNING "%s: skb too small\n", __func__); + return -EINVAL; + } + if (skb->len > ch->maxlen) { + printk(KERN_WARNING "%s: skb too large(%d/%d)\n", + __func__, skb->len, ch->maxlen); + return -EINVAL; + } + /* HW lock must be obtained */ + /* check for pending next_skb */ + if (ch->next_skb) { + 
printk(KERN_WARNING + "%s: next_skb exist ERROR (skb->len=%d next_skb->len=%d)\n", + __func__, skb->len, ch->next_skb->len); + return -EBUSY; + } + if (test_and_set_bit(FLG_TX_BUSY, &ch->Flags)) { + test_and_set_bit(FLG_TX_NEXT, &ch->Flags); + ch->next_skb = skb; + return 0; + } else { + /* write to fifo */ + ch->tx_skb = skb; + ch->tx_idx = 0; + return 1; + } +} +EXPORT_SYMBOL(bchannel_senddata); diff --git a/drivers/isdn/mISDN/layer1.c b/drivers/isdn/mISDN/layer1.c new file mode 100644 index 000000000000..fced1a2755f8 --- /dev/null +++ b/drivers/isdn/mISDN/layer1.c @@ -0,0 +1,403 @@ +/* + * + * Author Karsten Keil + * + * Copyright 2008 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + + +#include +#include +#include "layer1.h" +#include "fsm.h" + +static int *debug; + +struct layer1 { + u_long Flags; + struct FsmInst l1m; + struct FsmTimer timer; + int delay; + struct dchannel *dch; + dchannel_l1callback *dcb; +}; + +#define TIMER3_VALUE 7000 + +static +struct Fsm l1fsm_s = {NULL, 0, 0, NULL, NULL}; + +enum { + ST_L1_F2, + ST_L1_F3, + ST_L1_F4, + ST_L1_F5, + ST_L1_F6, + ST_L1_F7, + ST_L1_F8, +}; + +#define L1S_STATE_COUNT (ST_L1_F8+1) + +static char *strL1SState[] = +{ + "ST_L1_F2", + "ST_L1_F3", + "ST_L1_F4", + "ST_L1_F5", + "ST_L1_F6", + "ST_L1_F7", + "ST_L1_F8", +}; + +enum { + EV_PH_ACTIVATE, + EV_PH_DEACTIVATE, + EV_RESET_IND, + EV_DEACT_CNF, + EV_DEACT_IND, + EV_POWER_UP, + EV_ANYSIG_IND, + EV_INFO2_IND, + EV_INFO4_IND, + EV_TIMER_DEACT, + EV_TIMER_ACT, + EV_TIMER3, +}; + +#define L1_EVENT_COUNT (EV_TIMER3 + 1) + +static char *strL1Event[] = +{ + "EV_PH_ACTIVATE", + "EV_PH_DEACTIVATE", + "EV_RESET_IND", + "EV_DEACT_CNF", + "EV_DEACT_IND", + "EV_POWER_UP", + "EV_ANYSIG_IND", + "EV_INFO2_IND", + "EV_INFO4_IND", + "EV_TIMER_DEACT", + "EV_TIMER_ACT", + "EV_TIMER3", +}; + +static void +l1m_debug(struct FsmInst *fi, char *fmt, ...) 
+{ + struct layer1 *l1 = fi->userdata; + va_list va; + + va_start(va, fmt); + printk(KERN_DEBUG "%s: ", l1->dch->dev.name); + vprintk(fmt, va); + printk("\n"); + va_end(va); +} + +static void +l1_reset(struct FsmInst *fi, int event, void *arg) +{ + mISDN_FsmChangeState(fi, ST_L1_F3); +} + +static void +l1_deact_cnf(struct FsmInst *fi, int event, void *arg) +{ + struct layer1 *l1 = fi->userdata; + + mISDN_FsmChangeState(fi, ST_L1_F3); + if (test_bit(FLG_L1_ACTIVATING, &l1->Flags)) + l1->dcb(l1->dch, HW_POWERUP_REQ); +} + +static void +l1_deact_req_s(struct FsmInst *fi, int event, void *arg) +{ + struct layer1 *l1 = fi->userdata; + + mISDN_FsmChangeState(fi, ST_L1_F3); + mISDN_FsmRestartTimer(&l1->timer, 550, EV_TIMER_DEACT, NULL, 2); + test_and_set_bit(FLG_L1_DEACTTIMER, &l1->Flags); +} + +static void +l1_power_up_s(struct FsmInst *fi, int event, void *arg) +{ + struct layer1 *l1 = fi->userdata; + + if (test_bit(FLG_L1_ACTIVATING, &l1->Flags)) { + mISDN_FsmChangeState(fi, ST_L1_F4); + l1->dcb(l1->dch, INFO3_P8); + } else + mISDN_FsmChangeState(fi, ST_L1_F3); +} + +static void +l1_go_F5(struct FsmInst *fi, int event, void *arg) +{ + mISDN_FsmChangeState(fi, ST_L1_F5); +} + +static void +l1_go_F8(struct FsmInst *fi, int event, void *arg) +{ + mISDN_FsmChangeState(fi, ST_L1_F8); +} + +static void +l1_info2_ind(struct FsmInst *fi, int event, void *arg) +{ + struct layer1 *l1 = fi->userdata; + + mISDN_FsmChangeState(fi, ST_L1_F6); + l1->dcb(l1->dch, INFO3_P8); +} + +static void +l1_info4_ind(struct FsmInst *fi, int event, void *arg) +{ + struct layer1 *l1 = fi->userdata; + + mISDN_FsmChangeState(fi, ST_L1_F7); + l1->dcb(l1->dch, INFO3_P8); + if (test_and_clear_bit(FLG_L1_DEACTTIMER, &l1->Flags)) + mISDN_FsmDelTimer(&l1->timer, 4); + if (!test_bit(FLG_L1_ACTIVATED, &l1->Flags)) { + if (test_and_clear_bit(FLG_L1_T3RUN, &l1->Flags)) + mISDN_FsmDelTimer(&l1->timer, 3); + mISDN_FsmRestartTimer(&l1->timer, 110, EV_TIMER_ACT, NULL, 2); + test_and_set_bit(FLG_L1_ACTTIMER, &l1->Flags); + } +} + +static void +l1_timer3(struct FsmInst *fi, int event, void *arg) +{ + struct layer1 *l1 = fi->userdata; + + test_and_clear_bit(FLG_L1_T3RUN, &l1->Flags); + if (test_and_clear_bit(FLG_L1_ACTIVATING, &l1->Flags)) { + if (test_and_clear_bit(FLG_L1_DBLOCKED, &l1->Flags)) + l1->dcb(l1->dch, HW_D_NOBLOCKED); + l1->dcb(l1->dch, PH_DEACTIVATE_IND); + } + if (l1->l1m.state != ST_L1_F6) { + mISDN_FsmChangeState(fi, ST_L1_F3); + l1->dcb(l1->dch, HW_POWERUP_REQ); + } +} + +static void +l1_timer_act(struct FsmInst *fi, int event, void *arg) +{ + struct layer1 *l1 = fi->userdata; + + test_and_clear_bit(FLG_L1_ACTTIMER, &l1->Flags); + test_and_set_bit(FLG_L1_ACTIVATED, &l1->Flags); + l1->dcb(l1->dch, PH_ACTIVATE_IND); +} + +static void +l1_timer_deact(struct FsmInst *fi, int event, void *arg) +{ + struct layer1 *l1 = fi->userdata; + + test_and_clear_bit(FLG_L1_DEACTTIMER, &l1->Flags); + test_and_clear_bit(FLG_L1_ACTIVATED, &l1->Flags); + if (test_and_clear_bit(FLG_L1_DBLOCKED, &l1->Flags)) + l1->dcb(l1->dch, HW_D_NOBLOCKED); + l1->dcb(l1->dch, PH_DEACTIVATE_IND); + l1->dcb(l1->dch, HW_DEACT_REQ); +} + +static void +l1_activate_s(struct FsmInst *fi, int event, void *arg) +{ + struct layer1 *l1 = fi->userdata; + + mISDN_FsmRestartTimer(&l1->timer, TIMER3_VALUE, EV_TIMER3, NULL, 2); + test_and_set_bit(FLG_L1_T3RUN, &l1->Flags); + l1->dcb(l1->dch, HW_RESET_REQ); +} + +static void +l1_activate_no(struct FsmInst *fi, int event, void *arg) +{ + struct layer1 *l1 = fi->userdata; + + if ((!test_bit(FLG_L1_DEACTTIMER, &l1->Flags)) && + 
(!test_bit(FLG_L1_T3RUN, &l1->Flags))) { + test_and_clear_bit(FLG_L1_ACTIVATING, &l1->Flags); + if (test_and_clear_bit(FLG_L1_DBLOCKED, &l1->Flags)) + l1->dcb(l1->dch, HW_D_NOBLOCKED); + l1->dcb(l1->dch, PH_DEACTIVATE_IND); + } +} + +static struct FsmNode L1SFnList[] = +{ + {ST_L1_F3, EV_PH_ACTIVATE, l1_activate_s}, + {ST_L1_F6, EV_PH_ACTIVATE, l1_activate_no}, + {ST_L1_F8, EV_PH_ACTIVATE, l1_activate_no}, + {ST_L1_F3, EV_RESET_IND, l1_reset}, + {ST_L1_F4, EV_RESET_IND, l1_reset}, + {ST_L1_F5, EV_RESET_IND, l1_reset}, + {ST_L1_F6, EV_RESET_IND, l1_reset}, + {ST_L1_F7, EV_RESET_IND, l1_reset}, + {ST_L1_F8, EV_RESET_IND, l1_reset}, + {ST_L1_F3, EV_DEACT_CNF, l1_deact_cnf}, + {ST_L1_F4, EV_DEACT_CNF, l1_deact_cnf}, + {ST_L1_F5, EV_DEACT_CNF, l1_deact_cnf}, + {ST_L1_F6, EV_DEACT_CNF, l1_deact_cnf}, + {ST_L1_F7, EV_DEACT_CNF, l1_deact_cnf}, + {ST_L1_F8, EV_DEACT_CNF, l1_deact_cnf}, + {ST_L1_F6, EV_DEACT_IND, l1_deact_req_s}, + {ST_L1_F7, EV_DEACT_IND, l1_deact_req_s}, + {ST_L1_F8, EV_DEACT_IND, l1_deact_req_s}, + {ST_L1_F3, EV_POWER_UP, l1_power_up_s}, + {ST_L1_F4, EV_ANYSIG_IND, l1_go_F5}, + {ST_L1_F6, EV_ANYSIG_IND, l1_go_F8}, + {ST_L1_F7, EV_ANYSIG_IND, l1_go_F8}, + {ST_L1_F3, EV_INFO2_IND, l1_info2_ind}, + {ST_L1_F4, EV_INFO2_IND, l1_info2_ind}, + {ST_L1_F5, EV_INFO2_IND, l1_info2_ind}, + {ST_L1_F7, EV_INFO2_IND, l1_info2_ind}, + {ST_L1_F8, EV_INFO2_IND, l1_info2_ind}, + {ST_L1_F3, EV_INFO4_IND, l1_info4_ind}, + {ST_L1_F4, EV_INFO4_IND, l1_info4_ind}, + {ST_L1_F5, EV_INFO4_IND, l1_info4_ind}, + {ST_L1_F6, EV_INFO4_IND, l1_info4_ind}, + {ST_L1_F8, EV_INFO4_IND, l1_info4_ind}, + {ST_L1_F3, EV_TIMER3, l1_timer3}, + {ST_L1_F4, EV_TIMER3, l1_timer3}, + {ST_L1_F5, EV_TIMER3, l1_timer3}, + {ST_L1_F6, EV_TIMER3, l1_timer3}, + {ST_L1_F8, EV_TIMER3, l1_timer3}, + {ST_L1_F7, EV_TIMER_ACT, l1_timer_act}, + {ST_L1_F3, EV_TIMER_DEACT, l1_timer_deact}, + {ST_L1_F4, EV_TIMER_DEACT, l1_timer_deact}, + {ST_L1_F5, EV_TIMER_DEACT, l1_timer_deact}, + {ST_L1_F6, EV_TIMER_DEACT, l1_timer_deact}, + {ST_L1_F7, EV_TIMER_DEACT, l1_timer_deact}, + {ST_L1_F8, EV_TIMER_DEACT, l1_timer_deact}, +}; + +static void +release_l1(struct layer1 *l1) { + mISDN_FsmDelTimer(&l1->timer, 0); + if (l1->dch) + l1->dch->l1 = NULL; + module_put(THIS_MODULE); + kfree(l1); +} + +int +l1_event(struct layer1 *l1, u_int event) +{ + int err = 0; + + if (!l1) + return -EINVAL; + switch (event) { + case HW_RESET_IND: + mISDN_FsmEvent(&l1->l1m, EV_RESET_IND, NULL); + break; + case HW_DEACT_IND: + mISDN_FsmEvent(&l1->l1m, EV_DEACT_IND, NULL); + break; + case HW_POWERUP_IND: + mISDN_FsmEvent(&l1->l1m, EV_POWER_UP, NULL); + break; + case HW_DEACT_CNF: + mISDN_FsmEvent(&l1->l1m, EV_DEACT_CNF, NULL); + break; + case ANYSIGNAL: + mISDN_FsmEvent(&l1->l1m, EV_ANYSIG_IND, NULL); + break; + case LOSTFRAMING: + mISDN_FsmEvent(&l1->l1m, EV_ANYSIG_IND, NULL); + break; + case INFO2: + mISDN_FsmEvent(&l1->l1m, EV_INFO2_IND, NULL); + break; + case INFO4_P8: + mISDN_FsmEvent(&l1->l1m, EV_INFO4_IND, NULL); + break; + case INFO4_P10: + mISDN_FsmEvent(&l1->l1m, EV_INFO4_IND, NULL); + break; + case PH_ACTIVATE_REQ: + if (test_bit(FLG_L1_ACTIVATED, &l1->Flags)) + l1->dcb(l1->dch, PH_ACTIVATE_IND); + else { + test_and_set_bit(FLG_L1_ACTIVATING, &l1->Flags); + mISDN_FsmEvent(&l1->l1m, EV_PH_ACTIVATE, NULL); + } + break; + case CLOSE_CHANNEL: + release_l1(l1); + break; + default: + if (*debug & DEBUG_L1) + printk(KERN_DEBUG "%s %x unhandled\n", + __func__, event); + err = -EINVAL; + } + return err; +} +EXPORT_SYMBOL(l1_event); + +int +create_l1(struct dchannel *dch, 
dchannel_l1callback *dcb) { + struct layer1 *nl1; + + nl1 = kzalloc(sizeof(struct layer1), GFP_ATOMIC); + if (!nl1) { + printk(KERN_ERR "kmalloc struct layer1 failed\n"); + return -ENOMEM; + } + nl1->l1m.fsm = &l1fsm_s; + nl1->l1m.state = ST_L1_F3; + nl1->Flags = 0; + nl1->l1m.debug = *debug & DEBUG_L1_FSM; + nl1->l1m.userdata = nl1; + nl1->l1m.userint = 0; + nl1->l1m.printdebug = l1m_debug; + nl1->dch = dch; + nl1->dcb = dcb; + mISDN_FsmInitTimer(&nl1->l1m, &nl1->timer); + __module_get(THIS_MODULE); + dch->l1 = nl1; + return 0; +} +EXPORT_SYMBOL(create_l1); + +int +l1_init(u_int *deb) +{ + debug = deb; + l1fsm_s.state_count = L1S_STATE_COUNT; + l1fsm_s.event_count = L1_EVENT_COUNT; + l1fsm_s.strEvent = strL1Event; + l1fsm_s.strState = strL1SState; + mISDN_FsmNew(&l1fsm_s, L1SFnList, ARRAY_SIZE(L1SFnList)); + return 0; +} + +void +l1_cleanup(void) +{ + mISDN_FsmFree(&l1fsm_s); +} diff --git a/drivers/isdn/mISDN/layer1.h b/drivers/isdn/mISDN/layer1.h new file mode 100644 index 000000000000..9c8125fd89af --- /dev/null +++ b/drivers/isdn/mISDN/layer1.h @@ -0,0 +1,26 @@ +/* + * + * Layer 1 defines + * + * Copyright 2008 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#define FLG_L1_ACTIVATING 1 +#define FLG_L1_ACTIVATED 2 +#define FLG_L1_DEACTTIMER 3 +#define FLG_L1_ACTTIMER 4 +#define FLG_L1_T3RUN 5 +#define FLG_L1_PULL_REQ 6 +#define FLG_L1_UINT 7 +#define FLG_L1_DBLOCKED 8 + diff --git a/drivers/isdn/mISDN/layer2.c b/drivers/isdn/mISDN/layer2.c new file mode 100644 index 000000000000..f5ad888ee71e --- /dev/null +++ b/drivers/isdn/mISDN/layer2.c @@ -0,0 +1,2216 @@ +/* + * + * Author Karsten Keil + * + * Copyright 2008 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include "fsm.h" +#include "layer2.h" + +static int *debug; + +static +struct Fsm l2fsm = {NULL, 0, 0, NULL, NULL}; + +static char *strL2State[] = +{ + "ST_L2_1", + "ST_L2_2", + "ST_L2_3", + "ST_L2_4", + "ST_L2_5", + "ST_L2_6", + "ST_L2_7", + "ST_L2_8", +}; + +enum { + EV_L2_UI, + EV_L2_SABME, + EV_L2_DISC, + EV_L2_DM, + EV_L2_UA, + EV_L2_FRMR, + EV_L2_SUPER, + EV_L2_I, + EV_L2_DL_DATA, + EV_L2_ACK_PULL, + EV_L2_DL_UNITDATA, + EV_L2_DL_ESTABLISH_REQ, + EV_L2_DL_RELEASE_REQ, + EV_L2_MDL_ASSIGN, + EV_L2_MDL_REMOVE, + EV_L2_MDL_ERROR, + EV_L1_DEACTIVATE, + EV_L2_T200, + EV_L2_T203, + EV_L2_SET_OWN_BUSY, + EV_L2_CLEAR_OWN_BUSY, + EV_L2_FRAME_ERROR, +}; + +#define L2_EVENT_COUNT (EV_L2_FRAME_ERROR+1) + +static char *strL2Event[] = +{ + "EV_L2_UI", + "EV_L2_SABME", + "EV_L2_DISC", + "EV_L2_DM", + "EV_L2_UA", + "EV_L2_FRMR", + "EV_L2_SUPER", + "EV_L2_I", + "EV_L2_DL_DATA", + "EV_L2_ACK_PULL", + "EV_L2_DL_UNITDATA", + "EV_L2_DL_ESTABLISH_REQ", + "EV_L2_DL_RELEASE_REQ", + "EV_L2_MDL_ASSIGN", + "EV_L2_MDL_REMOVE", + "EV_L2_MDL_ERROR", + "EV_L1_DEACTIVATE", + "EV_L2_T200", + "EV_L2_T203", + "EV_L2_SET_OWN_BUSY", + "EV_L2_CLEAR_OWN_BUSY", + "EV_L2_FRAME_ERROR", +}; + +static void +l2m_debug(struct FsmInst *fi, char *fmt, ...) +{ + struct layer2 *l2 = fi->userdata; + va_list va; + + if (!(*debug & DEBUG_L2_FSM)) + return; + va_start(va, fmt); + printk(KERN_DEBUG "l2 (tei %d): ", l2->tei); + vprintk(fmt, va); + printk("\n"); + va_end(va); +} + +inline u_int +l2headersize(struct layer2 *l2, int ui) +{ + return ((test_bit(FLG_MOD128, &l2->flag) && (!ui)) ? 2 : 1) + + (test_bit(FLG_LAPD, &l2->flag) ? 2 : 1); +} + +inline u_int +l2addrsize(struct layer2 *l2) +{ + return test_bit(FLG_LAPD, &l2->flag) ? 2 : 1; +} + +static u_int +l2_newid(struct layer2 *l2) +{ + u_int id; + + id = l2->next_id++; + if (id == 0x7fff) + l2->next_id = 1; + id <<= 16; + id |= l2->tei << 8; + id |= l2->sapi; + return id; +} + +static void +l2up(struct layer2 *l2, u_int prim, struct sk_buff *skb) +{ + int err; + + if (!l2->up) + return; + mISDN_HEAD_PRIM(skb) = prim; + mISDN_HEAD_ID(skb) = (l2->ch.nr << 16) | l2->ch.addr; + err = l2->up->send(l2->up, skb); + if (err) { + printk(KERN_WARNING "%s: err=%d\n", __func__, err); + dev_kfree_skb(skb); + } +} + +static void +l2up_create(struct layer2 *l2, u_int prim, int len, void *arg) +{ + struct sk_buff *skb; + struct mISDNhead *hh; + int err; + + if (!l2->up) + return; + skb = mI_alloc_skb(len, GFP_ATOMIC); + if (!skb) + return; + hh = mISDN_HEAD_P(skb); + hh->prim = prim; + hh->id = (l2->ch.nr << 16) | l2->ch.addr; + if (len) + memcpy(skb_put(skb, len), arg, len); + err = l2->up->send(l2->up, skb); + if (err) { + printk(KERN_WARNING "%s: err=%d\n", __func__, err); + dev_kfree_skb(skb); + } +} + +static int +l2down_skb(struct layer2 *l2, struct sk_buff *skb) { + int ret; + + ret = l2->ch.recv(l2->ch.peer, skb); + if (ret && (*debug & DEBUG_L2_RECV)) + printk(KERN_DEBUG "l2down_skb: ret(%d)\n", ret); + return ret; +} + +static int +l2down_raw(struct layer2 *l2, struct sk_buff *skb) +{ + struct mISDNhead *hh = mISDN_HEAD_P(skb); + + if (hh->prim == PH_DATA_REQ) { + if (test_and_set_bit(FLG_L1_NOTREADY, &l2->flag)) { + skb_queue_tail(&l2->down_queue, skb); + return 0; + } + l2->down_id = mISDN_HEAD_ID(skb); + } + return l2down_skb(l2, skb); +} + +static int +l2down(struct layer2 *l2, u_int prim, u_int id, struct sk_buff *skb) +{ + struct mISDNhead *hh = mISDN_HEAD_P(skb); + + hh->prim = prim; + hh->id = id; + return l2down_raw(l2, skb); +} + +static int +l2down_create(struct 
layer2 *l2, u_int prim, u_int id, int len, void *arg) +{ + struct sk_buff *skb; + int err; + struct mISDNhead *hh; + + skb = mI_alloc_skb(len, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + hh = mISDN_HEAD_P(skb); + hh->prim = prim; + hh->id = id; + if (len) + memcpy(skb_put(skb, len), arg, len); + err = l2down_raw(l2, skb); + if (err) + dev_kfree_skb(skb); + return err; +} + +static int +ph_data_confirm(struct layer2 *l2, struct mISDNhead *hh, struct sk_buff *skb) { + struct sk_buff *nskb = skb; + int ret = -EAGAIN; + + if (test_bit(FLG_L1_NOTREADY, &l2->flag)) { + if (hh->id == l2->down_id) { + nskb = skb_dequeue(&l2->down_queue); + if (nskb) { + l2->down_id = mISDN_HEAD_ID(nskb); + if (l2down_skb(l2, nskb)) { + dev_kfree_skb(nskb); + l2->down_id = MISDN_ID_NONE; + } + } else + l2->down_id = MISDN_ID_NONE; + if (ret) { + dev_kfree_skb(skb); + ret = 0; + } + if (l2->down_id == MISDN_ID_NONE) { + test_and_clear_bit(FLG_L1_NOTREADY, &l2->flag); + mISDN_FsmEvent(&l2->l2m, EV_L2_ACK_PULL, NULL); + } + } + } + if (!test_and_set_bit(FLG_L1_NOTREADY, &l2->flag)) { + nskb = skb_dequeue(&l2->down_queue); + if (nskb) { + l2->down_id = mISDN_HEAD_ID(nskb); + if (l2down_skb(l2, nskb)) { + dev_kfree_skb(nskb); + l2->down_id = MISDN_ID_NONE; + test_and_clear_bit(FLG_L1_NOTREADY, &l2->flag); + } + } else + test_and_clear_bit(FLG_L1_NOTREADY, &l2->flag); + } + return ret; +} + +static int +l2mgr(struct layer2 *l2, u_int prim, void *arg) { + long c = (long)arg; + + printk(KERN_WARNING + "l2mgr: addr:%x prim %x %c\n", l2->id, prim, (char)c); + if (test_bit(FLG_LAPD, &l2->flag) && + !test_bit(FLG_FIXED_TEI, &l2->flag)) { + switch (c) { + case 'C': + case 'D': + case 'G': + case 'H': + l2_tei(l2, prim, (u_long)arg); + break; + } + } + return 0; +} + +static void +set_peer_busy(struct layer2 *l2) { + test_and_set_bit(FLG_PEER_BUSY, &l2->flag); + if (skb_queue_len(&l2->i_queue) || skb_queue_len(&l2->ui_queue)) + test_and_set_bit(FLG_L2BLOCK, &l2->flag); +} + +static void +clear_peer_busy(struct layer2 *l2) { + if (test_and_clear_bit(FLG_PEER_BUSY, &l2->flag)) + test_and_clear_bit(FLG_L2BLOCK, &l2->flag); +} + +static void +InitWin(struct layer2 *l2) +{ + int i; + + for (i = 0; i < MAX_WINDOW; i++) + l2->windowar[i] = NULL; +} + +static int +freewin(struct layer2 *l2) +{ + int i, cnt = 0; + + for (i = 0; i < MAX_WINDOW; i++) { + if (l2->windowar[i]) { + cnt++; + dev_kfree_skb(l2->windowar[i]); + l2->windowar[i] = NULL; + } + } + return cnt; +} + +static void +ReleaseWin(struct layer2 *l2) +{ + int cnt = freewin(l2); + + if (cnt) + printk(KERN_WARNING + "isdnl2 freed %d skbuffs in release\n", cnt); +} + +inline unsigned int +cansend(struct layer2 *l2) +{ + unsigned int p1; + + if (test_bit(FLG_MOD128, &l2->flag)) + p1 = (l2->vs - l2->va) % 128; + else + p1 = (l2->vs - l2->va) % 8; + return (p1 < l2->window) && !test_bit(FLG_PEER_BUSY, &l2->flag); +} + +inline void +clear_exception(struct layer2 *l2) +{ + test_and_clear_bit(FLG_ACK_PEND, &l2->flag); + test_and_clear_bit(FLG_REJEXC, &l2->flag); + test_and_clear_bit(FLG_OWN_BUSY, &l2->flag); + clear_peer_busy(l2); +} + +static int +sethdraddr(struct layer2 *l2, u_char *header, int rsp) +{ + u_char *ptr = header; + int crbit = rsp; + + if (test_bit(FLG_LAPD, &l2->flag)) { + if (test_bit(FLG_LAPD_NET, &l2->flag)) + crbit = !crbit; + *ptr++ = (l2->sapi << 2) | (crbit ? 
2 : 0); + *ptr++ = (l2->tei << 1) | 1; + return 2; + } else { + if (test_bit(FLG_ORIG, &l2->flag)) + crbit = !crbit; + if (crbit) + *ptr++ = l2->addr.B; + else + *ptr++ = l2->addr.A; + return 1; + } +} + +static inline void +enqueue_super(struct layer2 *l2, struct sk_buff *skb) +{ + if (l2down(l2, PH_DATA_REQ, l2_newid(l2), skb)) + dev_kfree_skb(skb); +} + +static inline void +enqueue_ui(struct layer2 *l2, struct sk_buff *skb) +{ + if (l2->tm) + l2_tei(l2, MDL_STATUS_UI_IND, 0); + if (l2down(l2, PH_DATA_REQ, l2_newid(l2), skb)) + dev_kfree_skb(skb); +} + +inline int +IsUI(u_char *data) +{ + return (data[0] & 0xef) == UI; +} + +inline int +IsUA(u_char *data) +{ + return (data[0] & 0xef) == UA; +} + +inline int +IsDM(u_char *data) +{ + return (data[0] & 0xef) == DM; +} + +inline int +IsDISC(u_char *data) +{ + return (data[0] & 0xef) == DISC; +} + +inline int +IsRR(u_char *data, struct layer2 *l2) +{ + if (test_bit(FLG_MOD128, &l2->flag)) + return data[0] == RR; + else + return (data[0] & 0xf) == 1; +} + +inline int +IsSFrame(u_char *data, struct layer2 *l2) +{ + register u_char d = *data; + + if (!test_bit(FLG_MOD128, &l2->flag)) + d &= 0xf; + return ((d & 0xf3) == 1) && ((d & 0x0c) != 0x0c); +} + +inline int +IsSABME(u_char *data, struct layer2 *l2) +{ + u_char d = data[0] & ~0x10; + + return test_bit(FLG_MOD128, &l2->flag) ? d == SABME : d == SABM; +} + +inline int +IsREJ(u_char *data, struct layer2 *l2) +{ + return test_bit(FLG_MOD128, &l2->flag) ? + data[0] == REJ : (data[0] & 0xf) == REJ; +} + +inline int +IsFRMR(u_char *data) +{ + return (data[0] & 0xef) == FRMR; +} + +inline int +IsRNR(u_char *data, struct layer2 *l2) +{ + return test_bit(FLG_MOD128, &l2->flag) ? + data[0] == RNR : (data[0] & 0xf) == RNR; +} + +int +iframe_error(struct layer2 *l2, struct sk_buff *skb) +{ + u_int i; + int rsp = *skb->data & 0x2; + + i = l2addrsize(l2) + (test_bit(FLG_MOD128, &l2->flag) ? 2 : 1); + if (test_bit(FLG_ORIG, &l2->flag)) + rsp = !rsp; + if (rsp) + return 'L'; + if (skb->len < i) + return 'N'; + if ((skb->len - i) > l2->maxlen) + return 'O'; + return 0; +} + +int +super_error(struct layer2 *l2, struct sk_buff *skb) +{ + if (skb->len != l2addrsize(l2) + + (test_bit(FLG_MOD128, &l2->flag) ? 
2 : 1)) + return 'N'; + return 0; +} + +int +unnum_error(struct layer2 *l2, struct sk_buff *skb, int wantrsp) +{ + int rsp = (*skb->data & 0x2) >> 1; + if (test_bit(FLG_ORIG, &l2->flag)) + rsp = !rsp; + if (rsp != wantrsp) + return 'L'; + if (skb->len != l2addrsize(l2) + 1) + return 'N'; + return 0; +} + +int +UI_error(struct layer2 *l2, struct sk_buff *skb) +{ + int rsp = *skb->data & 0x2; + if (test_bit(FLG_ORIG, &l2->flag)) + rsp = !rsp; + if (rsp) + return 'L'; + if (skb->len > l2->maxlen + l2addrsize(l2) + 1) + return 'O'; + return 0; +} + +int +FRMR_error(struct layer2 *l2, struct sk_buff *skb) +{ + u_int headers = l2addrsize(l2) + 1; + u_char *datap = skb->data + headers; + int rsp = *skb->data & 0x2; + + if (test_bit(FLG_ORIG, &l2->flag)) + rsp = !rsp; + if (!rsp) + return 'L'; + if (test_bit(FLG_MOD128, &l2->flag)) { + if (skb->len < headers + 5) + return 'N'; + else if (*debug & DEBUG_L2) + l2m_debug(&l2->l2m, + "FRMR information %2x %2x %2x %2x %2x", + datap[0], datap[1], datap[2], datap[3], datap[4]); + } else { + if (skb->len < headers + 3) + return 'N'; + else if (*debug & DEBUG_L2) + l2m_debug(&l2->l2m, + "FRMR information %2x %2x %2x", + datap[0], datap[1], datap[2]); + } + return 0; +} + +static unsigned int +legalnr(struct layer2 *l2, unsigned int nr) +{ + if (test_bit(FLG_MOD128, &l2->flag)) + return ((nr - l2->va) % 128) <= ((l2->vs - l2->va) % 128); + else + return ((nr - l2->va) % 8) <= ((l2->vs - l2->va) % 8); +} + +static void +setva(struct layer2 *l2, unsigned int nr) +{ + struct sk_buff *skb; + + while (l2->va != nr) { + l2->va++; + if (test_bit(FLG_MOD128, &l2->flag)) + l2->va %= 128; + else + l2->va %= 8; + if (l2->windowar[l2->sow]) { + skb_trim(l2->windowar[l2->sow], 0); + skb_queue_tail(&l2->tmp_queue, l2->windowar[l2->sow]); + l2->windowar[l2->sow] = NULL; + } + l2->sow = (l2->sow + 1) % l2->window; + } + skb = skb_dequeue(&l2->tmp_queue); + while (skb) { + dev_kfree_skb(skb); + skb = skb_dequeue(&l2->tmp_queue); + } +} + +static void +send_uframe(struct layer2 *l2, struct sk_buff *skb, u_char cmd, u_char cr) +{ + u_char tmp[MAX_L2HEADER_LEN]; + int i; + + i = sethdraddr(l2, tmp, cr); + tmp[i++] = cmd; + if (skb) + skb_trim(skb, 0); + else { + skb = mI_alloc_skb(i, GFP_ATOMIC); + if (!skb) { + printk(KERN_WARNING "%s: can't alloc skbuff\n", + __func__); + return; + } + } + memcpy(skb_put(skb, i), tmp, i); + enqueue_super(l2, skb); +} + + +inline u_char +get_PollFlag(struct layer2 *l2, struct sk_buff *skb) +{ + return skb->data[l2addrsize(l2)] & 0x10; +} + +inline u_char +get_PollFlagFree(struct layer2 *l2, struct sk_buff *skb) +{ + u_char PF; + + PF = get_PollFlag(l2, skb); + dev_kfree_skb(skb); + return PF; +} + +inline void +start_t200(struct layer2 *l2, int i) +{ + mISDN_FsmAddTimer(&l2->t200, l2->T200, EV_L2_T200, NULL, i); + test_and_set_bit(FLG_T200_RUN, &l2->flag); +} + +inline void +restart_t200(struct layer2 *l2, int i) +{ + mISDN_FsmRestartTimer(&l2->t200, l2->T200, EV_L2_T200, NULL, i); + test_and_set_bit(FLG_T200_RUN, &l2->flag); +} + +inline void +stop_t200(struct layer2 *l2, int i) +{ + if (test_and_clear_bit(FLG_T200_RUN, &l2->flag)) + mISDN_FsmDelTimer(&l2->t200, i); +} + +inline void +st5_dl_release_l2l3(struct layer2 *l2) +{ + int pr; + + if (test_and_clear_bit(FLG_PEND_REL, &l2->flag)) + pr = DL_RELEASE_CNF; + else + pr = DL_RELEASE_IND; + l2up_create(l2, pr, 0, NULL); +} + +inline void +lapb_dl_release_l2l3(struct layer2 *l2, int f) +{ + if (test_bit(FLG_LAPB, &l2->flag)) + l2down_create(l2, PH_DEACTIVATE_REQ, l2_newid(l2), 0, NULL); + 
l2up_create(l2, f, 0, NULL); +} + +static void +establishlink(struct FsmInst *fi) +{ + struct layer2 *l2 = fi->userdata; + u_char cmd; + + clear_exception(l2); + l2->rc = 0; + cmd = (test_bit(FLG_MOD128, &l2->flag) ? SABME : SABM) | 0x10; + send_uframe(l2, NULL, cmd, CMD); + mISDN_FsmDelTimer(&l2->t203, 1); + restart_t200(l2, 1); + test_and_clear_bit(FLG_PEND_REL, &l2->flag); + freewin(l2); + mISDN_FsmChangeState(fi, ST_L2_5); +} + +static void +l2_mdl_error_ua(struct FsmInst *fi, int event, void *arg) +{ + struct sk_buff *skb = arg; + struct layer2 *l2 = fi->userdata; + + if (get_PollFlagFree(l2, skb)) + l2mgr(l2, MDL_ERROR_IND, (void *) 'C'); + else + l2mgr(l2, MDL_ERROR_IND, (void *) 'D'); + +} + +static void +l2_mdl_error_dm(struct FsmInst *fi, int event, void *arg) +{ + struct sk_buff *skb = arg; + struct layer2 *l2 = fi->userdata; + + if (get_PollFlagFree(l2, skb)) + l2mgr(l2, MDL_ERROR_IND, (void *) 'B'); + else { + l2mgr(l2, MDL_ERROR_IND, (void *) 'E'); + establishlink(fi); + test_and_clear_bit(FLG_L3_INIT, &l2->flag); + } +} + +static void +l2_st8_mdl_error_dm(struct FsmInst *fi, int event, void *arg) +{ + struct sk_buff *skb = arg; + struct layer2 *l2 = fi->userdata; + + if (get_PollFlagFree(l2, skb)) + l2mgr(l2, MDL_ERROR_IND, (void *) 'B'); + else + l2mgr(l2, MDL_ERROR_IND, (void *) 'E'); + establishlink(fi); + test_and_clear_bit(FLG_L3_INIT, &l2->flag); +} + +static void +l2_go_st3(struct FsmInst *fi, int event, void *arg) +{ + dev_kfree_skb((struct sk_buff *)arg); + mISDN_FsmChangeState(fi, ST_L2_3); +} + +static void +l2_mdl_assign(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + + mISDN_FsmChangeState(fi, ST_L2_3); + dev_kfree_skb((struct sk_buff *)arg); + l2_tei(l2, MDL_ASSIGN_IND, 0); +} + +static void +l2_queue_ui_assign(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + skb_queue_tail(&l2->ui_queue, skb); + mISDN_FsmChangeState(fi, ST_L2_2); + l2_tei(l2, MDL_ASSIGN_IND, 0); +} + +static void +l2_queue_ui(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + skb_queue_tail(&l2->ui_queue, skb); +} + +static void +tx_ui(struct layer2 *l2) +{ + struct sk_buff *skb; + u_char header[MAX_L2HEADER_LEN]; + int i; + + i = sethdraddr(l2, header, CMD); + if (test_bit(FLG_LAPD_NET, &l2->flag)) + header[1] = 0xff; /* tei 127 */ + header[i++] = UI; + while ((skb = skb_dequeue(&l2->ui_queue))) { + memcpy(skb_push(skb, i), header, i); + enqueue_ui(l2, skb); + } +} + +static void +l2_send_ui(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + skb_queue_tail(&l2->ui_queue, skb); + tx_ui(l2); +} + +static void +l2_got_ui(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + skb_pull(skb, l2headersize(l2, 1)); +/* + * in states 1-3 for broadcast + */ + + if (l2->tm) + l2_tei(l2, MDL_STATUS_UI_IND, 0); + l2up(l2, DL_UNITDATA_IND, skb); +} + +static void +l2_establish(struct FsmInst *fi, int event, void *arg) +{ + struct sk_buff *skb = arg; + struct layer2 *l2 = fi->userdata; + + establishlink(fi); + test_and_set_bit(FLG_L3_INIT, &l2->flag); + dev_kfree_skb(skb); +} + +static void +l2_discard_i_setl3(struct FsmInst *fi, int event, void *arg) +{ + struct sk_buff *skb = arg; + struct layer2 *l2 = fi->userdata; + + skb_queue_purge(&l2->i_queue); + test_and_set_bit(FLG_L3_INIT, &l2->flag); + 
test_and_clear_bit(FLG_PEND_REL, &l2->flag); + dev_kfree_skb(skb); +} + +static void +l2_l3_reestablish(struct FsmInst *fi, int event, void *arg) +{ + struct sk_buff *skb = arg; + struct layer2 *l2 = fi->userdata; + + skb_queue_purge(&l2->i_queue); + establishlink(fi); + test_and_set_bit(FLG_L3_INIT, &l2->flag); + dev_kfree_skb(skb); +} + +static void +l2_release(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + skb_trim(skb, 0); + l2up(l2, DL_RELEASE_CNF, skb); +} + +static void +l2_pend_rel(struct FsmInst *fi, int event, void *arg) +{ + struct sk_buff *skb = arg; + struct layer2 *l2 = fi->userdata; + + test_and_set_bit(FLG_PEND_REL, &l2->flag); + dev_kfree_skb(skb); +} + +static void +l2_disconnect(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + skb_queue_purge(&l2->i_queue); + freewin(l2); + mISDN_FsmChangeState(fi, ST_L2_6); + l2->rc = 0; + send_uframe(l2, NULL, DISC | 0x10, CMD); + mISDN_FsmDelTimer(&l2->t203, 1); + restart_t200(l2, 2); + if (skb) + dev_kfree_skb(skb); +} + +static void +l2_start_multi(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + l2->vs = 0; + l2->va = 0; + l2->vr = 0; + l2->sow = 0; + clear_exception(l2); + send_uframe(l2, NULL, UA | get_PollFlag(l2, skb), RSP); + mISDN_FsmChangeState(fi, ST_L2_7); + mISDN_FsmAddTimer(&l2->t203, l2->T203, EV_L2_T203, NULL, 3); + skb_trim(skb, 0); + l2up(l2, DL_ESTABLISH_IND, skb); + if (l2->tm) + l2_tei(l2, MDL_STATUS_UP_IND, 0); +} + +static void +l2_send_UA(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + send_uframe(l2, skb, UA | get_PollFlag(l2, skb), RSP); +} + +static void +l2_send_DM(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + send_uframe(l2, skb, DM | get_PollFlag(l2, skb), RSP); +} + +static void +l2_restart_multi(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + int est = 0; + + send_uframe(l2, skb, UA | get_PollFlag(l2, skb), RSP); + + l2mgr(l2, MDL_ERROR_IND, (void *) 'F'); + + if (l2->vs != l2->va) { + skb_queue_purge(&l2->i_queue); + est = 1; + } + + clear_exception(l2); + l2->vs = 0; + l2->va = 0; + l2->vr = 0; + l2->sow = 0; + mISDN_FsmChangeState(fi, ST_L2_7); + stop_t200(l2, 3); + mISDN_FsmRestartTimer(&l2->t203, l2->T203, EV_L2_T203, NULL, 3); + + if (est) + l2up_create(l2, DL_ESTABLISH_IND, 0, NULL); +/* mISDN_queue_data(&l2->inst, l2->inst.id | MSG_BROADCAST, + * MGR_SHORTSTATUS | INDICATION, SSTATUS_L2_ESTABLISHED, + * 0, NULL, 0); + */ + if (skb_queue_len(&l2->i_queue) && cansend(l2)) + mISDN_FsmEvent(fi, EV_L2_ACK_PULL, NULL); +} + +static void +l2_stop_multi(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + mISDN_FsmChangeState(fi, ST_L2_4); + mISDN_FsmDelTimer(&l2->t203, 3); + stop_t200(l2, 4); + + send_uframe(l2, skb, UA | get_PollFlag(l2, skb), RSP); + skb_queue_purge(&l2->i_queue); + freewin(l2); + lapb_dl_release_l2l3(l2, DL_RELEASE_IND); + if (l2->tm) + l2_tei(l2, MDL_STATUS_DOWN_IND, 0); +} + +static void +l2_connected(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + int pr = -1; + + if (!get_PollFlag(l2, skb)) { + l2_mdl_error_ua(fi, event, arg); + return; + } + dev_kfree_skb(skb); + if 
(test_and_clear_bit(FLG_PEND_REL, &l2->flag)) + l2_disconnect(fi, event, NULL); + if (test_and_clear_bit(FLG_L3_INIT, &l2->flag)) { + pr = DL_ESTABLISH_CNF; + } else if (l2->vs != l2->va) { + skb_queue_purge(&l2->i_queue); + pr = DL_ESTABLISH_IND; + } + stop_t200(l2, 5); + l2->vr = 0; + l2->vs = 0; + l2->va = 0; + l2->sow = 0; + mISDN_FsmChangeState(fi, ST_L2_7); + mISDN_FsmAddTimer(&l2->t203, l2->T203, EV_L2_T203, NULL, 4); + if (pr != -1) + l2up_create(l2, pr, 0, NULL); + + if (skb_queue_len(&l2->i_queue) && cansend(l2)) + mISDN_FsmEvent(fi, EV_L2_ACK_PULL, NULL); + + if (l2->tm) + l2_tei(l2, MDL_STATUS_UP_IND, 0); +} + +static void +l2_released(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + if (!get_PollFlag(l2, skb)) { + l2_mdl_error_ua(fi, event, arg); + return; + } + dev_kfree_skb(skb); + stop_t200(l2, 6); + lapb_dl_release_l2l3(l2, DL_RELEASE_CNF); + mISDN_FsmChangeState(fi, ST_L2_4); + if (l2->tm) + l2_tei(l2, MDL_STATUS_DOWN_IND, 0); +} + +static void +l2_reestablish(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + if (!get_PollFlagFree(l2, skb)) { + establishlink(fi); + test_and_set_bit(FLG_L3_INIT, &l2->flag); + } +} + +static void +l2_st5_dm_release(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + if (get_PollFlagFree(l2, skb)) { + stop_t200(l2, 7); + if (!test_bit(FLG_L3_INIT, &l2->flag)) + skb_queue_purge(&l2->i_queue); + if (test_bit(FLG_LAPB, &l2->flag)) + l2down_create(l2, PH_DEACTIVATE_REQ, + l2_newid(l2), 0, NULL); + st5_dl_release_l2l3(l2); + mISDN_FsmChangeState(fi, ST_L2_4); + if (l2->tm) + l2_tei(l2, MDL_STATUS_DOWN_IND, 0); + } +} + +static void +l2_st6_dm_release(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + if (get_PollFlagFree(l2, skb)) { + stop_t200(l2, 8); + lapb_dl_release_l2l3(l2, DL_RELEASE_CNF); + mISDN_FsmChangeState(fi, ST_L2_4); + if (l2->tm) + l2_tei(l2, MDL_STATUS_DOWN_IND, 0); + } +} + +void +enquiry_cr(struct layer2 *l2, u_char typ, u_char cr, u_char pf) +{ + struct sk_buff *skb; + u_char tmp[MAX_L2HEADER_LEN]; + int i; + + i = sethdraddr(l2, tmp, cr); + if (test_bit(FLG_MOD128, &l2->flag)) { + tmp[i++] = typ; + tmp[i++] = (l2->vr << 1) | (pf ? 1 : 0); + } else + tmp[i++] = (l2->vr << 5) | typ | (pf ? 
0x10 : 0); + skb = mI_alloc_skb(i, GFP_ATOMIC); + if (!skb) { + printk(KERN_WARNING + "isdnl2 can't alloc sbbuff for enquiry_cr\n"); + return; + } + memcpy(skb_put(skb, i), tmp, i); + enqueue_super(l2, skb); +} + +inline void +enquiry_response(struct layer2 *l2) +{ + if (test_bit(FLG_OWN_BUSY, &l2->flag)) + enquiry_cr(l2, RNR, RSP, 1); + else + enquiry_cr(l2, RR, RSP, 1); + test_and_clear_bit(FLG_ACK_PEND, &l2->flag); +} + +inline void +transmit_enquiry(struct layer2 *l2) +{ + if (test_bit(FLG_OWN_BUSY, &l2->flag)) + enquiry_cr(l2, RNR, CMD, 1); + else + enquiry_cr(l2, RR, CMD, 1); + test_and_clear_bit(FLG_ACK_PEND, &l2->flag); + start_t200(l2, 9); +} + + +static void +nrerrorrecovery(struct FsmInst *fi) +{ + struct layer2 *l2 = fi->userdata; + + l2mgr(l2, MDL_ERROR_IND, (void *) 'J'); + establishlink(fi); + test_and_clear_bit(FLG_L3_INIT, &l2->flag); +} + +static void +invoke_retransmission(struct layer2 *l2, unsigned int nr) +{ + u_int p1; + + if (l2->vs != nr) { + while (l2->vs != nr) { + (l2->vs)--; + if (test_bit(FLG_MOD128, &l2->flag)) { + l2->vs %= 128; + p1 = (l2->vs - l2->va) % 128; + } else { + l2->vs %= 8; + p1 = (l2->vs - l2->va) % 8; + } + p1 = (p1 + l2->sow) % l2->window; + if (l2->windowar[p1]) + skb_queue_head(&l2->i_queue, l2->windowar[p1]); + else + printk(KERN_WARNING + "%s: windowar[%d] is NULL\n", + __func__, p1); + l2->windowar[p1] = NULL; + } + mISDN_FsmEvent(&l2->l2m, EV_L2_ACK_PULL, NULL); + } +} + +static void +l2_st7_got_super(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + int PollFlag, rsp, typ = RR; + unsigned int nr; + + rsp = *skb->data & 0x2; + if (test_bit(FLG_ORIG, &l2->flag)) + rsp = !rsp; + + skb_pull(skb, l2addrsize(l2)); + if (IsRNR(skb->data, l2)) { + set_peer_busy(l2); + typ = RNR; + } else + clear_peer_busy(l2); + if (IsREJ(skb->data, l2)) + typ = REJ; + + if (test_bit(FLG_MOD128, &l2->flag)) { + PollFlag = (skb->data[1] & 0x1) == 0x1; + nr = skb->data[1] >> 1; + } else { + PollFlag = (skb->data[0] & 0x10); + nr = (skb->data[0] >> 5) & 0x7; + } + dev_kfree_skb(skb); + + if (PollFlag) { + if (rsp) + l2mgr(l2, MDL_ERROR_IND, (void *) 'A'); + else + enquiry_response(l2); + } + if (legalnr(l2, nr)) { + if (typ == REJ) { + setva(l2, nr); + invoke_retransmission(l2, nr); + stop_t200(l2, 10); + if (mISDN_FsmAddTimer(&l2->t203, l2->T203, + EV_L2_T203, NULL, 6)) + l2m_debug(&l2->l2m, "Restart T203 ST7 REJ"); + } else if ((nr == l2->vs) && (typ == RR)) { + setva(l2, nr); + stop_t200(l2, 11); + mISDN_FsmRestartTimer(&l2->t203, l2->T203, + EV_L2_T203, NULL, 7); + } else if ((l2->va != nr) || (typ == RNR)) { + setva(l2, nr); + if (typ != RR) + mISDN_FsmDelTimer(&l2->t203, 9); + restart_t200(l2, 12); + } + if (skb_queue_len(&l2->i_queue) && (typ == RR)) + mISDN_FsmEvent(fi, EV_L2_ACK_PULL, NULL); + } else + nrerrorrecovery(fi); +} + +static void +l2_feed_i_if_reest(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + if (!test_bit(FLG_L3_INIT, &l2->flag)) + skb_queue_tail(&l2->i_queue, skb); + else + dev_kfree_skb(skb); +} + +static void +l2_feed_i_pull(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + skb_queue_tail(&l2->i_queue, skb); + mISDN_FsmEvent(fi, EV_L2_ACK_PULL, NULL); +} + +static void +l2_feed_iqueue(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + skb_queue_tail(&l2->i_queue, skb); +} + +static void 
+l2_got_iframe(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + int PollFlag, i; + u_int ns, nr; + + i = l2addrsize(l2); + if (test_bit(FLG_MOD128, &l2->flag)) { + PollFlag = ((skb->data[i + 1] & 0x1) == 0x1); + ns = skb->data[i] >> 1; + nr = (skb->data[i + 1] >> 1) & 0x7f; + } else { + PollFlag = (skb->data[i] & 0x10); + ns = (skb->data[i] >> 1) & 0x7; + nr = (skb->data[i] >> 5) & 0x7; + } + if (test_bit(FLG_OWN_BUSY, &l2->flag)) { + dev_kfree_skb(skb); + if (PollFlag) + enquiry_response(l2); + } else { + if (l2->vr == ns) { + l2->vr++; + if (test_bit(FLG_MOD128, &l2->flag)) + l2->vr %= 128; + else + l2->vr %= 8; + test_and_clear_bit(FLG_REJEXC, &l2->flag); + if (PollFlag) + enquiry_response(l2); + else + test_and_set_bit(FLG_ACK_PEND, &l2->flag); + skb_pull(skb, l2headersize(l2, 0)); + l2up(l2, DL_DATA_IND, skb); + } else { + /* n(s)!=v(r) */ + dev_kfree_skb(skb); + if (test_and_set_bit(FLG_REJEXC, &l2->flag)) { + if (PollFlag) + enquiry_response(l2); + } else { + enquiry_cr(l2, REJ, RSP, PollFlag); + test_and_clear_bit(FLG_ACK_PEND, &l2->flag); + } + } + } + if (legalnr(l2, nr)) { + if (!test_bit(FLG_PEER_BUSY, &l2->flag) && + (fi->state == ST_L2_7)) { + if (nr == l2->vs) { + stop_t200(l2, 13); + mISDN_FsmRestartTimer(&l2->t203, l2->T203, + EV_L2_T203, NULL, 7); + } else if (nr != l2->va) + restart_t200(l2, 14); + } + setva(l2, nr); + } else { + nrerrorrecovery(fi); + return; + } + if (skb_queue_len(&l2->i_queue) && (fi->state == ST_L2_7)) + mISDN_FsmEvent(fi, EV_L2_ACK_PULL, NULL); + if (test_and_clear_bit(FLG_ACK_PEND, &l2->flag)) + enquiry_cr(l2, RR, RSP, 0); +} + +static void +l2_got_tei(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + u_int info; + + l2->tei = (signed char)(long)arg; + set_channel_address(&l2->ch, l2->sapi, l2->tei); + info = DL_INFO_L2_CONNECT; + l2up_create(l2, DL_INFORMATION_IND, sizeof(info), &info); + if (fi->state == ST_L2_3) { + establishlink(fi); + test_and_set_bit(FLG_L3_INIT, &l2->flag); + } else + mISDN_FsmChangeState(fi, ST_L2_4); + if (skb_queue_len(&l2->ui_queue)) + tx_ui(l2); +} + +static void +l2_st5_tout_200(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + + if (test_bit(FLG_LAPD, &l2->flag) && + test_bit(FLG_DCHAN_BUSY, &l2->flag)) { + mISDN_FsmAddTimer(&l2->t200, l2->T200, EV_L2_T200, NULL, 9); + } else if (l2->rc == l2->N200) { + mISDN_FsmChangeState(fi, ST_L2_4); + test_and_clear_bit(FLG_T200_RUN, &l2->flag); + skb_queue_purge(&l2->i_queue); + l2mgr(l2, MDL_ERROR_IND, (void *) 'G'); + if (test_bit(FLG_LAPB, &l2->flag)) + l2down_create(l2, PH_DEACTIVATE_REQ, + l2_newid(l2), 0, NULL); + st5_dl_release_l2l3(l2); + if (l2->tm) + l2_tei(l2, MDL_STATUS_DOWN_IND, 0); + } else { + l2->rc++; + mISDN_FsmAddTimer(&l2->t200, l2->T200, EV_L2_T200, NULL, 9); + send_uframe(l2, NULL, (test_bit(FLG_MOD128, &l2->flag) ? 
+ SABME : SABM) | 0x10, CMD); + } +} + +static void +l2_st6_tout_200(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + + if (test_bit(FLG_LAPD, &l2->flag) && + test_bit(FLG_DCHAN_BUSY, &l2->flag)) { + mISDN_FsmAddTimer(&l2->t200, l2->T200, EV_L2_T200, NULL, 9); + } else if (l2->rc == l2->N200) { + mISDN_FsmChangeState(fi, ST_L2_4); + test_and_clear_bit(FLG_T200_RUN, &l2->flag); + l2mgr(l2, MDL_ERROR_IND, (void *) 'H'); + lapb_dl_release_l2l3(l2, DL_RELEASE_CNF); + if (l2->tm) + l2_tei(l2, MDL_STATUS_DOWN_IND, 0); + } else { + l2->rc++; + mISDN_FsmAddTimer(&l2->t200, l2->T200, EV_L2_T200, + NULL, 9); + send_uframe(l2, NULL, DISC | 0x10, CMD); + } +} + +static void +l2_st7_tout_200(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + + if (test_bit(FLG_LAPD, &l2->flag) && + test_bit(FLG_DCHAN_BUSY, &l2->flag)) { + mISDN_FsmAddTimer(&l2->t200, l2->T200, EV_L2_T200, NULL, 9); + return; + } + test_and_clear_bit(FLG_T200_RUN, &l2->flag); + l2->rc = 0; + mISDN_FsmChangeState(fi, ST_L2_8); + transmit_enquiry(l2); + l2->rc++; +} + +static void +l2_st8_tout_200(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + + if (test_bit(FLG_LAPD, &l2->flag) && + test_bit(FLG_DCHAN_BUSY, &l2->flag)) { + mISDN_FsmAddTimer(&l2->t200, l2->T200, EV_L2_T200, NULL, 9); + return; + } + test_and_clear_bit(FLG_T200_RUN, &l2->flag); + if (l2->rc == l2->N200) { + l2mgr(l2, MDL_ERROR_IND, (void *) 'I'); + establishlink(fi); + test_and_clear_bit(FLG_L3_INIT, &l2->flag); + } else { + transmit_enquiry(l2); + l2->rc++; + } +} + +static void +l2_st7_tout_203(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + + if (test_bit(FLG_LAPD, &l2->flag) && + test_bit(FLG_DCHAN_BUSY, &l2->flag)) { + mISDN_FsmAddTimer(&l2->t203, l2->T203, EV_L2_T203, NULL, 9); + return; + } + mISDN_FsmChangeState(fi, ST_L2_8); + transmit_enquiry(l2); + l2->rc = 0; +} + +static void +l2_pull_iqueue(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb, *nskb, *oskb; + u_char header[MAX_L2HEADER_LEN]; + u_int i, p1; + + if (!cansend(l2)) + return; + + skb = skb_dequeue(&l2->i_queue); + if (!skb) + return; + + if (test_bit(FLG_MOD128, &l2->flag)) + p1 = (l2->vs - l2->va) % 128; + else + p1 = (l2->vs - l2->va) % 8; + p1 = (p1 + l2->sow) % l2->window; + if (l2->windowar[p1]) { + printk(KERN_WARNING "isdnl2 try overwrite ack queue entry %d\n", + p1); + dev_kfree_skb(l2->windowar[p1]); + } + l2->windowar[p1] = skb; + i = sethdraddr(l2, header, CMD); + if (test_bit(FLG_MOD128, &l2->flag)) { + header[i++] = l2->vs << 1; + header[i++] = l2->vr << 1; + l2->vs = (l2->vs + 1) % 128; + } else { + header[i++] = (l2->vr << 5) | (l2->vs << 1); + l2->vs = (l2->vs + 1) % 8; + } + + nskb = skb_clone(skb, GFP_ATOMIC); + p1 = skb_headroom(nskb); + if (p1 >= i) + memcpy(skb_push(nskb, i), header, i); + else { + printk(KERN_WARNING + "isdnl2 pull_iqueue skb header(%d/%d) too short\n", i, p1); + oskb = nskb; + nskb = mI_alloc_skb(oskb->len + i, GFP_ATOMIC); + if (!nskb) { + dev_kfree_skb(oskb); + printk(KERN_WARNING "%s: no skb mem\n", __func__); + return; + } + memcpy(skb_put(nskb, i), header, i); + memcpy(skb_put(nskb, oskb->len), oskb->data, oskb->len); + dev_kfree_skb(oskb); + } + l2down(l2, PH_DATA_REQ, l2_newid(l2), nskb); + test_and_clear_bit(FLG_ACK_PEND, &l2->flag); + if (!test_and_set_bit(FLG_T200_RUN, &l2->flag)) { + mISDN_FsmDelTimer(&l2->t203, 13); + mISDN_FsmAddTimer(&l2->t200, l2->T200, 
EV_L2_T200, NULL, 11); + } +} + +static void +l2_st8_got_super(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + int PollFlag, rsp, rnr = 0; + unsigned int nr; + + rsp = *skb->data & 0x2; + if (test_bit(FLG_ORIG, &l2->flag)) + rsp = !rsp; + + skb_pull(skb, l2addrsize(l2)); + + if (IsRNR(skb->data, l2)) { + set_peer_busy(l2); + rnr = 1; + } else + clear_peer_busy(l2); + + if (test_bit(FLG_MOD128, &l2->flag)) { + PollFlag = (skb->data[1] & 0x1) == 0x1; + nr = skb->data[1] >> 1; + } else { + PollFlag = (skb->data[0] & 0x10); + nr = (skb->data[0] >> 5) & 0x7; + } + dev_kfree_skb(skb); + if (rsp && PollFlag) { + if (legalnr(l2, nr)) { + if (rnr) { + restart_t200(l2, 15); + } else { + stop_t200(l2, 16); + mISDN_FsmAddTimer(&l2->t203, l2->T203, + EV_L2_T203, NULL, 5); + setva(l2, nr); + } + invoke_retransmission(l2, nr); + mISDN_FsmChangeState(fi, ST_L2_7); + if (skb_queue_len(&l2->i_queue) && cansend(l2)) + mISDN_FsmEvent(fi, EV_L2_ACK_PULL, NULL); + } else + nrerrorrecovery(fi); + } else { + if (!rsp && PollFlag) + enquiry_response(l2); + if (legalnr(l2, nr)) + setva(l2, nr); + else + nrerrorrecovery(fi); + } +} + +static void +l2_got_FRMR(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + skb_pull(skb, l2addrsize(l2) + 1); + + if (!(skb->data[0] & 1) || ((skb->data[0] & 3) == 1) || /* I or S */ + (IsUA(skb->data) && (fi->state == ST_L2_7))) { + l2mgr(l2, MDL_ERROR_IND, (void *) 'K'); + establishlink(fi); + test_and_clear_bit(FLG_L3_INIT, &l2->flag); + } + dev_kfree_skb(skb); +} + +static void +l2_st24_tei_remove(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + + skb_queue_purge(&l2->ui_queue); + l2->tei = GROUP_TEI; + mISDN_FsmChangeState(fi, ST_L2_1); +} + +static void +l2_st3_tei_remove(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + + skb_queue_purge(&l2->ui_queue); + l2->tei = GROUP_TEI; + l2up_create(l2, DL_RELEASE_IND, 0, NULL); + mISDN_FsmChangeState(fi, ST_L2_1); +} + +static void +l2_st5_tei_remove(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + + skb_queue_purge(&l2->i_queue); + skb_queue_purge(&l2->ui_queue); + freewin(l2); + l2->tei = GROUP_TEI; + stop_t200(l2, 17); + st5_dl_release_l2l3(l2); + mISDN_FsmChangeState(fi, ST_L2_1); +} + +static void +l2_st6_tei_remove(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + + skb_queue_purge(&l2->ui_queue); + l2->tei = GROUP_TEI; + stop_t200(l2, 18); + l2up_create(l2, DL_RELEASE_IND, 0, NULL); + mISDN_FsmChangeState(fi, ST_L2_1); +} + +static void +l2_tei_remove(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + + skb_queue_purge(&l2->i_queue); + skb_queue_purge(&l2->ui_queue); + freewin(l2); + l2->tei = GROUP_TEI; + stop_t200(l2, 17); + mISDN_FsmDelTimer(&l2->t203, 19); + l2up_create(l2, DL_RELEASE_IND, 0, NULL); +/* mISDN_queue_data(&l2->inst, l2->inst.id | MSG_BROADCAST, + * MGR_SHORTSTATUS_IND, SSTATUS_L2_RELEASED, + * 0, NULL, 0); + */ + mISDN_FsmChangeState(fi, ST_L2_1); +} + +static void +l2_st14_persistant_da(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + skb_queue_purge(&l2->i_queue); + skb_queue_purge(&l2->ui_queue); + if (test_and_clear_bit(FLG_ESTAB_PEND, &l2->flag)) + l2up(l2, DL_RELEASE_IND, skb); + else + dev_kfree_skb(skb); +} + +static void +l2_st5_persistant_da(struct 
FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + skb_queue_purge(&l2->i_queue); + skb_queue_purge(&l2->ui_queue); + freewin(l2); + stop_t200(l2, 19); + st5_dl_release_l2l3(l2); + mISDN_FsmChangeState(fi, ST_L2_4); + if (l2->tm) + l2_tei(l2, MDL_STATUS_DOWN_IND, 0); + dev_kfree_skb(skb); +} + +static void +l2_st6_persistant_da(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + skb_queue_purge(&l2->ui_queue); + stop_t200(l2, 20); + l2up(l2, DL_RELEASE_CNF, skb); + mISDN_FsmChangeState(fi, ST_L2_4); + if (l2->tm) + l2_tei(l2, MDL_STATUS_DOWN_IND, 0); +} + +static void +l2_persistant_da(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + skb_queue_purge(&l2->i_queue); + skb_queue_purge(&l2->ui_queue); + freewin(l2); + stop_t200(l2, 19); + mISDN_FsmDelTimer(&l2->t203, 19); + l2up(l2, DL_RELEASE_IND, skb); + mISDN_FsmChangeState(fi, ST_L2_4); + if (l2->tm) + l2_tei(l2, MDL_STATUS_DOWN_IND, 0); +} + +static void +l2_set_own_busy(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + if (!test_and_set_bit(FLG_OWN_BUSY, &l2->flag)) { + enquiry_cr(l2, RNR, RSP, 0); + test_and_clear_bit(FLG_ACK_PEND, &l2->flag); + } + if (skb) + dev_kfree_skb(skb); +} + +static void +l2_clear_own_busy(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + struct sk_buff *skb = arg; + + if (!test_and_clear_bit(FLG_OWN_BUSY, &l2->flag)) { + enquiry_cr(l2, RR, RSP, 0); + test_and_clear_bit(FLG_ACK_PEND, &l2->flag); + } + if (skb) + dev_kfree_skb(skb); +} + +static void +l2_frame_error(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + + l2mgr(l2, MDL_ERROR_IND, arg); +} + +static void +l2_frame_error_reest(struct FsmInst *fi, int event, void *arg) +{ + struct layer2 *l2 = fi->userdata; + + l2mgr(l2, MDL_ERROR_IND, arg); + establishlink(fi); + test_and_clear_bit(FLG_L3_INIT, &l2->flag); +} + +static struct FsmNode L2FnList[] = +{ + {ST_L2_1, EV_L2_DL_ESTABLISH_REQ, l2_mdl_assign}, + {ST_L2_2, EV_L2_DL_ESTABLISH_REQ, l2_go_st3}, + {ST_L2_4, EV_L2_DL_ESTABLISH_REQ, l2_establish}, + {ST_L2_5, EV_L2_DL_ESTABLISH_REQ, l2_discard_i_setl3}, + {ST_L2_7, EV_L2_DL_ESTABLISH_REQ, l2_l3_reestablish}, + {ST_L2_8, EV_L2_DL_ESTABLISH_REQ, l2_l3_reestablish}, + {ST_L2_4, EV_L2_DL_RELEASE_REQ, l2_release}, + {ST_L2_5, EV_L2_DL_RELEASE_REQ, l2_pend_rel}, + {ST_L2_7, EV_L2_DL_RELEASE_REQ, l2_disconnect}, + {ST_L2_8, EV_L2_DL_RELEASE_REQ, l2_disconnect}, + {ST_L2_5, EV_L2_DL_DATA, l2_feed_i_if_reest}, + {ST_L2_7, EV_L2_DL_DATA, l2_feed_i_pull}, + {ST_L2_8, EV_L2_DL_DATA, l2_feed_iqueue}, + {ST_L2_1, EV_L2_DL_UNITDATA, l2_queue_ui_assign}, + {ST_L2_2, EV_L2_DL_UNITDATA, l2_queue_ui}, + {ST_L2_3, EV_L2_DL_UNITDATA, l2_queue_ui}, + {ST_L2_4, EV_L2_DL_UNITDATA, l2_send_ui}, + {ST_L2_5, EV_L2_DL_UNITDATA, l2_send_ui}, + {ST_L2_6, EV_L2_DL_UNITDATA, l2_send_ui}, + {ST_L2_7, EV_L2_DL_UNITDATA, l2_send_ui}, + {ST_L2_8, EV_L2_DL_UNITDATA, l2_send_ui}, + {ST_L2_1, EV_L2_MDL_ASSIGN, l2_got_tei}, + {ST_L2_2, EV_L2_MDL_ASSIGN, l2_got_tei}, + {ST_L2_3, EV_L2_MDL_ASSIGN, l2_got_tei}, + {ST_L2_2, EV_L2_MDL_ERROR, l2_st24_tei_remove}, + {ST_L2_3, EV_L2_MDL_ERROR, l2_st3_tei_remove}, + {ST_L2_4, EV_L2_MDL_REMOVE, l2_st24_tei_remove}, + {ST_L2_5, EV_L2_MDL_REMOVE, l2_st5_tei_remove}, + {ST_L2_6, EV_L2_MDL_REMOVE, l2_st6_tei_remove}, + {ST_L2_7, EV_L2_MDL_REMOVE, 
l2_tei_remove}, + {ST_L2_8, EV_L2_MDL_REMOVE, l2_tei_remove}, + {ST_L2_4, EV_L2_SABME, l2_start_multi}, + {ST_L2_5, EV_L2_SABME, l2_send_UA}, + {ST_L2_6, EV_L2_SABME, l2_send_DM}, + {ST_L2_7, EV_L2_SABME, l2_restart_multi}, + {ST_L2_8, EV_L2_SABME, l2_restart_multi}, + {ST_L2_4, EV_L2_DISC, l2_send_DM}, + {ST_L2_5, EV_L2_DISC, l2_send_DM}, + {ST_L2_6, EV_L2_DISC, l2_send_UA}, + {ST_L2_7, EV_L2_DISC, l2_stop_multi}, + {ST_L2_8, EV_L2_DISC, l2_stop_multi}, + {ST_L2_4, EV_L2_UA, l2_mdl_error_ua}, + {ST_L2_5, EV_L2_UA, l2_connected}, + {ST_L2_6, EV_L2_UA, l2_released}, + {ST_L2_7, EV_L2_UA, l2_mdl_error_ua}, + {ST_L2_8, EV_L2_UA, l2_mdl_error_ua}, + {ST_L2_4, EV_L2_DM, l2_reestablish}, + {ST_L2_5, EV_L2_DM, l2_st5_dm_release}, + {ST_L2_6, EV_L2_DM, l2_st6_dm_release}, + {ST_L2_7, EV_L2_DM, l2_mdl_error_dm}, + {ST_L2_8, EV_L2_DM, l2_st8_mdl_error_dm}, + {ST_L2_1, EV_L2_UI, l2_got_ui}, + {ST_L2_2, EV_L2_UI, l2_got_ui}, + {ST_L2_3, EV_L2_UI, l2_got_ui}, + {ST_L2_4, EV_L2_UI, l2_got_ui}, + {ST_L2_5, EV_L2_UI, l2_got_ui}, + {ST_L2_6, EV_L2_UI, l2_got_ui}, + {ST_L2_7, EV_L2_UI, l2_got_ui}, + {ST_L2_8, EV_L2_UI, l2_got_ui}, + {ST_L2_7, EV_L2_FRMR, l2_got_FRMR}, + {ST_L2_8, EV_L2_FRMR, l2_got_FRMR}, + {ST_L2_7, EV_L2_SUPER, l2_st7_got_super}, + {ST_L2_8, EV_L2_SUPER, l2_st8_got_super}, + {ST_L2_7, EV_L2_I, l2_got_iframe}, + {ST_L2_8, EV_L2_I, l2_got_iframe}, + {ST_L2_5, EV_L2_T200, l2_st5_tout_200}, + {ST_L2_6, EV_L2_T200, l2_st6_tout_200}, + {ST_L2_7, EV_L2_T200, l2_st7_tout_200}, + {ST_L2_8, EV_L2_T200, l2_st8_tout_200}, + {ST_L2_7, EV_L2_T203, l2_st7_tout_203}, + {ST_L2_7, EV_L2_ACK_PULL, l2_pull_iqueue}, + {ST_L2_7, EV_L2_SET_OWN_BUSY, l2_set_own_busy}, + {ST_L2_8, EV_L2_SET_OWN_BUSY, l2_set_own_busy}, + {ST_L2_7, EV_L2_CLEAR_OWN_BUSY, l2_clear_own_busy}, + {ST_L2_8, EV_L2_CLEAR_OWN_BUSY, l2_clear_own_busy}, + {ST_L2_4, EV_L2_FRAME_ERROR, l2_frame_error}, + {ST_L2_5, EV_L2_FRAME_ERROR, l2_frame_error}, + {ST_L2_6, EV_L2_FRAME_ERROR, l2_frame_error}, + {ST_L2_7, EV_L2_FRAME_ERROR, l2_frame_error_reest}, + {ST_L2_8, EV_L2_FRAME_ERROR, l2_frame_error_reest}, + {ST_L2_1, EV_L1_DEACTIVATE, l2_st14_persistant_da}, + {ST_L2_2, EV_L1_DEACTIVATE, l2_st24_tei_remove}, + {ST_L2_3, EV_L1_DEACTIVATE, l2_st3_tei_remove}, + {ST_L2_4, EV_L1_DEACTIVATE, l2_st14_persistant_da}, + {ST_L2_5, EV_L1_DEACTIVATE, l2_st5_persistant_da}, + {ST_L2_6, EV_L1_DEACTIVATE, l2_st6_persistant_da}, + {ST_L2_7, EV_L1_DEACTIVATE, l2_persistant_da}, + {ST_L2_8, EV_L1_DEACTIVATE, l2_persistant_da}, +}; + +#define L2_FN_COUNT (sizeof(L2FnList)/sizeof(struct FsmNode)) + +static int +ph_data_indication(struct layer2 *l2, struct mISDNhead *hh, struct sk_buff *skb) +{ + u_char *datap = skb->data; + int ret = -EINVAL; + int psapi, ptei; + u_int l; + int c = 0; + + l = l2addrsize(l2); + if (skb->len <= l) { + mISDN_FsmEvent(&l2->l2m, EV_L2_FRAME_ERROR, (void *) 'N'); + return ret; + } + if (test_bit(FLG_LAPD, &l2->flag)) { /* Maybe not needed */ + psapi = *datap++; + ptei = *datap++; + if ((psapi & 1) || !(ptei & 1)) { + printk(KERN_WARNING + "l2 D-channel frame wrong EA0/EA1\n"); + return ret; + } + psapi >>= 2; + ptei >>= 1; + if (psapi != l2->sapi) { + /* not our bussiness + * printk(KERN_DEBUG "%s: sapi %d/%d sapi mismatch\n", + * __func__, + * psapi, l2->sapi); + */ + dev_kfree_skb(skb); + return 0; + } + if ((ptei != l2->tei) && (ptei != GROUP_TEI)) { + /* not our bussiness + * printk(KERN_DEBUG "%s: tei %d/%d sapi %d mismatch\n", + * __func__, + * ptei, l2->tei, psapi); + */ + dev_kfree_skb(skb); + return 0; + } + } else + datap += l; 
+ if (!(*datap & 1)) { /* I-Frame */ + c = iframe_error(l2, skb); + if (!c) + ret = mISDN_FsmEvent(&l2->l2m, EV_L2_I, skb); + } else if (IsSFrame(datap, l2)) { /* S-Frame */ + c = super_error(l2, skb); + if (!c) + ret = mISDN_FsmEvent(&l2->l2m, EV_L2_SUPER, skb); + } else if (IsUI(datap)) { + c = UI_error(l2, skb); + if (!c) + ret = mISDN_FsmEvent(&l2->l2m, EV_L2_UI, skb); + } else if (IsSABME(datap, l2)) { + c = unnum_error(l2, skb, CMD); + if (!c) + ret = mISDN_FsmEvent(&l2->l2m, EV_L2_SABME, skb); + } else if (IsUA(datap)) { + c = unnum_error(l2, skb, RSP); + if (!c) + ret = mISDN_FsmEvent(&l2->l2m, EV_L2_UA, skb); + } else if (IsDISC(datap)) { + c = unnum_error(l2, skb, CMD); + if (!c) + ret = mISDN_FsmEvent(&l2->l2m, EV_L2_DISC, skb); + } else if (IsDM(datap)) { + c = unnum_error(l2, skb, RSP); + if (!c) + ret = mISDN_FsmEvent(&l2->l2m, EV_L2_DM, skb); + } else if (IsFRMR(datap)) { + c = FRMR_error(l2, skb); + if (!c) + ret = mISDN_FsmEvent(&l2->l2m, EV_L2_FRMR, skb); + } else + c = 'L'; + if (c) { + printk(KERN_WARNING "l2 D-channel frame error %c\n", c); + mISDN_FsmEvent(&l2->l2m, EV_L2_FRAME_ERROR, (void *)(long)c); + } + return ret; +} + +static int +l2_send(struct mISDNchannel *ch, struct sk_buff *skb) +{ + struct layer2 *l2 = container_of(ch, struct layer2, ch); + struct mISDNhead *hh = mISDN_HEAD_P(skb); + int ret = -EINVAL; + + if (*debug & DEBUG_L2_RECV) + printk(KERN_DEBUG "%s: prim(%x) id(%x) tei(%d)\n", + __func__, hh->prim, hh->id, l2->tei); + switch (hh->prim) { + case PH_DATA_IND: + ret = ph_data_indication(l2, hh, skb); + break; + case PH_DATA_CNF: + ret = ph_data_confirm(l2, hh, skb); + break; + case PH_ACTIVATE_IND: + test_and_set_bit(FLG_L1_ACTIV, &l2->flag); + l2up_create(l2, MPH_ACTIVATE_IND, 0, NULL); + if (test_and_clear_bit(FLG_ESTAB_PEND, &l2->flag)) + ret = mISDN_FsmEvent(&l2->l2m, + EV_L2_DL_ESTABLISH_REQ, skb); + break; + case PH_DEACTIVATE_IND: + test_and_clear_bit(FLG_L1_ACTIV, &l2->flag); + l2up_create(l2, MPH_DEACTIVATE_IND, 0, NULL); + ret = mISDN_FsmEvent(&l2->l2m, EV_L1_DEACTIVATE, skb); + break; + case MPH_INFORMATION_IND: + if (!l2->up) + break; + ret = l2->up->send(l2->up, skb); + break; + case DL_DATA_REQ: + ret = mISDN_FsmEvent(&l2->l2m, EV_L2_DL_DATA, skb); + break; + case DL_UNITDATA_REQ: + ret = mISDN_FsmEvent(&l2->l2m, EV_L2_DL_UNITDATA, skb); + break; + case DL_ESTABLISH_REQ: + if (test_bit(FLG_LAPB, &l2->flag)) + test_and_set_bit(FLG_ORIG, &l2->flag); + if (test_bit(FLG_L1_ACTIV, &l2->flag)) { + if (test_bit(FLG_LAPD, &l2->flag) || + test_bit(FLG_ORIG, &l2->flag)) + ret = mISDN_FsmEvent(&l2->l2m, + EV_L2_DL_ESTABLISH_REQ, skb); + } else { + if (test_bit(FLG_LAPD, &l2->flag) || + test_bit(FLG_ORIG, &l2->flag)) { + test_and_set_bit(FLG_ESTAB_PEND, + &l2->flag); + } + ret = l2down(l2, PH_ACTIVATE_REQ, l2_newid(l2), + skb); + } + break; + case DL_RELEASE_REQ: + if (test_bit(FLG_LAPB, &l2->flag)) + l2down_create(l2, PH_DEACTIVATE_REQ, + l2_newid(l2), 0, NULL); + ret = mISDN_FsmEvent(&l2->l2m, EV_L2_DL_RELEASE_REQ, + skb); + break; + default: + if (*debug & DEBUG_L2) + l2m_debug(&l2->l2m, "l2 unknown pr %04x", + hh->prim); + } + if (ret) { + dev_kfree_skb(skb); + ret = 0; + } + return ret; +} + +int +tei_l2(struct layer2 *l2, u_int cmd, u_long arg) +{ + int ret = -EINVAL; + + if (*debug & DEBUG_L2_TEI) + printk(KERN_DEBUG "%s: cmd(%x)\n", __func__, cmd); + switch (cmd) { + case (MDL_ASSIGN_REQ): + ret = mISDN_FsmEvent(&l2->l2m, EV_L2_MDL_ASSIGN, (void *)arg); + break; + case (MDL_REMOVE_REQ): + ret = mISDN_FsmEvent(&l2->l2m, EV_L2_MDL_REMOVE, 
NULL); + break; + case (MDL_ERROR_IND): + ret = mISDN_FsmEvent(&l2->l2m, EV_L2_MDL_ERROR, NULL); + break; + case (MDL_ERROR_RSP): + /* ETS 300-125 5.3.2.1 Test: TC13010 */ + printk(KERN_NOTICE "MDL_ERROR|REQ (tei_l2)\n"); + ret = mISDN_FsmEvent(&l2->l2m, EV_L2_MDL_ERROR, NULL); + break; + } + return ret; +} + +static void +release_l2(struct layer2 *l2) +{ + mISDN_FsmDelTimer(&l2->t200, 21); + mISDN_FsmDelTimer(&l2->t203, 16); + skb_queue_purge(&l2->i_queue); + skb_queue_purge(&l2->ui_queue); + skb_queue_purge(&l2->down_queue); + ReleaseWin(l2); + if (test_bit(FLG_LAPD, &l2->flag)) { + release_tei(l2); + if (l2->ch.st) + l2->ch.st->dev->D.ctrl(&l2->ch.st->dev->D, + CLOSE_CHANNEL, NULL); + } + kfree(l2); +} + +static int +l2_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg) +{ + struct layer2 *l2 = container_of(ch, struct layer2, ch); + u_int info; + + if (*debug & DEBUG_L2_CTRL) + printk(KERN_DEBUG "%s:(%x)\n", __func__, cmd); + + switch (cmd) { + case OPEN_CHANNEL: + if (test_bit(FLG_LAPD, &l2->flag)) { + set_channel_address(&l2->ch, l2->sapi, l2->tei); + info = DL_INFO_L2_CONNECT; + l2up_create(l2, DL_INFORMATION_IND, + sizeof(info), &info); + } + break; + case CLOSE_CHANNEL: + if (l2->ch.peer) + l2->ch.peer->ctrl(l2->ch.peer, CLOSE_CHANNEL, NULL); + release_l2(l2); + break; + } + return 0; +} + +struct layer2 * +create_l2(struct mISDNchannel *ch, u_int protocol, u_long options, u_long arg) +{ + struct layer2 *l2; + struct channel_req rq; + + l2 = kzalloc(sizeof(struct layer2), GFP_KERNEL); + if (!l2) { + printk(KERN_ERR "kzalloc layer2 failed\n"); + return NULL; + } + l2->next_id = 1; + l2->down_id = MISDN_ID_NONE; + l2->up = ch; + l2->ch.st = ch->st; + l2->ch.send = l2_send; + l2->ch.ctrl = l2_ctrl; + switch (protocol) { + case ISDN_P_LAPD_NT: + test_and_set_bit(FLG_LAPD, &l2->flag); + test_and_set_bit(FLG_LAPD_NET, &l2->flag); + test_and_set_bit(FLG_MOD128, &l2->flag); + l2->sapi = 0; + l2->maxlen = MAX_DFRAME_LEN; + if (test_bit(OPTION_L2_PMX, &options)) + l2->window = 7; + else + l2->window = 1; + if (test_bit(OPTION_L2_PTP, &options)) + test_and_set_bit(FLG_PTP, &l2->flag); + if (test_bit(OPTION_L2_FIXEDTEI, &options)) + test_and_set_bit(FLG_FIXED_TEI, &l2->flag); + l2->tei = (u_int)arg; + l2->T200 = 1000; + l2->N200 = 3; + l2->T203 = 10000; + if (test_bit(OPTION_L2_PMX, &options)) + rq.protocol = ISDN_P_NT_E1; + else + rq.protocol = ISDN_P_NT_S0; + rq.adr.channel = 0; + l2->ch.st->dev->D.ctrl(&l2->ch.st->dev->D, OPEN_CHANNEL, &rq); + break; + case ISDN_P_LAPD_TE: + test_and_set_bit(FLG_LAPD, &l2->flag); + test_and_set_bit(FLG_MOD128, &l2->flag); + test_and_set_bit(FLG_ORIG, &l2->flag); + l2->sapi = 0; + l2->maxlen = MAX_DFRAME_LEN; + if (test_bit(OPTION_L2_PMX, &options)) + l2->window = 7; + else + l2->window = 1; + if (test_bit(OPTION_L2_PTP, &options)) + test_and_set_bit(FLG_PTP, &l2->flag); + if (test_bit(OPTION_L2_FIXEDTEI, &options)) + test_and_set_bit(FLG_FIXED_TEI, &l2->flag); + l2->tei = (u_int)arg; + l2->T200 = 1000; + l2->N200 = 3; + l2->T203 = 10000; + if (test_bit(OPTION_L2_PMX, &options)) + rq.protocol = ISDN_P_TE_E1; + else + rq.protocol = ISDN_P_TE_S0; + rq.adr.channel = 0; + l2->ch.st->dev->D.ctrl(&l2->ch.st->dev->D, OPEN_CHANNEL, &rq); + break; + case ISDN_P_B_X75SLP: + test_and_set_bit(FLG_LAPB, &l2->flag); + l2->window = 7; + l2->maxlen = MAX_DATA_SIZE; + l2->T200 = 1000; + l2->N200 = 4; + l2->T203 = 5000; + l2->addr.A = 3; + l2->addr.B = 1; + break; + default: + printk(KERN_ERR "layer2 create failed prt %x\n", + protocol); + kfree(l2); + return NULL; + } + 
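+	/*
+	 * The switch above filled in the per-protocol defaults: the LAPD
+	 * TE/NT variants run modulo 128 with T200 = 1000, N200 = 3,
+	 * T203 = 10000 and a window of 7 on PMX (E1) or 1 otherwise, while
+	 * X.75 SLP uses a window of 7, T200 = 1000, N200 = 4, T203 = 5000
+	 * and the fixed address octets A = 3 / B = 1.  The common setup of
+	 * the queues, the transmit window and the layer 2 FSM follows.
+	 */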
skb_queue_head_init(&l2->i_queue); + skb_queue_head_init(&l2->ui_queue); + skb_queue_head_init(&l2->down_queue); + skb_queue_head_init(&l2->tmp_queue); + InitWin(l2); + l2->l2m.fsm = &l2fsm; + if (test_bit(FLG_LAPB, &l2->flag) || + test_bit(FLG_PTP, &l2->flag) || + test_bit(FLG_LAPD_NET, &l2->flag)) + l2->l2m.state = ST_L2_4; + else + l2->l2m.state = ST_L2_1; + l2->l2m.debug = *debug; + l2->l2m.userdata = l2; + l2->l2m.userint = 0; + l2->l2m.printdebug = l2m_debug; + + mISDN_FsmInitTimer(&l2->l2m, &l2->t200); + mISDN_FsmInitTimer(&l2->l2m, &l2->t203); + return l2; +} + +static int +x75create(struct channel_req *crq) +{ + struct layer2 *l2; + + if (crq->protocol != ISDN_P_B_X75SLP) + return -EPROTONOSUPPORT; + l2 = create_l2(crq->ch, crq->protocol, 0, 0); + if (!l2) + return -ENOMEM; + crq->ch = &l2->ch; + crq->protocol = ISDN_P_B_HDLC; + return 0; +} + +static struct Bprotocol X75SLP = { + .Bprotocols = (1 << (ISDN_P_B_X75SLP & ISDN_P_B_MASK)), + .name = "X75SLP", + .create = x75create +}; + +int +Isdnl2_Init(u_int *deb) +{ + debug = deb; + mISDN_register_Bprotocol(&X75SLP); + l2fsm.state_count = L2_STATE_COUNT; + l2fsm.event_count = L2_EVENT_COUNT; + l2fsm.strEvent = strL2Event; + l2fsm.strState = strL2State; + mISDN_FsmNew(&l2fsm, L2FnList, ARRAY_SIZE(L2FnList)); + TEIInit(deb); + return 0; +} + +void +Isdnl2_cleanup(void) +{ + mISDN_unregister_Bprotocol(&X75SLP); + TEIFree(); + mISDN_FsmFree(&l2fsm); +} + diff --git a/drivers/isdn/mISDN/layer2.h b/drivers/isdn/mISDN/layer2.h new file mode 100644 index 000000000000..de2dd02056a3 --- /dev/null +++ b/drivers/isdn/mISDN/layer2.h @@ -0,0 +1,140 @@ +/* + * Layer 2 defines + * + * Copyright 2008 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include +#include +#include "fsm.h" + +#define MAX_WINDOW 8 + +struct manager { + struct mISDNchannel ch; + struct mISDNchannel bcast; + u_long options; + struct list_head layer2; + rwlock_t lock; + struct FsmInst deact; + struct FsmTimer datimer; + struct sk_buff_head sendq; + struct mISDNchannel *up; + u_int nextid; + u_int lastid; +}; + +struct teimgr { + int ri; + int rcnt; + struct FsmInst tei_m; + struct FsmTimer timer; + int tval, nval; + struct layer2 *l2; + struct manager *mgr; +}; + +struct laddr { + u_char A; + u_char B; +}; + +struct layer2 { + struct list_head list; + struct mISDNchannel ch; + u_long flag; + int id; + struct mISDNchannel *up; + signed char sapi; + signed char tei; + struct laddr addr; + u_int maxlen; + struct teimgr *tm; + u_int vs, va, vr; + int rc; + u_int window; + u_int sow; + struct FsmInst l2m; + struct FsmTimer t200, t203; + int T200, N200, T203; + u_int next_id; + u_int down_id; + struct sk_buff *windowar[MAX_WINDOW]; + struct sk_buff_head i_queue; + struct sk_buff_head ui_queue; + struct sk_buff_head down_queue; + struct sk_buff_head tmp_queue; +}; + +enum { + ST_L2_1, + ST_L2_2, + ST_L2_3, + ST_L2_4, + ST_L2_5, + ST_L2_6, + ST_L2_7, + ST_L2_8, +}; + +#define L2_STATE_COUNT (ST_L2_8+1) + +extern struct layer2 *create_l2(struct mISDNchannel *, u_int, + u_long, u_long); +extern int tei_l2(struct layer2 *, u_int, u_long arg); + + +/* from tei.c */ +extern int l2_tei(struct layer2 *, u_int, u_long arg); +extern void release_tei(struct layer2 *); +extern int TEIInit(u_int *); +extern void TEIFree(void); + +#define MAX_L2HEADER_LEN 4 + +#define RR 0x01 +#define RNR 0x05 +#define REJ 0x09 +#define SABME 0x6f +#define SABM 0x2f +#define DM 0x0f +#define UI 0x03 +#define DISC 0x43 +#define UA 0x63 +#define FRMR 0x87 +#define XID 0xaf + +#define CMD 0 +#define RSP 1 + +#define LC_FLUSH_WAIT 1 + +#define FLG_LAPB 0 +#define FLG_LAPD 1 +#define FLG_ORIG 2 +#define FLG_MOD128 3 +#define FLG_PEND_REL 4 +#define FLG_L3_INIT 5 +#define FLG_T200_RUN 6 +#define FLG_ACK_PEND 7 +#define FLG_REJEXC 8 +#define FLG_OWN_BUSY 9 +#define FLG_PEER_BUSY 10 +#define FLG_DCHAN_BUSY 11 +#define FLG_L1_ACTIV 12 +#define FLG_ESTAB_PEND 13 +#define FLG_PTP 14 +#define FLG_FIXED_TEI 15 +#define FLG_L2BLOCK 16 +#define FLG_L1_NOTREADY 17 +#define FLG_LAPD_NET 18 diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c new file mode 100644 index 000000000000..4ba4cc364c9e --- /dev/null +++ b/drivers/isdn/mISDN/socket.c @@ -0,0 +1,781 @@ +/* + * + * Author Karsten Keil + * + * Copyright 2008 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include +#include "core.h" + +static int *debug; + +static struct proto mISDN_proto = { + .name = "misdn", + .owner = THIS_MODULE, + .obj_size = sizeof(struct mISDN_sock) +}; + +#define _pms(sk) ((struct mISDN_sock *)sk) + +static struct mISDN_sock_list data_sockets = { + .lock = __RW_LOCK_UNLOCKED(data_sockets.lock) +}; + +static struct mISDN_sock_list base_sockets = { + .lock = __RW_LOCK_UNLOCKED(base_sockets.lock) +}; + +#define L2_HEADER_LEN 4 + +static inline struct sk_buff * +_l2_alloc_skb(unsigned int len, gfp_t gfp_mask) +{ + struct sk_buff *skb; + + skb = alloc_skb(len + L2_HEADER_LEN, gfp_mask); + if (likely(skb)) + skb_reserve(skb, L2_HEADER_LEN); + return skb; +} + +static void +mISDN_sock_link(struct mISDN_sock_list *l, struct sock *sk) +{ + write_lock_bh(&l->lock); + sk_add_node(sk, &l->head); + write_unlock_bh(&l->lock); +} + +static void mISDN_sock_unlink(struct mISDN_sock_list *l, struct sock *sk) +{ + write_lock_bh(&l->lock); + sk_del_node_init(sk); + write_unlock_bh(&l->lock); +} + +static int +mISDN_send(struct mISDNchannel *ch, struct sk_buff *skb) +{ + struct mISDN_sock *msk; + int err; + + msk = container_of(ch, struct mISDN_sock, ch); + if (*debug & DEBUG_SOCKET) + printk(KERN_DEBUG "%s len %d %p\n", __func__, skb->len, skb); + if (msk->sk.sk_state == MISDN_CLOSED) + return -EUNATCH; + __net_timestamp(skb); + err = sock_queue_rcv_skb(&msk->sk, skb); + if (err) + printk(KERN_WARNING "%s: error %d\n", __func__, err); + return err; +} + +static int +mISDN_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg) +{ + struct mISDN_sock *msk; + + msk = container_of(ch, struct mISDN_sock, ch); + if (*debug & DEBUG_SOCKET) + printk(KERN_DEBUG "%s(%p, %x, %p)\n", __func__, ch, cmd, arg); + switch (cmd) { + case CLOSE_CHANNEL: + msk->sk.sk_state = MISDN_CLOSED; + break; + } + return 0; +} + +static inline void +mISDN_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) +{ + struct timeval tv; + + if (_pms(sk)->cmask & MISDN_TIME_STAMP) { + skb_get_timestamp(skb, &tv); + put_cmsg(msg, SOL_MISDN, MISDN_TIME_STAMP, sizeof(tv), &tv); + } +} + +static int +mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + struct sk_buff *skb; + struct sock *sk = sock->sk; + struct sockaddr_mISDN *maddr; + + int copied, err; + + if (*debug & DEBUG_SOCKET) + printk(KERN_DEBUG "%s: len %d, flags %x ch.nr %d, proto %x\n", + __func__, (int)len, flags, _pms(sk)->ch.nr, + sk->sk_protocol); + if (flags & (MSG_OOB)) + return -EOPNOTSUPP; + + if (sk->sk_state == MISDN_CLOSED) + return 0; + + skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); + if (!skb) + return err; + + if (msg->msg_namelen >= sizeof(struct sockaddr_mISDN)) { + msg->msg_namelen = sizeof(struct sockaddr_mISDN); + maddr = (struct sockaddr_mISDN *)msg->msg_name; + maddr->family = AF_ISDN; + maddr->dev = _pms(sk)->dev->id; + if ((sk->sk_protocol == ISDN_P_LAPD_TE) || + (sk->sk_protocol == ISDN_P_LAPD_NT)) { + maddr->channel = (mISDN_HEAD_ID(skb) >> 16) & 0xff; + maddr->tei = (mISDN_HEAD_ID(skb) >> 8) & 0xff; + maddr->sapi = mISDN_HEAD_ID(skb) & 0xff; + } else { + maddr->channel = _pms(sk)->ch.nr; + maddr->sapi = _pms(sk)->ch.addr & 0xFF; + maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xFF; + } + } else { + if (msg->msg_namelen) + printk(KERN_WARNING "%s: too small namelen %d\n", + __func__, msg->msg_namelen); + msg->msg_namelen = 0; + } + + copied = skb->len + MISDN_HEADER_LEN; + if (len < copied) { + if (flags & MSG_PEEK) + atomic_dec(&skb->users); + 
else + skb_queue_head(&sk->sk_receive_queue, skb); + return -ENOSPC; + } + memcpy(skb_push(skb, MISDN_HEADER_LEN), mISDN_HEAD_P(skb), + MISDN_HEADER_LEN); + + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + + mISDN_sock_cmsg(sk, msg, skb); + + skb_free_datagram(sk, skb); + + return err ? : copied; +} + +static int +mISDN_sock_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk = sock->sk; + struct sk_buff *skb; + int err = -ENOMEM; + struct sockaddr_mISDN *maddr; + + if (*debug & DEBUG_SOCKET) + printk(KERN_DEBUG "%s: len %d flags %x ch %d proto %x\n", + __func__, (int)len, msg->msg_flags, _pms(sk)->ch.nr, + sk->sk_protocol); + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE)) + return -EINVAL; + + if (len < MISDN_HEADER_LEN) + return -EINVAL; + + if (sk->sk_state != MISDN_BOUND) + return -EBADFD; + + lock_sock(sk); + + skb = _l2_alloc_skb(len, GFP_KERNEL); + if (!skb) + goto done; + + if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { + err = -EFAULT; + goto drop; + } + + memcpy(mISDN_HEAD_P(skb), skb->data, MISDN_HEADER_LEN); + skb_pull(skb, MISDN_HEADER_LEN); + + if (msg->msg_namelen >= sizeof(struct sockaddr_mISDN)) { + /* if we have a address, we use it */ + maddr = (struct sockaddr_mISDN *)msg->msg_name; + mISDN_HEAD_ID(skb) = maddr->channel; + } else { /* use default for L2 messages */ + if ((sk->sk_protocol == ISDN_P_LAPD_TE) || + (sk->sk_protocol == ISDN_P_LAPD_NT)) + mISDN_HEAD_ID(skb) = _pms(sk)->ch.nr; + } + + if (*debug & DEBUG_SOCKET) + printk(KERN_DEBUG "%s: ID:%x\n", + __func__, mISDN_HEAD_ID(skb)); + + err = -ENODEV; + if (!_pms(sk)->ch.peer || + (err = _pms(sk)->ch.recv(_pms(sk)->ch.peer, skb))) + goto drop; + + err = len; + +done: + release_sock(sk); + return err; + +drop: + kfree_skb(skb); + goto done; +} + +static int +data_sock_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (*debug & DEBUG_SOCKET) + printk(KERN_DEBUG "%s(%p) sk=%p\n", __func__, sock, sk); + if (!sk) + return 0; + switch (sk->sk_protocol) { + case ISDN_P_TE_S0: + case ISDN_P_NT_S0: + case ISDN_P_TE_E1: + case ISDN_P_NT_E1: + if (sk->sk_state == MISDN_BOUND) + delete_channel(&_pms(sk)->ch); + else + mISDN_sock_unlink(&data_sockets, sk); + break; + case ISDN_P_LAPD_TE: + case ISDN_P_LAPD_NT: + case ISDN_P_B_RAW: + case ISDN_P_B_HDLC: + case ISDN_P_B_X75SLP: + case ISDN_P_B_L2DTMF: + case ISDN_P_B_L2DSP: + case ISDN_P_B_L2DSPHDLC: + delete_channel(&_pms(sk)->ch); + mISDN_sock_unlink(&data_sockets, sk); + break; + } + + lock_sock(sk); + + sock_orphan(sk); + skb_queue_purge(&sk->sk_receive_queue); + + release_sock(sk); + sock_put(sk); + + return 0; +} + +static int +data_sock_ioctl_bound(struct sock *sk, unsigned int cmd, void __user *p) +{ + struct mISDN_ctrl_req cq; + int err = -EINVAL, val; + struct mISDNchannel *bchan, *next; + + lock_sock(sk); + if (!_pms(sk)->dev) { + err = -ENODEV; + goto done; + } + switch (cmd) { + case IMCTRLREQ: + if (copy_from_user(&cq, p, sizeof(cq))) { + err = -EFAULT; + break; + } + if ((sk->sk_protocol & ~ISDN_P_B_MASK) == ISDN_P_B_START) { + list_for_each_entry_safe(bchan, next, + &_pms(sk)->dev->bchannels, list) { + if (bchan->nr == cq.channel) { + err = bchan->ctrl(bchan, + CONTROL_CHANNEL, &cq); + break; + } + } + } else + err = _pms(sk)->dev->D.ctrl(&_pms(sk)->dev->D, + CONTROL_CHANNEL, &cq); + if (err) + break; + if (copy_to_user(p, &cq, sizeof(cq))) + err = -EFAULT; + break; + case IMCLEAR_L2: + if 
(sk->sk_protocol != ISDN_P_LAPD_NT) { + err = -EINVAL; + break; + } + if (get_user(val, (int __user *)p)) { + err = -EFAULT; + break; + } + err = _pms(sk)->dev->teimgr->ctrl(_pms(sk)->dev->teimgr, + CONTROL_CHANNEL, &val); + break; + default: + err = -EINVAL; + break; + } +done: + release_sock(sk); + return err; +} + +static int +data_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + int err = 0, id; + struct sock *sk = sock->sk; + struct mISDNdevice *dev; + struct mISDNversion ver; + + switch (cmd) { + case IMGETVERSION: + ver.major = MISDN_MAJOR_VERSION; + ver.minor = MISDN_MINOR_VERSION; + ver.release = MISDN_RELEASE; + if (copy_to_user((void __user *)arg, &ver, sizeof(ver))) + err = -EFAULT; + break; + case IMGETCOUNT: + id = get_mdevice_count(); + if (put_user(id, (int __user *)arg)) + err = -EFAULT; + break; + case IMGETDEVINFO: + if (get_user(id, (int __user *)arg)) { + err = -EFAULT; + break; + } + dev = get_mdevice(id); + if (dev) { + struct mISDN_devinfo di; + + di.id = dev->id; + di.Dprotocols = dev->Dprotocols; + di.Bprotocols = dev->Bprotocols | get_all_Bprotocols(); + di.protocol = dev->D.protocol; + memcpy(di.channelmap, dev->channelmap, + MISDN_CHMAP_SIZE * 4); + di.nrbchan = dev->nrbchan; + strcpy(di.name, dev->name); + if (copy_to_user((void __user *)arg, &di, sizeof(di))) + err = -EFAULT; + } else + err = -ENODEV; + break; + default: + if (sk->sk_state == MISDN_BOUND) + err = data_sock_ioctl_bound(sk, cmd, + (void __user *)arg); + else + err = -ENOTCONN; + } + return err; +} + +static int data_sock_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, int len) +{ + struct sock *sk = sock->sk; + int err = 0, opt = 0; + + if (*debug & DEBUG_SOCKET) + printk(KERN_DEBUG "%s(%p, %d, %x, %p, %d)\n", __func__, sock, + level, optname, optval, len); + + lock_sock(sk); + + switch (optname) { + case MISDN_TIME_STAMP: + if (get_user(opt, (int __user *)optval)) { + err = -EFAULT; + break; + } + + if (opt) + _pms(sk)->cmask |= MISDN_TIME_STAMP; + else + _pms(sk)->cmask &= ~MISDN_TIME_STAMP; + break; + default: + err = -ENOPROTOOPT; + break; + } + release_sock(sk); + return err; +} + +static int data_sock_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + int len, opt; + + if (get_user(len, optlen)) + return -EFAULT; + + switch (optname) { + case MISDN_TIME_STAMP: + if (_pms(sk)->cmask & MISDN_TIME_STAMP) + opt = 1; + else + opt = 0; + + if (put_user(opt, optval)) + return -EFAULT; + break; + default: + return -ENOPROTOOPT; + } + + return 0; +} + +static int +data_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +{ + struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr; + struct sock *sk = sock->sk; + int err = 0; + + if (*debug & DEBUG_SOCKET) + printk(KERN_DEBUG "%s(%p) sk=%p\n", __func__, sock, sk); + if (addr_len != sizeof(struct sockaddr_mISDN)) + return -EINVAL; + if (!maddr || maddr->family != AF_ISDN) + return -EINVAL; + + lock_sock(sk); + + if (_pms(sk)->dev) { + err = -EALREADY; + goto done; + } + _pms(sk)->dev = get_mdevice(maddr->dev); + if (!_pms(sk)->dev) { + err = -ENODEV; + goto done; + } + _pms(sk)->ch.send = mISDN_send; + _pms(sk)->ch.ctrl = mISDN_ctrl; + + switch (sk->sk_protocol) { + case ISDN_P_TE_S0: + case ISDN_P_NT_S0: + case ISDN_P_TE_E1: + case ISDN_P_NT_E1: + mISDN_sock_unlink(&data_sockets, sk); + err = connect_layer1(_pms(sk)->dev, &_pms(sk)->ch, + sk->sk_protocol, maddr); + if (err) + 
mISDN_sock_link(&data_sockets, sk); + break; + case ISDN_P_LAPD_TE: + case ISDN_P_LAPD_NT: + err = create_l2entity(_pms(sk)->dev, &_pms(sk)->ch, + sk->sk_protocol, maddr); + break; + case ISDN_P_B_RAW: + case ISDN_P_B_HDLC: + case ISDN_P_B_X75SLP: + case ISDN_P_B_L2DTMF: + case ISDN_P_B_L2DSP: + case ISDN_P_B_L2DSPHDLC: + err = connect_Bstack(_pms(sk)->dev, &_pms(sk)->ch, + sk->sk_protocol, maddr); + break; + default: + err = -EPROTONOSUPPORT; + } + if (err) + goto done; + sk->sk_state = MISDN_BOUND; + _pms(sk)->ch.protocol = sk->sk_protocol; + +done: + release_sock(sk); + return err; +} + +static int +data_sock_getname(struct socket *sock, struct sockaddr *addr, + int *addr_len, int peer) +{ + struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr; + struct sock *sk = sock->sk; + + if (!_pms(sk)->dev) + return -EBADFD; + + lock_sock(sk); + + *addr_len = sizeof(*maddr); + maddr->dev = _pms(sk)->dev->id; + maddr->channel = _pms(sk)->ch.nr; + maddr->sapi = _pms(sk)->ch.addr & 0xff; + maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xff; + release_sock(sk); + return 0; +} + +static const struct proto_ops data_sock_ops = { + .family = PF_ISDN, + .owner = THIS_MODULE, + .release = data_sock_release, + .ioctl = data_sock_ioctl, + .bind = data_sock_bind, + .getname = data_sock_getname, + .sendmsg = mISDN_sock_sendmsg, + .recvmsg = mISDN_sock_recvmsg, + .poll = datagram_poll, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = data_sock_setsockopt, + .getsockopt = data_sock_getsockopt, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .mmap = sock_no_mmap +}; + +static int +data_sock_create(struct net *net, struct socket *sock, int protocol) +{ + struct sock *sk; + + if (sock->type != SOCK_DGRAM) + return -ESOCKTNOSUPPORT; + + sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto); + if (!sk) + return -ENOMEM; + + sock_init_data(sock, sk); + + sock->ops = &data_sock_ops; + sock->state = SS_UNCONNECTED; + sock_reset_flag(sk, SOCK_ZAPPED); + + sk->sk_protocol = protocol; + sk->sk_state = MISDN_OPEN; + mISDN_sock_link(&data_sockets, sk); + + return 0; +} + +static int +base_sock_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + + printk(KERN_DEBUG "%s(%p) sk=%p\n", __func__, sock, sk); + if (!sk) + return 0; + + mISDN_sock_unlink(&base_sockets, sk); + sock_orphan(sk); + sock_put(sk); + + return 0; +} + +static int +base_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + int err = 0, id; + struct mISDNdevice *dev; + struct mISDNversion ver; + + switch (cmd) { + case IMGETVERSION: + ver.major = MISDN_MAJOR_VERSION; + ver.minor = MISDN_MINOR_VERSION; + ver.release = MISDN_RELEASE; + if (copy_to_user((void __user *)arg, &ver, sizeof(ver))) + err = -EFAULT; + break; + case IMGETCOUNT: + id = get_mdevice_count(); + if (put_user(id, (int __user *)arg)) + err = -EFAULT; + break; + case IMGETDEVINFO: + if (get_user(id, (int __user *)arg)) { + err = -EFAULT; + break; + } + dev = get_mdevice(id); + if (dev) { + struct mISDN_devinfo di; + + di.id = dev->id; + di.Dprotocols = dev->Dprotocols; + di.Bprotocols = dev->Bprotocols | get_all_Bprotocols(); + di.protocol = dev->D.protocol; + memcpy(di.channelmap, dev->channelmap, + MISDN_CHMAP_SIZE * 4); + di.nrbchan = dev->nrbchan; + strcpy(di.name, dev->name); + if (copy_to_user((void __user *)arg, &di, sizeof(di))) + err = -EFAULT; + } else + err = -ENODEV; + break; + default: + err = -EINVAL; + } + return err; +} + +static int +base_sock_bind(struct socket *sock, 
struct sockaddr *addr, int addr_len) +{ + struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr; + struct sock *sk = sock->sk; + int err = 0; + + if (!maddr || maddr->family != AF_ISDN) + return -EINVAL; + + lock_sock(sk); + + if (_pms(sk)->dev) { + err = -EALREADY; + goto done; + } + + _pms(sk)->dev = get_mdevice(maddr->dev); + if (!_pms(sk)->dev) { + err = -ENODEV; + goto done; + } + sk->sk_state = MISDN_BOUND; + +done: + release_sock(sk); + return err; +} + +static const struct proto_ops base_sock_ops = { + .family = PF_ISDN, + .owner = THIS_MODULE, + .release = base_sock_release, + .ioctl = base_sock_ioctl, + .bind = base_sock_bind, + .getname = sock_no_getname, + .sendmsg = sock_no_sendmsg, + .recvmsg = sock_no_recvmsg, + .poll = sock_no_poll, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .mmap = sock_no_mmap +}; + + +static int +base_sock_create(struct net *net, struct socket *sock, int protocol) +{ + struct sock *sk; + + if (sock->type != SOCK_RAW) + return -ESOCKTNOSUPPORT; + + sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto); + if (!sk) + return -ENOMEM; + + sock_init_data(sock, sk); + sock->ops = &base_sock_ops; + sock->state = SS_UNCONNECTED; + sock_reset_flag(sk, SOCK_ZAPPED); + sk->sk_protocol = protocol; + sk->sk_state = MISDN_OPEN; + mISDN_sock_link(&base_sockets, sk); + + return 0; +} + +static int +mISDN_sock_create(struct net *net, struct socket *sock, int proto) +{ + int err = -EPROTONOSUPPORT; + + switch (proto) { + case ISDN_P_BASE: + err = base_sock_create(net, sock, proto); + break; + case ISDN_P_TE_S0: + case ISDN_P_NT_S0: + case ISDN_P_TE_E1: + case ISDN_P_NT_E1: + case ISDN_P_LAPD_TE: + case ISDN_P_LAPD_NT: + case ISDN_P_B_RAW: + case ISDN_P_B_HDLC: + case ISDN_P_B_X75SLP: + case ISDN_P_B_L2DTMF: + case ISDN_P_B_L2DSP: + case ISDN_P_B_L2DSPHDLC: + err = data_sock_create(net, sock, proto); + break; + default: + return err; + } + + return err; +} + +static struct +net_proto_family mISDN_sock_family_ops = { + .owner = THIS_MODULE, + .family = PF_ISDN, + .create = mISDN_sock_create, +}; + +int +misdn_sock_init(u_int *deb) +{ + int err; + + debug = deb; + err = sock_register(&mISDN_sock_family_ops); + if (err) + printk(KERN_ERR "%s: error(%d)\n", __func__, err); + return err; +} + +void +misdn_sock_cleanup(void) +{ + sock_unregister(PF_ISDN); +} + diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c new file mode 100644 index 000000000000..54cfddcc4784 --- /dev/null +++ b/drivers/isdn/mISDN/stack.c @@ -0,0 +1,674 @@ +/* + * + * Author Karsten Keil + * + * Copyright 2008 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include +#include +#include "core.h" + +static u_int *debug; + +static inline void +_queue_message(struct mISDNstack *st, struct sk_buff *skb) +{ + struct mISDNhead *hh = mISDN_HEAD_P(skb); + + if (*debug & DEBUG_QUEUE_FUNC) + printk(KERN_DEBUG "%s prim(%x) id(%x) %p\n", + __func__, hh->prim, hh->id, skb); + skb_queue_tail(&st->msgq, skb); + if (likely(!test_bit(mISDN_STACK_STOPPED, &st->status))) { + test_and_set_bit(mISDN_STACK_WORK, &st->status); + wake_up_interruptible(&st->workq); + } +} + +int +mISDN_queue_message(struct mISDNchannel *ch, struct sk_buff *skb) +{ + _queue_message(ch->st, skb); + return 0; +} + +static struct mISDNchannel * +get_channel4id(struct mISDNstack *st, u_int id) +{ + struct mISDNchannel *ch; + + mutex_lock(&st->lmutex); + list_for_each_entry(ch, &st->layer2, list) { + if (id == ch->nr) + goto unlock; + } + ch = NULL; +unlock: + mutex_unlock(&st->lmutex); + return ch; +} + +static void +send_socklist(struct mISDN_sock_list *sl, struct sk_buff *skb) +{ + struct hlist_node *node; + struct sock *sk; + struct sk_buff *cskb = NULL; + + read_lock(&sl->lock); + sk_for_each(sk, node, &sl->head) { + if (sk->sk_state != MISDN_BOUND) + continue; + if (!cskb) + cskb = skb_copy(skb, GFP_KERNEL); + if (!cskb) { + printk(KERN_WARNING "%s no skb\n", __func__); + break; + } + if (!sock_queue_rcv_skb(sk, cskb)) + cskb = NULL; + } + read_unlock(&sl->lock); + if (cskb) + dev_kfree_skb(cskb); +} + +static void +send_layer2(struct mISDNstack *st, struct sk_buff *skb) +{ + struct sk_buff *cskb; + struct mISDNhead *hh = mISDN_HEAD_P(skb); + struct mISDNchannel *ch; + int ret; + + if (!st) + return; + mutex_lock(&st->lmutex); + if ((hh->id & MISDN_ID_ADDR_MASK) == MISDN_ID_ANY) { /* L2 for all */ + list_for_each_entry(ch, &st->layer2, list) { + if (list_is_last(&ch->list, &st->layer2)) { + cskb = skb; + skb = NULL; + } else { + cskb = skb_copy(skb, GFP_KERNEL); + } + if (cskb) { + ret = ch->send(ch, cskb); + if (ret) { + if (*debug & DEBUG_SEND_ERR) + printk(KERN_DEBUG + "%s ch%d prim(%x) addr(%x)" + " err %d\n", + __func__, ch->nr, + hh->prim, ch->addr, ret); + dev_kfree_skb(cskb); + } + } else { + printk(KERN_WARNING "%s ch%d addr %x no mem\n", + __func__, ch->nr, ch->addr); + goto out; + } + } + } else { + list_for_each_entry(ch, &st->layer2, list) { + if ((hh->id & MISDN_ID_ADDR_MASK) == ch->addr) { + ret = ch->send(ch, skb); + if (!ret) + skb = NULL; + goto out; + } + } + ret = st->dev->teimgr->ctrl(st->dev->teimgr, CHECK_DATA, skb); + if (!ret) + skb = NULL; + else if (*debug & DEBUG_SEND_ERR) + printk(KERN_DEBUG + "%s ch%d mgr prim(%x) addr(%x) err %d\n", + __func__, ch->nr, hh->prim, ch->addr, ret); + } +out: + mutex_unlock(&st->lmutex); + if (skb) + dev_kfree_skb(skb); +} + +static inline int +send_msg_to_layer(struct mISDNstack *st, struct sk_buff *skb) +{ + struct mISDNhead *hh = mISDN_HEAD_P(skb); + struct mISDNchannel *ch; + int lm; + + lm = hh->prim & MISDN_LAYERMASK; + if (*debug & DEBUG_QUEUE_FUNC) + printk(KERN_DEBUG "%s prim(%x) id(%x) %p\n", + __func__, hh->prim, hh->id, skb); + if (lm == 0x1) { + if (!hlist_empty(&st->l1sock.head)) { + __net_timestamp(skb); + send_socklist(&st->l1sock, skb); + } + return st->layer1->send(st->layer1, skb); + } else if (lm == 0x2) { + if (!hlist_empty(&st->l1sock.head)) + send_socklist(&st->l1sock, skb); + send_layer2(st, skb); + return 0; + } else if (lm == 0x4) { + ch = get_channel4id(st, hh->id); + if (ch) + return ch->send(ch, skb); + else + printk(KERN_WARNING + "%s: dev(%s) prim(%x) id(%x) no channel\n", + 
+				__func__, st->dev->name, hh->prim, hh->id);
+	} else if (lm == 0x8) {
+		WARN_ON(lm == 0x8);
+		ch = get_channel4id(st, hh->id);
+		if (ch)
+			return ch->send(ch, skb);
+		else
+			printk(KERN_WARNING
+				"%s: dev(%s) prim(%x) id(%x) no channel\n",
+				__func__, st->dev->name, hh->prim, hh->id);
+	} else {
+		/* broadcast not handled yet */
+		printk(KERN_WARNING "%s: dev(%s) prim %x not delivered\n",
+			__func__, st->dev->name, hh->prim);
+	}
+	return -ESRCH;
+}
+
+static void
+do_clear_stack(struct mISDNstack *st)
+{
+}
+
+static int
+mISDNStackd(void *data)
+{
+	struct mISDNstack *st = data;
+	int err = 0;
+
+#ifdef CONFIG_SMP
+	lock_kernel();
+#endif
+	sigfillset(&current->blocked);
+#ifdef CONFIG_SMP
+	unlock_kernel();
+#endif
+	if (*debug & DEBUG_MSG_THREAD)
+		printk(KERN_DEBUG "mISDNStackd %s started\n", st->dev->name);
+
+	if (st->notify != NULL) {
+		complete(st->notify);
+		st->notify = NULL;
+	}
+
+	for (;;) {
+		struct sk_buff *skb;
+
+		if (unlikely(test_bit(mISDN_STACK_STOPPED, &st->status))) {
+			test_and_clear_bit(mISDN_STACK_WORK, &st->status);
+			test_and_clear_bit(mISDN_STACK_RUNNING, &st->status);
+		} else
+			test_and_set_bit(mISDN_STACK_RUNNING, &st->status);
+		while (test_bit(mISDN_STACK_WORK, &st->status)) {
+			skb = skb_dequeue(&st->msgq);
+			if (!skb) {
+				test_and_clear_bit(mISDN_STACK_WORK,
+					&st->status);
+				/* test if a race happens */
+				skb = skb_dequeue(&st->msgq);
+				if (!skb)
+					continue;
+				test_and_set_bit(mISDN_STACK_WORK,
+					&st->status);
+			}
+#ifdef MISDN_MSG_STATS
+			st->msg_cnt++;
+#endif
+			err = send_msg_to_layer(st, skb);
+			if (unlikely(err)) {
+				if (*debug & DEBUG_SEND_ERR)
+					printk(KERN_DEBUG
+						"%s: %s prim(%x) id(%x) "
+						"send call(%d)\n",
+						__func__, st->dev->name,
+						mISDN_HEAD_PRIM(skb),
+						mISDN_HEAD_ID(skb), err);
+				dev_kfree_skb(skb);
+				continue;
+			}
+			if (unlikely(test_bit(mISDN_STACK_STOPPED,
+				    &st->status))) {
+				test_and_clear_bit(mISDN_STACK_WORK,
+					&st->status);
+				test_and_clear_bit(mISDN_STACK_RUNNING,
+					&st->status);
+				break;
+			}
+		}
+		if (test_bit(mISDN_STACK_CLEARING, &st->status)) {
+			test_and_set_bit(mISDN_STACK_STOPPED, &st->status);
+			test_and_clear_bit(mISDN_STACK_RUNNING, &st->status);
+			do_clear_stack(st);
+			test_and_clear_bit(mISDN_STACK_CLEARING, &st->status);
+			test_and_set_bit(mISDN_STACK_RESTART, &st->status);
+		}
+		if (test_and_clear_bit(mISDN_STACK_RESTART, &st->status)) {
+			test_and_clear_bit(mISDN_STACK_STOPPED, &st->status);
+			test_and_set_bit(mISDN_STACK_RUNNING, &st->status);
+			if (!skb_queue_empty(&st->msgq))
+				test_and_set_bit(mISDN_STACK_WORK,
+					&st->status);
+		}
+		if (test_bit(mISDN_STACK_ABORT, &st->status))
+			break;
+		if (st->notify != NULL) {
+			complete(st->notify);
+			st->notify = NULL;
+		}
+#ifdef MISDN_MSG_STATS
+		st->sleep_cnt++;
+#endif
+		test_and_clear_bit(mISDN_STACK_ACTIVE, &st->status);
+		wait_event_interruptible(st->workq, (st->status &
+			mISDN_STACK_ACTION_MASK));
+		if (*debug & DEBUG_MSG_THREAD)
+			printk(KERN_DEBUG "%s: %s wake status %08lx\n",
+				__func__, st->dev->name, st->status);
+		test_and_set_bit(mISDN_STACK_ACTIVE, &st->status);
+
+		test_and_clear_bit(mISDN_STACK_WAKEUP, &st->status);
+
+		if (test_bit(mISDN_STACK_STOPPED, &st->status)) {
+			test_and_clear_bit(mISDN_STACK_RUNNING, &st->status);
+#ifdef MISDN_MSG_STATS
+			st->stopped_cnt++;
+#endif
+		}
+	}
+#ifdef MISDN_MSG_STATS
+	printk(KERN_DEBUG "mISDNStackd daemon for %s proceed %d "
+		"msg %d sleep %d stopped\n",
+		st->dev->name, st->msg_cnt, st->sleep_cnt, st->stopped_cnt);
+	printk(KERN_DEBUG
+		"mISDNStackd daemon for %s utime(%ld) stime(%ld)\n",
+		st->dev->name,
st->thread->utime, st->thread->stime); + printk(KERN_DEBUG + "mISDNStackd daemon for %s nvcsw(%ld) nivcsw(%ld)\n", + st->dev->name, st->thread->nvcsw, st->thread->nivcsw); + printk(KERN_DEBUG "mISDNStackd daemon for %s killed now\n", + st->dev->name); +#endif + test_and_set_bit(mISDN_STACK_KILLED, &st->status); + test_and_clear_bit(mISDN_STACK_RUNNING, &st->status); + test_and_clear_bit(mISDN_STACK_ACTIVE, &st->status); + test_and_clear_bit(mISDN_STACK_ABORT, &st->status); + skb_queue_purge(&st->msgq); + st->thread = NULL; + if (st->notify != NULL) { + complete(st->notify); + st->notify = NULL; + } + return 0; +} + +static int +l1_receive(struct mISDNchannel *ch, struct sk_buff *skb) +{ + if (!ch->st) + return -ENODEV; + __net_timestamp(skb); + _queue_message(ch->st, skb); + return 0; +} + +void +set_channel_address(struct mISDNchannel *ch, u_int sapi, u_int tei) +{ + ch->addr = sapi | (tei << 8); +} + +void +__add_layer2(struct mISDNchannel *ch, struct mISDNstack *st) +{ + list_add_tail(&ch->list, &st->layer2); +} + +void +add_layer2(struct mISDNchannel *ch, struct mISDNstack *st) +{ + mutex_lock(&st->lmutex); + __add_layer2(ch, st); + mutex_unlock(&st->lmutex); +} + +static int +st_own_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg) +{ + if (!ch->st || ch->st->layer1) + return -EINVAL; + return ch->st->layer1->ctrl(ch->st->layer1, cmd, arg); +} + +int +create_stack(struct mISDNdevice *dev) +{ + struct mISDNstack *newst; + int err; + DECLARE_COMPLETION_ONSTACK(done); + + newst = kzalloc(sizeof(struct mISDNstack), GFP_KERNEL); + if (!newst) { + printk(KERN_ERR "kmalloc mISDN_stack failed\n"); + return -ENOMEM; + } + newst->dev = dev; + INIT_LIST_HEAD(&newst->layer2); + INIT_HLIST_HEAD(&newst->l1sock.head); + rwlock_init(&newst->l1sock.lock); + init_waitqueue_head(&newst->workq); + skb_queue_head_init(&newst->msgq); + mutex_init(&newst->lmutex); + dev->D.st = newst; + err = create_teimanager(dev); + if (err) { + printk(KERN_ERR "kmalloc teimanager failed\n"); + kfree(newst); + return err; + } + dev->teimgr->peer = &newst->own; + dev->teimgr->recv = mISDN_queue_message; + dev->teimgr->st = newst; + newst->layer1 = &dev->D; + dev->D.recv = l1_receive; + dev->D.peer = &newst->own; + newst->own.st = newst; + newst->own.ctrl = st_own_ctrl; + newst->own.send = mISDN_queue_message; + newst->own.recv = mISDN_queue_message; + if (*debug & DEBUG_CORE_FUNC) + printk(KERN_DEBUG "%s: st(%s)\n", __func__, newst->dev->name); + newst->notify = &done; + newst->thread = kthread_run(mISDNStackd, (void *)newst, "mISDN_%s", + newst->dev->name); + if (IS_ERR(newst->thread)) { + err = PTR_ERR(newst->thread); + printk(KERN_ERR + "mISDN:cannot create kernel thread for %s (%d)\n", + newst->dev->name, err); + delete_teimanager(dev->teimgr); + kfree(newst); + } else + wait_for_completion(&done); + return err; +} + +int +connect_layer1(struct mISDNdevice *dev, struct mISDNchannel *ch, + u_int protocol, struct sockaddr_mISDN *adr) +{ + struct mISDN_sock *msk = container_of(ch, struct mISDN_sock, ch); + struct channel_req rq; + int err; + + + if (*debug & DEBUG_CORE_FUNC) + printk(KERN_DEBUG "%s: %s proto(%x) adr(%d %d %d %d)\n", + __func__, dev->name, protocol, adr->dev, adr->channel, + adr->sapi, adr->tei); + switch (protocol) { + case ISDN_P_NT_S0: + case ISDN_P_NT_E1: + case ISDN_P_TE_S0: + case ISDN_P_TE_E1: +#ifdef PROTOCOL_CHECK + /* this should be enhanced */ + if (!list_empty(&dev->D.st->layer2) + && dev->D.protocol != protocol) + return -EBUSY; + if (!hlist_empty(&dev->D.st->l1sock.head) + && 
dev->D.protocol != protocol) + return -EBUSY; +#endif + ch->recv = mISDN_queue_message; + ch->peer = &dev->D.st->own; + ch->st = dev->D.st; + rq.protocol = protocol; + rq.adr.channel = 0; + err = dev->D.ctrl(&dev->D, OPEN_CHANNEL, &rq); + printk(KERN_DEBUG "%s: ret 1 %d\n", __func__, err); + if (err) + return err; + write_lock_bh(&dev->D.st->l1sock.lock); + sk_add_node(&msk->sk, &dev->D.st->l1sock.head); + write_unlock_bh(&dev->D.st->l1sock.lock); + break; + default: + return -ENOPROTOOPT; + } + return 0; +} + +int +connect_Bstack(struct mISDNdevice *dev, struct mISDNchannel *ch, + u_int protocol, struct sockaddr_mISDN *adr) +{ + struct channel_req rq, rq2; + int pmask, err; + struct Bprotocol *bp; + + if (*debug & DEBUG_CORE_FUNC) + printk(KERN_DEBUG "%s: %s proto(%x) adr(%d %d %d %d)\n", + __func__, dev->name, protocol, + adr->dev, adr->channel, adr->sapi, + adr->tei); + ch->st = dev->D.st; + pmask = 1 << (protocol & ISDN_P_B_MASK); + if (pmask & dev->Bprotocols) { + rq.protocol = protocol; + rq.adr = *adr; + err = dev->D.ctrl(&dev->D, OPEN_CHANNEL, &rq); + if (err) + return err; + ch->recv = rq.ch->send; + ch->peer = rq.ch; + rq.ch->recv = ch->send; + rq.ch->peer = ch; + rq.ch->st = dev->D.st; + } else { + bp = get_Bprotocol4mask(pmask); + if (!bp) + return -ENOPROTOOPT; + rq2.protocol = protocol; + rq2.adr = *adr; + rq2.ch = ch; + err = bp->create(&rq2); + if (err) + return err; + ch->recv = rq2.ch->send; + ch->peer = rq2.ch; + rq2.ch->st = dev->D.st; + rq.protocol = rq2.protocol; + rq.adr = *adr; + err = dev->D.ctrl(&dev->D, OPEN_CHANNEL, &rq); + if (err) { + rq2.ch->ctrl(rq2.ch, CLOSE_CHANNEL, NULL); + return err; + } + rq2.ch->recv = rq.ch->send; + rq2.ch->peer = rq.ch; + rq.ch->recv = rq2.ch->send; + rq.ch->peer = rq2.ch; + rq.ch->st = dev->D.st; + } + ch->protocol = protocol; + ch->nr = rq.ch->nr; + return 0; +} + +int +create_l2entity(struct mISDNdevice *dev, struct mISDNchannel *ch, + u_int protocol, struct sockaddr_mISDN *adr) +{ + struct channel_req rq; + int err; + + if (*debug & DEBUG_CORE_FUNC) + printk(KERN_DEBUG "%s: %s proto(%x) adr(%d %d %d %d)\n", + __func__, dev->name, protocol, + adr->dev, adr->channel, adr->sapi, + adr->tei); + rq.protocol = ISDN_P_TE_S0; + if (dev->Dprotocols & (1 << ISDN_P_TE_E1)) + rq.protocol = ISDN_P_TE_E1; + switch (protocol) { + case ISDN_P_LAPD_NT: + rq.protocol = ISDN_P_NT_S0; + if (dev->Dprotocols & (1 << ISDN_P_NT_E1)) + rq.protocol = ISDN_P_NT_E1; + case ISDN_P_LAPD_TE: +#ifdef PROTOCOL_CHECK + /* this should be enhanced */ + if (!list_empty(&dev->D.st->layer2) + && dev->D.protocol != protocol) + return -EBUSY; + if (!hlist_empty(&dev->D.st->l1sock.head) + && dev->D.protocol != protocol) + return -EBUSY; +#endif + ch->recv = mISDN_queue_message; + ch->peer = &dev->D.st->own; + ch->st = dev->D.st; + rq.adr.channel = 0; + err = dev->D.ctrl(&dev->D, OPEN_CHANNEL, &rq); + printk(KERN_DEBUG "%s: ret 1 %d\n", __func__, err); + if (err) + break; + rq.protocol = protocol; + rq.adr = *adr; + rq.ch = ch; + err = dev->teimgr->ctrl(dev->teimgr, OPEN_CHANNEL, &rq); + printk(KERN_DEBUG "%s: ret 2 %d\n", __func__, err); + if (!err) { + if ((protocol == ISDN_P_LAPD_NT) && !rq.ch) + break; + add_layer2(rq.ch, dev->D.st); + rq.ch->recv = mISDN_queue_message; + rq.ch->peer = &dev->D.st->own; + rq.ch->ctrl(rq.ch, OPEN_CHANNEL, NULL); /* can't fail */ + } + break; + default: + err = -EPROTONOSUPPORT; + } + return err; +} + +void +delete_channel(struct mISDNchannel *ch) +{ + struct mISDN_sock *msk = container_of(ch, struct mISDN_sock, ch); + struct 
mISDNchannel *pch; + + if (!ch->st) { + printk(KERN_WARNING "%s: no stack\n", __func__); + return; + } + if (*debug & DEBUG_CORE_FUNC) + printk(KERN_DEBUG "%s: st(%s) protocol(%x)\n", __func__, + ch->st->dev->name, ch->protocol); + if (ch->protocol >= ISDN_P_B_START) { + if (ch->peer) { + ch->peer->ctrl(ch->peer, CLOSE_CHANNEL, NULL); + ch->peer = NULL; + } + return; + } + switch (ch->protocol) { + case ISDN_P_NT_S0: + case ISDN_P_TE_S0: + case ISDN_P_NT_E1: + case ISDN_P_TE_E1: + write_lock_bh(&ch->st->l1sock.lock); + sk_del_node_init(&msk->sk); + write_unlock_bh(&ch->st->l1sock.lock); + ch->st->dev->D.ctrl(&ch->st->dev->D, CLOSE_CHANNEL, NULL); + break; + case ISDN_P_LAPD_TE: + pch = get_channel4id(ch->st, ch->nr); + if (pch) { + mutex_lock(&ch->st->lmutex); + list_del(&pch->list); + mutex_unlock(&ch->st->lmutex); + pch->ctrl(pch, CLOSE_CHANNEL, NULL); + pch = ch->st->dev->teimgr; + pch->ctrl(pch, CLOSE_CHANNEL, NULL); + } else + printk(KERN_WARNING "%s: no l2 channel\n", + __func__); + break; + case ISDN_P_LAPD_NT: + pch = ch->st->dev->teimgr; + if (pch) { + pch->ctrl(pch, CLOSE_CHANNEL, NULL); + } else + printk(KERN_WARNING "%s: no l2 channel\n", + __func__); + break; + default: + break; + } + return; +} + +void +delete_stack(struct mISDNdevice *dev) +{ + struct mISDNstack *st = dev->D.st; + DECLARE_COMPLETION_ONSTACK(done); + + if (*debug & DEBUG_CORE_FUNC) + printk(KERN_DEBUG "%s: st(%s)\n", __func__, + st->dev->name); + if (dev->teimgr) + delete_teimanager(dev->teimgr); + if (st->thread) { + if (st->notify) { + printk(KERN_WARNING "%s: notifier in use\n", + __func__); + complete(st->notify); + } + st->notify = &done; + test_and_set_bit(mISDN_STACK_ABORT, &st->status); + test_and_set_bit(mISDN_STACK_WAKEUP, &st->status); + wake_up_interruptible(&st->workq); + wait_for_completion(&done); + } + if (!list_empty(&st->layer2)) + printk(KERN_WARNING "%s: layer2 list not empty\n", + __func__); + if (!hlist_empty(&st->l1sock.head)) + printk(KERN_WARNING "%s: layer1 list not empty\n", + __func__); + kfree(st); +} + +void +mISDN_initstack(u_int *dp) +{ + debug = dp; +} diff --git a/drivers/isdn/mISDN/tei.c b/drivers/isdn/mISDN/tei.c new file mode 100644 index 000000000000..56a76a0ffddd --- /dev/null +++ b/drivers/isdn/mISDN/tei.c @@ -0,0 +1,1340 @@ +/* + * + * Author Karsten Keil + * + * Copyright 2008 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include "layer2.h" +#include +#include "core.h" + +#define ID_REQUEST 1 +#define ID_ASSIGNED 2 +#define ID_DENIED 3 +#define ID_CHK_REQ 4 +#define ID_CHK_RES 5 +#define ID_REMOVE 6 +#define ID_VERIFY 7 + +#define TEI_ENTITY_ID 0xf + +#define MGR_PH_ACTIVE 16 +#define MGR_PH_NOTREADY 17 + +#define DATIMER_VAL 10000 + +static u_int *debug; + +static struct Fsm deactfsm = {NULL, 0, 0, NULL, NULL}; +static struct Fsm teifsmu = {NULL, 0, 0, NULL, NULL}; +static struct Fsm teifsmn = {NULL, 0, 0, NULL, NULL}; + +enum { + ST_L1_DEACT, + ST_L1_DEACT_PENDING, + ST_L1_ACTIV, +}; +#define DEACT_STATE_COUNT (ST_L1_ACTIV+1) + +static char *strDeactState[] = +{ + "ST_L1_DEACT", + "ST_L1_DEACT_PENDING", + "ST_L1_ACTIV", +}; + +enum { + EV_ACTIVATE, + EV_ACTIVATE_IND, + EV_DEACTIVATE, + EV_DEACTIVATE_IND, + EV_UI, + EV_DATIMER, +}; + +#define DEACT_EVENT_COUNT (EV_DATIMER+1) + +static char *strDeactEvent[] = +{ + "EV_ACTIVATE", + "EV_ACTIVATE_IND", + "EV_DEACTIVATE", + "EV_DEACTIVATE_IND", + "EV_UI", + "EV_DATIMER", +}; + +static void +da_debug(struct FsmInst *fi, char *fmt, ...) +{ + struct manager *mgr = fi->userdata; + va_list va; + + if (!(*debug & DEBUG_L2_TEIFSM)) + return; + va_start(va, fmt); + printk(KERN_DEBUG "mgr(%d): ", mgr->ch.st->dev->id); + vprintk(fmt, va); + printk("\n"); + va_end(va); +} + +static void +da_activate(struct FsmInst *fi, int event, void *arg) +{ + struct manager *mgr = fi->userdata; + + if (fi->state == ST_L1_DEACT_PENDING) + mISDN_FsmDelTimer(&mgr->datimer, 1); + mISDN_FsmChangeState(fi, ST_L1_ACTIV); +} + +static void +da_deactivate_ind(struct FsmInst *fi, int event, void *arg) +{ + mISDN_FsmChangeState(fi, ST_L1_DEACT); +} + +static void +da_deactivate(struct FsmInst *fi, int event, void *arg) +{ + struct manager *mgr = fi->userdata; + struct layer2 *l2; + u_long flags; + + read_lock_irqsave(&mgr->lock, flags); + list_for_each_entry(l2, &mgr->layer2, list) { + if (l2->l2m.state > ST_L2_4) { + /* have still activ TEI */ + read_unlock_irqrestore(&mgr->lock, flags); + return; + } + } + read_unlock_irqrestore(&mgr->lock, flags); + /* All TEI are inactiv */ + mISDN_FsmAddTimer(&mgr->datimer, DATIMER_VAL, EV_DATIMER, NULL, 1); + mISDN_FsmChangeState(fi, ST_L1_DEACT_PENDING); +} + +static void +da_ui(struct FsmInst *fi, int event, void *arg) +{ + struct manager *mgr = fi->userdata; + + /* restart da timer */ + mISDN_FsmDelTimer(&mgr->datimer, 2); + mISDN_FsmAddTimer(&mgr->datimer, DATIMER_VAL, EV_DATIMER, NULL, 2); + +} + +static void +da_timer(struct FsmInst *fi, int event, void *arg) +{ + struct manager *mgr = fi->userdata; + struct layer2 *l2; + u_long flags; + + /* check again */ + read_lock_irqsave(&mgr->lock, flags); + list_for_each_entry(l2, &mgr->layer2, list) { + if (l2->l2m.state > ST_L2_4) { + /* have still activ TEI */ + read_unlock_irqrestore(&mgr->lock, flags); + mISDN_FsmChangeState(fi, ST_L1_ACTIV); + return; + } + } + read_unlock_irqrestore(&mgr->lock, flags); + /* All TEI are inactiv */ + mISDN_FsmChangeState(fi, ST_L1_DEACT); + _queue_data(&mgr->ch, PH_DEACTIVATE_REQ, MISDN_ID_ANY, 0, NULL, + GFP_ATOMIC); +} + +static struct FsmNode DeactFnList[] = +{ + {ST_L1_DEACT, EV_ACTIVATE_IND, da_activate}, + {ST_L1_ACTIV, EV_DEACTIVATE_IND, da_deactivate_ind}, + {ST_L1_ACTIV, EV_DEACTIVATE, da_deactivate}, + {ST_L1_DEACT_PENDING, EV_ACTIVATE, da_activate}, + {ST_L1_DEACT_PENDING, EV_UI, da_ui}, + {ST_L1_DEACT_PENDING, EV_DATIMER, da_timer}, +}; + +enum { + ST_TEI_NOP, + ST_TEI_IDREQ, + ST_TEI_IDVERIFY, +}; + +#define TEI_STATE_COUNT (ST_TEI_IDVERIFY+1) + 
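+/*
+ * TEI manager state machine: ST_TEI_NOP is the idle state, ST_TEI_IDREQ is
+ * entered while a dynamic TEI assignment (ID_REQUEST carrying a random
+ * reference number) is outstanding, and ST_TEI_IDVERIFY while an identity
+ * verify or identity check exchange is running.  The user and network sides
+ * share these states but use separate event tables (TeiFnListUser and
+ * TeiFnListNet below).
+ */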
+static char *strTeiState[] = +{ + "ST_TEI_NOP", + "ST_TEI_IDREQ", + "ST_TEI_IDVERIFY", +}; + +enum { + EV_IDREQ, + EV_ASSIGN, + EV_ASSIGN_REQ, + EV_DENIED, + EV_CHKREQ, + EV_CHKRESP, + EV_REMOVE, + EV_VERIFY, + EV_TIMER, +}; + +#define TEI_EVENT_COUNT (EV_TIMER+1) + +static char *strTeiEvent[] = +{ + "EV_IDREQ", + "EV_ASSIGN", + "EV_ASSIGN_REQ", + "EV_DENIED", + "EV_CHKREQ", + "EV_CHKRESP", + "EV_REMOVE", + "EV_VERIFY", + "EV_TIMER", +}; + +static void +tei_debug(struct FsmInst *fi, char *fmt, ...) +{ + struct teimgr *tm = fi->userdata; + va_list va; + + if (!(*debug & DEBUG_L2_TEIFSM)) + return; + va_start(va, fmt); + printk(KERN_DEBUG "tei(%d): ", tm->l2->tei); + vprintk(fmt, va); + printk("\n"); + va_end(va); +} + + + +static int +get_free_id(struct manager *mgr) +{ + u64 ids = 0; + int i; + struct layer2 *l2; + + list_for_each_entry(l2, &mgr->layer2, list) { + if (l2->ch.nr > 63) { + printk(KERN_WARNING + "%s: more as 63 layer2 for one device\n", + __func__); + return -EBUSY; + } + test_and_set_bit(l2->ch.nr, (u_long *)&ids); + } + for (i = 1; i < 64; i++) + if (!test_bit(i, (u_long *)&ids)) + return i; + printk(KERN_WARNING "%s: more as 63 layer2 for one device\n", + __func__); + return -EBUSY; +} + +static int +get_free_tei(struct manager *mgr) +{ + u64 ids = 0; + int i; + struct layer2 *l2; + + list_for_each_entry(l2, &mgr->layer2, list) { + if (l2->ch.nr == 0) + continue; + if ((l2->ch.addr & 0xff) != 0) + continue; + i = l2->ch.addr >> 8; + if (i < 64) + continue; + i -= 64; + + test_and_set_bit(i, (u_long *)&ids); + } + for (i = 0; i < 64; i++) + if (!test_bit(i, (u_long *)&ids)) + return i + 64; + printk(KERN_WARNING "%s: more as 63 dynamic tei for one device\n", + __func__); + return -1; +} + +static void +teiup_create(struct manager *mgr, u_int prim, int len, void *arg) +{ + struct sk_buff *skb; + struct mISDNhead *hh; + int err; + + skb = mI_alloc_skb(len, GFP_ATOMIC); + if (!skb) + return; + hh = mISDN_HEAD_P(skb); + hh->prim = prim; + hh->id = (mgr->ch.nr << 16) | mgr->ch.addr; + if (len) + memcpy(skb_put(skb, len), arg, len); + err = mgr->up->send(mgr->up, skb); + if (err) { + printk(KERN_WARNING "%s: err=%d\n", __func__, err); + dev_kfree_skb(skb); + } +} + +static u_int +new_id(struct manager *mgr) +{ + u_int id; + + id = mgr->nextid++; + if (id == 0x7fff) + mgr->nextid = 1; + id <<= 16; + id |= GROUP_TEI << 8; + id |= TEI_SAPI; + return id; +} + +static void +do_send(struct manager *mgr) +{ + if (!test_bit(MGR_PH_ACTIVE, &mgr->options)) + return; + + if (!test_and_set_bit(MGR_PH_NOTREADY, &mgr->options)) { + struct sk_buff *skb = skb_dequeue(&mgr->sendq); + + if (!skb) { + test_and_clear_bit(MGR_PH_NOTREADY, &mgr->options); + return; + } + mgr->lastid = mISDN_HEAD_ID(skb); + mISDN_FsmEvent(&mgr->deact, EV_UI, NULL); + if (mgr->ch.recv(mgr->ch.peer, skb)) { + dev_kfree_skb(skb); + test_and_clear_bit(MGR_PH_NOTREADY, &mgr->options); + mgr->lastid = MISDN_ID_NONE; + } + } +} + +static void +do_ack(struct manager *mgr, u_int id) +{ + if (test_bit(MGR_PH_NOTREADY, &mgr->options)) { + if (id == mgr->lastid) { + if (test_bit(MGR_PH_ACTIVE, &mgr->options)) { + struct sk_buff *skb; + + skb = skb_dequeue(&mgr->sendq); + if (skb) { + mgr->lastid = mISDN_HEAD_ID(skb); + if (!mgr->ch.recv(mgr->ch.peer, skb)) + return; + dev_kfree_skb(skb); + } + } + mgr->lastid = MISDN_ID_NONE; + test_and_clear_bit(MGR_PH_NOTREADY, &mgr->options); + } + } +} + +static void +mgr_send_down(struct manager *mgr, struct sk_buff *skb) +{ + skb_queue_tail(&mgr->sendq, skb); + if (!test_bit(MGR_PH_ACTIVE, 
&mgr->options)) { + _queue_data(&mgr->ch, PH_ACTIVATE_REQ, MISDN_ID_ANY, 0, + NULL, GFP_KERNEL); + } else { + do_send(mgr); + } +} + +static int +dl_unit_data(struct manager *mgr, struct sk_buff *skb) +{ + if (!test_bit(MGR_OPT_NETWORK, &mgr->options)) /* only net send UI */ + return -EINVAL; + if (!test_bit(MGR_PH_ACTIVE, &mgr->options)) + _queue_data(&mgr->ch, PH_ACTIVATE_REQ, MISDN_ID_ANY, 0, + NULL, GFP_KERNEL); + skb_push(skb, 3); + skb->data[0] = 0x02; /* SAPI 0 C/R = 1 */ + skb->data[1] = 0xff; /* TEI 127 */ + skb->data[2] = UI; /* UI frame */ + mISDN_HEAD_PRIM(skb) = PH_DATA_REQ; + mISDN_HEAD_ID(skb) = new_id(mgr); + skb_queue_tail(&mgr->sendq, skb); + do_send(mgr); + return 0; +} + +unsigned int +random_ri(void) +{ + u16 x; + + get_random_bytes(&x, sizeof(x)); + return x; +} + +static struct layer2 * +findtei(struct manager *mgr, int tei) +{ + struct layer2 *l2; + u_long flags; + + read_lock_irqsave(&mgr->lock, flags); + list_for_each_entry(l2, &mgr->layer2, list) { + if ((l2->sapi == 0) && (l2->tei > 0) && + (l2->tei != GROUP_TEI) && (l2->tei == tei)) + goto done; + } + l2 = NULL; +done: + read_unlock_irqrestore(&mgr->lock, flags); + return l2; +} + +static void +put_tei_msg(struct manager *mgr, u_char m_id, unsigned int ri, u_char tei) +{ + struct sk_buff *skb; + u_char bp[8]; + + bp[0] = (TEI_SAPI << 2); + if (test_bit(MGR_OPT_NETWORK, &mgr->options)) + bp[0] |= 2; /* CR:=1 for net command */ + bp[1] = (GROUP_TEI << 1) | 0x1; + bp[2] = UI; + bp[3] = TEI_ENTITY_ID; + bp[4] = ri >> 8; + bp[5] = ri & 0xff; + bp[6] = m_id; + bp[7] = (tei << 1) | 1; + skb = _alloc_mISDN_skb(PH_DATA_REQ, new_id(mgr), + 8, bp, GFP_ATOMIC); + if (!skb) { + printk(KERN_WARNING "%s: no skb for tei msg\n", __func__); + return; + } + mgr_send_down(mgr, skb); +} + +static void +tei_id_request(struct FsmInst *fi, int event, void *arg) +{ + struct teimgr *tm = fi->userdata; + + if (tm->l2->tei != GROUP_TEI) { + tm->tei_m.printdebug(&tm->tei_m, + "assign request for allready assigned tei %d", + tm->l2->tei); + return; + } + tm->ri = random_ri(); + if (*debug & DEBUG_L2_TEI) + tm->tei_m.printdebug(&tm->tei_m, + "assign request ri %d", tm->ri); + put_tei_msg(tm->mgr, ID_REQUEST, tm->ri, GROUP_TEI); + mISDN_FsmChangeState(fi, ST_TEI_IDREQ); + mISDN_FsmAddTimer(&tm->timer, tm->tval, EV_TIMER, NULL, 1); + tm->nval = 3; +} + +static void +tei_id_assign(struct FsmInst *fi, int event, void *arg) +{ + struct teimgr *tm = fi->userdata; + struct layer2 *l2; + u_char *dp = arg; + int ri, tei; + + ri = ((unsigned int) *dp++ << 8); + ri += *dp++; + dp++; + tei = *dp >> 1; + if (*debug & DEBUG_L2_TEI) + tm->tei_m.printdebug(fi, "identity assign ri %d tei %d", + ri, tei); + l2 = findtei(tm->mgr, tei); + if (l2) { /* same tei is in use */ + if (ri != l2->tm->ri) { + tm->tei_m.printdebug(fi, + "possible duplicate assignment tei %d", tei); + tei_l2(l2, MDL_ERROR_RSP, 0); + } + } else if (ri == tm->ri) { + mISDN_FsmDelTimer(&tm->timer, 1); + mISDN_FsmChangeState(fi, ST_TEI_NOP); + tei_l2(tm->l2, MDL_ASSIGN_REQ, tei); + } +} + +static void +tei_id_test_dup(struct FsmInst *fi, int event, void *arg) +{ + struct teimgr *tm = fi->userdata; + struct layer2 *l2; + u_char *dp = arg; + int tei, ri; + + ri = ((unsigned int) *dp++ << 8); + ri += *dp++; + dp++; + tei = *dp >> 1; + if (*debug & DEBUG_L2_TEI) + tm->tei_m.printdebug(fi, "foreign identity assign ri %d tei %d", + ri, tei); + l2 = findtei(tm->mgr, tei); + if (l2) { /* same tei is in use */ + if (ri != l2->tm->ri) { /* and it wasn't our request */ + tm->tei_m.printdebug(fi, + 
"possible duplicate assignment tei %d", tei); + mISDN_FsmEvent(&l2->tm->tei_m, EV_VERIFY, NULL); + } + } +} + +static void +tei_id_denied(struct FsmInst *fi, int event, void *arg) +{ + struct teimgr *tm = fi->userdata; + u_char *dp = arg; + int ri, tei; + + ri = ((unsigned int) *dp++ << 8); + ri += *dp++; + dp++; + tei = *dp >> 1; + if (*debug & DEBUG_L2_TEI) + tm->tei_m.printdebug(fi, "identity denied ri %d tei %d", + ri, tei); +} + +static void +tei_id_chk_req(struct FsmInst *fi, int event, void *arg) +{ + struct teimgr *tm = fi->userdata; + u_char *dp = arg; + int tei; + + tei = *(dp+3) >> 1; + if (*debug & DEBUG_L2_TEI) + tm->tei_m.printdebug(fi, "identity check req tei %d", tei); + if ((tm->l2->tei != GROUP_TEI) && ((tei == GROUP_TEI) || + (tei == tm->l2->tei))) { + mISDN_FsmDelTimer(&tm->timer, 4); + mISDN_FsmChangeState(&tm->tei_m, ST_TEI_NOP); + put_tei_msg(tm->mgr, ID_CHK_RES, random_ri(), tm->l2->tei); + } +} + +static void +tei_id_remove(struct FsmInst *fi, int event, void *arg) +{ + struct teimgr *tm = fi->userdata; + u_char *dp = arg; + int tei; + + tei = *(dp+3) >> 1; + if (*debug & DEBUG_L2_TEI) + tm->tei_m.printdebug(fi, "identity remove tei %d", tei); + if ((tm->l2->tei != GROUP_TEI) && + ((tei == GROUP_TEI) || (tei == tm->l2->tei))) { + mISDN_FsmDelTimer(&tm->timer, 5); + mISDN_FsmChangeState(&tm->tei_m, ST_TEI_NOP); + tei_l2(tm->l2, MDL_REMOVE_REQ, 0); + } +} + +static void +tei_id_verify(struct FsmInst *fi, int event, void *arg) +{ + struct teimgr *tm = fi->userdata; + + if (*debug & DEBUG_L2_TEI) + tm->tei_m.printdebug(fi, "id verify request for tei %d", + tm->l2->tei); + put_tei_msg(tm->mgr, ID_VERIFY, 0, tm->l2->tei); + mISDN_FsmChangeState(&tm->tei_m, ST_TEI_IDVERIFY); + mISDN_FsmAddTimer(&tm->timer, tm->tval, EV_TIMER, NULL, 2); + tm->nval = 2; +} + +static void +tei_id_req_tout(struct FsmInst *fi, int event, void *arg) +{ + struct teimgr *tm = fi->userdata; + + if (--tm->nval) { + tm->ri = random_ri(); + if (*debug & DEBUG_L2_TEI) + tm->tei_m.printdebug(fi, "assign req(%d) ri %d", + 4 - tm->nval, tm->ri); + put_tei_msg(tm->mgr, ID_REQUEST, tm->ri, GROUP_TEI); + mISDN_FsmAddTimer(&tm->timer, tm->tval, EV_TIMER, NULL, 3); + } else { + tm->tei_m.printdebug(fi, "assign req failed"); + tei_l2(tm->l2, MDL_ERROR_RSP, 0); + mISDN_FsmChangeState(fi, ST_TEI_NOP); + } +} + +static void +tei_id_ver_tout(struct FsmInst *fi, int event, void *arg) +{ + struct teimgr *tm = fi->userdata; + + if (--tm->nval) { + if (*debug & DEBUG_L2_TEI) + tm->tei_m.printdebug(fi, + "id verify req(%d) for tei %d", + 3 - tm->nval, tm->l2->tei); + put_tei_msg(tm->mgr, ID_VERIFY, 0, tm->l2->tei); + mISDN_FsmAddTimer(&tm->timer, tm->tval, EV_TIMER, NULL, 4); + } else { + tm->tei_m.printdebug(fi, "verify req for tei %d failed", + tm->l2->tei); + tei_l2(tm->l2, MDL_REMOVE_REQ, 0); + mISDN_FsmChangeState(fi, ST_TEI_NOP); + } +} + +static struct FsmNode TeiFnListUser[] = +{ + {ST_TEI_NOP, EV_IDREQ, tei_id_request}, + {ST_TEI_NOP, EV_ASSIGN, tei_id_test_dup}, + {ST_TEI_NOP, EV_VERIFY, tei_id_verify}, + {ST_TEI_NOP, EV_REMOVE, tei_id_remove}, + {ST_TEI_NOP, EV_CHKREQ, tei_id_chk_req}, + {ST_TEI_IDREQ, EV_TIMER, tei_id_req_tout}, + {ST_TEI_IDREQ, EV_ASSIGN, tei_id_assign}, + {ST_TEI_IDREQ, EV_DENIED, tei_id_denied}, + {ST_TEI_IDVERIFY, EV_TIMER, tei_id_ver_tout}, + {ST_TEI_IDVERIFY, EV_REMOVE, tei_id_remove}, + {ST_TEI_IDVERIFY, EV_CHKREQ, tei_id_chk_req}, +}; + +static void +tei_l2remove(struct layer2 *l2) +{ + put_tei_msg(l2->tm->mgr, ID_REMOVE, 0, l2->tei); + tei_l2(l2, MDL_REMOVE_REQ, 0); + 
list_del(&l2->ch.list); + l2->ch.ctrl(&l2->ch, CLOSE_CHANNEL, NULL); +} + +static void +tei_assign_req(struct FsmInst *fi, int event, void *arg) +{ + struct teimgr *tm = fi->userdata; + u_char *dp = arg; + + if (tm->l2->tei == GROUP_TEI) { + tm->tei_m.printdebug(&tm->tei_m, + "net tei assign request without tei"); + return; + } + tm->ri = ((unsigned int) *dp++ << 8); + tm->ri += *dp++; + if (*debug & DEBUG_L2_TEI) + tm->tei_m.printdebug(&tm->tei_m, + "net assign request ri %d teim %d", tm->ri, *dp); + put_tei_msg(tm->mgr, ID_ASSIGNED, tm->ri, tm->l2->tei); + mISDN_FsmChangeState(fi, ST_TEI_NOP); +} + +static void +tei_id_chk_req_net(struct FsmInst *fi, int event, void *arg) +{ + struct teimgr *tm = fi->userdata; + + if (*debug & DEBUG_L2_TEI) + tm->tei_m.printdebug(fi, "id check request for tei %d", + tm->l2->tei); + tm->rcnt = 0; + put_tei_msg(tm->mgr, ID_CHK_REQ, 0, tm->l2->tei); + mISDN_FsmChangeState(&tm->tei_m, ST_TEI_IDVERIFY); + mISDN_FsmAddTimer(&tm->timer, tm->tval, EV_TIMER, NULL, 2); + tm->nval = 2; +} + +static void +tei_id_chk_resp(struct FsmInst *fi, int event, void *arg) +{ + struct teimgr *tm = fi->userdata; + u_char *dp = arg; + int tei; + + tei = dp[3] >> 1; + if (*debug & DEBUG_L2_TEI) + tm->tei_m.printdebug(fi, "identity check resp tei %d", tei); + if (tei == tm->l2->tei) + tm->rcnt++; +} + +static void +tei_id_verify_net(struct FsmInst *fi, int event, void *arg) +{ + struct teimgr *tm = fi->userdata; + u_char *dp = arg; + int tei; + + tei = dp[3] >> 1; + if (*debug & DEBUG_L2_TEI) + tm->tei_m.printdebug(fi, "identity verify req tei %d/%d", + tei, tm->l2->tei); + if (tei == tm->l2->tei) + tei_id_chk_req_net(fi, event, arg); +} + +static void +tei_id_ver_tout_net(struct FsmInst *fi, int event, void *arg) +{ + struct teimgr *tm = fi->userdata; + + if (tm->rcnt == 1) { + if (*debug & DEBUG_L2_TEI) + tm->tei_m.printdebug(fi, + "check req for tei %d sucessful\n", tm->l2->tei); + mISDN_FsmChangeState(fi, ST_TEI_NOP); + } else if (tm->rcnt > 1) { + /* duplicate assignment; remove */ + tei_l2remove(tm->l2); + } else if (--tm->nval) { + if (*debug & DEBUG_L2_TEI) + tm->tei_m.printdebug(fi, + "id check req(%d) for tei %d", + 3 - tm->nval, tm->l2->tei); + put_tei_msg(tm->mgr, ID_CHK_REQ, 0, tm->l2->tei); + mISDN_FsmAddTimer(&tm->timer, tm->tval, EV_TIMER, NULL, 4); + } else { + tm->tei_m.printdebug(fi, "check req for tei %d failed", + tm->l2->tei); + mISDN_FsmChangeState(fi, ST_TEI_NOP); + tei_l2remove(tm->l2); + } +} + +static struct FsmNode TeiFnListNet[] = +{ + {ST_TEI_NOP, EV_ASSIGN_REQ, tei_assign_req}, + {ST_TEI_NOP, EV_VERIFY, tei_id_verify_net}, + {ST_TEI_NOP, EV_CHKREQ, tei_id_chk_req_net}, + {ST_TEI_IDVERIFY, EV_TIMER, tei_id_ver_tout_net}, + {ST_TEI_IDVERIFY, EV_CHKRESP, tei_id_chk_resp}, +}; + +static void +tei_ph_data_ind(struct teimgr *tm, u_int mt, u_char *dp, int len) +{ + if (test_bit(FLG_FIXED_TEI, &tm->l2->flag)) + return; + if (*debug & DEBUG_L2_TEI) + tm->tei_m.printdebug(&tm->tei_m, "tei handler mt %x", mt); + if (mt == ID_ASSIGNED) + mISDN_FsmEvent(&tm->tei_m, EV_ASSIGN, dp); + else if (mt == ID_DENIED) + mISDN_FsmEvent(&tm->tei_m, EV_DENIED, dp); + else if (mt == ID_CHK_REQ) + mISDN_FsmEvent(&tm->tei_m, EV_CHKREQ, dp); + else if (mt == ID_REMOVE) + mISDN_FsmEvent(&tm->tei_m, EV_REMOVE, dp); + else if (mt == ID_VERIFY) + mISDN_FsmEvent(&tm->tei_m, EV_VERIFY, dp); + else if (mt == ID_CHK_RES) + mISDN_FsmEvent(&tm->tei_m, EV_CHKRESP, dp); +} + +static struct layer2 * +create_new_tei(struct manager *mgr, int tei) +{ + u_long opt = 0; + u_long flags; + int id; + 
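/*
 * create_new_tei() allocates the NT-side layer2 instance plus its teimgr for
 * one TEI, attaches the network TEI state machine (teifsmn, T202 = 2 s),
 * reserves a free channel number under mgr->lock and registers the new
 * channel on the stack.  It is reached from new_tei_req() for dynamically
 * requested TEIs and from check_data() when a SABME arrives on a fixed TEI.
 */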
struct layer2 *l2; + + if (!mgr->up) + return NULL; + if (tei < 64) + test_and_set_bit(OPTION_L2_FIXEDTEI, &opt); + if (mgr->ch.st->dev->Dprotocols + & ((1 << ISDN_P_TE_E1) | (1 << ISDN_P_NT_E1))) + test_and_set_bit(OPTION_L2_PMX, &opt); + l2 = create_l2(mgr->up, ISDN_P_LAPD_NT, (u_int)opt, (u_long)tei); + if (!l2) { + printk(KERN_WARNING "%s:no memory for layer2\n", __func__); + return NULL; + } + l2->tm = kzalloc(sizeof(struct teimgr), GFP_KERNEL); + if (!l2->tm) { + kfree(l2); + printk(KERN_WARNING "%s:no memory for teimgr\n", __func__); + return NULL; + } + l2->tm->mgr = mgr; + l2->tm->l2 = l2; + l2->tm->tei_m.debug = *debug & DEBUG_L2_TEIFSM; + l2->tm->tei_m.userdata = l2->tm; + l2->tm->tei_m.printdebug = tei_debug; + l2->tm->tei_m.fsm = &teifsmn; + l2->tm->tei_m.state = ST_TEI_NOP; + l2->tm->tval = 2000; /* T202 2 sec */ + mISDN_FsmInitTimer(&l2->tm->tei_m, &l2->tm->timer); + write_lock_irqsave(&mgr->lock, flags); + id = get_free_id(mgr); + list_add_tail(&l2->list, &mgr->layer2); + write_unlock_irqrestore(&mgr->lock, flags); + if (id < 0) { + l2->ch.ctrl(&l2->ch, CLOSE_CHANNEL, NULL); + printk(KERN_WARNING "%s:no free id\n", __func__); + return NULL; + } else { + l2->ch.nr = id; + __add_layer2(&l2->ch, mgr->ch.st); + l2->ch.recv = mgr->ch.recv; + l2->ch.peer = mgr->ch.peer; + l2->ch.ctrl(&l2->ch, OPEN_CHANNEL, NULL); + } + return l2; +} + +static void +new_tei_req(struct manager *mgr, u_char *dp) +{ + int tei, ri; + struct layer2 *l2; + + ri = dp[0] << 8; + ri += dp[1]; + if (!mgr->up) + goto denied; + tei = get_free_tei(mgr); + if (tei < 0) { + printk(KERN_WARNING "%s:No free tei\n", __func__); + goto denied; + } + l2 = create_new_tei(mgr, tei); + if (!l2) + goto denied; + else + mISDN_FsmEvent(&l2->tm->tei_m, EV_ASSIGN_REQ, dp); + return; +denied: + put_tei_msg(mgr, ID_DENIED, ri, GROUP_TEI); +} + +static int +ph_data_ind(struct manager *mgr, struct sk_buff *skb) +{ + int ret = -EINVAL; + struct layer2 *l2; + u_long flags; + u_char mt; + + if (skb->len < 8) { + if (*debug & DEBUG_L2_TEI) + printk(KERN_DEBUG "%s: short mgr frame %d/8\n", + __func__, skb->len); + goto done; + } + if (*debug & DEBUG_L2_TEI) + + if ((skb->data[0] >> 2) != TEI_SAPI) /* not for us */ + goto done; + if (skb->data[0] & 1) /* EA0 formal error */ + goto done; + if (!(skb->data[1] & 1)) /* EA1 formal error */ + goto done; + if ((skb->data[1] >> 1) != GROUP_TEI) /* not for us */ + goto done; + if ((skb->data[2] & 0xef) != UI) /* not UI */ + goto done; + if (skb->data[3] != TEI_ENTITY_ID) /* not tei entity */ + goto done; + mt = skb->data[6]; + switch (mt) { + case ID_REQUEST: + case ID_CHK_RES: + case ID_VERIFY: + if (!test_bit(MGR_OPT_NETWORK, &mgr->options)) + goto done; + break; + case ID_ASSIGNED: + case ID_DENIED: + case ID_CHK_REQ: + case ID_REMOVE: + if (test_bit(MGR_OPT_NETWORK, &mgr->options)) + goto done; + break; + default: + goto done; + } + ret = 0; + if (mt == ID_REQUEST) { + new_tei_req(mgr, &skb->data[4]); + goto done; + } + read_lock_irqsave(&mgr->lock, flags); + list_for_each_entry(l2, &mgr->layer2, list) { + tei_ph_data_ind(l2->tm, mt, &skb->data[4], skb->len - 4); + } + read_unlock_irqrestore(&mgr->lock, flags); +done: + return ret; +} + +int +l2_tei(struct layer2 *l2, u_int cmd, u_long arg) +{ + struct teimgr *tm = l2->tm; + + if (test_bit(FLG_FIXED_TEI, &l2->flag)) + return 0; + if (*debug & DEBUG_L2_TEI) + printk(KERN_DEBUG "%s: cmd(%x)\n", __func__, cmd); + switch (cmd) { + case MDL_ASSIGN_IND: + mISDN_FsmEvent(&tm->tei_m, EV_IDREQ, NULL); + break; + case MDL_ERROR_IND: + if 
(test_bit(MGR_OPT_NETWORK, &tm->mgr->options)) + mISDN_FsmEvent(&tm->tei_m, EV_CHKREQ, &l2->tei); + if (test_bit(MGR_OPT_USER, &tm->mgr->options)) + mISDN_FsmEvent(&tm->tei_m, EV_VERIFY, NULL); + break; + case MDL_STATUS_UP_IND: + if (test_bit(MGR_OPT_NETWORK, &tm->mgr->options)) + mISDN_FsmEvent(&tm->mgr->deact, EV_ACTIVATE, NULL); + break; + case MDL_STATUS_DOWN_IND: + if (test_bit(MGR_OPT_NETWORK, &tm->mgr->options)) + mISDN_FsmEvent(&tm->mgr->deact, EV_DEACTIVATE, NULL); + break; + case MDL_STATUS_UI_IND: + if (test_bit(MGR_OPT_NETWORK, &tm->mgr->options)) + mISDN_FsmEvent(&tm->mgr->deact, EV_UI, NULL); + break; + } + return 0; +} + +void +release_tei(struct layer2 *l2) +{ + struct teimgr *tm = l2->tm; + u_long flags; + + mISDN_FsmDelTimer(&tm->timer, 1); + write_lock_irqsave(&tm->mgr->lock, flags); + list_del(&l2->list); + write_unlock_irqrestore(&tm->mgr->lock, flags); + l2->tm = NULL; + kfree(tm); +} + +static int +create_teimgr(struct manager *mgr, struct channel_req *crq) +{ + struct layer2 *l2; + u_long opt = 0; + u_long flags; + int id; + + if (*debug & DEBUG_L2_TEI) + printk(KERN_DEBUG "%s: %s proto(%x) adr(%d %d %d %d)\n", + __func__, mgr->ch.st->dev->name, crq->protocol, + crq->adr.dev, crq->adr.channel, crq->adr.sapi, + crq->adr.tei); + if (crq->adr.sapi != 0) /* not supported yet */ + return -EINVAL; + if (crq->adr.tei > GROUP_TEI) + return -EINVAL; + if (crq->adr.tei < 64) + test_and_set_bit(OPTION_L2_FIXEDTEI, &opt); + if (crq->adr.tei == 0) + test_and_set_bit(OPTION_L2_PTP, &opt); + if (test_bit(MGR_OPT_NETWORK, &mgr->options)) { + if (crq->protocol == ISDN_P_LAPD_TE) + return -EPROTONOSUPPORT; + if ((crq->adr.tei != 0) && (crq->adr.tei != 127)) + return -EINVAL; + if (mgr->up) { + printk(KERN_WARNING + "%s: only one network manager is allowed\n", + __func__); + return -EBUSY; + } + } else if (test_bit(MGR_OPT_USER, &mgr->options)) { + if (crq->protocol == ISDN_P_LAPD_NT) + return -EPROTONOSUPPORT; + if ((crq->adr.tei >= 64) && (crq->adr.tei < GROUP_TEI)) + return -EINVAL; /* dyn tei */ + } else { + if (crq->protocol == ISDN_P_LAPD_NT) + test_and_set_bit(MGR_OPT_NETWORK, &mgr->options); + if (crq->protocol == ISDN_P_LAPD_TE) + test_and_set_bit(MGR_OPT_USER, &mgr->options); + } + if (mgr->ch.st->dev->Dprotocols + & ((1 << ISDN_P_TE_E1) | (1 << ISDN_P_NT_E1))) + test_and_set_bit(OPTION_L2_PMX, &opt); + if ((crq->protocol == ISDN_P_LAPD_NT) && (crq->adr.tei == 127)) { + mgr->up = crq->ch; + id = DL_INFO_L2_CONNECT; + teiup_create(mgr, DL_INFORMATION_IND, sizeof(id), &id); + crq->ch = NULL; + if (!list_empty(&mgr->layer2)) { + read_lock_irqsave(&mgr->lock, flags); + list_for_each_entry(l2, &mgr->layer2, list) { + l2->up = mgr->up; + l2->ch.ctrl(&l2->ch, OPEN_CHANNEL, NULL); + } + read_unlock_irqrestore(&mgr->lock, flags); + } + return 0; + } + l2 = create_l2(crq->ch, crq->protocol, (u_int)opt, + (u_long)crq->adr.tei); + if (!l2) + return -ENOMEM; + l2->tm = kzalloc(sizeof(struct teimgr), GFP_KERNEL); + if (!l2->tm) { + kfree(l2); + printk(KERN_ERR "kmalloc teimgr failed\n"); + return -ENOMEM; + } + l2->tm->mgr = mgr; + l2->tm->l2 = l2; + l2->tm->tei_m.debug = *debug & DEBUG_L2_TEIFSM; + l2->tm->tei_m.userdata = l2->tm; + l2->tm->tei_m.printdebug = tei_debug; + if (crq->protocol == ISDN_P_LAPD_TE) { + l2->tm->tei_m.fsm = &teifsmu; + l2->tm->tei_m.state = ST_TEI_NOP; + l2->tm->tval = 1000; /* T201 1 sec */ + } else { + l2->tm->tei_m.fsm = &teifsmn; + l2->tm->tei_m.state = ST_TEI_NOP; + l2->tm->tval = 2000; /* T202 2 sec */ + } + mISDN_FsmInitTimer(&l2->tm->tei_m, 
&l2->tm->timer); + write_lock_irqsave(&mgr->lock, flags); + id = get_free_id(mgr); + list_add_tail(&l2->list, &mgr->layer2); + write_unlock_irqrestore(&mgr->lock, flags); + if (id < 0) { + l2->ch.ctrl(&l2->ch, CLOSE_CHANNEL, NULL); + } else { + l2->ch.nr = id; + l2->up->nr = id; + crq->ch = &l2->ch; + id = 0; + } + return id; +} + +static int +mgr_send(struct mISDNchannel *ch, struct sk_buff *skb) +{ + struct manager *mgr; + struct mISDNhead *hh = mISDN_HEAD_P(skb); + int ret = -EINVAL; + + mgr = container_of(ch, struct manager, ch); + if (*debug & DEBUG_L2_RECV) + printk(KERN_DEBUG "%s: prim(%x) id(%x)\n", + __func__, hh->prim, hh->id); + switch (hh->prim) { + case PH_DATA_IND: + mISDN_FsmEvent(&mgr->deact, EV_UI, NULL); + ret = ph_data_ind(mgr, skb); + break; + case PH_DATA_CNF: + do_ack(mgr, hh->id); + ret = 0; + break; + case PH_ACTIVATE_IND: + test_and_set_bit(MGR_PH_ACTIVE, &mgr->options); + mISDN_FsmEvent(&mgr->deact, EV_ACTIVATE_IND, NULL); + do_send(mgr); + ret = 0; + break; + case PH_DEACTIVATE_IND: + test_and_clear_bit(MGR_PH_ACTIVE, &mgr->options); + mISDN_FsmEvent(&mgr->deact, EV_DEACTIVATE_IND, NULL); + ret = 0; + break; + case DL_UNITDATA_REQ: + return dl_unit_data(mgr, skb); + } + if (!ret) + dev_kfree_skb(skb); + return ret; +} + +static int +free_teimanager(struct manager *mgr) +{ + struct layer2 *l2, *nl2; + + if (test_bit(MGR_OPT_NETWORK, &mgr->options)) { + /* not locked lock is taken in release tei */ + mgr->up = NULL; + if (test_bit(OPTION_L2_CLEANUP, &mgr->options)) { + list_for_each_entry_safe(l2, nl2, &mgr->layer2, list) { + put_tei_msg(mgr, ID_REMOVE, 0, l2->tei); + mutex_lock(&mgr->ch.st->lmutex); + list_del(&l2->ch.list); + mutex_unlock(&mgr->ch.st->lmutex); + l2->ch.ctrl(&l2->ch, CLOSE_CHANNEL, NULL); + } + test_and_clear_bit(MGR_OPT_NETWORK, &mgr->options); + } else { + list_for_each_entry_safe(l2, nl2, &mgr->layer2, list) { + l2->up = NULL; + } + } + } + if (test_bit(MGR_OPT_USER, &mgr->options)) { + if (list_empty(&mgr->layer2)) + test_and_clear_bit(MGR_OPT_USER, &mgr->options); + } + mgr->ch.st->dev->D.ctrl(&mgr->ch.st->dev->D, CLOSE_CHANNEL, NULL); + return 0; +} + +static int +ctrl_teimanager(struct manager *mgr, void *arg) +{ + /* currently we only have one option */ + int clean = *((int *)arg); + + if (clean) + test_and_set_bit(OPTION_L2_CLEANUP, &mgr->options); + else + test_and_clear_bit(OPTION_L2_CLEANUP, &mgr->options); + return 0; +} + +/* This function does create a L2 for fixed TEI in NT Mode */ +static int +check_data(struct manager *mgr, struct sk_buff *skb) +{ + struct mISDNhead *hh = mISDN_HEAD_P(skb); + int ret, tei; + struct layer2 *l2; + + if (*debug & DEBUG_L2_CTRL) + printk(KERN_DEBUG "%s: prim(%x) id(%x)\n", + __func__, hh->prim, hh->id); + if (test_bit(MGR_OPT_USER, &mgr->options)) + return -ENOTCONN; + if (hh->prim != PH_DATA_IND) + return -ENOTCONN; + if (skb->len != 3) + return -ENOTCONN; + if (skb->data[0] != 0) + /* only SAPI 0 command */ + return -ENOTCONN; + if (!(skb->data[1] & 1)) /* invalid EA1 */ + return -EINVAL; + tei = skb->data[1] >> 0; + if (tei > 63) /* not a fixed tei */ + return -ENOTCONN; + if ((skb->data[2] & ~0x10) != SABME) + return -ENOTCONN; + /* We got a SABME for a fixed TEI */ + l2 = create_new_tei(mgr, tei); + if (!l2) + return -ENOMEM; + ret = l2->ch.send(&l2->ch, skb); + return ret; +} + +void +delete_teimanager(struct mISDNchannel *ch) +{ + struct manager *mgr; + struct layer2 *l2, *nl2; + + mgr = container_of(ch, struct manager, ch); + /* not locked lock is taken in release tei */ + 
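/*
 * Final tear-down: every layer2 instance still linked to this manager is
 * unlinked from the stack (under the stack's lmutex) and closed, then the
 * manager's own D-channel and broadcast channels are removed from the stack
 * lists, the pending send queue is purged and the manager itself is freed.
 */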
list_for_each_entry_safe(l2, nl2, &mgr->layer2, list) { + mutex_lock(&mgr->ch.st->lmutex); + list_del(&l2->ch.list); + mutex_unlock(&mgr->ch.st->lmutex); + l2->ch.ctrl(&l2->ch, CLOSE_CHANNEL, NULL); + } + list_del(&mgr->ch.list); + list_del(&mgr->bcast.list); + skb_queue_purge(&mgr->sendq); + kfree(mgr); +} + +static int +mgr_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg) +{ + struct manager *mgr; + int ret = -EINVAL; + + mgr = container_of(ch, struct manager, ch); + if (*debug & DEBUG_L2_CTRL) + printk(KERN_DEBUG "%s(%x, %p)\n", __func__, cmd, arg); + switch (cmd) { + case OPEN_CHANNEL: + ret = create_teimgr(mgr, arg); + break; + case CLOSE_CHANNEL: + ret = free_teimanager(mgr); + break; + case CONTROL_CHANNEL: + ret = ctrl_teimanager(mgr, arg); + break; + case CHECK_DATA: + ret = check_data(mgr, arg); + break; + } + return ret; +} + +static int +mgr_bcast(struct mISDNchannel *ch, struct sk_buff *skb) +{ + struct manager *mgr = container_of(ch, struct manager, bcast); + struct mISDNhead *hh = mISDN_HEAD_P(skb); + struct sk_buff *cskb = NULL; + struct layer2 *l2; + u_long flags; + int ret; + + read_lock_irqsave(&mgr->lock, flags); + list_for_each_entry(l2, &mgr->layer2, list) { + if ((hh->id & MISDN_ID_SAPI_MASK) == + (l2->ch.addr & MISDN_ID_SAPI_MASK)) { + if (list_is_last(&l2->list, &mgr->layer2)) { + cskb = skb; + skb = NULL; + } else { + if (!cskb) + cskb = skb_copy(skb, GFP_KERNEL); + } + if (cskb) { + ret = l2->ch.send(&l2->ch, cskb); + if (ret) { + if (*debug & DEBUG_SEND_ERR) + printk(KERN_DEBUG + "%s ch%d prim(%x) addr(%x)" + " err %d\n", + __func__, l2->ch.nr, + hh->prim, l2->ch.addr, ret); + } else + cskb = NULL; + } else { + printk(KERN_WARNING "%s ch%d addr %x no mem\n", + __func__, ch->nr, ch->addr); + goto out; + } + } + } +out: + read_unlock_irqrestore(&mgr->lock, flags); + if (cskb) + dev_kfree_skb(cskb); + if (skb) + dev_kfree_skb(skb); + return 0; +} + +static int +mgr_bcast_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg) +{ + + return -EINVAL; +} + +int +create_teimanager(struct mISDNdevice *dev) +{ + struct manager *mgr; + + mgr = kzalloc(sizeof(struct manager), GFP_KERNEL); + if (!mgr) + return -ENOMEM; + INIT_LIST_HEAD(&mgr->layer2); + mgr->lock = __RW_LOCK_UNLOCKED(mgr->lock); + skb_queue_head_init(&mgr->sendq); + mgr->nextid = 1; + mgr->lastid = MISDN_ID_NONE; + mgr->ch.send = mgr_send; + mgr->ch.ctrl = mgr_ctrl; + mgr->ch.st = dev->D.st; + set_channel_address(&mgr->ch, TEI_SAPI, GROUP_TEI); + add_layer2(&mgr->ch, dev->D.st); + mgr->bcast.send = mgr_bcast; + mgr->bcast.ctrl = mgr_bcast_ctrl; + mgr->bcast.st = dev->D.st; + set_channel_address(&mgr->bcast, 0, GROUP_TEI); + add_layer2(&mgr->bcast, dev->D.st); + mgr->deact.debug = *debug & DEBUG_MANAGER; + mgr->deact.userdata = mgr; + mgr->deact.printdebug = da_debug; + mgr->deact.fsm = &deactfsm; + mgr->deact.state = ST_L1_DEACT; + mISDN_FsmInitTimer(&mgr->deact, &mgr->datimer); + dev->teimgr = &mgr->ch; + return 0; +} + +int TEIInit(u_int *deb) +{ + debug = deb; + teifsmu.state_count = TEI_STATE_COUNT; + teifsmu.event_count = TEI_EVENT_COUNT; + teifsmu.strEvent = strTeiEvent; + teifsmu.strState = strTeiState; + mISDN_FsmNew(&teifsmu, TeiFnListUser, ARRAY_SIZE(TeiFnListUser)); + teifsmn.state_count = TEI_STATE_COUNT; + teifsmn.event_count = TEI_EVENT_COUNT; + teifsmn.strEvent = strTeiEvent; + teifsmn.strState = strTeiState; + mISDN_FsmNew(&teifsmn, TeiFnListNet, ARRAY_SIZE(TeiFnListNet)); + deactfsm.state_count = DEACT_STATE_COUNT; + deactfsm.event_count = DEACT_EVENT_COUNT; + deactfsm.strEvent = 
strDeactEvent; + deactfsm.strState = strDeactState; + mISDN_FsmNew(&deactfsm, DeactFnList, ARRAY_SIZE(DeactFnList)); + return 0; +} + +void TEIFree(void) +{ + mISDN_FsmFree(&teifsmu); + mISDN_FsmFree(&teifsmn); + mISDN_FsmFree(&deactfsm); +} diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c new file mode 100644 index 000000000000..b5fabc7019d8 --- /dev/null +++ b/drivers/isdn/mISDN/timerdev.c @@ -0,0 +1,301 @@ +/* + * + * general timer device for using in ISDN stacks + * + * Author Karsten Keil + * + * Copyright 2008 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include + +static int *debug; + + +struct mISDNtimerdev { + int next_id; + struct list_head pending; + struct list_head expired; + wait_queue_head_t wait; + u_int work; + spinlock_t lock; /* protect lists */ +}; + +struct mISDNtimer { + struct list_head list; + struct mISDNtimerdev *dev; + struct timer_list tl; + int id; +}; + +static int +mISDN_open(struct inode *ino, struct file *filep) +{ + struct mISDNtimerdev *dev; + + if (*debug & DEBUG_TIMER) + printk(KERN_DEBUG "%s(%p,%p)\n", __func__, ino, filep); + dev = kmalloc(sizeof(struct mISDNtimerdev) , GFP_KERNEL); + if (!dev) + return -ENOMEM; + dev->next_id = 1; + INIT_LIST_HEAD(&dev->pending); + INIT_LIST_HEAD(&dev->expired); + spin_lock_init(&dev->lock); + dev->work = 0; + init_waitqueue_head(&dev->wait); + filep->private_data = dev; + __module_get(THIS_MODULE); + return 0; +} + +static int +mISDN_close(struct inode *ino, struct file *filep) +{ + struct mISDNtimerdev *dev = filep->private_data; + struct mISDNtimer *timer, *next; + + if (*debug & DEBUG_TIMER) + printk(KERN_DEBUG "%s(%p,%p)\n", __func__, ino, filep); + list_for_each_entry_safe(timer, next, &dev->pending, list) { + del_timer(&timer->tl); + kfree(timer); + } + list_for_each_entry_safe(timer, next, &dev->expired, list) { + kfree(timer); + } + kfree(dev); + module_put(THIS_MODULE); + return 0; +} + +static ssize_t +mISDN_read(struct file *filep, char *buf, size_t count, loff_t *off) +{ + struct mISDNtimerdev *dev = filep->private_data; + struct mISDNtimer *timer; + u_long flags; + int ret = 0; + + if (*debug & DEBUG_TIMER) + printk(KERN_DEBUG "%s(%p, %p, %d, %p)\n", __func__, + filep, buf, (int)count, off); + if (*off != filep->f_pos) + return -ESPIPE; + + if (list_empty(&dev->expired) && (dev->work == 0)) { + if (filep->f_flags & O_NONBLOCK) + return -EAGAIN; + wait_event_interruptible(dev->wait, (dev->work || + !list_empty(&dev->expired))); + if (signal_pending(current)) + return -ERESTARTSYS; + } + if (count < sizeof(int)) + return -ENOSPC; + if (dev->work) + dev->work = 0; + if (!list_empty(&dev->expired)) { + spin_lock_irqsave(&dev->lock, flags); + timer = (struct mISDNtimer *)dev->expired.next; + list_del(&timer->list); + spin_unlock_irqrestore(&dev->lock, flags); + if (put_user(timer->id, (int *)buf)) + ret = -EFAULT; + else + ret = sizeof(int); + kfree(timer); + } + return ret; +} + +static loff_t +mISDN_llseek(struct file *filep, loff_t offset, int orig) +{ + return -ESPIPE; +} + +static ssize_t 
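/*
 * Read side of the timer device as implemented in mISDN_read() above: a
 * successful read() delivers at most one int, the id of an expired timer;
 * without O_NONBLOCK the caller sleeps on dev->wait until a timer expires
 * (adding a timer with timeout 0 only wakes any sleeping reader).  A minimal
 * user-space consumer, a sketch only and assuming the usual /dev node name
 * for the "mISDNtimer" misc device, could look like:
 *
 *   int fd = open("/dev/mISDNtimer", O_RDONLY);
 *   int ms = 500, id;
 *   ioctl(fd, IMADDTIMER, &ms);     // timeout in ms; the new timer id comes
 *                                   // back through the same int argument
 *   read(fd, &id, sizeof(id));      // blocks until that timer expires
 */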
+mISDN_write(struct file *filep, const char *buf, size_t count, loff_t *off) +{ + return -EOPNOTSUPP; +} + +static unsigned int +mISDN_poll(struct file *filep, poll_table *wait) +{ + struct mISDNtimerdev *dev = filep->private_data; + unsigned int mask = POLLERR; + + if (*debug & DEBUG_TIMER) + printk(KERN_DEBUG "%s(%p, %p)\n", __func__, filep, wait); + if (dev) { + poll_wait(filep, &dev->wait, wait); + mask = 0; + if (dev->work || !list_empty(&dev->expired)) + mask |= (POLLIN | POLLRDNORM); + if (*debug & DEBUG_TIMER) + printk(KERN_DEBUG "%s work(%d) empty(%d)\n", __func__, + dev->work, list_empty(&dev->expired)); + } + return mask; +} + +static void +dev_expire_timer(struct mISDNtimer *timer) +{ + u_long flags; + + spin_lock_irqsave(&timer->dev->lock, flags); + list_del(&timer->list); + list_add_tail(&timer->list, &timer->dev->expired); + spin_unlock_irqrestore(&timer->dev->lock, flags); + wake_up_interruptible(&timer->dev->wait); +} + +static int +misdn_add_timer(struct mISDNtimerdev *dev, int timeout) +{ + int id; + u_long flags; + struct mISDNtimer *timer; + + if (!timeout) { + dev->work = 1; + wake_up_interruptible(&dev->wait); + id = 0; + } else { + timer = kzalloc(sizeof(struct mISDNtimer), GFP_KERNEL); + if (!timer) + return -ENOMEM; + spin_lock_irqsave(&dev->lock, flags); + timer->id = dev->next_id++; + if (dev->next_id < 0) + dev->next_id = 1; + list_add_tail(&timer->list, &dev->pending); + spin_unlock_irqrestore(&dev->lock, flags); + timer->dev = dev; + timer->tl.data = (long)timer; + timer->tl.function = (void *) dev_expire_timer; + init_timer(&timer->tl); + timer->tl.expires = jiffies + ((HZ * (u_long)timeout) / 1000); + add_timer(&timer->tl); + id = timer->id; + } + return id; +} + +static int +misdn_del_timer(struct mISDNtimerdev *dev, int id) +{ + u_long flags; + struct mISDNtimer *timer; + int ret = 0; + + spin_lock_irqsave(&dev->lock, flags); + list_for_each_entry(timer, &dev->pending, list) { + if (timer->id == id) { + list_del_init(&timer->list); + del_timer(&timer->tl); + ret = timer->id; + kfree(timer); + goto unlock; + } + } +unlock: + spin_unlock_irqrestore(&dev->lock, flags); + return ret; +} + +static int +mISDN_ioctl(struct inode *inode, struct file *filep, unsigned int cmd, + unsigned long arg) +{ + struct mISDNtimerdev *dev = filep->private_data; + int id, tout, ret = 0; + + + if (*debug & DEBUG_TIMER) + printk(KERN_DEBUG "%s(%p, %x, %lx)\n", __func__, + filep, cmd, arg); + switch (cmd) { + case IMADDTIMER: + if (get_user(tout, (int __user *)arg)) { + ret = -EFAULT; + break; + } + id = misdn_add_timer(dev, tout); + if (*debug & DEBUG_TIMER) + printk(KERN_DEBUG "%s add %d id %d\n", __func__, + tout, id); + if (id < 0) { + ret = id; + break; + } + if (put_user(id, (int __user *)arg)) + ret = -EFAULT; + break; + case IMDELTIMER: + if (get_user(id, (int __user *)arg)) { + ret = -EFAULT; + break; + } + if (*debug & DEBUG_TIMER) + printk(KERN_DEBUG "%s del id %d\n", __func__, id); + id = misdn_del_timer(dev, id); + if (put_user(id, (int __user *)arg)) + ret = -EFAULT; + break; + default: + ret = -EINVAL; + } + return ret; +} + +static struct file_operations mISDN_fops = { + .llseek = mISDN_llseek, + .read = mISDN_read, + .write = mISDN_write, + .poll = mISDN_poll, + .ioctl = mISDN_ioctl, + .open = mISDN_open, + .release = mISDN_close, +}; + +static struct miscdevice mISDNtimer = { + .minor = MISC_DYNAMIC_MINOR, + .name = "mISDNtimer", + .fops = &mISDN_fops, +}; + +int +mISDN_inittimer(int *deb) +{ + int err; + + debug = deb; + err = misc_register(&mISDNtimer); + if 
(err) + printk(KERN_WARNING "mISDN: Could not register timer device\n"); + return err; +} + +void mISDN_timer_cleanup(void) +{ + misc_deregister(&mISDNtimer); +} diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h new file mode 100644 index 000000000000..e794dfb87504 --- /dev/null +++ b/include/linux/mISDNhw.h @@ -0,0 +1,193 @@ +/* + * + * Author Karsten Keil + * + * Basic declarations for the mISDN HW channels + * + * Copyright 2008 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef MISDNHW_H +#define MISDNHW_H +#include +#include + +/* + * HW DEBUG 0xHHHHGGGG + * H - hardware driver specific bits + * G - for all drivers + */ + +#define DEBUG_HW 0x00000001 +#define DEBUG_HW_OPEN 0x00000002 +#define DEBUG_HW_DCHANNEL 0x00000100 +#define DEBUG_HW_DFIFO 0x00000200 +#define DEBUG_HW_BCHANNEL 0x00001000 +#define DEBUG_HW_BFIFO 0x00002000 + +#define MAX_DFRAME_LEN_L1 300 +#define MAX_MON_FRAME 32 +#define MAX_LOG_SPACE 2048 +#define MISDN_COPY_SIZE 32 + +/* channel->Flags bit field */ +#define FLG_TX_BUSY 0 /* tx_buf in use */ +#define FLG_TX_NEXT 1 /* next_skb in use */ +#define FLG_L1_BUSY 2 /* L1 is permanent busy */ +#define FLG_L2_ACTIVATED 3 /* activated from L2 */ +#define FLG_OPEN 5 /* channel is in use */ +#define FLG_ACTIVE 6 /* channel is activated */ +#define FLG_BUSY_TIMER 7 +/* channel type */ +#define FLG_DCHANNEL 8 /* channel is D-channel */ +#define FLG_BCHANNEL 9 /* channel is B-channel */ +#define FLG_ECHANNEL 10 /* channel is E-channel */ +#define FLG_TRANSPARENT 12 /* channel use transparent data */ +#define FLG_HDLC 13 /* channel use hdlc data */ +#define FLG_L2DATA 14 /* channel use L2 DATA primitivs */ +#define FLG_ORIGIN 15 /* channel is on origin site */ +/* channel specific stuff */ +/* arcofi specific */ +#define FLG_ARCOFI_TIMER 16 +#define FLG_ARCOFI_ERROR 17 +/* isar specific */ +#define FLG_INITIALIZED 16 +#define FLG_DLEETX 17 +#define FLG_LASTDLE 18 +#define FLG_FIRST 19 +#define FLG_LASTDATA 20 +#define FLG_NMD_DATA 21 +#define FLG_FTI_RUN 22 +#define FLG_LL_OK 23 +#define FLG_LL_CONN 24 +#define FLG_DTMFSEND 25 + +/* workq events */ +#define FLG_RECVQUEUE 30 +#define FLG_PHCHANGE 31 + +#define schedule_event(s, ev) do { \ + test_and_set_bit(ev, &((s)->Flags)); \ + schedule_work(&((s)->workq)); \ + } while (0) + +struct dchannel { + struct mISDNdevice dev; + u_long Flags; + struct work_struct workq; + void (*phfunc) (struct dchannel *); + u_int state; + void *l1; + /* HW access */ + u_char (*read_reg) (void *, u_char); + void (*write_reg) (void *, u_char, u_char); + void (*read_fifo) (void *, u_char *, int); + void (*write_fifo) (void *, u_char *, int); + void *hw; + int slot; /* multiport card channel slot */ + struct timer_list timer; + /* receive data */ + struct sk_buff *rx_skb; + int maxlen; + /* send data */ + struct sk_buff_head squeue; + struct sk_buff_head rqueue; + struct sk_buff *tx_skb; + int tx_idx; + int debug; + /* statistics */ + int err_crc; + int err_tx; + int err_rx; +}; + +typedef int (dchannel_l1callback)(struct dchannel *, u_int); +extern int create_l1(struct dchannel *, dchannel_l1callback *); + +/* 
private L1 commands */ +#define INFO0 0x8002 +#define INFO1 0x8102 +#define INFO2 0x8202 +#define INFO3_P8 0x8302 +#define INFO3_P10 0x8402 +#define INFO4_P8 0x8502 +#define INFO4_P10 0x8602 +#define LOSTFRAMING 0x8702 +#define ANYSIGNAL 0x8802 +#define HW_POWERDOWN 0x8902 +#define HW_RESET_REQ 0x8a02 +#define HW_POWERUP_REQ 0x8b02 +#define HW_DEACT_REQ 0x8c02 +#define HW_ACTIVATE_REQ 0x8e02 +#define HW_D_NOBLOCKED 0x8f02 +#define HW_RESET_IND 0x9002 +#define HW_POWERUP_IND 0x9102 +#define HW_DEACT_IND 0x9202 +#define HW_ACTIVATE_IND 0x9302 +#define HW_DEACT_CNF 0x9402 +#define HW_TESTLOOP 0x9502 +#define HW_TESTRX_RAW 0x9602 +#define HW_TESTRX_HDLC 0x9702 +#define HW_TESTRX_OFF 0x9802 + +struct layer1; +extern int l1_event(struct layer1 *, u_int); + + +struct bchannel { + struct mISDNchannel ch; + int nr; + u_long Flags; + struct work_struct workq; + u_int state; + /* HW access */ + u_char (*read_reg) (void *, u_char); + void (*write_reg) (void *, u_char, u_char); + void (*read_fifo) (void *, u_char *, int); + void (*write_fifo) (void *, u_char *, int); + void *hw; + int slot; /* multiport card channel slot */ + struct timer_list timer; + /* receive data */ + struct sk_buff *rx_skb; + int maxlen; + /* send data */ + struct sk_buff *next_skb; + struct sk_buff *tx_skb; + struct sk_buff_head rqueue; + int rcount; + int tx_idx; + int debug; + /* statistics */ + int err_crc; + int err_tx; + int err_rx; +}; + +extern int mISDN_initdchannel(struct dchannel *, int, void *); +extern int mISDN_initbchannel(struct bchannel *, int); +extern int mISDN_freedchannel(struct dchannel *); +extern int mISDN_freebchannel(struct bchannel *); +extern void queue_ch_frame(struct mISDNchannel *, u_int, + int, struct sk_buff *); +extern int dchannel_senddata(struct dchannel *, struct sk_buff *); +extern int bchannel_senddata(struct bchannel *, struct sk_buff *); +extern void recv_Dchannel(struct dchannel *); +extern void recv_Bchannel(struct bchannel *); +extern void recv_Dchannel_skb(struct dchannel *, struct sk_buff *); +extern void recv_Bchannel_skb(struct bchannel *, struct sk_buff *); +extern void confirm_Bsend(struct bchannel *bch); +extern int get_next_bframe(struct bchannel *); +extern int get_next_dframe(struct dchannel *); + +#endif diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h new file mode 100644 index 000000000000..5c948f337817 --- /dev/null +++ b/include/linux/mISDNif.h @@ -0,0 +1,487 @@ +/* + * + * Author Karsten Keil + * + * Copyright 2008 by Karsten Keil + * + * This code is free software; you can redistribute it and/or modify + * it under the terms of the GNU LESSER GENERAL PUBLIC LICENSE + * version 2.1 as published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU LESSER GENERAL PUBLIC LICENSE for more details. 
+ * + */ + +#ifndef mISDNIF_H +#define mISDNIF_H + +#include +#include +#include +#include + +/* + * ABI Version 32 bit + * + * <8 bit> Major version + * - changed if any interface become backwards incompatible + * + * <8 bit> Minor version + * - changed if any interface is extended but backwards compatible + * + * <16 bit> Release number + * - should be incremented on every checkin + */ +#define MISDN_MAJOR_VERSION 1 +#define MISDN_MINOR_VERSION 0 +#define MISDN_RELEASE 18 + +/* primitives for information exchange + * generell format + * <16 bit 0 > + * <8 bit command> + * BIT 8 = 1 LAYER private + * BIT 7 = 1 answer + * BIT 6 = 1 DATA + * <8 bit target layer mask> + * + * Layer = 00 is reserved for general commands + Layer = 01 L2 -> HW + Layer = 02 HW -> L2 + Layer = 04 L3 -> L2 + Layer = 08 L2 -> L3 + * Layer = FF is reserved for broadcast commands + */ + +#define MISDN_CMDMASK 0xff00 +#define MISDN_LAYERMASK 0x00ff + +/* generell commands */ +#define OPEN_CHANNEL 0x0100 +#define CLOSE_CHANNEL 0x0200 +#define CONTROL_CHANNEL 0x0300 +#define CHECK_DATA 0x0400 + +/* layer 2 -> layer 1 */ +#define PH_ACTIVATE_REQ 0x0101 +#define PH_DEACTIVATE_REQ 0x0201 +#define PH_DATA_REQ 0x2001 +#define MPH_ACTIVATE_REQ 0x0501 +#define MPH_DEACTIVATE_REQ 0x0601 +#define MPH_INFORMATION_REQ 0x0701 +#define PH_CONTROL_REQ 0x0801 + +/* layer 1 -> layer 2 */ +#define PH_ACTIVATE_IND 0x0102 +#define PH_ACTIVATE_CNF 0x4102 +#define PH_DEACTIVATE_IND 0x0202 +#define PH_DEACTIVATE_CNF 0x4202 +#define PH_DATA_IND 0x2002 +#define MPH_ACTIVATE_IND 0x0502 +#define MPH_DEACTIVATE_IND 0x0602 +#define MPH_INFORMATION_IND 0x0702 +#define PH_DATA_CNF 0x6002 +#define PH_CONTROL_IND 0x0802 +#define PH_CONTROL_CNF 0x4802 + +/* layer 3 -> layer 2 */ +#define DL_ESTABLISH_REQ 0x1004 +#define DL_RELEASE_REQ 0x1104 +#define DL_DATA_REQ 0x3004 +#define DL_UNITDATA_REQ 0x3104 +#define DL_INFORMATION_REQ 0x0004 + +/* layer 2 -> layer 3 */ +#define DL_ESTABLISH_IND 0x1008 +#define DL_ESTABLISH_CNF 0x5008 +#define DL_RELEASE_IND 0x1108 +#define DL_RELEASE_CNF 0x5108 +#define DL_DATA_IND 0x3008 +#define DL_UNITDATA_IND 0x3108 +#define DL_INFORMATION_IND 0x0008 + +/* intern layer 2 managment */ +#define MDL_ASSIGN_REQ 0x1804 +#define MDL_ASSIGN_IND 0x1904 +#define MDL_REMOVE_REQ 0x1A04 +#define MDL_REMOVE_IND 0x1B04 +#define MDL_STATUS_UP_IND 0x1C04 +#define MDL_STATUS_DOWN_IND 0x1D04 +#define MDL_STATUS_UI_IND 0x1E04 +#define MDL_ERROR_IND 0x1F04 +#define MDL_ERROR_RSP 0x5F04 + +/* DL_INFORMATION_IND types */ +#define DL_INFO_L2_CONNECT 0x0001 +#define DL_INFO_L2_REMOVED 0x0002 + +/* PH_CONTROL types */ +/* TOUCH TONE IS 0x20XX XX "0"..."9", "A","B","C","D","*","#" */ +#define DTMF_TONE_VAL 0x2000 +#define DTMF_TONE_MASK 0x007F +#define DTMF_TONE_START 0x2100 +#define DTMF_TONE_STOP 0x2200 +#define DTMF_HFC_COEF 0x4000 +#define DSP_CONF_JOIN 0x2403 +#define DSP_CONF_SPLIT 0x2404 +#define DSP_RECEIVE_OFF 0x2405 +#define DSP_RECEIVE_ON 0x2406 +#define DSP_ECHO_ON 0x2407 +#define DSP_ECHO_OFF 0x2408 +#define DSP_MIX_ON 0x2409 +#define DSP_MIX_OFF 0x240a +#define DSP_DELAY 0x240b +#define DSP_JITTER 0x240c +#define DSP_TXDATA_ON 0x240d +#define DSP_TXDATA_OFF 0x240e +#define DSP_TX_DEJITTER 0x240f +#define DSP_TX_DEJ_OFF 0x2410 +#define DSP_TONE_PATT_ON 0x2411 +#define DSP_TONE_PATT_OFF 0x2412 +#define DSP_VOL_CHANGE_TX 0x2413 +#define DSP_VOL_CHANGE_RX 0x2414 +#define DSP_BF_ENABLE_KEY 0x2415 +#define DSP_BF_DISABLE 0x2416 +#define DSP_BF_ACCEPT 0x2416 +#define DSP_BF_REJECT 0x2417 +#define DSP_PIPELINE_CFG 0x2418 +#define 
HFC_VOL_CHANGE_TX 0x2601 +#define HFC_VOL_CHANGE_RX 0x2602 +#define HFC_SPL_LOOP_ON 0x2603 +#define HFC_SPL_LOOP_OFF 0x2604 + +/* DSP_TONE_PATT_ON parameter */ +#define TONE_OFF 0x0000 +#define TONE_GERMAN_DIALTONE 0x0001 +#define TONE_GERMAN_OLDDIALTONE 0x0002 +#define TONE_AMERICAN_DIALTONE 0x0003 +#define TONE_GERMAN_DIALPBX 0x0004 +#define TONE_GERMAN_OLDDIALPBX 0x0005 +#define TONE_AMERICAN_DIALPBX 0x0006 +#define TONE_GERMAN_RINGING 0x0007 +#define TONE_GERMAN_OLDRINGING 0x0008 +#define TONE_AMERICAN_RINGPBX 0x000b +#define TONE_GERMAN_RINGPBX 0x000c +#define TONE_GERMAN_OLDRINGPBX 0x000d +#define TONE_AMERICAN_RINGING 0x000e +#define TONE_GERMAN_BUSY 0x000f +#define TONE_GERMAN_OLDBUSY 0x0010 +#define TONE_AMERICAN_BUSY 0x0011 +#define TONE_GERMAN_HANGUP 0x0012 +#define TONE_GERMAN_OLDHANGUP 0x0013 +#define TONE_AMERICAN_HANGUP 0x0014 +#define TONE_SPECIAL_INFO 0x0015 +#define TONE_GERMAN_GASSENBESETZT 0x0016 +#define TONE_GERMAN_AUFSCHALTTON 0x0016 + +/* MPH_INFORMATION_IND */ +#define L1_SIGNAL_LOS_OFF 0x0010 +#define L1_SIGNAL_LOS_ON 0x0011 +#define L1_SIGNAL_AIS_OFF 0x0012 +#define L1_SIGNAL_AIS_ON 0x0013 +#define L1_SIGNAL_RDI_OFF 0x0014 +#define L1_SIGNAL_RDI_ON 0x0015 +#define L1_SIGNAL_SLIP_RX 0x0020 +#define L1_SIGNAL_SLIP_TX 0x0021 + +/* + * protocol ids + * D channel 1-31 + * B channel 33 - 63 + */ + +#define ISDN_P_NONE 0 +#define ISDN_P_BASE 0 +#define ISDN_P_TE_S0 0x01 +#define ISDN_P_NT_S0 0x02 +#define ISDN_P_TE_E1 0x03 +#define ISDN_P_NT_E1 0x04 +#define ISDN_P_LAPD_TE 0x10 +#define ISDN_P_LAPD_NT 0x11 + +#define ISDN_P_B_MASK 0x1f +#define ISDN_P_B_START 0x20 + +#define ISDN_P_B_RAW 0x21 +#define ISDN_P_B_HDLC 0x22 +#define ISDN_P_B_X75SLP 0x23 +#define ISDN_P_B_L2DTMF 0x24 +#define ISDN_P_B_L2DSP 0x25 +#define ISDN_P_B_L2DSPHDLC 0x26 + +#define OPTION_L2_PMX 1 +#define OPTION_L2_PTP 2 +#define OPTION_L2_FIXEDTEI 3 +#define OPTION_L2_CLEANUP 4 + +/* should be in sync with linux/kobject.h:KOBJ_NAME_LEN */ +#define MISDN_MAX_IDLEN 20 + +struct mISDNhead { + unsigned int prim; + unsigned int id; +} __attribute__((packed)); + +#define MISDN_HEADER_LEN sizeof(struct mISDNhead) +#define MAX_DATA_SIZE 2048 +#define MAX_DATA_MEM (MAX_DATA_SIZE + MISDN_HEADER_LEN) +#define MAX_DFRAME_LEN 260 + +#define MISDN_ID_ADDR_MASK 0xFFFF +#define MISDN_ID_TEI_MASK 0xFF00 +#define MISDN_ID_SAPI_MASK 0x00FF +#define MISDN_ID_TEI_ANY 0x7F00 + +#define MISDN_ID_ANY 0xFFFF +#define MISDN_ID_NONE 0xFFFE + +#define GROUP_TEI 127 +#define TEI_SAPI 63 +#define CTRL_SAPI 0 + +#define MISDN_CHMAP_SIZE 4 + +#define SOL_MISDN 0 + +struct sockaddr_mISDN { + sa_family_t family; + unsigned char dev; + unsigned char channel; + unsigned char sapi; + unsigned char tei; +}; + +/* timer device ioctl */ +#define IMADDTIMER _IOR('I', 64, int) +#define IMDELTIMER _IOR('I', 65, int) +/* socket ioctls */ +#define IMGETVERSION _IOR('I', 66, int) +#define IMGETCOUNT _IOR('I', 67, int) +#define IMGETDEVINFO _IOR('I', 68, int) +#define IMCTRLREQ _IOR('I', 69, int) +#define IMCLEAR_L2 _IOR('I', 70, int) + +struct mISDNversion { + unsigned char major; + unsigned char minor; + unsigned short release; +}; + +struct mISDN_devinfo { + u_int id; + u_int Dprotocols; + u_int Bprotocols; + u_int protocol; + u_long channelmap[MISDN_CHMAP_SIZE]; + u_int nrbchan; + char name[MISDN_MAX_IDLEN]; +}; + +/* CONTROL_CHANNEL parameters */ +#define MISDN_CTRL_GETOP 0x0000 +#define MISDN_CTRL_LOOP 0x0001 +#define MISDN_CTRL_CONNECT 0x0002 +#define MISDN_CTRL_DISCONNECT 0x0004 +#define MISDN_CTRL_PCMCONNECT 0x0010 +#define 
MISDN_CTRL_PCMDISCONNECT 0x0020 +#define MISDN_CTRL_SETPEER 0x0040 +#define MISDN_CTRL_UNSETPEER 0x0080 +#define MISDN_CTRL_RX_OFF 0x0100 +#define MISDN_CTRL_HW_FEATURES_OP 0x2000 +#define MISDN_CTRL_HW_FEATURES 0x2001 +#define MISDN_CTRL_HFC_OP 0x4000 +#define MISDN_CTRL_HFC_PCM_CONN 0x4001 +#define MISDN_CTRL_HFC_PCM_DISC 0x4002 +#define MISDN_CTRL_HFC_CONF_JOIN 0x4003 +#define MISDN_CTRL_HFC_CONF_SPLIT 0x4004 +#define MISDN_CTRL_HFC_RECEIVE_OFF 0x4005 +#define MISDN_CTRL_HFC_RECEIVE_ON 0x4006 +#define MISDN_CTRL_HFC_ECHOCAN_ON 0x4007 +#define MISDN_CTRL_HFC_ECHOCAN_OFF 0x4008 + + +/* socket options */ +#define MISDN_TIME_STAMP 0x0001 + +struct mISDN_ctrl_req { + int op; + int channel; + int p1; + int p2; +}; + +/* muxer options */ +#define MISDN_OPT_ALL 1 +#define MISDN_OPT_TEIMGR 2 + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include + +#define DEBUG_CORE 0x000000ff +#define DEBUG_CORE_FUNC 0x00000002 +#define DEBUG_SOCKET 0x00000004 +#define DEBUG_MANAGER 0x00000008 +#define DEBUG_SEND_ERR 0x00000010 +#define DEBUG_MSG_THREAD 0x00000020 +#define DEBUG_QUEUE_FUNC 0x00000040 +#define DEBUG_L1 0x0000ff00 +#define DEBUG_L1_FSM 0x00000200 +#define DEBUG_L2 0x00ff0000 +#define DEBUG_L2_FSM 0x00020000 +#define DEBUG_L2_CTRL 0x00040000 +#define DEBUG_L2_RECV 0x00080000 +#define DEBUG_L2_TEI 0x00100000 +#define DEBUG_L2_TEIFSM 0x00200000 +#define DEBUG_TIMER 0x01000000 + +#define mISDN_HEAD_P(s) ((struct mISDNhead *)&s->cb[0]) +#define mISDN_HEAD_PRIM(s) (((struct mISDNhead *)&s->cb[0])->prim) +#define mISDN_HEAD_ID(s) (((struct mISDNhead *)&s->cb[0])->id) + +/* socket states */ +#define MISDN_OPEN 1 +#define MISDN_BOUND 2 +#define MISDN_CLOSED 3 + +struct mISDNchannel; +struct mISDNdevice; +struct mISDNstack; + +struct channel_req { + u_int protocol; + struct sockaddr_mISDN adr; + struct mISDNchannel *ch; +}; + +typedef int (ctrl_func_t)(struct mISDNchannel *, u_int, void *); +typedef int (send_func_t)(struct mISDNchannel *, struct sk_buff *); +typedef int (create_func_t)(struct channel_req *); + +struct Bprotocol { + struct list_head list; + char *name; + u_int Bprotocols; + create_func_t *create; +}; + +struct mISDNchannel { + struct list_head list; + u_int protocol; + u_int nr; + u_long opt; + u_int addr; + struct mISDNstack *st; + struct mISDNchannel *peer; + send_func_t *send; + send_func_t *recv; + ctrl_func_t *ctrl; +}; + +struct mISDN_sock_list { + struct hlist_head head; + rwlock_t lock; +}; + +struct mISDN_sock { + struct sock sk; + struct mISDNchannel ch; + u_int cmask; + struct mISDNdevice *dev; +}; + + + +struct mISDNdevice { + struct mISDNchannel D; + u_int id; + char name[MISDN_MAX_IDLEN]; + u_int Dprotocols; + u_int Bprotocols; + u_int nrbchan; + u_long channelmap[MISDN_CHMAP_SIZE]; + struct list_head bchannels; + struct mISDNchannel *teimgr; + struct device dev; +}; + +struct mISDNstack { + u_long status; + struct mISDNdevice *dev; + struct task_struct *thread; + struct completion *notify; + wait_queue_head_t workq; + struct sk_buff_head msgq; + struct list_head layer2; + struct mISDNchannel *layer1; + struct mISDNchannel own; + struct mutex lmutex; /* protect lists */ + struct mISDN_sock_list l1sock; +#ifdef MISDN_MSG_STATS + u_int msg_cnt; + u_int sleep_cnt; + u_int stopped_cnt; +#endif +}; + +/* global alloc/queue dunctions */ + +static inline struct sk_buff * +mI_alloc_skb(unsigned int len, gfp_t gfp_mask) +{ + struct sk_buff *skb; + + skb = alloc_skb(len + MISDN_HEADER_LEN, gfp_mask); + if (likely(skb)) + skb_reserve(skb, MISDN_HEADER_LEN); + return 
skb; +} + +static inline struct sk_buff * +_alloc_mISDN_skb(u_int prim, u_int id, u_int len, void *dp, gfp_t gfp_mask) +{ + struct sk_buff *skb = mI_alloc_skb(len, gfp_mask); + struct mISDNhead *hh; + + if (!skb) + return NULL; + if (len) + memcpy(skb_put(skb, len), dp, len); + hh = mISDN_HEAD_P(skb); + hh->prim = prim; + hh->id = id; + return skb; +} + +static inline void +_queue_data(struct mISDNchannel *ch, u_int prim, + u_int id, u_int len, void *dp, gfp_t gfp_mask) +{ + struct sk_buff *skb; + + if (!ch->peer) + return; + skb = _alloc_mISDN_skb(prim, id, len, dp, gfp_mask); + if (!skb) + return; + if (ch->recv(ch->peer, skb)) + dev_kfree_skb(skb); +} + +/* global register/unregister functions */ + +extern int mISDN_register_device(struct mISDNdevice *, char *name); +extern void mISDN_unregister_device(struct mISDNdevice *); +extern int mISDN_register_Bprotocol(struct Bprotocol *); +extern void mISDN_unregister_Bprotocol(struct Bprotocol *); + +extern void set_channel_address(struct mISDNchannel *, u_int, u_int); + +#endif /* __KERNEL__ */ +#endif /* mISDNIF_H */ -- cgit v1.2.3 From 960366cf8dbb3359afaca30cf7fdbf69a6d6dda7 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Sun, 27 Jul 2008 01:56:38 +0200 Subject: Add mISDN DSP Enable support for digital audio processing capability. This module may be used for special applications that require cross connecting of bchannels, conferencing, dtmf decoding echo cancelation, tone generation, and Blowfish encryption and decryption. It may use hardware features if available. Signed-off-by: Karsten Keil --- drivers/isdn/mISDN/Kconfig | 18 + drivers/isdn/mISDN/Makefile | 2 + drivers/isdn/mISDN/dsp.h | 263 ++++++ drivers/isdn/mISDN/dsp_audio.c | 434 +++++++++ drivers/isdn/mISDN/dsp_biquad.h | 65 ++ drivers/isdn/mISDN/dsp_blowfish.c | 672 +++++++++++++ drivers/isdn/mISDN/dsp_cmx.c | 1886 +++++++++++++++++++++++++++++++++++++ drivers/isdn/mISDN/dsp_core.c | 1191 +++++++++++++++++++++++ drivers/isdn/mISDN/dsp_dtmf.c | 303 ++++++ drivers/isdn/mISDN/dsp_ecdis.h | 110 +++ drivers/isdn/mISDN/dsp_hwec.c | 138 +++ drivers/isdn/mISDN/dsp_hwec.h | 10 + drivers/isdn/mISDN/dsp_pipeline.c | 348 +++++++ drivers/isdn/mISDN/dsp_tones.c | 551 +++++++++++ include/linux/mISDNdsp.h | 37 + 15 files changed, 6028 insertions(+) create mode 100644 drivers/isdn/mISDN/dsp.h create mode 100644 drivers/isdn/mISDN/dsp_audio.c create mode 100644 drivers/isdn/mISDN/dsp_biquad.h create mode 100644 drivers/isdn/mISDN/dsp_blowfish.c create mode 100644 drivers/isdn/mISDN/dsp_cmx.c create mode 100644 drivers/isdn/mISDN/dsp_core.c create mode 100644 drivers/isdn/mISDN/dsp_dtmf.c create mode 100644 drivers/isdn/mISDN/dsp_ecdis.h create mode 100644 drivers/isdn/mISDN/dsp_hwec.c create mode 100644 drivers/isdn/mISDN/dsp_hwec.h create mode 100644 drivers/isdn/mISDN/dsp_pipeline.c create mode 100644 drivers/isdn/mISDN/dsp_tones.c create mode 100644 include/linux/mISDNdsp.h (limited to 'include/linux') diff --git a/drivers/isdn/mISDN/Kconfig b/drivers/isdn/mISDN/Kconfig index 231bd0d08316..6a97e86e7f21 100644 --- a/drivers/isdn/mISDN/Kconfig +++ b/drivers/isdn/mISDN/Kconfig @@ -7,3 +7,21 @@ menuconfig MISDN help Enable support for the modular ISDN driver. +if MISDN != n + +config MISDN_DSP + tristate "Digital Audio Processing of transparent data" + depends on MISDN + help + Enable support for digital audio processing capability. 
+ This module may be used for special applications that require + cross connecting of bchannels, conferencing, dtmf decoding + echo cancelation, tone generation, and Blowfish encryption and + decryption. + It may use hardware features if available. + E.g. it is required for PBX4Linux. Go to http://isdn.eversberg.eu + and get more informations about this module and it's usage. + If unsure, say 'N'. + + source "drivers/isdn/hardware/mISDN/Kconfig" +endif #MISDN diff --git a/drivers/isdn/mISDN/Makefile b/drivers/isdn/mISDN/Makefile index 87c563d33612..7f1a21804208 100644 --- a/drivers/isdn/mISDN/Makefile +++ b/drivers/isdn/mISDN/Makefile @@ -3,7 +3,9 @@ # obj-$(CONFIG_MISDN) += mISDN_core.o +obj-$(CONFIG_MISDN_DSP) += mISDN_dsp.o # multi objects mISDN_core-objs := core.o fsm.o socket.o hwchannel.o stack.o layer1.o layer2.o tei.o timerdev.o +mISDN_dsp-objs := dsp_core.o dsp_cmx.o dsp_tones.o dsp_dtmf.o dsp_audio.o dsp_blowfish.o dsp_pipeline.o dsp_hwec.o diff --git a/drivers/isdn/mISDN/dsp.h b/drivers/isdn/mISDN/dsp.h new file mode 100644 index 000000000000..6c3fed6b8d4f --- /dev/null +++ b/drivers/isdn/mISDN/dsp.h @@ -0,0 +1,263 @@ +/* + * Audio support data for ISDN4Linux. + * + * Copyright 2002/2003 by Andreas Eversberg (jolly@eversberg.eu) + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + * + */ + +#define DEBUG_DSP_CTRL 0x0001 +#define DEBUG_DSP_CORE 0x0002 +#define DEBUG_DSP_DTMF 0x0004 +#define DEBUG_DSP_CMX 0x0010 +#define DEBUG_DSP_TONE 0x0020 +#define DEBUG_DSP_BLOWFISH 0x0040 +#define DEBUG_DSP_DELAY 0x0100 +#define DEBUG_DSP_DTMFCOEFF 0x8000 /* heavy output */ + +/* options may be: + * + * bit 0 = use ulaw instead of alaw + * bit 1 = enable hfc hardware accelleration for all channels + * + */ +#define DSP_OPT_ULAW (1<<0) +#define DSP_OPT_NOHARDWARE (1<<1) + +#include +#include + +#include "dsp_ecdis.h" + +extern int dsp_options; +extern int dsp_debug; +extern int dsp_poll; +extern int dsp_tics; +extern spinlock_t dsp_lock; +extern struct work_struct dsp_workq; +extern u32 dsp_poll_diff; /* calculated fix-comma corrected poll value */ + +/*************** + * audio stuff * + ***************/ + +extern s32 dsp_audio_alaw_to_s32[256]; +extern s32 dsp_audio_ulaw_to_s32[256]; +extern s32 *dsp_audio_law_to_s32; +extern u8 dsp_audio_s16_to_law[65536]; +extern u8 dsp_audio_alaw_to_ulaw[256]; +extern u8 dsp_audio_mix_law[65536]; +extern u8 dsp_audio_seven2law[128]; +extern u8 dsp_audio_law2seven[256]; +extern void dsp_audio_generate_law_tables(void); +extern void dsp_audio_generate_s2law_table(void); +extern void dsp_audio_generate_seven(void); +extern void dsp_audio_generate_mix_table(void); +extern void dsp_audio_generate_ulaw_samples(void); +extern void dsp_audio_generate_volume_changes(void); +extern u8 dsp_silence; + + +/************* + * cmx stuff * + *************/ + +#define MAX_POLL 256 /* maximum number of send-chunks */ + +#define CMX_BUFF_SIZE 0x8000 /* must be 2**n (0x1000 about 1/2 second) */ +#define CMX_BUFF_HALF 0x4000 /* CMX_BUFF_SIZE / 2 */ +#define CMX_BUFF_MASK 0x7fff /* CMX_BUFF_SIZE - 1 */ + +/* how many seconds will we check the lowest delay until the jitter buffer + is reduced by that delay */ +#define MAX_SECONDS_JITTER_CHECK 5 + +extern struct timer_list dsp_spl_tl; +extern u32 dsp_spl_jiffies; + +/* the structure of conferences: + * + * each conference has a unique number, given by user space. + * the conferences are linked in a chain. 
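 * (in the code below: the chain head is conf_ilist, declared later in this
 *  header, holding struct dsp_conf entries; the members of one conference
 *  live on conf->mlist as struct dsp_conf_member entries, each pointing at
 *  its struct dsp)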
+ * each conference has members linked in a chain. + * each dsplayer points to a member, each member points to a dsplayer. + */ + +/* all members within a conference (this is linked 1:1 with the dsp) */ +struct dsp; +struct dsp_conf_member { + struct list_head list; + struct dsp *dsp; +}; + +/* the list of all conferences */ +struct dsp_conf { + struct list_head list; + u32 id; + /* all cmx stacks with the same ID are + connected */ + struct list_head mlist; + int software; /* conf is processed by software */ + int hardware; /* conf is processed by hardware */ + /* note: if both unset, has only one member */ +}; + + +/************** + * DTMF stuff * + **************/ + +#define DSP_DTMF_NPOINTS 102 + +#define ECHOCAN_BUFLEN (4*128) + +struct dsp_dtmf { + int treshold; /* above this is dtmf (square of) */ + int software; /* dtmf uses software decoding */ + int hardware; /* dtmf uses hardware decoding */ + int size; /* number of bytes in buffer */ + signed short buffer[DSP_DTMF_NPOINTS]; + /* buffers one full dtmf frame */ + u8 lastwhat, lastdigit; + int count; + u8 digits[16]; /* just the dtmf result */ +}; + + +/****************** + * pipeline stuff * + ******************/ +struct dsp_pipeline { + rwlock_t lock; + struct list_head list; + int inuse; +}; + +/*************** + * tones stuff * + ***************/ + +struct dsp_tone { + int software; /* tones are generated by software */ + int hardware; /* tones are generated by hardware */ + int tone; + void *pattern; + int count; + int index; + struct timer_list tl; +}; + +/***************** + * general stuff * + *****************/ + +struct dsp { + struct list_head list; + struct mISDNchannel ch; + struct mISDNchannel *up; + unsigned char name[64]; + int b_active; + int echo; /* echo is enabled */ + int rx_disabled; /* what the user wants */ + int rx_is_off; /* what the card is */ + int tx_mix; + struct dsp_tone tone; + struct dsp_dtmf dtmf; + int tx_volume, rx_volume; + + /* queue for sending frames */ + struct work_struct workq; + struct sk_buff_head sendq; + int hdlc; /* if mode is hdlc */ + int data_pending; /* currently an unconfirmed frame */ + + /* conference stuff */ + u32 conf_id; + struct dsp_conf *conf; + struct dsp_conf_member + *member; + + /* buffer stuff */ + int rx_W; /* current write pos for data without timestamp */ + int rx_R; /* current read pos for transmit clock */ + int rx_init; /* if set, pointers will be adjusted first */ + int tx_W; /* current write pos for transmit data */ + int tx_R; /* current read pos for transmit clock */ + int rx_delay[MAX_SECONDS_JITTER_CHECK]; + int tx_delay[MAX_SECONDS_JITTER_CHECK]; + u8 tx_buff[CMX_BUFF_SIZE]; + u8 rx_buff[CMX_BUFF_SIZE]; + int last_tx; /* if set, we transmitted last poll interval */ + int cmx_delay; /* initial delay of buffers, + or 0 for dynamic jitter buffer */ + int tx_dejitter; /* if set, dejitter tx buffer */ + int tx_data; /* enables tx-data of CMX to upper layer */ + + /* hardware stuff */ + struct dsp_features features; + int features_rx_off; /* set if rx_off is featured */ + int pcm_slot_rx; /* current PCM slot (or -1) */ + int pcm_bank_rx; + int pcm_slot_tx; + int pcm_bank_tx; + int hfc_conf; /* unique id of current conference (or -1) */ + + /* encryption stuff */ + int bf_enable; + u32 bf_p[18]; + u32 bf_s[1024]; + int bf_crypt_pos; + u8 bf_data_in[9]; + u8 bf_crypt_out[9]; + int bf_decrypt_in_pos; + int bf_decrypt_out_pos; + u8 bf_crypt_inring[16]; + u8 bf_data_out[9]; + int bf_sync; + + struct dsp_pipeline + pipeline; +}; + +/* functions */ + +extern void 
dsp_change_volume(struct sk_buff *skb, int volume); + +extern struct list_head dsp_ilist; +extern struct list_head conf_ilist; +extern void dsp_cmx_debug(struct dsp *dsp); +extern void dsp_cmx_hardware(struct dsp_conf *conf, struct dsp *dsp); +extern int dsp_cmx_conf(struct dsp *dsp, u32 conf_id); +extern void dsp_cmx_receive(struct dsp *dsp, struct sk_buff *skb); +extern void dsp_cmx_hdlc(struct dsp *dsp, struct sk_buff *skb); +extern void dsp_cmx_send(void *arg); +extern void dsp_cmx_transmit(struct dsp *dsp, struct sk_buff *skb); +extern int dsp_cmx_del_conf_member(struct dsp *dsp); +extern int dsp_cmx_del_conf(struct dsp_conf *conf); + +extern void dsp_dtmf_goertzel_init(struct dsp *dsp); +extern void dsp_dtmf_hardware(struct dsp *dsp); +extern u8 *dsp_dtmf_goertzel_decode(struct dsp *dsp, u8 *data, int len, + int fmt); + +extern int dsp_tone(struct dsp *dsp, int tone); +extern void dsp_tone_copy(struct dsp *dsp, u8 *data, int len); +extern void dsp_tone_timeout(void *arg); + +extern void dsp_bf_encrypt(struct dsp *dsp, u8 *data, int len); +extern void dsp_bf_decrypt(struct dsp *dsp, u8 *data, int len); +extern int dsp_bf_init(struct dsp *dsp, const u8 *key, unsigned int keylen); +extern void dsp_bf_cleanup(struct dsp *dsp); + +extern int dsp_pipeline_module_init(void); +extern void dsp_pipeline_module_exit(void); +extern int dsp_pipeline_init(struct dsp_pipeline *pipeline); +extern void dsp_pipeline_destroy(struct dsp_pipeline *pipeline); +extern int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg); +extern void dsp_pipeline_process_tx(struct dsp_pipeline *pipeline, u8 *data, + int len); +extern void dsp_pipeline_process_rx(struct dsp_pipeline *pipeline, u8 *data, + int len); + diff --git a/drivers/isdn/mISDN/dsp_audio.c b/drivers/isdn/mISDN/dsp_audio.c new file mode 100644 index 000000000000..1c2dd5694773 --- /dev/null +++ b/drivers/isdn/mISDN/dsp_audio.c @@ -0,0 +1,434 @@ +/* + * Audio support data for mISDN_dsp. + * + * Copyright 2002/2003 by Andreas Eversberg (jolly@eversberg.eu) + * Rewritten by Peter + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + * + */ + +#include +#include +#include +#include "core.h" +#include "dsp.h" + +/* ulaw[unsigned char] -> signed 16-bit */ +s32 dsp_audio_ulaw_to_s32[256]; +/* alaw[unsigned char] -> signed 16-bit */ +s32 dsp_audio_alaw_to_s32[256]; + +s32 *dsp_audio_law_to_s32; +EXPORT_SYMBOL(dsp_audio_law_to_s32); + +/* signed 16-bit -> law */ +u8 dsp_audio_s16_to_law[65536]; +EXPORT_SYMBOL(dsp_audio_s16_to_law); + +/* alaw -> ulaw */ +u8 dsp_audio_alaw_to_ulaw[256]; +/* ulaw -> alaw */ +u8 dsp_audio_ulaw_to_alaw[256]; +u8 dsp_silence; + + +/***************************************************** + * generate table for conversion of s16 to alaw/ulaw * + *****************************************************/ + +#define AMI_MASK 0x55 + +static inline unsigned char linear2alaw(short int linear) +{ + int mask; + int seg; + int pcm_val; + static int seg_end[8] = { + 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF + }; + + pcm_val = linear; + if (pcm_val >= 0) { + /* Sign (7th) bit = 1 */ + mask = AMI_MASK | 0x80; + } else { + /* Sign bit = 0 */ + mask = AMI_MASK; + pcm_val = -pcm_val; + } + + /* Convert the scaled magnitude to segment number. */ + for (seg = 0; seg < 8; seg++) { + if (pcm_val <= seg_end[seg]) + break; + } + /* Combine the sign, segment, and quantization bits. */ + return ((seg << 4) | + ((pcm_val >> ((seg) ? 
(seg + 3) : 4)) & 0x0F)) ^ mask; +} + + +static inline short int alaw2linear(unsigned char alaw) +{ + int i; + int seg; + + alaw ^= AMI_MASK; + i = ((alaw & 0x0F) << 4) + 8 /* rounding error */; + seg = (((int) alaw & 0x70) >> 4); + if (seg) + i = (i + 0x100) << (seg - 1); + return (short int) ((alaw & 0x80) ? i : -i); +} + +static inline short int ulaw2linear(unsigned char ulaw) +{ + short mu, e, f, y; + static short etab[] = {0, 132, 396, 924, 1980, 4092, 8316, 16764}; + + mu = 255 - ulaw; + e = (mu & 0x70) / 16; + f = mu & 0x0f; + y = f * (1 << (e + 3)); + y += etab[e]; + if (mu & 0x80) + y = -y; + return y; +} + +#define BIAS 0x84 /*!< define the add-in bias for 16 bit samples */ + +static unsigned char linear2ulaw(short sample) +{ + static int exp_lut[256] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; + int sign, exponent, mantissa; + unsigned char ulawbyte; + + /* Get the sample into sign-magnitude. */ + sign = (sample >> 8) & 0x80; /* set aside the sign */ + if (sign != 0) + sample = -sample; /* get magnitude */ + + /* Convert from 16 bit linear to ulaw. */ + sample = sample + BIAS; + exponent = exp_lut[(sample >> 7) & 0xFF]; + mantissa = (sample >> (exponent + 3)) & 0x0F; + ulawbyte = ~(sign | (exponent << 4) | mantissa); + + return ulawbyte; +} + +static int reverse_bits(int i) +{ + int z, j; + z = 0; + + for (j = 0; j < 8; j++) { + if ((i & (1 << j)) != 0) + z |= 1 << (7 - j); + } + return z; +} + + +void dsp_audio_generate_law_tables(void) +{ + int i; + for (i = 0; i < 256; i++) + dsp_audio_alaw_to_s32[i] = alaw2linear(reverse_bits(i)); + + for (i = 0; i < 256; i++) + dsp_audio_ulaw_to_s32[i] = ulaw2linear(reverse_bits(i)); + + for (i = 0; i < 256; i++) { + dsp_audio_alaw_to_ulaw[i] = + linear2ulaw(dsp_audio_alaw_to_s32[i]); + dsp_audio_ulaw_to_alaw[i] = + linear2alaw(dsp_audio_ulaw_to_s32[i]); + } +} + +void +dsp_audio_generate_s2law_table(void) +{ + int i; + + if (dsp_options & DSP_OPT_ULAW) { + /* generating ulaw-table */ + for (i = -32768; i < 32768; i++) { + dsp_audio_s16_to_law[i & 0xffff] = + reverse_bits(linear2ulaw(i)); + } + } else { + /* generating alaw-table */ + for (i = -32768; i < 32768; i++) { + dsp_audio_s16_to_law[i & 0xffff] = + reverse_bits(linear2alaw(i)); + } + } +} + + +/* + * the seven bit sample is the number of every second alaw-sample ordered by + * aplitude. 0x00 is negative, 0x7f is positive amplitude. 
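+ * (dsp_audio_law2seven[] maps a law sample to half its rank among the 256
+ * alaw values sorted by linear amplitude; dsp_audio_seven2law[] maps a
+ * 7-bit value back to every second sorted alaw sample, converted to the
+ * configured law.)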
+ */ +u8 dsp_audio_seven2law[128]; +u8 dsp_audio_law2seven[256]; + +/******************************************************************** + * generate table for conversion law from/to 7-bit alaw-like sample * + ********************************************************************/ + +void +dsp_audio_generate_seven(void) +{ + int i, j, k; + u8 spl; + u8 sorted_alaw[256]; + + /* generate alaw table, sorted by the linear value */ + for (i = 0; i < 256; i++) { + j = 0; + for (k = 0; k < 256; k++) { + if (dsp_audio_alaw_to_s32[k] + < dsp_audio_alaw_to_s32[i]) { + j++; + } + } + sorted_alaw[j] = i; + } + + /* generate tabels */ + for (i = 0; i < 256; i++) { + /* spl is the source: the law-sample (converted to alaw) */ + spl = i; + if (dsp_options & DSP_OPT_ULAW) + spl = dsp_audio_ulaw_to_alaw[i]; + /* find the 7-bit-sample */ + for (j = 0; j < 256; j++) { + if (sorted_alaw[j] == spl) + break; + } + /* write 7-bit audio value */ + dsp_audio_law2seven[i] = j >> 1; + } + for (i = 0; i < 128; i++) { + spl = sorted_alaw[i << 1]; + if (dsp_options & DSP_OPT_ULAW) + spl = dsp_audio_alaw_to_ulaw[spl]; + dsp_audio_seven2law[i] = spl; + } +} + + +/* mix 2*law -> law */ +u8 dsp_audio_mix_law[65536]; + +/****************************************************** + * generate mix table to mix two law samples into one * + ******************************************************/ + +void +dsp_audio_generate_mix_table(void) +{ + int i, j; + s32 sample; + + i = 0; + while (i < 256) { + j = 0; + while (j < 256) { + sample = dsp_audio_law_to_s32[i]; + sample += dsp_audio_law_to_s32[j]; + if (sample > 32767) + sample = 32767; + if (sample < -32768) + sample = -32768; + dsp_audio_mix_law[(i<<8)|j] = + dsp_audio_s16_to_law[sample & 0xffff]; + j++; + } + i++; + } +} + + +/************************************* + * generate different volume changes * + *************************************/ + +static u8 dsp_audio_reduce8[256]; +static u8 dsp_audio_reduce7[256]; +static u8 dsp_audio_reduce6[256]; +static u8 dsp_audio_reduce5[256]; +static u8 dsp_audio_reduce4[256]; +static u8 dsp_audio_reduce3[256]; +static u8 dsp_audio_reduce2[256]; +static u8 dsp_audio_reduce1[256]; +static u8 dsp_audio_increase1[256]; +static u8 dsp_audio_increase2[256]; +static u8 dsp_audio_increase3[256]; +static u8 dsp_audio_increase4[256]; +static u8 dsp_audio_increase5[256]; +static u8 dsp_audio_increase6[256]; +static u8 dsp_audio_increase7[256]; +static u8 dsp_audio_increase8[256]; + +static u8 *dsp_audio_volume_change[16] = { + dsp_audio_reduce8, + dsp_audio_reduce7, + dsp_audio_reduce6, + dsp_audio_reduce5, + dsp_audio_reduce4, + dsp_audio_reduce3, + dsp_audio_reduce2, + dsp_audio_reduce1, + dsp_audio_increase1, + dsp_audio_increase2, + dsp_audio_increase3, + dsp_audio_increase4, + dsp_audio_increase5, + dsp_audio_increase6, + dsp_audio_increase7, + dsp_audio_increase8, +}; + +void +dsp_audio_generate_volume_changes(void) +{ + register s32 sample; + int i; + int num[] = { 110, 125, 150, 175, 200, 300, 400, 500 }; + int denum[] = { 100, 100, 100, 100, 100, 100, 100, 100 }; + + i = 0; + while (i < 256) { + dsp_audio_reduce8[i] = dsp_audio_s16_to_law[ + (dsp_audio_law_to_s32[i] * denum[7] / num[7]) & 0xffff]; + dsp_audio_reduce7[i] = dsp_audio_s16_to_law[ + (dsp_audio_law_to_s32[i] * denum[6] / num[6]) & 0xffff]; + dsp_audio_reduce6[i] = dsp_audio_s16_to_law[ + (dsp_audio_law_to_s32[i] * denum[5] / num[5]) & 0xffff]; + dsp_audio_reduce5[i] = dsp_audio_s16_to_law[ + (dsp_audio_law_to_s32[i] * denum[4] / num[4]) & 0xffff]; + dsp_audio_reduce4[i] = 
dsp_audio_s16_to_law[ + (dsp_audio_law_to_s32[i] * denum[3] / num[3]) & 0xffff]; + dsp_audio_reduce3[i] = dsp_audio_s16_to_law[ + (dsp_audio_law_to_s32[i] * denum[2] / num[2]) & 0xffff]; + dsp_audio_reduce2[i] = dsp_audio_s16_to_law[ + (dsp_audio_law_to_s32[i] * denum[1] / num[1]) & 0xffff]; + dsp_audio_reduce1[i] = dsp_audio_s16_to_law[ + (dsp_audio_law_to_s32[i] * denum[0] / num[0]) & 0xffff]; + sample = dsp_audio_law_to_s32[i] * num[0] / denum[0]; + if (sample < -32768) + sample = -32768; + else if (sample > 32767) + sample = 32767; + dsp_audio_increase1[i] = dsp_audio_s16_to_law[sample & 0xffff]; + sample = dsp_audio_law_to_s32[i] * num[1] / denum[1]; + if (sample < -32768) + sample = -32768; + else if (sample > 32767) + sample = 32767; + dsp_audio_increase2[i] = dsp_audio_s16_to_law[sample & 0xffff]; + sample = dsp_audio_law_to_s32[i] * num[2] / denum[2]; + if (sample < -32768) + sample = -32768; + else if (sample > 32767) + sample = 32767; + dsp_audio_increase3[i] = dsp_audio_s16_to_law[sample & 0xffff]; + sample = dsp_audio_law_to_s32[i] * num[3] / denum[3]; + if (sample < -32768) + sample = -32768; + else if (sample > 32767) + sample = 32767; + dsp_audio_increase4[i] = dsp_audio_s16_to_law[sample & 0xffff]; + sample = dsp_audio_law_to_s32[i] * num[4] / denum[4]; + if (sample < -32768) + sample = -32768; + else if (sample > 32767) + sample = 32767; + dsp_audio_increase5[i] = dsp_audio_s16_to_law[sample & 0xffff]; + sample = dsp_audio_law_to_s32[i] * num[5] / denum[5]; + if (sample < -32768) + sample = -32768; + else if (sample > 32767) + sample = 32767; + dsp_audio_increase6[i] = dsp_audio_s16_to_law[sample & 0xffff]; + sample = dsp_audio_law_to_s32[i] * num[6] / denum[6]; + if (sample < -32768) + sample = -32768; + else if (sample > 32767) + sample = 32767; + dsp_audio_increase7[i] = dsp_audio_s16_to_law[sample & 0xffff]; + sample = dsp_audio_law_to_s32[i] * num[7] / denum[7]; + if (sample < -32768) + sample = -32768; + else if (sample > 32767) + sample = 32767; + dsp_audio_increase8[i] = dsp_audio_s16_to_law[sample & 0xffff]; + + i++; + } +} + + +/************************************** + * change the volume of the given skb * + **************************************/ + +/* this is a helper function for changing volume of skb. the range may be + * -8 to 8, which is a shift to the power of 2. 0 == no volume, 3 == volume*8 + */ +void +dsp_change_volume(struct sk_buff *skb, int volume) +{ + u8 *volume_change; + int i, ii; + u8 *p; + int shift; + + if (volume == 0) + return; + + /* get correct conversion table */ + if (volume < 0) { + shift = volume + 8; + if (shift < 0) + shift = 0; + } else { + shift = volume + 7; + if (shift > 15) + shift = 15; + } + volume_change = dsp_audio_volume_change[shift]; + i = 0; + ii = skb->len; + p = skb->data; + /* change volume */ + while (i < ii) { + *p = volume_change[*p]; + p++; + i++; + } +} + diff --git a/drivers/isdn/mISDN/dsp_biquad.h b/drivers/isdn/mISDN/dsp_biquad.h new file mode 100644 index 000000000000..038191bc45f5 --- /dev/null +++ b/drivers/isdn/mISDN/dsp_biquad.h @@ -0,0 +1,65 @@ +/* + * SpanDSP - a series of DSP components for telephony + * + * biquad.h - General telephony bi-quad section routines (currently this just + * handles canonic/type 2 form) + * + * Written by Steve Underwood + * + * Copyright (C) 2001 Steve Underwood + * + * All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +struct biquad2_state { + int32_t gain; + int32_t a1; + int32_t a2; + int32_t b1; + int32_t b2; + + int32_t z1; + int32_t z2; +}; + +static inline void biquad2_init(struct biquad2_state *bq, + int32_t gain, int32_t a1, int32_t a2, int32_t b1, int32_t b2) +{ + bq->gain = gain; + bq->a1 = a1; + bq->a2 = a2; + bq->b1 = b1; + bq->b2 = b2; + + bq->z1 = 0; + bq->z2 = 0; +} + +static inline int16_t biquad2(struct biquad2_state *bq, int16_t sample) +{ + int32_t y; + int32_t z0; + + z0 = sample*bq->gain + bq->z1*bq->a1 + bq->z2*bq->a2; + y = z0 + bq->z1*bq->b1 + bq->z2*bq->b2; + + bq->z2 = bq->z1; + bq->z1 = z0 >> 15; + y >>= 15; + return y; +} diff --git a/drivers/isdn/mISDN/dsp_blowfish.c b/drivers/isdn/mISDN/dsp_blowfish.c new file mode 100644 index 000000000000..18e411e95bba --- /dev/null +++ b/drivers/isdn/mISDN/dsp_blowfish.c @@ -0,0 +1,672 @@ +/* + * Blowfish encryption/decryption for mISDN_dsp. + * + * Copyright Andreas Eversberg (jolly@eversberg.eu) + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + * + */ + +#include +#include +#include "core.h" +#include "dsp.h" + +/* + * how to encode a sample stream to 64-bit blocks that will be encryped + * + * first of all, data is collected until a block of 9 samples are received. + * of course, a packet may have much more than 9 sample, but is may have + * not excacly the multiple of 9 samples. if there is a rest, the next + * received data will complete the block. + * + * the block is then converted to 9 uLAW samples without the least sigificant + * bit. the result is a 7-bit encoded sample. + * + * the samples will be reoganised to form 8 bytes of data: + * (5(6) means: encoded sample no. 5, bit 6) + * + * 0(6) 0(5) 0(4) 0(3) 0(2) 0(1) 0(0) 1(6) + * 1(5) 1(4) 1(3) 1(2) 1(1) 1(0) 2(6) 2(5) + * 2(4) 2(3) 2(2) 2(1) 2(0) 3(6) 3(5) 3(4) + * 3(3) 3(2) 3(1) 3(0) 4(6) 4(5) 4(4) 4(3) + * 4(2) 4(1) 4(0) 5(6) 5(5) 5(4) 5(3) 5(2) + * 5(1) 5(0) 6(6) 6(5) 6(4) 6(3) 6(2) 6(1) + * 6(0) 7(6) 7(5) 7(4) 7(3) 7(2) 7(1) 7(0) + * 8(6) 8(5) 8(4) 8(3) 8(2) 8(1) 8(0) + * + * the missing bit 0 of the last byte is filled with some + * random noise, to fill all 8 bytes. + * + * the 8 bytes will be encrypted using blowfish. + * + * the result will be converted into 9 bytes. 
the bit 7 is used for + * checksumme (CS) for sync (0, 1) and for the last bit: + * (5(6) means: crypted byte 5, bit 6) + * + * 1 0(7) 0(6) 0(5) 0(4) 0(3) 0(2) 0(1) + * 0 0(0) 1(7) 1(6) 1(5) 1(4) 1(3) 1(2) + * 0 1(1) 1(0) 2(7) 2(6) 2(5) 2(4) 2(3) + * 0 2(2) 2(1) 2(0) 3(7) 3(6) 3(5) 3(4) + * 0 3(3) 3(2) 3(1) 3(0) 4(7) 4(6) 4(5) + * CS 4(4) 4(3) 4(2) 4(1) 4(0) 5(7) 5(6) + * CS 5(5) 5(4) 5(3) 5(2) 5(1) 5(0) 6(7) + * CS 6(6) 6(5) 6(4) 6(3) 6(2) 6(1) 6(0) + * 7(0) 7(6) 7(5) 7(4) 7(3) 7(2) 7(1) 7(0) + * + * the checksum is used to detect transmission errors and frame drops. + * + * synchronisation of received block is done by shifting the upper bit of each + * byte (bit 7) to a shift register. if the rigister has the first five bits + * (10000), this is used to find the sync. only if sync has been found, the + * current block of 9 received bytes are decrypted. before that the check + * sum is calculated. if it is incorrect the block is dropped. + * this will avoid loud noise due to corrupt encrypted data. + * + * if the last block is corrupt, the current decoded block is repeated + * until a valid block has been received. + */ + +/* + * some blowfish parts are taken from the + * crypto-api for faster implementation + */ + +struct bf_ctx { + u32 p[18]; + u32 s[1024]; +}; + +static const u32 bf_pbox[16 + 2] = { + 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, + 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89, + 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c, + 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917, + 0x9216d5d9, 0x8979fb1b, +}; + +static const u32 bf_sbox[256 * 4] = { + 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7, + 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99, + 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16, + 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e, + 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee, + 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013, + 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef, + 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e, + 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60, + 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440, + 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce, + 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a, + 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e, + 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677, + 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193, + 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032, + 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88, + 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239, + 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e, + 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0, + 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3, + 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98, + 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88, + 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe, + 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6, + 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d, + 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b, + 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7, + 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba, + 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463, + 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f, + 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09, + 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3, + 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb, + 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279, + 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8, + 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab, + 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82, + 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db, + 
0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573, + 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0, + 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b, + 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790, + 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8, + 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4, + 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0, + 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7, + 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c, + 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad, + 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1, + 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299, + 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9, + 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477, + 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf, + 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49, + 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af, + 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa, + 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5, + 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41, + 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915, + 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400, + 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915, + 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664, + 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a, + 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623, + 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266, + 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1, + 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e, + 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6, + 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1, + 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e, + 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1, + 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737, + 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8, + 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff, + 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd, + 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701, + 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7, + 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41, + 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331, + 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf, + 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af, + 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e, + 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87, + 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c, + 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2, + 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16, + 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd, + 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b, + 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509, + 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e, + 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3, + 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f, + 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a, + 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4, + 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960, + 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66, + 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28, + 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802, + 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84, + 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510, + 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf, + 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14, + 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e, + 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50, + 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7, + 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8, + 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281, + 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99, + 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696, + 
0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128, + 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73, + 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0, + 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0, + 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105, + 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250, + 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3, + 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285, + 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00, + 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061, + 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb, + 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e, + 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735, + 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc, + 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9, + 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340, + 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20, + 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7, + 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934, + 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068, + 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af, + 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840, + 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45, + 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504, + 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a, + 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb, + 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee, + 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6, + 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42, + 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b, + 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2, + 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb, + 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527, + 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b, + 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33, + 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c, + 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3, + 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc, + 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17, + 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564, + 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b, + 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115, + 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922, + 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728, + 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0, + 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e, + 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37, + 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d, + 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804, + 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b, + 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3, + 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb, + 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d, + 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c, + 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350, + 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9, + 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a, + 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe, + 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d, + 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc, + 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f, + 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61, + 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2, + 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9, + 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2, + 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c, + 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e, + 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633, + 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10, + 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169, + 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52, + 
0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027, + 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5, + 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62, + 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634, + 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76, + 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24, + 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc, + 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4, + 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c, + 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837, + 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0, + 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b, + 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe, + 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b, + 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4, + 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8, + 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6, + 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304, + 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22, + 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4, + 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6, + 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9, + 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59, + 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593, + 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51, + 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28, + 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c, + 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b, + 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28, + 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c, + 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd, + 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a, + 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319, + 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb, + 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f, + 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991, + 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32, + 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680, + 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166, + 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae, + 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb, + 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5, + 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47, + 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370, + 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d, + 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84, + 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048, + 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8, + 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd, + 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9, + 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7, + 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38, + 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f, + 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c, + 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525, + 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1, + 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442, + 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964, + 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e, + 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8, + 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d, + 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f, + 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299, + 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02, + 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc, + 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614, + 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a, + 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6, + 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b, + 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0, + 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060, + 
0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e, + 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9, + 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f, + 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6, +}; + +/* + * Round loop unrolling macros, S is a pointer to a S-Box array + * organized in 4 unsigned longs at a row. + */ +#define GET32_3(x) (((x) & 0xff)) +#define GET32_2(x) (((x) >> (8)) & (0xff)) +#define GET32_1(x) (((x) >> (16)) & (0xff)) +#define GET32_0(x) (((x) >> (24)) & (0xff)) + +#define bf_F(x) (((S[GET32_0(x)] + S[256 + GET32_1(x)]) ^ \ + S[512 + GET32_2(x)]) + S[768 + GET32_3(x)]) + +#define EROUND(a, b, n) do { b ^= P[n]; a ^= bf_F(b); } while (0) +#define DROUND(a, b, n) do { a ^= bf_F(b); b ^= P[n]; } while (0) + + +/* + * encrypt isdn data frame + * every block with 9 samples is encrypted + */ +void +dsp_bf_encrypt(struct dsp *dsp, u8 *data, int len) +{ + int i = 0, j = dsp->bf_crypt_pos; + u8 *bf_data_in = dsp->bf_data_in; + u8 *bf_crypt_out = dsp->bf_crypt_out; + u32 *P = dsp->bf_p; + u32 *S = dsp->bf_s; + u32 yl, yr; + u32 cs; + u8 nibble; + + while (i < len) { + /* collect a block of 9 samples */ + if (j < 9) { + bf_data_in[j] = *data; + *data++ = bf_crypt_out[j++]; + i++; + continue; + } + j = 0; + /* transcode 9 samples xlaw to 8 bytes */ + yl = dsp_audio_law2seven[bf_data_in[0]]; + yl = (yl<<7) | dsp_audio_law2seven[bf_data_in[1]]; + yl = (yl<<7) | dsp_audio_law2seven[bf_data_in[2]]; + yl = (yl<<7) | dsp_audio_law2seven[bf_data_in[3]]; + nibble = dsp_audio_law2seven[bf_data_in[4]]; + yr = nibble; + yl = (yl<<4) | (nibble>>3); + yr = (yr<<7) | dsp_audio_law2seven[bf_data_in[5]]; + yr = (yr<<7) | dsp_audio_law2seven[bf_data_in[6]]; + yr = (yr<<7) | dsp_audio_law2seven[bf_data_in[7]]; + yr = (yr<<7) | dsp_audio_law2seven[bf_data_in[8]]; + yr = (yr<<1) | (bf_data_in[0] & 1); + + /* fill unused bit with random noise of audio input */ + /* encrypt */ + + EROUND(yr, yl, 0); + EROUND(yl, yr, 1); + EROUND(yr, yl, 2); + EROUND(yl, yr, 3); + EROUND(yr, yl, 4); + EROUND(yl, yr, 5); + EROUND(yr, yl, 6); + EROUND(yl, yr, 7); + EROUND(yr, yl, 8); + EROUND(yl, yr, 9); + EROUND(yr, yl, 10); + EROUND(yl, yr, 11); + EROUND(yr, yl, 12); + EROUND(yl, yr, 13); + EROUND(yr, yl, 14); + EROUND(yl, yr, 15); + yl ^= P[16]; + yr ^= P[17]; + + /* calculate 3-bit checksumme */ + cs = yl ^ (yl>>3) ^ (yl>>6) ^ (yl>>9) ^ (yl>>12) ^ (yl>>15) + ^ (yl>>18) ^ (yl>>21) ^ (yl>>24) ^ (yl>>27) ^ (yl>>30) + ^ (yr<<2) ^ (yr>>1) ^ (yr>>4) ^ (yr>>7) ^ (yr>>10) + ^ (yr>>13) ^ (yr>>16) ^ (yr>>19) ^ (yr>>22) ^ (yr>>25) + ^ (yr>>28) ^ (yr>>31); + + /* + * transcode 8 crypted bytes to 9 data bytes with sync + * and checksum information + */ + bf_crypt_out[0] = (yl>>25) | 0x80; + bf_crypt_out[1] = (yl>>18) & 0x7f; + bf_crypt_out[2] = (yl>>11) & 0x7f; + bf_crypt_out[3] = (yl>>4) & 0x7f; + bf_crypt_out[4] = ((yl<<3) & 0x78) | ((yr>>29) & 0x07); + bf_crypt_out[5] = ((yr>>22) & 0x7f) | ((cs<<5) & 0x80); + bf_crypt_out[6] = ((yr>>15) & 0x7f) | ((cs<<6) & 0x80); + bf_crypt_out[7] = ((yr>>8) & 0x7f) | (cs<<7); + bf_crypt_out[8] = yr; + } + + /* write current count */ + dsp->bf_crypt_pos = j; + +} + + +/* + * decrypt isdn data frame + * every block with 9 bytes is decrypted + */ +void +dsp_bf_decrypt(struct dsp *dsp, u8 *data, int len) +{ + int i = 0; + u8 j = dsp->bf_decrypt_in_pos; + u8 k = dsp->bf_decrypt_out_pos; + u8 *bf_crypt_inring = dsp->bf_crypt_inring; + u8 *bf_data_out = dsp->bf_data_out; + u16 sync = dsp->bf_sync; + u32 *P = dsp->bf_p; + u32 *S = dsp->bf_s; + u32 yl, yr; + u8 nibble; + u8 cs, cs0, cs1, cs2; + + 
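/*
+	 * For each received byte: shift its MSB into the sync register, store
+	 * it in the 16-byte ring and emit the previously decoded sample. Once
+	 * the sync pattern is seen and the 3-bit checksum of the block matches,
+	 * the block is decrypted and transcoded back into nine law samples.
+	 */
+	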
while (i < len) { + /* + * shift upper bit and rotate data to buffer ring + * send current decrypted data + */ + sync = (sync<<1) | ((*data)>>7); + bf_crypt_inring[j++ & 15] = *data; + *data++ = bf_data_out[k++]; + i++; + if (k == 9) + k = 0; /* repeat if no sync has been found */ + /* check if not in sync */ + if ((sync&0x1f0) != 0x100) + continue; + j -= 9; + /* transcode receive data to 64 bit block of encrypted data */ + yl = bf_crypt_inring[j++ & 15]; + yl = (yl<<7) | bf_crypt_inring[j++ & 15]; /* bit7 = 0 */ + yl = (yl<<7) | bf_crypt_inring[j++ & 15]; /* bit7 = 0 */ + yl = (yl<<7) | bf_crypt_inring[j++ & 15]; /* bit7 = 0 */ + nibble = bf_crypt_inring[j++ & 15]; /* bit7 = 0 */ + yr = nibble; + yl = (yl<<4) | (nibble>>3); + cs2 = bf_crypt_inring[j++ & 15]; + yr = (yr<<7) | (cs2 & 0x7f); + cs1 = bf_crypt_inring[j++ & 15]; + yr = (yr<<7) | (cs1 & 0x7f); + cs0 = bf_crypt_inring[j++ & 15]; + yr = (yr<<7) | (cs0 & 0x7f); + yr = (yr<<8) | bf_crypt_inring[j++ & 15]; + + /* calculate 3-bit checksumme */ + cs = yl ^ (yl>>3) ^ (yl>>6) ^ (yl>>9) ^ (yl>>12) ^ (yl>>15) + ^ (yl>>18) ^ (yl>>21) ^ (yl>>24) ^ (yl>>27) ^ (yl>>30) + ^ (yr<<2) ^ (yr>>1) ^ (yr>>4) ^ (yr>>7) ^ (yr>>10) + ^ (yr>>13) ^ (yr>>16) ^ (yr>>19) ^ (yr>>22) ^ (yr>>25) + ^ (yr>>28) ^ (yr>>31); + + /* check if frame is valid */ + if ((cs&0x7) != (((cs2>>5)&4) | ((cs1>>6)&2) | (cs0 >> 7))) { + if (dsp_debug & DEBUG_DSP_BLOWFISH) + printk(KERN_DEBUG + "DSP BLOWFISH: received corrupt frame, " + "checksumme is not correct\n"); + continue; + } + + /* decrypt */ + yr ^= P[17]; + yl ^= P[16]; + DROUND(yl, yr, 15); + DROUND(yr, yl, 14); + DROUND(yl, yr, 13); + DROUND(yr, yl, 12); + DROUND(yl, yr, 11); + DROUND(yr, yl, 10); + DROUND(yl, yr, 9); + DROUND(yr, yl, 8); + DROUND(yl, yr, 7); + DROUND(yr, yl, 6); + DROUND(yl, yr, 5); + DROUND(yr, yl, 4); + DROUND(yl, yr, 3); + DROUND(yr, yl, 2); + DROUND(yl, yr, 1); + DROUND(yr, yl, 0); + + /* transcode 8 crypted bytes to 9 sample bytes */ + bf_data_out[0] = dsp_audio_seven2law[(yl>>25) & 0x7f]; + bf_data_out[1] = dsp_audio_seven2law[(yl>>18) & 0x7f]; + bf_data_out[2] = dsp_audio_seven2law[(yl>>11) & 0x7f]; + bf_data_out[3] = dsp_audio_seven2law[(yl>>4) & 0x7f]; + bf_data_out[4] = dsp_audio_seven2law[((yl<<3) & 0x78) | + ((yr>>29) & 0x07)]; + + bf_data_out[5] = dsp_audio_seven2law[(yr>>22) & 0x7f]; + bf_data_out[6] = dsp_audio_seven2law[(yr>>15) & 0x7f]; + bf_data_out[7] = dsp_audio_seven2law[(yr>>8) & 0x7f]; + bf_data_out[8] = dsp_audio_seven2law[(yr>>1) & 0x7f]; + k = 0; /* start with new decoded frame */ + } + + /* write current count and sync */ + dsp->bf_decrypt_in_pos = j; + dsp->bf_decrypt_out_pos = k; + dsp->bf_sync = sync; +} + + +/* used to encrypt S and P boxes */ +static inline void +encrypt_block(const u32 *P, const u32 *S, u32 *dst, u32 *src) +{ + u32 yl = src[0]; + u32 yr = src[1]; + + EROUND(yr, yl, 0); + EROUND(yl, yr, 1); + EROUND(yr, yl, 2); + EROUND(yl, yr, 3); + EROUND(yr, yl, 4); + EROUND(yl, yr, 5); + EROUND(yr, yl, 6); + EROUND(yl, yr, 7); + EROUND(yr, yl, 8); + EROUND(yl, yr, 9); + EROUND(yr, yl, 10); + EROUND(yl, yr, 11); + EROUND(yr, yl, 12); + EROUND(yl, yr, 13); + EROUND(yr, yl, 14); + EROUND(yl, yr, 15); + + yl ^= P[16]; + yr ^= P[17]; + + dst[0] = yr; + dst[1] = yl; +} + +/* + * initialize the dsp for encryption and decryption using the same key + * Calculates the blowfish S and P boxes for encryption and decryption. + * The margin of keylen must be 4-56 bytes. + * returns 0 if ok. 
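+ * Typical call order (sketch): dsp_bf_init() once when encryption is
+ * enabled, dsp_bf_encrypt()/dsp_bf_decrypt() on each transmitted/received
+ * frame, and dsp_bf_cleanup() to turn encryption off again.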
+ */ +int +dsp_bf_init(struct dsp *dsp, const u8 *key, uint keylen) +{ + short i, j, count; + u32 data[2], temp; + u32 *P = (u32 *)dsp->bf_p; + u32 *S = (u32 *)dsp->bf_s; + + if (keylen < 4 || keylen > 56) + return 1; + + /* Set dsp states */ + i = 0; + while (i < 9) { + dsp->bf_crypt_out[i] = 0xff; + dsp->bf_data_out[i] = dsp_silence; + i++; + } + dsp->bf_crypt_pos = 0; + dsp->bf_decrypt_in_pos = 0; + dsp->bf_decrypt_out_pos = 0; + dsp->bf_sync = 0x1ff; + dsp->bf_enable = 1; + + /* Copy the initialization s-boxes */ + for (i = 0, count = 0; i < 256; i++) + for (j = 0; j < 4; j++, count++) + S[count] = bf_sbox[count]; + + /* Set the p-boxes */ + for (i = 0; i < 16 + 2; i++) + P[i] = bf_pbox[i]; + + /* Actual subkey generation */ + for (j = 0, i = 0; i < 16 + 2; i++) { + temp = (((u32)key[j] << 24) | + ((u32)key[(j + 1) % keylen] << 16) | + ((u32)key[(j + 2) % keylen] << 8) | + ((u32)key[(j + 3) % keylen])); + + P[i] = P[i] ^ temp; + j = (j + 4) % keylen; + } + + data[0] = 0x00000000; + data[1] = 0x00000000; + + for (i = 0; i < 16 + 2; i += 2) { + encrypt_block(P, S, data, data); + + P[i] = data[0]; + P[i + 1] = data[1]; + } + + for (i = 0; i < 4; i++) { + for (j = 0, count = i * 256; j < 256; j += 2, count += 2) { + encrypt_block(P, S, data, data); + + S[count] = data[0]; + S[count + 1] = data[1]; + } + } + + return 0; +} + + +/* + * turn encryption off + */ +void +dsp_bf_cleanup(struct dsp *dsp) +{ + dsp->bf_enable = 0; +} diff --git a/drivers/isdn/mISDN/dsp_cmx.c b/drivers/isdn/mISDN/dsp_cmx.c new file mode 100644 index 000000000000..e92b1ba4b45e --- /dev/null +++ b/drivers/isdn/mISDN/dsp_cmx.c @@ -0,0 +1,1886 @@ +/* + * Audio crossconnecting/conferrencing (hardware level). + * + * Copyright 2002 by Andreas Eversberg (jolly@eversberg.eu) + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + * + */ + +/* + * The process of adding and removing parties to/from a conference: + * + * There is a chain of struct dsp_conf which has one or more members in a chain + * of struct dsp_conf_member. + * + * After a party is added, the conference is checked for hardware capability. + * Also if a party is removed, the conference is checked again. + * + * There are 3 different solutions: -1 = software, 0 = hardware-crossconnect + * 1-n = hardware-conference. The n will give the conference number. + * + * Depending on the change after removal or insertion of a party, hardware + * commands are given. + * + * The current solution is stored within the struct dsp_conf entry. + */ + +/* + * HOW THE CMX WORKS: + * + * There are 3 types of interaction: One member is alone, in this case only + * data flow from upper to lower layer is done. + * Two members will also exchange their data so they are crossconnected. + * Three or more members will be added in a conference and will hear each + * other but will not receive their own speech (echo) if not enabled. + * + * Features of CMX are: + * - Crossconnecting or even conference, if more than two members are together. + * - Force mixing of transmit data with other crossconnect/conference members. + * - Echo generation to benchmark the delay of audio processing. + * - Use hardware to minimize cpu load, disable FIFO load and minimize delay. + * - Dejittering and clock generation. 
+ * + * There are 2 buffers: + * + * + * RX-Buffer + * R W + * | | + * ----------------+-------------+------------------- + * + * The rx-buffer is a ring buffer used to store the received data for each + * individual member. This is only the case if data needs to be dejittered + * or in case of a conference where different clocks require reclocking. + * The transmit-clock (R) will read the buffer. + * If the clock overruns the write-pointer, we will have a buffer underrun. + * If the write pointer always has a certain distance from the transmit- + * clock, we will have a delay. The delay will dynamically be increased and + * reduced. + * + * + * TX-Buffer + * R W + * | | + * -----------------+--------+----------------------- + * + * The tx-buffer is a ring buffer to queue the transmit data from user space + * until it will be mixed or sent. There are two pointers, R and W. If the write + * pointer W would reach or overrun R, the buffer would overrun. In this case + * (some) data is dropped so that it will not overrun. + * Additionally a dynamic dejittering can be enabled. this allows data from + * user space that have jitter and different clock source. + * + * + * Clock: + * + * A Clock is not required, if the data source has exactly one clock. In this + * case the data source is forwarded to the destination. + * + * A Clock is required, because the data source + * - has multiple clocks. + * - has no usable clock due to jitter or packet loss (VoIP). + * In this case the system's clock is used. The clock resolution depends on + * the jiffie resolution. + * + * If a member joins a conference: + * + * - If a member joins, its rx_buff is set to silence and change read pointer + * to transmit clock. + * + * The procedure of received data from card is explained in cmx_receive. + * The procedure of received data from user space is explained in cmx_transmit. + * The procedure of transmit data to card is cmx_send. + * + * + * Interaction with other features: + * + * DTMF: + * DTMF decoding is done before the data is crossconnected. + * + * Volume change: + * Changing rx-volume is done before the data is crossconnected. The tx-volume + * must be changed whenever data is transmitted to the card by the cmx. + * + * Tones: + * If a tone is enabled, it will be processed whenever data is transmitted to + * the card. It will replace the tx-data from the user space. + * If tones are generated by hardware, this conference member is removed for + * this time. + * + * Disable rx-data: + * If cmx is realized in hardware, rx data will be disabled if requested by + * the upper layer. If dtmf decoding is done by software and enabled, rx data + * will not be diabled but blocked to the upper layer. + * + * HFC conference engine: + * If it is possible to realize all features using hardware, hardware will be + * used if not forbidden by control command. Disabling rx-data provides + * absolutely traffic free audio processing. 
(except for the quick 1-frame + * upload of a tone loop, only once for a new tone) + * + */ + +/* delay.h is required for hw_lock.h */ + +#include +#include +#include +#include "core.h" +#include "dsp.h" +/* + * debugging of multi party conference, + * by using conference even with two members + */ + +/* #define CMX_CONF_DEBUG */ + +/*#define CMX_DEBUG * massive read/write pointer output */ +/*#define CMX_TX_DEBUG * massive read/write on tx-buffer with content */ + +static inline int +count_list_member(struct list_head *head) +{ + int cnt = 0; + struct list_head *m; + + list_for_each(m, head) + cnt++; + return cnt; +} + +/* + * debug cmx memory structure + */ +void +dsp_cmx_debug(struct dsp *dsp) +{ + struct dsp_conf *conf; + struct dsp_conf_member *member; + struct dsp *odsp; + + printk(KERN_DEBUG "-----Current DSP\n"); + list_for_each_entry(odsp, &dsp_ilist, list) { + printk(KERN_DEBUG "* %s echo=%d txmix=%d", + odsp->name, odsp->echo, odsp->tx_mix); + if (odsp->conf) + printk(" (Conf %d)", odsp->conf->id); + if (dsp == odsp) + printk(" *this*"); + printk("\n"); + } + printk(KERN_DEBUG "-----Current Conf:\n"); + list_for_each_entry(conf, &conf_ilist, list) { + printk(KERN_DEBUG "* Conf %d (%p)\n", conf->id, conf); + list_for_each_entry(member, &conf->mlist, list) { + printk(KERN_DEBUG + " - member = %s (slot_tx %d, bank_tx %d, " + "slot_rx %d, bank_rx %d hfc_conf %d)%s\n", + member->dsp->name, member->dsp->pcm_slot_tx, + member->dsp->pcm_bank_tx, member->dsp->pcm_slot_rx, + member->dsp->pcm_bank_rx, member->dsp->hfc_conf, + (member->dsp == dsp) ? " *this*" : ""); + } + } + printk(KERN_DEBUG "-----end\n"); +} + +/* + * search conference + */ +static struct dsp_conf * +dsp_cmx_search_conf(u32 id) +{ + struct dsp_conf *conf; + + if (!id) { + printk(KERN_WARNING "%s: conference ID is 0.\n", __func__); + return NULL; + } + + /* search conference */ + list_for_each_entry(conf, &conf_ilist, list) + if (conf->id == id) + return conf; + + return NULL; +} + + +/* + * add member to conference + */ +static int +dsp_cmx_add_conf_member(struct dsp *dsp, struct dsp_conf *conf) +{ + struct dsp_conf_member *member; + + if (!conf || !dsp) { + printk(KERN_WARNING "%s: conf or dsp is 0.\n", __func__); + return -EINVAL; + } + if (dsp->member) { + printk(KERN_WARNING "%s: dsp is already member in a conf.\n", + __func__); + return -EINVAL; + } + + if (dsp->conf) { + printk(KERN_WARNING "%s: dsp is already in a conf.\n", + __func__); + return -EINVAL; + } + + member = kzalloc(sizeof(struct dsp_conf_member), GFP_ATOMIC); + if (!member) { + printk(KERN_ERR "kmalloc struct dsp_conf_member failed\n"); + return -ENOMEM; + } + member->dsp = dsp; + /* clear rx buffer */ + memset(dsp->rx_buff, dsp_silence, sizeof(dsp->rx_buff)); + dsp->rx_init = 1; /* rx_W and rx_R will be adjusted on first frame */ + dsp->rx_W = 0; + dsp->rx_R = 0; + + list_add_tail(&member->list, &conf->mlist); + + dsp->conf = conf; + dsp->member = member; + + return 0; +} + + +/* + * del member from conference + */ +int +dsp_cmx_del_conf_member(struct dsp *dsp) +{ + struct dsp_conf_member *member; + + if (!dsp) { + printk(KERN_WARNING "%s: dsp is 0.\n", + __func__); + return -EINVAL; + } + + if (!dsp->conf) { + printk(KERN_WARNING "%s: dsp is not in a conf.\n", + __func__); + return -EINVAL; + } + + if (list_empty(&dsp->conf->mlist)) { + printk(KERN_WARNING "%s: dsp has linked an empty conf.\n", + __func__); + return -EINVAL; + } + + /* find us in conf */ + list_for_each_entry(member, &dsp->conf->mlist, list) { + if (member->dsp == dsp) { + 
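/* found our entry: unlink it and detach the dsp from the conf */
+			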
list_del(&member->list); + dsp->conf = NULL; + dsp->member = NULL; + kfree(member); + return 0; + } + } + printk(KERN_WARNING + "%s: dsp is not present in its own conf_meber list.\n", + __func__); + + return -EINVAL; +} + + +/* + * new conference + */ +static struct dsp_conf +*dsp_cmx_new_conf(u32 id) +{ + struct dsp_conf *conf; + + if (!id) { + printk(KERN_WARNING "%s: id is 0.\n", + __func__); + return NULL; + } + + conf = kzalloc(sizeof(struct dsp_conf), GFP_ATOMIC); + if (!conf) { + printk(KERN_ERR "kmalloc struct dsp_conf failed\n"); + return NULL; + } + INIT_LIST_HEAD(&conf->mlist); + conf->id = id; + + list_add_tail(&conf->list, &conf_ilist); + + return conf; +} + + +/* + * del conference + */ +int +dsp_cmx_del_conf(struct dsp_conf *conf) +{ + if (!conf) { + printk(KERN_WARNING "%s: conf is null.\n", + __func__); + return -EINVAL; + } + + if (!list_empty(&conf->mlist)) { + printk(KERN_WARNING "%s: conf not empty.\n", + __func__); + return -EINVAL; + } + list_del(&conf->list); + kfree(conf); + + return 0; +} + + +/* + * send HW message to hfc card + */ +static void +dsp_cmx_hw_message(struct dsp *dsp, u32 message, u32 param1, u32 param2, + u32 param3, u32 param4) +{ + struct mISDN_ctrl_req cq; + + memset(&cq, 0, sizeof(cq)); + cq.op = message; + cq.p1 = param1 | (param2 << 8); + cq.p2 = param3 | (param4 << 8); + if (dsp->ch.peer) + dsp->ch.peer->ctrl(dsp->ch.peer, CONTROL_CHANNEL, &cq); +} + + +/* + * do hardware update and set the software/hardware flag + * + * either a conference or a dsp instance can be given + * if only dsp instance is given, the instance is not associated with a conf + * and therefore removed. if a conference is given, the dsp is expected to + * be member of that conference. + */ +void +dsp_cmx_hardware(struct dsp_conf *conf, struct dsp *dsp) +{ + struct dsp_conf_member *member, *nextm; + struct dsp *finddsp; + int memb = 0, i, ii, i1, i2; + int freeunits[8]; + u_char freeslots[256]; + int same_hfc = -1, same_pcm = -1, current_conf = -1, + all_conf = 1; + + /* dsp gets updated (no conf) */ + if (!conf) { + if (!dsp) + return; + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG "%s checking dsp %s\n", + __func__, dsp->name); +one_member: + /* remove HFC conference if enabled */ + if (dsp->hfc_conf >= 0) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s removing %s from HFC conf %d " + "because dsp is split\n", __func__, + dsp->name, dsp->hfc_conf); + dsp_cmx_hw_message(dsp, MISDN_CTRL_HFC_CONF_SPLIT, + 0, 0, 0, 0); + dsp->hfc_conf = -1; + } + /* process hw echo */ + if (dsp->features.pcm_banks < 1) + return; + if (!dsp->echo) { + /* NO ECHO: remove PCM slot if assigned */ + if (dsp->pcm_slot_tx >= 0 || dsp->pcm_slot_rx >= 0) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG "%s removing %s from" + " PCM slot %d (TX) %d (RX) because" + " dsp is split (no echo)\n", + __func__, dsp->name, + dsp->pcm_slot_tx, dsp->pcm_slot_rx); + dsp_cmx_hw_message(dsp, MISDN_CTRL_HFC_PCM_DISC, + 0, 0, 0, 0); + dsp->pcm_slot_tx = -1; + dsp->pcm_bank_tx = -1; + dsp->pcm_slot_rx = -1; + dsp->pcm_bank_rx = -1; + } + return; + } + /* ECHO: already echo */ + if (dsp->pcm_slot_tx >= 0 && dsp->pcm_slot_rx < 0 && + dsp->pcm_bank_tx == 2 && dsp->pcm_bank_rx == 2) + return; + /* ECHO: if slot already assigned */ + if (dsp->pcm_slot_tx >= 0) { + dsp->pcm_slot_rx = dsp->pcm_slot_tx; + dsp->pcm_bank_tx = 2; /* 2 means loop */ + dsp->pcm_bank_rx = 2; + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s refresh %s for echo using slot %d\n", + __func__, dsp->name, + 
dsp->pcm_slot_tx); + dsp_cmx_hw_message(dsp, MISDN_CTRL_HFC_PCM_CONN, + dsp->pcm_slot_tx, 2, dsp->pcm_slot_rx, 2); + return; + } + /* ECHO: find slot */ + dsp->pcm_slot_tx = -1; + dsp->pcm_slot_rx = -1; + memset(freeslots, 1, sizeof(freeslots)); + list_for_each_entry(finddsp, &dsp_ilist, list) { + if (finddsp->features.pcm_id == dsp->features.pcm_id) { + if (finddsp->pcm_slot_rx >= 0 && + finddsp->pcm_slot_rx < sizeof(freeslots)) + freeslots[finddsp->pcm_slot_tx] = 0; + if (finddsp->pcm_slot_tx >= 0 && + finddsp->pcm_slot_tx < sizeof(freeslots)) + freeslots[finddsp->pcm_slot_rx] = 0; + } + } + i = 0; + ii = dsp->features.pcm_slots; + while (i < ii) { + if (freeslots[i]) + break; + i++; + } + if (i == ii) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s no slot available for echo\n", + __func__); + /* no more slots available */ + return; + } + /* assign free slot */ + dsp->pcm_slot_tx = i; + dsp->pcm_slot_rx = i; + dsp->pcm_bank_tx = 2; /* loop */ + dsp->pcm_bank_rx = 2; + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s assign echo for %s using slot %d\n", + __func__, dsp->name, dsp->pcm_slot_tx); + dsp_cmx_hw_message(dsp, MISDN_CTRL_HFC_PCM_CONN, + dsp->pcm_slot_tx, 2, dsp->pcm_slot_rx, 2); + return; + } + + /* conf gets updated (all members) */ + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG "%s checking conference %d\n", + __func__, conf->id); + + if (list_empty(&conf->mlist)) { + printk(KERN_ERR "%s: conference whithout members\n", + __func__); + return; + } + member = list_entry(conf->mlist.next, struct dsp_conf_member, list); + same_hfc = member->dsp->features.hfc_id; + same_pcm = member->dsp->features.pcm_id; + /* check all members in our conference */ + list_for_each_entry(member, &conf->mlist, list) { + /* check if member uses mixing */ + if (member->dsp->tx_mix) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s dsp %s cannot form a conf, because " + "tx_mix is turned on\n", __func__, + member->dsp->name); +conf_software: + list_for_each_entry(member, &conf->mlist, list) { + dsp = member->dsp; + /* remove HFC conference if enabled */ + if (dsp->hfc_conf >= 0) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s removing %s from HFC " + "conf %d because not " + "possible with hardware\n", + __func__, + dsp->name, + dsp->hfc_conf); + dsp_cmx_hw_message(dsp, + MISDN_CTRL_HFC_CONF_SPLIT, + 0, 0, 0, 0); + dsp->hfc_conf = -1; + } + /* remove PCM slot if assigned */ + if (dsp->pcm_slot_tx >= 0 || + dsp->pcm_slot_rx >= 0) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG "%s removing " + "%s from PCM slot %d (TX)" + " slot %d (RX) because not" + " possible with hardware\n", + __func__, + dsp->name, + dsp->pcm_slot_tx, + dsp->pcm_slot_rx); + dsp_cmx_hw_message(dsp, + MISDN_CTRL_HFC_PCM_DISC, + 0, 0, 0, 0); + dsp->pcm_slot_tx = -1; + dsp->pcm_bank_tx = -1; + dsp->pcm_slot_rx = -1; + dsp->pcm_bank_rx = -1; + } + } + conf->hardware = 0; + conf->software = 1; + return; + } + /* check if member has echo turned on */ + if (member->dsp->echo) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s dsp %s cannot form a conf, because " + "echo is turned on\n", __func__, + member->dsp->name); + goto conf_software; + } + /* check if member has tx_mix turned on */ + if (member->dsp->tx_mix) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s dsp %s cannot form a conf, because " + "tx_mix is turned on\n", + __func__, member->dsp->name); + goto conf_software; + } + /* check if member changes volume at an not suppoted level */ + if 
(member->dsp->tx_volume) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s dsp %s cannot form a conf, because " + "tx_volume is changed\n", + __func__, member->dsp->name); + goto conf_software; + } + if (member->dsp->rx_volume) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s dsp %s cannot form a conf, because " + "rx_volume is changed\n", + __func__, member->dsp->name); + goto conf_software; + } + /* check if tx-data turned on */ + if (member->dsp->tx_data) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s dsp %s cannot form a conf, because " + "tx_data is turned on\n", + __func__, member->dsp->name); + goto conf_software; + } + /* check if pipeline exists */ + if (member->dsp->pipeline.inuse) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s dsp %s cannot form a conf, because " + "pipeline exists\n", __func__, + member->dsp->name); + goto conf_software; + } + /* check if encryption is enabled */ + if (member->dsp->bf_enable) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG "%s dsp %s cannot form a " + "conf, because encryption is enabled\n", + __func__, member->dsp->name); + goto conf_software; + } + /* check if member is on a card with PCM support */ + if (member->dsp->features.pcm_id < 0) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s dsp %s cannot form a conf, because " + "dsp has no PCM bus\n", + __func__, member->dsp->name); + goto conf_software; + } + /* check if relations are on the same PCM bus */ + if (member->dsp->features.pcm_id != same_pcm) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s dsp %s cannot form a conf, because " + "dsp is on a different PCM bus than the " + "first dsp\n", + __func__, member->dsp->name); + goto conf_software; + } + /* determine if members are on the same hfc chip */ + if (same_hfc != member->dsp->features.hfc_id) + same_hfc = -1; + /* if there are members already in a conference */ + if (current_conf < 0 && member->dsp->hfc_conf >= 0) + current_conf = member->dsp->hfc_conf; + /* if any member is not in a conference */ + if (member->dsp->hfc_conf < 0) + all_conf = 0; + + memb++; + } + + /* if no member, this is an error */ + if (memb < 1) + return; + + /* one member */ + if (memb == 1) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s conf %d cannot form a HW conference, " + "because dsp is alone\n", __func__, conf->id); + conf->hardware = 0; + conf->software = 0; + member = list_entry(conf->mlist.next, struct dsp_conf_member, + list); + dsp = member->dsp; + goto one_member; + } + + /* + * ok, now we are sure that all members are on the same pcm. + * now we will see if we have only two members, so we can do + * crossconnections, which don't have any limitations. 
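+ * (Unlike the HFC conference units, which this driver limits to eight per
+ * chip, a plain PCM crossconnect only needs free timeslots.)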
+ */ + + /* if we have only two members */ + if (memb == 2) { + member = list_entry(conf->mlist.next, struct dsp_conf_member, + list); + nextm = list_entry(member->list.next, struct dsp_conf_member, + list); + /* remove HFC conference if enabled */ + if (member->dsp->hfc_conf >= 0) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s removing %s from HFC conf %d because " + "two parties require only a PCM slot\n", + __func__, member->dsp->name, + member->dsp->hfc_conf); + dsp_cmx_hw_message(member->dsp, + MISDN_CTRL_HFC_CONF_SPLIT, 0, 0, 0, 0); + member->dsp->hfc_conf = -1; + } + if (nextm->dsp->hfc_conf >= 0) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s removing %s from HFC conf %d because " + "two parties require only a PCM slot\n", + __func__, nextm->dsp->name, + nextm->dsp->hfc_conf); + dsp_cmx_hw_message(nextm->dsp, + MISDN_CTRL_HFC_CONF_SPLIT, 0, 0, 0, 0); + nextm->dsp->hfc_conf = -1; + } + /* if members have two banks (and not on the same chip) */ + if (member->dsp->features.pcm_banks > 1 && + nextm->dsp->features.pcm_banks > 1 && + member->dsp->features.hfc_id != + nextm->dsp->features.hfc_id) { + /* if both members have same slots with crossed banks */ + if (member->dsp->pcm_slot_tx >= 0 && + member->dsp->pcm_slot_rx >= 0 && + nextm->dsp->pcm_slot_tx >= 0 && + nextm->dsp->pcm_slot_rx >= 0 && + nextm->dsp->pcm_slot_tx == + member->dsp->pcm_slot_rx && + nextm->dsp->pcm_slot_rx == + member->dsp->pcm_slot_tx && + nextm->dsp->pcm_slot_tx == + member->dsp->pcm_slot_tx && + member->dsp->pcm_bank_tx != + member->dsp->pcm_bank_rx && + nextm->dsp->pcm_bank_tx != + nextm->dsp->pcm_bank_rx) { + /* all members have same slot */ + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s dsp %s & %s stay joined on " + "PCM slot %d bank %d (TX) bank %d " + "(RX) (on different chips)\n", + __func__, + member->dsp->name, + nextm->dsp->name, + member->dsp->pcm_slot_tx, + member->dsp->pcm_bank_tx, + member->dsp->pcm_bank_rx); + conf->hardware = 0; + conf->software = 1; + return; + } + /* find a new slot */ + memset(freeslots, 1, sizeof(freeslots)); + list_for_each_entry(dsp, &dsp_ilist, list) { + if (dsp != member->dsp && + dsp != nextm->dsp && + member->dsp->features.pcm_id == + dsp->features.pcm_id) { + if (dsp->pcm_slot_rx >= 0 && + dsp->pcm_slot_rx < + sizeof(freeslots)) + freeslots[dsp->pcm_slot_tx] = 0; + if (dsp->pcm_slot_tx >= 0 && + dsp->pcm_slot_tx < + sizeof(freeslots)) + freeslots[dsp->pcm_slot_rx] = 0; + } + } + i = 0; + ii = member->dsp->features.pcm_slots; + while (i < ii) { + if (freeslots[i]) + break; + i++; + } + if (i == ii) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s no slot available for " + "%s & %s\n", __func__, + member->dsp->name, + nextm->dsp->name); + /* no more slots available */ + goto conf_software; + } + /* assign free slot */ + member->dsp->pcm_slot_tx = i; + member->dsp->pcm_slot_rx = i; + nextm->dsp->pcm_slot_tx = i; + nextm->dsp->pcm_slot_rx = i; + member->dsp->pcm_bank_rx = 0; + member->dsp->pcm_bank_tx = 1; + nextm->dsp->pcm_bank_rx = 1; + nextm->dsp->pcm_bank_tx = 0; + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s adding %s & %s to new PCM slot %d " + "(TX and RX on different chips) because " + "both members have not same slots\n", + __func__, + member->dsp->name, + nextm->dsp->name, + member->dsp->pcm_slot_tx); + dsp_cmx_hw_message(member->dsp, MISDN_CTRL_HFC_PCM_CONN, + member->dsp->pcm_slot_tx, member->dsp->pcm_bank_tx, + member->dsp->pcm_slot_rx, member->dsp->pcm_bank_rx); + dsp_cmx_hw_message(nextm->dsp, 
MISDN_CTRL_HFC_PCM_CONN, + nextm->dsp->pcm_slot_tx, nextm->dsp->pcm_bank_tx, + nextm->dsp->pcm_slot_rx, nextm->dsp->pcm_bank_rx); + conf->hardware = 1; + conf->software = 0; + return; + /* if members have one bank (or on the same chip) */ + } else { + /* if both members have different crossed slots */ + if (member->dsp->pcm_slot_tx >= 0 && + member->dsp->pcm_slot_rx >= 0 && + nextm->dsp->pcm_slot_tx >= 0 && + nextm->dsp->pcm_slot_rx >= 0 && + nextm->dsp->pcm_slot_tx == + member->dsp->pcm_slot_rx && + nextm->dsp->pcm_slot_rx == + member->dsp->pcm_slot_tx && + member->dsp->pcm_slot_tx != + member->dsp->pcm_slot_rx && + member->dsp->pcm_bank_tx == 0 && + member->dsp->pcm_bank_rx == 0 && + nextm->dsp->pcm_bank_tx == 0 && + nextm->dsp->pcm_bank_rx == 0) { + /* all members have same slot */ + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s dsp %s & %s stay joined on PCM " + "slot %d (TX) %d (RX) on same chip " + "or one bank PCM)\n", __func__, + member->dsp->name, + nextm->dsp->name, + member->dsp->pcm_slot_tx, + member->dsp->pcm_slot_rx); + conf->hardware = 0; + conf->software = 1; + return; + } + /* find two new slot */ + memset(freeslots, 1, sizeof(freeslots)); + list_for_each_entry(dsp, &dsp_ilist, list) { + if (dsp != member->dsp && + dsp != nextm->dsp && + member->dsp->features.pcm_id == + dsp->features.pcm_id) { + if (dsp->pcm_slot_rx >= 0 && + dsp->pcm_slot_rx < + sizeof(freeslots)) + freeslots[dsp->pcm_slot_tx] = 0; + if (dsp->pcm_slot_tx >= 0 && + dsp->pcm_slot_tx < + sizeof(freeslots)) + freeslots[dsp->pcm_slot_rx] = 0; + } + } + i1 = 0; + ii = member->dsp->features.pcm_slots; + while (i1 < ii) { + if (freeslots[i1]) + break; + i1++; + } + if (i1 == ii) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s no slot available " + "for %s & %s\n", __func__, + member->dsp->name, + nextm->dsp->name); + /* no more slots available */ + goto conf_software; + } + i2 = i1+1; + while (i2 < ii) { + if (freeslots[i2]) + break; + i2++; + } + if (i2 == ii) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s no slot available " + "for %s & %s\n", + __func__, + member->dsp->name, + nextm->dsp->name); + /* no more slots available */ + goto conf_software; + } + /* assign free slots */ + member->dsp->pcm_slot_tx = i1; + member->dsp->pcm_slot_rx = i2; + nextm->dsp->pcm_slot_tx = i2; + nextm->dsp->pcm_slot_rx = i1; + member->dsp->pcm_bank_rx = 0; + member->dsp->pcm_bank_tx = 0; + nextm->dsp->pcm_bank_rx = 0; + nextm->dsp->pcm_bank_tx = 0; + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s adding %s & %s to new PCM slot %d " + "(TX) %d (RX) on same chip or one bank " + "PCM, because both members have not " + "crossed slots\n", __func__, + member->dsp->name, + nextm->dsp->name, + member->dsp->pcm_slot_tx, + member->dsp->pcm_slot_rx); + dsp_cmx_hw_message(member->dsp, MISDN_CTRL_HFC_PCM_CONN, + member->dsp->pcm_slot_tx, member->dsp->pcm_bank_tx, + member->dsp->pcm_slot_rx, member->dsp->pcm_bank_rx); + dsp_cmx_hw_message(nextm->dsp, MISDN_CTRL_HFC_PCM_CONN, + nextm->dsp->pcm_slot_tx, nextm->dsp->pcm_bank_tx, + nextm->dsp->pcm_slot_rx, nextm->dsp->pcm_bank_rx); + conf->hardware = 1; + conf->software = 0; + return; + } + } + + /* + * if we have more than two, we may check if we have a conference + * unit available on the chip. 
also all members must be on the same + */ + + /* if not the same HFC chip */ + if (same_hfc < 0) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s conference %d cannot be formed, because " + "members are on different chips or not " + "on HFC chip\n", + __func__, conf->id); + goto conf_software; + } + + /* for more than two members.. */ + + /* in case of hdlc, we change to software */ + if (dsp->hdlc) + goto conf_software; + + /* if all members already have the same conference */ + if (all_conf) + return; + + /* + * if there is an existing conference, but not all members have joined + */ + if (current_conf >= 0) { +join_members: + list_for_each_entry(member, &conf->mlist, list) { + /* join to current conference */ + if (member->dsp->hfc_conf == current_conf) + continue; + /* get a free timeslot first */ + memset(freeslots, 1, sizeof(freeslots)); + list_for_each_entry(dsp, &dsp_ilist, list) { + /* + * not checking current member, because + * slot will be overwritten. + */ + if ( + dsp != member->dsp && + /* dsp must be on the same PCM */ + member->dsp->features.pcm_id == + dsp->features.pcm_id) { + /* dsp must be on a slot */ + if (dsp->pcm_slot_tx >= 0 && + dsp->pcm_slot_tx < + sizeof(freeslots)) + freeslots[dsp->pcm_slot_tx] = 0; + if (dsp->pcm_slot_rx >= 0 && + dsp->pcm_slot_rx < + sizeof(freeslots)) + freeslots[dsp->pcm_slot_rx] = 0; + } + } + i = 0; + ii = member->dsp->features.pcm_slots; + while (i < ii) { + if (freeslots[i]) + break; + i++; + } + if (i == ii) { + /* no more slots available */ + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s conference %d cannot be formed," + " because no slot free\n", + __func__, conf->id); + goto conf_software; + } + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s changing dsp %s to HW conference " + "%d slot %d\n", __func__, + member->dsp->name, current_conf, i); + /* assign free slot & set PCM & join conf */ + member->dsp->pcm_slot_tx = i; + member->dsp->pcm_slot_rx = i; + member->dsp->pcm_bank_tx = 2; /* loop */ + member->dsp->pcm_bank_rx = 2; + member->dsp->hfc_conf = current_conf; + dsp_cmx_hw_message(member->dsp, MISDN_CTRL_HFC_PCM_CONN, + i, 2, i, 2); + dsp_cmx_hw_message(member->dsp, + MISDN_CTRL_HFC_CONF_JOIN, current_conf, 0, 0, 0); + } + return; + } + + /* + * no member is in a conference yet, so we find a free one + */ + memset(freeunits, 1, sizeof(freeunits)); + list_for_each_entry(dsp, &dsp_ilist, list) { + /* dsp must be on the same chip */ + if (dsp->features.hfc_id == same_hfc && + /* dsp must have joined a HW conference */ + dsp->hfc_conf >= 0 && + /* slot must be within range */ + dsp->hfc_conf < 8) + freeunits[dsp->hfc_conf] = 0; + } + i = 0; + ii = 8; + while (i < ii) { + if (freeunits[i]) + break; + i++; + } + if (i == ii) { + /* no more conferences available */ + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s conference %d cannot be formed, because " + "no conference number free\n", + __func__, conf->id); + goto conf_software; + } + /* join all members */ + current_conf = i; + goto join_members; +} + + +/* + * conf_id != 0: join or change conference + * conf_id == 0: split from conference if not already + */ +int +dsp_cmx_conf(struct dsp *dsp, u32 conf_id) +{ + int err; + struct dsp_conf *conf; + struct dsp_conf_member *member; + + /* if conference doesn't change */ + if (dsp->conf_id == conf_id) + return 0; + + /* first remove us from current conf */ + if (dsp->conf_id) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG "removing us from conference %d\n", + dsp->conf->id); + /* 
remove us from conf */ + conf = dsp->conf; + err = dsp_cmx_del_conf_member(dsp); + if (err) + return err; + dsp->conf_id = 0; + + /* update hardware */ + dsp_cmx_hardware(NULL, dsp); + + /* conf now empty? */ + if (list_empty(&conf->mlist)) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "conference is empty, so we remove it.\n"); + err = dsp_cmx_del_conf(conf); + if (err) + return err; + } else { + /* update members left on conf */ + dsp_cmx_hardware(conf, NULL); + } + } + + /* if split */ + if (!conf_id) + return 0; + + /* now add us to conf */ + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG "searching conference %d\n", + conf_id); + conf = dsp_cmx_search_conf(conf_id); + if (!conf) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "conference doesn't exist yet, creating.\n"); + /* the conference doesn't exist, so we create */ + conf = dsp_cmx_new_conf(conf_id); + if (!conf) + return -EINVAL; + } else if (!list_empty(&conf->mlist)) { + member = list_entry(conf->mlist.next, struct dsp_conf_member, + list); + if (dsp->hdlc && !member->dsp->hdlc) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "cannot join transparent conference.\n"); + return -EINVAL; + } + if (!dsp->hdlc && member->dsp->hdlc) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "cannot join hdlc conference.\n"); + return -EINVAL; + } + } + /* add conference member */ + err = dsp_cmx_add_conf_member(dsp, conf); + if (err) + return err; + dsp->conf_id = conf_id; + + /* if we are alone, we do nothing! */ + if (list_empty(&conf->mlist)) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "we are alone in this conference, so exit.\n"); + /* update hardware */ + dsp_cmx_hardware(NULL, dsp); + return 0; + } + + /* update members on conf */ + dsp_cmx_hardware(conf, NULL); + + return 0; +} + + +/* + * audio data is received from card + */ +void +dsp_cmx_receive(struct dsp *dsp, struct sk_buff *skb) +{ + u8 *d, *p; + int len = skb->len; + struct mISDNhead *hh = mISDN_HEAD_P(skb); + int w, i, ii; + + /* check if we have sompen */ + if (len < 1) + return; + + /* half of the buffer should be larger than maximum packet size */ + if (len >= CMX_BUFF_HALF) { + printk(KERN_ERR + "%s line %d: packet from card is too large (%d bytes). " + "please make card send smaller packets OR increase " + "CMX_BUFF_SIZE\n", __FILE__, __LINE__, len); + return; + } + + /* + * initialize pointers if not already - + * also add delay if requested by PH_SIGNAL + */ + if (dsp->rx_init) { + dsp->rx_init = 0; + if (dsp->features.unordered) { + dsp->rx_R = (hh->id & CMX_BUFF_MASK); + dsp->rx_W = (dsp->rx_R + dsp->cmx_delay) + & CMX_BUFF_MASK; + } else { + dsp->rx_R = 0; + dsp->rx_W = dsp->cmx_delay; + } + } + /* if frame contains time code, write directly */ + if (dsp->features.unordered) { + dsp->rx_W = (hh->id & CMX_BUFF_MASK); + /* printk(KERN_DEBUG "%s %08x\n", dsp->name, hh->id); */ + } + /* + * if we underrun (or maybe overrun), + * we set our new read pointer, and write silence to buffer + */ + if (((dsp->rx_W-dsp->rx_R) & CMX_BUFF_MASK) >= CMX_BUFF_HALF) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "cmx_receive(dsp=%lx): UNDERRUN (or overrun the " + "maximum delay), adjusting read pointer! 
" + "(inst %s)\n", (u_long)dsp, dsp->name); + /* flush buffer */ + if (dsp->features.unordered) { + dsp->rx_R = (hh->id & CMX_BUFF_MASK); + dsp->rx_W = (dsp->rx_R + dsp->cmx_delay) + & CMX_BUFF_MASK; + } else { + dsp->rx_R = 0; + dsp->rx_W = dsp->cmx_delay; + } + memset(dsp->rx_buff, dsp_silence, sizeof(dsp->rx_buff)); + } + /* if we have reached double delay, jump back to middle */ + if (dsp->cmx_delay) + if (((dsp->rx_W - dsp->rx_R) & CMX_BUFF_MASK) >= + (dsp->cmx_delay << 1)) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "cmx_receive(dsp=%lx): OVERRUN (because " + "twice the delay is reached), adjusting " + "read pointer! (inst %s)\n", + (u_long)dsp, dsp->name); + /* flush buffer */ + if (dsp->features.unordered) { + dsp->rx_R = (hh->id & CMX_BUFF_MASK); + dsp->rx_W = (dsp->rx_R + dsp->cmx_delay) + & CMX_BUFF_MASK; + } else { + dsp->rx_R = 0; + dsp->rx_W = dsp->cmx_delay; + } + memset(dsp->rx_buff, dsp_silence, sizeof(dsp->rx_buff)); + } + + /* show where to write */ +#ifdef CMX_DEBUG + printk(KERN_DEBUG + "cmx_receive(dsp=%lx): rx_R(dsp)=%05x rx_W(dsp)=%05x len=%d %s\n", + (u_long)dsp, dsp->rx_R, dsp->rx_W, len, dsp->name); +#endif + + /* write data into rx_buffer */ + p = skb->data; + d = dsp->rx_buff; + w = dsp->rx_W; + i = 0; + ii = len; + while (i < ii) { + d[w++ & CMX_BUFF_MASK] = *p++; + i++; + } + + /* increase write-pointer */ + dsp->rx_W = ((dsp->rx_W+len) & CMX_BUFF_MASK); +} + + +/* + * send (mixed) audio data to card and control jitter + */ +static void +dsp_cmx_send_member(struct dsp *dsp, int len, s32 *c, int members) +{ + struct dsp_conf *conf = dsp->conf; + struct dsp *member, *other; + register s32 sample; + u8 *d, *p, *q, *o_q; + struct sk_buff *nskb, *txskb; + int r, rr, t, tt, o_r, o_rr; + int preload = 0; + struct mISDNhead *hh, *thh; + + /* don't process if: */ + if (!dsp->b_active) { /* if not active */ + dsp->last_tx = 0; + return; + } + if (dsp->pcm_slot_tx >= 0 && /* connected to pcm slot */ + dsp->tx_R == dsp->tx_W && /* AND no tx-data */ + !(dsp->tone.tone && dsp->tone.software)) { /* AND not soft tones */ + dsp->last_tx = 0; + return; + } + +#ifdef CMX_DEBUG + printk(KERN_DEBUG + "SEND members=%d dsp=%s, conf=%p, rx_R=%05x rx_W=%05x\n", + members, dsp->name, conf, dsp->rx_R, dsp->rx_W); +#endif + + /* preload if we have delay set */ + if (dsp->cmx_delay && !dsp->last_tx) { + preload = len; + if (preload < 128) + preload = 128; + } + + /* PREPARE RESULT */ + nskb = mI_alloc_skb(len + preload, GFP_ATOMIC); + if (!nskb) { + printk(KERN_ERR + "FATAL ERROR in mISDN_dsp.o: cannot alloc %d bytes\n", + len + preload); + return; + } + hh = mISDN_HEAD_P(nskb); + hh->prim = PH_DATA_REQ; + hh->id = 0; + dsp->last_tx = 1; + + /* set pointers, indexes and stuff */ + member = dsp; + p = dsp->tx_buff; /* transmit data */ + q = dsp->rx_buff; /* received data */ + d = skb_put(nskb, preload + len); /* result */ + t = dsp->tx_R; /* tx-pointers */ + tt = dsp->tx_W; + r = dsp->rx_R; /* rx-pointers */ + rr = (r + len) & CMX_BUFF_MASK; + + /* preload with silence, if required */ + if (preload) { + memset(d, dsp_silence, preload); + d += preload; + } + + /* PROCESS TONES/TX-DATA ONLY */ + if (dsp->tone.tone && dsp->tone.software) { + /* -> copy tone */ + dsp_tone_copy(dsp, d, len); + dsp->tx_R = 0; /* clear tx buffer */ + dsp->tx_W = 0; + goto send_packet; + } + /* if we have tx-data but do not use mixing */ + if (!dsp->tx_mix && t != tt) { + /* -> send tx-data and continue when not enough */ +#ifdef CMX_TX_DEBUG + sprintf(debugbuf, "TX sending (%04x-%04x)%p: ", t, tt, 
p); +#endif + while (r != rr && t != tt) { +#ifdef CMX_TX_DEBUG + if (strlen(debugbuf) < 48) + sprintf(debugbuf+strlen(debugbuf), " %02x", p[t]); +#endif + *d++ = p[t]; /* write tx_buff */ + t = (t+1) & CMX_BUFF_MASK; + r = (r+1) & CMX_BUFF_MASK; + } + if (r == rr) { + dsp->tx_R = t; +#ifdef CMX_TX_DEBUG + printk(KERN_DEBUG "%s\n", debugbuf); +#endif + goto send_packet; + } + } +#ifdef CMX_TX_DEBUG + printk(KERN_DEBUG "%s\n", debugbuf); +#endif + + /* PROCESS DATA (one member / no conf) */ + if (!conf || members <= 1) { + /* -> if echo is NOT enabled */ + if (!dsp->echo) { + /* -> send tx-data if available or use 0-volume */ + while (r != rr && t != tt) { + *d++ = p[t]; /* write tx_buff */ + t = (t+1) & CMX_BUFF_MASK; + r = (r+1) & CMX_BUFF_MASK; + } + if (r != rr) + memset(d, dsp_silence, (rr-r)&CMX_BUFF_MASK); + /* -> if echo is enabled */ + } else { + /* + * -> mix tx-data with echo if available, + * or use echo only + */ + while (r != rr && t != tt) { + *d++ = dsp_audio_mix_law[(p[t]<<8)|q[r]]; + t = (t+1) & CMX_BUFF_MASK; + r = (r+1) & CMX_BUFF_MASK; + } + while (r != rr) { + *d++ = q[r]; /* echo */ + r = (r+1) & CMX_BUFF_MASK; + } + } + dsp->tx_R = t; + goto send_packet; + } + /* PROCESS DATA (two members) */ +#ifdef CMX_CONF_DEBUG + if (0) { +#else + if (members == 2) { +#endif + /* "other" becomes other party */ + other = (list_entry(conf->mlist.next, + struct dsp_conf_member, list))->dsp; + if (other == member) + other = (list_entry(conf->mlist.prev, + struct dsp_conf_member, list))->dsp; + o_q = other->rx_buff; /* received data */ + o_rr = (other->rx_R + len) & CMX_BUFF_MASK; + /* end of rx-pointer */ + o_r = (o_rr - rr + r) & CMX_BUFF_MASK; + /* start rx-pointer at current read position*/ + /* -> if echo is NOT enabled */ + if (!dsp->echo) { + /* + * -> copy other member's rx-data, + * if tx-data is available, mix + */ + while (o_r != o_rr && t != tt) { + *d++ = dsp_audio_mix_law[(p[t]<<8)|o_q[o_r]]; + t = (t+1) & CMX_BUFF_MASK; + o_r = (o_r+1) & CMX_BUFF_MASK; + } + while (o_r != o_rr) { + *d++ = o_q[o_r]; + o_r = (o_r+1) & CMX_BUFF_MASK; + } + /* -> if echo is enabled */ + } else { + /* + * -> mix other member's rx-data with echo, + * if tx-data is available, mix + */ + while (r != rr && t != tt) { + sample = dsp_audio_law_to_s32[p[t]] + + dsp_audio_law_to_s32[q[r]] + + dsp_audio_law_to_s32[o_q[o_r]]; + if (sample < -32768) + sample = -32768; + else if (sample > 32767) + sample = 32767; + *d++ = dsp_audio_s16_to_law[sample & 0xffff]; + /* tx-data + rx_data + echo */ + t = (t+1) & CMX_BUFF_MASK; + r = (r+1) & CMX_BUFF_MASK; + o_r = (o_r+1) & CMX_BUFF_MASK; + } + while (r != rr) { + *d++ = dsp_audio_mix_law[(q[r]<<8)|o_q[o_r]]; + r = (r+1) & CMX_BUFF_MASK; + o_r = (o_r+1) & CMX_BUFF_MASK; + } + } + dsp->tx_R = t; + goto send_packet; + } +#ifdef DSP_NEVER_DEFINED + } +#endif + /* PROCESS DATA (three or more members) */ + /* -> if echo is NOT enabled */ + if (!dsp->echo) { + /* + * -> substract rx-data from conf-data, + * if tx-data is available, mix + */ + while (r != rr && t != tt) { + sample = dsp_audio_law_to_s32[p[t]] + *c++ - + dsp_audio_law_to_s32[q[r]]; + if (sample < -32768) + sample = -32768; + else if (sample > 32767) + sample = 32767; + *d++ = dsp_audio_s16_to_law[sample & 0xffff]; + /* conf-rx+tx */ + r = (r+1) & CMX_BUFF_MASK; + t = (t+1) & CMX_BUFF_MASK; + } + while (r != rr) { + sample = *c++ - dsp_audio_law_to_s32[q[r]]; + if (sample < -32768) + sample = -32768; + else if (sample > 32767) + sample = 32767; + *d++ = dsp_audio_s16_to_law[sample & 0xffff]; + /* 
conf-rx */ + r = (r+1) & CMX_BUFF_MASK; + } + /* -> if echo is enabled */ + } else { + /* + * -> encode conf-data, if tx-data + * is available, mix + */ + while (r != rr && t != tt) { + sample = dsp_audio_law_to_s32[p[t]] + *c++; + if (sample < -32768) + sample = -32768; + else if (sample > 32767) + sample = 32767; + *d++ = dsp_audio_s16_to_law[sample & 0xffff]; + /* conf(echo)+tx */ + t = (t+1) & CMX_BUFF_MASK; + r = (r+1) & CMX_BUFF_MASK; + } + while (r != rr) { + sample = *c++; + if (sample < -32768) + sample = -32768; + else if (sample > 32767) + sample = 32767; + *d++ = dsp_audio_s16_to_law[sample & 0xffff]; + /* conf(echo) */ + r = (r+1) & CMX_BUFF_MASK; + } + } + dsp->tx_R = t; + goto send_packet; + +send_packet: + /* + * send tx-data if enabled - don't filter, + * becuase we want what we send, not what we filtered + */ + if (dsp->tx_data) { + /* PREPARE RESULT */ + txskb = mI_alloc_skb(len, GFP_ATOMIC); + if (!txskb) { + printk(KERN_ERR + "FATAL ERROR in mISDN_dsp.o: " + "cannot alloc %d bytes\n", len); + } else { + thh = mISDN_HEAD_P(txskb); + thh->prim = DL_DATA_REQ; + thh->id = 0; + memcpy(skb_put(txskb, len), nskb->data+preload, len); + /* queue (trigger later) */ + skb_queue_tail(&dsp->sendq, txskb); + } + } + /* adjust volume */ + if (dsp->tx_volume) + dsp_change_volume(nskb, dsp->tx_volume); + /* pipeline */ + if (dsp->pipeline.inuse) + dsp_pipeline_process_tx(&dsp->pipeline, nskb->data, nskb->len); + /* crypt */ + if (dsp->bf_enable) + dsp_bf_encrypt(dsp, nskb->data, nskb->len); + /* queue and trigger */ + skb_queue_tail(&dsp->sendq, nskb); + schedule_work(&dsp->workq); +} + +u32 samplecount; +struct timer_list dsp_spl_tl; +u32 dsp_spl_jiffies; /* calculate the next time to fire */ +u32 dsp_start_jiffies; /* jiffies at the time, the calculation begins */ +struct timeval dsp_start_tv; /* time at start of calculation */ + +void +dsp_cmx_send(void *arg) +{ + struct dsp_conf *conf; + struct dsp_conf_member *member; + struct dsp *dsp; + int mustmix, members; + s32 mixbuffer[MAX_POLL+100], *c; + u8 *p, *q; + int r, rr; + int jittercheck = 0, delay, i; + u_long flags; + struct timeval tv; + u32 elapsed; + s16 length; + + /* lock */ + spin_lock_irqsave(&dsp_lock, flags); + + if (!dsp_start_tv.tv_sec) { + do_gettimeofday(&dsp_start_tv); + length = dsp_poll; + } else { + do_gettimeofday(&tv); + elapsed = ((tv.tv_sec - dsp_start_tv.tv_sec) * 8000) + + ((s32)(tv.tv_usec / 125) - (dsp_start_tv.tv_usec / 125)); + dsp_start_tv.tv_sec = tv.tv_sec; + dsp_start_tv.tv_usec = tv.tv_usec; + length = elapsed; + } + if (length > MAX_POLL + 100) + length = MAX_POLL + 100; +/* printk(KERN_DEBUG "len=%d dsp_count=0x%x.%04x dsp_poll_diff=0x%x.%04x\n", + length, dsp_count >> 16, dsp_count & 0xffff, dsp_poll_diff >> 16, + dsp_poll_diff & 0xffff); + */ + + /* + * check if jitter needs to be checked + * (this is about every second = 8192 samples) + */ + samplecount += length; + if ((samplecount & 8191) < length) + jittercheck = 1; + + /* loop all members that do not require conference mixing */ + list_for_each_entry(dsp, &dsp_ilist, list) { + if (dsp->hdlc) + continue; + conf = dsp->conf; + mustmix = 0; + members = 0; + if (conf) { + members = count_list_member(&conf->mlist); +#ifdef CMX_CONF_DEBUG + if (conf->software && members > 1) +#else + if (conf->software && members > 2) +#endif + mustmix = 1; + } + + /* transmission required */ + if (!mustmix) { + dsp_cmx_send_member(dsp, length, mixbuffer, members); + + /* + * unused mixbuffer is given to prevent a + * potential null-pointer-bug + */ + } + } + + 
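/*
 * A minimal standalone sketch (not part of the patch) of the mix-and-subtract
 * principle the conference loop below relies on: all members' samples are
 * summed into a linear mix buffer, and each member then receives that sum
 * minus its own received sample, clipped to the 16-bit range before being
 * encoded back to a-law/u-law. The names mix_and_subtract, law_to_linear and
 * linear_to_law are placeholders standing in for the driver's lookup tables
 * (conceptually dsp_audio_law_to_s32 and dsp_audio_s16_to_law).
 */
static void
mix_and_subtract(const int *mixbuffer, const unsigned char *own_rx,
	unsigned char *out, int len,
	const int *law_to_linear, const unsigned char *linear_to_law)
{
	int i, sample;

	for (i = 0; i < len; i++) {
		/* conference sum minus this member's own contribution */
		sample = mixbuffer[i] - law_to_linear[own_rx[i]];
		/* clip to signed 16-bit range */
		if (sample < -32768)
			sample = -32768;
		else if (sample > 32767)
			sample = 32767;
		/* encode via a 64k table indexed by the 16-bit pattern */
		out[i] = linear_to_law[sample & 0xffff];
	}
}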
/* loop all members that require conference mixing */ + list_for_each_entry(conf, &conf_ilist, list) { + /* count members and check hardware */ + members = count_list_member(&conf->mlist); +#ifdef CMX_CONF_DEBUG + if (conf->software && members > 1) { +#else + if (conf->software && members > 2) { +#endif + /* check for hdlc conf */ + member = list_entry(conf->mlist.next, + struct dsp_conf_member, list); + if (member->dsp->hdlc) + continue; + /* mix all data */ + memset(mixbuffer, 0, length*sizeof(s32)); + list_for_each_entry(member, &conf->mlist, list) { + dsp = member->dsp; + /* get range of data to mix */ + c = mixbuffer; + q = dsp->rx_buff; + r = dsp->rx_R; + rr = (r + length) & CMX_BUFF_MASK; + /* add member's data */ + while (r != rr) { + *c++ += dsp_audio_law_to_s32[q[r]]; + r = (r+1) & CMX_BUFF_MASK; + } + } + + /* process each member */ + list_for_each_entry(member, &conf->mlist, list) { + /* transmission */ + dsp_cmx_send_member(member->dsp, length, + mixbuffer, members); + } + } + } + + /* delete rx-data, increment buffers, change pointers */ + list_for_each_entry(dsp, &dsp_ilist, list) { + if (dsp->hdlc) + continue; + p = dsp->rx_buff; + q = dsp->tx_buff; + r = dsp->rx_R; + /* move receive pointer when receiving */ + if (!dsp->rx_is_off) { + rr = (r + length) & CMX_BUFF_MASK; + /* delete rx-data */ + while (r != rr) { + p[r] = dsp_silence; + r = (r+1) & CMX_BUFF_MASK; + } + /* increment rx-buffer pointer */ + dsp->rx_R = r; /* write incremented read pointer */ + } + + /* check current rx_delay */ + delay = (dsp->rx_W-dsp->rx_R) & CMX_BUFF_MASK; + if (delay >= CMX_BUFF_HALF) + delay = 0; /* will be the delay before next write */ + /* check for lower delay */ + if (delay < dsp->rx_delay[0]) + dsp->rx_delay[0] = delay; + /* check current tx_delay */ + delay = (dsp->tx_W-dsp->tx_R) & CMX_BUFF_MASK; + if (delay >= CMX_BUFF_HALF) + delay = 0; /* will be the delay before next write */ + /* check for lower delay */ + if (delay < dsp->tx_delay[0]) + dsp->tx_delay[0] = delay; + if (jittercheck) { + /* find the lowest of all rx_delays */ + delay = dsp->rx_delay[0]; + i = 1; + while (i < MAX_SECONDS_JITTER_CHECK) { + if (delay > dsp->rx_delay[i]) + delay = dsp->rx_delay[i]; + i++; + } + /* + * remove rx_delay only if we have delay AND we + * have not preset cmx_delay + */ + if (delay && !dsp->cmx_delay) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s lowest rx_delay of %d bytes for" + " dsp %s are now removed.\n", + __func__, delay, + dsp->name); + r = dsp->rx_R; + rr = (r + delay) & CMX_BUFF_MASK; + /* delete rx-data */ + while (r != rr) { + p[r] = dsp_silence; + r = (r+1) & CMX_BUFF_MASK; + } + /* increment rx-buffer pointer */ + dsp->rx_R = r; + /* write incremented read pointer */ + } + /* find the lowest of all tx_delays */ + delay = dsp->tx_delay[0]; + i = 1; + while (i < MAX_SECONDS_JITTER_CHECK) { + if (delay > dsp->tx_delay[i]) + delay = dsp->tx_delay[i]; + i++; + } + /* + * remove delay only if we have delay AND we + * have enabled tx_dejitter + */ + if (delay && dsp->tx_dejitter) { + if (dsp_debug & DEBUG_DSP_CMX) + printk(KERN_DEBUG + "%s lowest tx_delay of %d bytes for" + " dsp %s are now removed.\n", + __func__, delay, + dsp->name); + r = dsp->tx_R; + rr = (r + delay) & CMX_BUFF_MASK; + /* delete tx-data */ + while (r != rr) { + q[r] = dsp_silence; + r = (r+1) & CMX_BUFF_MASK; + } + /* increment rx-buffer pointer */ + dsp->tx_R = r; + /* write incremented read pointer */ + } + /* scroll up delays */ + i = MAX_SECONDS_JITTER_CHECK - 1; + while (i) { + 
dsp->rx_delay[i] = dsp->rx_delay[i-1]; + dsp->tx_delay[i] = dsp->tx_delay[i-1]; + i--; + } + dsp->tx_delay[0] = CMX_BUFF_HALF; /* (infinite) delay */ + dsp->rx_delay[0] = CMX_BUFF_HALF; /* (infinite) delay */ + } + } + + /* if next event would be in the past ... */ + if ((s32)(dsp_spl_jiffies+dsp_tics-jiffies) <= 0) + dsp_spl_jiffies = jiffies + 1; + else + dsp_spl_jiffies += dsp_tics; + + dsp_spl_tl.expires = dsp_spl_jiffies; + add_timer(&dsp_spl_tl); + + /* unlock */ + spin_unlock_irqrestore(&dsp_lock, flags); +} + +/* + * audio data is transmitted from upper layer to the dsp + */ +void +dsp_cmx_transmit(struct dsp *dsp, struct sk_buff *skb) +{ + u_int w, ww; + u8 *d, *p; + int space; /* todo: , l = skb->len; */ +#ifdef CMX_TX_DEBUG + char debugbuf[256] = ""; +#endif + + /* check if there is enough space, and then copy */ + w = dsp->tx_W; + ww = dsp->tx_R; + p = dsp->tx_buff; + d = skb->data; + space = ww-w; + if (space <= 0) + space += CMX_BUFF_SIZE; + /* write-pointer should not overrun nor reach read pointer */ + if (space-1 < skb->len) + /* write to the space we have left */ + ww = (ww - 1) & CMX_BUFF_MASK; + else + /* write until all byte are copied */ + ww = (w + skb->len) & CMX_BUFF_MASK; + dsp->tx_W = ww; + + /* show current buffer */ +#ifdef CMX_DEBUG + printk(KERN_DEBUG + "cmx_transmit(dsp=%lx) %d bytes to 0x%x-0x%x. %s\n", + (u_long)dsp, (ww-w)&CMX_BUFF_MASK, w, ww, dsp->name); +#endif + + /* copy transmit data to tx-buffer */ +#ifdef CMX_TX_DEBUG + sprintf(debugbuf, "TX getting (%04x-%04x)%p: ", w, ww, p); +#endif + while (w != ww) { +#ifdef CMX_TX_DEBUG + if (strlen(debugbuf) < 48) + sprintf(debugbuf+strlen(debugbuf), " %02x", *d); +#endif + p[w] = *d++; + w = (w+1) & CMX_BUFF_MASK; + } +#ifdef CMX_TX_DEBUG + printk(KERN_DEBUG "%s\n", debugbuf); +#endif + +} + +/* + * hdlc data is received from card and sent to all members. 
+ */ +void +dsp_cmx_hdlc(struct dsp *dsp, struct sk_buff *skb) +{ + struct sk_buff *nskb = NULL; + struct dsp_conf_member *member; + struct mISDNhead *hh; + + /* not if not active */ + if (!dsp->b_active) + return; + + /* check if we have sompen */ + if (skb->len < 1) + return; + + /* no conf */ + if (!dsp->conf) { + /* in case of hardware (echo) */ + if (dsp->pcm_slot_tx >= 0) + return; + if (dsp->echo) + nskb = skb_clone(skb, GFP_ATOMIC); + if (nskb) { + hh = mISDN_HEAD_P(nskb); + hh->prim = PH_DATA_REQ; + hh->id = 0; + skb_queue_tail(&dsp->sendq, nskb); + schedule_work(&dsp->workq); + } + return; + } + /* in case of hardware conference */ + if (dsp->conf->hardware) + return; + list_for_each_entry(member, &dsp->conf->mlist, list) { + if (dsp->echo || member->dsp != dsp) { + nskb = skb_clone(skb, GFP_ATOMIC); + if (nskb) { + hh = mISDN_HEAD_P(nskb); + hh->prim = PH_DATA_REQ; + hh->id = 0; + skb_queue_tail(&member->dsp->sendq, nskb); + schedule_work(&member->dsp->workq); + } + } + } +} + + diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c new file mode 100644 index 000000000000..2f10ed82c0db --- /dev/null +++ b/drivers/isdn/mISDN/dsp_core.c @@ -0,0 +1,1191 @@ +/* + * Author Andreas Eversberg (jolly@eversberg.eu) + * Based on source code structure by + * Karsten Keil (keil@isdn4linux.de) + * + * This file is (c) under GNU PUBLIC LICENSE + * For changes and modifications please read + * ../../../Documentation/isdn/mISDN.cert + * + * Thanks to Karsten Keil (great drivers) + * Cologne Chip (great chips) + * + * This module does: + * Real-time tone generation + * DTMF detection + * Real-time cross-connection and conferrence + * Compensate jitter due to system load and hardware fault. + * All features are done in kernel space and will be realized + * using hardware, if available and supported by chip set. + * Blowfish encryption/decryption + */ + +/* STRUCTURE: + * + * The dsp module provides layer 2 for b-channels (64kbit). It provides + * transparent audio forwarding with special digital signal processing: + * + * - (1) generation of tones + * - (2) detection of dtmf tones + * - (3) crossconnecting and conferences (clocking) + * - (4) echo generation for delay test + * - (5) volume control + * - (6) disable receive data + * - (7) pipeline + * - (8) encryption/decryption + * + * Look: + * TX RX + * ------upper layer------ + * | ^ + * | |(6) + * v | + * +-----+-------------+-----+ + * |(3)(4) | + * | CMX | + * | | + * | +-------------+ + * | | ^ + * | | | + * |+---------+| +----+----+ + * ||(1) || |(2) | + * || || | | + * || Tones || | DTMF | + * || || | | + * || || | | + * |+----+----+| +----+----+ + * +-----+-----+ ^ + * | | + * v | + * +----+----+ +----+----+ + * |(5) | |(5) | + * | | | | + * |TX Volume| |RX Volume| + * | | | | + * | | | | + * +----+----+ +----+----+ + * | ^ + * | | + * v | + * +----+-------------+----+ + * |(7) | + * | | + * | Pipeline Processing | + * | | + * | | + * +----+-------------+----+ + * | ^ + * | | + * v | + * +----+----+ +----+----+ + * |(8) | |(8) | + * | | | | + * | Encrypt | | Decrypt | + * | | | | + * | | | | + * +----+----+ +----+----+ + * | ^ + * | | + * v | + * ------card layer------ + * TX RX + * + * Above you can see the logical data flow. If software is used to do the + * process, it is actually the real data flow. If hardware is used, data + * may not flow, but hardware commands to the card, to provide the data flow + * as shown. 
+ * + * NOTE: The channel must be activated in order to make dsp work, even if + * no data flow to the upper layer is intended. Activation can be done + * before or after configuring the settings using PH_CONTROL requests. + * + * DTMF: Will be detected by hardware if possible. Detection is done before CMX + * processing. + * + * Tones: Will be generated via software if endless looped audio fifos are + * not supported by hardware. Tones will override all data from CMX. + * It is not required to join a conference to use tones at any time. + * + * CMX: Is transparent when not used. When it is used, it will do + * crossconnections and conferences via software if not possible through + * hardware. If hardware capability is available, hardware is used. + * + * Echo: Is generated by CMX and is used to check the performance of hardware + * and software CMX. + * + * The CMX has special functions for conferences with one, two and more + * members. It will allow different types of data flow. Receive and transmit + * data to/from the upper layer may be switched on/off individually without + * losing features of CMX, Tones and DTMF. + * + * Echo cancellation: Sometimes we want to cancel echo from the interface. + * Note that a VoIP call may not have echo caused by the IP phone. The echo + * is generated by the telephone line connected to it. Because the delay + * is high, it becomes an echo. RESULT: Echo cancellation is required if + * both echo AND delay are applied to an interface. + * Remember that software CMX always introduces some delay. + * + * If all used features can be realized in hardware, and if transmit and/or + * receive data is disabled, the card may not send/receive any data at all. + * Not receiving is useful if only announcements are played. Not sending is + * useful if an answering machine records audio. Not sending and receiving is + * useful during most states of the call. If supported by hardware, tones + * will be played without CPU load. Small PBXs and NT-mode applications will + * not need expensive hardware when processing calls. + * + * + * LOCKING: + * + * When data is received from the upper or lower layer (card), the complete dsp + * module is locked by a global lock. This lock MUST disable irqs, because it + * must also lock out the timer events of the DSP poll timer. + * When data is ready to be transmitted down, the data is queued and sent + * outside of the lock and timer event. + * PH_CONTROL must not change any settings, nor join or split conference + * members, while data is being processed. + * + * HDLC: + * + * It works much the same as transparent mode, except that HDLC data is + * forwarded to all other conference members if no hardware bridging is + * possible. Data to be sent is written to the sendq; the sendq is transmitted + * once a confirm is received. + * A member cannot join a conference unless all members use the same mode + * (hdlc or transparent).
+ * + */ + +#include +#include +#include +#include +#include +#include "core.h" +#include "dsp.h" + +const char *mISDN_dsp_revision = "2.0"; + +static int debug; +static int options; +static int poll; +static int dtmfthreshold = 100; + +MODULE_AUTHOR("Andreas Eversberg"); +module_param(debug, uint, S_IRUGO | S_IWUSR); +module_param(options, uint, S_IRUGO | S_IWUSR); +module_param(poll, uint, S_IRUGO | S_IWUSR); +module_param(dtmfthreshold, uint, S_IRUGO | S_IWUSR); +MODULE_LICENSE("GPL"); + +/*int spinnest = 0;*/ + +spinlock_t dsp_lock; /* global dsp lock */ +struct list_head dsp_ilist; +struct list_head conf_ilist; +int dsp_debug; +int dsp_options; +int dsp_poll, dsp_tics; + +/* check if rx may be turned off or must be turned on */ +static void +dsp_rx_off_member(struct dsp *dsp) +{ + struct mISDN_ctrl_req cq; + int rx_off = 1; + + if (!dsp->features_rx_off) + return; + + /* not disabled */ + if (!dsp->rx_disabled) + rx_off = 0; + /* software dtmf */ + else if (dsp->dtmf.software) + rx_off = 0; + /* echo in software */ + else if (dsp->echo && dsp->pcm_slot_tx < 0) + rx_off = 0; + /* bridge in software */ + else if (dsp->conf) { + if (dsp->conf->software) + rx_off = 0; + } + + if (rx_off == dsp->rx_is_off) + return; + + if (!dsp->ch.peer) { + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: no peer, no rx_off\n", + __func__); + return; + } + cq.op = MISDN_CTRL_RX_OFF; + cq.p1 = rx_off; + if (dsp->ch.peer->ctrl(dsp->ch.peer, CONTROL_CHANNEL, &cq)) { + printk(KERN_DEBUG "%s: 2nd CONTROL_CHANNEL failed\n", + __func__); + return; + } + dsp->rx_is_off = rx_off; + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: %s set rx_off = %d\n", + __func__, dsp->name, rx_off); +} +static void +dsp_rx_off(struct dsp *dsp) +{ + struct dsp_conf_member *member; + + if (dsp_options & DSP_OPT_NOHARDWARE) + return; + + /* no conf */ + if (!dsp->conf) { + dsp_rx_off_member(dsp); + return; + } + /* check all members in conf */ + list_for_each_entry(member, &dsp->conf->mlist, list) { + dsp_rx_off_member(member->dsp); + } +} + +static int +dsp_control_req(struct dsp *dsp, struct mISDNhead *hh, struct sk_buff *skb) +{ + struct sk_buff *nskb; + int ret = 0; + int cont; + u8 *data; + int len; + + if (skb->len < sizeof(int)) + printk(KERN_ERR "%s: PH_CONTROL message too short\n", __func__); + cont = *((int *)skb->data); + len = skb->len - sizeof(int); + data = skb->data + sizeof(int); + + switch (cont) { + case DTMF_TONE_START: /* turn on DTMF */ + if (dsp->hdlc) { + ret = -EINVAL; + break; + } + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: start dtmf\n", __func__); + if (len == sizeof(int)) { + printk(KERN_NOTICE "changing DTMF Threshold " + "to %d\n", *((int *)data)); + dsp->dtmf.treshold = (*(int *)data) * 10000; + } + /* init goertzel */ + dsp_dtmf_goertzel_init(dsp); + + /* check dtmf hardware */ + dsp_dtmf_hardware(dsp); + break; + case DTMF_TONE_STOP: /* turn off DTMF */ + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: stop dtmf\n", __func__); + dsp->dtmf.hardware = 0; + dsp->dtmf.software = 0; + break; + case DSP_CONF_JOIN: /* join / update conference */ + if (len < sizeof(int)) { + ret = -EINVAL; + break; + } + if (*((u32 *)data) == 0) + goto conf_split; + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: join conference %d\n", + __func__, *((u32 *)data)); + ret = dsp_cmx_conf(dsp, *((u32 *)data)); + /* dsp_cmx_hardware will also be called here */ + dsp_rx_off(dsp); + if (dsp_debug & DEBUG_DSP_CMX) + dsp_cmx_debug(dsp); + break; + case DSP_CONF_SPLIT: /* remove from 
conference */ +conf_split: + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: release conference\n", __func__); + ret = dsp_cmx_conf(dsp, 0); + /* dsp_cmx_hardware will also be called here */ + if (dsp_debug & DEBUG_DSP_CMX) + dsp_cmx_debug(dsp); + dsp_rx_off(dsp); + break; + case DSP_TONE_PATT_ON: /* play tone */ + if (dsp->hdlc) { + ret = -EINVAL; + break; + } + if (len < sizeof(int)) { + ret = -EINVAL; + break; + } + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: turn tone 0x%x on\n", + __func__, *((int *)skb->data)); + ret = dsp_tone(dsp, *((int *)data)); + if (!ret) { + dsp_cmx_hardware(dsp->conf, dsp); + dsp_rx_off(dsp); + } + if (!dsp->tone.tone) + goto tone_off; + break; + case DSP_TONE_PATT_OFF: /* stop tone */ + if (dsp->hdlc) { + ret = -EINVAL; + break; + } + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: turn tone off\n", __func__); + dsp_tone(dsp, 0); + dsp_cmx_hardware(dsp->conf, dsp); + dsp_rx_off(dsp); + /* reset tx buffers (user space data) */ +tone_off: + dsp->rx_W = 0; + dsp->rx_R = 0; + break; + case DSP_VOL_CHANGE_TX: /* change volume */ + if (dsp->hdlc) { + ret = -EINVAL; + break; + } + if (len < sizeof(int)) { + ret = -EINVAL; + break; + } + dsp->tx_volume = *((int *)data); + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: change tx vol to %d\n", + __func__, dsp->tx_volume); + dsp_cmx_hardware(dsp->conf, dsp); + dsp_dtmf_hardware(dsp); + dsp_rx_off(dsp); + break; + case DSP_VOL_CHANGE_RX: /* change volume */ + if (dsp->hdlc) { + ret = -EINVAL; + break; + } + if (len < sizeof(int)) { + ret = -EINVAL; + break; + } + dsp->rx_volume = *((int *)data); + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: change rx vol to %d\n", + __func__, dsp->tx_volume); + dsp_cmx_hardware(dsp->conf, dsp); + dsp_dtmf_hardware(dsp); + dsp_rx_off(dsp); + break; + case DSP_ECHO_ON: /* enable echo */ + dsp->echo = 1; /* soft echo */ + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: enable cmx-echo\n", __func__); + dsp_cmx_hardware(dsp->conf, dsp); + dsp_rx_off(dsp); + if (dsp_debug & DEBUG_DSP_CMX) + dsp_cmx_debug(dsp); + break; + case DSP_ECHO_OFF: /* disable echo */ + dsp->echo = 0; + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: disable cmx-echo\n", __func__); + dsp_cmx_hardware(dsp->conf, dsp); + dsp_rx_off(dsp); + if (dsp_debug & DEBUG_DSP_CMX) + dsp_cmx_debug(dsp); + break; + case DSP_RECEIVE_ON: /* enable receive to user space */ + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: enable receive to user " + "space\n", __func__); + dsp->rx_disabled = 0; + dsp_rx_off(dsp); + break; + case DSP_RECEIVE_OFF: /* disable receive to user space */ + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: disable receive to " + "user space\n", __func__); + dsp->rx_disabled = 1; + dsp_rx_off(dsp); + break; + case DSP_MIX_ON: /* enable mixing of tx data */ + if (dsp->hdlc) { + ret = -EINVAL; + break; + } + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: enable mixing of " + "tx-data with conf mebers\n", __func__); + dsp->tx_mix = 1; + dsp_cmx_hardware(dsp->conf, dsp); + dsp_rx_off(dsp); + if (dsp_debug & DEBUG_DSP_CMX) + dsp_cmx_debug(dsp); + break; + case DSP_MIX_OFF: /* disable mixing of tx data */ + if (dsp->hdlc) { + ret = -EINVAL; + break; + } + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: disable mixing of " + "tx-data with conf mebers\n", __func__); + dsp->tx_mix = 0; + dsp_cmx_hardware(dsp->conf, dsp); + dsp_rx_off(dsp); + if (dsp_debug & DEBUG_DSP_CMX) + dsp_cmx_debug(dsp); + break; + case 
DSP_TXDATA_ON: /* enable txdata */ + dsp->tx_data = 1; + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: enable tx-data\n", __func__); + dsp_cmx_hardware(dsp->conf, dsp); + dsp_rx_off(dsp); + if (dsp_debug & DEBUG_DSP_CMX) + dsp_cmx_debug(dsp); + break; + case DSP_TXDATA_OFF: /* disable txdata */ + dsp->tx_data = 0; + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: disable tx-data\n", __func__); + dsp_cmx_hardware(dsp->conf, dsp); + dsp_rx_off(dsp); + if (dsp_debug & DEBUG_DSP_CMX) + dsp_cmx_debug(dsp); + break; + case DSP_DELAY: /* use delay algorithm instead of dynamic + jitter algorithm */ + if (dsp->hdlc) { + ret = -EINVAL; + break; + } + if (len < sizeof(int)) { + ret = -EINVAL; + break; + } + dsp->cmx_delay = (*((int *)data)) << 3; + /* miliseconds to samples */ + if (dsp->cmx_delay >= (CMX_BUFF_HALF>>1)) + /* clip to half of maximum usable buffer + (half of half buffer) */ + dsp->cmx_delay = (CMX_BUFF_HALF>>1) - 1; + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: use delay algorithm to " + "compensate jitter (%d samples)\n", + __func__, dsp->cmx_delay); + break; + case DSP_JITTER: /* use dynamic jitter algorithm instead of + delay algorithm */ + if (dsp->hdlc) { + ret = -EINVAL; + break; + } + dsp->cmx_delay = 0; + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: use jitter algorithm to " + "compensate jitter\n", __func__); + break; + case DSP_TX_DEJITTER: /* use dynamic jitter algorithm for tx-buffer */ + if (dsp->hdlc) { + ret = -EINVAL; + break; + } + dsp->tx_dejitter = 1; + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: use dejitter on TX " + "buffer\n", __func__); + break; + case DSP_TX_DEJ_OFF: /* use tx-buffer without dejittering*/ + if (dsp->hdlc) { + ret = -EINVAL; + break; + } + dsp->tx_dejitter = 0; + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: use TX buffer without " + "dejittering\n", __func__); + break; + case DSP_PIPELINE_CFG: + if (dsp->hdlc) { + ret = -EINVAL; + break; + } + if (len > 0 && ((char *)data)[len - 1]) { + printk(KERN_DEBUG "%s: pipeline config string " + "is not NULL terminated!\n", __func__); + ret = -EINVAL; + } else { + dsp->pipeline.inuse = 1; + dsp_cmx_hardware(dsp->conf, dsp); + ret = dsp_pipeline_build(&dsp->pipeline, + len > 0 ? 
(char *)data : NULL); + dsp_cmx_hardware(dsp->conf, dsp); + dsp_rx_off(dsp); + } + break; + case DSP_BF_ENABLE_KEY: /* turn blowfish on */ + if (dsp->hdlc) { + ret = -EINVAL; + break; + } + if (len < 4 || len > 56) { + ret = -EINVAL; + break; + } + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: turn blowfish on (key " + "not shown)\n", __func__); + ret = dsp_bf_init(dsp, (u8 *)data, len); + /* set new cont */ + if (!ret) + cont = DSP_BF_ACCEPT; + else + cont = DSP_BF_REJECT; + /* send indication if it worked to set it */ + nskb = _alloc_mISDN_skb(PH_CONTROL_IND, MISDN_ID_ANY, + sizeof(int), &cont, GFP_ATOMIC); + if (nskb) { + if (dsp->up) { + if (dsp->up->send(dsp->up, nskb)) + dev_kfree_skb(nskb); + } else + dev_kfree_skb(nskb); + } + if (!ret) { + dsp_cmx_hardware(dsp->conf, dsp); + dsp_dtmf_hardware(dsp); + dsp_rx_off(dsp); + } + break; + case DSP_BF_DISABLE: /* turn blowfish off */ + if (dsp->hdlc) { + ret = -EINVAL; + break; + } + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: turn blowfish off\n", __func__); + dsp_bf_cleanup(dsp); + dsp_cmx_hardware(dsp->conf, dsp); + dsp_dtmf_hardware(dsp); + dsp_rx_off(dsp); + break; + default: + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: ctrl req %x unhandled\n", + __func__, cont); + ret = -EINVAL; + } + return ret; +} + +static void +get_features(struct mISDNchannel *ch) +{ + struct dsp *dsp = container_of(ch, struct dsp, ch); + struct mISDN_ctrl_req cq; + + if (dsp_options & DSP_OPT_NOHARDWARE) + return; + if (!ch->peer) { + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: no peer, no features\n", + __func__); + return; + } + memset(&cq, 0, sizeof(cq)); + cq.op = MISDN_CTRL_GETOP; + if (ch->peer->ctrl(ch->peer, CONTROL_CHANNEL, &cq) < 0) { + printk(KERN_DEBUG "%s: CONTROL_CHANNEL failed\n", + __func__); + return; + } + if (cq.op & MISDN_CTRL_RX_OFF) + dsp->features_rx_off = 1; + if ((cq.op & MISDN_CTRL_HW_FEATURES_OP)) { + cq.op = MISDN_CTRL_HW_FEATURES; + *((u_long *)&cq.p1) = (u_long)&dsp->features; + if (ch->peer->ctrl(ch->peer, CONTROL_CHANNEL, &cq)) { + printk(KERN_DEBUG "%s: 2nd CONTROL_CHANNEL failed\n", + __func__); + } + } else + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: features not supported for %s\n", + __func__, dsp->name); +} + +static int +dsp_function(struct mISDNchannel *ch, struct sk_buff *skb) +{ + struct dsp *dsp = container_of(ch, struct dsp, ch); + struct mISDNhead *hh; + int ret = 0; + u8 *digits; + int cont; + struct sk_buff *nskb; + u_long flags; + + hh = mISDN_HEAD_P(skb); + switch (hh->prim) { + /* FROM DOWN */ + case (PH_DATA_CNF): + dsp->data_pending = 0; + /* trigger next hdlc frame, if any */ + if (dsp->hdlc) { + spin_lock_irqsave(&dsp_lock, flags); + if (dsp->b_active) + schedule_work(&dsp->workq); + spin_unlock_irqrestore(&dsp_lock, flags); + } + break; + case (PH_DATA_IND): + case (DL_DATA_IND): + if (skb->len < 1) { + ret = -EINVAL; + break; + } + if (dsp->rx_is_off) { + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: rx-data during rx_off" + " for %s\n", + __func__, dsp->name); + } + if (dsp->hdlc) { + /* hdlc */ + spin_lock_irqsave(&dsp_lock, flags); + dsp_cmx_hdlc(dsp, skb); + spin_unlock_irqrestore(&dsp_lock, flags); + if (dsp->rx_disabled) { + /* if receive is not allowed */ + break; + } + hh->prim = DL_DATA_IND; + if (dsp->up) + return dsp->up->send(dsp->up, skb); + break; + } + + /* decrypt if enabled */ + if (dsp->bf_enable) + dsp_bf_decrypt(dsp, skb->data, skb->len); + /* pipeline */ + if (dsp->pipeline.inuse) + 
dsp_pipeline_process_rx(&dsp->pipeline, skb->data, + skb->len); + /* change volume if requested */ + if (dsp->rx_volume) + dsp_change_volume(skb, dsp->rx_volume); + + /* check if dtmf soft decoding is turned on */ + if (dsp->dtmf.software) { + digits = dsp_dtmf_goertzel_decode(dsp, skb->data, + skb->len, (dsp_options&DSP_OPT_ULAW)?1:0); + while (*digits) { + if (dsp_debug & DEBUG_DSP_DTMF) + printk(KERN_DEBUG "%s: digit" + "(%c) to layer %s\n", + __func__, *digits, dsp->name); + cont = DTMF_TONE_VAL | *digits; + nskb = _alloc_mISDN_skb(PH_CONTROL_IND, + MISDN_ID_ANY, sizeof(int), &cont, + GFP_ATOMIC); + if (nskb) { + if (dsp->up) { + if (dsp->up->send( + dsp->up, nskb)) + dev_kfree_skb(nskb); + } else + dev_kfree_skb(nskb); + } + digits++; + } + } + /* we need to process receive data if software */ + spin_lock_irqsave(&dsp_lock, flags); + if (dsp->pcm_slot_tx < 0 && dsp->pcm_slot_rx < 0) { + /* process data from card at cmx */ + dsp_cmx_receive(dsp, skb); + } + spin_unlock_irqrestore(&dsp_lock, flags); + + if (dsp->rx_disabled) { + /* if receive is not allowed */ + break; + } + hh->prim = DL_DATA_IND; + if (dsp->up) + return dsp->up->send(dsp->up, skb); + break; + case (PH_CONTROL_IND): + if (dsp_debug & DEBUG_DSP_DTMFCOEFF) + printk(KERN_DEBUG "%s: PH_CONTROL INDICATION " + "received: %x (len %d) %s\n", __func__, + hh->id, skb->len, dsp->name); + switch (hh->id) { + case (DTMF_HFC_COEF): /* getting coefficients */ + if (!dsp->dtmf.hardware) { + if (dsp_debug & DEBUG_DSP_DTMFCOEFF) + printk(KERN_DEBUG "%s: ignoring DTMF " + "coefficients from HFC\n", + __func__); + break; + } + digits = dsp_dtmf_goertzel_decode(dsp, skb->data, + skb->len, 2); + while (*digits) { + int k; + struct sk_buff *nskb; + if (dsp_debug & DEBUG_DSP_DTMF) + printk(KERN_DEBUG "%s: digit" + "(%c) to layer %s\n", + __func__, *digits, dsp->name); + k = *digits | DTMF_TONE_VAL; + nskb = _alloc_mISDN_skb(PH_CONTROL_IND, + MISDN_ID_ANY, sizeof(int), &k, + GFP_ATOMIC); + if (nskb) { + if (dsp->up) { + if (dsp->up->send( + dsp->up, nskb)) + dev_kfree_skb(nskb); + } else + dev_kfree_skb(nskb); + } + digits++; + } + break; + case (HFC_VOL_CHANGE_TX): /* change volume */ + if (skb->len != sizeof(int)) { + ret = -EINVAL; + break; + } + spin_lock_irqsave(&dsp_lock, flags); + dsp->tx_volume = *((int *)skb->data); + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: change tx volume to " + "%d\n", __func__, dsp->tx_volume); + dsp_cmx_hardware(dsp->conf, dsp); + dsp_dtmf_hardware(dsp); + dsp_rx_off(dsp); + spin_unlock_irqrestore(&dsp_lock, flags); + break; + default: + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: ctrl ind %x unhandled " + "%s\n", __func__, hh->id, dsp->name); + ret = -EINVAL; + } + break; + case (PH_ACTIVATE_IND): + case (PH_ACTIVATE_CNF): + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: b_channel is now active %s\n", + __func__, dsp->name); + /* bchannel now active */ + spin_lock_irqsave(&dsp_lock, flags); + dsp->b_active = 1; + dsp->data_pending = 0; + dsp->rx_init = 1; + /* rx_W and rx_R will be adjusted on first frame */ + dsp->rx_W = 0; + dsp->rx_R = 0; + memset(dsp->rx_buff, 0, sizeof(dsp->rx_buff)); + dsp_cmx_hardware(dsp->conf, dsp); + dsp_dtmf_hardware(dsp); + dsp_rx_off(dsp); + spin_unlock_irqrestore(&dsp_lock, flags); + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: done with activation, sending " + "confirm to user space. 
%s\n", __func__, + dsp->name); + /* send activation to upper layer */ + hh->prim = DL_ESTABLISH_CNF; + if (dsp->up) + return dsp->up->send(dsp->up, skb); + break; + case (PH_DEACTIVATE_IND): + case (PH_DEACTIVATE_CNF): + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: b_channel is now inactive %s\n", + __func__, dsp->name); + /* bchannel now inactive */ + spin_lock_irqsave(&dsp_lock, flags); + dsp->b_active = 0; + dsp->data_pending = 0; + dsp_cmx_hardware(dsp->conf, dsp); + dsp_rx_off(dsp); + spin_unlock_irqrestore(&dsp_lock, flags); + hh->prim = DL_RELEASE_CNF; + if (dsp->up) + return dsp->up->send(dsp->up, skb); + break; + /* FROM UP */ + case (DL_DATA_REQ): + case (PH_DATA_REQ): + if (skb->len < 1) { + ret = -EINVAL; + break; + } + if (dsp->hdlc) { + /* hdlc */ + spin_lock_irqsave(&dsp_lock, flags); + if (dsp->b_active) { + skb_queue_tail(&dsp->sendq, skb); + schedule_work(&dsp->workq); + } + spin_unlock_irqrestore(&dsp_lock, flags); + return 0; + } + /* send data to tx-buffer (if no tone is played) */ + if (!dsp->tone.tone) { + spin_lock_irqsave(&dsp_lock, flags); + dsp_cmx_transmit(dsp, skb); + spin_unlock_irqrestore(&dsp_lock, flags); + } + break; + case (PH_CONTROL_REQ): + spin_lock_irqsave(&dsp_lock, flags); + ret = dsp_control_req(dsp, hh, skb); + spin_unlock_irqrestore(&dsp_lock, flags); + break; + case (DL_ESTABLISH_REQ): + case (PH_ACTIVATE_REQ): + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: activating b_channel %s\n", + __func__, dsp->name); + if (dsp->dtmf.hardware || dsp->dtmf.software) + dsp_dtmf_goertzel_init(dsp); + get_features(ch); + /* send ph_activate */ + hh->prim = PH_ACTIVATE_REQ; + if (ch->peer) + return ch->recv(ch->peer, skb); + break; + case (DL_RELEASE_REQ): + case (PH_DEACTIVATE_REQ): + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: releasing b_channel %s\n", + __func__, dsp->name); + spin_lock_irqsave(&dsp_lock, flags); + dsp->tone.tone = 0; + dsp->tone.hardware = 0; + dsp->tone.software = 0; + if (timer_pending(&dsp->tone.tl)) + del_timer(&dsp->tone.tl); + if (dsp->conf) + dsp_cmx_conf(dsp, 0); /* dsp_cmx_hardware will also be + called here */ + skb_queue_purge(&dsp->sendq); + spin_unlock_irqrestore(&dsp_lock, flags); + hh->prim = PH_DEACTIVATE_REQ; + if (ch->peer) + return ch->recv(ch->peer, skb); + break; + default: + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: msg %x unhandled %s\n", + __func__, hh->prim, dsp->name); + ret = -EINVAL; + } + if (!ret) + dev_kfree_skb(skb); + return ret; +} + +static int +dsp_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg) +{ + struct dsp *dsp = container_of(ch, struct dsp, ch); + u_long flags; + int err = 0; + + if (debug & DEBUG_DSP_CTRL) + printk(KERN_DEBUG "%s:(%x)\n", __func__, cmd); + + switch (cmd) { + case OPEN_CHANNEL: + break; + case CLOSE_CHANNEL: + if (dsp->ch.peer) + dsp->ch.peer->ctrl(dsp->ch.peer, CLOSE_CHANNEL, NULL); + + /* wait until workqueue has finished, + * must lock here, or we may hit send-process currently + * queueing. */ + spin_lock_irqsave(&dsp_lock, flags); + dsp->b_active = 0; + spin_unlock_irqrestore(&dsp_lock, flags); + /* MUST not be locked, because it waits until queue is done. 
*/ + cancel_work_sync(&dsp->workq); + spin_lock_irqsave(&dsp_lock, flags); + if (timer_pending(&dsp->tone.tl)) + del_timer(&dsp->tone.tl); + skb_queue_purge(&dsp->sendq); + if (dsp_debug & DEBUG_DSP_CTRL) + printk(KERN_DEBUG "%s: releasing member %s\n", + __func__, dsp->name); + dsp->b_active = 0; + dsp_cmx_conf(dsp, 0); /* dsp_cmx_hardware will also be called + here */ + dsp_pipeline_destroy(&dsp->pipeline); + + if (dsp_debug & DEBUG_DSP_CTRL) + printk(KERN_DEBUG "%s: remove & destroy object %s\n", + __func__, dsp->name); + list_del(&dsp->list); + spin_unlock_irqrestore(&dsp_lock, flags); + + if (dsp_debug & DEBUG_DSP_CTRL) + printk(KERN_DEBUG "%s: dsp instance released\n", + __func__); + vfree(dsp); + module_put(THIS_MODULE); + break; + } + return err; +} + +static void +dsp_send_bh(struct work_struct *work) +{ + struct dsp *dsp = container_of(work, struct dsp, workq); + struct sk_buff *skb; + struct mISDNhead *hh; + + if (dsp->hdlc && dsp->data_pending) + return; /* wait until data has been acknowledged */ + + /* send queued data */ + while ((skb = skb_dequeue(&dsp->sendq))) { + /* in locked date, we must have still data in queue */ + if (dsp->data_pending) { + if (dsp_debug & DEBUG_DSP_CORE) + printk(KERN_DEBUG "%s: fifo full %s, this is " + "no bug!\n", __func__, dsp->name); + /* flush transparent data, if not acked */ + dev_kfree_skb(skb); + continue; + } + hh = mISDN_HEAD_P(skb); + if (hh->prim == DL_DATA_REQ) { + /* send packet up */ + if (dsp->up) { + if (dsp->up->send(dsp->up, skb)) + dev_kfree_skb(skb); + } else + dev_kfree_skb(skb); + } else { + /* send packet down */ + if (dsp->ch.peer) { + dsp->data_pending = 1; + if (dsp->ch.recv(dsp->ch.peer, skb)) { + dev_kfree_skb(skb); + dsp->data_pending = 0; + } + } else + dev_kfree_skb(skb); + } + } +} + +static int +dspcreate(struct channel_req *crq) +{ + struct dsp *ndsp; + u_long flags; + + if (crq->protocol != ISDN_P_B_L2DSP + && crq->protocol != ISDN_P_B_L2DSPHDLC) + return -EPROTONOSUPPORT; + ndsp = vmalloc(sizeof(struct dsp)); + if (!ndsp) { + printk(KERN_ERR "%s: vmalloc struct dsp failed\n", __func__); + return -ENOMEM; + } + memset(ndsp, 0, sizeof(struct dsp)); + if (dsp_debug & DEBUG_DSP_CTRL) + printk(KERN_DEBUG "%s: creating new dsp instance\n", __func__); + + /* default enabled */ + INIT_WORK(&ndsp->workq, (void *)dsp_send_bh); + skb_queue_head_init(&ndsp->sendq); + ndsp->ch.send = dsp_function; + ndsp->ch.ctrl = dsp_ctrl; + ndsp->up = crq->ch; + crq->ch = &ndsp->ch; + if (crq->protocol == ISDN_P_B_L2DSP) { + crq->protocol = ISDN_P_B_RAW; + ndsp->hdlc = 0; + } else { + crq->protocol = ISDN_P_B_HDLC; + ndsp->hdlc = 1; + } + if (!try_module_get(THIS_MODULE)) + printk(KERN_WARNING "%s:cannot get module\n", + __func__); + + sprintf(ndsp->name, "DSP_C%x(0x%p)", + ndsp->up->st->dev->id + 1, ndsp); + /* set frame size to start */ + ndsp->features.hfc_id = -1; /* current PCM id */ + ndsp->features.pcm_id = -1; /* current PCM id */ + ndsp->pcm_slot_rx = -1; /* current CPM slot */ + ndsp->pcm_slot_tx = -1; + ndsp->pcm_bank_rx = -1; + ndsp->pcm_bank_tx = -1; + ndsp->hfc_conf = -1; /* current conference number */ + /* set tone timer */ + ndsp->tone.tl.function = (void *)dsp_tone_timeout; + ndsp->tone.tl.data = (long) ndsp; + init_timer(&ndsp->tone.tl); + + if (dtmfthreshold < 20 || dtmfthreshold > 500) + dtmfthreshold = 200; + ndsp->dtmf.treshold = dtmfthreshold*10000; + + /* init pipeline append to list */ + spin_lock_irqsave(&dsp_lock, flags); + dsp_pipeline_init(&ndsp->pipeline); + list_add_tail(&ndsp->list, &dsp_ilist); + 
spin_unlock_irqrestore(&dsp_lock, flags); + + return 0; +} + + +static struct Bprotocol DSP = { + .Bprotocols = (1 << (ISDN_P_B_L2DSP & ISDN_P_B_MASK)) + | (1 << (ISDN_P_B_L2DSPHDLC & ISDN_P_B_MASK)), + .name = "dsp", + .create = dspcreate +}; + +static int dsp_init(void) +{ + int err; + int tics; + + printk(KERN_INFO "DSP modul %s\n", mISDN_dsp_revision); + + dsp_options = options; + dsp_debug = debug; + + /* set packet size */ + dsp_poll = poll; + if (dsp_poll) { + if (dsp_poll > MAX_POLL) { + printk(KERN_ERR "%s: Wrong poll value (%d), use %d " + "maximum.\n", __func__, poll, MAX_POLL); + err = -EINVAL; + return err; + } + if (dsp_poll < 8) { + printk(KERN_ERR "%s: Wrong poll value (%d), use 8 " + "minimum.\n", __func__, dsp_poll); + err = -EINVAL; + return err; + } + dsp_tics = poll * HZ / 8000; + if (dsp_tics * 8000 != poll * HZ) { + printk(KERN_INFO "mISDN_dsp: Cannot clock every %d " + "samples (0,125 ms). It is not a multiple of " + "%d HZ.\n", poll, HZ); + err = -EINVAL; + return err; + } + } else { + poll = 8; + while (poll <= MAX_POLL) { + tics = poll * HZ / 8000; + if (tics * 8000 == poll * HZ) { + dsp_tics = tics; + dsp_poll = poll; + if (poll >= 64) + break; + } + poll++; + } + } + if (dsp_poll == 0) { + printk(KERN_INFO "mISDN_dsp: There is no multiple of kernel " + "clock that equals exactly the duration of 8-256 " + "samples. (Choose kernel clock speed like 100, 250, " + "300, 1000)\n"); + err = -EINVAL; + return err; + } + printk(KERN_INFO "mISDN_dsp: DSP clocks every %d samples. This equals " + "%d jiffies.\n", dsp_poll, dsp_tics); + + spin_lock_init(&dsp_lock); + INIT_LIST_HEAD(&dsp_ilist); + INIT_LIST_HEAD(&conf_ilist); + + /* init conversion tables */ + dsp_audio_generate_law_tables(); + dsp_silence = (dsp_options&DSP_OPT_ULAW)?0xff:0x2a; + dsp_audio_law_to_s32 = (dsp_options&DSP_OPT_ULAW)?dsp_audio_ulaw_to_s32: + dsp_audio_alaw_to_s32; + dsp_audio_generate_s2law_table(); + dsp_audio_generate_seven(); + dsp_audio_generate_mix_table(); + if (dsp_options & DSP_OPT_ULAW) + dsp_audio_generate_ulaw_samples(); + dsp_audio_generate_volume_changes(); + + err = dsp_pipeline_module_init(); + if (err) { + printk(KERN_ERR "mISDN_dsp: Can't initialize pipeline, " + "error(%d)\n", err); + return err; + } + + err = mISDN_register_Bprotocol(&DSP); + if (err) { + printk(KERN_ERR "Can't register %s error(%d)\n", DSP.name, err); + return err; + } + + /* set sample timer */ + dsp_spl_tl.function = (void *)dsp_cmx_send; + dsp_spl_tl.data = 0; + init_timer(&dsp_spl_tl); + dsp_spl_tl.expires = jiffies + dsp_tics; + dsp_spl_jiffies = dsp_spl_tl.expires; + add_timer(&dsp_spl_tl); + + return 0; +} + + +static void dsp_cleanup(void) +{ + mISDN_unregister_Bprotocol(&DSP); + + if (timer_pending(&dsp_spl_tl)) + del_timer(&dsp_spl_tl); + + if (!list_empty(&dsp_ilist)) { + printk(KERN_ERR "mISDN_dsp: Audio DSP object inst list not " + "empty.\n"); + } + if (!list_empty(&conf_ilist)) { + printk(KERN_ERR "mISDN_dsp: Conference list not empty. Not " + "all memory freed.\n"); + } + + dsp_pipeline_module_exit(); +} + +module_init(dsp_init); +module_exit(dsp_cleanup); + diff --git a/drivers/isdn/mISDN/dsp_dtmf.c b/drivers/isdn/mISDN/dsp_dtmf.c new file mode 100644 index 000000000000..efc371c1f0dc --- /dev/null +++ b/drivers/isdn/mISDN/dsp_dtmf.c @@ -0,0 +1,303 @@ +/* + * DTMF decoder. 
+ * + * Copyright by Andreas Eversberg (jolly@eversberg.eu) + * based on different decoders such as ISDN4Linux + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + * + */ + +#include +#include +#include "core.h" +#include "dsp.h" + +#define NCOEFF 8 /* number of frequencies to be analyzed */ + +/* For DTMF recognition: + * 2 * cos(2 * PI * k / N) precalculated for all k + */ +static u64 cos2pik[NCOEFF] = +{ + /* k << 15 (source: hfc-4s/8s documentation (www.colognechip.de)) */ + 55960, 53912, 51402, 48438, 38146, 32650, 26170, 18630 +}; + +/* digit matrix */ +static char dtmf_matrix[4][4] = +{ + {'1', '2', '3', 'A'}, + {'4', '5', '6', 'B'}, + {'7', '8', '9', 'C'}, + {'*', '0', '#', 'D'} +}; + +/* dtmf detection using goertzel algorithm + * init function + */ +void dsp_dtmf_goertzel_init(struct dsp *dsp) +{ + dsp->dtmf.size = 0; + dsp->dtmf.lastwhat = '\0'; + dsp->dtmf.lastdigit = '\0'; + dsp->dtmf.count = 0; +} + +/* check for hardware or software features + */ +void dsp_dtmf_hardware(struct dsp *dsp) +{ + int hardware = 1; + + if (!dsp->features.hfc_dtmf) + hardware = 0; + + /* check for volume change */ + if (dsp->tx_volume) { + if (dsp_debug & DEBUG_DSP_DTMF) + printk(KERN_DEBUG "%s dsp %s cannot do hardware DTMF, " + "because tx_volume is changed\n", + __func__, dsp->name); + hardware = 0; + } + if (dsp->rx_volume) { + if (dsp_debug & DEBUG_DSP_DTMF) + printk(KERN_DEBUG "%s dsp %s cannot do hardware DTMF, " + "because rx_volume is changed\n", + __func__, dsp->name); + hardware = 0; + } + /* check if encryption is enabled */ + if (dsp->bf_enable) { + if (dsp_debug & DEBUG_DSP_DTMF) + printk(KERN_DEBUG "%s dsp %s cannot do hardware DTMF, " + "because encryption is enabled\n", + __func__, dsp->name); + hardware = 0; + } + /* check if pipeline exists */ + if (dsp->pipeline.inuse) { + if (dsp_debug & DEBUG_DSP_DTMF) + printk(KERN_DEBUG "%s dsp %s cannot do hardware DTMF, " + "because pipeline exists.\n", + __func__, dsp->name); + hardware = 0; + } + + dsp->dtmf.hardware = hardware; + dsp->dtmf.software = !hardware; +} + + +/************************************************************* + * calculate the coefficients of the given sample and decode * + *************************************************************/ + +/* the given sample is decoded. if the sample is not long enough for a + * complete frame, the decoding is finished and continued with the next + * call of this function. + * + * the algorithm is very good for detection with a minimum of errors. i + * tested it allot. it even works with very short tones (40ms). the only + * disadvantage is, that it doesn't work good with different volumes of both + * tones. this will happen, if accoustically coupled dialers are used. + * it sometimes detects tones during speach, which is normal for decoders. + * use sequences to given commands during calls. + * + * dtmf - points to a structure of the current dtmf state + * spl and len - the sample + * fmt - 0 = alaw, 1 = ulaw, 2 = coefficients from HFC DTMF hw-decoder + */ + +u8 +*dsp_dtmf_goertzel_decode(struct dsp *dsp, u8 *data, int len, int fmt) +{ + u8 what; + int size; + signed short *buf; + s32 sk, sk1, sk2; + int k, n, i; + s32 *hfccoeff; + s32 result[NCOEFF], tresh, treshl; + int lowgroup, highgroup; + s64 cos2pik_; + + dsp->dtmf.digits[0] = '\0'; + + /* Note: The function will loop until the buffer has not enough samples + * left to decode a full frame. 
+ */ +again: + /* convert samples */ + size = dsp->dtmf.size; + buf = dsp->dtmf.buffer; + switch (fmt) { + case 0: /* alaw */ + case 1: /* ulaw */ + while (size < DSP_DTMF_NPOINTS && len) { + buf[size++] = dsp_audio_law_to_s32[*data++]; + len--; + } + break; + + case 2: /* HFC coefficients */ + default: + if (len < 64) { + if (len > 0) + printk(KERN_ERR "%s: coefficients have invalid " + "size. (is=%d < must=%d)\n", + __func__, len, 64); + return dsp->dtmf.digits; + } + hfccoeff = (s32 *)data; + for (k = 0; k < NCOEFF; k++) { + sk2 = (*hfccoeff++)>>4; + sk = (*hfccoeff++)>>4; + if (sk > 32767 || sk < -32767 || sk2 > 32767 + || sk2 < -32767) + printk(KERN_WARNING + "DTMF-Detection overflow\n"); + /* compute |X(k)|**2 */ + result[k] = + (sk * sk) - + (((cos2pik[k] * sk) >> 15) * sk2) + + (sk2 * sk2); + } + data += 64; + len -= 64; + goto coefficients; + break; + } + dsp->dtmf.size = size; + + if (size < DSP_DTMF_NPOINTS) + return dsp->dtmf.digits; + + dsp->dtmf.size = 0; + + /* now we have a full buffer of signed long samples - we do goertzel */ + for (k = 0; k < NCOEFF; k++) { + sk = 0; + sk1 = 0; + sk2 = 0; + buf = dsp->dtmf.buffer; + cos2pik_ = cos2pik[k]; + for (n = 0; n < DSP_DTMF_NPOINTS; n++) { + sk = ((cos2pik_*sk1)>>15) - sk2 + (*buf++); + sk2 = sk1; + sk1 = sk; + } + sk >>= 8; + sk2 >>= 8; + if (sk > 32767 || sk < -32767 || sk2 > 32767 || sk2 < -32767) + printk(KERN_WARNING "DTMF-Detection overflow\n"); + /* compute |X(k)|**2 */ + result[k] = + (sk * sk) - + (((cos2pik[k] * sk) >> 15) * sk2) + + (sk2 * sk2); + } + + /* our (squared) coefficients have been calculated, we need to process + * them. + */ +coefficients: + tresh = 0; + for (i = 0; i < NCOEFF; i++) { + if (result[i] < 0) + result[i] = 0; + if (result[i] > dsp->dtmf.treshold) { + if (result[i] > tresh) + tresh = result[i]; + } + } + + if (tresh == 0) { + what = 0; + goto storedigit; + } + + if (dsp_debug & DEBUG_DSP_DTMFCOEFF) + printk(KERN_DEBUG "a %3d %3d %3d %3d %3d %3d %3d %3d" + " tr:%3d r %3d %3d %3d %3d %3d %3d %3d %3d\n", + result[0]/10000, result[1]/10000, result[2]/10000, + result[3]/10000, result[4]/10000, result[5]/10000, + result[6]/10000, result[7]/10000, tresh/10000, + result[0]/(tresh/100), result[1]/(tresh/100), + result[2]/(tresh/100), result[3]/(tresh/100), + result[4]/(tresh/100), result[5]/(tresh/100), + result[6]/(tresh/100), result[7]/(tresh/100)); + + /* calc digit (lowgroup/highgroup) */ + lowgroup = -1; + highgroup = -1; + treshl = tresh >> 3; /* tones which are not on, must be below 9 dB */ + tresh = tresh >> 2; /* touchtones must match within 6 dB */ + for (i = 0; i < NCOEFF; i++) { + if (result[i] < treshl) + continue; /* ignore */ + if (result[i] < tresh) { + lowgroup = -1; + highgroup = -1; + break; /* noise inbetween */ + } + /* good level found. This is allowed only one time per group */ + if (i < NCOEFF/2) { + /* lowgroup */ + if (lowgroup >= 0) { + /* Bad. Another tone found. */ + lowgroup = -1; + break; + } else + lowgroup = i; + } else { + /* higroup */ + if (highgroup >= 0) { + /* Bad. Another tone found. 
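The per-frequency energies computed above follow the Goertzel recursion s(n) = x(n) + 2*cos(2*pi*f/8000)*s(n-1) - s(n-2), with the squared magnitude s1*s1 + s2*s2 - 2*cos(...)*s1*s2; the cos2pik[] table holds those cosines scaled by 1 << 15. Below is a floating-point, user-space sketch of the same computation. The frame length, helper names and the synthetic test signal are made up for the example; the driver itself works in fixed point on DSP_DTMF_NPOINTS samples.

#include <math.h>
#include <stdio.h>

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

#define NPOINTS 102     /* assumed frame length for the demo */

/* The eight DTMF frequencies analysed by the driver (low group, high group). */
static const double dtmf_freq[8] = {
        697, 770, 852, 941, 1209, 1336, 1477, 1633
};

/* Goertzel: squared magnitude of one frequency bin over a sample frame. */
static double goertzel_power(const short *samples, int n, double freq,
                             double sample_rate)
{
        double coeff = 2.0 * cos(2.0 * M_PI * freq / sample_rate);
        double s = 0.0, s1 = 0.0, s2 = 0.0;
        int i;

        for (i = 0; i < n; i++) {
                s = coeff * s1 - s2 + samples[i];
                s2 = s1;
                s1 = s;
        }
        /* |X(k)|^2 = s1^2 + s2^2 - coeff * s1 * s2 */
        return s1 * s1 + s2 * s2 - coeff * s1 * s2;
}

int main(void)
{
        short frame[NPOINTS];
        int i, k;

        /* Synthesise a '5' (770 Hz + 1336 Hz) at 8 kHz for the demo. */
        for (i = 0; i < NPOINTS; i++)
                frame[i] = (short)(5000 * sin(2 * M_PI * 770 * i / 8000.0) +
                                   5000 * sin(2 * M_PI * 1336 * i / 8000.0));

        for (k = 0; k < 8; k++)
                printf("%4.0f Hz: %14.0f\n", dtmf_freq[k],
                       goertzel_power(frame, NPOINTS, dtmf_freq[k], 8000.0));
        return 0;
}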
*/ + highgroup = -1; + break; + } else + highgroup = i-(NCOEFF/2); + } + } + + /* get digit or null */ + what = 0; + if (lowgroup >= 0 && highgroup >= 0) + what = dtmf_matrix[lowgroup][highgroup]; + +storedigit: + if (what && (dsp_debug & DEBUG_DSP_DTMF)) + printk(KERN_DEBUG "DTMF what: %c\n", what); + + if (dsp->dtmf.lastwhat != what) + dsp->dtmf.count = 0; + + /* the tone (or no tone) must remain 3 times without change */ + if (dsp->dtmf.count == 2) { + if (dsp->dtmf.lastdigit != what) { + dsp->dtmf.lastdigit = what; + if (what) { + if (dsp_debug & DEBUG_DSP_DTMF) + printk(KERN_DEBUG "DTMF digit: %c\n", + what); + if ((strlen(dsp->dtmf.digits)+1) + < sizeof(dsp->dtmf.digits)) { + dsp->dtmf.digits[strlen( + dsp->dtmf.digits)+1] = '\0'; + dsp->dtmf.digits[strlen( + dsp->dtmf.digits)] = what; + } + } + } + } else + dsp->dtmf.count++; + + dsp->dtmf.lastwhat = what; + + goto again; +} + + diff --git a/drivers/isdn/mISDN/dsp_ecdis.h b/drivers/isdn/mISDN/dsp_ecdis.h new file mode 100644 index 000000000000..8a20af43308b --- /dev/null +++ b/drivers/isdn/mISDN/dsp_ecdis.h @@ -0,0 +1,110 @@ +/* + * SpanDSP - a series of DSP components for telephony + * + * ec_disable_detector.h - A detector which should eventually meet the + * G.164/G.165 requirements for detecting the + * 2100Hz echo cancellor disable tone. + * + * Written by Steve Underwood + * + * Copyright (C) 2001 Steve Underwood + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#include "dsp_biquad.h" + +struct ec_disable_detector_state { + struct biquad2_state notch; + int notch_level; + int channel_level; + int tone_present; + int tone_cycle_duration; + int good_cycles; + int hit; +}; + + +#define FALSE 0 +#define TRUE (!FALSE) + +static inline void +echo_can_disable_detector_init(struct ec_disable_detector_state *det) +{ + /* Elliptic notch */ + /* This is actually centred at 2095Hz, but gets the balance we want, due + to the asymmetric walls of the notch */ + biquad2_init(&det->notch, + (int32_t) (-0.7600000*32768.0), + (int32_t) (-0.1183852*32768.0), + (int32_t) (-0.5104039*32768.0), + (int32_t) (0.1567596*32768.0), + (int32_t) (1.0000000*32768.0)); + + det->channel_level = 0; + det->notch_level = 0; + det->tone_present = FALSE; + det->tone_cycle_duration = 0; + det->good_cycles = 0; + det->hit = 0; +} +/*- End of function --------------------------------------------------------*/ + +static inline int +echo_can_disable_detector_update(struct ec_disable_detector_state *det, +int16_t amp) +{ + int16_t notched; + + notched = biquad2(&det->notch, amp); + /* Estimate the overall energy in the channel, and the energy in + the notch (i.e. overall channel energy - tone energy => noise). + Use abs instead of multiply for speed (is it really faster?). + Damp the overall energy a little more for a stable result. 
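The storedigit path above only reports a digit (or a return to silence) after it has been seen in three consecutive frames. A tiny stand-alone sketch of that debouncing rule, with hypothetical structure and function names:

#include <stdio.h>

struct debounce {
        char lastwhat;  /* most recent raw detection ('\0' = silence)  */
        char lastdigit; /* last value that was actually reported       */
        int  count;     /* consecutive frames that gave 'lastwhat'     */
};

/* Returns the digit to report, or 0 if nothing new is confirmed yet. */
static char debounce_update(struct debounce *d, char what)
{
        char report = 0;

        if (d->lastwhat != what)
                d->count = 0;           /* detection changed: restart counting */

        if (d->count == 2) {            /* third identical frame in a row */
                if (d->lastdigit != what) {
                        d->lastdigit = what;
                        report = what;  /* 0 when confirming silence */
                }
        } else {
                d->count++;
        }
        d->lastwhat = what;
        return report;
}

int main(void)
{
        struct debounce d = { 0, 0, 0 };
        const char frames[] = { '5', '5', '5', '5', 0, 0, 0, '8', '8', '8' };
        int i;

        for (i = 0; i < (int)sizeof(frames); i++) {
                char c = debounce_update(&d, frames[i]);
                if (c)
                        printf("frame %d: confirmed digit %c\n", i, c);
        }
        return 0;
}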
+ Damp the notch energy a little less, so we don't damp out the + blip every time the phase reverses */ + det->channel_level += ((abs(amp) - det->channel_level) >> 5); + det->notch_level += ((abs(notched) - det->notch_level) >> 4); + if (det->channel_level > 280) { + /* There is adequate energy in the channel. + Is it mostly at 2100Hz? */ + if (det->notch_level*6 < det->channel_level) { + /* The notch says yes, so we have the tone. */ + if (!det->tone_present) { + /* Do we get a kick every 450+-25ms? */ + if (det->tone_cycle_duration >= 425*8 + && det->tone_cycle_duration <= 475*8) { + det->good_cycles++; + if (det->good_cycles > 2) + det->hit = TRUE; + } + det->tone_cycle_duration = 0; + } + det->tone_present = TRUE; + } else + det->tone_present = FALSE; + det->tone_cycle_duration++; + } else { + det->tone_present = FALSE; + det->tone_cycle_duration = 0; + det->good_cycles = 0; + } + return det->hit; +} +/*- End of function --------------------------------------------------------*/ +/*- End of file ------------------------------------------------------------*/ diff --git a/drivers/isdn/mISDN/dsp_hwec.c b/drivers/isdn/mISDN/dsp_hwec.c new file mode 100644 index 000000000000..eb892d9dd5c6 --- /dev/null +++ b/drivers/isdn/mISDN/dsp_hwec.c @@ -0,0 +1,138 @@ +/* + * dsp_hwec.c: + * builtin mISDN dsp pipeline element for enabling the hw echocanceller + * + * Copyright (C) 2007, Nadi Sarrar + * + * Nadi Sarrar + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called LICENSE. + * + */ + +#include +#include +#include +#include +#include "core.h" +#include "dsp.h" +#include "dsp_hwec.h" + +static struct mISDN_dsp_element_arg args[] = { + { "deftaps", "128", "Set the number of taps of cancellation." 
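The disable-tone detector above keeps two leaky-integrator level estimates, one of the raw signal and one of the signal after the 2100 Hz notch, and declares the tone present while the notch removes most of the energy. Below is a simplified user-space sketch of just that level tracking and comparison; the biquad notch itself is omitted, the threshold 280 and the factor 6 are taken from the code above, and the structure and function names are invented.

#include <stdio.h>
#include <stdlib.h>

struct tone_levels {
        int channel_level;      /* leaky average of |amp|     */
        int notch_level;        /* leaky average of |notched| */
};

/*
 * amp     - raw 16-bit sample
 * notched - the same sample after the 2100 Hz notch filter
 *           (produced by the biquad in dsp_ecdis.h)
 *
 * Returns nonzero while the channel energy is dominated by ~2100 Hz.
 */
static int levels_update(struct tone_levels *t, int amp, int notched)
{
        /* First-order IIR ("leaky integrator") level estimates:
         * level += (|x| - level) / 2^k. The channel level is damped a
         * little more (k=5) than the notch level (k=4). */
        t->channel_level += (abs(amp) - t->channel_level) >> 5;
        t->notch_level   += (abs(notched) - t->notch_level) >> 4;

        if (t->channel_level <= 280)
                return 0;       /* not enough energy in the channel */

        /* Tone assumed present if the notch removed most of the energy. */
        return t->notch_level * 6 < t->channel_level;
}

int main(void)
{
        struct tone_levels t = { 0, 0 };
        int i, hit = 0;

        /* Fake input: strong signal whose notched residue is tiny,
         * i.e. almost all energy sits at ~2100 Hz. */
        for (i = 0; i < 1000; i++)
                hit = levels_update(&t, 8000, 100);

        printf("tone detected: %s\n", hit ? "yes" : "no");
        return 0;
}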
}, +}; + +static struct mISDN_dsp_element dsp_hwec_p = { + .name = "hwec", + .new = NULL, + .free = NULL, + .process_tx = NULL, + .process_rx = NULL, + .num_args = sizeof(args) / sizeof(struct mISDN_dsp_element_arg), + .args = args, +}; +struct mISDN_dsp_element *dsp_hwec = &dsp_hwec_p; + +void dsp_hwec_enable(struct dsp *dsp, const char *arg) +{ + int deftaps = 128, + len; + struct mISDN_ctrl_req cq; + + if (!dsp) { + printk(KERN_ERR "%s: failed to enable hwec: dsp is NULL\n", + __func__); + return; + } + + if (!arg) + goto _do; + + len = strlen(arg); + if (!len) + goto _do; + + { + char _dup[len + 1]; + char *dup, *tok, *name, *val; + int tmp; + + strcpy(_dup, arg); + dup = _dup; + + while ((tok = strsep(&dup, ","))) { + if (!strlen(tok)) + continue; + name = strsep(&tok, "="); + val = tok; + + if (!val) + continue; + + if (!strcmp(name, "deftaps")) { + if (sscanf(val, "%d", &tmp) == 1) + deftaps = tmp; + } + } + } + +_do: + printk(KERN_DEBUG "%s: enabling hwec with deftaps=%d\n", + __func__, deftaps); + memset(&cq, 0, sizeof(cq)); + cq.op = MISDN_CTRL_HFC_ECHOCAN_ON; + cq.p1 = deftaps; + if (!dsp->ch.peer->ctrl(&dsp->ch, CONTROL_CHANNEL, &cq)) { + printk(KERN_DEBUG "%s: CONTROL_CHANNEL failed\n", + __func__); + return; + } +} + +void dsp_hwec_disable(struct dsp *dsp) +{ + struct mISDN_ctrl_req cq; + + if (!dsp) { + printk(KERN_ERR "%s: failed to disable hwec: dsp is NULL\n", + __func__); + return; + } + + printk(KERN_DEBUG "%s: disabling hwec\n", __func__); + memset(&cq, 0, sizeof(cq)); + cq.op = MISDN_CTRL_HFC_ECHOCAN_OFF; + if (!dsp->ch.peer->ctrl(&dsp->ch, CONTROL_CHANNEL, &cq)) { + printk(KERN_DEBUG "%s: CONTROL_CHANNEL failed\n", + __func__); + return; + } +} + +int dsp_hwec_init(void) +{ + mISDN_dsp_element_register(dsp_hwec); + + return 0; +} + +void dsp_hwec_exit(void) +{ + mISDN_dsp_element_unregister(dsp_hwec); +} + diff --git a/drivers/isdn/mISDN/dsp_hwec.h b/drivers/isdn/mISDN/dsp_hwec.h new file mode 100644 index 000000000000..eebe80c3f713 --- /dev/null +++ b/drivers/isdn/mISDN/dsp_hwec.h @@ -0,0 +1,10 @@ +/* + * dsp_hwec.h + */ + +extern struct mISDN_dsp_element *dsp_hwec; +extern void dsp_hwec_enable(struct dsp *dsp, const char *arg); +extern void dsp_hwec_disable(struct dsp *dsp); +extern int dsp_hwec_init(void); +extern void dsp_hwec_exit(void); + diff --git a/drivers/isdn/mISDN/dsp_pipeline.c b/drivers/isdn/mISDN/dsp_pipeline.c new file mode 100644 index 000000000000..850260ab57d0 --- /dev/null +++ b/drivers/isdn/mISDN/dsp_pipeline.c @@ -0,0 +1,348 @@ +/* + * dsp_pipeline.c: pipelined audio processing + * + * Copyright (C) 2007, Nadi Sarrar + * + * Nadi Sarrar + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called LICENSE. 
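dsp_hwec_enable() above accepts an option string such as "deftaps=256" and otherwise falls back to 128 taps. A user-space sketch of the same name=value parsing with strsep() and sscanf(); only the "deftaps" key is handled, and the function name is made up for the example.

#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parse "name=value[,name=value...]" and return the deftaps setting. */
static int parse_deftaps(const char *arg)
{
        int deftaps = 128;      /* default, as advertised in args[] */
        char *dup, *walk, *tok, *name, *val;
        int tmp;

        if (!arg || !*arg)
                return deftaps;

        dup = strdup(arg);
        if (!dup)
                return deftaps;
        walk = dup;

        while ((tok = strsep(&walk, ","))) {
                if (!*tok)
                        continue;
                name = strsep(&tok, "=");
                val = tok;
                if (!val)
                        continue;
                if (!strcmp(name, "deftaps") && sscanf(val, "%d", &tmp) == 1)
                        deftaps = tmp;
        }
        free(dup);
        return deftaps;
}

int main(void)
{
        printf("%d\n", parse_deftaps("deftaps=256"));           /* 256 */
        printf("%d\n", parse_deftaps(""));                      /* 128 */
        printf("%d\n", parse_deftaps("foo=1,deftaps=64"));      /*  64 */
        return 0;
}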
+ * + */ + +#include +#include +#include +#include +#include +#include "dsp.h" +#include "dsp_hwec.h" + +/* uncomment for debugging */ +/*#define PIPELINE_DEBUG*/ + +struct dsp_pipeline_entry { + struct mISDN_dsp_element *elem; + void *p; + struct list_head list; +}; +struct dsp_element_entry { + struct mISDN_dsp_element *elem; + struct device dev; + struct list_head list; +}; + +static LIST_HEAD(dsp_elements); + +/* sysfs */ +static struct class *elements_class; + +static ssize_t +attr_show_args(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct mISDN_dsp_element *elem = dev_get_drvdata(dev); + ssize_t len = 0; + int i = 0; + + *buf = 0; + for (; i < elem->num_args; ++i) + len = sprintf(buf, "%sName: %s\n%s%s%sDescription: %s\n" + "\n", buf, + elem->args[i].name, + elem->args[i].def ? "Default: " : "", + elem->args[i].def ? elem->args[i].def : "", + elem->args[i].def ? "\n" : "", + elem->args[i].desc); + + return len; +} + +static struct device_attribute element_attributes[] = { + __ATTR(args, 0444, attr_show_args, NULL), +}; + +int mISDN_dsp_element_register(struct mISDN_dsp_element *elem) +{ + struct dsp_element_entry *entry; + int ret, i; + + if (!elem) + return -EINVAL; + + entry = kzalloc(sizeof(struct dsp_element_entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->elem = elem; + + entry->dev.class = elements_class; + dev_set_drvdata(&entry->dev, elem); + snprintf(entry->dev.bus_id, BUS_ID_SIZE, elem->name); + ret = device_register(&entry->dev); + if (ret) { + printk(KERN_ERR "%s: failed to register %s\n", + __func__, elem->name); + goto err1; + } + + for (i = 0; i < (sizeof(element_attributes) + / sizeof(struct device_attribute)); ++i) + ret = device_create_file(&entry->dev, + &element_attributes[i]); + if (ret) { + printk(KERN_ERR "%s: failed to create device file\n", + __func__); + goto err2; + } + + list_add_tail(&entry->list, &dsp_elements); + + printk(KERN_DEBUG "%s: %s registered\n", __func__, elem->name); + + return 0; + +err2: + device_unregister(&entry->dev); +err1: + kfree(entry); + return ret; +} +EXPORT_SYMBOL(mISDN_dsp_element_register); + +void mISDN_dsp_element_unregister(struct mISDN_dsp_element *elem) +{ + struct dsp_element_entry *entry, *n; + + if (!elem) + return; + + list_for_each_entry_safe(entry, n, &dsp_elements, list) + if (entry->elem == elem) { + list_del(&entry->list); + device_unregister(&entry->dev); + kfree(entry); + printk(KERN_DEBUG "%s: %s unregistered\n", + __func__, elem->name); + return; + } + printk(KERN_ERR "%s: element %s not in list.\n", __func__, elem->name); +} +EXPORT_SYMBOL(mISDN_dsp_element_unregister); + +int dsp_pipeline_module_init(void) +{ + elements_class = class_create(THIS_MODULE, "dsp_pipeline"); + if (IS_ERR(elements_class)) + return PTR_ERR(elements_class); + +#ifdef PIPELINE_DEBUG + printk(KERN_DEBUG "%s: dsp pipeline module initialized\n", __func__); +#endif + + dsp_hwec_init(); + + return 0; +} + +void dsp_pipeline_module_exit(void) +{ + struct dsp_element_entry *entry, *n; + + dsp_hwec_exit(); + + class_destroy(elements_class); + + list_for_each_entry_safe(entry, n, &dsp_elements, list) { + list_del(&entry->list); + printk(KERN_WARNING "%s: element was still registered: %s\n", + __func__, entry->elem->name); + kfree(entry); + } + + printk(KERN_DEBUG "%s: dsp pipeline module exited\n", __func__); +} + +int dsp_pipeline_init(struct dsp_pipeline *pipeline) +{ + if (!pipeline) + return -EINVAL; + + INIT_LIST_HEAD(&pipeline->list); + +#ifdef PIPELINE_DEBUG + printk(KERN_DEBUG "%s: dsp pipeline 
ready\n", __func__); +#endif + + return 0; +} + +static inline void _dsp_pipeline_destroy(struct dsp_pipeline *pipeline) +{ + struct dsp_pipeline_entry *entry, *n; + + list_for_each_entry_safe(entry, n, &pipeline->list, list) { + list_del(&entry->list); + if (entry->elem == dsp_hwec) + dsp_hwec_disable(container_of(pipeline, struct dsp, + pipeline)); + else + entry->elem->free(entry->p); + kfree(entry); + } +} + +void dsp_pipeline_destroy(struct dsp_pipeline *pipeline) +{ + + if (!pipeline) + return; + + _dsp_pipeline_destroy(pipeline); + +#ifdef PIPELINE_DEBUG + printk(KERN_DEBUG "%s: dsp pipeline destroyed\n", __func__); +#endif +} + +int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg) +{ + int len, incomplete = 0, found = 0; + char *dup, *tok, *name, *args; + struct dsp_element_entry *entry, *n; + struct dsp_pipeline_entry *pipeline_entry; + struct mISDN_dsp_element *elem; + + if (!pipeline) + return -EINVAL; + + if (!list_empty(&pipeline->list)) + _dsp_pipeline_destroy(pipeline); + + if (!cfg) + return 0; + + len = strlen(cfg); + if (!len) + return 0; + + dup = kmalloc(len + 1, GFP_KERNEL); + if (!dup) + return 0; + strcpy(dup, cfg); + while ((tok = strsep(&dup, "|"))) { + if (!strlen(tok)) + continue; + name = strsep(&tok, "("); + args = strsep(&tok, ")"); + if (args && !*args) + args = 0; + + list_for_each_entry_safe(entry, n, &dsp_elements, list) + if (!strcmp(entry->elem->name, name)) { + elem = entry->elem; + + pipeline_entry = kmalloc(sizeof(struct + dsp_pipeline_entry), GFP_KERNEL); + if (!pipeline_entry) { + printk(KERN_DEBUG "%s: failed to add " + "entry to pipeline: %s (out of " + "memory)\n", __func__, elem->name); + incomplete = 1; + goto _out; + } + pipeline_entry->elem = elem; + + if (elem == dsp_hwec) { + /* This is a hack to make the hwec + available as a pipeline module */ + dsp_hwec_enable(container_of(pipeline, + struct dsp, pipeline), args); + list_add_tail(&pipeline_entry->list, + &pipeline->list); + } else { + pipeline_entry->p = elem->new(args); + if (pipeline_entry->p) { + list_add_tail(&pipeline_entry-> + list, &pipeline->list); +#ifdef PIPELINE_DEBUG + printk(KERN_DEBUG "%s: created " + "instance of %s%s%s\n", + __func__, name, args ? + " with args " : "", args ? + args : ""); +#endif + } else { + printk(KERN_DEBUG "%s: failed " + "to add entry to pipeline: " + "%s (new() returned NULL)\n", + __func__, elem->name); + kfree(pipeline_entry); + incomplete = 1; + } + } + found = 1; + break; + } + + if (found) + found = 0; + else { + printk(KERN_DEBUG "%s: element not found, skipping: " + "%s\n", __func__, name); + incomplete = 1; + } + } + +_out: + if (!list_empty(&pipeline->list)) + pipeline->inuse = 1; + else + pipeline->inuse = 0; + +#ifdef PIPELINE_DEBUG + printk(KERN_DEBUG "%s: dsp pipeline built%s: %s\n", + __func__, incomplete ? 
" incomplete" : "", cfg); +#endif + kfree(dup); + return 0; +} + +void dsp_pipeline_process_tx(struct dsp_pipeline *pipeline, u8 *data, int len) +{ + struct dsp_pipeline_entry *entry; + + if (!pipeline) + return; + + list_for_each_entry(entry, &pipeline->list, list) + if (entry->elem->process_tx) + entry->elem->process_tx(entry->p, data, len); +} + +void dsp_pipeline_process_rx(struct dsp_pipeline *pipeline, u8 *data, int len) +{ + struct dsp_pipeline_entry *entry; + + if (!pipeline) + return; + + list_for_each_entry_reverse(entry, &pipeline->list, list) + if (entry->elem->process_rx) + entry->elem->process_rx(entry->p, data, len); +} + + diff --git a/drivers/isdn/mISDN/dsp_tones.c b/drivers/isdn/mISDN/dsp_tones.c new file mode 100644 index 000000000000..23dd0dd21524 --- /dev/null +++ b/drivers/isdn/mISDN/dsp_tones.c @@ -0,0 +1,551 @@ +/* + * Audio support data for ISDN4Linux. + * + * Copyright Andreas Eversberg (jolly@eversberg.eu) + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + * + */ + +#include +#include +#include "core.h" +#include "dsp.h" + + +#define DATA_S sample_silence +#define SIZE_S (&sizeof_silence) +#define DATA_GA sample_german_all +#define SIZE_GA (&sizeof_german_all) +#define DATA_GO sample_german_old +#define SIZE_GO (&sizeof_german_old) +#define DATA_DT sample_american_dialtone +#define SIZE_DT (&sizeof_american_dialtone) +#define DATA_RI sample_american_ringing +#define SIZE_RI (&sizeof_american_ringing) +#define DATA_BU sample_american_busy +#define SIZE_BU (&sizeof_american_busy) +#define DATA_S1 sample_special1 +#define SIZE_S1 (&sizeof_special1) +#define DATA_S2 sample_special2 +#define SIZE_S2 (&sizeof_special2) +#define DATA_S3 sample_special3 +#define SIZE_S3 (&sizeof_special3) + +/***************/ +/* tones loops */ +/***************/ + +/* all tones are alaw encoded */ +/* the last sample+1 is in phase with the first sample. 
the error is low */ + +static u8 sample_german_all[] = { + 0x80, 0xab, 0x81, 0x6d, 0xfd, 0xdd, 0x5d, 0x9d, + 0x4d, 0xd1, 0x89, 0x88, 0xd0, 0x4c, 0x9c, 0x5c, + 0xdc, 0xfc, 0x6c, + 0x80, 0xab, 0x81, 0x6d, 0xfd, 0xdd, 0x5d, 0x9d, + 0x4d, 0xd1, 0x89, 0x88, 0xd0, 0x4c, 0x9c, 0x5c, + 0xdc, 0xfc, 0x6c, + 0x80, 0xab, 0x81, 0x6d, 0xfd, 0xdd, 0x5d, 0x9d, + 0x4d, 0xd1, 0x89, 0x88, 0xd0, 0x4c, 0x9c, 0x5c, + 0xdc, 0xfc, 0x6c, + 0x80, 0xab, 0x81, 0x6d, 0xfd, 0xdd, 0x5d, 0x9d, + 0x4d, 0xd1, 0x89, 0x88, 0xd0, 0x4c, 0x9c, 0x5c, + 0xdc, 0xfc, 0x6c, +}; +static u32 sizeof_german_all = sizeof(sample_german_all); + +static u8 sample_german_old[] = { + 0xec, 0x68, 0xe1, 0x6d, 0x6d, 0x91, 0x51, 0xed, + 0x6d, 0x01, 0x1e, 0x10, 0x0c, 0x90, 0x60, 0x70, + 0x8c, + 0xec, 0x68, 0xe1, 0x6d, 0x6d, 0x91, 0x51, 0xed, + 0x6d, 0x01, 0x1e, 0x10, 0x0c, 0x90, 0x60, 0x70, + 0x8c, + 0xec, 0x68, 0xe1, 0x6d, 0x6d, 0x91, 0x51, 0xed, + 0x6d, 0x01, 0x1e, 0x10, 0x0c, 0x90, 0x60, 0x70, + 0x8c, + 0xec, 0x68, 0xe1, 0x6d, 0x6d, 0x91, 0x51, 0xed, + 0x6d, 0x01, 0x1e, 0x10, 0x0c, 0x90, 0x60, 0x70, + 0x8c, +}; +static u32 sizeof_german_old = sizeof(sample_german_old); + +static u8 sample_american_dialtone[] = { + 0x2a, 0x18, 0x90, 0x6c, 0x4c, 0xbc, 0x4c, 0x6c, + 0x10, 0x58, 0x32, 0xb9, 0x31, 0x2d, 0x8d, 0x0d, + 0x8d, 0x2d, 0x31, 0x99, 0x0f, 0x28, 0x60, 0xf0, + 0xd0, 0x50, 0xd0, 0x30, 0x60, 0x08, 0x8e, 0x67, + 0x09, 0x19, 0x21, 0xe1, 0xd9, 0xb9, 0x29, 0x67, + 0x83, 0x02, 0xce, 0xbe, 0xee, 0x1a, 0x1b, 0xef, + 0xbf, 0xcf, 0x03, 0x82, 0x66, 0x28, 0xb8, 0xd8, + 0xe0, 0x20, 0x18, 0x08, 0x66, 0x8f, 0x09, 0x61, + 0x31, 0xd1, 0x51, 0xd1, 0xf1, 0x61, 0x29, 0x0e, + 0x98, 0x30, 0x2c, 0x8c, 0x0c, 0x8c, 0x2c, 0x30, + 0xb8, 0x33, 0x59, 0x11, 0x6d, 0x4d, 0xbd, 0x4d, + 0x6d, 0x91, 0x19, +}; +static u32 sizeof_american_dialtone = sizeof(sample_american_dialtone); + +static u8 sample_american_ringing[] = { + 0x2a, 0xe0, 0xac, 0x0c, 0xbc, 0x4c, 0x8c, 0x90, + 0x48, 0xc7, 0xc1, 0xed, 0xcd, 0x4d, 0xcd, 0xed, + 0xc1, 0xb7, 0x08, 0x30, 0xec, 0xcc, 0xcc, 0x8c, + 0x10, 0x58, 0x1a, 0x99, 0x71, 0xed, 0x8d, 0x8d, + 0x2d, 0x41, 0x89, 0x9e, 0x20, 0x70, 0x2c, 0xec, + 0x2c, 0x70, 0x20, 0x86, 0x77, 0xe1, 0x31, 0x11, + 0xd1, 0xf1, 0x81, 0x09, 0xa3, 0x56, 0x58, 0x00, + 0x40, 0xc0, 0x60, 0x38, 0x46, 0x43, 0x57, 0x39, + 0xd9, 0x59, 0x99, 0xc9, 0x77, 0x2f, 0x2e, 0xc6, + 0xd6, 0x28, 0xd6, 0x36, 0x26, 0x2e, 0x8a, 0xa3, + 0x43, 0x63, 0x4b, 0x4a, 0x62, 0x42, 0xa2, 0x8b, + 0x2f, 0x27, 0x37, 0xd7, 0x29, 0xd7, 0xc7, 0x2f, + 0x2e, 0x76, 0xc8, 0x98, 0x58, 0xd8, 0x38, 0x56, + 0x42, 0x47, 0x39, 0x61, 0xc1, 0x41, 0x01, 0x59, + 0x57, 0xa2, 0x08, 0x80, 0xf0, 0xd0, 0x10, 0x30, + 0xe0, 0x76, 0x87, 0x21, 0x71, 0x2d, 0xed, 0x2d, + 0x71, 0x21, 0x9f, 0x88, 0x40, 0x2c, 0x8c, 0x8c, + 0xec, 0x70, 0x98, 0x1b, 0x59, 0x11, 0x8d, 0xcd, + 0xcd, 0xed, 0x31, 0x09, 0xb6, 0xc0, 0xec, 0xcc, + 0x4c, 0xcc, 0xec, 0xc0, 0xc6, 0x49, 0x91, 0x8d, + 0x4d, 0xbd, 0x0d, 0xad, 0xe1, +}; +static u32 sizeof_american_ringing = sizeof(sample_american_ringing); + +static u8 sample_american_busy[] = { + 0x2a, 0x00, 0x6c, 0x4c, 0x4c, 0x6c, 0xb0, 0x66, + 0x99, 0x11, 0x6d, 0x8d, 0x2d, 0x41, 0xd7, 0x96, + 0x60, 0xf0, 0x70, 0x40, 0x58, 0xf6, 0x53, 0x57, + 0x09, 0x89, 0xd7, 0x5f, 0xe3, 0x2a, 0xe3, 0x5f, + 0xd7, 0x89, 0x09, 0x57, 0x53, 0xf6, 0x58, 0x40, + 0x70, 0xf0, 0x60, 0x96, 0xd7, 0x41, 0x2d, 0x8d, + 0x6d, 0x11, 0x99, 0x66, 0xb0, 0x6c, 0x4c, 0x4c, + 0x6c, 0x00, 0x2a, 0x01, 0x6d, 0x4d, 0x4d, 0x6d, + 0xb1, 0x67, 0x98, 0x10, 0x6c, 0x8c, 0x2c, 0x40, + 0xd6, 0x97, 0x61, 0xf1, 0x71, 0x41, 0x59, 0xf7, + 0x52, 0x56, 0x08, 0x88, 0xd6, 0x5e, 
0xe2, 0x2a, + 0xe2, 0x5e, 0xd6, 0x88, 0x08, 0x56, 0x52, 0xf7, + 0x59, 0x41, 0x71, 0xf1, 0x61, 0x97, 0xd6, 0x40, + 0x2c, 0x8c, 0x6c, 0x10, 0x98, 0x67, 0xb1, 0x6d, + 0x4d, 0x4d, 0x6d, 0x01, +}; +static u32 sizeof_american_busy = sizeof(sample_american_busy); + +static u8 sample_special1[] = { + 0x2a, 0x2c, 0xbc, 0x6c, 0xd6, 0x71, 0xbd, 0x0d, + 0xd9, 0x80, 0xcc, 0x4c, 0x40, 0x39, 0x0d, 0xbd, + 0x11, 0x86, 0xec, 0xbc, 0xec, 0x0e, 0x51, 0xbd, + 0x8d, 0x89, 0x30, 0x4c, 0xcc, 0xe0, 0xe1, 0xcd, + 0x4d, 0x31, 0x88, 0x8c, 0xbc, 0x50, 0x0f, 0xed, + 0xbd, 0xed, 0x87, 0x10, 0xbc, 0x0c, 0x38, 0x41, + 0x4d, 0xcd, 0x81, 0xd8, 0x0c, 0xbc, 0x70, 0xd7, + 0x6d, 0xbd, 0x2d, +}; +static u32 sizeof_special1 = sizeof(sample_special1); + +static u8 sample_special2[] = { + 0x2a, 0xcc, 0x8c, 0xd7, 0x4d, 0x2d, 0x18, 0xbc, + 0x10, 0xc1, 0xbd, 0xc1, 0x10, 0xbc, 0x18, 0x2d, + 0x4d, 0xd7, 0x8c, 0xcc, 0x2a, 0xcd, 0x8d, 0xd6, + 0x4c, 0x2c, 0x19, 0xbd, 0x11, 0xc0, 0xbc, 0xc0, + 0x11, 0xbd, 0x19, 0x2c, 0x4c, 0xd6, 0x8d, 0xcd, + 0x2a, 0xcc, 0x8c, 0xd7, 0x4d, 0x2d, 0x18, 0xbc, + 0x10, 0xc1, 0xbd, 0xc1, 0x10, 0xbc, 0x18, 0x2d, + 0x4d, 0xd7, 0x8c, 0xcc, 0x2a, 0xcd, 0x8d, 0xd6, + 0x4c, 0x2c, 0x19, 0xbd, 0x11, 0xc0, 0xbc, 0xc0, + 0x11, 0xbd, 0x19, 0x2c, 0x4c, 0xd6, 0x8d, 0xcd, +}; +static u32 sizeof_special2 = sizeof(sample_special2); + +static u8 sample_special3[] = { + 0x2a, 0xbc, 0x18, 0xcd, 0x11, 0x2c, 0x8c, 0xc1, + 0x4d, 0xd6, 0xbc, 0xd6, 0x4d, 0xc1, 0x8c, 0x2c, + 0x11, 0xcd, 0x18, 0xbc, 0x2a, 0xbd, 0x19, 0xcc, + 0x10, 0x2d, 0x8d, 0xc0, 0x4c, 0xd7, 0xbd, 0xd7, + 0x4c, 0xc0, 0x8d, 0x2d, 0x10, 0xcc, 0x19, 0xbd, + 0x2a, 0xbc, 0x18, 0xcd, 0x11, 0x2c, 0x8c, 0xc1, + 0x4d, 0xd6, 0xbc, 0xd6, 0x4d, 0xc1, 0x8c, 0x2c, + 0x11, 0xcd, 0x18, 0xbc, 0x2a, 0xbd, 0x19, 0xcc, + 0x10, 0x2d, 0x8d, 0xc0, 0x4c, 0xd7, 0xbd, 0xd7, + 0x4c, 0xc0, 0x8d, 0x2d, 0x10, 0xcc, 0x19, 0xbd, +}; +static u32 sizeof_special3 = sizeof(sample_special3); + +static u8 sample_silence[] = { + 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, + 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, + 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, + 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, + 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, + 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, + 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, + 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, + 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, + 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, + 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, + 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, +}; +static u32 sizeof_silence = sizeof(sample_silence); + +struct tones_samples { + u32 *len; + u8 *data; +}; +static struct +tones_samples samples[] = { + {&sizeof_german_all, sample_german_all}, + {&sizeof_german_old, sample_german_old}, + {&sizeof_american_dialtone, sample_american_dialtone}, + {&sizeof_american_ringing, sample_american_ringing}, + {&sizeof_american_busy, sample_american_busy}, + {&sizeof_special1, sample_special1}, + {&sizeof_special2, sample_special2}, + {&sizeof_special3, sample_special3}, + {NULL, NULL}, +}; + +/*********************************** + * generate ulaw from alaw samples * + ***********************************/ + +void +dsp_audio_generate_ulaw_samples(void) +{ + int i, j; + + i = 0; + while (samples[i].len) { + j = 0; + while (j < (*samples[i].len)) { + samples[i].data[j] = + dsp_audio_alaw_to_ulaw[samples[i].data[j]]; + j++; + } + i++; + } +} + + +/**************************** + * tone sequence definition * + ****************************/ + +struct pattern { 
+ int tone; + u8 *data[10]; + u32 *siz[10]; + u32 seq[10]; +} pattern[] = { + {TONE_GERMAN_DIALTONE, + {DATA_GA, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {SIZE_GA, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1900, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, + + {TONE_GERMAN_OLDDIALTONE, + {DATA_GO, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {SIZE_GO, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1998, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, + + {TONE_AMERICAN_DIALTONE, + {DATA_DT, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {SIZE_DT, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {8000, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, + + {TONE_GERMAN_DIALPBX, + {DATA_GA, DATA_S, DATA_GA, DATA_S, DATA_GA, DATA_S, 0, 0, 0, 0}, + {SIZE_GA, SIZE_S, SIZE_GA, SIZE_S, SIZE_GA, SIZE_S, 0, 0, 0, 0}, + {2000, 2000, 2000, 2000, 2000, 12000, 0, 0, 0, 0} }, + + {TONE_GERMAN_OLDDIALPBX, + {DATA_GO, DATA_S, DATA_GO, DATA_S, DATA_GO, DATA_S, 0, 0, 0, 0}, + {SIZE_GO, SIZE_S, SIZE_GO, SIZE_S, SIZE_GO, SIZE_S, 0, 0, 0, 0}, + {2000, 2000, 2000, 2000, 2000, 12000, 0, 0, 0, 0} }, + + {TONE_AMERICAN_DIALPBX, + {DATA_DT, DATA_S, DATA_DT, DATA_S, DATA_DT, DATA_S, 0, 0, 0, 0}, + {SIZE_DT, SIZE_S, SIZE_DT, SIZE_S, SIZE_DT, SIZE_S, 0, 0, 0, 0}, + {2000, 2000, 2000, 2000, 2000, 12000, 0, 0, 0, 0} }, + + {TONE_GERMAN_RINGING, + {DATA_GA, DATA_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {SIZE_GA, SIZE_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {8000, 32000, 0, 0, 0, 0, 0, 0, 0, 0} }, + + {TONE_GERMAN_OLDRINGING, + {DATA_GO, DATA_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {SIZE_GO, SIZE_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {8000, 40000, 0, 0, 0, 0, 0, 0, 0, 0} }, + + {TONE_AMERICAN_RINGING, + {DATA_RI, DATA_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {SIZE_RI, SIZE_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {8000, 32000, 0, 0, 0, 0, 0, 0, 0, 0} }, + + {TONE_GERMAN_RINGPBX, + {DATA_GA, DATA_S, DATA_GA, DATA_S, 0, 0, 0, 0, 0, 0}, + {SIZE_GA, SIZE_S, SIZE_GA, SIZE_S, 0, 0, 0, 0, 0, 0}, + {4000, 4000, 4000, 28000, 0, 0, 0, 0, 0, 0} }, + + {TONE_GERMAN_OLDRINGPBX, + {DATA_GO, DATA_S, DATA_GO, DATA_S, 0, 0, 0, 0, 0, 0}, + {SIZE_GO, SIZE_S, SIZE_GO, SIZE_S, 0, 0, 0, 0, 0, 0}, + {4000, 4000, 4000, 28000, 0, 0, 0, 0, 0, 0} }, + + {TONE_AMERICAN_RINGPBX, + {DATA_RI, DATA_S, DATA_RI, DATA_S, 0, 0, 0, 0, 0, 0}, + {SIZE_RI, SIZE_S, SIZE_RI, SIZE_S, 0, 0, 0, 0, 0, 0}, + {4000, 4000, 4000, 28000, 0, 0, 0, 0, 0, 0} }, + + {TONE_GERMAN_BUSY, + {DATA_GA, DATA_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {SIZE_GA, SIZE_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {4000, 4000, 0, 0, 0, 0, 0, 0, 0, 0} }, + + {TONE_GERMAN_OLDBUSY, + {DATA_GO, DATA_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {SIZE_GO, SIZE_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {1000, 5000, 0, 0, 0, 0, 0, 0, 0, 0} }, + + {TONE_AMERICAN_BUSY, + {DATA_BU, DATA_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {SIZE_BU, SIZE_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {4000, 4000, 0, 0, 0, 0, 0, 0, 0, 0} }, + + {TONE_GERMAN_HANGUP, + {DATA_GA, DATA_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {SIZE_GA, SIZE_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {4000, 4000, 0, 0, 0, 0, 0, 0, 0, 0} }, + + {TONE_GERMAN_OLDHANGUP, + {DATA_GO, DATA_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {SIZE_GO, SIZE_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {1000, 5000, 0, 0, 0, 0, 0, 0, 0, 0} }, + + {TONE_AMERICAN_HANGUP, + {DATA_DT, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {SIZE_DT, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {8000, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, + + {TONE_SPECIAL_INFO, + {DATA_S1, DATA_S2, DATA_S3, DATA_S, 0, 0, 0, 0, 0, 0}, + {SIZE_S1, SIZE_S2, SIZE_S3, SIZE_S, 0, 0, 0, 0, 0, 0}, + {2666, 2666, 2666, 8002, 0, 0, 0, 0, 0, 0} }, + + {TONE_GERMAN_GASSENBESETZT, + {DATA_GA, DATA_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {SIZE_GA, SIZE_S, 0, 0, 0, 0, 0, 0, 0, 0}, + {2000, 2000, 0, 0, 0, 0, 0, 0, 0, 0} }, + + {TONE_GERMAN_AUFSCHALTTON, + {DATA_GO, DATA_S, 
DATA_GO, DATA_S, 0, 0, 0, 0, 0, 0}, + {SIZE_GO, SIZE_S, SIZE_GO, SIZE_S, 0, 0, 0, 0, 0, 0}, + {1000, 5000, 1000, 17000, 0, 0, 0, 0, 0, 0} }, + + {0, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, +}; + +/****************** + * copy tone data * + ******************/ + +/* an sk_buff is generated from the number of samples needed. + * the count will be changed and may begin from 0 each pattern period. + * the clue is to precalculate the pointers and legths to use only one + * memcpy per function call, or two memcpy if the tone sequence changes. + * + * pattern - the type of the pattern + * count - the sample from the beginning of the pattern (phase) + * len - the number of bytes + * + * return - the sk_buff with the sample + * + * if tones has finished (e.g. knocking tone), dsp->tones is turned off + */ +void dsp_tone_copy(struct dsp *dsp, u8 *data, int len) +{ + int index, count, start, num; + struct pattern *pat; + struct dsp_tone *tone = &dsp->tone; + + /* if we have no tone, we copy silence */ + if (!tone->tone) { + memset(data, dsp_silence, len); + return; + } + + /* process pattern */ + pat = (struct pattern *)tone->pattern; + /* points to the current pattern */ + index = tone->index; /* gives current sequence index */ + count = tone->count; /* gives current sample */ + + /* copy sample */ + while (len) { + /* find sample to start with */ + while (42) { + /* warp arround */ + if (!pat->seq[index]) { + count = 0; + index = 0; + } + /* check if we are currently playing this tone */ + if (count < pat->seq[index]) + break; + if (dsp_debug & DEBUG_DSP_TONE) + printk(KERN_DEBUG "%s: reaching next sequence " + "(index=%d)\n", __func__, index); + count -= pat->seq[index]; + index++; + } + /* calculate start and number of samples */ + start = count % (*(pat->siz[index])); + num = len; + if (num+count > pat->seq[index]) + num = pat->seq[index] - count; + if (num+start > (*(pat->siz[index]))) + num = (*(pat->siz[index])) - start; + /* copy memory */ + memcpy(data, pat->data[index]+start, num); + /* reduce length */ + data += num; + count += num; + len -= num; + } + tone->index = index; + tone->count = count; + + /* return sk_buff */ + return; +} + + +/******************************* + * send HW message to hfc card * + *******************************/ + +static void +dsp_tone_hw_message(struct dsp *dsp, u8 *sample, int len) +{ + struct sk_buff *nskb; + + /* unlocking is not required, because we don't expect a response */ + nskb = _alloc_mISDN_skb(PH_CONTROL_REQ, + (len)?HFC_SPL_LOOP_ON:HFC_SPL_LOOP_OFF, len, sample, + GFP_ATOMIC); + if (nskb) { + if (dsp->ch.peer) { + if (dsp->ch.recv(dsp->ch.peer, nskb)) + dev_kfree_skb(nskb); + } else + dev_kfree_skb(nskb); + } +} + + +/***************** + * timer expires * + *****************/ +void +dsp_tone_timeout(void *arg) +{ + struct dsp *dsp = arg; + struct dsp_tone *tone = &dsp->tone; + struct pattern *pat = (struct pattern *)tone->pattern; + int index = tone->index; + + if (!tone->tone) + return; + + index++; + if (!pat->seq[index]) + index = 0; + tone->index = index; + + /* set next tone */ + if (pat->data[index] == DATA_S) + dsp_tone_hw_message(dsp, 0, 0); + else + dsp_tone_hw_message(dsp, pat->data[index], *(pat->siz[index])); + /* set timer */ + init_timer(&tone->tl); + tone->tl.expires = jiffies + (pat->seq[index] * HZ) / 8000; + add_timer(&tone->tl); +} + + +/******************** + * set/release tone * + ********************/ + +/* + * tones are relaized by streaming or by special loop 
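dsp_tone_copy() above treats a tone as a looping sequence of segments, each played for seq[] bytes out of a (possibly shorter, itself looping) sample buffer, and fills a caller-supplied buffer starting at an arbitrary phase with at most a couple of memcpy() calls. A user-space sketch of that wrap-around copy with a made-up two-segment pattern (8 bytes of "tone", 4 bytes of "silence"); the structure and names are illustrative only.

#include <stdio.h>
#include <string.h>

/* One looping pattern: data[i] plays for seq[i] bytes, then the next
 * segment, wrapping to segment 0 at the end (0 terminates seq[]). */
struct demo_pattern {
        const unsigned char *data[4];
        unsigned int size[4];   /* length of the sample buffer itself */
        unsigned int seq[4];    /* how long each segment is played    */
};

static void pattern_copy(const struct demo_pattern *pat,
                         unsigned int *index, unsigned int *count,
                         unsigned char *out, int len)
{
        while (len) {
                unsigned int start, num;

                /* advance to the segment containing 'count' (wrap at end) */
                while (1) {
                        if (!pat->seq[*index]) {        /* end: wrap around */
                                *count = 0;
                                *index = 0;
                        }
                        if (*count < pat->seq[*index])
                                break;
                        *count -= pat->seq[*index];
                        (*index)++;
                }
                /* the sample buffer itself loops every size[] bytes */
                start = *count % pat->size[*index];
                num = len;
                if (num + *count > pat->seq[*index])
                        num = pat->seq[*index] - *count;
                if (num + start > pat->size[*index])
                        num = pat->size[*index] - start;

                memcpy(out, pat->data[*index] + start, num);
                out += num;
                *count += num;
                len -= num;
        }
}

int main(void)
{
        static const unsigned char tone[4] = { 'T', 'O', 'N', 'E' };
        static const unsigned char sil[2]  = { '.', '.' };
        struct demo_pattern pat = {
                { tone, sil, NULL, NULL },
                { 4, 2, 0, 0 },
                { 8, 4, 0, 0 },         /* 8 bytes tone, 4 bytes silence */
        };
        unsigned char buf[25];
        unsigned int index = 0, count = 0;

        pattern_copy(&pat, &index, &count, buf, 24);
        buf[24] = '\0';
        printf("%s\n", buf);            /* TONETONE....TONETONE.... */
        return 0;
}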
commands if supported + * by hardware. when hardware is used, the patterns will be controlled by + * timers. + */ +int +dsp_tone(struct dsp *dsp, int tone) +{ + struct pattern *pat; + int i; + struct dsp_tone *tonet = &dsp->tone; + + tonet->software = 0; + tonet->hardware = 0; + + /* we turn off the tone */ + if (!tone) { + if (dsp->features.hfc_loops) + if (timer_pending(&tonet->tl)) + del_timer(&tonet->tl); + if (dsp->features.hfc_loops) + dsp_tone_hw_message(dsp, NULL, 0); + tonet->tone = 0; + return 0; + } + + pat = NULL; + i = 0; + while (pattern[i].tone) { + if (pattern[i].tone == tone) { + pat = &pattern[i]; + break; + } + i++; + } + if (!pat) { + printk(KERN_WARNING "dsp: given tone 0x%x is invalid\n", tone); + return -EINVAL; + } + if (dsp_debug & DEBUG_DSP_TONE) + printk(KERN_DEBUG "%s: now starting tone %d (index=%d)\n", + __func__, tone, 0); + tonet->tone = tone; + tonet->pattern = pat; + tonet->index = 0; + tonet->count = 0; + + if (dsp->features.hfc_loops) { + tonet->hardware = 1; + /* set first tone */ + dsp_tone_hw_message(dsp, pat->data[0], *(pat->siz[0])); + /* set timer */ + if (timer_pending(&tonet->tl)) + del_timer(&tonet->tl); + init_timer(&tonet->tl); + tonet->tl.expires = jiffies + (pat->seq[0] * HZ) / 8000; + add_timer(&tonet->tl); + } else { + tonet->software = 1; + } + + return 0; +} + + + + + diff --git a/include/linux/mISDNdsp.h b/include/linux/mISDNdsp.h new file mode 100644 index 000000000000..6b71d2dce508 --- /dev/null +++ b/include/linux/mISDNdsp.h @@ -0,0 +1,37 @@ +#ifndef __mISDNdsp_H__ +#define __mISDNdsp_H__ + +struct mISDN_dsp_element_arg { + char *name; + char *def; + char *desc; +}; + +struct mISDN_dsp_element { + char *name; + void *(*new)(const char *arg); + void (*free)(void *p); + void (*process_tx)(void *p, unsigned char *data, int len); + void (*process_rx)(void *p, unsigned char *data, int len); + int num_args; + struct mISDN_dsp_element_arg + *args; +}; + +extern int mISDN_dsp_element_register(struct mISDN_dsp_element *elem); +extern void mISDN_dsp_element_unregister(struct mISDN_dsp_element *elem); + +struct dsp_features { + int hfc_id; /* unique id to identify the chip (or -1) */ + int hfc_dtmf; /* set if HFCmulti card supports dtmf */ + int hfc_loops; /* set if card supports tone loops */ + int hfc_echocanhw; /* set if card supports echocancelation*/ + int pcm_id; /* unique id to identify the pcm bus (or -1) */ + int pcm_slots; /* number of slots on the pcm bus */ + int pcm_banks; /* number of IO banks of pcm bus */ + int unclocked; /* data is not clocked (has jitter/loss) */ + int unordered; /* data is unordered (packets have index) */ +}; + +#endif + -- cgit v1.2.3 From af69fb3a8ffa37e986db00ed93099dc44babeef4 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Sun, 27 Jul 2008 02:00:43 +0200 Subject: Add mISDN HFC multiport driver Enable support for cards with Cologne Chip AG's HFC multiport chip. 
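For reference, the mISDNdsp.h header added above defines the interface a pipeline element implements: new() builds per-instance state from the argument string, process_tx()/process_rx() filter audio in place, free() releases the state, and the element is announced with mISDN_dsp_element_register(). The following is only a sketch of a hypothetical minimal element written against that interface, not part of this patch set; the "demo" element and its toy byte transformation are invented.

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/mISDNdsp.h>

struct demo_state {
        int enabled;
};

static void *demo_new(const char *arg)
{
        struct demo_state *s = kzalloc(sizeof(*s), GFP_KERNEL);

        if (s)
                s->enabled = 1;         /* a real element would parse 'arg' */
        return s;                       /* NULL lets the pipeline skip us   */
}

static void demo_free(void *p)
{
        kfree(p);
}

static void demo_process(void *p, unsigned char *data, int len)
{
        struct demo_state *s = p;
        int i;

        if (!s || !s->enabled)
                return;
        for (i = 0; i < len; i++)
                data[i] ^= 0x80;        /* toy in-place transformation */
}

static struct mISDN_dsp_element demo_element = {
        .name = "demo",
        .new = demo_new,
        .free = demo_free,
        .process_tx = demo_process,
        .process_rx = demo_process,
        .num_args = 0,
        .args = NULL,
};

static int __init demo_init(void)
{
        return mISDN_dsp_element_register(&demo_element);
}

static void __exit demo_exit(void)
{
        mISDN_dsp_element_unregister(&demo_element);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");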
Signed-off-by: Karsten Keil --- drivers/isdn/hardware/mISDN/Kconfig | 12 + drivers/isdn/hardware/mISDN/Makefile | 1 + drivers/isdn/hardware/mISDN/hfc_multi.h | 1204 +++++++ drivers/isdn/hardware/mISDN/hfcmulti.c | 5320 +++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 33 + 5 files changed, 6570 insertions(+) create mode 100644 drivers/isdn/hardware/mISDN/hfc_multi.h create mode 100644 drivers/isdn/hardware/mISDN/hfcmulti.c (limited to 'include/linux') diff --git a/drivers/isdn/hardware/mISDN/Kconfig b/drivers/isdn/hardware/mISDN/Kconfig index f62dc8752be9..14793480c453 100644 --- a/drivers/isdn/hardware/mISDN/Kconfig +++ b/drivers/isdn/hardware/mISDN/Kconfig @@ -11,3 +11,15 @@ config MISDN_HFCPCI Enable support for cards with Cologne Chip AG's HFC PCI chip. +config MISDN_HFCMULTI + tristate "Support for HFC multiport cards (HFC-4S/8S/E1)" + depends on PCI + depends on MISDN + help + Enable support for cards with Cologne Chip AG's HFC multiport + chip. There are three types of chips that are quite similar, + but the interface is different: + * HFC-4S (4 S/T interfaces on one chip) + * HFC-8S (8 S/T interfaces on one chip) + * HFC-E1 (E1 interface for 2Mbit ISDN) + diff --git a/drivers/isdn/hardware/mISDN/Makefile b/drivers/isdn/hardware/mISDN/Makefile index 6f20a40b9d54..1e7ca5332ad7 100644 --- a/drivers/isdn/hardware/mISDN/Makefile +++ b/drivers/isdn/hardware/mISDN/Makefile @@ -4,3 +4,4 @@ # obj-$(CONFIG_MISDN_HFCPCI) += hfcpci.o +obj-$(CONFIG_MISDN_HFCMULTI) += hfcmulti.o diff --git a/drivers/isdn/hardware/mISDN/hfc_multi.h b/drivers/isdn/hardware/mISDN/hfc_multi.h new file mode 100644 index 000000000000..a33d87afc843 --- /dev/null +++ b/drivers/isdn/hardware/mISDN/hfc_multi.h @@ -0,0 +1,1204 @@ +/* + * see notice in hfc_multi.c + */ + +extern void ztdummy_extern_interrupt(void); +extern void ztdummy_register_interrupt(void); +extern int ztdummy_unregister_interrupt(void); + +#define DEBUG_HFCMULTI_FIFO 0x00010000 +#define DEBUG_HFCMULTI_CRC 0x00020000 +#define DEBUG_HFCMULTI_INIT 0x00040000 +#define DEBUG_HFCMULTI_PLXSD 0x00080000 +#define DEBUG_HFCMULTI_MODE 0x00100000 +#define DEBUG_HFCMULTI_MSG 0x00200000 +#define DEBUG_HFCMULTI_STATE 0x00400000 +#define DEBUG_HFCMULTI_SYNC 0x01000000 +#define DEBUG_HFCMULTI_DTMF 0x02000000 +#define DEBUG_HFCMULTI_LOCK 0x80000000 + +#define PCI_ENA_REGIO 0x01 +#define PCI_ENA_MEMIO 0x02 + +/* + * NOTE: some registers are assigned multiple times due to different modes + * also registers are assigned differen for HFC-4s/8s and HFC-E1 + */ + +/* +#define MAX_FRAME_SIZE 2048 +*/ + +struct hfc_chan { + struct dchannel *dch; /* link if channel is a D-channel */ + struct bchannel *bch; /* link if channel is a B-channel */ + int port; /* the interface port this */ + /* channel is associated with */ + int nt_timer; /* -1 if off, 0 if elapsed, >0 if running */ + int los, ais, slip_tx, slip_rx, rdi; /* current alarms */ + int jitter; + u_long cfg; /* port configuration */ + int sync; /* sync state (used by E1) */ + u_int protocol; /* current protocol */ + int slot_tx; /* current pcm slot */ + int bank_tx; /* current pcm bank */ + int slot_rx; + int bank_rx; + int conf; /* conference setting of TX slot */ + int txpending; /* if there is currently data in */ + /* the FIFO 0=no, 1=yes, 2=splloop */ + int rx_off; /* set to turn fifo receive off */ + int coeff_count; /* curren coeff block */ + s32 *coeff; /* memory pointer to 8 coeff blocks */ +}; + + +struct hfcm_hw { + u_char r_ctrl; + u_char r_irq_ctrl; + u_char r_cirm; + u_char r_ram_sz; + u_char 
r_pcm_md0; + u_char r_irqmsk_misc; + u_char r_dtmf; + u_char r_st_sync; + u_char r_sci_msk; + u_char r_tx0, r_tx1; + u_char a_st_ctrl0[8]; + timer_t timer; +}; + + +/* for each stack these flags are used (cfg) */ +#define HFC_CFG_NONCAP_TX 1 /* S/T TX interface has less capacity */ +#define HFC_CFG_DIS_ECHANNEL 2 /* disable E-channel processing */ +#define HFC_CFG_REG_ECHANNEL 3 /* register E-channel */ +#define HFC_CFG_OPTICAL 4 /* the E1 interface is optical */ +#define HFC_CFG_REPORT_LOS 5 /* the card should report loss of signal */ +#define HFC_CFG_REPORT_AIS 6 /* the card should report alarm ind. sign. */ +#define HFC_CFG_REPORT_SLIP 7 /* the card should report bit slips */ +#define HFC_CFG_REPORT_RDI 8 /* the card should report remote alarm */ +#define HFC_CFG_DTMF 9 /* enable DTMF-detection */ +#define HFC_CFG_CRC4 10 /* disable CRC-4 Multiframe mode, */ + /* use double frame instead. */ + +#define HFC_CHIP_EXRAM_128 0 /* external ram 128k */ +#define HFC_CHIP_EXRAM_512 1 /* external ram 256k */ +#define HFC_CHIP_REVISION0 2 /* old fifo handling */ +#define HFC_CHIP_PCM_SLAVE 3 /* PCM is slave */ +#define HFC_CHIP_PCM_MASTER 4 /* PCM is master */ +#define HFC_CHIP_RX_SYNC 5 /* disable pll sync for pcm */ +#define HFC_CHIP_DTMF 6 /* DTMF decoding is enabled */ +#define HFC_CHIP_ULAW 7 /* ULAW mode */ +#define HFC_CHIP_CLOCK2 8 /* double clock mode */ +#define HFC_CHIP_E1CLOCK_GET 9 /* always get clock from E1 interface */ +#define HFC_CHIP_E1CLOCK_PUT 10 /* always put clock from E1 interface */ +#define HFC_CHIP_WATCHDOG 11 /* whether we should send signals */ + /* to the watchdog */ +#define HFC_CHIP_B410P 12 /* whether we have a b410p with echocan in */ + /* hw */ +#define HFC_CHIP_PLXSD 13 /* whether we have a Speech-Design PLX */ + +#define HFC_IO_MODE_PCIMEM 0x00 /* normal memory mapped IO */ +#define HFC_IO_MODE_REGIO 0x01 /* PCI io access */ +#define HFC_IO_MODE_PLXSD 0x02 /* access HFC via PLX9030 */ + +/* table entry in the PCI devices list */ +struct hm_map { + char *vendor_name; + char *card_name; + int type; + int ports; + int clock2; + int leds; + int opticalsupport; + int dip_type; + int io_mode; +}; + +struct hfc_multi { + struct list_head list; + struct hm_map *mtyp; + int id; + int pcm; /* id of pcm bus */ + int type; + int ports; + + u_int irq; /* irq used by card */ + u_int irqcnt; + struct pci_dev *pci_dev; + int io_mode; /* selects mode */ +#ifdef HFC_REGISTER_DEBUG + void (*HFC_outb)(struct hfc_multi *hc, u_char reg, + u_char val, const char *function, int line); + void (*HFC_outb_nodebug)(struct hfc_multi *hc, u_char reg, + u_char val, const char *function, int line); + u_char (*HFC_inb)(struct hfc_multi *hc, u_char reg, + const char *function, int line); + u_char (*HFC_inb_nodebug)(struct hfc_multi *hc, u_char reg, + const char *function, int line); + u_short (*HFC_inw)(struct hfc_multi *hc, u_char reg, + const char *function, int line); + u_short (*HFC_inw_nodebug)(struct hfc_multi *hc, u_char reg, + const char *function, int line); + void (*HFC_wait)(struct hfc_multi *hc, + const char *function, int line); + void (*HFC_wait_nodebug)(struct hfc_multi *hc, + const char *function, int line); +#else + void (*HFC_outb)(struct hfc_multi *hc, u_char reg, + u_char val); + void (*HFC_outb_nodebug)(struct hfc_multi *hc, u_char reg, + u_char val); + u_char (*HFC_inb)(struct hfc_multi *hc, u_char reg); + u_char (*HFC_inb_nodebug)(struct hfc_multi *hc, u_char reg); + u_short (*HFC_inw)(struct hfc_multi *hc, u_char reg); + u_short (*HFC_inw_nodebug)(struct hfc_multi *hc, 
u_char reg); + void (*HFC_wait)(struct hfc_multi *hc); + void (*HFC_wait_nodebug)(struct hfc_multi *hc); +#endif + void (*read_fifo)(struct hfc_multi *hc, u_char *data, + int len); + void (*write_fifo)(struct hfc_multi *hc, u_char *data, + int len); + u_long pci_origmembase, plx_origmembase, dsp_origmembase; + u_char *pci_membase; /* PCI memory (MUST BE BYTE POINTER) */ + u_char *plx_membase; /* PLX memory */ + u_char *dsp_membase; /* DSP on PLX */ + u_long pci_iobase; /* PCI IO */ + struct hfcm_hw hw; /* remember data of write-only-registers */ + + u_long chip; /* chip configuration */ + int masterclk; /* port that provides master clock -1=off */ + int dtmf; /* flag that dtmf is currently in process */ + int Flen; /* F-buffer size */ + int Zlen; /* Z-buffer size (must be int for calculation)*/ + int max_trans; /* maximum transparent fifo fill */ + int Zmin; /* Z-buffer offset */ + int DTMFbase; /* base address of DTMF coefficients */ + + u_int slots; /* number of PCM slots */ + u_int leds; /* type of leds */ + u_int ledcount; /* used to animate leds */ + u_long ledstate; /* save last state of leds */ + int opticalsupport; /* has the e1 board */ + /* an optical Interface */ + int dslot; /* channel # of d-channel (E1) default 16 */ + + u_long wdcount; /* every 500 ms we need to */ + /* send the watchdog a signal */ + u_char wdbyte; /* watchdog toggle byte */ + u_int activity[8]; /* if there is any action on this */ + /* port (will be cleared after */ + /* showing led-states) */ + int e1_state; /* keep track of last state */ + int e1_getclock; /* if sync is retrieved from interface */ + int syncronized; /* keep track of existing sync interface */ + int e1_resync; /* resync jobs */ + + spinlock_t lock; /* the lock */ + + /* + * the channel index is counted from 0, regardless where the channel + * is located on the hfc-channel. 
+ * the bch->channel is equvalent to the hfc-channel + */ + struct hfc_chan chan[32]; + u_char created[8]; /* what port is created */ + signed char slot_owner[256]; /* owner channel of slot */ +}; + +/* PLX GPIOs */ +#define PLX_GPIO4_DIR_BIT 13 +#define PLX_GPIO4_BIT 14 +#define PLX_GPIO5_DIR_BIT 16 +#define PLX_GPIO5_BIT 17 +#define PLX_GPIO6_DIR_BIT 19 +#define PLX_GPIO6_BIT 20 +#define PLX_GPIO7_DIR_BIT 22 +#define PLX_GPIO7_BIT 23 +#define PLX_GPIO8_DIR_BIT 25 +#define PLX_GPIO8_BIT 26 + +#define PLX_GPIO4 (1 << PLX_GPIO4_BIT) +#define PLX_GPIO5 (1 << PLX_GPIO5_BIT) +#define PLX_GPIO6 (1 << PLX_GPIO6_BIT) +#define PLX_GPIO7 (1 << PLX_GPIO7_BIT) +#define PLX_GPIO8 (1 << PLX_GPIO8_BIT) + +#define PLX_GPIO4_DIR (1 << PLX_GPIO4_DIR_BIT) +#define PLX_GPIO5_DIR (1 << PLX_GPIO5_DIR_BIT) +#define PLX_GPIO6_DIR (1 << PLX_GPIO6_DIR_BIT) +#define PLX_GPIO7_DIR (1 << PLX_GPIO7_DIR_BIT) +#define PLX_GPIO8_DIR (1 << PLX_GPIO8_DIR_BIT) + +#define PLX_TERM_ON PLX_GPIO7 +#define PLX_SLAVE_EN_N PLX_GPIO5 +#define PLX_MASTER_EN PLX_GPIO6 +#define PLX_SYNC_O_EN PLX_GPIO4 +#define PLX_DSP_RES_N PLX_GPIO8 +/* GPIO4..8 Enable & Set to OUT, SLAVE_EN_N = 1 */ +#define PLX_GPIOC_INIT (PLX_GPIO4_DIR | PLX_GPIO5_DIR | PLX_GPIO6_DIR \ + | PLX_GPIO7_DIR | PLX_GPIO8_DIR | PLX_SLAVE_EN_N) + +/* PLX Interrupt Control/STATUS */ +#define PLX_INTCSR_LINTI1_ENABLE 0x01 +#define PLX_INTCSR_LINTI1_STATUS 0x04 +#define PLX_INTCSR_LINTI2_ENABLE 0x08 +#define PLX_INTCSR_LINTI2_STATUS 0x20 +#define PLX_INTCSR_PCIINT_ENABLE 0x40 + +/* PLX Registers */ +#define PLX_INTCSR 0x4c +#define PLX_CNTRL 0x50 +#define PLX_GPIOC 0x54 + + +/* + * REGISTER SETTING FOR HFC-4S/8S AND HFC-E1 + */ + +/* write only registers */ +#define R_CIRM 0x00 +#define R_CTRL 0x01 +#define R_BRG_PCM_CFG 0x02 +#define R_RAM_ADDR0 0x08 +#define R_RAM_ADDR1 0x09 +#define R_RAM_ADDR2 0x0A +#define R_FIRST_FIFO 0x0B +#define R_RAM_SZ 0x0C +#define R_FIFO_MD 0x0D +#define R_INC_RES_FIFO 0x0E +#define R_FSM_IDX 0x0F +#define R_FIFO 0x0F +#define R_SLOT 0x10 +#define R_IRQMSK_MISC 0x11 +#define R_SCI_MSK 0x12 +#define R_IRQ_CTRL 0x13 +#define R_PCM_MD0 0x14 +#define R_PCM_MD1 0x15 +#define R_PCM_MD2 0x15 +#define R_SH0H 0x15 +#define R_SH1H 0x15 +#define R_SH0L 0x15 +#define R_SH1L 0x15 +#define R_SL_SEL0 0x15 +#define R_SL_SEL1 0x15 +#define R_SL_SEL2 0x15 +#define R_SL_SEL3 0x15 +#define R_SL_SEL4 0x15 +#define R_SL_SEL5 0x15 +#define R_SL_SEL6 0x15 +#define R_SL_SEL7 0x15 +#define R_ST_SEL 0x16 +#define R_ST_SYNC 0x17 +#define R_CONF_EN 0x18 +#define R_TI_WD 0x1A +#define R_BERT_WD_MD 0x1B +#define R_DTMF 0x1C +#define R_DTMF_N 0x1D +#define R_E1_WR_STA 0x20 +#define R_E1_RD_STA 0x20 +#define R_LOS0 0x22 +#define R_LOS1 0x23 +#define R_RX0 0x24 +#define R_RX_FR0 0x25 +#define R_RX_FR1 0x26 +#define R_TX0 0x28 +#define R_TX1 0x29 +#define R_TX_FR0 0x2C + +#define R_TX_FR1 0x2D +#define R_TX_FR2 0x2E +#define R_JATT_ATT 0x2F /* undocumented */ +#define A_ST_RD_STATE 0x30 +#define A_ST_WR_STATE 0x30 +#define R_RX_OFF 0x30 +#define A_ST_CTRL0 0x31 +#define R_SYNC_OUT 0x31 +#define A_ST_CTRL1 0x32 +#define A_ST_CTRL2 0x33 +#define A_ST_SQ_WR 0x34 +#define R_TX_OFF 0x34 +#define R_SYNC_CTRL 0x35 +#define A_ST_CLK_DLY 0x37 +#define R_PWM0 0x38 +#define R_PWM1 0x39 +#define A_ST_B1_TX 0x3C +#define A_ST_B2_TX 0x3D +#define A_ST_D_TX 0x3E +#define R_GPIO_OUT0 0x40 +#define R_GPIO_OUT1 0x41 +#define R_GPIO_EN0 0x42 +#define R_GPIO_EN1 0x43 +#define R_GPIO_SEL 0x44 +#define R_BRG_CTRL 0x45 +#define R_PWM_MD 0x46 +#define R_BRG_MD 0x47 +#define R_BRG_TIM0 0x48 +#define 
R_BRG_TIM1 0x49 +#define R_BRG_TIM2 0x4A +#define R_BRG_TIM3 0x4B +#define R_BRG_TIM_SEL01 0x4C +#define R_BRG_TIM_SEL23 0x4D +#define R_BRG_TIM_SEL45 0x4E +#define R_BRG_TIM_SEL67 0x4F +#define A_SL_CFG 0xD0 +#define A_CONF 0xD1 +#define A_CH_MSK 0xF4 +#define A_CON_HDLC 0xFA +#define A_SUBCH_CFG 0xFB +#define A_CHANNEL 0xFC +#define A_FIFO_SEQ 0xFD +#define A_IRQ_MSK 0xFF + +/* read only registers */ +#define A_Z12 0x04 +#define A_Z1L 0x04 +#define A_Z1 0x04 +#define A_Z1H 0x05 +#define A_Z2L 0x06 +#define A_Z2 0x06 +#define A_Z2H 0x07 +#define A_F1 0x0C +#define A_F12 0x0C +#define A_F2 0x0D +#define R_IRQ_OVIEW 0x10 +#define R_IRQ_MISC 0x11 +#define R_IRQ_STATECH 0x12 +#define R_CONF_OFLOW 0x14 +#define R_RAM_USE 0x15 +#define R_CHIP_ID 0x16 +#define R_BERT_STA 0x17 +#define R_F0_CNTL 0x18 +#define R_F0_CNTH 0x19 +#define R_BERT_EC 0x1A +#define R_BERT_ECL 0x1A +#define R_BERT_ECH 0x1B +#define R_STATUS 0x1C +#define R_CHIP_RV 0x1F +#define R_STATE 0x20 +#define R_SYNC_STA 0x24 +#define R_RX_SL0_0 0x25 +#define R_RX_SL0_1 0x26 +#define R_RX_SL0_2 0x27 +#define R_JATT_DIR 0x2b /* undocumented */ +#define R_SLIP 0x2c +#define A_ST_RD_STA 0x30 +#define R_FAS_EC 0x30 +#define R_FAS_ECL 0x30 +#define R_FAS_ECH 0x31 +#define R_VIO_EC 0x32 +#define R_VIO_ECL 0x32 +#define R_VIO_ECH 0x33 +#define A_ST_SQ_RD 0x34 +#define R_CRC_EC 0x34 +#define R_CRC_ECL 0x34 +#define R_CRC_ECH 0x35 +#define R_E_EC 0x36 +#define R_E_ECL 0x36 +#define R_E_ECH 0x37 +#define R_SA6_SA13_EC 0x38 +#define R_SA6_SA13_ECL 0x38 +#define R_SA6_SA13_ECH 0x39 +#define R_SA6_SA23_EC 0x3A +#define R_SA6_SA23_ECL 0x3A +#define R_SA6_SA23_ECH 0x3B +#define A_ST_B1_RX 0x3C +#define A_ST_B2_RX 0x3D +#define A_ST_D_RX 0x3E +#define A_ST_E_RX 0x3F +#define R_GPIO_IN0 0x40 +#define R_GPIO_IN1 0x41 +#define R_GPI_IN0 0x44 +#define R_GPI_IN1 0x45 +#define R_GPI_IN2 0x46 +#define R_GPI_IN3 0x47 +#define R_INT_DATA 0x88 +#define R_IRQ_FIFO_BL0 0xC8 +#define R_IRQ_FIFO_BL1 0xC9 +#define R_IRQ_FIFO_BL2 0xCA +#define R_IRQ_FIFO_BL3 0xCB +#define R_IRQ_FIFO_BL4 0xCC +#define R_IRQ_FIFO_BL5 0xCD +#define R_IRQ_FIFO_BL6 0xCE +#define R_IRQ_FIFO_BL7 0xCF + +/* read and write registers */ +#define A_FIFO_DATA0 0x80 +#define A_FIFO_DATA1 0x80 +#define A_FIFO_DATA2 0x80 +#define A_FIFO_DATA0_NOINC 0x84 +#define A_FIFO_DATA1_NOINC 0x84 +#define A_FIFO_DATA2_NOINC 0x84 +#define R_RAM_DATA 0xC0 + + +/* + * BIT SETTING FOR HFC-4S/8S AND HFC-E1 + */ + +/* chapter 2: universal bus interface */ +/* R_CIRM */ +#define V_IRQ_SEL 0x01 +#define V_SRES 0x08 +#define V_HFCRES 0x10 +#define V_PCMRES 0x20 +#define V_STRES 0x40 +#define V_ETRES 0x40 +#define V_RLD_EPR 0x80 +/* R_CTRL */ +#define V_FIFO_LPRIO 0x02 +#define V_SLOW_RD 0x04 +#define V_EXT_RAM 0x08 +#define V_CLK_OFF 0x20 +#define V_ST_CLK 0x40 +/* R_RAM_ADDR0 */ +#define V_RAM_ADDR2 0x01 +#define V_ADDR_RES 0x40 +#define V_ADDR_INC 0x80 +/* R_RAM_SZ */ +#define V_RAM_SZ 0x01 +#define V_PWM0_16KHZ 0x10 +#define V_PWM1_16KHZ 0x20 +#define V_FZ_MD 0x80 +/* R_CHIP_ID */ +#define V_PNP_IRQ 0x01 +#define V_CHIP_ID 0x10 + +/* chapter 3: data flow */ +/* R_FIRST_FIFO */ +#define V_FIRST_FIRO_DIR 0x01 +#define V_FIRST_FIFO_NUM 0x02 +/* R_FIFO_MD */ +#define V_FIFO_MD 0x01 +#define V_CSM_MD 0x04 +#define V_FSM_MD 0x08 +#define V_FIFO_SZ 0x10 +/* R_FIFO */ +#define V_FIFO_DIR 0x01 +#define V_FIFO_NUM 0x02 +#define V_REV 0x80 +/* R_SLOT */ +#define V_SL_DIR 0x01 +#define V_SL_NUM 0x02 +/* A_SL_CFG */ +#define V_CH_DIR 0x01 +#define V_CH_SEL 0x02 +#define V_ROUTING 0x40 +/* A_CON_HDLC */ +#define V_IFF 0x01 
+#define V_HDLC_TRP 0x02 +#define V_TRP_IRQ 0x04 +#define V_DATA_FLOW 0x20 +/* A_SUBCH_CFG */ +#define V_BIT_CNT 0x01 +#define V_START_BIT 0x08 +#define V_LOOP_FIFO 0x40 +#define V_INV_DATA 0x80 +/* A_CHANNEL */ +#define V_CH_DIR0 0x01 +#define V_CH_NUM0 0x02 +/* A_FIFO_SEQ */ +#define V_NEXT_FIFO_DIR 0x01 +#define V_NEXT_FIFO_NUM 0x02 +#define V_SEQ_END 0x40 + +/* chapter 4: FIFO handling and HDLC controller */ +/* R_INC_RES_FIFO */ +#define V_INC_F 0x01 +#define V_RES_F 0x02 +#define V_RES_LOST 0x04 + +/* chapter 5: S/T interface */ +/* R_SCI_MSK */ +#define V_SCI_MSK_ST0 0x01 +#define V_SCI_MSK_ST1 0x02 +#define V_SCI_MSK_ST2 0x04 +#define V_SCI_MSK_ST3 0x08 +#define V_SCI_MSK_ST4 0x10 +#define V_SCI_MSK_ST5 0x20 +#define V_SCI_MSK_ST6 0x40 +#define V_SCI_MSK_ST7 0x80 +/* R_ST_SEL */ +#define V_ST_SEL 0x01 +#define V_MULT_ST 0x08 +/* R_ST_SYNC */ +#define V_SYNC_SEL 0x01 +#define V_AUTO_SYNC 0x08 +/* A_ST_WR_STA */ +#define V_ST_SET_STA 0x01 +#define V_ST_LD_STA 0x10 +#define V_ST_ACT 0x20 +#define V_SET_G2_G3 0x80 +/* A_ST_CTRL0 */ +#define V_B1_EN 0x01 +#define V_B2_EN 0x02 +#define V_ST_MD 0x04 +#define V_D_PRIO 0x08 +#define V_SQ_EN 0x10 +#define V_96KHZ 0x20 +#define V_TX_LI 0x40 +#define V_ST_STOP 0x80 +/* A_ST_CTRL1 */ +#define V_G2_G3_EN 0x01 +#define V_D_HI 0x04 +#define V_E_IGNO 0x08 +#define V_E_LO 0x10 +#define V_B12_SWAP 0x80 +/* A_ST_CTRL2 */ +#define V_B1_RX_EN 0x01 +#define V_B2_RX_EN 0x02 +#define V_ST_TRIS 0x40 +/* A_ST_CLK_DLY */ +#define V_ST_CK_DLY 0x01 +#define V_ST_SMPL 0x10 +/* A_ST_D_TX */ +#define V_ST_D_TX 0x40 +/* R_IRQ_STATECH */ +#define V_SCI_ST0 0x01 +#define V_SCI_ST1 0x02 +#define V_SCI_ST2 0x04 +#define V_SCI_ST3 0x08 +#define V_SCI_ST4 0x10 +#define V_SCI_ST5 0x20 +#define V_SCI_ST6 0x40 +#define V_SCI_ST7 0x80 +/* A_ST_RD_STA */ +#define V_ST_STA 0x01 +#define V_FR_SYNC_ST 0x10 +#define V_TI2_EXP 0x20 +#define V_INFO0 0x40 +#define V_G2_G3 0x80 +/* A_ST_SQ_RD */ +#define V_ST_SQ 0x01 +#define V_MF_RX_RDY 0x10 +#define V_MF_TX_RDY 0x80 +/* A_ST_D_RX */ +#define V_ST_D_RX 0x40 +/* A_ST_E_RX */ +#define V_ST_E_RX 0x40 + +/* chapter 5: E1 interface */ +/* R_E1_WR_STA */ +/* R_E1_RD_STA */ +#define V_E1_SET_STA 0x01 +#define V_E1_LD_STA 0x10 +/* R_RX0 */ +#define V_RX_CODE 0x01 +#define V_RX_FBAUD 0x04 +#define V_RX_CMI 0x08 +#define V_RX_INV_CMI 0x10 +#define V_RX_INV_CLK 0x20 +#define V_RX_INV_DATA 0x40 +#define V_AIS_ITU 0x80 +/* R_RX_FR0 */ +#define V_NO_INSYNC 0x01 +#define V_AUTO_RESYNC 0x02 +#define V_AUTO_RECO 0x04 +#define V_SWORD_COND 0x08 +#define V_SYNC_LOSS 0x10 +#define V_XCRC_SYNC 0x20 +#define V_MF_RESYNC 0x40 +#define V_RESYNC 0x80 +/* R_RX_FR1 */ +#define V_RX_MF 0x01 +#define V_RX_MF_SYNC 0x02 +#define V_RX_SL0_RAM 0x04 +#define V_ERR_SIM 0x20 +#define V_RES_NMF 0x40 +/* R_TX0 */ +#define V_TX_CODE 0x01 +#define V_TX_FBAUD 0x04 +#define V_TX_CMI_CODE 0x08 +#define V_TX_INV_CMI_CODE 0x10 +#define V_TX_INV_CLK 0x20 +#define V_TX_INV_DATA 0x40 +#define V_OUT_EN 0x80 +/* R_TX1 */ +#define V_INV_CLK 0x01 +#define V_EXCHG_DATA_LI 0x02 +#define V_AIS_OUT 0x04 +#define V_ATX 0x20 +#define V_NTRI 0x40 +#define V_AUTO_ERR_RES 0x80 +/* R_TX_FR0 */ +#define V_TRP_FAS 0x01 +#define V_TRP_NFAS 0x02 +#define V_TRP_RAL 0x04 +#define V_TRP_SA 0x08 +/* R_TX_FR1 */ +#define V_TX_FAS 0x01 +#define V_TX_NFAS 0x02 +#define V_TX_RAL 0x04 +#define V_TX_SA 0x08 +/* R_TX_FR2 */ +#define V_TX_MF 0x01 +#define V_TRP_SL0 0x02 +#define V_TX_SL0_RAM 0x04 +#define V_TX_E 0x10 +#define V_NEG_E 0x20 +#define V_XS12_ON 0x40 +#define V_XS15_ON 0x80 +/* R_RX_OFF */ 
+#define V_RX_SZ 0x01 +#define V_RX_INIT 0x04 +/* R_SYNC_OUT */ +#define V_SYNC_E1_RX 0x01 +#define V_IPATS0 0x20 +#define V_IPATS1 0x40 +#define V_IPATS2 0x80 +/* R_TX_OFF */ +#define V_TX_SZ 0x01 +#define V_TX_INIT 0x04 +/* R_SYNC_CTRL */ +#define V_EXT_CLK_SYNC 0x01 +#define V_SYNC_OFFS 0x02 +#define V_PCM_SYNC 0x04 +#define V_NEG_CLK 0x08 +#define V_HCLK 0x10 +/* +#define V_JATT_AUTO_DEL 0x20 +#define V_JATT_AUTO 0x40 +*/ +#define V_JATT_OFF 0x80 +/* R_STATE */ +#define V_E1_STA 0x01 +#define V_ALT_FR_RX 0x40 +#define V_ALT_FR_TX 0x80 +/* R_SYNC_STA */ +#define V_RX_STA 0x01 +#define V_FR_SYNC_E1 0x04 +#define V_SIG_LOS 0x08 +#define V_MFA_STA 0x10 +#define V_AIS 0x40 +#define V_NO_MF_SYNC 0x80 +/* R_RX_SL0_0 */ +#define V_SI_FAS 0x01 +#define V_SI_NFAS 0x02 +#define V_A 0x04 +#define V_CRC_OK 0x08 +#define V_TX_E1 0x10 +#define V_TX_E2 0x20 +#define V_RX_E1 0x40 +#define V_RX_E2 0x80 +/* R_SLIP */ +#define V_SLIP_RX 0x01 +#define V_FOSLIP_RX 0x08 +#define V_SLIP_TX 0x10 +#define V_FOSLIP_TX 0x80 + +/* chapter 6: PCM interface */ +/* R_PCM_MD0 */ +#define V_PCM_MD 0x01 +#define V_C4_POL 0x02 +#define V_F0_NEG 0x04 +#define V_F0_LEN 0x08 +#define V_PCM_ADDR 0x10 +/* R_SL_SEL0 */ +#define V_SL_SEL0 0x01 +#define V_SH_SEL0 0x80 +/* R_SL_SEL1 */ +#define V_SL_SEL1 0x01 +#define V_SH_SEL1 0x80 +/* R_SL_SEL2 */ +#define V_SL_SEL2 0x01 +#define V_SH_SEL2 0x80 +/* R_SL_SEL3 */ +#define V_SL_SEL3 0x01 +#define V_SH_SEL3 0x80 +/* R_SL_SEL4 */ +#define V_SL_SEL4 0x01 +#define V_SH_SEL4 0x80 +/* R_SL_SEL5 */ +#define V_SL_SEL5 0x01 +#define V_SH_SEL5 0x80 +/* R_SL_SEL6 */ +#define V_SL_SEL6 0x01 +#define V_SH_SEL6 0x80 +/* R_SL_SEL7 */ +#define V_SL_SEL7 0x01 +#define V_SH_SEL7 0x80 +/* R_PCM_MD1 */ +#define V_ODEC_CON 0x01 +#define V_PLL_ADJ 0x04 +#define V_PCM_DR 0x10 +#define V_PCM_LOOP 0x40 +/* R_PCM_MD2 */ +#define V_SYNC_PLL 0x02 +#define V_SYNC_SRC 0x04 +#define V_SYNC_OUT 0x08 +#define V_ICR_FR_TIME 0x40 +#define V_EN_PLL 0x80 + +/* chapter 7: pulse width modulation */ +/* R_PWM_MD */ +#define V_EXT_IRQ_EN 0x08 +#define V_PWM0_MD 0x10 +#define V_PWM1_MD 0x40 + +/* chapter 8: multiparty audio conferences */ +/* R_CONF_EN */ +#define V_CONF_EN 0x01 +#define V_ULAW 0x80 +/* A_CONF */ +#define V_CONF_NUM 0x01 +#define V_NOISE_SUPPR 0x08 +#define V_ATT_LEV 0x20 +#define V_CONF_SL 0x80 +/* R_CONF_OFLOW */ +#define V_CONF_OFLOW0 0x01 +#define V_CONF_OFLOW1 0x02 +#define V_CONF_OFLOW2 0x04 +#define V_CONF_OFLOW3 0x08 +#define V_CONF_OFLOW4 0x10 +#define V_CONF_OFLOW5 0x20 +#define V_CONF_OFLOW6 0x40 +#define V_CONF_OFLOW7 0x80 + +/* chapter 9: DTMF contoller */ +/* R_DTMF0 */ +#define V_DTMF_EN 0x01 +#define V_HARM_SEL 0x02 +#define V_DTMF_RX_CH 0x04 +#define V_DTMF_STOP 0x08 +#define V_CHBL_SEL 0x10 +#define V_RST_DTMF 0x40 +#define V_ULAW_SEL 0x80 + +/* chapter 10: BERT */ +/* R_BERT_WD_MD */ +#define V_PAT_SEQ 0x01 +#define V_BERT_ERR 0x08 +#define V_AUTO_WD_RES 0x20 +#define V_WD_RES 0x80 +/* R_BERT_STA */ +#define V_BERT_SYNC_SRC 0x01 +#define V_BERT_SYNC 0x10 +#define V_BERT_INV_DATA 0x20 + +/* chapter 11: auxiliary interface */ +/* R_BRG_PCM_CFG */ +#define V_BRG_EN 0x01 +#define V_BRG_MD 0x02 +#define V_PCM_CLK 0x20 +#define V_ADDR_WRDLY 0x40 +/* R_BRG_CTRL */ +#define V_BRG_CS 0x01 +#define V_BRG_ADDR 0x08 +#define V_BRG_CS_SRC 0x80 +/* R_BRG_MD */ +#define V_BRG_MD0 0x01 +#define V_BRG_MD1 0x02 +#define V_BRG_MD2 0x04 +#define V_BRG_MD3 0x08 +#define V_BRG_MD4 0x10 +#define V_BRG_MD5 0x20 +#define V_BRG_MD6 0x40 +#define V_BRG_MD7 0x80 +/* R_BRG_TIM0 */ +#define V_BRG_TIM0_IDLE 0x01 
+#define V_BRG_TIM0_CLK 0x10 +/* R_BRG_TIM1 */ +#define V_BRG_TIM1_IDLE 0x01 +#define V_BRG_TIM1_CLK 0x10 +/* R_BRG_TIM2 */ +#define V_BRG_TIM2_IDLE 0x01 +#define V_BRG_TIM2_CLK 0x10 +/* R_BRG_TIM3 */ +#define V_BRG_TIM3_IDLE 0x01 +#define V_BRG_TIM3_CLK 0x10 +/* R_BRG_TIM_SEL01 */ +#define V_BRG_WR_SEL0 0x01 +#define V_BRG_RD_SEL0 0x04 +#define V_BRG_WR_SEL1 0x10 +#define V_BRG_RD_SEL1 0x40 +/* R_BRG_TIM_SEL23 */ +#define V_BRG_WR_SEL2 0x01 +#define V_BRG_RD_SEL2 0x04 +#define V_BRG_WR_SEL3 0x10 +#define V_BRG_RD_SEL3 0x40 +/* R_BRG_TIM_SEL45 */ +#define V_BRG_WR_SEL4 0x01 +#define V_BRG_RD_SEL4 0x04 +#define V_BRG_WR_SEL5 0x10 +#define V_BRG_RD_SEL5 0x40 +/* R_BRG_TIM_SEL67 */ +#define V_BRG_WR_SEL6 0x01 +#define V_BRG_RD_SEL6 0x04 +#define V_BRG_WR_SEL7 0x10 +#define V_BRG_RD_SEL7 0x40 + +/* chapter 12: clock, reset, interrupt, timer and watchdog */ +/* R_IRQMSK_MISC */ +#define V_STA_IRQMSK 0x01 +#define V_TI_IRQMSK 0x02 +#define V_PROC_IRQMSK 0x04 +#define V_DTMF_IRQMSK 0x08 +#define V_IRQ1S_MSK 0x10 +#define V_SA6_IRQMSK 0x20 +#define V_RX_EOMF_MSK 0x40 +#define V_TX_EOMF_MSK 0x80 +/* R_IRQ_CTRL */ +#define V_FIFO_IRQ 0x01 +#define V_GLOB_IRQ_EN 0x08 +#define V_IRQ_POL 0x10 +/* R_TI_WD */ +#define V_EV_TS 0x01 +#define V_WD_TS 0x10 +/* A_IRQ_MSK */ +#define V_IRQ 0x01 +#define V_BERT_EN 0x02 +#define V_MIX_IRQ 0x04 +/* R_IRQ_OVIEW */ +#define V_IRQ_FIFO_BL0 0x01 +#define V_IRQ_FIFO_BL1 0x02 +#define V_IRQ_FIFO_BL2 0x04 +#define V_IRQ_FIFO_BL3 0x08 +#define V_IRQ_FIFO_BL4 0x10 +#define V_IRQ_FIFO_BL5 0x20 +#define V_IRQ_FIFO_BL6 0x40 +#define V_IRQ_FIFO_BL7 0x80 +/* R_IRQ_MISC */ +#define V_STA_IRQ 0x01 +#define V_TI_IRQ 0x02 +#define V_IRQ_PROC 0x04 +#define V_DTMF_IRQ 0x08 +#define V_IRQ1S 0x10 +#define V_SA6_IRQ 0x20 +#define V_RX_EOMF 0x40 +#define V_TX_EOMF 0x80 +/* R_STATUS */ +#define V_BUSY 0x01 +#define V_PROC 0x02 +#define V_DTMF_STA 0x04 +#define V_LOST_STA 0x08 +#define V_SYNC_IN 0x10 +#define V_EXT_IRQSTA 0x20 +#define V_MISC_IRQSTA 0x40 +#define V_FR_IRQSTA 0x80 +/* R_IRQ_FIFO_BL0 */ +#define V_IRQ_FIFO0_TX 0x01 +#define V_IRQ_FIFO0_RX 0x02 +#define V_IRQ_FIFO1_TX 0x04 +#define V_IRQ_FIFO1_RX 0x08 +#define V_IRQ_FIFO2_TX 0x10 +#define V_IRQ_FIFO2_RX 0x20 +#define V_IRQ_FIFO3_TX 0x40 +#define V_IRQ_FIFO3_RX 0x80 +/* R_IRQ_FIFO_BL1 */ +#define V_IRQ_FIFO4_TX 0x01 +#define V_IRQ_FIFO4_RX 0x02 +#define V_IRQ_FIFO5_TX 0x04 +#define V_IRQ_FIFO5_RX 0x08 +#define V_IRQ_FIFO6_TX 0x10 +#define V_IRQ_FIFO6_RX 0x20 +#define V_IRQ_FIFO7_TX 0x40 +#define V_IRQ_FIFO7_RX 0x80 +/* R_IRQ_FIFO_BL2 */ +#define V_IRQ_FIFO8_TX 0x01 +#define V_IRQ_FIFO8_RX 0x02 +#define V_IRQ_FIFO9_TX 0x04 +#define V_IRQ_FIFO9_RX 0x08 +#define V_IRQ_FIFO10_TX 0x10 +#define V_IRQ_FIFO10_RX 0x20 +#define V_IRQ_FIFO11_TX 0x40 +#define V_IRQ_FIFO11_RX 0x80 +/* R_IRQ_FIFO_BL3 */ +#define V_IRQ_FIFO12_TX 0x01 +#define V_IRQ_FIFO12_RX 0x02 +#define V_IRQ_FIFO13_TX 0x04 +#define V_IRQ_FIFO13_RX 0x08 +#define V_IRQ_FIFO14_TX 0x10 +#define V_IRQ_FIFO14_RX 0x20 +#define V_IRQ_FIFO15_TX 0x40 +#define V_IRQ_FIFO15_RX 0x80 +/* R_IRQ_FIFO_BL4 */ +#define V_IRQ_FIFO16_TX 0x01 +#define V_IRQ_FIFO16_RX 0x02 +#define V_IRQ_FIFO17_TX 0x04 +#define V_IRQ_FIFO17_RX 0x08 +#define V_IRQ_FIFO18_TX 0x10 +#define V_IRQ_FIFO18_RX 0x20 +#define V_IRQ_FIFO19_TX 0x40 +#define V_IRQ_FIFO19_RX 0x80 +/* R_IRQ_FIFO_BL5 */ +#define V_IRQ_FIFO20_TX 0x01 +#define V_IRQ_FIFO20_RX 0x02 +#define V_IRQ_FIFO21_TX 0x04 +#define V_IRQ_FIFO21_RX 0x08 +#define V_IRQ_FIFO22_TX 0x10 +#define V_IRQ_FIFO22_RX 0x20 +#define V_IRQ_FIFO23_TX 0x40 +#define 
V_IRQ_FIFO23_RX 0x80 +/* R_IRQ_FIFO_BL6 */ +#define V_IRQ_FIFO24_TX 0x01 +#define V_IRQ_FIFO24_RX 0x02 +#define V_IRQ_FIFO25_TX 0x04 +#define V_IRQ_FIFO25_RX 0x08 +#define V_IRQ_FIFO26_TX 0x10 +#define V_IRQ_FIFO26_RX 0x20 +#define V_IRQ_FIFO27_TX 0x40 +#define V_IRQ_FIFO27_RX 0x80 +/* R_IRQ_FIFO_BL7 */ +#define V_IRQ_FIFO28_TX 0x01 +#define V_IRQ_FIFO28_RX 0x02 +#define V_IRQ_FIFO29_TX 0x04 +#define V_IRQ_FIFO29_RX 0x08 +#define V_IRQ_FIFO30_TX 0x10 +#define V_IRQ_FIFO30_RX 0x20 +#define V_IRQ_FIFO31_TX 0x40 +#define V_IRQ_FIFO31_RX 0x80 + +/* chapter 13: general purpose I/O pins (GPIO) and input pins (GPI) */ +/* R_GPIO_OUT0 */ +#define V_GPIO_OUT0 0x01 +#define V_GPIO_OUT1 0x02 +#define V_GPIO_OUT2 0x04 +#define V_GPIO_OUT3 0x08 +#define V_GPIO_OUT4 0x10 +#define V_GPIO_OUT5 0x20 +#define V_GPIO_OUT6 0x40 +#define V_GPIO_OUT7 0x80 +/* R_GPIO_OUT1 */ +#define V_GPIO_OUT8 0x01 +#define V_GPIO_OUT9 0x02 +#define V_GPIO_OUT10 0x04 +#define V_GPIO_OUT11 0x08 +#define V_GPIO_OUT12 0x10 +#define V_GPIO_OUT13 0x20 +#define V_GPIO_OUT14 0x40 +#define V_GPIO_OUT15 0x80 +/* R_GPIO_EN0 */ +#define V_GPIO_EN0 0x01 +#define V_GPIO_EN1 0x02 +#define V_GPIO_EN2 0x04 +#define V_GPIO_EN3 0x08 +#define V_GPIO_EN4 0x10 +#define V_GPIO_EN5 0x20 +#define V_GPIO_EN6 0x40 +#define V_GPIO_EN7 0x80 +/* R_GPIO_EN1 */ +#define V_GPIO_EN8 0x01 +#define V_GPIO_EN9 0x02 +#define V_GPIO_EN10 0x04 +#define V_GPIO_EN11 0x08 +#define V_GPIO_EN12 0x10 +#define V_GPIO_EN13 0x20 +#define V_GPIO_EN14 0x40 +#define V_GPIO_EN15 0x80 +/* R_GPIO_SEL */ +#define V_GPIO_SEL0 0x01 +#define V_GPIO_SEL1 0x02 +#define V_GPIO_SEL2 0x04 +#define V_GPIO_SEL3 0x08 +#define V_GPIO_SEL4 0x10 +#define V_GPIO_SEL5 0x20 +#define V_GPIO_SEL6 0x40 +#define V_GPIO_SEL7 0x80 +/* R_GPIO_IN0 */ +#define V_GPIO_IN0 0x01 +#define V_GPIO_IN1 0x02 +#define V_GPIO_IN2 0x04 +#define V_GPIO_IN3 0x08 +#define V_GPIO_IN4 0x10 +#define V_GPIO_IN5 0x20 +#define V_GPIO_IN6 0x40 +#define V_GPIO_IN7 0x80 +/* R_GPIO_IN1 */ +#define V_GPIO_IN8 0x01 +#define V_GPIO_IN9 0x02 +#define V_GPIO_IN10 0x04 +#define V_GPIO_IN11 0x08 +#define V_GPIO_IN12 0x10 +#define V_GPIO_IN13 0x20 +#define V_GPIO_IN14 0x40 +#define V_GPIO_IN15 0x80 +/* R_GPI_IN0 */ +#define V_GPI_IN0 0x01 +#define V_GPI_IN1 0x02 +#define V_GPI_IN2 0x04 +#define V_GPI_IN3 0x08 +#define V_GPI_IN4 0x10 +#define V_GPI_IN5 0x20 +#define V_GPI_IN6 0x40 +#define V_GPI_IN7 0x80 +/* R_GPI_IN1 */ +#define V_GPI_IN8 0x01 +#define V_GPI_IN9 0x02 +#define V_GPI_IN10 0x04 +#define V_GPI_IN11 0x08 +#define V_GPI_IN12 0x10 +#define V_GPI_IN13 0x20 +#define V_GPI_IN14 0x40 +#define V_GPI_IN15 0x80 +/* R_GPI_IN2 */ +#define V_GPI_IN16 0x01 +#define V_GPI_IN17 0x02 +#define V_GPI_IN18 0x04 +#define V_GPI_IN19 0x08 +#define V_GPI_IN20 0x10 +#define V_GPI_IN21 0x20 +#define V_GPI_IN22 0x40 +#define V_GPI_IN23 0x80 +/* R_GPI_IN3 */ +#define V_GPI_IN24 0x01 +#define V_GPI_IN25 0x02 +#define V_GPI_IN26 0x04 +#define V_GPI_IN27 0x08 +#define V_GPI_IN28 0x10 +#define V_GPI_IN29 0x20 +#define V_GPI_IN30 0x40 +#define V_GPI_IN31 0x80 + +/* map of all registers, used for debugging */ + +#ifdef HFC_REGISTER_DEBUG +struct hfc_register_names { + char *name; + u_char reg; +} hfc_register_names[] = { + /* write registers */ + {"R_CIRM", 0x00}, + {"R_CTRL", 0x01}, + {"R_BRG_PCM_CFG ", 0x02}, + {"R_RAM_ADDR0", 0x08}, + {"R_RAM_ADDR1", 0x09}, + {"R_RAM_ADDR2", 0x0A}, + {"R_FIRST_FIFO", 0x0B}, + {"R_RAM_SZ", 0x0C}, + {"R_FIFO_MD", 0x0D}, + {"R_INC_RES_FIFO", 0x0E}, + {"R_FIFO / R_FSM_IDX", 0x0F}, + {"R_SLOT", 0x10}, + {"R_IRQMSK_MISC", 
0x11}, + {"R_SCI_MSK", 0x12}, + {"R_IRQ_CTRL", 0x13}, + {"R_PCM_MD0", 0x14}, + {"R_0x15", 0x15}, + {"R_ST_SEL", 0x16}, + {"R_ST_SYNC", 0x17}, + {"R_CONF_EN", 0x18}, + {"R_TI_WD", 0x1A}, + {"R_BERT_WD_MD", 0x1B}, + {"R_DTMF", 0x1C}, + {"R_DTMF_N", 0x1D}, + {"R_E1_XX_STA", 0x20}, + {"R_LOS0", 0x22}, + {"R_LOS1", 0x23}, + {"R_RX0", 0x24}, + {"R_RX_FR0", 0x25}, + {"R_RX_FR1", 0x26}, + {"R_TX0", 0x28}, + {"R_TX1", 0x29}, + {"R_TX_FR0", 0x2C}, + {"R_TX_FR1", 0x2D}, + {"R_TX_FR2", 0x2E}, + {"R_JATT_ATT", 0x2F}, + {"A_ST_xx_STA/R_RX_OFF", 0x30}, + {"A_ST_CTRL0/R_SYNC_OUT", 0x31}, + {"A_ST_CTRL1", 0x32}, + {"A_ST_CTRL2", 0x33}, + {"A_ST_SQ_WR", 0x34}, + {"R_TX_OFF", 0x34}, + {"R_SYNC_CTRL", 0x35}, + {"A_ST_CLK_DLY", 0x37}, + {"R_PWM0", 0x38}, + {"R_PWM1", 0x39}, + {"A_ST_B1_TX", 0x3C}, + {"A_ST_B2_TX", 0x3D}, + {"A_ST_D_TX", 0x3E}, + {"R_GPIO_OUT0", 0x40}, + {"R_GPIO_OUT1", 0x41}, + {"R_GPIO_EN0", 0x42}, + {"R_GPIO_EN1", 0x43}, + {"R_GPIO_SEL", 0x44}, + {"R_BRG_CTRL", 0x45}, + {"R_PWM_MD", 0x46}, + {"R_BRG_MD", 0x47}, + {"R_BRG_TIM0", 0x48}, + {"R_BRG_TIM1", 0x49}, + {"R_BRG_TIM2", 0x4A}, + {"R_BRG_TIM3", 0x4B}, + {"R_BRG_TIM_SEL01", 0x4C}, + {"R_BRG_TIM_SEL23", 0x4D}, + {"R_BRG_TIM_SEL45", 0x4E}, + {"R_BRG_TIM_SEL67", 0x4F}, + {"A_FIFO_DATA0-2", 0x80}, + {"A_FIFO_DATA0-2_NOINC", 0x84}, + {"R_RAM_DATA", 0xC0}, + {"A_SL_CFG", 0xD0}, + {"A_CONF", 0xD1}, + {"A_CH_MSK", 0xF4}, + {"A_CON_HDLC", 0xFA}, + {"A_SUBCH_CFG", 0xFB}, + {"A_CHANNEL", 0xFC}, + {"A_FIFO_SEQ", 0xFD}, + {"A_IRQ_MSK", 0xFF}, + {NULL, 0}, + + /* read registers */ + {"A_Z1", 0x04}, + {"A_Z1H", 0x05}, + {"A_Z2", 0x06}, + {"A_Z2H", 0x07}, + {"A_F1", 0x0C}, + {"A_F2", 0x0D}, + {"R_IRQ_OVIEW", 0x10}, + {"R_IRQ_MISC", 0x11}, + {"R_IRQ_STATECH", 0x12}, + {"R_CONF_OFLOW", 0x14}, + {"R_RAM_USE", 0x15}, + {"R_CHIP_ID", 0x16}, + {"R_BERT_STA", 0x17}, + {"R_F0_CNTL", 0x18}, + {"R_F0_CNTH", 0x19}, + {"R_BERT_ECL", 0x1A}, + {"R_BERT_ECH", 0x1B}, + {"R_STATUS", 0x1C}, + {"R_CHIP_RV", 0x1F}, + {"R_STATE", 0x20}, + {"R_SYNC_STA", 0x24}, + {"R_RX_SL0_0", 0x25}, + {"R_RX_SL0_1", 0x26}, + {"R_RX_SL0_2", 0x27}, + {"R_JATT_DIR", 0x2b}, + {"R_SLIP", 0x2c}, + {"A_ST_RD_STA", 0x30}, + {"R_FAS_ECL", 0x30}, + {"R_FAS_ECH", 0x31}, + {"R_VIO_ECL", 0x32}, + {"R_VIO_ECH", 0x33}, + {"R_CRC_ECL / A_ST_SQ_RD", 0x34}, + {"R_CRC_ECH", 0x35}, + {"R_E_ECL", 0x36}, + {"R_E_ECH", 0x37}, + {"R_SA6_SA13_ECL", 0x38}, + {"R_SA6_SA13_ECH", 0x39}, + {"R_SA6_SA23_ECL", 0x3A}, + {"R_SA6_SA23_ECH", 0x3B}, + {"A_ST_B1_RX", 0x3C}, + {"A_ST_B2_RX", 0x3D}, + {"A_ST_D_RX", 0x3E}, + {"A_ST_E_RX", 0x3F}, + {"R_GPIO_IN0", 0x40}, + {"R_GPIO_IN1", 0x41}, + {"R_GPI_IN0", 0x44}, + {"R_GPI_IN1", 0x45}, + {"R_GPI_IN2", 0x46}, + {"R_GPI_IN3", 0x47}, + {"A_FIFO_DATA0-2", 0x80}, + {"A_FIFO_DATA0-2_NOINC", 0x84}, + {"R_INT_DATA", 0x88}, + {"R_RAM_DATA", 0xC0}, + {"R_IRQ_FIFO_BL0", 0xC8}, + {"R_IRQ_FIFO_BL1", 0xC9}, + {"R_IRQ_FIFO_BL2", 0xCA}, + {"R_IRQ_FIFO_BL3", 0xCB}, + {"R_IRQ_FIFO_BL4", 0xCC}, + {"R_IRQ_FIFO_BL5", 0xCD}, + {"R_IRQ_FIFO_BL6", 0xCE}, + {"R_IRQ_FIFO_BL7", 0xCF}, +}; +#endif /* HFC_REGISTER_DEBUG */ + diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c new file mode 100644 index 000000000000..2649ea55a9e8 --- /dev/null +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -0,0 +1,5320 @@ +/* + * hfcmulti.c low level driver for hfc-4s/hfc-8s/hfc-e1 based cards + * + * Author Andreas Eversberg (jolly@eversberg.eu) + * ported to mqueue mechanism: + * Peter Sprenger (sprengermoving-bytes.de) + * + * inspired by existing hfc-pci driver: + * Copyright 
1999 by Werner Cornelius (werner@isdn-development.de)
+ * Copyright 2008 by Karsten Keil (kkeil@suse.de)
+ * Copyright 2008 by Andreas Eversberg (jolly@eversberg.eu)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ * Thanks to Cologne Chip AG for this great controller!
+ */
+
+/*
+ * module parameters:
+ * type:
+ *	By default (0), the card is automatically detected.
+ *	Or use the following combinations:
+ *	Bit 0-7 = 0x00001 = HFC-E1 (1 port)
+ *	or Bit 0-7 = 0x00004 = HFC-4S (4 ports)
+ *	or Bit 0-7 = 0x00008 = HFC-8S (8 ports)
+ *	Bit 8 = 0x00100 = uLaw (instead of aLaw)
+ *	Bit 9 = 0x00200 = Disable DTMF detect on all B-channels via hardware
+ *	Bit 10 = spare
+ *	Bit 11 = 0x00800 = Force PCM bus into slave mode. (otherwise auto)
+ *	or Bit 12 = 0x01000 = Force PCM bus into master mode. (otherwise auto)
+ *	Bit 13 = spare
+ *	Bit 14 = 0x04000 = Use external ram (128K)
+ *	Bit 15 = 0x08000 = Use external ram (512K)
+ *	Bit 16 = 0x10000 = Use 64 timeslots instead of 32
+ *	or Bit 17 = 0x20000 = Use 128 timeslots instead of anything else
+ *	Bit 18 = spare
+ *	Bit 19 = 0x80000 = Send the Watchdog a Signal (Dual E1 with Watchdog)
+ *	(all other bits are reserved and shall be 0)
+ *	example: 0x20204 one HFC-4S with dtmf detection and 128 timeslots on PCM
+ *		bus (PCM master)
+ *
+ * port: (optional or required for all ports on all installed cards)
+ *	HFC-4S/HFC-8S only bits:
+ *	Bit 0 = 0x001 = Use master clock for this S/T interface
+ *			(only once per chip).
+ *	Bit 1 = 0x002 = transmitter line setup (non capacitive mode)
+ *			Don't use this unless you know what you are doing!
+ *	Bit 2 = 0x004 = Disable E-channel. (No E-channel processing)
+ *	example: 0x0001,0x0000,0x0000,0x0000 one HFC-4S with master clock
+ *		 received from port 1
+ *
+ *	HFC-E1 only bits:
+ *	Bit 0 = 0x0001 = interface: 0=copper, 1=optical
+ *	Bit 1 = 0x0002 = reserved (later for 32 B-channels transparent mode)
+ *	Bit 2 = 0x0004 = Report LOS
+ *	Bit 3 = 0x0008 = Report AIS
+ *	Bit 4 = 0x0010 = Report SLIP
+ *	Bit 5 = 0x0020 = Report RDI
+ *	Bit 8 = 0x0100 = Turn off CRC-4 Multiframe Mode, use double frame
+ *			 mode instead.
+ *	Bit 9 = 0x0200 = Force get clock from interface, even in NT mode.
+ *	or Bit 10 = 0x0400 = Force put clock to interface, even in TE mode.
+ *	Bit 11 = 0x0800 = Use direct RX clock for PCM sync rather than PLL.
+ *			  (E1 only)
+ *	Bit 12-13 = 0xX000 = elastic jitter buffer (1-3), Set both bits to 0
+ *			     for default.
+ *	(all other bits are reserved and shall be 0)
+ *
+ * debug:
+ *	NOTE: only one debug value must be given for all cards
+ *	enable debugging (see hfc_multi.h for debug options)
+ *
+ * poll:
+ *	NOTE: only one poll value must be given for all cards
+ *	Give the number of samples for each fifo process.
+ *	By default 128 is used. Decrease to reduce delay, increase to
+ *	reduce cpu load. If unsure, don't mess with it!
+ *	Valid is 8, 16, 32, 64, 128, 256.
+ *
+ * pcm:
+ *	NOTE: only one pcm value must be given for every card.
+ *	The PCM bus id tells the mISDNdsp module about the connected PCM bus.
+ *	By default (0), the PCM bus id is 100 for the card that is PCM master.
+ *	If multiple cards are PCM master (because they are not interconnected),
+ *	each card with PCM master will have increasing PCM id.
+ *	All PCM busses with the same ID are expected to be connected and have
+ *	common time slots.
+ *	Only one chip of the PCM bus must be master, the others slave.
+ *	-1 means no PCM bus support at all.
+ *	Omit this value, if all cards are interconnected or none is connected.
+ *	If unsure, don't give this parameter.
+ *
+ * dslot:
+ *	NOTE: only one dslot value must be given for every card.
+ *	Also this value must be given for non-E1 cards. If omitted, the E1
+ *	card has D-channel on time slot 16, which is default.
+ *	If 1..15 or 17..31, an alternate time slot is used for D-channel.
+ *	In this case, the application must be able to handle this.
+ *	If -1 is given, the D-channel is disabled and all 31 slots can be used
+ *	for B-channel. (only for specific applications)
+ *	If you don't know how to use it, you don't need it!
+ *
+ * iomode:
+ *	NOTE: only one mode value must be given for every card.
+ *	-> See hfc_multi.h for HFC_IO_MODE_* values
+ *	By default, the IO mode is pci memory IO (MEMIO).
+ *	Some cards require a specific IO mode, so it cannot be changed.
+ *	It may be useful to set IO mode to register io (REGIO) to solve
+ *	PCI bridge problems.
+ *	If unsure, don't give this parameter.
+ *
+ * clockdelay_nt:
+ *	NOTE: only one clockdelay_nt value must be given once for all cards.
+ *	Give the value of the clock control register (A_ST_CLK_DLY)
+ *	of the S/T interfaces in NT mode.
+ *	This register is needed for the TBR3 certification, so don't change it.
+ *
+ * clockdelay_te:
+ *	NOTE: only one clockdelay_te value must be given once
+ *	Give the value of the clock control register (A_ST_CLK_DLY)
+ *	of the S/T interfaces in TE mode.
+ *	This register is needed for the TBR3 certification, so don't change it.
+ */
+
+/*
+ * debug register access (never use this, it will flood your system log)
+ * #define HFC_REGISTER_DEBUG
+ */
+
+static const char *hfcmulti_revision = "2.00";
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/mISDNhw.h>
+#include <linux/mISDNdsp.h>
+
+/*
+#define IRQCOUNT_DEBUG
+#define IRQ_DEBUG
+*/
+
+#include "hfc_multi.h"
+#ifdef ECHOPREP
+#include "gaintab.h"
+#endif
+
+#define	MAX_CARDS	8
+#define	MAX_PORTS	(8 * MAX_CARDS)
+
+static LIST_HEAD(HFClist);
+static spinlock_t HFClock; /* global hfc list lock */
+
+static void ph_state_change(struct dchannel *);
+static void (*hfc_interrupt)(void);
+static void (*register_interrupt)(void);
+static int (*unregister_interrupt)(void);
+static int interrupt_registered;
+
+static struct hfc_multi *syncmaster;
+int plxsd_master; /* if we have a master card (yet) */
+static spinlock_t plx_lock; /* may not acquire other lock inside */
+EXPORT_SYMBOL(plx_lock);
+
+#define	TYP_E1		1
+#define	TYP_4S		4
+#define	TYP_8S		8
+
+static int poll_timer = 6;	/* default = 128 samples = 16ms */
+/* number of POLL_TIMER interrupts for G2 timeout (ca 1s) */
+static int nt_t1_count[] = { 3840, 1920, 960, 480, 240, 120, 60, 30 };
+#define	CLKDEL_TE	0x0f	/* CLKDEL in TE mode */
+#define	CLKDEL_NT	0x6c	/* CLKDEL in NT mode
+				   (0x60 MUST be included!)
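+				   Both CLKDEL defaults are only the initial
+				   values of the clockdelay_te / clockdelay_nt
+				   module parameters described above.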
*/ +static u_char silence = 0xff; /* silence by LAW */ + +#define DIP_4S 0x1 /* DIP Switches for Beronet 1S/2S/4S cards */ +#define DIP_8S 0x2 /* DIP Switches for Beronet 8S+ cards */ +#define DIP_E1 0x3 /* DIP Switches for Beronet E1 cards */ + +/* + * module stuff + */ + +static uint type[MAX_CARDS]; +static uint pcm[MAX_CARDS]; +static uint dslot[MAX_CARDS]; +static uint iomode[MAX_CARDS]; +static uint port[MAX_PORTS]; +static uint debug; +static uint poll; +static uint timer; +static uint clockdelay_te = CLKDEL_TE; +static uint clockdelay_nt = CLKDEL_NT; + +static int HFC_cnt, Port_cnt, PCM_cnt = 99; + +MODULE_AUTHOR("Andreas Eversberg"); +MODULE_LICENSE("GPL"); +module_param(debug, uint, S_IRUGO | S_IWUSR); +module_param(poll, uint, S_IRUGO | S_IWUSR); +module_param(timer, uint, S_IRUGO | S_IWUSR); +module_param(clockdelay_te, uint, S_IRUGO | S_IWUSR); +module_param(clockdelay_nt, uint, S_IRUGO | S_IWUSR); +module_param_array(type, uint, NULL, S_IRUGO | S_IWUSR); +module_param_array(pcm, uint, NULL, S_IRUGO | S_IWUSR); +module_param_array(dslot, uint, NULL, S_IRUGO | S_IWUSR); +module_param_array(iomode, uint, NULL, S_IRUGO | S_IWUSR); +module_param_array(port, uint, NULL, S_IRUGO | S_IWUSR); + +#ifdef HFC_REGISTER_DEBUG +#define HFC_outb(hc, reg, val) \ + (hc->HFC_outb(hc, reg, val, __func__, __LINE__)) +#define HFC_outb_nodebug(hc, reg, val) \ + (hc->HFC_outb_nodebug(hc, reg, val, __func__, __LINE__)) +#define HFC_inb(hc, reg) \ + (hc->HFC_inb(hc, reg, __func__, __LINE__)) +#define HFC_inb_nodebug(hc, reg) \ + (hc->HFC_inb_nodebug(hc, reg, __func__, __LINE__)) +#define HFC_inw(hc, reg) \ + (hc->HFC_inw(hc, reg, __func__, __LINE__)) +#define HFC_inw_nodebug(hc, reg) \ + (hc->HFC_inw_nodebug(hc, reg, __func__, __LINE__)) +#define HFC_wait(hc) \ + (hc->HFC_wait(hc, __func__, __LINE__)) +#define HFC_wait_nodebug(hc) \ + (hc->HFC_wait_nodebug(hc, __func__, __LINE__)) +#else +#define HFC_outb(hc, reg, val) (hc->HFC_outb(hc, reg, val)) +#define HFC_outb_nodebug(hc, reg, val) (hc->HFC_outb_nodebug(hc, reg, val)) +#define HFC_inb(hc, reg) (hc->HFC_inb(hc, reg)) +#define HFC_inb_nodebug(hc, reg) (hc->HFC_inb_nodebug(hc, reg)) +#define HFC_inw(hc, reg) (hc->HFC_inw(hc, reg)) +#define HFC_inw_nodebug(hc, reg) (hc->HFC_inw_nodebug(hc, reg)) +#define HFC_wait(hc) (hc->HFC_wait(hc)) +#define HFC_wait_nodebug(hc) (hc->HFC_wait_nodebug(hc)) +#endif + +/* HFC_IO_MODE_PCIMEM */ +static void +#ifdef HFC_REGISTER_DEBUG +HFC_outb_pcimem(struct hfc_multi *hc, u_char reg, u_char val, + const char *function, int line) +#else +HFC_outb_pcimem(struct hfc_multi *hc, u_char reg, u_char val) +#endif +{ + writeb(val, (hc->pci_membase)+reg); +} +static u_char +#ifdef HFC_REGISTER_DEBUG +HFC_inb_pcimem(struct hfc_multi *hc, u_char reg, const char *function, int line) +#else +HFC_inb_pcimem(struct hfc_multi *hc, u_char reg) +#endif +{ + return readb((hc->pci_membase)+reg); +} +static u_short +#ifdef HFC_REGISTER_DEBUG +HFC_inw_pcimem(struct hfc_multi *hc, u_char reg, const char *function, int line) +#else +HFC_inw_pcimem(struct hfc_multi *hc, u_char reg) +#endif +{ + return readw((hc->pci_membase)+reg); +} +static void +#ifdef HFC_REGISTER_DEBUG +HFC_wait_pcimem(struct hfc_multi *hc, const char *function, int line) +#else +HFC_wait_pcimem(struct hfc_multi *hc) +#endif +{ + while (readb((hc->pci_membase)+R_STATUS) & V_BUSY); +} + +/* HFC_IO_MODE_REGIO */ +static void +#ifdef HFC_REGISTER_DEBUG +HFC_outb_regio(struct hfc_multi *hc, u_char reg, u_char val, + const char *function, int line) +#else +HFC_outb_regio(struct 
hfc_multi *hc, u_char reg, u_char val) +#endif +{ + outb(reg, (hc->pci_iobase)+4); + outb(val, hc->pci_iobase); +} +static u_char +#ifdef HFC_REGISTER_DEBUG +HFC_inb_regio(struct hfc_multi *hc, u_char reg, const char *function, int line) +#else +HFC_inb_regio(struct hfc_multi *hc, u_char reg) +#endif +{ + outb(reg, (hc->pci_iobase)+4); + return inb(hc->pci_iobase); +} +static u_short +#ifdef HFC_REGISTER_DEBUG +HFC_inw_regio(struct hfc_multi *hc, u_char reg, const char *function, int line) +#else +HFC_inw_regio(struct hfc_multi *hc, u_char reg) +#endif +{ + outb(reg, (hc->pci_iobase)+4); + return inw(hc->pci_iobase); +} +static void +#ifdef HFC_REGISTER_DEBUG +HFC_wait_regio(struct hfc_multi *hc, const char *function, int line) +#else +HFC_wait_regio(struct hfc_multi *hc) +#endif +{ + outb(R_STATUS, (hc->pci_iobase)+4); + while (inb(hc->pci_iobase) & V_BUSY); +} + +#ifdef HFC_REGISTER_DEBUG +static void +HFC_outb_debug(struct hfc_multi *hc, u_char reg, u_char val, + const char *function, int line) +{ + char regname[256] = "", bits[9] = "xxxxxxxx"; + int i; + + i = -1; + while (hfc_register_names[++i].name) { + if (hfc_register_names[i].reg == reg) + strcat(regname, hfc_register_names[i].name); + } + if (regname[0] == '\0') + strcpy(regname, "register"); + + bits[7] = '0'+(!!(val&1)); + bits[6] = '0'+(!!(val&2)); + bits[5] = '0'+(!!(val&4)); + bits[4] = '0'+(!!(val&8)); + bits[3] = '0'+(!!(val&16)); + bits[2] = '0'+(!!(val&32)); + bits[1] = '0'+(!!(val&64)); + bits[0] = '0'+(!!(val&128)); + printk(KERN_DEBUG + "HFC_outb(chip %d, %02x=%s, 0x%02x=%s); in %s() line %d\n", + hc->id, reg, regname, val, bits, function, line); + HFC_outb_nodebug(hc, reg, val); +} +static u_char +HFC_inb_debug(struct hfc_multi *hc, u_char reg, const char *function, int line) +{ + char regname[256] = "", bits[9] = "xxxxxxxx"; + u_char val = HFC_inb_nodebug(hc, reg); + int i; + + i = 0; + while (hfc_register_names[i++].name) + ; + while (hfc_register_names[++i].name) { + if (hfc_register_names[i].reg == reg) + strcat(regname, hfc_register_names[i].name); + } + if (regname[0] == '\0') + strcpy(regname, "register"); + + bits[7] = '0'+(!!(val&1)); + bits[6] = '0'+(!!(val&2)); + bits[5] = '0'+(!!(val&4)); + bits[4] = '0'+(!!(val&8)); + bits[3] = '0'+(!!(val&16)); + bits[2] = '0'+(!!(val&32)); + bits[1] = '0'+(!!(val&64)); + bits[0] = '0'+(!!(val&128)); + printk(KERN_DEBUG + "HFC_inb(chip %d, %02x=%s) = 0x%02x=%s; in %s() line %d\n", + hc->id, reg, regname, val, bits, function, line); + return val; +} +static u_short +HFC_inw_debug(struct hfc_multi *hc, u_char reg, const char *function, int line) +{ + char regname[256] = ""; + u_short val = HFC_inw_nodebug(hc, reg); + int i; + + i = 0; + while (hfc_register_names[i++].name) + ; + while (hfc_register_names[++i].name) { + if (hfc_register_names[i].reg == reg) + strcat(regname, hfc_register_names[i].name); + } + if (regname[0] == '\0') + strcpy(regname, "register"); + + printk(KERN_DEBUG + "HFC_inw(chip %d, %02x=%s) = 0x%04x; in %s() line %d\n", + hc->id, reg, regname, val, function, line); + return val; +} +static void +HFC_wait_debug(struct hfc_multi *hc, const char *function, int line) +{ + printk(KERN_DEBUG "HFC_wait(chip %d); in %s() line %d\n", + hc->id, function, line); + HFC_wait_nodebug(hc); +} +#endif + +/* write fifo data (REGIO) */ +void +write_fifo_regio(struct hfc_multi *hc, u_char *data, int len) +{ + outb(A_FIFO_DATA0, (hc->pci_iobase)+4); + while (len>>2) { + outl(*(u32 *)data, hc->pci_iobase); + data += 4; + len -= 4; + } + while (len>>1) { + outw(*(u16 
*)data, hc->pci_iobase); + data += 2; + len -= 2; + } + while (len) { + outb(*data, hc->pci_iobase); + data++; + len--; + } +} +/* write fifo data (PCIMEM) */ +void +write_fifo_pcimem(struct hfc_multi *hc, u_char *data, int len) +{ + while (len>>2) { + writel(*(u32 *)data, (hc->pci_membase)+A_FIFO_DATA0); + data += 4; + len -= 4; + } + while (len>>1) { + writew(*(u16 *)data, (hc->pci_membase)+A_FIFO_DATA0); + data += 2; + len -= 2; + } + while (len) { + writeb(*data, (hc->pci_membase)+A_FIFO_DATA0); + data++; + len--; + } +} +/* read fifo data (REGIO) */ +void +read_fifo_regio(struct hfc_multi *hc, u_char *data, int len) +{ + outb(A_FIFO_DATA0, (hc->pci_iobase)+4); + while (len>>2) { + *(u32 *)data = inl(hc->pci_iobase); + data += 4; + len -= 4; + } + while (len>>1) { + *(u16 *)data = inw(hc->pci_iobase); + data += 2; + len -= 2; + } + while (len) { + *data = inb(hc->pci_iobase); + data++; + len--; + } +} + +/* read fifo data (PCIMEM) */ +void +read_fifo_pcimem(struct hfc_multi *hc, u_char *data, int len) +{ + while (len>>2) { + *(u32 *)data = + readl((hc->pci_membase)+A_FIFO_DATA0); + data += 4; + len -= 4; + } + while (len>>1) { + *(u16 *)data = + readw((hc->pci_membase)+A_FIFO_DATA0); + data += 2; + len -= 2; + } + while (len) { + *data = readb((hc->pci_membase)+A_FIFO_DATA0); + data++; + len--; + } +} + + +static void +enable_hwirq(struct hfc_multi *hc) +{ + hc->hw.r_irq_ctrl |= V_GLOB_IRQ_EN; + HFC_outb(hc, R_IRQ_CTRL, hc->hw.r_irq_ctrl); +} + +static void +disable_hwirq(struct hfc_multi *hc) +{ + hc->hw.r_irq_ctrl &= ~((u_char)V_GLOB_IRQ_EN); + HFC_outb(hc, R_IRQ_CTRL, hc->hw.r_irq_ctrl); +} + +#define NUM_EC 2 +#define MAX_TDM_CHAN 32 + + +inline void +enablepcibridge(struct hfc_multi *c) +{ + HFC_outb(c, R_BRG_PCM_CFG, (0x0 << 6) | 0x3); /* was _io before */ +} + +inline void +disablepcibridge(struct hfc_multi *c) +{ + HFC_outb(c, R_BRG_PCM_CFG, (0x0 << 6) | 0x2); /* was _io before */ +} + +inline unsigned char +readpcibridge(struct hfc_multi *hc, unsigned char address) +{ + unsigned short cipv; + unsigned char data; + + if (!hc->pci_iobase) + return 0; + + /* slow down a PCI read access by 1 PCI clock cycle */ + HFC_outb(hc, R_CTRL, 0x4); /*was _io before*/ + + if (address == 0) + cipv = 0x4000; + else + cipv = 0x5800; + + /* select local bridge port address by writing to CIP port */ + /* data = HFC_inb(c, cipv); * was _io before */ + outw(cipv, hc->pci_iobase + 4); + data = inb(hc->pci_iobase); + + /* restore R_CTRL for normal PCI read cycle speed */ + HFC_outb(hc, R_CTRL, 0x0); /* was _io before */ + + return data; +} + +inline void +writepcibridge(struct hfc_multi *hc, unsigned char address, unsigned char data) +{ + unsigned short cipv; + unsigned int datav; + + if (!hc->pci_iobase) + return; + + if (address == 0) + cipv = 0x4000; + else + cipv = 0x5800; + + /* select local bridge port address by writing to CIP port */ + outw(cipv, hc->pci_iobase + 4); + /* define a 32 bit dword with 4 identical bytes for write sequence */ + datav = data | ((__u32) data << 8) | ((__u32) data << 16) | + ((__u32) data << 24); + + /* + * write this 32 bit dword to the bridge data port + * this will initiate a write sequence of up to 4 writes to the same + * address on the local bus interface the number of write accesses + * is undefined but >=1 and depends on the next PCI transaction + * during write sequence on the local bus + */ + outl(datav, hc->pci_iobase); +} + +inline void +cpld_set_reg(struct hfc_multi *hc, unsigned char reg) +{ + /* Do data pin read low byte */ + HFC_outb(hc, R_GPIO_OUT1, 
reg); +} + +inline void +cpld_write_reg(struct hfc_multi *hc, unsigned char reg, unsigned char val) +{ + cpld_set_reg(hc, reg); + + enablepcibridge(hc); + writepcibridge(hc, 1, val); + disablepcibridge(hc); + + return; +} + +inline unsigned char +cpld_read_reg(struct hfc_multi *hc, unsigned char reg) +{ + unsigned char bytein; + + cpld_set_reg(hc, reg); + + /* Do data pin read low byte */ + HFC_outb(hc, R_GPIO_OUT1, reg); + + enablepcibridge(hc); + bytein = readpcibridge(hc, 1); + disablepcibridge(hc); + + return bytein; +} + +inline void +vpm_write_address(struct hfc_multi *hc, unsigned short addr) +{ + cpld_write_reg(hc, 0, 0xff & addr); + cpld_write_reg(hc, 1, 0x01 & (addr >> 8)); +} + +inline unsigned short +vpm_read_address(struct hfc_multi *c) +{ + unsigned short addr; + unsigned short highbit; + + addr = cpld_read_reg(c, 0); + highbit = cpld_read_reg(c, 1); + + addr = addr | (highbit << 8); + + return addr & 0x1ff; +} + +inline unsigned char +vpm_in(struct hfc_multi *c, int which, unsigned short addr) +{ + unsigned char res; + + vpm_write_address(c, addr); + + if (!which) + cpld_set_reg(c, 2); + else + cpld_set_reg(c, 3); + + enablepcibridge(c); + res = readpcibridge(c, 1); + disablepcibridge(c); + + cpld_set_reg(c, 0); + + return res; +} + +inline void +vpm_out(struct hfc_multi *c, int which, unsigned short addr, + unsigned char data) +{ + vpm_write_address(c, addr); + + enablepcibridge(c); + + if (!which) + cpld_set_reg(c, 2); + else + cpld_set_reg(c, 3); + + writepcibridge(c, 1, data); + + cpld_set_reg(c, 0); + + disablepcibridge(c); + + { + unsigned char regin; + regin = vpm_in(c, which, addr); + if (regin != data) + printk(KERN_DEBUG "Wrote 0x%x to register 0x%x but got back " + "0x%x\n", data, addr, regin); + } + +} + + +void +vpm_init(struct hfc_multi *wc) +{ + unsigned char reg; + unsigned int mask; + unsigned int i, x, y; + unsigned int ver; + + for (x = 0; x < NUM_EC; x++) { + /* Setup GPIO's */ + if (!x) { + ver = vpm_in(wc, x, 0x1a0); + printk(KERN_DEBUG "VPM: Chip %d: ver %02x\n", x, ver); + } + + for (y = 0; y < 4; y++) { + vpm_out(wc, x, 0x1a8 + y, 0x00); /* GPIO out */ + vpm_out(wc, x, 0x1ac + y, 0x00); /* GPIO dir */ + vpm_out(wc, x, 0x1b0 + y, 0x00); /* GPIO sel */ + } + + /* Setup TDM path - sets fsync and tdm_clk as inputs */ + reg = vpm_in(wc, x, 0x1a3); /* misc_con */ + vpm_out(wc, x, 0x1a3, reg & ~2); + + /* Setup Echo length (256 taps) */ + vpm_out(wc, x, 0x022, 1); + vpm_out(wc, x, 0x023, 0xff); + + /* Setup timeslots */ + vpm_out(wc, x, 0x02f, 0x00); + mask = 0x02020202 << (x * 4); + + /* Setup the tdm channel masks for all chips */ + for (i = 0; i < 4; i++) + vpm_out(wc, x, 0x33 - i, (mask >> (i << 3)) & 0xff); + + /* Setup convergence rate */ + printk(KERN_DEBUG "VPM: A-law mode\n"); + reg = 0x00 | 0x10 | 0x01; + vpm_out(wc, x, 0x20, reg); + printk(KERN_DEBUG "VPM reg 0x20 is %x\n", reg); + /*vpm_out(wc, x, 0x20, (0x00 | 0x08 | 0x20 | 0x10)); */ + + vpm_out(wc, x, 0x24, 0x02); + reg = vpm_in(wc, x, 0x24); + printk(KERN_DEBUG "NLP Thresh is set to %d (0x%x)\n", reg, reg); + + /* Initialize echo cans */ + for (i = 0; i < MAX_TDM_CHAN; i++) { + if (mask & (0x00000001 << i)) + vpm_out(wc, x, i, 0x00); + } + + /* + * ARM arch at least disallows a udelay of + * more than 2ms... it gives a fake "__bad_udelay" + * reference at link-time. 
+ * long delays in kernel code are pretty sucky anyway + * for now work around it using 5 x 2ms instead of 1 x 10ms + */ + + udelay(2000); + udelay(2000); + udelay(2000); + udelay(2000); + udelay(2000); + + /* Put in bypass mode */ + for (i = 0; i < MAX_TDM_CHAN; i++) { + if (mask & (0x00000001 << i)) + vpm_out(wc, x, i, 0x01); + } + + /* Enable bypass */ + for (i = 0; i < MAX_TDM_CHAN; i++) { + if (mask & (0x00000001 << i)) + vpm_out(wc, x, 0x78 + i, 0x01); + } + + } +} + +void +vpm_check(struct hfc_multi *hctmp) +{ + unsigned char gpi2; + + gpi2 = HFC_inb(hctmp, R_GPI_IN2); + + if ((gpi2 & 0x3) != 0x3) + printk(KERN_DEBUG "Got interrupt 0x%x from VPM!\n", gpi2); +} + + +/* + * Interface to enable/disable the HW Echocan + * + * these functions are called within a spin_lock_irqsave on + * the channel instance lock, so we are not disturbed by irqs + * + * we can later easily change the interface to make other + * things configurable, for now we configure the taps + * + */ + +void +vpm_echocan_on(struct hfc_multi *hc, int ch, int taps) +{ + unsigned int timeslot; + unsigned int unit; + struct bchannel *bch = hc->chan[ch].bch; +#ifdef TXADJ + int txadj = -4; + struct sk_buff *skb; +#endif + if (hc->chan[ch].protocol != ISDN_P_B_RAW) + return; + + if (!bch) + return; + +#ifdef TXADJ + skb = _alloc_mISDN_skb(PH_CONTROL_IND, HFC_VOL_CHANGE_TX, + sizeof(int), &txadj, GFP_ATOMIC); + if (skb) + recv_Bchannel_skb(bch, skb); +#endif + + timeslot = ((ch/4)*8) + ((ch%4)*4) + 1; + unit = ch % 4; + + printk(KERN_NOTICE "vpm_echocan_on called taps [%d] on timeslot %d\n", + taps, timeslot); + + vpm_out(hc, unit, timeslot, 0x7e); +} + +void +vpm_echocan_off(struct hfc_multi *hc, int ch) +{ + unsigned int timeslot; + unsigned int unit; + struct bchannel *bch = hc->chan[ch].bch; +#ifdef TXADJ + int txadj = 0; + struct sk_buff *skb; +#endif + + if (hc->chan[ch].protocol != ISDN_P_B_RAW) + return; + + if (!bch) + return; + +#ifdef TXADJ + skb = _alloc_mISDN_skb(PH_CONTROL_IND, HFC_VOL_CHANGE_TX, + sizeof(int), &txadj, GFP_ATOMIC); + if (skb) + recv_Bchannel_skb(bch, skb); +#endif + + timeslot = ((ch/4)*8) + ((ch%4)*4) + 1; + unit = ch % 4; + + printk(KERN_NOTICE "vpm_echocan_off called on timeslot %d\n", + timeslot); + /* FILLME */ + vpm_out(hc, unit, timeslot, 0x01); +} + + +/* + * Speech Design resync feature + * NOTE: This is called sometimes outside interrupt handler. + * We must lock irqsave, so no other interrupt (other card) will occurr! + * Also multiple interrupts may nest, so must lock each access (lists, card)! 
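+ * Lock order used below: HFClock (the global card list lock) is taken
+ * first, then plx_lock. plx_lock is always the innermost lock; no other
+ * lock may be acquired while it is held.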
+ */ +static inline void +hfcmulti_resync(struct hfc_multi *locked, struct hfc_multi *newmaster, int rm) +{ + struct hfc_multi *hc, *next, *pcmmaster = 0; + u_int *plx_acc_32, pv; + u_long flags; + + spin_lock_irqsave(&HFClock, flags); + spin_lock(&plx_lock); /* must be locked inside other locks */ + + if (debug & DEBUG_HFCMULTI_PLXSD) + printk(KERN_DEBUG "%s: RESYNC(syncmaster=0x%p)\n", + __func__, syncmaster); + + /* select new master */ + if (newmaster) { + if (debug & DEBUG_HFCMULTI_PLXSD) + printk(KERN_DEBUG "using provided controller\n"); + } else { + list_for_each_entry_safe(hc, next, &HFClist, list) { + if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { + if (hc->syncronized) { + newmaster = hc; + break; + } + } + } + } + + /* Disable sync of all cards */ + list_for_each_entry_safe(hc, next, &HFClist, list) { + if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { + plx_acc_32 = (u_int *)(hc->plx_membase+PLX_GPIOC); + pv = readl(plx_acc_32); + pv &= ~PLX_SYNC_O_EN; + writel(pv, plx_acc_32); + if (test_bit(HFC_CHIP_PCM_MASTER, &hc->chip)) { + pcmmaster = hc; + if (hc->type == 1) { + if (debug & DEBUG_HFCMULTI_PLXSD) + printk(KERN_DEBUG + "Schedule SYNC_I\n"); + hc->e1_resync |= 1; /* get SYNC_I */ + } + } + } + } + + if (newmaster) { + hc = newmaster; + if (debug & DEBUG_HFCMULTI_PLXSD) + printk(KERN_DEBUG "id=%d (0x%p) = syncronized with " + "interface.\n", hc->id, hc); + /* Enable new sync master */ + plx_acc_32 = (u_int *)(hc->plx_membase+PLX_GPIOC); + pv = readl(plx_acc_32); + pv |= PLX_SYNC_O_EN; + writel(pv, plx_acc_32); + /* switch to jatt PLL, if not disabled by RX_SYNC */ + if (hc->type == 1 && !test_bit(HFC_CHIP_RX_SYNC, &hc->chip)) { + if (debug & DEBUG_HFCMULTI_PLXSD) + printk(KERN_DEBUG "Schedule jatt PLL\n"); + hc->e1_resync |= 2; /* switch to jatt */ + } + } else { + if (pcmmaster) { + hc = pcmmaster; + if (debug & DEBUG_HFCMULTI_PLXSD) + printk(KERN_DEBUG + "id=%d (0x%p) = PCM master syncronized " + "with QUARTZ\n", hc->id, hc); + if (hc->type == 1) { + /* Use the crystal clock for the PCM + master card */ + if (debug & DEBUG_HFCMULTI_PLXSD) + printk(KERN_DEBUG + "Schedule QUARTZ for HFC-E1\n"); + hc->e1_resync |= 4; /* switch quartz */ + } else { + if (debug & DEBUG_HFCMULTI_PLXSD) + printk(KERN_DEBUG + "QUARTZ is automatically " + "enabled by HFC-%dS\n", hc->type); + } + plx_acc_32 = (u_int *)(hc->plx_membase+PLX_GPIOC); + pv = readl(plx_acc_32); + pv |= PLX_SYNC_O_EN; + writel(pv, plx_acc_32); + } else + if (!rm) + printk(KERN_ERR "%s no pcm master, this MUST " + "not happen!\n", __func__); + } + syncmaster = newmaster; + + spin_unlock(&plx_lock); + spin_unlock_irqrestore(&HFClock, flags); +} + +/* This must be called AND hc must be locked irqsave!!! 
*/ +inline void +plxsd_checksync(struct hfc_multi *hc, int rm) +{ + if (hc->syncronized) { + if (syncmaster == NULL) { + if (debug & DEBUG_HFCMULTI_PLXSD) + printk(KERN_WARNING "%s: GOT sync on card %d" + " (id=%d)\n", __func__, hc->id + 1, + hc->id); + hfcmulti_resync(hc, hc, rm); + } + } else { + if (syncmaster == hc) { + if (debug & DEBUG_HFCMULTI_PLXSD) + printk(KERN_WARNING "%s: LOST sync on card %d" + " (id=%d)\n", __func__, hc->id + 1, + hc->id); + hfcmulti_resync(hc, NULL, rm); + } + } +} + + +/* + * free hardware resources used by driver + */ +static void +release_io_hfcmulti(struct hfc_multi *hc) +{ + u_int *plx_acc_32, pv; + u_long plx_flags; + + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: entered\n", __func__); + + /* soft reset also masks all interrupts */ + hc->hw.r_cirm |= V_SRES; + HFC_outb(hc, R_CIRM, hc->hw.r_cirm); + udelay(1000); + hc->hw.r_cirm &= ~V_SRES; + HFC_outb(hc, R_CIRM, hc->hw.r_cirm); + udelay(1000); /* instead of 'wait' that may cause locking */ + + /* release Speech Design card, if PLX was initialized */ + if (test_bit(HFC_CHIP_PLXSD, &hc->chip) && hc->plx_membase) { + if (debug & DEBUG_HFCMULTI_PLXSD) + printk(KERN_DEBUG "%s: release PLXSD card %d\n", + __func__, hc->id + 1); + spin_lock_irqsave(&plx_lock, plx_flags); + plx_acc_32 = (u_int *)(hc->plx_membase+PLX_GPIOC); + writel(PLX_GPIOC_INIT, plx_acc_32); + pv = readl(plx_acc_32); + /* Termination off */ + pv &= ~PLX_TERM_ON; + /* Disconnect the PCM */ + pv |= PLX_SLAVE_EN_N; + pv &= ~PLX_MASTER_EN; + pv &= ~PLX_SYNC_O_EN; + /* Put the DSP in Reset */ + pv &= ~PLX_DSP_RES_N; + writel(pv, plx_acc_32); + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_WARNING "%s: PCM off: PLX_GPIO=%x\n", + __func__, pv); + spin_unlock_irqrestore(&plx_lock, plx_flags); + } + + /* disable memory mapped ports / io ports */ + test_and_clear_bit(HFC_CHIP_PLXSD, &hc->chip); /* prevent resync */ + pci_write_config_word(hc->pci_dev, PCI_COMMAND, 0); + if (hc->pci_membase) + iounmap((void *)hc->pci_membase); + if (hc->plx_membase) + iounmap((void *)hc->plx_membase); + if (hc->pci_iobase) + release_region(hc->pci_iobase, 8); + + if (hc->pci_dev) { + pci_disable_device(hc->pci_dev); + pci_set_drvdata(hc->pci_dev, NULL); + } + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: done\n", __func__); +} + +/* + * function called to reset the HFC chip. A complete software reset of chip + * and fifos is done. All configuration of the chip is done. + */ + +static int +init_chip(struct hfc_multi *hc) +{ + u_long flags, val, val2 = 0, rev; + int i, err = 0; + u_char r_conf_en, rval; + u_int *plx_acc_32, pv; + u_long plx_flags, hfc_flags; + int plx_count; + struct hfc_multi *pos, *next, *plx_last_hc; + + spin_lock_irqsave(&hc->lock, flags); + /* reset all registers */ + memset(&hc->hw, 0, sizeof(struct hfcm_hw)); + + /* revision check */ + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: entered\n", __func__); + val = HFC_inb(hc, R_CHIP_ID)>>4; + if (val != 0x8 && val != 0xc && val != 0xe) { + printk(KERN_INFO "HFC_multi: unknown CHIP_ID:%x\n", (u_int)val); + err = -EIO; + goto out; + } + rev = HFC_inb(hc, R_CHIP_RV); + printk(KERN_INFO + "HFC_multi: detected HFC with chip ID=0x%lx revision=%ld%s\n", + val, rev, (rev == 0) ? " (old FIFO handling)" : ""); + if (rev == 0) { + test_and_set_bit(HFC_CHIP_REVISION0, &hc->chip); + printk(KERN_WARNING + "HFC_multi: NOTE: Your chip is revision 0, " + "ask Cologne Chip for update. Newer chips " + "have a better FIFO handling. 
Old chips " + "still work but may have slightly lower " + "HDLC transmit performance.\n"); + } + if (rev > 1) { + printk(KERN_WARNING "HFC_multi: WARNING: This driver doesn't " + "consider chip revision = %ld. The chip / " + "bridge may not work.\n", rev); + } + + /* set s-ram size */ + hc->Flen = 0x10; + hc->Zmin = 0x80; + hc->Zlen = 384; + hc->DTMFbase = 0x1000; + if (test_bit(HFC_CHIP_EXRAM_128, &hc->chip)) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: changing to 128K extenal RAM\n", + __func__); + hc->hw.r_ctrl |= V_EXT_RAM; + hc->hw.r_ram_sz = 1; + hc->Flen = 0x20; + hc->Zmin = 0xc0; + hc->Zlen = 1856; + hc->DTMFbase = 0x2000; + } + if (test_bit(HFC_CHIP_EXRAM_512, &hc->chip)) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: changing to 512K extenal RAM\n", + __func__); + hc->hw.r_ctrl |= V_EXT_RAM; + hc->hw.r_ram_sz = 2; + hc->Flen = 0x20; + hc->Zmin = 0xc0; + hc->Zlen = 8000; + hc->DTMFbase = 0x2000; + } + hc->max_trans = poll << 1; + if (hc->max_trans > hc->Zlen) + hc->max_trans = hc->Zlen; + + /* Speech Design PLX bridge */ + if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { + if (debug & DEBUG_HFCMULTI_PLXSD) + printk(KERN_DEBUG "%s: initializing PLXSD card %d\n", + __func__, hc->id + 1); + spin_lock_irqsave(&plx_lock, plx_flags); + plx_acc_32 = (u_int *)(hc->plx_membase+PLX_GPIOC); + writel(PLX_GPIOC_INIT, plx_acc_32); + pv = readl(plx_acc_32); + /* The first and the last cards are terminating the PCM bus */ + pv |= PLX_TERM_ON; /* hc is currently the last */ + /* Disconnect the PCM */ + pv |= PLX_SLAVE_EN_N; + pv &= ~PLX_MASTER_EN; + pv &= ~PLX_SYNC_O_EN; + /* Put the DSP in Reset */ + pv &= ~PLX_DSP_RES_N; + writel(pv, plx_acc_32); + spin_unlock_irqrestore(&plx_lock, plx_flags); + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_WARNING "%s: slave/term: PLX_GPIO=%x\n", + __func__, pv); + /* + * If we are the 3rd PLXSD card or higher, we must turn + * termination of last PLXSD card off. 
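+	 * The card that used to be the last one is now in the middle of the
+	 * PCM cable, so only the two cards at the ends keep termination on.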
+ */ + spin_lock_irqsave(&HFClock, hfc_flags); + plx_count = 0; + plx_last_hc = NULL; + list_for_each_entry_safe(pos, next, &HFClist, list) { + if (test_bit(HFC_CHIP_PLXSD, &pos->chip)) { + plx_count++; + if (pos != hc) + plx_last_hc = pos; + } + } + if (plx_count >= 3) { + if (debug & DEBUG_HFCMULTI_PLXSD) + printk(KERN_DEBUG "%s: card %d is between, so " + "we disable termination\n", + __func__, plx_last_hc->id + 1); + spin_lock_irqsave(&plx_lock, plx_flags); + plx_acc_32 = (u_int *)(plx_last_hc->plx_membase + + PLX_GPIOC); + pv = readl(plx_acc_32); + pv &= ~PLX_TERM_ON; + writel(pv, plx_acc_32); + spin_unlock_irqrestore(&plx_lock, plx_flags); + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_WARNING "%s: term off: PLX_GPIO=%x\n", + __func__, pv); + } + spin_unlock_irqrestore(&HFClock, hfc_flags); + hc->hw.r_pcm_md0 = V_F0_LEN; /* shift clock for DSP */ + } + + /* we only want the real Z2 read-pointer for revision > 0 */ + if (!test_bit(HFC_CHIP_REVISION0, &hc->chip)) + hc->hw.r_ram_sz |= V_FZ_MD; + + /* select pcm mode */ + if (test_bit(HFC_CHIP_PCM_SLAVE, &hc->chip)) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: setting PCM into slave mode\n", + __func__); + } else + if (test_bit(HFC_CHIP_PCM_MASTER, &hc->chip) && !plxsd_master) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: setting PCM into master mode\n", + __func__); + hc->hw.r_pcm_md0 |= V_PCM_MD; + } else { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: performing PCM auto detect\n", + __func__); + } + + /* soft reset */ + HFC_outb(hc, R_CTRL, hc->hw.r_ctrl); + HFC_outb(hc, R_RAM_SZ, hc->hw.r_ram_sz); + HFC_outb(hc, R_FIFO_MD, 0); + hc->hw.r_cirm = V_SRES | V_HFCRES | V_PCMRES | V_STRES | V_RLD_EPR; + HFC_outb(hc, R_CIRM, hc->hw.r_cirm); + udelay(100); + hc->hw.r_cirm = 0; + HFC_outb(hc, R_CIRM, hc->hw.r_cirm); + udelay(100); + HFC_outb(hc, R_RAM_SZ, hc->hw.r_ram_sz); + + /* Speech Design PLX bridge pcm and sync mode */ + if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { + spin_lock_irqsave(&plx_lock, plx_flags); + plx_acc_32 = (u_int *)(hc->plx_membase+PLX_GPIOC); + pv = readl(plx_acc_32); + /* Connect PCM */ + if (hc->hw.r_pcm_md0 & V_PCM_MD) { + pv |= PLX_MASTER_EN | PLX_SLAVE_EN_N; + pv |= PLX_SYNC_O_EN; + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_WARNING "%s: master: PLX_GPIO=%x\n", + __func__, pv); + } else { + pv &= ~(PLX_MASTER_EN | PLX_SLAVE_EN_N); + pv &= ~PLX_SYNC_O_EN; + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_WARNING "%s: slave: PLX_GPIO=%x\n", + __func__, pv); + } + writel(pv, plx_acc_32); + spin_unlock_irqrestore(&plx_lock, plx_flags); + } + + /* PCM setup */ + HFC_outb(hc, R_PCM_MD0, hc->hw.r_pcm_md0 | 0x90); + if (hc->slots == 32) + HFC_outb(hc, R_PCM_MD1, 0x00); + if (hc->slots == 64) + HFC_outb(hc, R_PCM_MD1, 0x10); + if (hc->slots == 128) + HFC_outb(hc, R_PCM_MD1, 0x20); + HFC_outb(hc, R_PCM_MD0, hc->hw.r_pcm_md0 | 0xa0); + if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) + HFC_outb(hc, R_PCM_MD2, V_SYNC_SRC); /* sync via SYNC_I / O */ + else + HFC_outb(hc, R_PCM_MD2, 0x00); /* sync from interface */ + HFC_outb(hc, R_PCM_MD0, hc->hw.r_pcm_md0 | 0x00); + for (i = 0; i < 256; i++) { + HFC_outb_nodebug(hc, R_SLOT, i); + HFC_outb_nodebug(hc, A_SL_CFG, 0); + HFC_outb_nodebug(hc, A_CONF, 0); + hc->slot_owner[i] = -1; + } + + /* set clock speed */ + if (test_bit(HFC_CHIP_CLOCK2, &hc->chip)) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "%s: setting double clock\n", __func__); + HFC_outb(hc, R_BRG_PCM_CFG, V_PCM_CLK); + } + + /* B410P GPIO */ + if 
(test_bit(HFC_CHIP_B410P, &hc->chip)) { + printk(KERN_NOTICE "Setting GPIOs\n"); + HFC_outb(hc, R_GPIO_SEL, 0x30); + HFC_outb(hc, R_GPIO_EN1, 0x3); + udelay(1000); + printk(KERN_NOTICE "calling vpm_init\n"); + vpm_init(hc); + } + + /* check if R_F0_CNT counts (8 kHz frame count) */ + val = HFC_inb(hc, R_F0_CNTL); + val += HFC_inb(hc, R_F0_CNTH) << 8; + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "HFC_multi F0_CNT %ld after reset\n", val); + spin_unlock_irqrestore(&hc->lock, flags); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout((HZ/100)?:1); /* Timeout minimum 10ms */ + spin_lock_irqsave(&hc->lock, flags); + val2 = HFC_inb(hc, R_F0_CNTL); + val2 += HFC_inb(hc, R_F0_CNTH) << 8; + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "HFC_multi F0_CNT %ld after 10 ms (1st try)\n", + val2); + if (val2 >= val+8) { /* 1 ms */ + /* it counts, so we keep the pcm mode */ + if (test_bit(HFC_CHIP_PCM_MASTER, &hc->chip)) + printk(KERN_INFO "controller is PCM bus MASTER\n"); + else + if (test_bit(HFC_CHIP_PCM_SLAVE, &hc->chip)) + printk(KERN_INFO "controller is PCM bus SLAVE\n"); + else { + test_and_set_bit(HFC_CHIP_PCM_SLAVE, &hc->chip); + printk(KERN_INFO "controller is PCM bus SLAVE " + "(auto detected)\n"); + } + } else { + /* does not count */ + if (test_bit(HFC_CHIP_PCM_MASTER, &hc->chip)) { +controller_fail: + printk(KERN_ERR "HFC_multi ERROR, getting no 125us " + "pulse. Seems that controller fails.\n"); + err = -EIO; + goto out; + } + if (test_bit(HFC_CHIP_PCM_SLAVE, &hc->chip)) { + printk(KERN_INFO "controller is PCM bus SLAVE " + "(ignoring missing PCM clock)\n"); + } else { + /* only one pcm master */ + if (test_bit(HFC_CHIP_PLXSD, &hc->chip) + && plxsd_master) { + printk(KERN_ERR "HFC_multi ERROR, no clock " + "on another Speech Design card found. " + "Please be sure to connect PCM cable.\n"); + err = -EIO; + goto out; + } + /* retry with master clock */ + if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { + spin_lock_irqsave(&plx_lock, plx_flags); + plx_acc_32 = (u_int *)(hc->plx_membase + + PLX_GPIOC); + pv = readl(plx_acc_32); + pv |= PLX_MASTER_EN | PLX_SLAVE_EN_N; + pv |= PLX_SYNC_O_EN; + writel(pv, plx_acc_32); + spin_unlock_irqrestore(&plx_lock, plx_flags); + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_WARNING "%s: master: PLX_GPIO" + "=%x\n", __func__, pv); + } + hc->hw.r_pcm_md0 |= V_PCM_MD; + HFC_outb(hc, R_PCM_MD0, hc->hw.r_pcm_md0 | 0x00); + spin_unlock_irqrestore(&hc->lock, flags); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout((HZ/100)?:1); /* Timeout min. 
10ms */ + spin_lock_irqsave(&hc->lock, flags); + val2 = HFC_inb(hc, R_F0_CNTL); + val2 += HFC_inb(hc, R_F0_CNTH) << 8; + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "HFC_multi F0_CNT %ld after " + "10 ms (2nd try)\n", val2); + if (val2 >= val+8) { /* 1 ms */ + test_and_set_bit(HFC_CHIP_PCM_MASTER, + &hc->chip); + printk(KERN_INFO "controller is PCM bus MASTER " + "(auto detected)\n"); + } else + goto controller_fail; + } + } + + /* Release the DSP Reset */ + if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { + if (test_bit(HFC_CHIP_PCM_MASTER, &hc->chip)) + plxsd_master = 1; + spin_lock_irqsave(&plx_lock, plx_flags); + plx_acc_32 = (u_int *)(hc->plx_membase+PLX_GPIOC); + pv = readl(plx_acc_32); + pv |= PLX_DSP_RES_N; + writel(pv, plx_acc_32); + spin_unlock_irqrestore(&plx_lock, plx_flags); + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_WARNING "%s: reset off: PLX_GPIO=%x\n", + __func__, pv); + } + + /* pcm id */ + if (hc->pcm) + printk(KERN_INFO "controller has given PCM BUS ID %d\n", + hc->pcm); + else { + if (test_bit(HFC_CHIP_PCM_MASTER, &hc->chip) + || test_bit(HFC_CHIP_PLXSD, &hc->chip)) { + PCM_cnt++; /* SD has proprietary bridging */ + } + hc->pcm = PCM_cnt; + printk(KERN_INFO "controller has PCM BUS ID %d " + "(auto selected)\n", hc->pcm); + } + + /* set up timer */ + HFC_outb(hc, R_TI_WD, poll_timer); + hc->hw.r_irqmsk_misc |= V_TI_IRQMSK; + + /* + * set up 125us interrupt, only if function pointer is available + * and module parameter timer is set + */ + if (timer && hfc_interrupt && register_interrupt) { + /* only one chip should use this interrupt */ + timer = 0; + interrupt_registered = 1; + hc->hw.r_irqmsk_misc |= V_PROC_IRQMSK; + /* deactivate other interrupts in ztdummy */ + register_interrupt(); + } + + /* set E1 state machine IRQ */ + if (hc->type == 1) + hc->hw.r_irqmsk_misc |= V_STA_IRQMSK; + + /* set DTMF detection */ + if (test_bit(HFC_CHIP_DTMF, &hc->chip)) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: enabling DTMF detection " + "for all B-channel\n", __func__); + hc->hw.r_dtmf = V_DTMF_EN | V_DTMF_STOP; + if (test_bit(HFC_CHIP_ULAW, &hc->chip)) + hc->hw.r_dtmf |= V_ULAW_SEL; + HFC_outb(hc, R_DTMF_N, 102 - 1); + hc->hw.r_irqmsk_misc |= V_DTMF_IRQMSK; + } + + /* conference engine */ + if (test_bit(HFC_CHIP_ULAW, &hc->chip)) + r_conf_en = V_CONF_EN | V_ULAW; + else + r_conf_en = V_CONF_EN; + HFC_outb(hc, R_CONF_EN, r_conf_en); + + /* setting leds */ + switch (hc->leds) { + case 1: /* HFC-E1 OEM */ + if (test_bit(HFC_CHIP_WATCHDOG, &hc->chip)) + HFC_outb(hc, R_GPIO_SEL, 0x32); + else + HFC_outb(hc, R_GPIO_SEL, 0x30); + + HFC_outb(hc, R_GPIO_EN1, 0x0f); + HFC_outb(hc, R_GPIO_OUT1, 0x00); + + HFC_outb(hc, R_GPIO_EN0, V_GPIO_EN2 | V_GPIO_EN3); + break; + + case 2: /* HFC-4S OEM */ + case 3: + HFC_outb(hc, R_GPIO_SEL, 0xf0); + HFC_outb(hc, R_GPIO_EN1, 0xff); + HFC_outb(hc, R_GPIO_OUT1, 0x00); + break; + } + + /* set master clock */ + if (hc->masterclk >= 0) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: setting ST master clock " + "to port %d (0..%d)\n", + __func__, hc->masterclk, hc->ports-1); + hc->hw.r_st_sync = hc->masterclk | V_AUTO_SYNC; + HFC_outb(hc, R_ST_SYNC, hc->hw.r_st_sync); + } + + /* setting misc irq */ + HFC_outb(hc, R_IRQMSK_MISC, hc->hw.r_irqmsk_misc); + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "r_irqmsk_misc.2: 0x%x\n", + hc->hw.r_irqmsk_misc); + + /* RAM access test */ + HFC_outb(hc, R_RAM_ADDR0, 0); + HFC_outb(hc, R_RAM_ADDR1, 0); + HFC_outb(hc, R_RAM_ADDR2, 0); + for (i = 0; i < 256; i++) { + 
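+		/* fill the first 256 RAM bytes with an (i * 3) & 0xff test pattern */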
HFC_outb_nodebug(hc, R_RAM_ADDR0, i); + HFC_outb_nodebug(hc, R_RAM_DATA, ((i*3)&0xff)); + } + for (i = 0; i < 256; i++) { + HFC_outb_nodebug(hc, R_RAM_ADDR0, i); + HFC_inb_nodebug(hc, R_RAM_DATA); + rval = HFC_inb_nodebug(hc, R_INT_DATA); + if (rval != ((i * 3) & 0xff)) { + printk(KERN_DEBUG + "addr:%x val:%x should:%x\n", i, rval, + (i * 3) & 0xff); + err++; + } + } + if (err) { + printk(KERN_DEBUG "aborting - %d RAM access errors\n", err); + err = -EIO; + goto out; + } + + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: done\n", __func__); +out: + spin_unlock_irqrestore(&hc->lock, flags); + return err; +} + + +/* + * control the watchdog + */ +static void +hfcmulti_watchdog(struct hfc_multi *hc) +{ + hc->wdcount++; + + if (hc->wdcount > 10) { + hc->wdcount = 0; + hc->wdbyte = hc->wdbyte == V_GPIO_OUT2 ? + V_GPIO_OUT3 : V_GPIO_OUT2; + + /* printk("Sending Watchdog Kill %x\n",hc->wdbyte); */ + HFC_outb(hc, R_GPIO_EN0, V_GPIO_EN2 | V_GPIO_EN3); + HFC_outb(hc, R_GPIO_OUT0, hc->wdbyte); + } +} + + + +/* + * output leds + */ +static void +hfcmulti_leds(struct hfc_multi *hc) +{ + unsigned long lled; + unsigned long leddw; + int i, state, active, leds; + struct dchannel *dch; + int led[4]; + + hc->ledcount += poll; + if (hc->ledcount > 4096) { + hc->ledcount -= 4096; + hc->ledstate = 0xAFFEAFFE; + } + + switch (hc->leds) { + case 1: /* HFC-E1 OEM */ + /* 2 red blinking: NT mode deactivate + * 2 red steady: TE mode deactivate + * left green: L1 active + * left red: frame sync, but no L1 + * right green: L2 active + */ + if (hc->chan[hc->dslot].sync != 2) { /* no frame sync */ + if (hc->chan[hc->dslot].dch->dev.D.protocol + != ISDN_P_NT_E1) { + led[0] = 1; + led[1] = 1; + } else if (hc->ledcount>>11) { + led[0] = 1; + led[1] = 1; + } else { + led[0] = 0; + led[1] = 0; + } + led[2] = 0; + led[3] = 0; + } else { /* with frame sync */ + /* TODO make it work */ + led[0] = 0; + led[1] = 0; + led[2] = 0; + led[3] = 1; + } + leds = (led[0] | (led[1]<<2) | (led[2]<<1) | (led[3]<<3))^0xF; + /* leds are inverted */ + if (leds != (int)hc->ledstate) { + HFC_outb_nodebug(hc, R_GPIO_OUT1, leds); + hc->ledstate = leds; + } + break; + + case 2: /* HFC-4S OEM */ + /* red blinking = PH_DEACTIVATE NT Mode + * red steady = PH_DEACTIVATE TE Mode + * green steady = PH_ACTIVATE + */ + for (i = 0; i < 4; i++) { + state = 0; + active = -1; + dch = hc->chan[(i << 2) | 2].dch; + if (dch) { + state = dch->state; + if (dch->dev.D.protocol == ISDN_P_NT_S0) + active = 3; + else + active = 7; + } + if (state) { + if (state == active) { + led[i] = 1; /* led green */ + } else + if (dch->dev.D.protocol == ISDN_P_TE_S0) + /* TE mode: led red */ + led[i] = 2; + else + if (hc->ledcount>>11) + /* led red */ + led[i] = 2; + else + /* led off */ + led[i] = 0; + } else + led[i] = 0; /* led off */ + } + if (test_bit(HFC_CHIP_B410P, &hc->chip)) { + leds = 0; + for (i = 0; i < 4; i++) { + if (led[i] == 1) { + /*green*/ + leds |= (0x2 << (i * 2)); + } else if (led[i] == 2) { + /*red*/ + leds |= (0x1 << (i * 2)); + } + } + if (leds != (int)hc->ledstate) { + vpm_out(hc, 0, 0x1a8 + 3, leds); + hc->ledstate = leds; + } + } else { + leds = ((led[3] > 0) << 0) | ((led[1] > 0) << 1) | + ((led[0] > 0) << 2) | ((led[2] > 0) << 3) | + ((led[3] & 1) << 4) | ((led[1] & 1) << 5) | + ((led[0] & 1) << 6) | ((led[2] & 1) << 7); + if (leds != (int)hc->ledstate) { + HFC_outb_nodebug(hc, R_GPIO_EN1, leds & 0x0F); + HFC_outb_nodebug(hc, R_GPIO_OUT1, leds >> 4); + hc->ledstate = leds; + } + } + break; + + case 3: /* HFC 1S/2S Beronet */ + /* red 
blinking = PH_DEACTIVATE NT Mode + * red steady = PH_DEACTIVATE TE Mode + * green steady = PH_ACTIVATE + */ + for (i = 0; i < 2; i++) { + state = 0; + active = -1; + dch = hc->chan[(i << 2) | 2].dch; + if (dch) { + state = dch->state; + if (dch->dev.D.protocol == ISDN_P_NT_S0) + active = 3; + else + active = 7; + } + if (state) { + if (state == active) { + led[i] = 1; /* led green */ + } else + if (dch->dev.D.protocol == ISDN_P_TE_S0) + /* TE mode: led red */ + led[i] = 2; + else + if (hc->ledcount >> 11) + /* led red */ + led[i] = 2; + else + /* led off */ + led[i] = 0; + } else + led[i] = 0; /* led off */ + } + + + leds = (led[0] > 0) | ((led[1] > 0)<<1) | ((led[0]&1)<<2) + | ((led[1]&1)<<3); + if (leds != (int)hc->ledstate) { + HFC_outb_nodebug(hc, R_GPIO_EN1, + ((led[0] > 0) << 2) | ((led[1] > 0) << 3)); + HFC_outb_nodebug(hc, R_GPIO_OUT1, + ((led[0] & 1) << 2) | ((led[1] & 1) << 3)); + hc->ledstate = leds; + } + break; + case 8: /* HFC 8S+ Beronet */ + lled = 0; + + for (i = 0; i < 8; i++) { + state = 0; + active = -1; + dch = hc->chan[(i << 2) | 2].dch; + if (dch) { + state = dch->state; + if (dch->dev.D.protocol == ISDN_P_NT_S0) + active = 3; + else + active = 7; + } + if (state) { + if (state == active) { + lled |= 0 << i; + } else + if (hc->ledcount >> 11) + lled |= 0 << i; + else + lled |= 1 << i; + } else + lled |= 1 << i; + } + leddw = lled << 24 | lled << 16 | lled << 8 | lled; + if (leddw != hc->ledstate) { + /* HFC_outb(hc, R_BRG_PCM_CFG, 1); + HFC_outb(c, R_BRG_PCM_CFG, (0x0 << 6) | 0x3); */ + /* was _io before */ + HFC_outb_nodebug(hc, R_BRG_PCM_CFG, 1 | V_PCM_CLK); + outw(0x4000, hc->pci_iobase + 4); + outl(leddw, hc->pci_iobase); + HFC_outb_nodebug(hc, R_BRG_PCM_CFG, V_PCM_CLK); + hc->ledstate = leddw; + } + break; + } +} +/* + * read dtmf coefficients + */ + +static void +hfcmulti_dtmf(struct hfc_multi *hc) +{ + s32 *coeff; + u_int mantissa; + int co, ch; + struct bchannel *bch = NULL; + u8 exponent; + int dtmf = 0; + int addr; + u16 w_float; + struct sk_buff *skb; + struct mISDNhead *hh; + + if (debug & DEBUG_HFCMULTI_DTMF) + printk(KERN_DEBUG "%s: dtmf detection irq\n", __func__); + for (ch = 0; ch <= 31; ch++) { + /* only process enabled B-channels */ + bch = hc->chan[ch].bch; + if (!bch) + continue; + if (!hc->created[hc->chan[ch].port]) + continue; + if (!test_bit(FLG_TRANSPARENT, &bch->Flags)) + continue; + if (debug & DEBUG_HFCMULTI_DTMF) + printk(KERN_DEBUG "%s: dtmf channel %d:", + __func__, ch); + coeff = &(hc->chan[ch].coeff[hc->chan[ch].coeff_count * 16]); + dtmf = 1; + for (co = 0; co < 8; co++) { + /* read W(n-1) coefficient */ + addr = hc->DTMFbase + ((co<<7) | (ch<<2)); + HFC_outb_nodebug(hc, R_RAM_ADDR0, addr); + HFC_outb_nodebug(hc, R_RAM_ADDR1, addr>>8); + HFC_outb_nodebug(hc, R_RAM_ADDR2, (addr>>16) + | V_ADDR_INC); + w_float = HFC_inb_nodebug(hc, R_RAM_DATA); + w_float |= (HFC_inb_nodebug(hc, R_RAM_DATA) << 8); + if (debug & DEBUG_HFCMULTI_DTMF) + printk(" %04x", w_float); + + /* decode float (see chip doc) */ + mantissa = w_float & 0x0fff; + if (w_float & 0x8000) + mantissa |= 0xfffff000; + exponent = (w_float>>12) & 0x7; + if (exponent) { + mantissa ^= 0x1000; + mantissa <<= (exponent-1); + } + + /* store coefficient */ + coeff[co<<1] = mantissa; + + /* read W(n) coefficient */ + w_float = HFC_inb_nodebug(hc, R_RAM_DATA); + w_float |= (HFC_inb_nodebug(hc, R_RAM_DATA) << 8); + if (debug & DEBUG_HFCMULTI_DTMF) + printk(" %04x", w_float); + + /* decode float (see chip doc) */ + mantissa = w_float & 0x0fff; + if (w_float & 0x8000) + mantissa |= 
0xfffff000; + exponent = (w_float>>12) & 0x7; + if (exponent) { + mantissa ^= 0x1000; + mantissa <<= (exponent-1); + } + + /* store coefficient */ + coeff[(co<<1)|1] = mantissa; + } + if (debug & DEBUG_HFCMULTI_DTMF) + printk("%s: DTMF ready %08x %08x %08x %08x " + "%08x %08x %08x %08x\n", __func__, + coeff[0], coeff[1], coeff[2], coeff[3], + coeff[4], coeff[5], coeff[6], coeff[7]); + hc->chan[ch].coeff_count++; + if (hc->chan[ch].coeff_count == 8) { + hc->chan[ch].coeff_count = 0; + skb = mI_alloc_skb(512, GFP_ATOMIC); + if (!skb) { + printk(KERN_WARNING "%s: No memory for skb\n", + __func__); + continue; + } + hh = mISDN_HEAD_P(skb); + hh->prim = PH_CONTROL_IND; + hh->id = DTMF_HFC_COEF; + memcpy(skb_put(skb, 512), hc->chan[ch].coeff, 512); + recv_Bchannel_skb(bch, skb); + } + } + + /* restart DTMF processing */ + hc->dtmf = dtmf; + if (dtmf) + HFC_outb_nodebug(hc, R_DTMF, hc->hw.r_dtmf | V_RST_DTMF); +} + + +/* + * fill fifo as much as possible + */ + +static void +hfcmulti_tx(struct hfc_multi *hc, int ch) +{ + int i, ii, temp, len = 0; + int Zspace, z1, z2; /* must be int for calculation */ + int Fspace, f1, f2; + u_char *d; + int *txpending, slot_tx; + struct bchannel *bch; + struct dchannel *dch; + struct sk_buff **sp = NULL; + int *idxp; + + bch = hc->chan[ch].bch; + dch = hc->chan[ch].dch; + if ((!dch) && (!bch)) + return; + + txpending = &hc->chan[ch].txpending; + slot_tx = hc->chan[ch].slot_tx; + if (dch) { + if (!test_bit(FLG_ACTIVE, &dch->Flags)) + return; + sp = &dch->tx_skb; + idxp = &dch->tx_idx; + } else { + if (!test_bit(FLG_ACTIVE, &bch->Flags)) + return; + sp = &bch->tx_skb; + idxp = &bch->tx_idx; + } + if (*sp) + len = (*sp)->len; + + if ((!len) && *txpending != 1) + return; /* no data */ + + if (test_bit(HFC_CHIP_B410P, &hc->chip) && + (hc->chan[ch].protocol == ISDN_P_B_RAW) && + (hc->chan[ch].slot_rx < 0) && + (hc->chan[ch].slot_tx < 0)) + HFC_outb_nodebug(hc, R_FIFO, 0x20 | (ch << 1)); + else + HFC_outb_nodebug(hc, R_FIFO, ch << 1); + HFC_wait_nodebug(hc); + + if (*txpending == 2) { + /* reset fifo */ + HFC_outb_nodebug(hc, R_INC_RES_FIFO, V_RES_F); + HFC_wait_nodebug(hc); + HFC_outb(hc, A_SUBCH_CFG, 0); + *txpending = 1; + } +next_frame: + if (dch || test_bit(FLG_HDLC, &bch->Flags)) { + f1 = HFC_inb_nodebug(hc, A_F1); + f2 = HFC_inb_nodebug(hc, A_F2); + while (f2 != (temp = HFC_inb_nodebug(hc, A_F2))) { + if (debug & DEBUG_HFCMULTI_FIFO) + printk(KERN_DEBUG + "%s(card %d): reread f2 because %d!=%d\n", + __func__, hc->id + 1, temp, f2); + f2 = temp; /* repeat until F2 is equal */ + } + Fspace = f2 - f1 - 1; + if (Fspace < 0) + Fspace += hc->Flen; + /* + * Old FIFO handling doesn't give us the current Z2 read + * pointer, so we cannot send the next frame before the fifo + * is empty. It makes no difference except for a slightly + * lower performance. 
+ */
+ if (test_bit(HFC_CHIP_REVISION0, &hc->chip)) {
+ if (f1 != f2)
+ Fspace = 0;
+ else
+ Fspace = 1;
+ }
+ /* one frame only for ST D-channels, to allow resending */
+ if (hc->type != 1 && dch) {
+ if (f1 != f2)
+ Fspace = 0;
+ }
+ /* F-counter full condition */
+ if (Fspace == 0)
+ return;
+ }
+ z1 = HFC_inw_nodebug(hc, A_Z1) - hc->Zmin;
+ z2 = HFC_inw_nodebug(hc, A_Z2) - hc->Zmin;
+ while (z2 != (temp = (HFC_inw_nodebug(hc, A_Z2) - hc->Zmin))) {
+ if (debug & DEBUG_HFCMULTI_FIFO)
+ printk(KERN_DEBUG "%s(card %d): reread z2 because "
+ "%d!=%d\n", __func__, hc->id + 1, temp, z2);
+ z2 = temp; /* repeat until Z2 is equal */
+ }
+ Zspace = z2 - z1;
+ if (Zspace <= 0)
+ Zspace += hc->Zlen;
+ Zspace -= 4; /* keep not too full, so pointers will not overrun */
+ /* fill transparent data only to maximum transparent load (minus 4) */
+ if (bch && test_bit(FLG_TRANSPARENT, &bch->Flags))
+ Zspace = Zspace - hc->Zlen + hc->max_trans;
+ if (Zspace <= 0) /* no space of 4 bytes */
+ return;
+
+ /* if no data */
+ if (!len) {
+ if (z1 == z2) { /* empty */
+ /* if done with FIFO audio data during PCM connection */
+ if (bch && (!test_bit(FLG_HDLC, &bch->Flags)) &&
+ *txpending && slot_tx >= 0) {
+ if (debug & DEBUG_HFCMULTI_MODE)
+ printk(KERN_DEBUG
+ "%s: reconnecting PCM due to no "
+ "more FIFO data: channel %d "
+ "slot_tx %d\n",
+ __func__, ch, slot_tx);
+ /* connect slot */
+ HFC_outb(hc, A_CON_HDLC, 0xc0 | 0x00 |
+ V_HDLC_TRP | V_IFF);
+ HFC_outb_nodebug(hc, R_FIFO, ch<<1 | 1);
+ HFC_wait_nodebug(hc);
+ HFC_outb(hc, A_CON_HDLC, 0xc0 | 0x00 |
+ V_HDLC_TRP | V_IFF);
+ HFC_outb_nodebug(hc, R_FIFO, ch<<1);
+ HFC_wait_nodebug(hc);
+ }
+ *txpending = 0;
+ }
+ return; /* no data */
+ }
+
+ /* if audio data and connected slot */
+ if (bch && (!test_bit(FLG_HDLC, &bch->Flags)) && (!*txpending)
+ && slot_tx >= 0) {
+ if (debug & DEBUG_HFCMULTI_MODE)
+ printk(KERN_DEBUG "%s: disconnecting PCM due to "
+ "FIFO data: channel %d slot_tx %d\n",
+ __func__, ch, slot_tx);
+ /* disconnect slot */
+ HFC_outb(hc, A_CON_HDLC, 0x80 | 0x00 | V_HDLC_TRP | V_IFF);
+ HFC_outb_nodebug(hc, R_FIFO, ch<<1 | 1);
+ HFC_wait_nodebug(hc);
+ HFC_outb(hc, A_CON_HDLC, 0x80 | 0x00 | V_HDLC_TRP | V_IFF);
+ HFC_outb_nodebug(hc, R_FIFO, ch<<1);
+ HFC_wait_nodebug(hc);
+ }
+ *txpending = 1;
+
+ /* show activity */
+ hc->activity[hc->chan[ch].port] = 1;
+
+ /* fill fifo to what we have left */
+ ii = len;
+ if (dch || test_bit(FLG_HDLC, &bch->Flags))
+ temp = 1;
+ else
+ temp = 0;
+ i = *idxp;
+ d = (*sp)->data + i;
+ if (ii - i > Zspace)
+ ii = Zspace + i;
+ if (debug & DEBUG_HFCMULTI_FIFO)
+ printk(KERN_DEBUG "%s(card %d): fifo(%d) has %d bytes space "
+ "left (z1=%04x, z2=%04x) sending %d of %d bytes %s\n",
+ __func__, hc->id + 1, ch, Zspace, z1, z2, ii-i, len-i,
+ temp ?
"HDLC":"TRANS"); + + + /* Have to prep the audio data */ + hc->write_fifo(hc, d, ii - i); + *idxp = ii; + + /* if not all data has been written */ + if (ii != len) { + /* NOTE: fifo is started by the calling function */ + return; + } + + /* if all data has been written, terminate frame */ + if (dch || test_bit(FLG_HDLC, &bch->Flags)) { + /* increment f-counter */ + HFC_outb_nodebug(hc, R_INC_RES_FIFO, V_INC_F); + HFC_wait_nodebug(hc); + } + + /* send confirm, since get_net_bframe will not do it with trans */ + if (bch && test_bit(FLG_TRANSPARENT, &bch->Flags)) + confirm_Bsend(bch); + + /* check for next frame */ + dev_kfree_skb(*sp); + if (bch && get_next_bframe(bch)) { /* hdlc is confirmed here */ + len = (*sp)->len; + goto next_frame; + } + if (dch && get_next_dframe(dch)) { + len = (*sp)->len; + goto next_frame; + } + + /* + * now we have no more data, so in case of transparent, + * we set the last byte in fifo to 'silence' in case we will get + * no more data at all. this prevents sending an undefined value. + */ + if (bch && test_bit(FLG_TRANSPARENT, &bch->Flags)) + HFC_outb_nodebug(hc, A_FIFO_DATA0_NOINC, silence); +} + + +/* NOTE: only called if E1 card is in active state */ +static void +hfcmulti_rx(struct hfc_multi *hc, int ch) +{ + int temp; + int Zsize, z1, z2 = 0; /* = 0, to make GCC happy */ + int f1 = 0, f2 = 0; /* = 0, to make GCC happy */ + int again = 0; + struct bchannel *bch; + struct dchannel *dch; + struct sk_buff *skb, **sp = NULL; + int maxlen; + + bch = hc->chan[ch].bch; + dch = hc->chan[ch].dch; + if ((!dch) && (!bch)) + return; + if (dch) { + if (!test_bit(FLG_ACTIVE, &dch->Flags)) + return; + sp = &dch->rx_skb; + maxlen = dch->maxlen; + } else { + if (!test_bit(FLG_ACTIVE, &bch->Flags)) + return; + sp = &bch->rx_skb; + maxlen = bch->maxlen; + } +next_frame: + /* on first AND before getting next valid frame, R_FIFO must be written + to. 
*/ + if (test_bit(HFC_CHIP_B410P, &hc->chip) && + (hc->chan[ch].protocol == ISDN_P_B_RAW) && + (hc->chan[ch].slot_rx < 0) && + (hc->chan[ch].slot_tx < 0)) + HFC_outb_nodebug(hc, R_FIFO, 0x20 | (ch<<1) | 1); + else + HFC_outb_nodebug(hc, R_FIFO, (ch<<1)|1); + HFC_wait_nodebug(hc); + + /* ignore if rx is off BUT change fifo (above) to start pending TX */ + if (hc->chan[ch].rx_off) + return; + + if (dch || test_bit(FLG_HDLC, &bch->Flags)) { + f1 = HFC_inb_nodebug(hc, A_F1); + while (f1 != (temp = HFC_inb_nodebug(hc, A_F1))) { + if (debug & DEBUG_HFCMULTI_FIFO) + printk(KERN_DEBUG + "%s(card %d): reread f1 because %d!=%d\n", + __func__, hc->id + 1, temp, f1); + f1 = temp; /* repeat until F1 is equal */ + } + f2 = HFC_inb_nodebug(hc, A_F2); + } + z1 = HFC_inw_nodebug(hc, A_Z1) - hc->Zmin; + while (z1 != (temp = (HFC_inw_nodebug(hc, A_Z1) - hc->Zmin))) { + if (debug & DEBUG_HFCMULTI_FIFO) + printk(KERN_DEBUG "%s(card %d): reread z2 because " + "%d!=%d\n", __func__, hc->id + 1, temp, z2); + z1 = temp; /* repeat until Z1 is equal */ + } + z2 = HFC_inw_nodebug(hc, A_Z2) - hc->Zmin; + Zsize = z1 - z2; + if ((dch || test_bit(FLG_HDLC, &bch->Flags)) && f1 != f2) + /* complete hdlc frame */ + Zsize++; + if (Zsize < 0) + Zsize += hc->Zlen; + /* if buffer is empty */ + if (Zsize <= 0) + return; + + if (*sp == NULL) { + *sp = mI_alloc_skb(maxlen + 3, GFP_ATOMIC); + if (*sp == NULL) { + printk(KERN_DEBUG "%s: No mem for rx_skb\n", + __func__); + return; + } + } + /* show activity */ + hc->activity[hc->chan[ch].port] = 1; + + /* empty fifo with what we have */ + if (dch || test_bit(FLG_HDLC, &bch->Flags)) { + if (debug & DEBUG_HFCMULTI_FIFO) + printk(KERN_DEBUG "%s(card %d): fifo(%d) reading %d " + "bytes (z1=%04x, z2=%04x) HDLC %s (f1=%d, f2=%d) " + "got=%d (again %d)\n", __func__, hc->id + 1, ch, + Zsize, z1, z2, (f1 == f2) ? 
"fragment" : "COMPLETE", + f1, f2, Zsize + (*sp)->len, again); + /* HDLC */ + if ((Zsize + (*sp)->len) > (maxlen + 3)) { + if (debug & DEBUG_HFCMULTI_FIFO) + printk(KERN_DEBUG + "%s(card %d): hdlc-frame too large.\n", + __func__, hc->id + 1); + skb_trim(*sp, 0); + HFC_outb_nodebug(hc, R_INC_RES_FIFO, V_RES_F); + HFC_wait_nodebug(hc); + return; + } + + hc->read_fifo(hc, skb_put(*sp, Zsize), Zsize); + + if (f1 != f2) { + /* increment Z2,F2-counter */ + HFC_outb_nodebug(hc, R_INC_RES_FIFO, V_INC_F); + HFC_wait_nodebug(hc); + /* check size */ + if ((*sp)->len < 4) { + if (debug & DEBUG_HFCMULTI_FIFO) + printk(KERN_DEBUG + "%s(card %d): Frame below minimum " + "size\n", __func__, hc->id + 1); + skb_trim(*sp, 0); + goto next_frame; + } + /* there is at least one complete frame, check crc */ + if ((*sp)->data[(*sp)->len - 1]) { + if (debug & DEBUG_HFCMULTI_CRC) + printk(KERN_DEBUG + "%s: CRC-error\n", __func__); + skb_trim(*sp, 0); + goto next_frame; + } + skb_trim(*sp, (*sp)->len - 3); + if ((*sp)->len < MISDN_COPY_SIZE) { + skb = *sp; + *sp = mI_alloc_skb(skb->len, GFP_ATOMIC); + if (*sp) { + memcpy(skb_put(*sp, skb->len), + skb->data, skb->len); + skb_trim(skb, 0); + } else { + printk(KERN_DEBUG "%s: No mem\n", + __func__); + *sp = skb; + skb = NULL; + } + } else { + skb = NULL; + } + if (debug & DEBUG_HFCMULTI_FIFO) { + printk(KERN_DEBUG "%s(card %d):", + __func__, hc->id + 1); + temp = 0; + while (temp < (*sp)->len) + printk(" %02x", (*sp)->data[temp++]); + printk("\n"); + } + if (dch) + recv_Dchannel(dch); + else + recv_Bchannel(bch); + *sp = skb; + again++; + goto next_frame; + } + /* there is an incomplete frame */ + } else { + /* transparent */ + if (Zsize > skb_tailroom(*sp)) + Zsize = skb_tailroom(*sp); + hc->read_fifo(hc, skb_put(*sp, Zsize), Zsize); + if (((*sp)->len) < MISDN_COPY_SIZE) { + skb = *sp; + *sp = mI_alloc_skb(skb->len, GFP_ATOMIC); + if (*sp) { + memcpy(skb_put(*sp, skb->len), + skb->data, skb->len); + skb_trim(skb, 0); + } else { + printk(KERN_DEBUG "%s: No mem\n", __func__); + *sp = skb; + skb = NULL; + } + } else { + skb = NULL; + } + if (debug & DEBUG_HFCMULTI_FIFO) + printk(KERN_DEBUG + "%s(card %d): fifo(%d) reading %d bytes " + "(z1=%04x, z2=%04x) TRANS\n", + __func__, hc->id + 1, ch, Zsize, z1, z2); + /* only bch is transparent */ + recv_Bchannel(bch); + *sp = skb; + } +} + + +/* + * Interrupt handler + */ +static void +signal_state_up(struct dchannel *dch, int info, char *msg) +{ + struct sk_buff *skb; + int id, data = info; + + if (debug & DEBUG_HFCMULTI_STATE) + printk(KERN_DEBUG "%s: %s\n", __func__, msg); + + id = TEI_SAPI | (GROUP_TEI << 8); /* manager address */ + + skb = _alloc_mISDN_skb(MPH_INFORMATION_IND, id, sizeof(data), &data, + GFP_ATOMIC); + if (!skb) + return; + recv_Dchannel_skb(dch, skb); +} + +static inline void +handle_timer_irq(struct hfc_multi *hc) +{ + int ch, temp; + struct dchannel *dch; + u_long flags; + + /* process queued resync jobs */ + if (hc->e1_resync) { + /* lock, so e1_resync gets not changed */ + spin_lock_irqsave(&HFClock, flags); + if (hc->e1_resync & 1) { + if (debug & DEBUG_HFCMULTI_PLXSD) + printk(KERN_DEBUG "Enable SYNC_I\n"); + HFC_outb(hc, R_SYNC_CTRL, V_EXT_CLK_SYNC); + /* disable JATT, if RX_SYNC is set */ + if (test_bit(HFC_CHIP_RX_SYNC, &hc->chip)) + HFC_outb(hc, R_SYNC_OUT, V_SYNC_E1_RX); + } + if (hc->e1_resync & 2) { + if (debug & DEBUG_HFCMULTI_PLXSD) + printk(KERN_DEBUG "Enable jatt PLL\n"); + HFC_outb(hc, R_SYNC_CTRL, V_SYNC_OFFS); + } + if (hc->e1_resync & 4) { + if (debug & DEBUG_HFCMULTI_PLXSD) + 
printk(KERN_DEBUG + "Enable QUARTZ for HFC-E1\n"); + /* set jatt to quartz */ + HFC_outb(hc, R_SYNC_CTRL, V_EXT_CLK_SYNC + | V_JATT_OFF); + /* switch to JATT, in case it is not already */ + HFC_outb(hc, R_SYNC_OUT, 0); + } + hc->e1_resync = 0; + spin_unlock_irqrestore(&HFClock, flags); + } + + if (hc->type != 1 || hc->e1_state == 1) + for (ch = 0; ch <= 31; ch++) { + if (hc->created[hc->chan[ch].port]) { + hfcmulti_tx(hc, ch); + /* fifo is started when switching to rx-fifo */ + hfcmulti_rx(hc, ch); + if (hc->chan[ch].dch && + hc->chan[ch].nt_timer > -1) { + dch = hc->chan[ch].dch; + if (!(--hc->chan[ch].nt_timer)) { + schedule_event(dch, + FLG_PHCHANGE); + if (debug & + DEBUG_HFCMULTI_STATE) + printk(KERN_DEBUG + "%s: nt_timer at " + "state %x\n", + __func__, + dch->state); + } + } + } + } + if (hc->type == 1 && hc->created[0]) { + dch = hc->chan[hc->dslot].dch; + if (test_bit(HFC_CFG_REPORT_LOS, &hc->chan[hc->dslot].cfg)) { + /* LOS */ + temp = HFC_inb_nodebug(hc, R_SYNC_STA) & V_SIG_LOS; + if (!temp && hc->chan[hc->dslot].los) + signal_state_up(dch, L1_SIGNAL_LOS_ON, + "LOS detected"); + if (temp && !hc->chan[hc->dslot].los) + signal_state_up(dch, L1_SIGNAL_LOS_OFF, + "LOS gone"); + hc->chan[hc->dslot].los = temp; + } + if (test_bit(HFC_CFG_REPORT_AIS, &hc->chan[hc->dslot].cfg)) { + /* AIS */ + temp = HFC_inb_nodebug(hc, R_SYNC_STA) & V_AIS; + if (!temp && hc->chan[hc->dslot].ais) + signal_state_up(dch, L1_SIGNAL_AIS_ON, + "AIS detected"); + if (temp && !hc->chan[hc->dslot].ais) + signal_state_up(dch, L1_SIGNAL_AIS_OFF, + "AIS gone"); + hc->chan[hc->dslot].ais = temp; + } + if (test_bit(HFC_CFG_REPORT_SLIP, &hc->chan[hc->dslot].cfg)) { + /* SLIP */ + temp = HFC_inb_nodebug(hc, R_SLIP) & V_FOSLIP_RX; + if (!temp && hc->chan[hc->dslot].slip_rx) + signal_state_up(dch, L1_SIGNAL_SLIP_RX, + " bit SLIP detected RX"); + hc->chan[hc->dslot].slip_rx = temp; + temp = HFC_inb_nodebug(hc, R_SLIP) & V_FOSLIP_TX; + if (!temp && hc->chan[hc->dslot].slip_tx) + signal_state_up(dch, L1_SIGNAL_SLIP_TX, + " bit SLIP detected TX"); + hc->chan[hc->dslot].slip_tx = temp; + } + if (test_bit(HFC_CFG_REPORT_RDI, &hc->chan[hc->dslot].cfg)) { + /* RDI */ + temp = HFC_inb_nodebug(hc, R_RX_SL0_0) & V_A; + if (!temp && hc->chan[hc->dslot].rdi) + signal_state_up(dch, L1_SIGNAL_RDI_ON, + "RDI detected"); + if (temp && !hc->chan[hc->dslot].rdi) + signal_state_up(dch, L1_SIGNAL_RDI_OFF, + "RDI gone"); + hc->chan[hc->dslot].rdi = temp; + } + temp = HFC_inb_nodebug(hc, R_JATT_DIR); + switch (hc->chan[hc->dslot].sync) { + case 0: + if ((temp & 0x60) == 0x60) { + if (debug & DEBUG_HFCMULTI_SYNC) + printk(KERN_DEBUG + "%s: (id=%d) E1 now " + "in clock sync\n", + __func__, hc->id); + HFC_outb(hc, R_RX_OFF, + hc->chan[hc->dslot].jitter | V_RX_INIT); + HFC_outb(hc, R_TX_OFF, + hc->chan[hc->dslot].jitter | V_RX_INIT); + hc->chan[hc->dslot].sync = 1; + goto check_framesync; + } + break; + case 1: + if ((temp & 0x60) != 0x60) { + if (debug & DEBUG_HFCMULTI_SYNC) + printk(KERN_DEBUG + "%s: (id=%d) E1 " + "lost clock sync\n", + __func__, hc->id); + hc->chan[hc->dslot].sync = 0; + break; + } +check_framesync: + temp = HFC_inb_nodebug(hc, R_SYNC_STA); + if (temp == 0x27) { + if (debug & DEBUG_HFCMULTI_SYNC) + printk(KERN_DEBUG + "%s: (id=%d) E1 " + "now in frame sync\n", + __func__, hc->id); + hc->chan[hc->dslot].sync = 2; + } + break; + case 2: + if ((temp & 0x60) != 0x60) { + if (debug & DEBUG_HFCMULTI_SYNC) + printk(KERN_DEBUG + "%s: (id=%d) E1 lost " + "clock & frame sync\n", + __func__, hc->id); + hc->chan[hc->dslot].sync = 0; + 
break; + } + temp = HFC_inb_nodebug(hc, R_SYNC_STA); + if (temp != 0x27) { + if (debug & DEBUG_HFCMULTI_SYNC) + printk(KERN_DEBUG + "%s: (id=%d) E1 " + "lost frame sync\n", + __func__, hc->id); + hc->chan[hc->dslot].sync = 1; + } + break; + } + } + + if (test_bit(HFC_CHIP_WATCHDOG, &hc->chip)) + hfcmulti_watchdog(hc); + + if (hc->leds) + hfcmulti_leds(hc); +} + +static void +ph_state_irq(struct hfc_multi *hc, u_char r_irq_statech) +{ + struct dchannel *dch; + int ch; + int active; + u_char st_status, temp; + + /* state machine */ + for (ch = 0; ch <= 31; ch++) { + if (hc->chan[ch].dch) { + dch = hc->chan[ch].dch; + if (r_irq_statech & 1) { + HFC_outb_nodebug(hc, R_ST_SEL, + hc->chan[ch].port); + /* undocumented: delay after R_ST_SEL */ + udelay(1); + /* undocumented: status changes during read */ + st_status = HFC_inb_nodebug(hc, A_ST_RD_STATE); + while (st_status != (temp = + HFC_inb_nodebug(hc, A_ST_RD_STATE))) { + if (debug & DEBUG_HFCMULTI_STATE) + printk(KERN_DEBUG "%s: reread " + "STATE because %d!=%d\n", + __func__, temp, + st_status); + st_status = temp; /* repeat */ + } + + /* Speech Design TE-sync indication */ + if (test_bit(HFC_CHIP_PLXSD, &hc->chip) && + dch->dev.D.protocol == ISDN_P_TE_S0) { + if (st_status & V_FR_SYNC_ST) + hc->syncronized |= + (1 << hc->chan[ch].port); + else + hc->syncronized &= + ~(1 << hc->chan[ch].port); + } + dch->state = st_status & 0x0f; + if (dch->dev.D.protocol == ISDN_P_NT_S0) + active = 3; + else + active = 7; + if (dch->state == active) { + HFC_outb_nodebug(hc, R_FIFO, + (ch << 1) | 1); + HFC_wait_nodebug(hc); + HFC_outb_nodebug(hc, + R_INC_RES_FIFO, V_RES_F); + HFC_wait_nodebug(hc); + dch->tx_idx = 0; + } + schedule_event(dch, FLG_PHCHANGE); + if (debug & DEBUG_HFCMULTI_STATE) + printk(KERN_DEBUG + "%s: S/T newstate %x port %d\n", + __func__, dch->state, + hc->chan[ch].port); + } + r_irq_statech >>= 1; + } + } + if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) + plxsd_checksync(hc, 0); +} + +static void +fifo_irq(struct hfc_multi *hc, int block) +{ + int ch, j; + struct dchannel *dch; + struct bchannel *bch; + u_char r_irq_fifo_bl; + + r_irq_fifo_bl = HFC_inb_nodebug(hc, R_IRQ_FIFO_BL0 + block); + j = 0; + while (j < 8) { + ch = (block << 2) + (j >> 1); + dch = hc->chan[ch].dch; + bch = hc->chan[ch].bch; + if (((!dch) && (!bch)) || (!hc->created[hc->chan[ch].port])) { + j += 2; + continue; + } + if (dch && (r_irq_fifo_bl & (1 << j)) && + test_bit(FLG_ACTIVE, &dch->Flags)) { + hfcmulti_tx(hc, ch); + /* start fifo */ + HFC_outb_nodebug(hc, R_FIFO, 0); + HFC_wait_nodebug(hc); + } + if (bch && (r_irq_fifo_bl & (1 << j)) && + test_bit(FLG_ACTIVE, &bch->Flags)) { + hfcmulti_tx(hc, ch); + /* start fifo */ + HFC_outb_nodebug(hc, R_FIFO, 0); + HFC_wait_nodebug(hc); + } + j++; + if (dch && (r_irq_fifo_bl & (1 << j)) && + test_bit(FLG_ACTIVE, &dch->Flags)) { + hfcmulti_rx(hc, ch); + } + if (bch && (r_irq_fifo_bl & (1 << j)) && + test_bit(FLG_ACTIVE, &bch->Flags)) { + hfcmulti_rx(hc, ch); + } + j++; + } +} + +#ifdef IRQ_DEBUG +int irqsem; +#endif +static irqreturn_t +hfcmulti_interrupt(int intno, void *dev_id) +{ +#ifdef IRQCOUNT_DEBUG + static int iq1 = 0, iq2 = 0, iq3 = 0, iq4 = 0, + iq5 = 0, iq6 = 0, iqcnt = 0; +#endif + static int count; + struct hfc_multi *hc = dev_id; + struct dchannel *dch; + u_char r_irq_statech, status, r_irq_misc, r_irq_oview; + int i; + u_short *plx_acc, wval; + u_char e1_syncsta, temp; + u_long flags; + + if (!hc) { + printk(KERN_ERR "HFC-multi: Spurious interrupt!\n"); + return IRQ_NONE; + } + + spin_lock(&hc->lock); + +#ifdef 
IRQ_DEBUG + if (irqsem) + printk(KERN_ERR "irq for card %d during irq from " + "card %d, this is no bug.\n", hc->id + 1, irqsem); + irqsem = hc->id + 1; +#endif + + if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { + spin_lock_irqsave(&plx_lock, flags); + plx_acc = (u_short *)(hc->plx_membase + PLX_INTCSR); + wval = readw(plx_acc); + spin_unlock_irqrestore(&plx_lock, flags); + if (!(wval & PLX_INTCSR_LINTI1_STATUS)) + goto irq_notforus; + } + + status = HFC_inb_nodebug(hc, R_STATUS); + r_irq_statech = HFC_inb_nodebug(hc, R_IRQ_STATECH); +#ifdef IRQCOUNT_DEBUG + if (r_irq_statech) + iq1++; + if (status & V_DTMF_STA) + iq2++; + if (status & V_LOST_STA) + iq3++; + if (status & V_EXT_IRQSTA) + iq4++; + if (status & V_MISC_IRQSTA) + iq5++; + if (status & V_FR_IRQSTA) + iq6++; + if (iqcnt++ > 5000) { + printk(KERN_ERR "iq1:%x iq2:%x iq3:%x iq4:%x iq5:%x iq6:%x\n", + iq1, iq2, iq3, iq4, iq5, iq6); + iqcnt = 0; + } +#endif + if (!r_irq_statech && + !(status & (V_DTMF_STA | V_LOST_STA | V_EXT_IRQSTA | + V_MISC_IRQSTA | V_FR_IRQSTA))) { + /* irq is not for us */ + goto irq_notforus; + } + hc->irqcnt++; + if (r_irq_statech) { + if (hc->type != 1) + ph_state_irq(hc, r_irq_statech); + } + if (status & V_EXT_IRQSTA) + ; /* external IRQ */ + if (status & V_LOST_STA) { + /* LOST IRQ */ + HFC_outb(hc, R_INC_RES_FIFO, V_RES_LOST); /* clear irq! */ + } + if (status & V_MISC_IRQSTA) { + /* misc IRQ */ + r_irq_misc = HFC_inb_nodebug(hc, R_IRQ_MISC); + if (r_irq_misc & V_STA_IRQ) { + if (hc->type == 1) { + /* state machine */ + dch = hc->chan[hc->dslot].dch; + e1_syncsta = HFC_inb_nodebug(hc, R_SYNC_STA); + if (test_bit(HFC_CHIP_PLXSD, &hc->chip) + && hc->e1_getclock) { + if (e1_syncsta & V_FR_SYNC_E1) + hc->syncronized = 1; + else + hc->syncronized = 0; + } + /* undocumented: status changes during read */ + dch->state = HFC_inb_nodebug(hc, R_E1_RD_STA); + while (dch->state != (temp = + HFC_inb_nodebug(hc, R_E1_RD_STA))) { + if (debug & DEBUG_HFCMULTI_STATE) + printk(KERN_DEBUG "%s: reread " + "STATE because %d!=%d\n", + __func__, temp, + dch->state); + dch->state = temp; /* repeat */ + } + dch->state = HFC_inb_nodebug(hc, R_E1_RD_STA) + & 0x7; + schedule_event(dch, FLG_PHCHANGE); + if (debug & DEBUG_HFCMULTI_STATE) + printk(KERN_DEBUG + "%s: E1 (id=%d) newstate %x\n", + __func__, hc->id, dch->state); + if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) + plxsd_checksync(hc, 0); + } + } + if (r_irq_misc & V_TI_IRQ) + handle_timer_irq(hc); + + if (r_irq_misc & V_DTMF_IRQ) { + /* -> DTMF IRQ */ + hfcmulti_dtmf(hc); + } + /* TODO: REPLACE !!!! 125 us Interrupts are not acceptable */ + if (r_irq_misc & V_IRQ_PROC) { + /* IRQ every 125us */ + count++; + /* generate 1kHz signal */ + if (count == 8) { + if (hfc_interrupt) + hfc_interrupt(); + count = 0; + } + } + + } + if (status & V_FR_IRQSTA) { + /* FIFO IRQ */ + r_irq_oview = HFC_inb_nodebug(hc, R_IRQ_OVIEW); + for (i = 0; i < 8; i++) { + if (r_irq_oview & (1 << i)) + fifo_irq(hc, i); + } + } + +#ifdef IRQ_DEBUG + irqsem = 0; +#endif + spin_unlock(&hc->lock); + return IRQ_HANDLED; + +irq_notforus: +#ifdef IRQ_DEBUG + irqsem = 0; +#endif + spin_unlock(&hc->lock); + return IRQ_NONE; +} + + +/* + * timer callback for D-chan busy resolution. 
Currently no function + */ + +static void +hfcmulti_dbusy_timer(struct hfc_multi *hc) +{ +} + + +/* + * activate/deactivate hardware for selected channels and mode + * + * configure B-channel with the given protocol + * ch eqals to the HFC-channel (0-31) + * ch is the number of channel (0-4,4-7,8-11,12-15,16-19,20-23,24-27,28-31 + * for S/T, 1-31 for E1) + * the hdlc interrupts will be set/unset + */ +static int +mode_hfcmulti(struct hfc_multi *hc, int ch, int protocol, int slot_tx, + int bank_tx, int slot_rx, int bank_rx) +{ + int flow_tx = 0, flow_rx = 0, routing = 0; + int oslot_tx, oslot_rx; + int conf; + + if (ch < 0 || ch > 31) + return EINVAL; + oslot_tx = hc->chan[ch].slot_tx; + oslot_rx = hc->chan[ch].slot_rx; + conf = hc->chan[ch].conf; + + if (debug & DEBUG_HFCMULTI_MODE) + printk(KERN_DEBUG + "%s: card %d channel %d protocol %x slot old=%d new=%d " + "bank new=%d (TX) slot old=%d new=%d bank new=%d (RX)\n", + __func__, hc->id, ch, protocol, oslot_tx, slot_tx, + bank_tx, oslot_rx, slot_rx, bank_rx); + + if (oslot_tx >= 0 && slot_tx != oslot_tx) { + /* remove from slot */ + if (debug & DEBUG_HFCMULTI_MODE) + printk(KERN_DEBUG "%s: remove from slot %d (TX)\n", + __func__, oslot_tx); + if (hc->slot_owner[oslot_tx<<1] == ch) { + HFC_outb(hc, R_SLOT, oslot_tx << 1); + HFC_outb(hc, A_SL_CFG, 0); + HFC_outb(hc, A_CONF, 0); + hc->slot_owner[oslot_tx<<1] = -1; + } else { + if (debug & DEBUG_HFCMULTI_MODE) + printk(KERN_DEBUG + "%s: we are not owner of this tx slot " + "anymore, channel %d is.\n", + __func__, hc->slot_owner[oslot_tx<<1]); + } + } + + if (oslot_rx >= 0 && slot_rx != oslot_rx) { + /* remove from slot */ + if (debug & DEBUG_HFCMULTI_MODE) + printk(KERN_DEBUG + "%s: remove from slot %d (RX)\n", + __func__, oslot_rx); + if (hc->slot_owner[(oslot_rx << 1) | 1] == ch) { + HFC_outb(hc, R_SLOT, (oslot_rx << 1) | V_SL_DIR); + HFC_outb(hc, A_SL_CFG, 0); + hc->slot_owner[(oslot_rx << 1) | 1] = -1; + } else { + if (debug & DEBUG_HFCMULTI_MODE) + printk(KERN_DEBUG + "%s: we are not owner of this rx slot " + "anymore, channel %d is.\n", + __func__, + hc->slot_owner[(oslot_rx << 1) | 1]); + } + } + + if (slot_tx < 0) { + flow_tx = 0x80; /* FIFO->ST */ + /* disable pcm slot */ + hc->chan[ch].slot_tx = -1; + hc->chan[ch].bank_tx = 0; + } else { + /* set pcm slot */ + if (hc->chan[ch].txpending) + flow_tx = 0x80; /* FIFO->ST */ + else + flow_tx = 0xc0; /* PCM->ST */ + /* put on slot */ + routing = bank_tx ? 0xc0 : 0x80; + if (conf >= 0 || bank_tx > 1) + routing = 0x40; /* loop */ + if (debug & DEBUG_HFCMULTI_MODE) + printk(KERN_DEBUG "%s: put channel %d to slot %d bank" + " %d flow %02x routing %02x conf %d (TX)\n", + __func__, ch, slot_tx, bank_tx, + flow_tx, routing, conf); + HFC_outb(hc, R_SLOT, slot_tx << 1); + HFC_outb(hc, A_SL_CFG, (ch<<1) | routing); + HFC_outb(hc, A_CONF, (conf < 0) ? 
0 : (conf | V_CONF_SL)); + hc->slot_owner[slot_tx << 1] = ch; + hc->chan[ch].slot_tx = slot_tx; + hc->chan[ch].bank_tx = bank_tx; + } + if (slot_rx < 0) { + /* disable pcm slot */ + flow_rx = 0x80; /* ST->FIFO */ + hc->chan[ch].slot_rx = -1; + hc->chan[ch].bank_rx = 0; + } else { + /* set pcm slot */ + if (hc->chan[ch].txpending) + flow_rx = 0x80; /* ST->FIFO */ + else + flow_rx = 0xc0; /* ST->(FIFO,PCM) */ + /* put on slot */ + routing = bank_rx?0x80:0xc0; /* reversed */ + if (conf >= 0 || bank_rx > 1) + routing = 0x40; /* loop */ + if (debug & DEBUG_HFCMULTI_MODE) + printk(KERN_DEBUG "%s: put channel %d to slot %d bank" + " %d flow %02x routing %02x conf %d (RX)\n", + __func__, ch, slot_rx, bank_rx, + flow_rx, routing, conf); + HFC_outb(hc, R_SLOT, (slot_rx<<1) | V_SL_DIR); + HFC_outb(hc, A_SL_CFG, (ch<<1) | V_CH_DIR | routing); + hc->slot_owner[(slot_rx<<1)|1] = ch; + hc->chan[ch].slot_rx = slot_rx; + hc->chan[ch].bank_rx = bank_rx; + } + + switch (protocol) { + case (ISDN_P_NONE): + /* disable TX fifo */ + HFC_outb(hc, R_FIFO, ch << 1); + HFC_wait(hc); + HFC_outb(hc, A_CON_HDLC, flow_tx | 0x00 | V_IFF); + HFC_outb(hc, A_SUBCH_CFG, 0); + HFC_outb(hc, A_IRQ_MSK, 0); + HFC_outb(hc, R_INC_RES_FIFO, V_RES_F); + HFC_wait(hc); + /* disable RX fifo */ + HFC_outb(hc, R_FIFO, (ch<<1)|1); + HFC_wait(hc); + HFC_outb(hc, A_CON_HDLC, flow_rx | 0x00); + HFC_outb(hc, A_SUBCH_CFG, 0); + HFC_outb(hc, A_IRQ_MSK, 0); + HFC_outb(hc, R_INC_RES_FIFO, V_RES_F); + HFC_wait(hc); + if (hc->chan[ch].bch && hc->type != 1) { + hc->hw.a_st_ctrl0[hc->chan[ch].port] &= + ((ch & 0x3) == 0)? ~V_B1_EN: ~V_B2_EN; + HFC_outb(hc, R_ST_SEL, hc->chan[ch].port); + /* undocumented: delay after R_ST_SEL */ + udelay(1); + HFC_outb(hc, A_ST_CTRL0, + hc->hw.a_st_ctrl0[hc->chan[ch].port]); + } + if (hc->chan[ch].bch) { + test_and_clear_bit(FLG_HDLC, &hc->chan[ch].bch->Flags); + test_and_clear_bit(FLG_TRANSPARENT, + &hc->chan[ch].bch->Flags); + } + break; + case (ISDN_P_B_RAW): /* B-channel */ + + if (test_bit(HFC_CHIP_B410P, &hc->chip) && + (hc->chan[ch].slot_rx < 0) && + (hc->chan[ch].slot_tx < 0)) { + + printk(KERN_DEBUG + "Setting B-channel %d to echo cancelable " + "state on PCM slot %d\n", ch, + ((ch / 4) * 8) + ((ch % 4) * 4) + 1); + printk(KERN_DEBUG + "Enabling pass through for channel\n"); + vpm_out(hc, ch, ((ch / 4) * 8) + + ((ch % 4) * 4) + 1, 0x01); + /* rx path */ + /* S/T -> PCM */ + HFC_outb(hc, R_FIFO, (ch << 1)); + HFC_wait(hc); + HFC_outb(hc, A_CON_HDLC, 0xc0 | V_HDLC_TRP | V_IFF); + HFC_outb(hc, R_SLOT, (((ch / 4) * 8) + + ((ch % 4) * 4) + 1) << 1); + HFC_outb(hc, A_SL_CFG, 0x80 | (ch << 1)); + + /* PCM -> FIFO */ + HFC_outb(hc, R_FIFO, 0x20 | (ch << 1) | 1); + HFC_wait(hc); + HFC_outb(hc, A_CON_HDLC, 0x20 | V_HDLC_TRP | V_IFF); + HFC_outb(hc, A_SUBCH_CFG, 0); + HFC_outb(hc, A_IRQ_MSK, 0); + HFC_outb(hc, R_INC_RES_FIFO, V_RES_F); + HFC_wait(hc); + HFC_outb(hc, R_SLOT, ((((ch / 4) * 8) + + ((ch % 4) * 4) + 1) << 1) | 1); + HFC_outb(hc, A_SL_CFG, 0x80 | 0x20 | (ch << 1) | 1); + + /* tx path */ + /* PCM -> S/T */ + HFC_outb(hc, R_FIFO, (ch << 1) | 1); + HFC_wait(hc); + HFC_outb(hc, A_CON_HDLC, 0xc0 | V_HDLC_TRP | V_IFF); + HFC_outb(hc, R_SLOT, ((((ch / 4) * 8) + + ((ch % 4) * 4)) << 1) | 1); + HFC_outb(hc, A_SL_CFG, 0x80 | 0x40 | (ch << 1) | 1); + + /* FIFO -> PCM */ + HFC_outb(hc, R_FIFO, 0x20 | (ch << 1)); + HFC_wait(hc); + HFC_outb(hc, A_CON_HDLC, 0x20 | V_HDLC_TRP | V_IFF); + HFC_outb(hc, A_SUBCH_CFG, 0); + HFC_outb(hc, A_IRQ_MSK, 0); + HFC_outb(hc, R_INC_RES_FIFO, V_RES_F); + HFC_wait(hc); + /* tx silence */ + 
HFC_outb_nodebug(hc, A_FIFO_DATA0_NOINC, silence); + HFC_outb(hc, R_SLOT, (((ch / 4) * 8) + + ((ch % 4) * 4)) << 1); + HFC_outb(hc, A_SL_CFG, 0x80 | 0x20 | (ch << 1)); + } else { + /* enable TX fifo */ + HFC_outb(hc, R_FIFO, ch << 1); + HFC_wait(hc); + HFC_outb(hc, A_CON_HDLC, flow_tx | 0x00 | + V_HDLC_TRP | V_IFF); + HFC_outb(hc, A_SUBCH_CFG, 0); + HFC_outb(hc, A_IRQ_MSK, 0); + HFC_outb(hc, R_INC_RES_FIFO, V_RES_F); + HFC_wait(hc); + /* tx silence */ + HFC_outb_nodebug(hc, A_FIFO_DATA0_NOINC, silence); + /* enable RX fifo */ + HFC_outb(hc, R_FIFO, (ch<<1)|1); + HFC_wait(hc); + HFC_outb(hc, A_CON_HDLC, flow_rx | 0x00 | V_HDLC_TRP); + HFC_outb(hc, A_SUBCH_CFG, 0); + HFC_outb(hc, A_IRQ_MSK, 0); + HFC_outb(hc, R_INC_RES_FIFO, V_RES_F); + HFC_wait(hc); + } + if (hc->type != 1) { + hc->hw.a_st_ctrl0[hc->chan[ch].port] |= + ((ch & 0x3) == 0) ? V_B1_EN : V_B2_EN; + HFC_outb(hc, R_ST_SEL, hc->chan[ch].port); + /* undocumented: delay after R_ST_SEL */ + udelay(1); + HFC_outb(hc, A_ST_CTRL0, + hc->hw.a_st_ctrl0[hc->chan[ch].port]); + } + if (hc->chan[ch].bch) + test_and_set_bit(FLG_TRANSPARENT, + &hc->chan[ch].bch->Flags); + break; + case (ISDN_P_B_HDLC): /* B-channel */ + case (ISDN_P_TE_S0): /* D-channel */ + case (ISDN_P_NT_S0): + case (ISDN_P_TE_E1): + case (ISDN_P_NT_E1): + /* enable TX fifo */ + HFC_outb(hc, R_FIFO, ch<<1); + HFC_wait(hc); + if (hc->type == 1 || hc->chan[ch].bch) { + /* E1 or B-channel */ + HFC_outb(hc, A_CON_HDLC, flow_tx | 0x04); + HFC_outb(hc, A_SUBCH_CFG, 0); + } else { + /* D-Channel without HDLC fill flags */ + HFC_outb(hc, A_CON_HDLC, flow_tx | 0x04 | V_IFF); + HFC_outb(hc, A_SUBCH_CFG, 2); + } + HFC_outb(hc, A_IRQ_MSK, V_IRQ); + HFC_outb(hc, R_INC_RES_FIFO, V_RES_F); + HFC_wait(hc); + /* enable RX fifo */ + HFC_outb(hc, R_FIFO, (ch<<1)|1); + HFC_wait(hc); + HFC_outb(hc, A_CON_HDLC, flow_rx | 0x04); + if (hc->type == 1 || hc->chan[ch].bch) + HFC_outb(hc, A_SUBCH_CFG, 0); /* full 8 bits */ + else + HFC_outb(hc, A_SUBCH_CFG, 2); /* 2 bits dchannel */ + HFC_outb(hc, A_IRQ_MSK, V_IRQ); + HFC_outb(hc, R_INC_RES_FIFO, V_RES_F); + HFC_wait(hc); + if (hc->chan[ch].bch) { + test_and_set_bit(FLG_HDLC, &hc->chan[ch].bch->Flags); + if (hc->type != 1) { + hc->hw.a_st_ctrl0[hc->chan[ch].port] |= + ((ch&0x3) == 0) ? 
V_B1_EN : V_B2_EN;
+ HFC_outb(hc, R_ST_SEL, hc->chan[ch].port);
+ /* undocumented: delay after R_ST_SEL */
+ udelay(1);
+ HFC_outb(hc, A_ST_CTRL0,
+ hc->hw.a_st_ctrl0[hc->chan[ch].port]);
+ }
+ }
+ break;
+ default:
+ printk(KERN_DEBUG "%s: protocol not known %x\n",
+ __func__, protocol);
+ hc->chan[ch].protocol = ISDN_P_NONE;
+ return -ENOPROTOOPT;
+ }
+ hc->chan[ch].protocol = protocol;
+ return 0;
+}
+
+
+/*
+ * connect/disconnect PCM
+ */
+
+static void
+hfcmulti_pcm(struct hfc_multi *hc, int ch, int slot_tx, int bank_tx,
+ int slot_rx, int bank_rx)
+{
+ if (slot_tx < 0 || slot_rx < 0 || bank_tx < 0 || bank_rx < 0) {
+ /* disable PCM */
+ mode_hfcmulti(hc, ch, hc->chan[ch].protocol, -1, 0, -1, 0);
+ return;
+ }
+
+ /* enable pcm */
+ mode_hfcmulti(hc, ch, hc->chan[ch].protocol, slot_tx, bank_tx,
+ slot_rx, bank_rx);
+}
+
+/*
+ * set/disable conference
+ */
+
+static void
+hfcmulti_conf(struct hfc_multi *hc, int ch, int num)
+{
+ if (num >= 0 && num <= 7)
+ hc->chan[ch].conf = num;
+ else
+ hc->chan[ch].conf = -1;
+ mode_hfcmulti(hc, ch, hc->chan[ch].protocol, hc->chan[ch].slot_tx,
+ hc->chan[ch].bank_tx, hc->chan[ch].slot_rx,
+ hc->chan[ch].bank_rx);
+}
+
+
+/*
+ * set/disable sample loop
+ */
+
+/* NOTE: this function is experimental and therefore disabled */
+
+/*
+ * Layer 1 callback function
+ */
+static int
+hfcm_l1callback(struct dchannel *dch, u_int cmd)
+{
+ struct hfc_multi *hc = dch->hw;
+ u_long flags;
+
+ switch (cmd) {
+ case INFO3_P8:
+ case INFO3_P10:
+ break;
+ case HW_RESET_REQ:
+ /* start activation */
+ spin_lock_irqsave(&hc->lock, flags);
+ if (hc->type == 1) {
+ if (debug & DEBUG_HFCMULTI_MSG)
+ printk(KERN_DEBUG
+ "%s: HW_RESET_REQ no BRI\n",
+ __func__);
+ } else {
+ HFC_outb(hc, R_ST_SEL, hc->chan[dch->slot].port);
+ /* undocumented: delay after R_ST_SEL */
+ udelay(1);
+ HFC_outb(hc, A_ST_WR_STATE, V_ST_LD_STA | 3); /* F3 */
+ udelay(6); /* wait at least 5,21us */
+ HFC_outb(hc, A_ST_WR_STATE, 3);
+ HFC_outb(hc, A_ST_WR_STATE, 3 | (V_ST_ACT*3));
+ /* activate */
+ }
+ spin_unlock_irqrestore(&hc->lock, flags);
+ l1_event(dch->l1, HW_POWERUP_IND);
+ break;
+ case HW_DEACT_REQ:
+ /* start deactivation */
+ spin_lock_irqsave(&hc->lock, flags);
+ if (hc->type == 1) {
+ if (debug & DEBUG_HFCMULTI_MSG)
+ printk(KERN_DEBUG
+ "%s: HW_DEACT_REQ no BRI\n",
+ __func__);
+ } else {
+ HFC_outb(hc, R_ST_SEL, hc->chan[dch->slot].port);
+ /* undocumented: delay after R_ST_SEL */
+ udelay(1);
+ HFC_outb(hc, A_ST_WR_STATE, V_ST_ACT*2);
+ /* deactivate */
+ if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) {
+ hc->syncronized &=
+ ~(1 << hc->chan[dch->slot].port);
+ plxsd_checksync(hc, 0);
+ }
+ }
+ skb_queue_purge(&dch->squeue);
+ if (dch->tx_skb) {
+ dev_kfree_skb(dch->tx_skb);
+ dch->tx_skb = NULL;
+ }
+ dch->tx_idx = 0;
+ if (dch->rx_skb) {
+ dev_kfree_skb(dch->rx_skb);
+ dch->rx_skb = NULL;
+ }
+ test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
+ if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags))
+ del_timer(&dch->timer);
+ spin_unlock_irqrestore(&hc->lock, flags);
+ break;
+ case HW_POWERUP_REQ:
+ spin_lock_irqsave(&hc->lock, flags);
+ if (hc->type == 1) {
+ if (debug & DEBUG_HFCMULTI_MSG)
+ printk(KERN_DEBUG
+ "%s: HW_POWERUP_REQ no BRI\n",
+ __func__);
+ } else {
+ HFC_outb(hc, R_ST_SEL, hc->chan[dch->slot].port);
+ /* undocumented: delay after R_ST_SEL */
+ udelay(1);
+ HFC_outb(hc, A_ST_WR_STATE, 3 | 0x10); /* activate */
+ udelay(6); /* wait at least 5,21us */
+ HFC_outb(hc, A_ST_WR_STATE, 3); /* activate */
+ }
+ spin_unlock_irqrestore(&hc->lock, flags);
+ break;
+ case
PH_ACTIVATE_IND: + test_and_set_bit(FLG_ACTIVE, &dch->Flags); + _queue_data(&dch->dev.D, cmd, MISDN_ID_ANY, 0, NULL, + GFP_ATOMIC); + break; + case PH_DEACTIVATE_IND: + test_and_clear_bit(FLG_ACTIVE, &dch->Flags); + _queue_data(&dch->dev.D, cmd, MISDN_ID_ANY, 0, NULL, + GFP_ATOMIC); + break; + default: + if (dch->debug & DEBUG_HW) + printk(KERN_DEBUG "%s: unknown command %x\n", + __func__, cmd); + return -1; + } + return 0; +} + +/* + * Layer2 -> Layer 1 Transfer + */ + +static int +handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb) +{ + struct mISDNdevice *dev = container_of(ch, struct mISDNdevice, D); + struct dchannel *dch = container_of(dev, struct dchannel, dev); + struct hfc_multi *hc = dch->hw; + struct mISDNhead *hh = mISDN_HEAD_P(skb); + int ret = -EINVAL; + unsigned int id; + u_long flags; + + switch (hh->prim) { + case PH_DATA_REQ: + if (skb->len < 1) + break; + spin_lock_irqsave(&hc->lock, flags); + ret = dchannel_senddata(dch, skb); + if (ret > 0) { /* direct TX */ + id = hh->id; /* skb can be freed */ + hfcmulti_tx(hc, dch->slot); + ret = 0; + /* start fifo */ + HFC_outb(hc, R_FIFO, 0); + HFC_wait(hc); + spin_unlock_irqrestore(&hc->lock, flags); + queue_ch_frame(ch, PH_DATA_CNF, id, NULL); + } else + spin_unlock_irqrestore(&hc->lock, flags); + return ret; + case PH_ACTIVATE_REQ: + if (dch->dev.D.protocol != ISDN_P_TE_S0) { + spin_lock_irqsave(&hc->lock, flags); + ret = 0; + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG + "%s: PH_ACTIVATE port %d (0..%d)\n", + __func__, hc->chan[dch->slot].port, + hc->ports-1); + /* start activation */ + if (hc->type == 1) { + ph_state_change(dch); + if (debug & DEBUG_HFCMULTI_STATE) + printk(KERN_DEBUG + "%s: E1 report state %x \n", + __func__, dch->state); + } else { + HFC_outb(hc, R_ST_SEL, + hc->chan[dch->slot].port); + /* undocumented: delay after R_ST_SEL */ + udelay(1); + HFC_outb(hc, A_ST_WR_STATE, V_ST_LD_STA | 1); + /* G1 */ + udelay(6); /* wait at least 5,21us */ + HFC_outb(hc, A_ST_WR_STATE, 1); + HFC_outb(hc, A_ST_WR_STATE, 1 | + (V_ST_ACT*3)); /* activate */ + dch->state = 1; + } + spin_unlock_irqrestore(&hc->lock, flags); + } else + ret = l1_event(dch->l1, hh->prim); + break; + case PH_DEACTIVATE_REQ: + test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags); + if (dch->dev.D.protocol != ISDN_P_TE_S0) { + spin_lock_irqsave(&hc->lock, flags); + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG + "%s: PH_DEACTIVATE port %d (0..%d)\n", + __func__, hc->chan[dch->slot].port, + hc->ports-1); + /* start deactivation */ + if (hc->type == 1) { + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG + "%s: PH_DEACTIVATE no BRI\n", + __func__); + } else { + HFC_outb(hc, R_ST_SEL, + hc->chan[dch->slot].port); + /* undocumented: delay after R_ST_SEL */ + udelay(1); + HFC_outb(hc, A_ST_WR_STATE, V_ST_ACT * 2); + /* deactivate */ + dch->state = 1; + } + skb_queue_purge(&dch->squeue); + if (dch->tx_skb) { + dev_kfree_skb(dch->tx_skb); + dch->tx_skb = NULL; + } + dch->tx_idx = 0; + if (dch->rx_skb) { + dev_kfree_skb(dch->rx_skb); + dch->rx_skb = NULL; + } + test_and_clear_bit(FLG_TX_BUSY, &dch->Flags); + if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags)) + del_timer(&dch->timer); +#ifdef FIXME + if (test_and_clear_bit(FLG_L1_BUSY, &dch->Flags)) + dchannel_sched_event(&hc->dch, D_CLEARBUSY); +#endif + ret = 0; + spin_unlock_irqrestore(&hc->lock, flags); + } else + ret = l1_event(dch->l1, hh->prim); + break; + } + if (!ret) + dev_kfree_skb(skb); + return ret; +} + +static void +deactivate_bchannel(struct bchannel *bch) +{ + struct 
hfc_multi *hc = bch->hw; + u_long flags; + + spin_lock_irqsave(&hc->lock, flags); + if (test_and_clear_bit(FLG_TX_NEXT, &bch->Flags)) { + dev_kfree_skb(bch->next_skb); + bch->next_skb = NULL; + } + if (bch->tx_skb) { + dev_kfree_skb(bch->tx_skb); + bch->tx_skb = NULL; + } + bch->tx_idx = 0; + if (bch->rx_skb) { + dev_kfree_skb(bch->rx_skb); + bch->rx_skb = NULL; + } + hc->chan[bch->slot].coeff_count = 0; + test_and_clear_bit(FLG_ACTIVE, &bch->Flags); + test_and_clear_bit(FLG_TX_BUSY, &bch->Flags); + hc->chan[bch->slot].rx_off = 0; + hc->chan[bch->slot].conf = -1; + mode_hfcmulti(hc, bch->slot, ISDN_P_NONE, -1, 0, -1, 0); + spin_unlock_irqrestore(&hc->lock, flags); +} + +static int +handle_bmsg(struct mISDNchannel *ch, struct sk_buff *skb) +{ + struct bchannel *bch = container_of(ch, struct bchannel, ch); + struct hfc_multi *hc = bch->hw; + int ret = -EINVAL; + struct mISDNhead *hh = mISDN_HEAD_P(skb); + unsigned int id; + u_long flags; + + switch (hh->prim) { + case PH_DATA_REQ: + if (!skb->len) + break; + spin_lock_irqsave(&hc->lock, flags); + ret = bchannel_senddata(bch, skb); + if (ret > 0) { /* direct TX */ + id = hh->id; /* skb can be freed */ + hfcmulti_tx(hc, bch->slot); + ret = 0; + /* start fifo */ + HFC_outb_nodebug(hc, R_FIFO, 0); + HFC_wait_nodebug(hc); + if (!test_bit(FLG_TRANSPARENT, &bch->Flags)) { + spin_unlock_irqrestore(&hc->lock, flags); + queue_ch_frame(ch, PH_DATA_CNF, id, NULL); + } else + spin_unlock_irqrestore(&hc->lock, flags); + } else + spin_unlock_irqrestore(&hc->lock, flags); + return ret; + case PH_ACTIVATE_REQ: + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG "%s: PH_ACTIVATE ch %d (0..32)\n", + __func__, bch->slot); + spin_lock_irqsave(&hc->lock, flags); + /* activate B-channel if not already activated */ + if (!test_and_set_bit(FLG_ACTIVE, &bch->Flags)) { + hc->chan[bch->slot].txpending = 0; + ret = mode_hfcmulti(hc, bch->slot, + ch->protocol, + hc->chan[bch->slot].slot_tx, + hc->chan[bch->slot].bank_tx, + hc->chan[bch->slot].slot_rx, + hc->chan[bch->slot].bank_rx); + if (!ret) { + if (ch->protocol == ISDN_P_B_RAW && !hc->dtmf + && test_bit(HFC_CHIP_DTMF, &hc->chip)) { + /* start decoder */ + hc->dtmf = 1; + if (debug & DEBUG_HFCMULTI_DTMF) + printk(KERN_DEBUG + "%s: start dtmf decoder\n", + __func__); + HFC_outb(hc, R_DTMF, hc->hw.r_dtmf | + V_RST_DTMF); + } + } + } else + ret = 0; + spin_unlock_irqrestore(&hc->lock, flags); + if (!ret) + _queue_data(ch, PH_ACTIVATE_IND, MISDN_ID_ANY, 0, NULL, + GFP_KERNEL); + break; + case PH_CONTROL_REQ: + spin_lock_irqsave(&hc->lock, flags); + switch (hh->id) { + case HFC_SPL_LOOP_ON: /* set sample loop */ + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG + "%s: HFC_SPL_LOOP_ON (len = %d)\n", + __func__, skb->len); + ret = 0; + break; + case HFC_SPL_LOOP_OFF: /* set silence */ + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG "%s: HFC_SPL_LOOP_OFF\n", + __func__); + ret = 0; + break; + default: + printk(KERN_ERR + "%s: unknown PH_CONTROL_REQ info %x\n", + __func__, hh->id); + ret = -EINVAL; + } + spin_unlock_irqrestore(&hc->lock, flags); + break; + case PH_DEACTIVATE_REQ: + deactivate_bchannel(bch); /* locked there */ + _queue_data(ch, PH_DEACTIVATE_IND, MISDN_ID_ANY, 0, NULL, + GFP_KERNEL); + ret = 0; + break; + } + if (!ret) + dev_kfree_skb(skb); + return ret; +} + +/* + * bchannel control function + */ +static int +channel_bctrl(struct bchannel *bch, struct mISDN_ctrl_req *cq) +{ + int ret = 0; + struct dsp_features *features = + (struct dsp_features *)(*((u_long *)&cq->p1)); + struct hfc_multi *hc = 
bch->hw; + int slot_tx; + int bank_tx; + int slot_rx; + int bank_rx; + int num; + + switch (cq->op) { + case MISDN_CTRL_GETOP: + cq->op = MISDN_CTRL_HFC_OP | MISDN_CTRL_HW_FEATURES_OP + | MISDN_CTRL_RX_OFF; + break; + case MISDN_CTRL_RX_OFF: /* turn off / on rx stream */ + hc->chan[bch->slot].rx_off = !!cq->p1; + if (!hc->chan[bch->slot].rx_off) { + /* reset fifo on rx on */ + HFC_outb_nodebug(hc, R_FIFO, (bch->slot << 1) | 1); + HFC_wait_nodebug(hc); + HFC_outb_nodebug(hc, R_INC_RES_FIFO, V_RES_F); + HFC_wait_nodebug(hc); + } + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG "%s: RX_OFF request (nr=%d off=%d)\n", + __func__, bch->nr, hc->chan[bch->slot].rx_off); + break; + case MISDN_CTRL_HW_FEATURES: /* fill features structure */ + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG "%s: HW_FEATURE request\n", + __func__); + /* create confirm */ + features->hfc_id = hc->id; + if (test_bit(HFC_CHIP_DTMF, &hc->chip)) + features->hfc_dtmf = 1; + features->hfc_loops = 0; + if (test_bit(HFC_CHIP_B410P, &hc->chip)) { + features->hfc_echocanhw = 1; + } else { + features->pcm_id = hc->pcm; + features->pcm_slots = hc->slots; + features->pcm_banks = 2; + } + break; + case MISDN_CTRL_HFC_PCM_CONN: /* connect to pcm timeslot (0..N) */ + slot_tx = cq->p1 & 0xff; + bank_tx = cq->p1 >> 8; + slot_rx = cq->p2 & 0xff; + bank_rx = cq->p2 >> 8; + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG + "%s: HFC_PCM_CONN slot %d bank %d (TX) " + "slot %d bank %d (RX)\n", + __func__, slot_tx, bank_tx, + slot_rx, bank_rx); + if (slot_tx < hc->slots && bank_tx <= 2 && + slot_rx < hc->slots && bank_rx <= 2) + hfcmulti_pcm(hc, bch->slot, + slot_tx, bank_tx, slot_rx, bank_rx); + else { + printk(KERN_WARNING + "%s: HFC_PCM_CONN slot %d bank %d (TX) " + "slot %d bank %d (RX) out of range\n", + __func__, slot_tx, bank_tx, + slot_rx, bank_rx); + ret = -EINVAL; + } + break; + case MISDN_CTRL_HFC_PCM_DISC: /* release interface from pcm timeslot */ + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG "%s: HFC_PCM_DISC\n", + __func__); + hfcmulti_pcm(hc, bch->slot, -1, 0, -1, 0); + break; + case MISDN_CTRL_HFC_CONF_JOIN: /* join conference (0..7) */ + num = cq->p1 & 0xff; + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG "%s: HFC_CONF_JOIN conf %d\n", + __func__, num); + if (num <= 7) + hfcmulti_conf(hc, bch->slot, num); + else { + printk(KERN_WARNING + "%s: HW_CONF_JOIN conf %d out of range\n", + __func__, num); + ret = -EINVAL; + } + break; + case MISDN_CTRL_HFC_CONF_SPLIT: /* split conference */ + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG "%s: HFC_CONF_SPLIT\n", __func__); + hfcmulti_conf(hc, bch->slot, -1); + break; + case MISDN_CTRL_HFC_ECHOCAN_ON: + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG "%s: HFC_ECHOCAN_ON\n", __func__); + if (test_bit(HFC_CHIP_B410P, &hc->chip)) + vpm_echocan_on(hc, bch->slot, cq->p1); + else + ret = -EINVAL; + break; + + case MISDN_CTRL_HFC_ECHOCAN_OFF: + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG "%s: HFC_ECHOCAN_OFF\n", + __func__); + if (test_bit(HFC_CHIP_B410P, &hc->chip)) + vpm_echocan_off(hc, bch->slot); + else + ret = -EINVAL; + break; + default: + printk(KERN_WARNING "%s: unknown Op %x\n", + __func__, cq->op); + ret = -EINVAL; + break; + } + return ret; +} + +static int +hfcm_bctrl(struct mISDNchannel *ch, u_int cmd, void *arg) +{ + struct bchannel *bch = container_of(ch, struct bchannel, ch); + struct hfc_multi *hc = bch->hw; + int err = -EINVAL; + u_long flags; + + if (bch->debug & DEBUG_HW) + printk(KERN_DEBUG "%s: cmd:%x %p\n", + __func__, cmd, 
arg); + switch (cmd) { + case CLOSE_CHANNEL: + test_and_clear_bit(FLG_OPEN, &bch->Flags); + if (test_bit(FLG_ACTIVE, &bch->Flags)) + deactivate_bchannel(bch); /* locked there */ + ch->protocol = ISDN_P_NONE; + ch->peer = NULL; + module_put(THIS_MODULE); + err = 0; + break; + case CONTROL_CHANNEL: + spin_lock_irqsave(&hc->lock, flags); + err = channel_bctrl(bch, arg); + spin_unlock_irqrestore(&hc->lock, flags); + break; + default: + printk(KERN_WARNING "%s: unknown prim(%x)\n", + __func__, cmd); + } + return err; +} + +/* + * handle D-channel events + * + * handle state change event + */ +static void +ph_state_change(struct dchannel *dch) +{ + struct hfc_multi *hc = dch->hw; + int ch, i; + + if (!dch) { + printk(KERN_WARNING "%s: ERROR given dch is NULL\n", + __func__); + return; + } + ch = dch->slot; + + if (hc->type == 1) { + if (dch->dev.D.protocol == ISDN_P_TE_E1) { + if (debug & DEBUG_HFCMULTI_STATE) + printk(KERN_DEBUG + "%s: E1 TE (id=%d) newstate %x\n", + __func__, hc->id, dch->state); + } else { + if (debug & DEBUG_HFCMULTI_STATE) + printk(KERN_DEBUG + "%s: E1 NT (id=%d) newstate %x\n", + __func__, hc->id, dch->state); + } + switch (dch->state) { + case (1): + if (hc->e1_state != 1) { + for (i = 1; i <= 31; i++) { + /* reset fifos on e1 activation */ + HFC_outb_nodebug(hc, R_FIFO, (i << 1) | 1); + HFC_wait_nodebug(hc); + HFC_outb_nodebug(hc, + R_INC_RES_FIFO, V_RES_F); + HFC_wait_nodebug(hc); + } + } + test_and_set_bit(FLG_ACTIVE, &dch->Flags); + _queue_data(&dch->dev.D, PH_ACTIVATE_IND, + MISDN_ID_ANY, 0, NULL, GFP_ATOMIC); + break; + + default: + if (hc->e1_state != 1) + return; + test_and_clear_bit(FLG_ACTIVE, &dch->Flags); + _queue_data(&dch->dev.D, PH_DEACTIVATE_IND, + MISDN_ID_ANY, 0, NULL, GFP_ATOMIC); + } + hc->e1_state = dch->state; + } else { + if (dch->dev.D.protocol == ISDN_P_TE_S0) { + if (debug & DEBUG_HFCMULTI_STATE) + printk(KERN_DEBUG + "%s: S/T TE newstate %x\n", + __func__, dch->state); + switch (dch->state) { + case (0): + l1_event(dch->l1, HW_RESET_IND); + break; + case (3): + l1_event(dch->l1, HW_DEACT_IND); + break; + case (5): + case (8): + l1_event(dch->l1, ANYSIGNAL); + break; + case (6): + l1_event(dch->l1, INFO2); + break; + case (7): + l1_event(dch->l1, INFO4_P8); + break; + } + } else { + if (debug & DEBUG_HFCMULTI_STATE) + printk(KERN_DEBUG "%s: S/T NT newstate %x\n", + __func__, dch->state); + switch (dch->state) { + case (2): + if (hc->chan[ch].nt_timer == 0) { + hc->chan[ch].nt_timer = -1; + HFC_outb(hc, R_ST_SEL, + hc->chan[ch].port); + /* undocumented: delay after R_ST_SEL */ + udelay(1); + HFC_outb(hc, A_ST_WR_STATE, 4 | + V_ST_LD_STA); /* G4 */ + udelay(6); /* wait at least 5,21us */ + HFC_outb(hc, A_ST_WR_STATE, 4); + dch->state = 4; + } else { + /* one extra count for the next event */ + hc->chan[ch].nt_timer = + nt_t1_count[poll_timer] + 1; + HFC_outb(hc, R_ST_SEL, + hc->chan[ch].port); + /* undocumented: delay after R_ST_SEL */ + udelay(1); + /* allow G2 -> G3 transition */ + HFC_outb(hc, A_ST_WR_STATE, 2 | + V_SET_G2_G3); + } + break; + case (1): + hc->chan[ch].nt_timer = -1; + test_and_clear_bit(FLG_ACTIVE, &dch->Flags); + _queue_data(&dch->dev.D, PH_DEACTIVATE_IND, + MISDN_ID_ANY, 0, NULL, GFP_ATOMIC); + break; + case (4): + hc->chan[ch].nt_timer = -1; + break; + case (3): + hc->chan[ch].nt_timer = -1; + test_and_set_bit(FLG_ACTIVE, &dch->Flags); + _queue_data(&dch->dev.D, PH_ACTIVATE_IND, + MISDN_ID_ANY, 0, NULL, GFP_ATOMIC); + break; + } + } + } +} + +/* + * called for card mode init message + */ + +static void 
+hfcmulti_initmode(struct dchannel *dch) +{ + struct hfc_multi *hc = dch->hw; + u_char a_st_wr_state, r_e1_wr_sta; + int i, pt; + + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: entered\n", __func__); + + if (hc->type == 1) { + hc->chan[hc->dslot].slot_tx = -1; + hc->chan[hc->dslot].slot_rx = -1; + hc->chan[hc->dslot].conf = -1; + if (hc->dslot) { + mode_hfcmulti(hc, hc->dslot, dch->dev.D.protocol, + -1, 0, -1, 0); + dch->timer.function = (void *) hfcmulti_dbusy_timer; + dch->timer.data = (long) dch; + init_timer(&dch->timer); + } + for (i = 1; i <= 31; i++) { + if (i == hc->dslot) + continue; + hc->chan[i].slot_tx = -1; + hc->chan[i].slot_rx = -1; + hc->chan[i].conf = -1; + mode_hfcmulti(hc, i, ISDN_P_NONE, -1, 0, -1, 0); + } + /* E1 */ + if (test_bit(HFC_CFG_REPORT_LOS, &hc->chan[hc->dslot].cfg)) { + HFC_outb(hc, R_LOS0, 255); /* 2 ms */ + HFC_outb(hc, R_LOS1, 255); /* 512 ms */ + } + if (test_bit(HFC_CFG_OPTICAL, &hc->chan[hc->dslot].cfg)) { + HFC_outb(hc, R_RX0, 0); + hc->hw.r_tx0 = 0 | V_OUT_EN; + } else { + HFC_outb(hc, R_RX0, 1); + hc->hw.r_tx0 = 1 | V_OUT_EN; + } + hc->hw.r_tx1 = V_ATX | V_NTRI; + HFC_outb(hc, R_TX0, hc->hw.r_tx0); + HFC_outb(hc, R_TX1, hc->hw.r_tx1); + HFC_outb(hc, R_TX_FR0, 0x00); + HFC_outb(hc, R_TX_FR1, 0xf8); + + if (test_bit(HFC_CFG_CRC4, &hc->chan[hc->dslot].cfg)) + HFC_outb(hc, R_TX_FR2, V_TX_MF | V_TX_E | V_NEG_E); + + HFC_outb(hc, R_RX_FR0, V_AUTO_RESYNC | V_AUTO_RECO | 0); + + if (test_bit(HFC_CFG_CRC4, &hc->chan[hc->dslot].cfg)) + HFC_outb(hc, R_RX_FR1, V_RX_MF | V_RX_MF_SYNC); + + if (dch->dev.D.protocol == ISDN_P_NT_E1) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: E1 port is NT-mode\n", + __func__); + r_e1_wr_sta = 0; /* G0 */ + hc->e1_getclock = 0; + } else { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: E1 port is TE-mode\n", + __func__); + r_e1_wr_sta = 0; /* F0 */ + hc->e1_getclock = 1; + } + if (test_bit(HFC_CHIP_RX_SYNC, &hc->chip)) + HFC_outb(hc, R_SYNC_OUT, V_SYNC_E1_RX); + else + HFC_outb(hc, R_SYNC_OUT, 0); + if (test_bit(HFC_CHIP_E1CLOCK_GET, &hc->chip)) + hc->e1_getclock = 1; + if (test_bit(HFC_CHIP_E1CLOCK_PUT, &hc->chip)) + hc->e1_getclock = 0; + if (test_bit(HFC_CHIP_PCM_SLAVE, &hc->chip)) { + /* SLAVE (clock master) */ + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "%s: E1 port is clock master " + "(clock from PCM)\n", __func__); + HFC_outb(hc, R_SYNC_CTRL, V_EXT_CLK_SYNC | V_PCM_SYNC); + } else { + if (hc->e1_getclock) { + /* MASTER (clock slave) */ + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "%s: E1 port is clock slave " + "(clock to PCM)\n", __func__); + HFC_outb(hc, R_SYNC_CTRL, V_SYNC_OFFS); + } else { + /* MASTER (clock master) */ + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: E1 port is " + "clock master " + "(clock from QUARTZ)\n", + __func__); + HFC_outb(hc, R_SYNC_CTRL, V_EXT_CLK_SYNC | + V_PCM_SYNC | V_JATT_OFF); + HFC_outb(hc, R_SYNC_OUT, 0); + } + } + HFC_outb(hc, R_JATT_ATT, 0x9c); /* undoc register */ + HFC_outb(hc, R_PWM_MD, V_PWM0_MD); + HFC_outb(hc, R_PWM0, 0x50); + HFC_outb(hc, R_PWM1, 0xff); + /* state machine setup */ + HFC_outb(hc, R_E1_WR_STA, r_e1_wr_sta | V_E1_LD_STA); + udelay(6); /* wait at least 5,21us */ + HFC_outb(hc, R_E1_WR_STA, r_e1_wr_sta); + if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { + hc->syncronized = 0; + plxsd_checksync(hc, 0); + } + } else { + i = dch->slot; + hc->chan[i].slot_tx = -1; + hc->chan[i].slot_rx = -1; + hc->chan[i].conf = -1; + mode_hfcmulti(hc, i, dch->dev.D.protocol, -1, 0, -1, 0); + dch->timer.function = 
(void *)hfcmulti_dbusy_timer; + dch->timer.data = (long) dch; + init_timer(&dch->timer); + hc->chan[i - 2].slot_tx = -1; + hc->chan[i - 2].slot_rx = -1; + hc->chan[i - 2].conf = -1; + mode_hfcmulti(hc, i - 2, ISDN_P_NONE, -1, 0, -1, 0); + hc->chan[i - 1].slot_tx = -1; + hc->chan[i - 1].slot_rx = -1; + hc->chan[i - 1].conf = -1; + mode_hfcmulti(hc, i - 1, ISDN_P_NONE, -1, 0, -1, 0); + /* ST */ + pt = hc->chan[i].port; + /* select interface */ + HFC_outb(hc, R_ST_SEL, pt); + /* undocumented: delay after R_ST_SEL */ + udelay(1); + if (dch->dev.D.protocol == ISDN_P_NT_S0) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "%s: ST port %d is NT-mode\n", + __func__, pt); + /* clock delay */ + HFC_outb(hc, A_ST_CLK_DLY, clockdelay_nt); + a_st_wr_state = 1; /* G1 */ + hc->hw.a_st_ctrl0[pt] = V_ST_MD; + } else { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "%s: ST port %d is TE-mode\n", + __func__, pt); + /* clock delay */ + HFC_outb(hc, A_ST_CLK_DLY, clockdelay_te); + a_st_wr_state = 2; /* F2 */ + hc->hw.a_st_ctrl0[pt] = 0; + } + if (!test_bit(HFC_CFG_NONCAP_TX, &hc->chan[i].cfg)) + hc->hw.a_st_ctrl0[pt] |= V_TX_LI; + /* line setup */ + HFC_outb(hc, A_ST_CTRL0, hc->hw.a_st_ctrl0[pt]); + /* disable E-channel */ + if ((dch->dev.D.protocol == ISDN_P_NT_S0) || + test_bit(HFC_CFG_DIS_ECHANNEL, &hc->chan[i].cfg)) + HFC_outb(hc, A_ST_CTRL1, V_E_IGNO); + else + HFC_outb(hc, A_ST_CTRL1, 0); + /* enable B-channel receive */ + HFC_outb(hc, A_ST_CTRL2, V_B1_RX_EN | V_B2_RX_EN); + /* state machine setup */ + HFC_outb(hc, A_ST_WR_STATE, a_st_wr_state | V_ST_LD_STA); + udelay(6); /* wait at least 5,21us */ + HFC_outb(hc, A_ST_WR_STATE, a_st_wr_state); + hc->hw.r_sci_msk |= 1 << pt; + /* state machine interrupts */ + HFC_outb(hc, R_SCI_MSK, hc->hw.r_sci_msk); + /* unset sync on port */ + if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { + hc->syncronized &= + ~(1 << hc->chan[dch->slot].port); + plxsd_checksync(hc, 0); + } + } + if (debug & DEBUG_HFCMULTI_INIT) + printk("%s: done\n", __func__); +} + + +static int +open_dchannel(struct hfc_multi *hc, struct dchannel *dch, + struct channel_req *rq) +{ + int err = 0; + u_long flags; + + if (debug & DEBUG_HW_OPEN) + printk(KERN_DEBUG "%s: dev(%d) open from %p\n", __func__, + dch->dev.id, __builtin_return_address(0)); + if (rq->protocol == ISDN_P_NONE) + return -EINVAL; + if ((dch->dev.D.protocol != ISDN_P_NONE) && + (dch->dev.D.protocol != rq->protocol)) { + if (debug & DEBUG_HFCMULTI_MODE) + printk(KERN_WARNING "%s: change protocol %x to %x\n", + __func__, dch->dev.D.protocol, rq->protocol); + } + if ((dch->dev.D.protocol == ISDN_P_TE_S0) + && (rq->protocol != ISDN_P_TE_S0)) + l1_event(dch->l1, CLOSE_CHANNEL); + if (dch->dev.D.protocol != rq->protocol) { + if (rq->protocol == ISDN_P_TE_S0) { + err = create_l1(dch, hfcm_l1callback); + if (err) + return err; + } + dch->dev.D.protocol = rq->protocol; + spin_lock_irqsave(&hc->lock, flags); + hfcmulti_initmode(dch); + spin_unlock_irqrestore(&hc->lock, flags); + } + + if (((rq->protocol == ISDN_P_NT_S0) && (dch->state == 3)) || + ((rq->protocol == ISDN_P_TE_S0) && (dch->state == 7)) || + ((rq->protocol == ISDN_P_NT_E1) && (dch->state == 1)) || + ((rq->protocol == ISDN_P_TE_E1) && (dch->state == 1))) { + _queue_data(&dch->dev.D, PH_ACTIVATE_IND, MISDN_ID_ANY, + 0, NULL, GFP_KERNEL); + } + rq->ch = &dch->dev.D; + if (!try_module_get(THIS_MODULE)) + printk(KERN_WARNING "%s:cannot get module\n", __func__); + return 0; +} + +static int +open_bchannel(struct hfc_multi *hc, struct dchannel *dch, + struct channel_req 
*rq) +{ + struct bchannel *bch; + int ch; + + if (!test_bit(rq->adr.channel, &dch->dev.channelmap[0])) + return -EINVAL; + if (rq->protocol == ISDN_P_NONE) + return -EINVAL; + if (hc->type == 1) + ch = rq->adr.channel; + else + ch = (rq->adr.channel - 1) + (dch->slot - 2); + bch = hc->chan[ch].bch; + if (!bch) { + printk(KERN_ERR "%s:internal error ch %d has no bch\n", + __func__, ch); + return -EINVAL; + } + if (test_and_set_bit(FLG_OPEN, &bch->Flags)) + return -EBUSY; /* b-channel can be only open once */ + bch->ch.protocol = rq->protocol; + hc->chan[ch].rx_off = 0; + rq->ch = &bch->ch; + if (!try_module_get(THIS_MODULE)) + printk(KERN_WARNING "%s:cannot get module\n", __func__); + return 0; +} + +/* + * device control function + */ +static int +channel_dctrl(struct dchannel *dch, struct mISDN_ctrl_req *cq) +{ + int ret = 0; + + switch (cq->op) { + case MISDN_CTRL_GETOP: + cq->op = 0; + break; + default: + printk(KERN_WARNING "%s: unknown Op %x\n", + __func__, cq->op); + ret = -EINVAL; + break; + } + return ret; +} + +static int +hfcm_dctrl(struct mISDNchannel *ch, u_int cmd, void *arg) +{ + struct mISDNdevice *dev = container_of(ch, struct mISDNdevice, D); + struct dchannel *dch = container_of(dev, struct dchannel, dev); + struct hfc_multi *hc = dch->hw; + struct channel_req *rq; + int err = 0; + u_long flags; + + if (dch->debug & DEBUG_HW) + printk(KERN_DEBUG "%s: cmd:%x %p\n", + __func__, cmd, arg); + switch (cmd) { + case OPEN_CHANNEL: + rq = arg; + switch (rq->protocol) { + case ISDN_P_TE_S0: + case ISDN_P_NT_S0: + if (hc->type == 1) { + err = -EINVAL; + break; + } + err = open_dchannel(hc, dch, rq); /* locked there */ + break; + case ISDN_P_TE_E1: + case ISDN_P_NT_E1: + if (hc->type != 1) { + err = -EINVAL; + break; + } + err = open_dchannel(hc, dch, rq); /* locked there */ + break; + default: + spin_lock_irqsave(&hc->lock, flags); + err = open_bchannel(hc, dch, rq); + spin_unlock_irqrestore(&hc->lock, flags); + } + break; + case CLOSE_CHANNEL: + if (debug & DEBUG_HW_OPEN) + printk(KERN_DEBUG "%s: dev(%d) close from %p\n", + __func__, dch->dev.id, + __builtin_return_address(0)); + module_put(THIS_MODULE); + break; + case CONTROL_CHANNEL: + spin_lock_irqsave(&hc->lock, flags); + err = channel_dctrl(dch, arg); + spin_unlock_irqrestore(&hc->lock, flags); + break; + default: + if (dch->debug & DEBUG_HW) + printk(KERN_DEBUG "%s: unknown command %x\n", + __func__, cmd); + err = -EINVAL; + } + return err; +} + +/* + * initialize the card + */ + +/* + * start timer irq, wait some time and check if we have interrupts. + * if not, reset chip and try again. 
+ */ +static int +init_card(struct hfc_multi *hc) +{ + int err = -EIO; + u_long flags; + u_short *plx_acc; + u_long plx_flags; + + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: entered\n", __func__); + + spin_lock_irqsave(&hc->lock, flags); + /* set interrupts but leave global interrupt disabled */ + hc->hw.r_irq_ctrl = V_FIFO_IRQ; + disable_hwirq(hc); + spin_unlock_irqrestore(&hc->lock, flags); + + if (request_irq(hc->pci_dev->irq, hfcmulti_interrupt, IRQF_SHARED, + "HFC-multi", hc)) { + printk(KERN_WARNING "mISDN: Could not get interrupt %d.\n", + hc->pci_dev->irq); + return -EIO; + } + hc->irq = hc->pci_dev->irq; + + if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { + spin_lock_irqsave(&plx_lock, plx_flags); + plx_acc = (u_short *)(hc->plx_membase+PLX_INTCSR); + writew((PLX_INTCSR_PCIINT_ENABLE | PLX_INTCSR_LINTI1_ENABLE), + plx_acc); /* enable PCI & LINT1 irq */ + spin_unlock_irqrestore(&plx_lock, plx_flags); + } + + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: IRQ %d count %d\n", + __func__, hc->irq, hc->irqcnt); + err = init_chip(hc); + if (err) + goto error; + /* + * Finally enable IRQ output + * this is only allowed, if an IRQ routine is allready + * established for this HFC, so don't do that earlier + */ + spin_lock_irqsave(&hc->lock, flags); + enable_hwirq(hc); + spin_unlock_irqrestore(&hc->lock, flags); + /* printk(KERN_DEBUG "no master irq set!!!\n"); */ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout((100*HZ)/1000); /* Timeout 100ms */ + /* turn IRQ off until chip is completely initialized */ + spin_lock_irqsave(&hc->lock, flags); + disable_hwirq(hc); + spin_unlock_irqrestore(&hc->lock, flags); + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: IRQ %d count %d\n", + __func__, hc->irq, hc->irqcnt); + if (hc->irqcnt) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: done\n", __func__); + + return 0; + } + if (test_bit(HFC_CHIP_PCM_SLAVE, &hc->chip)) { + printk(KERN_INFO "ignoring missing interrupts\n"); + return 0; + } + + printk(KERN_ERR "HFC PCI: IRQ(%d) getting no interrupts during init.\n", + hc->irq); + + err = -EIO; + +error: + if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { + spin_lock_irqsave(&plx_lock, plx_flags); + plx_acc = (u_short *)(hc->plx_membase+PLX_INTCSR); + writew(0x00, plx_acc); /*disable IRQs*/ + spin_unlock_irqrestore(&plx_lock, plx_flags); + } + + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_WARNING "%s: free irq %d\n", __func__, hc->irq); + if (hc->irq) { + free_irq(hc->irq, hc); + hc->irq = 0; + } + + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: done (err=%d)\n", __func__, err); + return err; +} + +/* + * find pci device and set it up + */ + +static int +setup_pci(struct hfc_multi *hc, struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct hm_map *m = (struct hm_map *)ent->driver_data; + + printk(KERN_INFO + "HFC-multi: card manufacturer: '%s' card name: '%s' clock: %s\n", + m->vendor_name, m->card_name, m->clock2 ? 
"double" : "normal"); + + hc->pci_dev = pdev; + if (m->clock2) + test_and_set_bit(HFC_CHIP_CLOCK2, &hc->chip); + + if (ent->device == 0xB410) { + test_and_set_bit(HFC_CHIP_B410P, &hc->chip); + test_and_set_bit(HFC_CHIP_PCM_MASTER, &hc->chip); + test_and_clear_bit(HFC_CHIP_PCM_SLAVE, &hc->chip); + hc->slots = 32; + } + + if (hc->pci_dev->irq <= 0) { + printk(KERN_WARNING "HFC-multi: No IRQ for PCI card found.\n"); + return -EIO; + } + if (pci_enable_device(hc->pci_dev)) { + printk(KERN_WARNING "HFC-multi: Error enabling PCI card.\n"); + return -EIO; + } + hc->leds = m->leds; + hc->ledstate = 0xAFFEAFFE; + hc->opticalsupport = m->opticalsupport; + + /* set memory access methods */ + if (m->io_mode) /* use mode from card config */ + hc->io_mode = m->io_mode; + switch (hc->io_mode) { + case HFC_IO_MODE_PLXSD: + test_and_set_bit(HFC_CHIP_PLXSD, &hc->chip); + hc->slots = 128; /* required */ + /* fall through */ + case HFC_IO_MODE_PCIMEM: + hc->HFC_outb = HFC_outb_pcimem; + hc->HFC_inb = HFC_inb_pcimem; + hc->HFC_inw = HFC_inw_pcimem; + hc->HFC_wait = HFC_wait_pcimem; + hc->read_fifo = read_fifo_pcimem; + hc->write_fifo = write_fifo_pcimem; + break; + case HFC_IO_MODE_REGIO: + hc->HFC_outb = HFC_outb_regio; + hc->HFC_inb = HFC_inb_regio; + hc->HFC_inw = HFC_inw_regio; + hc->HFC_wait = HFC_wait_regio; + hc->read_fifo = read_fifo_regio; + hc->write_fifo = write_fifo_regio; + break; + default: + printk(KERN_WARNING "HFC-multi: Invalid IO mode.\n"); + pci_disable_device(hc->pci_dev); + return -EIO; + } + hc->HFC_outb_nodebug = hc->HFC_outb; + hc->HFC_inb_nodebug = hc->HFC_inb; + hc->HFC_inw_nodebug = hc->HFC_inw; + hc->HFC_wait_nodebug = hc->HFC_wait; +#ifdef HFC_REGISTER_DEBUG + hc->HFC_outb = HFC_outb_debug; + hc->HFC_inb = HFC_inb_debug; + hc->HFC_inw = HFC_inw_debug; + hc->HFC_wait = HFC_wait_debug; +#endif + hc->pci_iobase = 0; + hc->pci_membase = NULL; + hc->plx_membase = NULL; + + switch (hc->io_mode) { + case HFC_IO_MODE_PLXSD: + hc->plx_origmembase = hc->pci_dev->resource[0].start; + /* MEMBASE 1 is PLX PCI Bridge */ + + if (!hc->plx_origmembase) { + printk(KERN_WARNING + "HFC-multi: No IO-Memory for PCI PLX bridge found\n"); + pci_disable_device(hc->pci_dev); + return -EIO; + } + + hc->plx_membase = ioremap(hc->plx_origmembase, 0x80); + if (!hc->plx_membase) { + printk(KERN_WARNING + "HFC-multi: failed to remap plx address space. " + "(internal error)\n"); + pci_disable_device(hc->pci_dev); + return -EIO; + } + printk(KERN_INFO + "HFC-multi: plx_membase:%#lx plx_origmembase:%#lx\n", + (u_long)hc->plx_membase, hc->plx_origmembase); + + hc->pci_origmembase = hc->pci_dev->resource[2].start; + /* MEMBASE 1 is PLX PCI Bridge */ + if (!hc->pci_origmembase) { + printk(KERN_WARNING + "HFC-multi: No IO-Memory for PCI card found\n"); + pci_disable_device(hc->pci_dev); + return -EIO; + } + + hc->pci_membase = ioremap(hc->pci_origmembase, 0x400); + if (!hc->pci_membase) { + printk(KERN_WARNING "HFC-multi: failed to remap io " + "address space. 
(internal error)\n"); + pci_disable_device(hc->pci_dev); + return -EIO; + } + + printk(KERN_INFO + "card %d: defined at MEMBASE %#lx (%#lx) IRQ %d HZ %d " + "leds-type %d\n", + hc->id, (u_long)hc->pci_membase, hc->pci_origmembase, + hc->pci_dev->irq, HZ, hc->leds); + pci_write_config_word(hc->pci_dev, PCI_COMMAND, PCI_ENA_MEMIO); + break; + case HFC_IO_MODE_PCIMEM: + hc->pci_origmembase = hc->pci_dev->resource[1].start; + if (!hc->pci_origmembase) { + printk(KERN_WARNING + "HFC-multi: No IO-Memory for PCI card found\n"); + pci_disable_device(hc->pci_dev); + return -EIO; + } + + hc->pci_membase = ioremap(hc->pci_origmembase, 256); + if (!hc->pci_membase) { + printk(KERN_WARNING + "HFC-multi: failed to remap io address space. " + "(internal error)\n"); + pci_disable_device(hc->pci_dev); + return -EIO; + } + printk(KERN_INFO "card %d: defined at MEMBASE %#lx (%#lx) IRQ %d " + "HZ %d leds-type %d\n", hc->id, (u_long)hc->pci_membase, + hc->pci_origmembase, hc->pci_dev->irq, HZ, hc->leds); + pci_write_config_word(hc->pci_dev, PCI_COMMAND, PCI_ENA_MEMIO); + break; + case HFC_IO_MODE_REGIO: + hc->pci_iobase = (u_int) hc->pci_dev->resource[0].start; + if (!hc->pci_iobase) { + printk(KERN_WARNING + "HFC-multi: No IO for PCI card found\n"); + pci_disable_device(hc->pci_dev); + return -EIO; + } + + if (!request_region(hc->pci_iobase, 8, "hfcmulti")) { + printk(KERN_WARNING "HFC-multi: failed to request " + "address space at 0x%08lx (internal error)\n", + hc->pci_iobase); + pci_disable_device(hc->pci_dev); + return -EIO; + } + + printk(KERN_INFO + "%s %s: defined at IOBASE %#x IRQ %d HZ %d leds-type %d\n", + m->vendor_name, m->card_name, (u_int) hc->pci_iobase, + hc->pci_dev->irq, HZ, hc->leds); + pci_write_config_word(hc->pci_dev, PCI_COMMAND, PCI_ENA_REGIO); + break; + default: + printk(KERN_WARNING "HFC-multi: Invalid IO mode.\n"); + pci_disable_device(hc->pci_dev); + return -EIO; + } + + pci_set_drvdata(hc->pci_dev, hc); + + /* At this point the needed PCI config is done */ + /* fifos are still not enabled */ + return 0; +} + + +/* + * remove port + */ + +static void +release_port(struct hfc_multi *hc, struct dchannel *dch) +{ + int pt, ci, i = 0; + u_long flags; + struct bchannel *pb; + + ci = dch->slot; + pt = hc->chan[ci].port; + + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: entered for port %d\n", + __func__, pt + 1); + + if (pt >= hc->ports) { + printk(KERN_WARNING "%s: ERROR port out of range (%d).\n", + __func__, pt + 1); + return; + } + + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: releasing port=%d\n", + __func__, pt + 1); + + if (dch->dev.D.protocol == ISDN_P_TE_S0) + l1_event(dch->l1, CLOSE_CHANNEL); + + hc->chan[ci].dch = NULL; + + if (hc->created[pt]) { + hc->created[pt] = 0; + mISDN_unregister_device(&dch->dev); + } + + spin_lock_irqsave(&hc->lock, flags); + + if (dch->timer.function) { + del_timer(&dch->timer); + dch->timer.function = NULL; + } + + if (hc->type == 1) { /* E1 */ + /* remove sync */ + if (test_bit(HFC_CHIP_PLXSD, &hc->chip)) { + hc->syncronized = 0; + plxsd_checksync(hc, 1); + } + /* free channels */ + for (i = 0; i <= 31; i++) { + if (hc->chan[i].bch) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "%s: free port %d channel %d\n", + __func__, hc->chan[i].port+1, i); + pb = hc->chan[i].bch; + hc->chan[i].bch = NULL; + spin_unlock_irqrestore(&hc->lock, flags); + mISDN_freebchannel(pb); + kfree(pb); + kfree(hc->chan[i].coeff); + spin_lock_irqsave(&hc->lock, flags); + } + } + } else { + /* remove sync */ + if 
(test_bit(HFC_CHIP_PLXSD, &hc->chip)) { + hc->syncronized &= + ~(1 << hc->chan[ci].port); + plxsd_checksync(hc, 1); + } + /* free channels */ + if (hc->chan[ci - 2].bch) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "%s: free port %d channel %d\n", + __func__, hc->chan[ci - 2].port+1, + ci - 2); + pb = hc->chan[ci - 2].bch; + hc->chan[ci - 2].bch = NULL; + spin_unlock_irqrestore(&hc->lock, flags); + mISDN_freebchannel(pb); + kfree(pb); + kfree(hc->chan[ci - 2].coeff); + spin_lock_irqsave(&hc->lock, flags); + } + if (hc->chan[ci - 1].bch) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "%s: free port %d channel %d\n", + __func__, hc->chan[ci - 1].port+1, + ci - 1); + pb = hc->chan[ci - 1].bch; + hc->chan[ci - 1].bch = NULL; + spin_unlock_irqrestore(&hc->lock, flags); + mISDN_freebchannel(pb); + kfree(pb); + kfree(hc->chan[ci - 1].coeff); + spin_lock_irqsave(&hc->lock, flags); + } + } + + spin_unlock_irqrestore(&hc->lock, flags); + + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: free port %d channel D\n", __func__, pt); + mISDN_freedchannel(dch); + kfree(dch); + + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: done!\n", __func__); +} + +static void +release_card(struct hfc_multi *hc) +{ + u_long flags; + int ch; + + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_WARNING "%s: release card (%d) entered\n", + __func__, hc->id); + + spin_lock_irqsave(&hc->lock, flags); + disable_hwirq(hc); + spin_unlock_irqrestore(&hc->lock, flags); + + udelay(1000); + + /* dimm leds */ + if (hc->leds) + hfcmulti_leds(hc); + + /* disable D-channels & B-channels */ + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: disable all channels (d and b)\n", + __func__); + for (ch = 0; ch <= 31; ch++) { + if (hc->chan[ch].dch) + release_port(hc, hc->chan[ch].dch); + } + + /* release hardware & irq */ + if (hc->irq) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_WARNING "%s: free irq %d\n", + __func__, hc->irq); + free_irq(hc->irq, hc); + hc->irq = 0; + + } + release_io_hfcmulti(hc); + + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_WARNING "%s: remove instance from list\n", + __func__); + list_del(&hc->list); + + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_WARNING "%s: delete instance\n", __func__); + if (hc == syncmaster) + syncmaster = NULL; + kfree(hc); + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_WARNING "%s: card successfully removed\n", + __func__); +} + +static int +init_e1_port(struct hfc_multi *hc, struct hm_map *m) +{ + struct dchannel *dch; + struct bchannel *bch; + int ch, ret = 0; + char name[MISDN_MAX_IDLEN]; + + dch = kzalloc(sizeof(struct dchannel), GFP_KERNEL); + if (!dch) + return -ENOMEM; + dch->debug = debug; + mISDN_initdchannel(dch, MAX_DFRAME_LEN_L1, ph_state_change); + dch->hw = hc; + dch->dev.Dprotocols = (1 << ISDN_P_TE_E1) | (1 << ISDN_P_NT_E1); + dch->dev.Bprotocols = (1 << (ISDN_P_B_RAW & ISDN_P_B_MASK)) | + (1 << (ISDN_P_B_HDLC & ISDN_P_B_MASK)); + dch->dev.D.send = handle_dmsg; + dch->dev.D.ctrl = hfcm_dctrl; + dch->dev.nrbchan = (hc->dslot)?30:31; + dch->slot = hc->dslot; + hc->chan[hc->dslot].dch = dch; + hc->chan[hc->dslot].port = 0; + hc->chan[hc->dslot].nt_timer = -1; + for (ch = 1; ch <= 31; ch++) { + if (ch == hc->dslot) /* skip dchannel */ + continue; + bch = kzalloc(sizeof(struct bchannel), GFP_KERNEL); + if (!bch) { + printk(KERN_ERR "%s: no memory for bchannel\n", + __func__); + ret = -ENOMEM; + goto free_chan; + } + hc->chan[ch].coeff = kzalloc(512, GFP_KERNEL); + if (!hc->chan[ch].coeff) { + printk(KERN_ERR "%s: 
no memory for coeffs\n", + __func__); + ret = -ENOMEM; + goto free_chan; + } + bch->nr = ch; + bch->slot = ch; + bch->debug = debug; + mISDN_initbchannel(bch, MAX_DATA_MEM); + bch->hw = hc; + bch->ch.send = handle_bmsg; + bch->ch.ctrl = hfcm_bctrl; + bch->ch.nr = ch; + list_add(&bch->ch.list, &dch->dev.bchannels); + hc->chan[ch].bch = bch; + hc->chan[ch].port = 0; + test_and_set_bit(bch->nr, &dch->dev.channelmap[0]); + } + /* set optical line type */ + if (port[Port_cnt] & 0x001) { + if (!m->opticalsupport) { + printk(KERN_INFO + "This board has no optical " + "support\n"); + } else { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "%s: PORT set optical " + "interfacs: card(%d) " + "port(%d)\n", + __func__, + HFC_cnt + 1, 1); + test_and_set_bit(HFC_CFG_OPTICAL, + &hc->chan[hc->dslot].cfg); + } + } + /* set LOS report */ + if (port[Port_cnt] & 0x004) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: PORT set " + "LOS report: card(%d) port(%d)\n", + __func__, HFC_cnt + 1, 1); + test_and_set_bit(HFC_CFG_REPORT_LOS, + &hc->chan[hc->dslot].cfg); + } + /* set AIS report */ + if (port[Port_cnt] & 0x008) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: PORT set " + "AIS report: card(%d) port(%d)\n", + __func__, HFC_cnt + 1, 1); + test_and_set_bit(HFC_CFG_REPORT_AIS, + &hc->chan[hc->dslot].cfg); + } + /* set SLIP report */ + if (port[Port_cnt] & 0x010) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "%s: PORT set SLIP report: " + "card(%d) port(%d)\n", + __func__, HFC_cnt + 1, 1); + test_and_set_bit(HFC_CFG_REPORT_SLIP, + &hc->chan[hc->dslot].cfg); + } + /* set RDI report */ + if (port[Port_cnt] & 0x020) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "%s: PORT set RDI report: " + "card(%d) port(%d)\n", + __func__, HFC_cnt + 1, 1); + test_and_set_bit(HFC_CFG_REPORT_RDI, + &hc->chan[hc->dslot].cfg); + } + /* set CRC-4 Mode */ + if (!(port[Port_cnt] & 0x100)) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: PORT turn on CRC4 report:" + " card(%d) port(%d)\n", + __func__, HFC_cnt + 1, 1); + test_and_set_bit(HFC_CFG_CRC4, + &hc->chan[hc->dslot].cfg); + } else { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: PORT turn off CRC4" + " report: card(%d) port(%d)\n", + __func__, HFC_cnt + 1, 1); + } + /* set forced clock */ + if (port[Port_cnt] & 0x0200) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: PORT force getting clock from " + "E1: card(%d) port(%d)\n", + __func__, HFC_cnt + 1, 1); + test_and_set_bit(HFC_CHIP_E1CLOCK_GET, &hc->chip); + } else + if (port[Port_cnt] & 0x0400) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: PORT force putting clock to " + "E1: card(%d) port(%d)\n", + __func__, HFC_cnt + 1, 1); + test_and_set_bit(HFC_CHIP_E1CLOCK_PUT, &hc->chip); + } + /* set JATT PLL */ + if (port[Port_cnt] & 0x0800) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: PORT disable JATT PLL on " + "E1: card(%d) port(%d)\n", + __func__, HFC_cnt + 1, 1); + test_and_set_bit(HFC_CHIP_RX_SYNC, &hc->chip); + } + /* set elastic jitter buffer */ + if (port[Port_cnt] & 0x3000) { + hc->chan[hc->dslot].jitter = (port[Port_cnt]>>12) & 0x3; + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "%s: PORT set elastic " + "buffer to %d: card(%d) port(%d)\n", + __func__, hc->chan[hc->dslot].jitter, + HFC_cnt + 1, 1); + } else + hc->chan[hc->dslot].jitter = 2; /* default */ + snprintf(name, MISDN_MAX_IDLEN - 1, "hfc-e1.%d", HFC_cnt + 1); + ret = mISDN_register_device(&dch->dev, name); + if (ret) + goto 
free_chan; + hc->created[0] = 1; + return ret; +free_chan: + release_port(hc, dch); + return ret; +} + +static int +init_multi_port(struct hfc_multi *hc, int pt) +{ + struct dchannel *dch; + struct bchannel *bch; + int ch, i, ret = 0; + char name[MISDN_MAX_IDLEN]; + + dch = kzalloc(sizeof(struct dchannel), GFP_KERNEL); + if (!dch) + return -ENOMEM; + dch->debug = debug; + mISDN_initdchannel(dch, MAX_DFRAME_LEN_L1, ph_state_change); + dch->hw = hc; + dch->dev.Dprotocols = (1 << ISDN_P_TE_S0) | (1 << ISDN_P_NT_S0); + dch->dev.Bprotocols = (1 << (ISDN_P_B_RAW & ISDN_P_B_MASK)) | + (1 << (ISDN_P_B_HDLC & ISDN_P_B_MASK)); + dch->dev.D.send = handle_dmsg; + dch->dev.D.ctrl = hfcm_dctrl; + dch->dev.nrbchan = 2; + i = pt << 2; + dch->slot = i + 2; + hc->chan[i + 2].dch = dch; + hc->chan[i + 2].port = pt; + hc->chan[i + 2].nt_timer = -1; + for (ch = 0; ch < dch->dev.nrbchan; ch++) { + bch = kzalloc(sizeof(struct bchannel), GFP_KERNEL); + if (!bch) { + printk(KERN_ERR "%s: no memory for bchannel\n", + __func__); + ret = -ENOMEM; + goto free_chan; + } + hc->chan[i + ch].coeff = kzalloc(512, GFP_KERNEL); + if (!hc->chan[i + ch].coeff) { + printk(KERN_ERR "%s: no memory for coeffs\n", + __func__); + ret = -ENOMEM; + goto free_chan; + } + bch->nr = ch + 1; + bch->slot = i + ch; + bch->debug = debug; + mISDN_initbchannel(bch, MAX_DATA_MEM); + bch->hw = hc; + bch->ch.send = handle_bmsg; + bch->ch.ctrl = hfcm_bctrl; + bch->ch.nr = ch + 1; + list_add(&bch->ch.list, &dch->dev.bchannels); + hc->chan[i + ch].bch = bch; + hc->chan[i + ch].port = pt; + test_and_set_bit(bch->nr, &dch->dev.channelmap[0]); + } + /* set master clock */ + if (port[Port_cnt] & 0x001) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "%s: PROTOCOL set master clock: " + "card(%d) port(%d)\n", + __func__, HFC_cnt + 1, pt + 1); + if (dch->dev.D.protocol != ISDN_P_TE_S0) { + printk(KERN_ERR "Error: Master clock " + "for port(%d) of card(%d) is only" + " possible with TE-mode\n", + pt + 1, HFC_cnt + 1); + ret = -EINVAL; + goto free_chan; + } + if (hc->masterclk >= 0) { + printk(KERN_ERR "Error: Master clock " + "for port(%d) of card(%d) already " + "defined for port(%d)\n", + pt + 1, HFC_cnt + 1, hc->masterclk+1); + ret = -EINVAL; + goto free_chan; + } + hc->masterclk = pt; + } + /* set transmitter line to non capacitive */ + if (port[Port_cnt] & 0x002) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "%s: PROTOCOL set non capacitive " + "transmitter: card(%d) port(%d)\n", + __func__, HFC_cnt + 1, pt + 1); + test_and_set_bit(HFC_CFG_NONCAP_TX, + &hc->chan[i + 2].cfg); + } + /* disable E-channel */ + if (port[Port_cnt] & 0x004) { + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "%s: PROTOCOL disable E-channel: " + "card(%d) port(%d)\n", + __func__, HFC_cnt + 1, pt + 1); + test_and_set_bit(HFC_CFG_DIS_ECHANNEL, + &hc->chan[i + 2].cfg); + } + snprintf(name, MISDN_MAX_IDLEN - 1, "hfc-%ds.%d/%d", + hc->type, HFC_cnt + 1, pt + 1); + ret = mISDN_register_device(&dch->dev, name); + if (ret) + goto free_chan; + hc->created[pt] = 1; + return ret; +free_chan: + release_port(hc, dch); + return ret; +} + +static int +hfcmulti_init(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct hm_map *m = (struct hm_map *)ent->driver_data; + int ret_err = 0; + int pt; + struct hfc_multi *hc; + u_long flags; + u_char dips = 0, pmj = 0; /* dip settings, port mode Jumpers */ + + if (HFC_cnt >= MAX_CARDS) { + printk(KERN_ERR "too many cards (max=%d).\n", + MAX_CARDS); + return -EINVAL; + } + if ((type[HFC_cnt] & 0xff) && 
(type[HFC_cnt] & 0xff) != m->type) { + printk(KERN_WARNING "HFC-MULTI: Card '%s:%s' type %d found but " + "type[%d] %d was supplied as module parameter\n", + m->vendor_name, m->card_name, m->type, HFC_cnt, + type[HFC_cnt] & 0xff); + printk(KERN_WARNING "HFC-MULTI: Load module without parameters " + "first, to see cards and their types."); + return -EINVAL; + } + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: Registering %s:%s chip type %d (0x%x)\n", + __func__, m->vendor_name, m->card_name, m->type, + type[HFC_cnt]); + + /* allocate card+fifo structure */ + hc = kzalloc(sizeof(struct hfc_multi), GFP_KERNEL); + if (!hc) { + printk(KERN_ERR "No kmem for HFC-Multi card\n"); + return -ENOMEM; + } + spin_lock_init(&hc->lock); + hc->mtyp = m; + hc->type = m->type; + hc->ports = m->ports; + hc->id = HFC_cnt; + hc->pcm = pcm[HFC_cnt]; + hc->io_mode = iomode[HFC_cnt]; + if (dslot[HFC_cnt] < 0) { + hc->dslot = 0; + printk(KERN_INFO "HFC-E1 card has disabled D-channel, but " + "31 B-channels\n"); + } if (dslot[HFC_cnt] > 0 && dslot[HFC_cnt] < 32) { + hc->dslot = dslot[HFC_cnt]; + printk(KERN_INFO "HFC-E1 card has alternating D-channel on " + "time slot %d\n", dslot[HFC_cnt]); + } else + hc->dslot = 16; + + /* set chip specific features */ + hc->masterclk = -1; + if (type[HFC_cnt] & 0x100) { + test_and_set_bit(HFC_CHIP_ULAW, &hc->chip); + silence = 0xff; /* ulaw silence */ + } else + silence = 0x2a; /* alaw silence */ + if (!(type[HFC_cnt] & 0x200)) + test_and_set_bit(HFC_CHIP_DTMF, &hc->chip); + + if (type[HFC_cnt] & 0x800) + test_and_set_bit(HFC_CHIP_PCM_SLAVE, &hc->chip); + if (type[HFC_cnt] & 0x1000) { + test_and_set_bit(HFC_CHIP_PCM_MASTER, &hc->chip); + test_and_clear_bit(HFC_CHIP_PCM_SLAVE, &hc->chip); + } + if (type[HFC_cnt] & 0x4000) + test_and_set_bit(HFC_CHIP_EXRAM_128, &hc->chip); + if (type[HFC_cnt] & 0x8000) + test_and_set_bit(HFC_CHIP_EXRAM_512, &hc->chip); + hc->slots = 32; + if (type[HFC_cnt] & 0x10000) + hc->slots = 64; + if (type[HFC_cnt] & 0x20000) + hc->slots = 128; + if (type[HFC_cnt] & 0x80000) { + test_and_set_bit(HFC_CHIP_WATCHDOG, &hc->chip); + hc->wdcount = 0; + hc->wdbyte = V_GPIO_OUT2; + printk(KERN_NOTICE "Watchdog enabled\n"); + } + + /* setup pci, hc->slots may change due to PLXSD */ + ret_err = setup_pci(hc, pdev, ent); + if (ret_err) { + if (hc == syncmaster) + syncmaster = NULL; + kfree(hc); + return ret_err; + } + + /* crate channels */ + for (pt = 0; pt < hc->ports; pt++) { + if (Port_cnt >= MAX_PORTS) { + printk(KERN_ERR "too many ports (max=%d).\n", + MAX_PORTS); + ret_err = -EINVAL; + goto free_card; + } + if (hc->type == 1) + ret_err = init_e1_port(hc, m); + else + ret_err = init_multi_port(hc, pt); + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG + "%s: Registering D-channel, card(%d) port(%d)" + "result %d\n", + __func__, HFC_cnt + 1, pt, ret_err); + + if (ret_err) { + while (pt) { /* release already registered ports */ + pt--; + release_port(hc, hc->chan[(pt << 2) + 2].dch); + } + goto free_card; + } + Port_cnt++; + } + + /* disp switches */ + switch (m->dip_type) { + case DIP_4S: + /* + * get DIP Setting for beroNet 1S/2S/4S cards + * check if Port Jumper config matches + * module param 'protocol' + * DIP Setting: (collect GPIO 13/14/15 (R_GPIO_IN1) + + * GPI 19/23 (R_GPI_IN2)) + */ + dips = ((~HFC_inb(hc, R_GPIO_IN1) & 0xE0) >> 5) | + ((~HFC_inb(hc, R_GPI_IN2) & 0x80) >> 3) | + (~HFC_inb(hc, R_GPI_IN2) & 0x08); + + /* Port mode (TE/NT) jumpers */ + pmj = ((HFC_inb(hc, R_GPI_IN3) >> 4) & 0xf); + + if (test_bit(HFC_CHIP_B410P, &hc->chip)) + 
pmj = ~pmj & 0xf; + + printk(KERN_INFO "%s: %s DIPs(0x%x) jumpers(0x%x)\n", + m->vendor_name, m->card_name, dips, pmj); + break; + case DIP_8S: + /* + * get DIP Setting for beroNet 8S0+ cards + * + * enable PCI auxbridge function + */ + HFC_outb(hc, R_BRG_PCM_CFG, 1 | V_PCM_CLK); + /* prepare access to auxport */ + outw(0x4000, hc->pci_iobase + 4); + /* + * some dummy reads are required to + * read valid DIP switch data + */ + dips = inb(hc->pci_iobase); + dips = inb(hc->pci_iobase); + dips = inb(hc->pci_iobase); + dips = ~inb(hc->pci_iobase) & 0x3F; + outw(0x0, hc->pci_iobase + 4); + /* disable PCI auxbridge function */ + HFC_outb(hc, R_BRG_PCM_CFG, V_PCM_CLK); + printk(KERN_INFO "%s: %s DIPs(0x%x)\n", + m->vendor_name, m->card_name, dips); + break; + case DIP_E1: + /* + * get DIP Setting for beroNet E1 cards + * DIP Setting: collect GPI 4/5/6/7 (R_GPI_IN0) + */ + dips = (~HFC_inb(hc, R_GPI_IN0) & 0xF0)>>4; + printk(KERN_INFO "%s: %s DIPs(0x%x)\n", + m->vendor_name, m->card_name, dips); + break; + } + + /* add to list */ + spin_lock_irqsave(&HFClock, flags); + list_add_tail(&hc->list, &HFClist); + spin_unlock_irqrestore(&HFClock, flags); + + /* initialize hardware */ + ret_err = init_card(hc); + if (ret_err) { + printk(KERN_ERR "init card returns %d\n", ret_err); + release_card(hc); + return ret_err; + } + + /* start IRQ and return */ + spin_lock_irqsave(&hc->lock, flags); + enable_hwirq(hc); + spin_unlock_irqrestore(&hc->lock, flags); + return 0; + +free_card: + release_io_hfcmulti(hc); + if (hc == syncmaster) + syncmaster = NULL; + kfree(hc); + return ret_err; +} + +static void __devexit hfc_remove_pci(struct pci_dev *pdev) +{ + struct hfc_multi *card = pci_get_drvdata(pdev); + u_long flags; + + if (debug) + printk(KERN_INFO "removing hfc_multi card vendor:%x " + "device:%x subvendor:%x subdevice:%x\n", + pdev->vendor, pdev->device, + pdev->subsystem_vendor, pdev->subsystem_device); + + if (card) { + spin_lock_irqsave(&HFClock, flags); + release_card(card); + spin_unlock_irqrestore(&HFClock, flags); + } else { + if (debug) + printk(KERN_WARNING "%s: drvdata allready removed\n", + __func__); + } +} + +#define VENDOR_CCD "Cologne Chip AG" +#define VENDOR_BN "beroNet GmbH" +#define VENDOR_DIG "Digium Inc." 
+#define VENDOR_JH "Junghanns.NET GmbH" +#define VENDOR_PRIM "PrimuX" + +static const struct hm_map hfcm_map[] = { +/*0*/ {VENDOR_BN, "HFC-1S Card (mini PCI)", 4, 1, 1, 3, 0, DIP_4S, 0}, +/*1*/ {VENDOR_BN, "HFC-2S Card", 4, 2, 1, 3, 0, DIP_4S}, +/*2*/ {VENDOR_BN, "HFC-2S Card (mini PCI)", 4, 2, 1, 3, 0, DIP_4S, 0}, +/*3*/ {VENDOR_BN, "HFC-4S Card", 4, 4, 1, 2, 0, DIP_4S, 0}, +/*4*/ {VENDOR_BN, "HFC-4S Card (mini PCI)", 4, 4, 1, 2, 0, 0, 0}, +/*5*/ {VENDOR_CCD, "HFC-4S Eval (old)", 4, 4, 0, 0, 0, 0, 0}, +/*6*/ {VENDOR_CCD, "HFC-4S IOB4ST", 4, 4, 1, 2, 0, 0, 0}, +/*7*/ {VENDOR_CCD, "HFC-4S", 4, 4, 1, 2, 0, 0, 0}, +/*8*/ {VENDOR_DIG, "HFC-4S Card", 4, 4, 0, 2, 0, 0, HFC_IO_MODE_REGIO}, +/*9*/ {VENDOR_CCD, "HFC-4S Swyx 4xS0 SX2 QuadBri", 4, 4, 1, 2, 0, 0, 0}, +/*10*/ {VENDOR_JH, "HFC-4S (junghanns 2.0)", 4, 4, 1, 2, 0, 0, 0}, +/*11*/ {VENDOR_PRIM, "HFC-2S Primux Card", 4, 2, 0, 0, 0, 0, 0}, + +/*12*/ {VENDOR_BN, "HFC-8S Card", 8, 8, 1, 0, 0, 0, 0}, +/*13*/ {VENDOR_BN, "HFC-8S Card (+)", 8, 8, 1, 8, 0, DIP_8S, + HFC_IO_MODE_REGIO}, +/*14*/ {VENDOR_CCD, "HFC-8S Eval (old)", 8, 8, 0, 0, 0, 0, 0}, +/*15*/ {VENDOR_CCD, "HFC-8S IOB4ST Recording", 8, 8, 1, 0, 0, 0, 0}, + +/*16*/ {VENDOR_CCD, "HFC-8S IOB8ST", 8, 8, 1, 0, 0, 0, 0}, +/*17*/ {VENDOR_CCD, "HFC-8S", 8, 8, 1, 0, 0, 0, 0}, +/*18*/ {VENDOR_CCD, "HFC-8S", 8, 8, 1, 0, 0, 0, 0}, + +/*19*/ {VENDOR_BN, "HFC-E1 Card", 1, 1, 0, 1, 0, DIP_E1, 0}, +/*20*/ {VENDOR_BN, "HFC-E1 Card (mini PCI)", 1, 1, 0, 1, 0, 0, 0}, +/*21*/ {VENDOR_BN, "HFC-E1+ Card (Dual)", 1, 1, 0, 1, 0, DIP_E1, 0}, +/*22*/ {VENDOR_BN, "HFC-E1 Card (Dual)", 1, 1, 0, 1, 0, DIP_E1, 0}, + +/*23*/ {VENDOR_CCD, "HFC-E1 Eval (old)", 1, 1, 0, 0, 0, 0, 0}, +/*24*/ {VENDOR_CCD, "HFC-E1 IOB1E1", 1, 1, 0, 1, 0, 0, 0}, +/*25*/ {VENDOR_CCD, "HFC-E1", 1, 1, 0, 1, 0, 0, 0}, + +/*26*/ {VENDOR_CCD, "HFC-4S Speech Design", 4, 4, 0, 0, 0, 0, + HFC_IO_MODE_PLXSD}, +/*27*/ {VENDOR_CCD, "HFC-E1 Speech Design", 1, 1, 0, 0, 0, 0, + HFC_IO_MODE_PLXSD}, +/*28*/ {VENDOR_CCD, "HFC-4S OpenVox", 4, 4, 1, 0, 0, 0, 0}, +/*29*/ {VENDOR_CCD, "HFC-2S OpenVox", 4, 2, 1, 0, 0, 0, 0}, +/*30*/ {VENDOR_CCD, "HFC-8S OpenVox", 8, 8, 1, 0, 0, 0, 0}, +}; + +#undef H +#define H(x) ((unsigned long)&hfcm_map[x]) +static struct pci_device_id hfmultipci_ids[] __devinitdata = { + + /* Cards with HFC-4S Chip */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_BN1SM, 0, 0, H(0)}, /* BN1S mini PCI */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_BN2S, 0, 0, H(1)}, /* BN2S */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_BN2SM, 0, 0, H(2)}, /* BN2S mini PCI */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_BN4S, 0, 0, H(3)}, /* BN4S */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_BN4SM, 0, 0, H(4)}, /* BN4S mini PCI */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD, + PCI_DEVICE_ID_CCD_HFC4S, 0, 0, H(5)}, /* Old Eval */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_IOB4ST, 0, 0, H(6)}, /* IOB4ST */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_HFC4S, 0, 0, H(7)}, /* 4S */ + { PCI_VENDOR_ID_DIGIUM, PCI_DEVICE_ID_DIGIUM_HFC4S, + PCI_VENDOR_ID_DIGIUM, PCI_DEVICE_ID_DIGIUM_HFC4S, 0, 0, H(8)}, + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_SWYX4S, 0, 0, H(9)}, /* 4S Swyx */ + { 
PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_JH4S20, 0, 0, H(10)}, + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_PMX2S, 0, 0, H(11)}, /* Primux */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_OV4S, 0, 0, H(28)}, /* OpenVox 4 */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_OV2S, 0, 0, H(29)}, /* OpenVox 2 */ + + /* Cards with HFC-8S Chip */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_BN8S, 0, 0, H(12)}, /* BN8S */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_BN8SP, 0, 0, H(13)}, /* BN8S+ */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD, + PCI_DEVICE_ID_CCD_HFC8S, 0, 0, H(14)}, /* old Eval */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_IOB8STR, 0, 0, H(15)}, + /* IOB8ST Recording */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_IOB8ST, 0, 0, H(16)}, /* IOB8ST */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_IOB8ST_1, 0, 0, H(17)}, /* IOB8ST */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_HFC8S, 0, 0, H(18)}, /* 8S */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_OV8S, 0, 0, H(30)}, /* OpenVox 8 */ + + + /* Cards with HFC-E1 Chip */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_BNE1, 0, 0, H(19)}, /* BNE1 */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_BNE1M, 0, 0, H(20)}, /* BNE1 mini PCI */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_BNE1DP, 0, 0, H(21)}, /* BNE1 + (Dual) */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_BNE1D, 0, 0, H(22)}, /* BNE1 (Dual) */ + + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD, + PCI_DEVICE_ID_CCD_HFCE1, 0, 0, H(23)}, /* Old Eval */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_IOB1E1, 0, 0, H(24)}, /* IOB1E1 */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_HFCE1, 0, 0, H(25)}, /* E1 */ + + { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_SPD4S, 0, 0, H(26)}, /* PLX PCI Bridge */ + { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, PCI_VENDOR_ID_CCD, + PCI_SUBDEVICE_ID_CCD_SPDE1, 0, 0, H(27)}, /* PLX PCI Bridge */ + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC4S, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0}, + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFC8S, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0}, + { PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_HFCE1, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0}, + {0, } +}; +#undef H + +MODULE_DEVICE_TABLE(pci, hfmultipci_ids); + +static int +hfcmulti_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct hm_map *m = (struct hm_map *)ent->driver_data; + int ret; + + if (m == NULL) { + if (ent->vendor == PCI_VENDOR_ID_CCD) + if (ent->device == PCI_DEVICE_ID_CCD_HFC4S || + ent->device == PCI_DEVICE_ID_CCD_HFC8S || + ent->device == PCI_DEVICE_ID_CCD_HFCE1) + printk(KERN_ERR + "unknown HFC multiport controller " + "(vendor:%x device:%x subvendor:%x " + "subdevice:%x) Please contact the " + "driver maintainer for support.\n", + 
ent->vendor, ent->device, + ent->subvendor, ent->subdevice); + return -ENODEV; + } + ret = hfcmulti_init(pdev, ent); + if (ret) + return ret; + HFC_cnt++; + printk(KERN_INFO "%d devices registered\n", HFC_cnt); + return 0; +} + +static struct pci_driver hfcmultipci_driver = { + .name = "hfc_multi", + .probe = hfcmulti_probe, + .remove = __devexit_p(hfc_remove_pci), + .id_table = hfmultipci_ids, +}; + +static void __exit +HFCmulti_cleanup(void) +{ + struct hfc_multi *card, *next; + + /* unload interrupt function symbol */ + if (hfc_interrupt) + symbol_put(ztdummy_extern_interrupt); + if (register_interrupt) + symbol_put(ztdummy_register_interrupt); + if (unregister_interrupt) { + if (interrupt_registered) { + interrupt_registered = 0; + unregister_interrupt(); + } + symbol_put(ztdummy_unregister_interrupt); + } + + list_for_each_entry_safe(card, next, &HFClist, list) + release_card(card); + /* get rid of all devices of this driver */ + pci_unregister_driver(&hfcmultipci_driver); +} + +static int __init +HFCmulti_init(void) +{ + int err; + +#ifdef IRQ_DEBUG + printk(KERN_ERR "%s: IRQ_DEBUG IS ENABLED!\n", __func__); +#endif + + spin_lock_init(&HFClock); + spin_lock_init(&plx_lock); + + if (debug & DEBUG_HFCMULTI_INIT) + printk(KERN_DEBUG "%s: init entered\n", __func__); + +#ifdef __BIG_ENDIAN +#error "not running on big endian machines now" +#endif + hfc_interrupt = symbol_get(ztdummy_extern_interrupt); + register_interrupt = symbol_get(ztdummy_register_interrupt); + unregister_interrupt = symbol_get(ztdummy_unregister_interrupt); + printk(KERN_INFO "mISDN: HFC-multi driver %s\n", + hfcmulti_revision); + + switch (poll) { + case 0: + poll_timer = 6; + poll = 128; + break; + /* + * wenn dieses break nochmal verschwindet, + * gibt es heisse ohren :-) + * "without the break you will get hot ears ???" 
+ */ + case 8: + poll_timer = 2; + break; + case 16: + poll_timer = 3; + break; + case 32: + poll_timer = 4; + break; + case 64: + poll_timer = 5; + break; + case 128: + poll_timer = 6; + break; + case 256: + poll_timer = 7; + break; + default: + printk(KERN_ERR + "%s: Wrong poll value (%d).\n", __func__, poll); + err = -EINVAL; + return err; + + } + + err = pci_register_driver(&hfcmultipci_driver); + if (err < 0) { + printk(KERN_ERR "error registering pci driver: %x\n", err); + if (hfc_interrupt) + symbol_put(ztdummy_extern_interrupt); + if (register_interrupt) + symbol_put(ztdummy_register_interrupt); + if (unregister_interrupt) { + if (interrupt_registered) { + interrupt_registered = 0; + unregister_interrupt(); + } + symbol_put(ztdummy_unregister_interrupt); + } + return err; + } + return 0; +} + + +module_init(HFCmulti_init); +module_exit(HFCmulti_cleanup); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c3b1761aba26..ffe479ba0779 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1832,7 +1832,13 @@ #define PCI_DEVICE_ID_MOXA_C320 0x3200 #define PCI_VENDOR_ID_CCD 0x1397 +#define PCI_DEVICE_ID_CCD_HFC4S 0x08B4 +#define PCI_SUBDEVICE_ID_CCD_PMX2S 0x1234 +#define PCI_DEVICE_ID_CCD_HFC8S 0x16B8 #define PCI_DEVICE_ID_CCD_2BD0 0x2bd0 +#define PCI_DEVICE_ID_CCD_HFCE1 0x30B1 +#define PCI_SUBDEVICE_ID_CCD_SPD4S 0x3136 +#define PCI_SUBDEVICE_ID_CCD_SPDE1 0x3137 #define PCI_DEVICE_ID_CCD_B000 0xb000 #define PCI_DEVICE_ID_CCD_B006 0xb006 #define PCI_DEVICE_ID_CCD_B007 0xb007 @@ -1842,8 +1848,32 @@ #define PCI_DEVICE_ID_CCD_B00B 0xb00b #define PCI_DEVICE_ID_CCD_B00C 0xb00c #define PCI_DEVICE_ID_CCD_B100 0xb100 +#define PCI_SUBDEVICE_ID_CCD_IOB4ST 0xB520 +#define PCI_SUBDEVICE_ID_CCD_IOB8STR 0xB521 +#define PCI_SUBDEVICE_ID_CCD_IOB8ST 0xB522 +#define PCI_SUBDEVICE_ID_CCD_IOB1E1 0xB523 +#define PCI_SUBDEVICE_ID_CCD_SWYX4S 0xB540 +#define PCI_SUBDEVICE_ID_CCD_JH4S20 0xB550 +#define PCI_SUBDEVICE_ID_CCD_IOB8ST_1 0xB552 +#define PCI_SUBDEVICE_ID_CCD_BN4S 0xB560 +#define PCI_SUBDEVICE_ID_CCD_BN8S 0xB562 +#define PCI_SUBDEVICE_ID_CCD_BNE1 0xB563 +#define PCI_SUBDEVICE_ID_CCD_BNE1D 0xB564 +#define PCI_SUBDEVICE_ID_CCD_BNE1DP 0xB565 +#define PCI_SUBDEVICE_ID_CCD_BN2S 0xB566 +#define PCI_SUBDEVICE_ID_CCD_BN1SM 0xB567 +#define PCI_SUBDEVICE_ID_CCD_BN4SM 0xB568 +#define PCI_SUBDEVICE_ID_CCD_BN2SM 0xB569 +#define PCI_SUBDEVICE_ID_CCD_BNE1M 0xB56A +#define PCI_SUBDEVICE_ID_CCD_BN8SP 0xB56B +#define PCI_SUBDEVICE_ID_CCD_HFC4S 0xB620 +#define PCI_SUBDEVICE_ID_CCD_HFC8S 0xB622 #define PCI_DEVICE_ID_CCD_B700 0xb700 #define PCI_DEVICE_ID_CCD_B701 0xb701 +#define PCI_SUBDEVICE_ID_CCD_HFCE1 0xC523 +#define PCI_SUBDEVICE_ID_CCD_OV2S 0xE884 +#define PCI_SUBDEVICE_ID_CCD_OV4S 0xE888 +#define PCI_SUBDEVICE_ID_CCD_OV8S 0xE998 #define PCI_VENDOR_ID_EXAR 0x13a8 #define PCI_DEVICE_ID_EXAR_XR17C152 0x0152 @@ -2523,6 +2553,9 @@ #define PCI_VENDOR_ID_3COM_2 0xa727 +#define PCI_VENDOR_ID_DIGIUM 0xd161 +#define PCI_DEVICE_ID_DIGIUM_HFC4S 0xb410 + #define PCI_SUBVENDOR_ID_EXSYS 0xd84d #define PCI_SUBDEVICE_ID_EXSYS_4014 0x4014 #define PCI_SUBDEVICE_ID_EXSYS_4055 0x4055 -- cgit v1.2.3 From 93bc4e89c260d91576840c4881d1066d84ccd422 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 26 Jul 2008 17:49:33 -0700 Subject: netfilter: fix double-free and use-after free As suggested by Patrick McHardy, introduce a __krealloc() that doesn't free the original buffer to fix a double-free and use-after-free bug introduced by me in netfilter that uses RCU. 
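The distinction matters under RCU: krealloc() frees the old buffer immediately, while readers inside an RCU read-side critical section may still be dereferencing it, and the caller's own deferred free of that buffer then turns into a double free. A minimal sketch of the intended pattern follows, with hypothetical structure and helper names; the actual one-line change to nf_conntrack_extend.c is in the diff below.

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>

/* Hypothetical RCU-visible, resizable extension block. */
struct ext_block {
	struct rcu_head rcu;
	unsigned int len;
	char data[];
};

static void ext_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct ext_block, rcu));
}

/* Caller holds the lock that serializes updates of *slot. */
static int ext_grow(struct ext_block **slot, unsigned int newlen, gfp_t gfp)
{
	struct ext_block *old = *slot;
	struct ext_block *new;

	new = __krealloc(old, newlen, gfp);	/* copies @old, never frees it */
	if (!new)
		return -ENOMEM;
	new->len = newlen;

	rcu_assign_pointer(*slot, new);		/* publish the resized buffer */
	if (old && old != new)
		call_rcu(&old->rcu, ext_free_rcu);	/* free the old copy after a grace period */
	return 0;
}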
Reported-by: Patrick McHardy Signed-off-by: Pekka Enberg Tested-by: Dieter Ries Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/slab.h | 1 + mm/util.c | 44 ++++++++++++++++++++++++++++--------- net/netfilter/nf_conntrack_extend.c | 2 +- 3 files changed, 36 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 9aa90a6f20e0..be6f1d40b66a 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -96,6 +96,7 @@ int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr); /* * Common kmalloc functions provided by all allocators */ +void * __must_check __krealloc(const void *, size_t, gfp_t); void * __must_check krealloc(const void *, size_t, gfp_t); void kfree(const void *); size_t ksize(const void *); diff --git a/mm/util.c b/mm/util.c index 8f18683825bc..6ef9e9943f62 100644 --- a/mm/util.c +++ b/mm/util.c @@ -68,25 +68,22 @@ void *kmemdup(const void *src, size_t len, gfp_t gfp) EXPORT_SYMBOL(kmemdup); /** - * krealloc - reallocate memory. The contents will remain unchanged. + * __krealloc - like krealloc() but don't free @p. * @p: object to reallocate memory for. * @new_size: how many bytes of memory are required. * @flags: the type of memory to allocate. * - * The contents of the object pointed to are preserved up to the - * lesser of the new and old sizes. If @p is %NULL, krealloc() - * behaves exactly like kmalloc(). If @size is 0 and @p is not a - * %NULL pointer, the object pointed to is freed. + * This function is like krealloc() except it never frees the originally + * allocated buffer. Use this if you don't want to free the buffer immediately + * like, for example, with RCU. */ -void *krealloc(const void *p, size_t new_size, gfp_t flags) +void *__krealloc(const void *p, size_t new_size, gfp_t flags) { void *ret; size_t ks = 0; - if (unlikely(!new_size)) { - kfree(p); + if (unlikely(!new_size)) return ZERO_SIZE_PTR; - } if (p) ks = ksize(p); @@ -95,10 +92,37 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags) return (void *)p; ret = kmalloc_track_caller(new_size, flags); - if (ret && p) { + if (ret && p) memcpy(ret, p, ks); + + return ret; +} +EXPORT_SYMBOL(__krealloc); + +/** + * krealloc - reallocate memory. The contents will remain unchanged. + * @p: object to reallocate memory for. + * @new_size: how many bytes of memory are required. + * @flags: the type of memory to allocate. + * + * The contents of the object pointed to are preserved up to the + * lesser of the new and old sizes. If @p is %NULL, krealloc() + * behaves exactly like kmalloc(). If @size is 0 and @p is not a + * %NULL pointer, the object pointed to is freed. 
+ */ +void *krealloc(const void *p, size_t new_size, gfp_t flags) +{ + void *ret; + + if (unlikely(!new_size)) { kfree(p); + return ZERO_SIZE_PTR; } + + ret = __krealloc(p, new_size, flags); + if (ret && p != ret) + kfree(p); + return ret; } EXPORT_SYMBOL(krealloc); diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 3469bc71a385..c956ef7eeecb 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -95,7 +95,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) newlen = newoff + t->len; rcu_read_unlock(); - new = krealloc(ct->ext, newlen, gfp); + new = __krealloc(ct->ext, newlen, gfp); if (!new) return NULL; -- cgit v1.2.3 From d2d9648ec6858e19d16a0b16da62534e85888653 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 1 Jul 2008 14:16:09 +0200 Subject: [PATCH] reuse xxx_fifo_fops for xxx_pipe_fops Merge fifo and pipe file_operations. Signed-off-by: Denys Vlasenko Signed-off-by: Al Viro --- fs/fifo.c | 8 ++++---- fs/pipe.c | 51 ++++++++------------------------------------------- include/linux/fs.h | 6 +++--- 3 files changed, 15 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/fs/fifo.c b/fs/fifo.c index 9785e36f81e7..987bf9411495 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -57,7 +57,7 @@ static int fifo_open(struct inode *inode, struct file *filp) * POSIX.1 says that O_NONBLOCK means return with the FIFO * opened, even when there is no process writing the FIFO. */ - filp->f_op = &read_fifo_fops; + filp->f_op = &read_pipefifo_fops; pipe->r_counter++; if (pipe->readers++ == 0) wake_up_partner(inode); @@ -86,7 +86,7 @@ static int fifo_open(struct inode *inode, struct file *filp) if ((filp->f_flags & O_NONBLOCK) && !pipe->readers) goto err; - filp->f_op = &write_fifo_fops; + filp->f_op = &write_pipefifo_fops; pipe->w_counter++; if (!pipe->writers++) wake_up_partner(inode); @@ -105,7 +105,7 @@ static int fifo_open(struct inode *inode, struct file *filp) * This implementation will NEVER block on a O_RDWR open, since * the process can at least talk to itself. */ - filp->f_op = &rdwr_fifo_fops; + filp->f_op = &rdwr_pipefifo_fops; pipe->readers++; pipe->writers++; @@ -151,5 +151,5 @@ err_nocleanup: * depending on the access mode of the file... */ const struct file_operations def_fifo_fops = { - .open = fifo_open, /* will set read or write pipe_fops */ + .open = fifo_open, /* will set read_ or write_pipefifo_fops */ }; diff --git a/fs/pipe.c b/fs/pipe.c index 10c4e9aa5c49..fcba6542b8d0 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -777,45 +777,10 @@ pipe_rdwr_open(struct inode *inode, struct file *filp) /* * The file_operations structs are not static because they * are also used in linux/fs/fifo.c to do operations on FIFOs. + * + * Pipes reuse fifos' file_operations structs. 
*/ -const struct file_operations read_fifo_fops = { - .llseek = no_llseek, - .read = do_sync_read, - .aio_read = pipe_read, - .write = bad_pipe_w, - .poll = pipe_poll, - .unlocked_ioctl = pipe_ioctl, - .open = pipe_read_open, - .release = pipe_read_release, - .fasync = pipe_read_fasync, -}; - -const struct file_operations write_fifo_fops = { - .llseek = no_llseek, - .read = bad_pipe_r, - .write = do_sync_write, - .aio_write = pipe_write, - .poll = pipe_poll, - .unlocked_ioctl = pipe_ioctl, - .open = pipe_write_open, - .release = pipe_write_release, - .fasync = pipe_write_fasync, -}; - -const struct file_operations rdwr_fifo_fops = { - .llseek = no_llseek, - .read = do_sync_read, - .aio_read = pipe_read, - .write = do_sync_write, - .aio_write = pipe_write, - .poll = pipe_poll, - .unlocked_ioctl = pipe_ioctl, - .open = pipe_rdwr_open, - .release = pipe_rdwr_release, - .fasync = pipe_rdwr_fasync, -}; - -static const struct file_operations read_pipe_fops = { +const struct file_operations read_pipefifo_fops = { .llseek = no_llseek, .read = do_sync_read, .aio_read = pipe_read, @@ -827,7 +792,7 @@ static const struct file_operations read_pipe_fops = { .fasync = pipe_read_fasync, }; -static const struct file_operations write_pipe_fops = { +const struct file_operations write_pipefifo_fops = { .llseek = no_llseek, .read = bad_pipe_r, .write = do_sync_write, @@ -839,7 +804,7 @@ static const struct file_operations write_pipe_fops = { .fasync = pipe_write_fasync, }; -static const struct file_operations rdwr_pipe_fops = { +const struct file_operations rdwr_pipefifo_fops = { .llseek = no_llseek, .read = do_sync_read, .aio_read = pipe_read, @@ -927,7 +892,7 @@ static struct inode * get_pipe_inode(void) inode->i_pipe = pipe; pipe->readers = pipe->writers = 1; - inode->i_fop = &rdwr_pipe_fops; + inode->i_fop = &rdwr_pipefifo_fops; /* * Mark the inode dirty from the very beginning, @@ -978,7 +943,7 @@ struct file *create_write_pipe(int flags) d_instantiate(dentry, inode); err = -ENFILE; - f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipe_fops); + f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops); if (!f) goto err_dentry; f->f_mapping = inode->i_mapping; @@ -1020,7 +985,7 @@ struct file *create_read_pipe(struct file *wrf, int flags) f->f_pos = 0; f->f_flags = O_RDONLY | (flags & O_NONBLOCK); - f->f_op = &read_pipe_fops; + f->f_op = &read_pipefifo_fops; f->f_mode = FMODE_READ; f->f_version = 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index 53d2edb709b3..7721a2ac9c0e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1696,9 +1696,9 @@ extern void init_special_inode(struct inode *, umode_t, dev_t); extern void make_bad_inode(struct inode *); extern int is_bad_inode(struct inode *); -extern const struct file_operations read_fifo_fops; -extern const struct file_operations write_fifo_fops; -extern const struct file_operations rdwr_fifo_fops; +extern const struct file_operations read_pipefifo_fops; +extern const struct file_operations write_pipefifo_fops; +extern const struct file_operations rdwr_pipefifo_fops; extern int fs_may_remount_ro(struct super_block *); -- cgit v1.2.3 From 734550921e9b7ab924a43aa3d0bd4239dac4fbf1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 14 Jul 2008 21:22:20 -0400 Subject: [PATCH] beginning of sysctl cleanup - ctl_table_set New object: set of sysctls [currently - root and per-net-ns]. Contains: pointer to parent set, list of tables and "should I see this set?" method (->is_seen(set)). 
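In practice each namespace embeds one such set and supplies the two callbacks; a minimal sketch, modelled on the net/sysctl_net.c hunk further below (function names here are illustrative, registration of the root is omitted):

#include <linux/sysctl.h>
#include <linux/nsproxy.h>
#include <linux/sched.h>
#include <net/net_namespace.h>

static struct ctl_table_set *
my_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
{
	/* hand back the set embedded in the caller's network namespace */
	return &namespaces->net_ns->sysctls;
}

static int my_is_seen(struct ctl_table_set *set)
{
	/* a registered header is visible only inside the owning namespace */
	return &current->nsproxy->net_ns->sysctls == set;
}

static struct ctl_table_root my_root = {
	.lookup = my_lookup,
};

static int my_ns_init(struct net *net)
{
	/* NULL parent means "fall back to the global default_set" */
	setup_sysctl_set(&net->sysctls, NULL, my_is_seen);
	return 0;
}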
Current lists of tables are subsumed by that; net-ns contains such a beast. ->lookup() for ctl_table_root returns pointer to ctl_table_set instead of that to ->list of that ctl_table_set. [folded compile fixes by rdd for configs without sysctl] Signed-off-by: Al Viro --- include/linux/sysctl.h | 15 +++++++++++++-- include/net/net_namespace.h | 4 +++- kernel/sysctl.c | 41 +++++++++++++++++++++++++++++++---------- net/sysctl_net.c | 22 ++++++++++------------ 4 files changed, 57 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 24141b4d1a11..c1e0cf408af9 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -947,6 +947,16 @@ struct ctl_table; struct nsproxy; struct ctl_table_root; +struct ctl_table_set { + struct list_head list; + struct ctl_table_set *parent; + int (*is_seen)(struct ctl_table_set *); +}; + +extern void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)); + extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, struct ctl_table_header *prev); @@ -1049,8 +1059,8 @@ struct ctl_table struct ctl_table_root { struct list_head root_list; - struct list_head header_list; - struct list_head *(*lookup)(struct ctl_table_root *root, + struct ctl_table_set default_set; + struct ctl_table_set *(*lookup)(struct ctl_table_root *root, struct nsproxy *namespaces); int (*permissions)(struct ctl_table_root *root, struct nsproxy *namespaces, struct ctl_table *table); @@ -1066,6 +1076,7 @@ struct ctl_table_header struct completion *unregistering; struct ctl_table *ctl_table_arg; struct ctl_table_root *root; + struct ctl_table_set *set; }; /* struct ctl_path describes where in the hierarchy a table is added */ diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 3855620b78a9..a8eb43cf0c7e 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -38,7 +38,9 @@ struct net { struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; - struct list_head sysctl_table_headers; +#ifdef CONFIG_SYSCTL + struct ctl_table_set sysctls; +#endif struct net_device *loopback_dev; /* The loopback */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 35a50db9b6ce..8ee4a0619fbb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -160,12 +160,13 @@ static struct ctl_table root_table[]; static struct ctl_table_root sysctl_table_root; static struct ctl_table_header root_table_header = { .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.header_list), + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list), .root = &sysctl_table_root, + .set = &sysctl_table_root.default_set, }; static struct ctl_table_root sysctl_table_root = { .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), - .header_list = LIST_HEAD_INIT(root_table_header.ctl_entry), + .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), }; static struct ctl_table kern_table[]; @@ -1403,14 +1404,20 @@ void sysctl_head_finish(struct ctl_table_header *head) spin_unlock(&sysctl_lock); } +static struct ctl_table_set * +lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) +{ + struct ctl_table_set *set = &root->default_set; + if (root->lookup) + set = root->lookup(root, namespaces); + return set; +} + static struct list_head * lookup_header_list(struct ctl_table_root *root, struct 
nsproxy *namespaces) { - struct list_head *header_list; - header_list = &root->header_list; - if (root->lookup) - header_list = root->lookup(root, namespaces); - return header_list; + struct ctl_table_set *set = lookup_header_set(root, namespaces); + return &set->list; } struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, @@ -1720,7 +1727,6 @@ struct ctl_table_header *__register_sysctl_paths( struct nsproxy *namespaces, const struct ctl_path *path, struct ctl_table *table) { - struct list_head *header_list; struct ctl_table_header *header; struct ctl_table *new, **prevp; unsigned int n, npath; @@ -1772,8 +1778,8 @@ struct ctl_table_header *__register_sysctl_paths( } #endif spin_lock(&sysctl_lock); - header_list = lookup_header_list(root, namespaces); - list_add_tail(&header->ctl_entry, header_list); + header->set = lookup_header_set(root, namespaces); + list_add_tail(&header->ctl_entry, &header->set->list); spin_unlock(&sysctl_lock); return header; @@ -1832,6 +1838,15 @@ void unregister_sysctl_table(struct ctl_table_header * header) kfree(header); } +void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)) +{ + INIT_LIST_HEAD(&p->list); + p->parent = parent ? parent : &sysctl_table_root.default_set; + p->is_seen = is_seen; +} + #else /* !CONFIG_SYSCTL */ struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { @@ -1848,6 +1863,12 @@ void unregister_sysctl_table(struct ctl_table_header * table) { } +void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)) +{ +} + #endif /* CONFIG_SYSCTL */ /* diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 63ada437fc2f..cefbc367d8be 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -29,10 +29,15 @@ #include #endif -static struct list_head * +static struct ctl_table_set * net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces) { - return &namespaces->net_ns->sysctl_table_headers; + return &namespaces->net_ns->sysctls; +} + +static int is_seen(struct ctl_table_set *set) +{ + return &current->nsproxy->net_ns->sysctls == set; } /* Return standard mode bits for table entry.
*/ @@ -53,13 +58,6 @@ static struct ctl_table_root net_sysctl_root = { .permissions = net_ctl_permissions, }; -static LIST_HEAD(net_sysctl_ro_tables); -static struct list_head *net_ctl_ro_header_lookup(struct ctl_table_root *root, - struct nsproxy *namespaces) -{ - return &net_sysctl_ro_tables; -} - static int net_ctl_ro_header_perms(struct ctl_table_root *root, struct nsproxy *namespaces, struct ctl_table *table) { @@ -70,19 +68,18 @@ static int net_ctl_ro_header_perms(struct ctl_table_root *root, } static struct ctl_table_root net_sysctl_ro_root = { - .lookup = net_ctl_ro_header_lookup, .permissions = net_ctl_ro_header_perms, }; static int sysctl_net_init(struct net *net) { - INIT_LIST_HEAD(&net->sysctl_table_headers); + setup_sysctl_set(&net->sysctls, NULL, is_seen); return 0; } static void sysctl_net_exit(struct net *net) { - WARN_ON(!list_empty(&net->sysctl_table_headers)); + WARN_ON(!list_empty(&net->sysctls.list)); return; } @@ -98,6 +95,7 @@ static __init int sysctl_init(void) if (ret) goto out; register_sysctl_root(&net_sysctl_root); + setup_sysctl_set(&net_sysctl_ro_root.default_set, NULL, NULL); register_sysctl_root(&net_sysctl_ro_root); out: return ret; -- cgit v1.2.3 From f7e6ced4061da509f737541ca4dbd44d83a6e82f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Jul 2008 01:44:23 -0400 Subject: [PATCH] allow delayed freeing of ctl_table_header Refcount the sucker; instead of freeing it by the end of unregistration just drop the refcount and free only when it hits zero. Make sure that we _always_ make ->unregistering non-NULL in start_unregistering(). That allows anybody to get a reference to such puppy, preventing its freeing and reuse. It does *not* block unregistration. Anybody who holds such a reference can * try to grab a "use" reference (ctl_head_grab()); that will succeeds if and only if it hadn't entered unregistration yet. If it succeeds, we can use it in all normal ways until we release the "use" reference (with ctl_head_finish()). Note that this relies on having ->unregistering become non-NULL in all cases when one starts to unregister the sucker. * keep pointers to ctl_table entries; they *can* be freed if the entire thing is unregistered. However, if ctl_head_grab() succeeds, we know that unregistration had not happened (and will not happen until ctl_head_finish()) and such pointers can be used safely. IOW, now we can have inodes under /proc/sys keep references to ctl_table entries, protecting them with references to ctl_table_header and grabbing the latter for the duration of operations that require access to ctl_table. That won't cause deadlocks, since unregistration will not be stopped by mere keeping a reference to ctl_table_header. 
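A minimal sketch of the calling pattern this enables (the my_* names are hypothetical, not from the patch) for something that caches a pointer into a registered table:

    struct my_ref {
            struct ctl_table_header *head;
            struct ctl_table *entry;
    };

    static void my_cache(struct my_ref *r, struct ctl_table_header *head,
                         struct ctl_table *entry)
    {
            sysctl_head_get(head);          /* long-term reference: prevents
                                             * freeing, not unregistration */
            r->head = head;
            r->entry = entry;
    }

    static int my_use(struct my_ref *r)
    {
            struct ctl_table_header *h = sysctl_head_grab(r->head);

            if (IS_ERR(h))                  /* unregistration already started */
                    return PTR_ERR(h);
            /* r->entry may safely be dereferenced until ..._finish() */
            sysctl_head_finish(h);
            return 0;
    }

    static void my_release(struct my_ref *r)
    {
            sysctl_head_put(r->head);       /* may kfree() the header */
    }
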
Signed-off-by: Al Viro --- include/linux/sysctl.h | 6 ++++++ kernel/sysctl.c | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index c1e0cf408af9..956264d09ba0 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -957,6 +957,11 @@ extern void setup_sysctl_set(struct ctl_table_set *p, struct ctl_table_set *parent, int (*is_seen)(struct ctl_table_set *)); +struct ctl_table_header; + +extern void sysctl_head_get(struct ctl_table_header *); +extern void sysctl_head_put(struct ctl_table_header *); +extern struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *); extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, struct ctl_table_header *prev); @@ -1073,6 +1078,7 @@ struct ctl_table_header struct ctl_table *ctl_table; struct list_head ctl_entry; int used; + int count; struct completion *unregistering; struct ctl_table *ctl_table_arg; struct ctl_table_root *root; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8ee4a0619fbb..60d9357e7172 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1387,6 +1387,9 @@ static void start_unregistering(struct ctl_table_header *p) spin_unlock(&sysctl_lock); wait_for_completion(&wait); spin_lock(&sysctl_lock); + } else { + /* anything non-NULL; we'll never dereference it */ + p->unregistering = ERR_PTR(-EINVAL); } /* * do not remove from the list until nobody holds it; walking the @@ -1395,6 +1398,32 @@ static void start_unregistering(struct ctl_table_header *p) list_del_init(&p->ctl_entry); } +void sysctl_head_get(struct ctl_table_header *head) +{ + spin_lock(&sysctl_lock); + head->count++; + spin_unlock(&sysctl_lock); +} + +void sysctl_head_put(struct ctl_table_header *head) +{ + spin_lock(&sysctl_lock); + if (!--head->count) + kfree(head); + spin_unlock(&sysctl_lock); +} + +struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) +{ + if (!head) + BUG(); + spin_lock(&sysctl_lock); + if (!use_table(head)) + head = ERR_PTR(-ENOENT); + spin_unlock(&sysctl_lock); + return head; +} + void sysctl_head_finish(struct ctl_table_header *head) { if (!head) @@ -1771,6 +1800,7 @@ struct ctl_table_header *__register_sysctl_paths( header->unregistering = NULL; header->root = root; sysctl_set_parent(NULL, header->ctl_table); + header->count = 1; #ifdef CONFIG_SYSCTL_SYSCALL_CHECK if (sysctl_check_table(namespaces, header->ctl_table)) { kfree(header); @@ -1834,8 +1864,9 @@ void unregister_sysctl_table(struct ctl_table_header * header) spin_lock(&sysctl_lock); start_unregistering(header); + if (!--header->count) + kfree(header); spin_unlock(&sysctl_lock); - kfree(header); } void setup_sysctl_set(struct ctl_table_set *p, @@ -1869,6 +1900,10 @@ void setup_sysctl_set(struct ctl_table_set *p, { } +void sysctl_head_put(struct ctl_table_header *head) +{ +} + #endif /* CONFIG_SYSCTL */ /* -- cgit v1.2.3 From ae7edecc9b8810770a8e5cb9a466ea4bdcfa8401 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Jul 2008 06:33:31 -0400 Subject: [PATCH] sysctl: keep track of tree relationships In a sense, that's the heart of the series. It's based on the following property of the trees we are actually asked to add: they can be split into stem that is already covered by registered trees and crown that is entirely new. IOW, if a/b and a/c/d are introduced by our tree, then a/c is also introduced by it. 
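As a concrete, hypothetical example of that split (not code from this patch): suppose some header already provides "a/b", and a new registration brings in "a/c/d". The stem "a" is already covered; the crown starts at "c", so the new header attaches by its "c" entry to the "a" directory it finds in the existing tree:

    #include <linux/init.h>
    #include <linux/sysctl.h>

    static int d_value;

    static struct ctl_table d_table[] = {
            {
                    .procname       = "d",
                    .data           = &d_value,
                    .maxlen         = sizeof(int),
                    .mode           = 0644,
                    .proc_handler   = proc_dointvec,
            },
            { }
    };

    static struct ctl_table c_dir[] = {
            { .procname = "c", .mode = 0555, .child = d_table },
            { }
    };

    /* shared stem: "a" already exists in a previously registered tree */
    static const struct ctl_path a_path[] = {
            { .procname = "a" },
            { }
    };

    static struct ctl_table_header *hdr;

    static int __init crown_example_init(void)
    {
            /* ends up with ->attached_by == the "c" entry and
             * ->attached_to == the "a" directory of the covering header */
            hdr = register_sysctl_paths(a_path, c_dir);
            return hdr ? 0 : -ENOMEM;
    }
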
That allows to associate tree and table entry with each node in the union; while directory nodes might be covered by many trees, only one will cover the node by its crown. And that will allow much saner logics for /proc/sys in the next patches. This patch introduces the data structures needed to keep track of that. When adding a sysctl table, we find a "parent" one. Which is to say, find the deepest node on its stem that already is present in one of the tables from our table set or its ancestor sets. That table will be our parent and that node in it - attachment point. Add our table to list anchored in parent, have it refer the parent and contents of attachment point. Also remember where its crown lives. Signed-off-by: Al Viro --- include/linux/sysctl.h | 3 +++ kernel/sysctl.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 956264d09ba0..3f6599aeb0db 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1083,6 +1083,9 @@ struct ctl_table_header struct ctl_table *ctl_table_arg; struct ctl_table_root *root; struct ctl_table_set *set; + struct ctl_table *attached_by; + struct ctl_table *attached_to; + struct ctl_table_header *parent; }; /* struct ctl_path describes where in the hierarchy a table is added */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 60d9357e7172..c9a0af887033 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1680,6 +1680,52 @@ static __init int sysctl_init(void) core_initcall(sysctl_init); +static int is_branch_in(struct ctl_table *branch, struct ctl_table *table) +{ + struct ctl_table *p; + const char *s = branch->procname; + + /* branch should have named subdirectory as its first element */ + if (!s || !branch->child) + return 0; + + /* ... 
and nothing else */ + if (branch[1].procname || branch[1].ctl_name) + return 0; + + /* table should contain subdirectory with the same name */ + for (p = table; p->procname || p->ctl_name; p++) { + if (!p->child) + continue; + if (p->procname && strcmp(p->procname, s) == 0) + return 1; + } + return 0; +} + +/* see if attaching q to p would be an improvement */ +static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) +{ + struct ctl_table *to = p->ctl_table, *by = q->ctl_table; + int is_better = 0; + int not_in_parent = !p->attached_by; + + while (is_branch_in(by, to)) { + if (by == q->attached_by) + is_better = 1; + if (to == p->attached_by) + not_in_parent = 1; + by = by->child; + to = to->child; + } + + if (is_better && not_in_parent) { + q->attached_by = by; + q->attached_to = to; + q->parent = p; + } +} + /** * __register_sysctl_paths - register a sysctl hierarchy * @root: List of sysctl headers to register on @@ -1759,6 +1805,7 @@ struct ctl_table_header *__register_sysctl_paths( struct ctl_table_header *header; struct ctl_table *new, **prevp; unsigned int n, npath; + struct ctl_table_set *set; /* Count the path components */ for (npath = 0; path[npath].ctl_name || path[npath].procname; ++npath) @@ -1809,6 +1856,18 @@ struct ctl_table_header *__register_sysctl_paths( #endif spin_lock(&sysctl_lock); header->set = lookup_header_set(root, namespaces); + header->attached_by = header->ctl_table; + header->attached_to = root_table; + header->parent = &root_table_header; + for (set = header->set; set; set = set->parent) { + struct ctl_table_header *p; + list_for_each_entry(p, &set->list, ctl_entry) { + if (p->unregistering) + continue; + try_attach(p, header); + } + } + header->parent->count++; list_add_tail(&header->ctl_entry, &header->set->list); spin_unlock(&sysctl_lock); @@ -1864,6 +1923,10 @@ void unregister_sysctl_table(struct ctl_table_header * header) spin_lock(&sysctl_lock); start_unregistering(header); + if (!--header->parent->count) { + WARN_ON(1); + kfree(header->parent); + } if (!--header->count) kfree(header); spin_unlock(&sysctl_lock); -- cgit v1.2.3 From 9043476f726802f4b00c96d0c4f418dde48d1304 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Jul 2008 08:54:06 -0400 Subject: [PATCH] sanitize proc_sysctl * keep references to ctl_table_head and ctl_table in /proc/sys inodes * grab the former during operations, use the latter for access to entry if that succeeds * have ->d_compare() check if table should be seen for one who does lookup; that allows us to avoid flipping inodes - if we have the same name resolve to different things, we'll just keep several dentries and ->d_compare() will reject the wrong ones. * have ->lookup() and ->readdir() scan the table of our inode first, then walk all ctl_table_header and scan ->attached_by for those that are attached to our directory. * implement ->getattr(). * get rid of insane amounts of tree-walking * get rid of the need to know dentry in ->permission() and of the contortions induced by that. 
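Condensed, the resulting lookup strategy looks like the sketch below (an illustration only; the real code is proc_sys_lookup()/proc_sys_readdir() in the hunks that follow, and find_in_table() is the static helper they add):

    static struct ctl_table *scan_dir(struct ctl_table *dir, struct qstr *name,
                                      struct ctl_table_header **hp)
    {
            struct ctl_table_header *h;
            struct ctl_table *p = find_in_table(dir, name);

            *hp = NULL;
            if (p)
                    return p;               /* found in our own table */

            /* otherwise search the crowns attached to this directory */
            for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
                    if (h->attached_to != dir)
                            continue;
                    p = find_in_table(h->attached_by, name);
                    if (p) {
                            *hp = h;        /* caller must sysctl_head_finish(*hp)
                                             * once it is done with p */
                            return p;
                    }
            }
            return NULL;
    }
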
Signed-off-by: Al Viro --- fs/proc/inode.c | 5 + fs/proc/proc_sysctl.c | 427 ++++++++++++++++++++++-------------------------- include/linux/proc_fs.h | 5 + include/linux/sysctl.h | 1 + kernel/sysctl.c | 15 ++ 5 files changed, 218 insertions(+), 235 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index b37f25dc45a5..8bb03f056c28 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -65,6 +66,8 @@ static void proc_delete_inode(struct inode *inode) module_put(de->owner); de_put(de); } + if (PROC_I(inode)->sysctl) + sysctl_head_put(PROC_I(inode)->sysctl); clear_inode(inode); } @@ -84,6 +87,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb) ei->fd = 0; ei->op.proc_get_link = NULL; ei->pde = NULL; + ei->sysctl = NULL; + ei->sysctl_entry = NULL; inode = &ei->vfs_inode; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; return inode; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5acc001d49f6..fa1ec2433e44 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -10,149 +10,110 @@ static struct dentry_operations proc_sys_dentry_operations; static const struct file_operations proc_sys_file_operations; static const struct inode_operations proc_sys_inode_operations; +static const struct file_operations proc_sys_dir_file_operations; +static const struct inode_operations proc_sys_dir_operations; -static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table) -{ - /* Refresh the cached information bits in the inode */ - if (table) { - inode->i_uid = 0; - inode->i_gid = 0; - inode->i_mode = table->mode; - if (table->proc_handler) { - inode->i_mode |= S_IFREG; - inode->i_nlink = 1; - } else { - inode->i_mode |= S_IFDIR; - inode->i_nlink = 0; /* It is too hard to figure out */ - } - } -} - -static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table) +static struct inode *proc_sys_make_inode(struct super_block *sb, + struct ctl_table_header *head, struct ctl_table *table) { struct inode *inode; - struct proc_inode *dir_ei, *ei; - int depth; + struct proc_inode *ei; - inode = new_inode(dir->i_sb); + inode = new_inode(sb); if (!inode) goto out; - /* A directory is always one deeper than it's parent */ - dir_ei = PROC_I(dir); - depth = dir_ei->fd + 1; - + sysctl_head_get(head); ei = PROC_I(inode); - ei->fd = depth; + ei->sysctl = head; + ei->sysctl_entry = table; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_op = &proc_sys_inode_operations; - inode->i_fop = &proc_sys_file_operations; inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */ - proc_sys_refresh_inode(inode, table); + inode->i_mode = table->mode; + if (!table->child) { + inode->i_mode |= S_IFREG; + inode->i_op = &proc_sys_inode_operations; + inode->i_fop = &proc_sys_file_operations; + } else { + inode->i_mode |= S_IFDIR; + inode->i_nlink = 0; + inode->i_op = &proc_sys_dir_operations; + inode->i_fop = &proc_sys_dir_file_operations; + } out: return inode; } -static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth) -{ - for (;;) { - struct proc_inode *ei; - - ei = PROC_I(dentry->d_inode); - if (ei->fd == depth) - break; /* found */ - - dentry = dentry->d_parent; - } - return dentry; -} - -static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table, - struct qstr *name) +static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) { int len; - for ( 
; table->ctl_name || table->procname; table++) { + for ( ; p->ctl_name || p->procname; p++) { - if (!table->procname) + if (!p->procname) continue; - len = strlen(table->procname); + len = strlen(p->procname); if (len != name->len) continue; - if (memcmp(table->procname, name->name, len) != 0) + if (memcmp(p->procname, name->name, len) != 0) continue; /* I have a match */ - return table; + return p; } return NULL; } -static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry, - struct ctl_table *table) +struct ctl_table_header *grab_header(struct inode *inode) { - struct dentry *ancestor; - struct proc_inode *ei; - int depth, i; + if (PROC_I(inode)->sysctl) + return sysctl_head_grab(PROC_I(inode)->sysctl); + else + return sysctl_head_next(NULL); +} - ei = PROC_I(dentry->d_inode); - depth = ei->fd; +static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct ctl_table_header *head = grab_header(dir); + struct ctl_table *table = PROC_I(dir)->sysctl_entry; + struct ctl_table_header *h = NULL; + struct qstr *name = &dentry->d_name; + struct ctl_table *p; + struct inode *inode; + struct dentry *err = ERR_PTR(-ENOENT); - if (depth == 0) - return table; + if (IS_ERR(head)) + return ERR_CAST(head); - for (i = 1; table && (i <= depth); i++) { - ancestor = proc_sys_ancestor(dentry, i); - table = proc_sys_lookup_table_one(table, &ancestor->d_name); - if (table) - table = table->child; + if (table && !table->child) { + WARN_ON(1); + goto out; } - return table; - -} -static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent, - struct qstr *name, - struct ctl_table *table) -{ - table = proc_sys_lookup_table(dparent, table); - if (table) - table = proc_sys_lookup_table_one(table, name); - return table; -} -static struct ctl_table *do_proc_sys_lookup(struct dentry *parent, - struct qstr *name, - struct ctl_table_header **ptr) -{ - struct ctl_table_header *head; - struct ctl_table *table = NULL; + table = table ? table->child : head->ctl_table; - for (head = sysctl_head_next(NULL); head; - head = sysctl_head_next(head)) { - table = proc_sys_lookup_entry(parent, name, head->ctl_table); - if (table) - break; + p = find_in_table(table, name); + if (!p) { + for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { + if (h->attached_to != table) + continue; + p = find_in_table(h->attached_by, name); + if (p) + break; + } } - *ptr = head; - return table; -} - -static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) -{ - struct ctl_table_header *head; - struct inode *inode; - struct dentry *err; - struct ctl_table *table; - err = ERR_PTR(-ENOENT); - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); - if (!table) + if (!p) goto out; err = ERR_PTR(-ENOMEM); - inode = proc_sys_make_inode(dir, table); + inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); + if (h) + sysctl_head_finish(h); + if (!inode) goto out; @@ -168,22 +129,14 @@ out: static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, size_t count, loff_t *ppos, int write) { - struct dentry *dentry = filp->f_dentry; - struct ctl_table_header *head; - struct ctl_table *table; + struct inode *inode = filp->f_path.dentry->d_inode; + struct ctl_table_header *head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; ssize_t error; size_t res; - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); - /* Has the sysctl entry disappeared on us? 
*/ - error = -ENOENT; - if (!table) - goto out; - - /* Has the sysctl entry been replaced by a directory? */ - error = -EISDIR; - if (!table->proc_handler) - goto out; + if (IS_ERR(head)) + return PTR_ERR(head); /* * At this point we know that the sysctl was not unregistered @@ -193,6 +146,11 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ)) goto out; + /* if that can happen at all, it should be -EINVAL, not -EISDIR */ + error = -EINVAL; + if (!table->proc_handler) + goto out; + /* careful: calling conventions are nasty here */ res = count; error = table->proc_handler(table, write, filp, buf, &res, ppos); @@ -218,82 +176,86 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf, static int proc_sys_fill_cache(struct file *filp, void *dirent, - filldir_t filldir, struct ctl_table *table) + filldir_t filldir, + struct ctl_table_header *head, + struct ctl_table *table) { - struct ctl_table_header *head; - struct ctl_table *child_table = NULL; struct dentry *child, *dir = filp->f_path.dentry; struct inode *inode; struct qstr qname; ino_t ino = 0; unsigned type = DT_UNKNOWN; - int ret; qname.name = table->procname; qname.len = strlen(table->procname); qname.hash = full_name_hash(qname.name, qname.len); - /* Suppress duplicates. - * Only fill a directory entry if it is the value that - * an ordinary lookup of that name returns. Hide all - * others. - * - * If we ever cache this translation in the dcache - * I should do a dcache lookup first. But for now - * it is just simpler not to. - */ - ret = 0; - child_table = do_proc_sys_lookup(dir, &qname, &head); - sysctl_head_finish(head); - if (child_table != table) - return 0; - child = d_lookup(dir, &qname); if (!child) { - struct dentry *new; - new = d_alloc(dir, &qname); - if (new) { - inode = proc_sys_make_inode(dir->d_inode, table); - if (!inode) - child = ERR_PTR(-ENOMEM); - else { - new->d_op = &proc_sys_dentry_operations; - d_add(new, inode); + child = d_alloc(dir, &qname); + if (child) { + inode = proc_sys_make_inode(dir->d_sb, head, table); + if (!inode) { + dput(child); + return -ENOMEM; + } else { + child->d_op = &proc_sys_dentry_operations; + d_add(child, inode); } - if (child) - dput(new); - else - child = new; + } else { + return -ENOMEM; } } - if (!child || IS_ERR(child) || !child->d_inode) - goto end_instantiate; inode = child->d_inode; - if (inode) { - ino = inode->i_ino; - type = inode->i_mode >> 12; - } + ino = inode->i_ino; + type = inode->i_mode >> 12; dput(child); -end_instantiate: - if (!ino) - ino= find_inode_number(dir, &qname); - if (!ino) - ino = 1; - return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); + return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); +} + +static int scan(struct ctl_table_header *head, ctl_table *table, + unsigned long *pos, struct file *file, + void *dirent, filldir_t filldir) +{ + + for (; table->ctl_name || table->procname; table++, (*pos)++) { + int res; + + /* Can't do anything without a proc name */ + if (!table->procname) + continue; + + if (*pos < file->f_pos) + continue; + + res = proc_sys_fill_cache(file, dirent, filldir, head, table); + if (res) + return res; + + file->f_pos = *pos + 1; + } + return 0; } static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct dentry *dentry = filp->f_dentry; + struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; - struct ctl_table_header *head 
= NULL; - struct ctl_table *table; + struct ctl_table_header *head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; + struct ctl_table_header *h = NULL; unsigned long pos; - int ret; + int ret = -EINVAL; + + if (IS_ERR(head)) + return PTR_ERR(head); - ret = -ENOTDIR; - if (!S_ISDIR(inode->i_mode)) + if (table && !table->child) { + WARN_ON(1); goto out; + } + + table = table ? table->child : head->ctl_table; ret = 0; /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ @@ -311,30 +273,17 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) } pos = 2; - /* - Find each instance of the directory - * - Read all entries in each instance - * - Before returning an entry to user space lookup the entry - * by name and if I find a different entry don't return - * this one because it means it is a buried dup. - * For sysctl this should only happen for directory entries. - */ - for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) { - table = proc_sys_lookup_table(dentry, head->ctl_table); + ret = scan(head, table, &pos, filp, dirent, filldir); + if (ret) + goto out; - if (!table) + for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { + if (h->attached_to != table) continue; - - for (; table->ctl_name || table->procname; table++, pos++) { - /* Can't do anything without a proc name */ - if (!table->procname) - continue; - - if (pos < filp->f_pos) - continue; - - if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0) - goto out; - filp->f_pos = pos + 1; + ret = scan(h, h->attached_by, &pos, filp, dirent, filldir); + if (ret) { + sysctl_head_finish(h); + break; } } ret = 1; @@ -349,47 +298,18 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata * * sysctl entries that are not writeable, * are _NOT_ writeable, capabilities or not. */ - struct ctl_table_header *head; - struct ctl_table *table; - struct dentry *dentry; - int mode; - int depth; + struct ctl_table_header *head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; int error; - head = NULL; - depth = PROC_I(inode)->fd; - - /* First check the cached permissions, in case we don't have - * enough information to lookup the sysctl table entry. - */ - error = -EACCES; - mode = inode->i_mode; - - if (current->euid == 0) - mode >>= 6; - else if (in_group_p(0)) - mode >>= 3; - - if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask) - error = 0; - - /* If we can't get a sysctl table entry the permission - * checks on the cached mode will have to be enough. - */ - if (!nd || !depth) - goto out; + if (IS_ERR(head)) + return PTR_ERR(head); - dentry = nd->path.dentry; - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); + if (!table) /* global root - r-xr-xr-x */ + error = mask & MAY_WRITE ? -EACCES : 0; + else /* Use the permissions on the sysctl table entry */ + error = sysctl_perm(head->root, table, mask); - /* If the entry does not exist deny permission */ - error = -EACCES; - if (!table) - goto out; - - /* Use the permissions on the sysctl table entry */ - error = sysctl_perm(head->root, table, mask); -out: sysctl_head_finish(head); return error; } @@ -409,33 +329,70 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) return error; } -/* I'm lazy and don't distinguish between files and directories, - * until access time. 
- */ +static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct ctl_table_header *head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; + + if (IS_ERR(head)) + return PTR_ERR(head); + + generic_fillattr(inode, stat); + if (table) + stat->mode = (stat->mode & S_IFMT) | table->mode; + + sysctl_head_finish(head); + return 0; +} + static const struct file_operations proc_sys_file_operations = { .read = proc_sys_read, .write = proc_sys_write, +}; + +static const struct file_operations proc_sys_dir_file_operations = { .readdir = proc_sys_readdir, }; static const struct inode_operations proc_sys_inode_operations = { + .permission = proc_sys_permission, + .setattr = proc_sys_setattr, + .getattr = proc_sys_getattr, +}; + +static const struct inode_operations proc_sys_dir_operations = { .lookup = proc_sys_lookup, .permission = proc_sys_permission, .setattr = proc_sys_setattr, + .getattr = proc_sys_getattr, }; static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct ctl_table_header *head; - struct ctl_table *table; - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); - proc_sys_refresh_inode(dentry->d_inode, table); - sysctl_head_finish(head); - return !!table; + return !PROC_I(dentry->d_inode)->sysctl->unregistering; +} + +static int proc_sys_delete(struct dentry *dentry) +{ + return !!PROC_I(dentry->d_inode)->sysctl->unregistering; +} + +static int proc_sys_compare(struct dentry *dir, struct qstr *qstr, + struct qstr *name) +{ + struct dentry *dentry = container_of(qstr, struct dentry, d_name); + if (qstr->len != name->len) + return 1; + if (memcmp(qstr->name, name->name, name->len)) + return 1; + return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl); } static struct dentry_operations proc_sys_dentry_operations = { .d_revalidate = proc_sys_revalidate, + .d_delete = proc_sys_delete, + .d_compare = proc_sys_compare, }; static struct proc_dir_entry *proc_sys_root; @@ -443,8 +400,8 @@ static struct proc_dir_entry *proc_sys_root; int proc_sys_init(void) { proc_sys_root = proc_mkdir("sys", NULL); - proc_sys_root->proc_iops = &proc_sys_inode_operations; - proc_sys_root->proc_fops = &proc_sys_file_operations; + proc_sys_root->proc_iops = &proc_sys_dir_operations; + proc_sys_root->proc_fops = &proc_sys_dir_file_operations; proc_sys_root->nlink = 0; return 0; } diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index f560d1705afe..fb61850d1cfc 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -282,11 +282,16 @@ union proc_op { struct task_struct *task); }; +struct ctl_table_header; +struct ctl_table; + struct proc_inode { struct pid *pid; int fd; union proc_op op; struct proc_dir_entry *pde; + struct ctl_table_header *sysctl; + struct ctl_table *sysctl_entry; struct inode vfs_inode; }; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 3f6599aeb0db..d0437f36921f 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -961,6 +961,7 @@ struct ctl_table_header; extern void sysctl_head_get(struct ctl_table_header *); extern void sysctl_head_put(struct ctl_table_header *); +extern int sysctl_is_seen(struct ctl_table_header *); extern struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *); extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, diff --git a/kernel/sysctl.c 
b/kernel/sysctl.c index c9a0af887033..ff5abcca5ddf 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1932,6 +1932,21 @@ void unregister_sysctl_table(struct ctl_table_header * header) spin_unlock(&sysctl_lock); } +int sysctl_is_seen(struct ctl_table_header *p) +{ + struct ctl_table_set *set = p->set; + int res; + spin_lock(&sysctl_lock); + if (p->unregistering) + res = 0; + else if (!set->is_seen) + res = 1; + else + res = set->is_seen(set); + spin_unlock(&sysctl_lock); + return res; +} + void setup_sysctl_set(struct ctl_table_set *p, struct ctl_table_set *parent, int (*is_seen)(struct ctl_table_set *)) -- cgit v1.2.3 From e6305c43eda10ebfd2ad9e35d6e172ccc7bb3695 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Jul 2008 21:03:57 -0400 Subject: [PATCH] sanitize ->permission() prototype * kill nameidata * argument; map the 3 bits in ->flags anybody cares about to new MAY_... ones and pass with the mask. * kill redundant gfs2_iop_permission() * sanitize ecryptfs_permission() * fix remaining places where ->permission() instances might barf on new MAY_... found in mask. The obvious next target in that direction is permission(9) folded fix for nfs_permission() breakage from Miklos Szeredi Signed-off-by: Al Viro --- fs/afs/internal.h | 4 +--- fs/afs/security.c | 2 +- fs/bad_inode.c | 3 +-- fs/cifs/cifsfs.c | 2 +- fs/coda/dir.c | 4 +++- fs/coda/pioctl.c | 6 ++---- fs/ecryptfs/inode.c | 17 ++--------------- fs/ext2/acl.c | 2 +- fs/ext2/acl.h | 2 +- fs/ext3/acl.c | 2 +- fs/ext3/acl.h | 2 +- fs/ext4/acl.c | 2 +- fs/ext4/acl.h | 2 +- fs/fuse/dir.c | 6 +++--- fs/gfs2/ops_inode.c | 12 +++--------- fs/hfs/inode.c | 3 +-- fs/hfsplus/inode.c | 2 +- fs/hostfs/hostfs_kern.c | 2 +- fs/jffs2/acl.c | 2 +- fs/jffs2/acl.h | 2 +- fs/jfs/acl.c | 2 +- fs/jfs/jfs_acl.h | 2 +- fs/namei.c | 23 +++++++++++++++++------ fs/nfs/dir.c | 11 +++++------ fs/ocfs2/file.c | 2 +- fs/ocfs2/file.h | 3 +-- fs/proc/base.c | 3 +-- fs/proc/proc_sysctl.c | 2 +- fs/reiserfs/xattr.c | 2 +- fs/smbfs/file.c | 4 ++-- fs/xfs/linux-2.6/xfs_iops.c | 3 +-- include/linux/coda_linux.h | 2 +- include/linux/fs.h | 5 ++++- include/linux/nfs_fs.h | 2 +- include/linux/reiserfs_xattr.h | 2 +- include/linux/shmem_fs.h | 2 +- kernel/sysctl.c | 10 +++++----- mm/shmem_acl.c | 2 +- 38 files changed, 74 insertions(+), 87 deletions(-) (limited to 'include/linux') diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 7102824ba847..3cb6920ff30b 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -469,8 +469,6 @@ extern bool afs_cm_incoming_call(struct afs_call *); extern const struct inode_operations afs_dir_inode_operations; extern const struct file_operations afs_dir_file_operations; -extern int afs_permission(struct inode *, int, struct nameidata *); - /* * file.c */ @@ -605,7 +603,7 @@ extern void afs_clear_permits(struct afs_vnode *); extern void afs_cache_permit(struct afs_vnode *, struct key *, long); extern void afs_zap_permits(struct rcu_head *); extern struct key *afs_request_key(struct afs_cell *); -extern int afs_permission(struct inode *, int, struct nameidata *); +extern int afs_permission(struct inode *, int); /* * server.c diff --git a/fs/afs/security.c b/fs/afs/security.c index 3bcbeceba1bb..3ef504370034 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -284,7 +284,7 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, * - AFS ACLs are attached to directories only, and a file is controlled by its * parent directory's ACL */ -int afs_permission(struct inode *inode, int mask, struct nameidata *nd) +int 
afs_permission(struct inode *inode, int mask) { struct afs_vnode *vnode = AFS_FS_I(inode); afs_access_t uninitialized_var(access); diff --git a/fs/bad_inode.c b/fs/bad_inode.c index f1c2ea8342f5..5f1538c03b1b 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -243,8 +243,7 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer, return -EIO; } -static int bad_inode_permission(struct inode *inode, int mask, - struct nameidata *nd) +static int bad_inode_permission(struct inode *inode, int mask) { return -EIO; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index fe5f6809cba6..1ec7076f7b24 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -267,7 +267,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd) +static int cifs_permission(struct inode *inode, int mask) { struct cifs_sb_info *cifs_sb; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 3d2580e00a3e..c5916228243c 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -137,9 +137,11 @@ exit: } -int coda_permission(struct inode *inode, int mask, struct nameidata *nd) +int coda_permission(struct inode *inode, int mask) { int error = 0; + + mask &= MAY_READ | MAY_WRITE | MAY_EXEC; if (!mask) return 0; diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index c21a1f552a63..c38a98974fb0 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -24,8 +24,7 @@ #include /* pioctl ops */ -static int coda_ioctl_permission(struct inode *inode, int mask, - struct nameidata *nd); +static int coda_ioctl_permission(struct inode *inode, int mask); static int coda_pioctl(struct inode * inode, struct file * filp, unsigned int cmd, unsigned long user_data); @@ -42,8 +41,7 @@ const struct file_operations coda_ioctl_operations = { }; /* the coda pioctl inode ops */ -static int coda_ioctl_permission(struct inode *inode, int mask, - struct nameidata *nd) +static int coda_ioctl_permission(struct inode *inode, int mask) { return 0; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index d755455e3bff..32f4228efcd5 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -830,22 +830,9 @@ out: } static int -ecryptfs_permission(struct inode *inode, int mask, struct nameidata *nd) +ecryptfs_permission(struct inode *inode, int mask) { - int rc; - - if (nd) { - struct vfsmount *vfsmnt_save = nd->path.mnt; - struct dentry *dentry_save = nd->path.dentry; - - nd->path.mnt = ecryptfs_dentry_to_lower_mnt(nd->path.dentry); - nd->path.dentry = ecryptfs_dentry_to_lower(nd->path.dentry); - rc = permission(ecryptfs_inode_to_lower(inode), mask, nd); - nd->path.mnt = vfsmnt_save; - nd->path.dentry = dentry_save; - } else - rc = permission(ecryptfs_inode_to_lower(inode), mask, NULL); - return rc; + return permission(ecryptfs_inode_to_lower(inode), mask, NULL); } /** diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index e58669e1b87c..ae8c4f850b27 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -294,7 +294,7 @@ ext2_check_acl(struct inode *inode, int mask) } int -ext2_permission(struct inode *inode, int mask, struct nameidata *nd) +ext2_permission(struct inode *inode, int mask) { return generic_permission(inode, mask, ext2_check_acl); } diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index 0bde85bafe38..b42cf578554b 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -58,7 +58,7 @@ static inline int ext2_acl_count(size_t size) #define EXT2_ACL_NOT_CACHED ((void *)-1) /* acl.c */ -extern int ext2_permission (struct inode *, int, struct nameidata *); +extern int 
ext2_permission (struct inode *, int); extern int ext2_acl_chmod (struct inode *); extern int ext2_init_acl (struct inode *, struct inode *); diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index a754d1848173..b60bb241880c 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -299,7 +299,7 @@ ext3_check_acl(struct inode *inode, int mask) } int -ext3_permission(struct inode *inode, int mask, struct nameidata *nd) +ext3_permission(struct inode *inode, int mask) { return generic_permission(inode, mask, ext3_check_acl); } diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index 0d1e6279cbfd..42da16b8cac0 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h @@ -58,7 +58,7 @@ static inline int ext3_acl_count(size_t size) #define EXT3_ACL_NOT_CACHED ((void *)-1) /* acl.c */ -extern int ext3_permission (struct inode *, int, struct nameidata *); +extern int ext3_permission (struct inode *, int); extern int ext3_acl_chmod (struct inode *); extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 3c8dab880d91..c7d04e165446 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -299,7 +299,7 @@ ext4_check_acl(struct inode *inode, int mask) } int -ext4_permission(struct inode *inode, int mask, struct nameidata *nd) +ext4_permission(struct inode *inode, int mask) { return generic_permission(inode, mask, ext4_check_acl); } diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index 26a5c1abf147..cd2b855a07d6 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -58,7 +58,7 @@ static inline int ext4_acl_count(size_t size) #define EXT4_ACL_NOT_CACHED ((void *)-1) /* acl.c */ -extern int ext4_permission (struct inode *, int, struct nameidata *); +extern int ext4_permission (struct inode *, int); extern int ext4_acl_chmod (struct inode *); extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 51d0035ff07e..48a7934cb950 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -898,7 +898,7 @@ static int fuse_access(struct inode *inode, int mask) return PTR_ERR(req); memset(&inarg, 0, sizeof(inarg)); - inarg.mask = mask; + inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC); req->in.h.opcode = FUSE_ACCESS; req->in.h.nodeid = get_node_id(inode); req->in.numargs = 1; @@ -927,7 +927,7 @@ static int fuse_access(struct inode *inode, int mask) * access request is sent. Execute permission is still checked * locally based on file mode. */ -static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) +static int fuse_permission(struct inode *inode, int mask) { struct fuse_conn *fc = get_fuse_conn(inode); bool refreshed = false; @@ -962,7 +962,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) exist. 
So if permissions are revoked this won't be noticed immediately, only after the attribute timeout has expired */ - } else if (nd && (nd->flags & (LOOKUP_ACCESS | LOOKUP_CHDIR))) { + } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { err = fuse_access(inode, mask); } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { if (!(inode->i_mode & S_IXUGO)) { diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 1e252dfc5294..4e982532f085 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -915,12 +915,6 @@ int gfs2_permission(struct inode *inode, int mask) return error; } -static int gfs2_iop_permission(struct inode *inode, int mask, - struct nameidata *nd) -{ - return gfs2_permission(inode, mask); -} - static int setattr_size(struct inode *inode, struct iattr *attr) { struct gfs2_inode *ip = GFS2_I(inode); @@ -1150,7 +1144,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) } const struct inode_operations gfs2_file_iops = { - .permission = gfs2_iop_permission, + .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, .setxattr = gfs2_setxattr, @@ -1169,7 +1163,7 @@ const struct inode_operations gfs2_dir_iops = { .rmdir = gfs2_rmdir, .mknod = gfs2_mknod, .rename = gfs2_rename, - .permission = gfs2_iop_permission, + .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, .setxattr = gfs2_setxattr, @@ -1181,7 +1175,7 @@ const struct inode_operations gfs2_dir_iops = { const struct inode_operations gfs2_symlink_iops = { .readlink = gfs2_readlink, .follow_link = gfs2_follow_link, - .permission = gfs2_iop_permission, + .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, .setxattr = gfs2_setxattr, diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index dc4ec640e875..aa73f3fd5dd9 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -511,8 +511,7 @@ void hfs_clear_inode(struct inode *inode) } } -static int hfs_permission(struct inode *inode, int mask, - struct nameidata *nd) +static int hfs_permission(struct inode *inode, int mask) { if (S_ISREG(inode->i_mode) && mask & MAY_EXEC) return 0; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index cc3b5e24339b..d4014e3044d2 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -238,7 +238,7 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); } -static int hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd) +static int hfsplus_permission(struct inode *inode, int mask) { /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup, * open_exec has the same test, so it's still not executable, if a x bit diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 5222345ddccf..d6ecabf4d231 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -822,7 +822,7 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, return err; } -int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd) +int hostfs_permission(struct inode *ino, int desired) { char *name; int r = 0, w = 0, x = 0, err; diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 4c80404a9aba..d98713777a1b 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -314,7 +314,7 @@ static int jffs2_check_acl(struct inode *inode, int mask) return -EAGAIN; } -int jffs2_permission(struct inode *inode, int mask, struct nameidata *nd) +int jffs2_permission(struct inode *inode, int mask) { return generic_permission(inode, mask, jffs2_check_acl); } diff 
--git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index 0bb7f003fd80..8ca058aed384 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -28,7 +28,7 @@ struct jffs2_acl_header { #define JFFS2_ACL_NOT_CACHED ((void *)-1) -extern int jffs2_permission(struct inode *, int, struct nameidata *); +extern int jffs2_permission(struct inode *, int); extern int jffs2_acl_chmod(struct inode *); extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); extern int jffs2_init_acl_post(struct inode *); diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 4d84bdc88299..d3e5c33665de 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -140,7 +140,7 @@ static int jfs_check_acl(struct inode *inode, int mask) return -EAGAIN; } -int jfs_permission(struct inode *inode, int mask, struct nameidata *nd) +int jfs_permission(struct inode *inode, int mask) { return generic_permission(inode, mask, jfs_check_acl); } diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index 455fa4292045..88475f10a389 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -20,7 +20,7 @@ #ifdef CONFIG_JFS_POSIX_ACL -int jfs_permission(struct inode *, int, struct nameidata *); +int jfs_permission(struct inode *, int); int jfs_init_acl(tid_t, struct inode *, struct inode *); int jfs_setattr(struct dentry *, struct iattr *); diff --git a/fs/namei.c b/fs/namei.c index 3b26a240ade9..46af98ed136b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -185,6 +185,8 @@ int generic_permission(struct inode *inode, int mask, { umode_t mode = inode->i_mode; + mask &= MAY_READ | MAY_WRITE | MAY_EXEC; + if (current->fsuid == inode->i_uid) mode >>= 6; else { @@ -203,7 +205,7 @@ int generic_permission(struct inode *inode, int mask, /* * If the DACs are ok we don't need any capability check. */ - if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) + if ((mask & ~mode) == 0) return 0; check_capabilities: @@ -228,7 +230,7 @@ int generic_permission(struct inode *inode, int mask, int permission(struct inode *inode, int mask, struct nameidata *nd) { - int retval, submask; + int retval; struct vfsmount *mnt = NULL; if (nd) @@ -261,9 +263,17 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) } /* Ordinary permission routines do not understand MAY_APPEND. 
*/ - submask = mask & ~MAY_APPEND; if (inode->i_op && inode->i_op->permission) { - retval = inode->i_op->permission(inode, submask, nd); + int extra = 0; + if (nd) { + if (nd->flags & LOOKUP_ACCESS) + extra |= MAY_ACCESS; + if (nd->flags & LOOKUP_CHDIR) + extra |= MAY_CHDIR; + if (nd->flags & LOOKUP_OPEN) + extra |= MAY_OPEN; + } + retval = inode->i_op->permission(inode, mask | extra); if (!retval) { /* * Exec permission on a regular file is denied if none @@ -277,7 +287,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) return -EACCES; } } else { - retval = generic_permission(inode, submask, NULL); + retval = generic_permission(inode, mask, NULL); } if (retval) return retval; @@ -286,7 +296,8 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) if (retval) return retval; - return security_inode_permission(inode, mask, nd); + return security_inode_permission(inode, + mask & (MAY_READ|MAY_WRITE|MAY_EXEC), nd); } /** diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 28a238dab23a..74f92b717f78 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1884,7 +1884,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) return status; nfs_access_add_cache(inode, &cache); out: - if ((cache.mask & mask) == mask) + if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) return 0; return -EACCES; } @@ -1907,17 +1907,17 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); } -int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) +int nfs_permission(struct inode *inode, int mask) { struct rpc_cred *cred; int res = 0; nfs_inc_stats(inode, NFSIOS_VFSACCESS); - if (mask == 0) + if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) goto out; /* Is this sys_access() ? */ - if (nd != NULL && (nd->flags & LOOKUP_ACCESS)) + if (mask & MAY_ACCESS) goto force_lookup; switch (inode->i_mode & S_IFMT) { @@ -1926,8 +1926,7 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) case S_IFREG: /* NFSv4 has atomic_open... */ if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN) - && nd != NULL - && (nd->flags & LOOKUP_OPEN)) + && (mask & MAY_OPEN)) goto out; break; case S_IFDIR: diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index e8514e8b6ce8..be2dd95d3a1d 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1176,7 +1176,7 @@ bail: return err; } -int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd) +int ocfs2_permission(struct inode *inode, int mask) { int ret; diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 048ddcaf5c80..1e27b4d017ea 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -62,8 +62,7 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); -int ocfs2_permission(struct inode *inode, int mask, - struct nameidata *nd); +int ocfs2_permission(struct inode *inode, int mask); int ocfs2_should_update_atime(struct inode *inode, struct vfsmount *vfsmnt); diff --git a/fs/proc/base.c b/fs/proc/base.c index 81bce6791bfc..d82d800389f6 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1859,8 +1859,7 @@ static const struct file_operations proc_fd_operations = { * /proc/pid/fd needs a special permission handler so that a process can still * access /proc/self/fd after it has executed a setuid(). 
*/ -static int proc_fd_permission(struct inode *inode, int mask, - struct nameidata *nd) +static int proc_fd_permission(struct inode *inode, int mask) { int rv; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index fa1ec2433e44..f9a8b892718f 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -292,7 +292,7 @@ out: return ret; } -static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd) +static int proc_sys_permission(struct inode *inode, int mask) { /* * sysctl entries that are not writeable, diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index d7c4935c1034..bb3cb5b7cdb2 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -1250,7 +1250,7 @@ static int reiserfs_check_acl(struct inode *inode, int mask) return error; } -int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd) +int reiserfs_permission(struct inode *inode, int mask) { /* * We don't do permission checks on the internal objects. diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index 2294783320cb..e4f8d51a5553 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -408,7 +408,7 @@ smb_file_release(struct inode *inode, struct file * file) * privileges, so we need our own check for this. */ static int -smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) +smb_file_permission(struct inode *inode, int mask) { int mode = inode->i_mode; int error = 0; @@ -417,7 +417,7 @@ smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) /* Look at user permissions */ mode >>= 6; - if ((mode & 7 & mask) != mask) + if (mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) error = -EACCES; return error; } diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 2bf287ef5489..5fc61c824bb9 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -589,8 +589,7 @@ xfs_check_acl( STATIC int xfs_vn_permission( struct inode *inode, - int mask, - struct nameidata *nd) + int mask) { return generic_permission(inode, mask, xfs_check_acl); } diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h index 31b75311e2ca..dcc228aa335a 100644 --- a/include/linux/coda_linux.h +++ b/include/linux/coda_linux.h @@ -37,7 +37,7 @@ extern const struct file_operations coda_ioctl_operations; /* operations shared over more than one file */ int coda_open(struct inode *i, struct file *f); int coda_release(struct inode *i, struct file *f); -int coda_permission(struct inode *inode, int mask, struct nameidata *nd); +int coda_permission(struct inode *inode, int mask); int coda_revalidate_inode(struct dentry *); int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); int coda_setattr(struct dentry *, struct iattr *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7721a2ac9c0e..6c923c9b79bc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -60,6 +60,9 @@ extern int dir_notify_enable; #define MAY_WRITE 2 #define MAY_READ 4 #define MAY_APPEND 8 +#define MAY_ACCESS 16 +#define MAY_CHDIR 32 +#define MAY_OPEN 64 #define FMODE_READ 1 #define FMODE_WRITE 2 @@ -1272,7 +1275,7 @@ struct inode_operations { void * (*follow_link) (struct dentry *, struct nameidata *); void (*put_link) (struct dentry *, struct nameidata *, void *); void (*truncate) (struct inode *); - int (*permission) (struct inode *, int, struct nameidata *); + int (*permission) (struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int 
(*setxattr) (struct dentry *, const char *,const void *,size_t,int); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 29d261918734..f08f9ca602af 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -332,7 +332,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); -extern int nfs_permission(struct inode *, int, struct nameidata *); +extern int nfs_permission(struct inode *, int); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index 66a96814d614..af135ae895db 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -55,7 +55,7 @@ int reiserfs_removexattr(struct dentry *dentry, const char *name); int reiserfs_delete_xattrs(struct inode *inode); int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); int reiserfs_xattr_init(struct super_block *sb, int mount_flags); -int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd); +int reiserfs_permission(struct inode *inode, int mask); int reiserfs_xattr_del(struct inode *, const char *); int reiserfs_xattr_get(const struct inode *, const char *, void *, size_t); diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index f2d12d5a21b8..fd83f2584b15 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -43,7 +43,7 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) } #ifdef CONFIG_TMPFS_POSIX_ACL -int shmem_permission(struct inode *, int, struct nameidata *); +int shmem_permission(struct inode *, int); int shmem_acl_init(struct inode *, struct inode *); void shmem_acl_destroy_inode(struct inode *); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ff5abcca5ddf..911d846f0503 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1516,9 +1516,9 @@ static int do_sysctl_strategy(struct ctl_table_root *root, int op = 0, rc; if (oldval) - op |= 004; + op |= MAY_READ; if (newval) - op |= 002; + op |= MAY_WRITE; if (sysctl_perm(root, table, op)) return -EPERM; @@ -1560,7 +1560,7 @@ repeat: if (n == table->ctl_name) { int error; if (table->child) { - if (sysctl_perm(root, table, 001)) + if (sysctl_perm(root, table, MAY_EXEC)) return -EPERM; name++; nlen--; @@ -1635,7 +1635,7 @@ static int test_perm(int mode, int op) mode >>= 6; else if (in_egroup_p(0)) mode >>= 3; - if ((mode & op & 0007) == op) + if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) return 0; return -EACCES; } @@ -1645,7 +1645,7 @@ int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) int error; int mode; - error = security_sysctl(table, op); + error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC)); if (error) return error; diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c index f5664c5b9eb1..8e5aadd7dcd6 100644 --- a/mm/shmem_acl.c +++ b/mm/shmem_acl.c @@ -191,7 +191,7 @@ shmem_check_acl(struct inode *inode, int mask) * shmem_permission - permission() inode operation */ int -shmem_permission(struct inode *inode, int mask, struct nameidata *nd) +shmem_permission(struct inode *inode, int mask) { return generic_permission(inode, mask, shmem_check_acl); } -- cgit 
v1.2.3 From 2f1936b87783a3a56c9441b27b9ba7a747f11e8e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 24 Jun 2008 16:50:14 +0200 Subject: [patch 3/5] vfs: change remove_suid() to file_remove_suid() All calls to remove_suid() are made with a file pointer, because (similarly to file_update_time) it is called when the file is written. Clean up callers by passing in a file instead of a dentry. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 2 +- fs/ntfs/file.c | 2 +- fs/splice.c | 4 ++-- fs/xfs/linux-2.6/xfs_lrw.c | 2 +- include/linux/fs.h | 2 +- mm/filemap.c | 7 ++++--- mm/filemap_xip.c | 2 +- 7 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 67ff2c6a8f63..2bada6bbc317 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -893,7 +893,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (count == 0) goto out; - err = remove_suid(file->f_path.dentry); + err = file_remove_suid(file); if (err) goto out; diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 3c5550cd11d6..d020866d4232 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2118,7 +2118,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, goto out; if (!count) goto out; - err = remove_suid(file->f_path.dentry); + err = file_remove_suid(file); if (err) goto out; file_update_time(file); diff --git a/fs/splice.c b/fs/splice.c index 47dc1a445d1f..b30311ba8af6 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -772,7 +772,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, ssize_t ret; int err; - err = remove_suid(out->f_path.dentry); + err = file_remove_suid(out); if (unlikely(err)) return err; @@ -830,7 +830,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, ssize_t ret; inode_double_lock(inode, pipe->inode); - ret = remove_suid(out->f_path.dentry); + ret = file_remove_suid(out); if (likely(!ret)) ret = __splice_from_pipe(pipe, &sd, pipe_to_file); inode_double_unlock(inode, pipe->inode); diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 5e3b57516ec7..82333b3e118e 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -711,7 +711,7 @@ start: !capable(CAP_FSETID)) { error = xfs_write_clear_setuid(xip); if (likely(!error)) - error = -remove_suid(file->f_path.dentry); + error = -file_remove_suid(file); if (unlikely(error)) { goto out_unlock_internal; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 6c923c9b79bc..1a3546e69f9e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1834,7 +1834,7 @@ extern void clear_inode(struct inode *); extern void destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); extern int should_remove_suid(struct dentry *); -extern int remove_suid(struct dentry *); +extern int file_remove_suid(struct file *); extern void __insert_inode_hash(struct inode *, unsigned long hashval); extern void remove_inode_hash(struct inode *); diff --git a/mm/filemap.c b/mm/filemap.c index 2ed8b0389c51..5de7633e1dbe 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1758,8 +1758,9 @@ static int __remove_suid(struct dentry *dentry, int kill) return notify_change(dentry, &newattrs); } -int remove_suid(struct dentry *dentry) +int file_remove_suid(struct file *file) { + struct dentry *dentry = file->f_path.dentry; int killsuid = should_remove_suid(dentry); int killpriv = security_inode_need_killpriv(dentry); int error = 0; @@ -1773,7 +1774,7 @@ int 
remove_suid(struct dentry *dentry) return error; } -EXPORT_SYMBOL(remove_suid); +EXPORT_SYMBOL(file_remove_suid); static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) @@ -2529,7 +2530,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, if (count == 0) goto out; - err = remove_suid(file->f_path.dentry); + err = file_remove_suid(file); if (err) goto out; diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 3e744abcce9d..98a3f31ccd6a 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -380,7 +380,7 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, if (count == 0) goto out_backing; - ret = remove_suid(filp->f_path.dentry); + ret = file_remove_suid(filp); if (ret) goto out_backing; -- cgit v1.2.3 From db2e747b14991a4c6a5c98b0e5f552a193237c03 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 24 Jun 2008 16:50:16 +0200 Subject: [patch 5/5] vfs: remove mode parameter from vfs_symlink() Remove the unused mode parameter from vfs_symlink and callers. Thanks to Tetsuo Handa for noticing. CC: Tetsuo Handa Signed-off-by: Miklos Szeredi --- fs/ecryptfs/inode.c | 4 +--- fs/namei.c | 4 ++-- fs/nfsd/vfs.c | 10 ++-------- include/linux/fs.h | 2 +- 4 files changed, 6 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 32f4228efcd5..f25caf2b0887 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -465,7 +465,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, int rc; struct dentry *lower_dentry; struct dentry *lower_dir_dentry; - umode_t mode; char *encoded_symname; int encoded_symlen; struct ecryptfs_crypt_stat *crypt_stat = NULL; @@ -473,7 +472,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, lower_dentry = ecryptfs_dentry_to_lower(dentry); dget(lower_dentry); lower_dir_dentry = lock_parent(lower_dentry); - mode = S_IALLUGO; encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname, strlen(symname), &encoded_symname); @@ -482,7 +480,7 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, goto out_lock; } rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, - encoded_symname, mode); + encoded_symname); kfree(encoded_symname); if (rc || !lower_dentry->d_inode) goto out_lock; diff --git a/fs/namei.c b/fs/namei.c index 3b67be7631dc..ae0e56fdb742 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2434,7 +2434,7 @@ asmlinkage long sys_unlink(const char __user *pathname) return do_unlinkat(AT_FDCWD, pathname); } -int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) +int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) { int error = may_create(dir, dentry, NULL); @@ -2483,7 +2483,7 @@ asmlinkage long sys_symlinkat(const char __user *oldname, error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; - error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO); + error = vfs_symlink(nd.path.dentry->d_inode, dentry, from); mnt_drop_write(nd.path.mnt); out_dput: dput(dentry); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 0f4481e0502d..ad1ad59e3742 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1516,7 +1516,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, struct dentry *dentry, *dnew; __be32 err, cerr; int host_err; - umode_t mode; err = nfserr_noent; if (!flen || !plen) @@ -1535,11 +1534,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, if 
(IS_ERR(dnew)) goto out_nfserr; - mode = S_IALLUGO; - /* Only the MODE ATTRibute is even vaguely meaningful */ - if (iap && (iap->ia_valid & ATTR_MODE)) - mode = iap->ia_mode & S_IALLUGO; - host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); if (host_err) goto out_nfserr; @@ -1551,11 +1545,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, else { strncpy(path_alloced, path, plen); path_alloced[plen] = 0; - host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode); + host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced); kfree(path_alloced); } } else - host_err = vfs_symlink(dentry->d_inode, dnew, path, mode); + host_err = vfs_symlink(dentry->d_inode, dnew, path); if (!host_err) { if (EX_ISSYNC(fhp->fh_export)) diff --git a/include/linux/fs.h b/include/linux/fs.h index 1a3546e69f9e..25998e803fc2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1139,7 +1139,7 @@ extern int vfs_permission(struct nameidata *, int); extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); extern int vfs_mkdir(struct inode *, struct dentry *, int); extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); -extern int vfs_symlink(struct inode *, struct dentry *, const char *, int); +extern int vfs_symlink(struct inode *, struct dentry *, const char *); extern int vfs_link(struct dentry *, struct inode *, struct dentry *); extern int vfs_rmdir(struct inode *, struct dentry *); extern int vfs_unlink(struct inode *, struct dentry *); -- cgit v1.2.3 From 8bb79224b87aab92071e94d46e70bd160d89bf34 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Jul 2008 09:51:03 -0400 Subject: [PATCH] permission checks for chdir need special treatment only on the last step ... so we ought to pass MAY_CHDIR to vfs_permission() instead of having it triggered on every step of preceding pathname resolution. LOOKUP_CHDIR is killed by that. 
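As an illustration only, here is a minimal sketch of the sys_chdir() pattern this change aims at (it mirrors the fs/open.c hunk further below; error handling and the pwd update are elided): the lookup itself no longer carries a chdir-specific flag, and the extra bit is supplied only at the final permission check on the already-resolved directory.

        error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd);
        if (!error)
                /* only the last component gets the chdir-specific check */
                error = vfs_permission(&nd, MAY_EXEC | MAY_CHDIR);
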
Signed-off-by: Al Viro --- fs/namei.c | 2 -- fs/open.c | 5 ++--- include/linux/namei.h | 1 - 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index ae0e56fdb742..6c76e1ee9c45 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -268,8 +268,6 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) if (nd) { if (nd->flags & LOOKUP_ACCESS) extra |= MAY_ACCESS; - if (nd->flags & LOOKUP_CHDIR) - extra |= MAY_CHDIR; if (nd->flags & LOOKUP_OPEN) extra |= MAY_OPEN; } diff --git a/fs/open.c b/fs/open.c index b2e4c93aed03..8e02d42bfe44 100644 --- a/fs/open.c +++ b/fs/open.c @@ -501,12 +501,11 @@ asmlinkage long sys_chdir(const char __user * filename) struct nameidata nd; int error; - error = __user_walk(filename, - LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd); + error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); if (error) goto out; - error = vfs_permission(&nd, MAY_EXEC); + error = vfs_permission(&nd, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; diff --git a/include/linux/namei.h b/include/linux/namei.h index 24d88e98a626..3cf62d26d493 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -55,7 +55,6 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_OPEN (0x0100) #define LOOKUP_CREATE (0x0200) #define LOOKUP_ACCESS (0x0400) -#define LOOKUP_CHDIR (0x0800) extern int __user_walk(const char __user *, unsigned, struct nameidata *); extern int __user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *); -- cgit v1.2.3 From 7f2da1e7d0330395e5e9e350b879b98a1ea495df Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 May 2008 20:44:54 -0400 Subject: [PATCH] kill altroot long overdue... Signed-off-by: Al Viro --- fs/namei.c | 89 +------------------------------------------ fs/namespace.c | 8 +--- fs/open.c | 3 +- include/asm-alpha/namei.h | 17 --------- include/asm-arm/namei.h | 25 ------------ include/asm-avr32/namei.h | 7 ---- include/asm-blackfin/namei.h | 19 --------- include/asm-cris/namei.h | 17 --------- include/asm-frv/namei.h | 18 --------- include/asm-h8300/namei.h | 17 --------- include/asm-ia64/namei.h | 25 ------------ include/asm-m32r/namei.h | 17 --------- include/asm-m68k/namei.h | 17 --------- include/asm-m68knommu/namei.h | 1 - include/asm-mips/namei.h | 11 ------ include/asm-mn10300/namei.h | 22 ----------- include/asm-parisc/namei.h | 17 --------- include/asm-powerpc/namei.h | 20 ---------- include/asm-s390/namei.h | 21 ---------- include/asm-sh/namei.h | 17 --------- include/asm-sparc/namei.h | 8 ---- include/asm-sparc64/namei.h | 1 - include/asm-um/namei.h | 6 --- include/asm-v850/namei.h | 17 --------- include/asm-x86/namei.h | 11 ------ include/asm-xtensa/namei.h | 26 ------------- include/linux/fs_struct.h | 3 +- include/linux/namei.h | 1 - kernel/exec_domain.c | 1 - kernel/exit.c | 2 - kernel/fork.c | 7 ---- 31 files changed, 5 insertions(+), 466 deletions(-) delete mode 100644 include/asm-alpha/namei.h delete mode 100644 include/asm-arm/namei.h delete mode 100644 include/asm-avr32/namei.h delete mode 100644 include/asm-blackfin/namei.h delete mode 100644 include/asm-cris/namei.h delete mode 100644 include/asm-frv/namei.h delete mode 100644 include/asm-h8300/namei.h delete mode 100644 include/asm-ia64/namei.h delete mode 100644 include/asm-m32r/namei.h delete mode 100644 include/asm-m68k/namei.h delete mode 100644 include/asm-m68knommu/namei.h delete mode 100644 include/asm-mips/namei.h delete mode 100644 
include/asm-mn10300/namei.h delete mode 100644 include/asm-parisc/namei.h delete mode 100644 include/asm-powerpc/namei.h delete mode 100644 include/asm-s390/namei.h delete mode 100644 include/asm-sh/namei.h delete mode 100644 include/asm-sparc/namei.h delete mode 100644 include/asm-sparc64/namei.h delete mode 100644 include/asm-um/namei.h delete mode 100644 include/asm-v850/namei.h delete mode 100644 include/asm-x86/namei.h delete mode 100644 include/asm-xtensa/namei.h (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 6c76e1ee9c45..095818089ac1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) @@ -562,27 +561,16 @@ out_unlock: return result; } -static int __emul_lookup_dentry(const char *, struct nameidata *); - /* SMP-safe */ -static __always_inline int +static __always_inline void walk_init_root(const char *name, struct nameidata *nd) { struct fs_struct *fs = current->fs; read_lock(&fs->lock); - if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) { - nd->path = fs->altroot; - path_get(&fs->altroot); - read_unlock(&fs->lock); - if (__emul_lookup_dentry(name,nd)) - return 0; - read_lock(&fs->lock); - } nd->path = fs->root; path_get(&fs->root); read_unlock(&fs->lock); - return 1; } /* @@ -623,12 +611,9 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l if (*link == '/') { path_put(&nd->path); - if (!walk_init_root(link, nd)) - /* weird __emul_prefix() stuff did it */ - goto out; + walk_init_root(link, nd); } res = link_path_walk(link, nd); -out: if (nd->depth || res || nd->last_type!=LAST_NORM) return res; /* @@ -1077,67 +1062,6 @@ static int path_walk(const char *name, struct nameidata *nd) return link_path_walk(name, nd); } -/* - * SMP-safe: Returns 1 and nd will have valid dentry and mnt, if - * everything is done. Returns 0 and drops input nd, if lookup failed; - */ -static int __emul_lookup_dentry(const char *name, struct nameidata *nd) -{ - if (path_walk(name, nd)) - return 0; /* something went wrong... */ - - if (!nd->path.dentry->d_inode || - S_ISDIR(nd->path.dentry->d_inode->i_mode)) { - struct path old_path = nd->path; - struct qstr last = nd->last; - int last_type = nd->last_type; - struct fs_struct *fs = current->fs; - - /* - * NAME was not found in alternate root or it's a directory. - * Try to find it in the normal root: - */ - nd->last_type = LAST_ROOT; - read_lock(&fs->lock); - nd->path = fs->root; - path_get(&fs->root); - read_unlock(&fs->lock); - if (path_walk(name, nd) == 0) { - if (nd->path.dentry->d_inode) { - path_put(&old_path); - return 1; - } - path_put(&nd->path); - } - nd->path = old_path; - nd->last = last; - nd->last_type = last_type; - } - return 1; -} - -void set_fs_altroot(void) -{ - char *emul = __emul_prefix(); - struct nameidata nd; - struct path path = {}, old_path; - int err; - struct fs_struct *fs = current->fs; - - if (!emul) - goto set_it; - err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd); - if (!err) - path = nd.path; -set_it: - write_lock(&fs->lock); - old_path = fs->altroot; - fs->altroot = path; - write_unlock(&fs->lock); - if (old_path.dentry) - path_put(&old_path); -} - /* Returns 0 and nd will be valid on success; Retuns error, otherwise. 
*/ static int do_path_lookup(int dfd, const char *name, unsigned int flags, struct nameidata *nd) @@ -1153,14 +1077,6 @@ static int do_path_lookup(int dfd, const char *name, if (*name=='/') { read_lock(&fs->lock); - if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) { - nd->path = fs->altroot; - path_get(&fs->altroot); - read_unlock(&fs->lock); - if (__emul_lookup_dentry(name,nd)) - goto out; /* found in altroot */ - read_lock(&fs->lock); - } nd->path = fs->root; path_get(&fs->root); read_unlock(&fs->lock); @@ -1194,7 +1110,6 @@ static int do_path_lookup(int dfd, const char *name, } retval = path_walk(name, nd); -out: if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && nd->path.dentry->d_inode)) audit_inode(name, nd->path.dentry); diff --git a/fs/namespace.c b/fs/namespace.c index f30b11e2240e..c4fcf48acef8 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1972,7 +1972,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, struct fs_struct *fs) { struct mnt_namespace *new_ns; - struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL; + struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; struct vfsmount *p, *q; new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); @@ -2015,10 +2015,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, pwdmnt = p; fs->pwd.mnt = mntget(q); } - if (p == fs->altroot.mnt) { - altrootmnt = p; - fs->altroot.mnt = mntget(q); - } } p = next_mnt(p, mnt_ns->root); q = next_mnt(q, new_ns->root); @@ -2029,8 +2025,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, mntput(rootmnt); if (pwdmnt) mntput(pwdmnt); - if (altrootmnt) - mntput(altrootmnt); return new_ns; } diff --git a/fs/open.c b/fs/open.c index 8e02d42bfe44..d3a2a00f52dc 100644 --- a/fs/open.c +++ b/fs/open.c @@ -548,7 +548,7 @@ asmlinkage long sys_chroot(const char __user * filename) struct nameidata nd; int error; - error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); + error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd); if (error) goto out; @@ -561,7 +561,6 @@ asmlinkage long sys_chroot(const char __user * filename) goto dput_and_out; set_fs_root(current->fs, &nd.path); - set_fs_altroot(); error = 0; dput_and_out: path_put(&nd.path); diff --git a/include/asm-alpha/namei.h b/include/asm-alpha/namei.h deleted file mode 100644 index 5cc9bb39499d..000000000000 --- a/include/asm-alpha/namei.h +++ /dev/null @@ -1,17 +0,0 @@ -/* $Id: namei.h,v 1.1 1996/12/13 14:48:21 jj Exp $ - * linux/include/asm-alpha/namei.h - * - * Included from linux/fs/namei.c - */ - -#ifndef __ALPHA_NAMEI_H -#define __ALPHA_NAMEI_H - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. 
- */ - -#define __emul_prefix() NULL - -#endif /* __ALPHA_NAMEI_H */ diff --git a/include/asm-arm/namei.h b/include/asm-arm/namei.h deleted file mode 100644 index a402d3b9d0f7..000000000000 --- a/include/asm-arm/namei.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * linux/include/asm-arm/namei.h - * - * Routines to handle famous /usr/gnemul - * Derived from the Sparc version of this file - * - * Included from linux/fs/namei.c - */ - -#ifndef __ASMARM_NAMEI_H -#define __ASMARM_NAMEI_H - -#define ARM_BSD_EMUL "usr/gnemul/bsd/" - -static inline char *__emul_prefix(void) -{ - switch (current->personality) { - case PER_BSD: - return ARM_BSD_EMUL; - default: - return NULL; - } -} - -#endif /* __ASMARM_NAMEI_H */ diff --git a/include/asm-avr32/namei.h b/include/asm-avr32/namei.h deleted file mode 100644 index f0a26de06cab..000000000000 --- a/include/asm-avr32/namei.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef __ASM_AVR32_NAMEI_H -#define __ASM_AVR32_NAMEI_H - -/* This dummy routine may be changed to something useful */ -#define __emul_prefix() NULL - -#endif /* __ASM_AVR32_NAMEI_H */ diff --git a/include/asm-blackfin/namei.h b/include/asm-blackfin/namei.h deleted file mode 100644 index 8b89a2d65cb4..000000000000 --- a/include/asm-blackfin/namei.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * linux/include/asm/namei.h - * - * Included from linux/fs/namei.c - * - * Changes made by Lineo Inc. May 2001 - */ - -#ifndef __BFIN_NAMEI_H -#define __BFIN_NAMEI_H - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. - */ - -#define __emul_prefix() NULL - -#endif diff --git a/include/asm-cris/namei.h b/include/asm-cris/namei.h deleted file mode 100644 index 8a3be7a6d9f6..000000000000 --- a/include/asm-cris/namei.h +++ /dev/null @@ -1,17 +0,0 @@ -/* $Id: namei.h,v 1.1 2000/07/10 16:32:31 bjornw Exp $ - * linux/include/asm-cris/namei.h - * - * Included from linux/fs/namei.c - */ - -#ifndef __CRIS_NAMEI_H -#define __CRIS_NAMEI_H - -/* used to find file-system prefixes for doing emulations - * see for example asm-sparc/namei.h - * we don't use it... - */ - -#define __emul_prefix() NULL - -#endif /* __CRIS_NAMEI_H */ diff --git a/include/asm-frv/namei.h b/include/asm-frv/namei.h deleted file mode 100644 index 4ea57171d951..000000000000 --- a/include/asm-frv/namei.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * include/asm-frv/namei.h - * - * Included from linux/fs/namei.c - */ - -#ifndef __ASM_NAMEI_H -#define __ASM_NAMEI_H - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. - */ - -#define __emul_prefix() NULL - -#endif - diff --git a/include/asm-h8300/namei.h b/include/asm-h8300/namei.h deleted file mode 100644 index ab6f196db6e0..000000000000 --- a/include/asm-h8300/namei.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * linux/include/asm-h8300/namei.h - * - * Included from linux/fs/namei.c - */ - -#ifndef __H8300_NAMEI_H -#define __H8300_NAMEI_H - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. 
- */ - -#define __emul_prefix() NULL - -#endif diff --git a/include/asm-ia64/namei.h b/include/asm-ia64/namei.h deleted file mode 100644 index 78e768079083..000000000000 --- a/include/asm-ia64/namei.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _ASM_IA64_NAMEI_H -#define _ASM_IA64_NAMEI_H - -/* - * Modified 1998, 1999, 2001 - * David Mosberger-Tang , Hewlett-Packard Co - */ - -#include -#include - -#define EMUL_PREFIX_LINUX_IA32 "/emul/ia32-linux/" - -static inline char * -__emul_prefix (void) -{ - switch (current->personality) { - case PER_LINUX32: - return EMUL_PREFIX_LINUX_IA32; - default: - return NULL; - } -} - -#endif /* _ASM_IA64_NAMEI_H */ diff --git a/include/asm-m32r/namei.h b/include/asm-m32r/namei.h deleted file mode 100644 index 210f8056b805..000000000000 --- a/include/asm-m32r/namei.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _ASM_M32R_NAMEI_H -#define _ASM_M32R_NAMEI_H - -/* - * linux/include/asm-m32r/namei.h - * - * Included from linux/fs/namei.c - */ - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. - */ - -#define __emul_prefix() NULL - -#endif /* _ASM_M32R_NAMEI_H */ diff --git a/include/asm-m68k/namei.h b/include/asm-m68k/namei.h deleted file mode 100644 index f33f243b644a..000000000000 --- a/include/asm-m68k/namei.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * linux/include/asm-m68k/namei.h - * - * Included from linux/fs/namei.c - */ - -#ifndef __M68K_NAMEI_H -#define __M68K_NAMEI_H - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. - */ - -#define __emul_prefix() NULL - -#endif diff --git a/include/asm-m68knommu/namei.h b/include/asm-m68knommu/namei.h deleted file mode 100644 index 31a85d27b931..000000000000 --- a/include/asm-m68knommu/namei.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-mips/namei.h b/include/asm-mips/namei.h deleted file mode 100644 index a6605a752469..000000000000 --- a/include/asm-mips/namei.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _ASM_NAMEI_H -#define _ASM_NAMEI_H - -/* - * This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - */ - -#define __emul_prefix() NULL - -#endif /* _ASM_NAMEI_H */ diff --git a/include/asm-mn10300/namei.h b/include/asm-mn10300/namei.h deleted file mode 100644 index bd9ce94aeb65..000000000000 --- a/include/asm-mn10300/namei.h +++ /dev/null @@ -1,22 +0,0 @@ -/* Emulation stuff - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#ifndef _ASM_NAMEI_H -#define _ASM_NAMEI_H - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. 
- */ - -#define __emul_prefix() NULL - -#endif /* _ASM_NAMEI_H */ diff --git a/include/asm-parisc/namei.h b/include/asm-parisc/namei.h deleted file mode 100644 index 8d29b3d9fb33..000000000000 --- a/include/asm-parisc/namei.h +++ /dev/null @@ -1,17 +0,0 @@ -/* $Id: namei.h,v 1.1 1996/12/13 14:48:21 jj Exp $ - * linux/include/asm-parisc/namei.h - * - * Included from linux/fs/namei.c - */ - -#ifndef __PARISC_NAMEI_H -#define __PARISC_NAMEI_H - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. - */ - -#define __emul_prefix() NULL - -#endif /* __PARISC_NAMEI_H */ diff --git a/include/asm-powerpc/namei.h b/include/asm-powerpc/namei.h deleted file mode 100644 index 657443474a6a..000000000000 --- a/include/asm-powerpc/namei.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef _ASM_POWERPC_NAMEI_H -#define _ASM_POWERPC_NAMEI_H - -#ifdef __KERNEL__ - -/* - * Adapted from include/asm-alpha/namei.h - * - * Included from fs/namei.c - */ - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. - */ - -#define __emul_prefix() NULL - -#endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_NAMEI_H */ diff --git a/include/asm-s390/namei.h b/include/asm-s390/namei.h deleted file mode 100644 index 3e286bdde4b0..000000000000 --- a/include/asm-s390/namei.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * include/asm-s390/namei.h - * - * S390 version - * - * Derived from "include/asm-i386/namei.h" - * - * Included from linux/fs/namei.c - */ - -#ifndef __S390_NAMEI_H -#define __S390_NAMEI_H - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. - */ - -#define __emul_prefix() NULL - -#endif /* __S390_NAMEI_H */ diff --git a/include/asm-sh/namei.h b/include/asm-sh/namei.h deleted file mode 100644 index 338a5d947143..000000000000 --- a/include/asm-sh/namei.h +++ /dev/null @@ -1,17 +0,0 @@ -/* $Id: namei.h,v 1.3 2000/07/04 06:24:49 gniibe Exp $ - * linux/include/asm-sh/namei.h - * - * Included from linux/fs/namei.c - */ - -#ifndef __ASM_SH_NAMEI_H -#define __ASM_SH_NAMEI_H - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. 
- */ - -#define __emul_prefix() NULL - -#endif /* __ASM_SH_NAMEI_H */ diff --git a/include/asm-sparc/namei.h b/include/asm-sparc/namei.h deleted file mode 100644 index eff944b8e321..000000000000 --- a/include/asm-sparc/namei.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef ___ASM_SPARC_NAMEI_H -#define ___ASM_SPARC_NAMEI_H -#if defined(__sparc__) && defined(__arch64__) -#include -#else -#include -#endif -#endif diff --git a/include/asm-sparc64/namei.h b/include/asm-sparc64/namei.h deleted file mode 100644 index 1344a910ba2f..000000000000 --- a/include/asm-sparc64/namei.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-um/namei.h b/include/asm-um/namei.h deleted file mode 100644 index 002984d5bc85..000000000000 --- a/include/asm-um/namei.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __UM_NAMEI_H -#define __UM_NAMEI_H - -#include "asm/arch/namei.h" - -#endif diff --git a/include/asm-v850/namei.h b/include/asm-v850/namei.h deleted file mode 100644 index ee8339b23843..000000000000 --- a/include/asm-v850/namei.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * linux/include/asm-v850/namei.h - * - * Included from linux/fs/namei.c - */ - -#ifndef __V850_NAMEI_H__ -#define __V850_NAMEI_H__ - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. - */ - -#define __emul_prefix() NULL - -#endif /* __V850_NAMEI_H__ */ diff --git a/include/asm-x86/namei.h b/include/asm-x86/namei.h deleted file mode 100644 index 415ef5d9550e..000000000000 --- a/include/asm-x86/namei.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _ASM_X86_NAMEI_H -#define _ASM_X86_NAMEI_H - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. - */ - -#define __emul_prefix() NULL - -#endif /* _ASM_X86_NAMEI_H */ diff --git a/include/asm-xtensa/namei.h b/include/asm-xtensa/namei.h deleted file mode 100644 index 3fdff039d27d..000000000000 --- a/include/asm-xtensa/namei.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * include/asm-xtensa/namei.h - * - * Included from linux/fs/namei.c - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2001 - 2005 Tensilica Inc. - */ - -#ifndef _XTENSA_NAMEI_H -#define _XTENSA_NAMEI_H - -#ifdef __KERNEL__ - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. 
- */ - -#define __emul_prefix() NULL - -#endif /* __KERNEL__ */ -#endif /* _XTENSA_NAMEI_H */ diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 282f54219129..9e5a06e78d02 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -7,7 +7,7 @@ struct fs_struct { atomic_t count; rwlock_t lock; int umask; - struct path root, pwd, altroot; + struct path root, pwd; }; #define INIT_FS { \ @@ -19,7 +19,6 @@ struct fs_struct { extern struct kmem_cache *fs_cachep; extern void exit_fs(struct task_struct *); -extern void set_fs_altroot(void); extern void set_fs_root(struct fs_struct *, struct path *); extern void set_fs_pwd(struct fs_struct *, struct path *); extern struct fs_struct *copy_fs_struct(struct fs_struct *); diff --git a/include/linux/namei.h b/include/linux/namei.h index 3cf62d26d493..768773d57857 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -47,7 +47,6 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_DIRECTORY 2 #define LOOKUP_CONTINUE 4 #define LOOKUP_PARENT 16 -#define LOOKUP_NOALT 32 #define LOOKUP_REVAL 64 /* * Intent data diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index c1ef192aa655..0d407e886735 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -168,7 +168,6 @@ __set_personality(u_long personality) current->personality = personality; oep = current_thread_info()->exec_domain; current_thread_info()->exec_domain = ep; - set_fs_altroot(); module_put(oep->module); return 0; diff --git a/kernel/exit.c b/kernel/exit.c index 6cdf60712bd2..0caf590548a0 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -565,8 +565,6 @@ void put_fs_struct(struct fs_struct *fs) if (atomic_dec_and_test(&fs->count)) { path_put(&fs->root); path_put(&fs->pwd); - if (fs->altroot.dentry) - path_put(&fs->altroot); kmem_cache_free(fs_cachep, fs); } } diff --git a/kernel/fork.c b/kernel/fork.c index abb3ed6298f6..5e050c1317c4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -657,13 +657,6 @@ static struct fs_struct *__copy_fs_struct(struct fs_struct *old) path_get(&old->root); fs->pwd = old->pwd; path_get(&old->pwd); - if (old->altroot.dentry) { - fs->altroot = old->altroot; - path_get(&old->altroot); - } else { - fs->altroot.mnt = NULL; - fs->altroot.dentry = NULL; - } read_unlock(&old->lock); } return fs; -- cgit v1.2.3 From a110343f0d6d41f68b7cf8c00b57a3172c67f816 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jul 2008 09:19:08 -0400 Subject: [PATCH] fix MAY_CHDIR/MAY_ACCESS/LOOKUP_ACCESS mess * MAY_CHDIR is redundant - it's an equivalent of MAY_ACCESS * MAY_ACCESS on fuse should affect only the last step of pathname resolution * fchdir() and chroot() should pass MAY_ACCESS, for the same reason why chdir() needs that. * now that we pass MAY_ACCESS explicitly in all cases, LOOKUP_ACCESS can be removed; it has no business being in nameidata. Signed-off-by: Al Viro --- fs/fuse/dir.c | 2 +- fs/namei.c | 2 -- fs/open.c | 10 +++++----- include/linux/fs.h | 3 +-- include/linux/namei.h | 1 - 5 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 48a7934cb950..fd03330cadeb 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -962,7 +962,7 @@ static int fuse_permission(struct inode *inode, int mask) exist. 
So if permissions are revoked this won't be noticed immediately, only after the attribute timeout has expired */ - } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { + } else if (mask & MAY_ACCESS) { err = fuse_access(inode, mask); } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { if (!(inode->i_mode & S_IXUGO)) { diff --git a/fs/namei.c b/fs/namei.c index 095818089ac1..33dcaf025c49 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -265,8 +265,6 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) if (inode->i_op && inode->i_op->permission) { int extra = 0; if (nd) { - if (nd->flags & LOOKUP_ACCESS) - extra |= MAY_ACCESS; if (nd->flags & LOOKUP_OPEN) extra |= MAY_OPEN; } diff --git a/fs/open.c b/fs/open.c index d3a2a00f52dc..3317e1909b2c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -457,11 +457,11 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) old_cap = cap_set_effective(current->cap_permitted); } - res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); + res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); if (res) goto out; - res = vfs_permission(&nd, mode); + res = vfs_permission(&nd, mode | MAY_ACCESS); /* SuS v2 requires we report a read only fs too */ if(res || !(mode & S_IWOTH) || special_file(nd.path.dentry->d_inode->i_mode)) @@ -505,7 +505,7 @@ asmlinkage long sys_chdir(const char __user * filename) if (error) goto out; - error = vfs_permission(&nd, MAY_EXEC | MAY_CHDIR); + error = vfs_permission(&nd, MAY_EXEC | MAY_ACCESS); if (error) goto dput_and_out; @@ -534,7 +534,7 @@ asmlinkage long sys_fchdir(unsigned int fd) if (!S_ISDIR(inode->i_mode)) goto out_putf; - error = file_permission(file, MAY_EXEC); + error = file_permission(file, MAY_EXEC | MAY_ACCESS); if (!error) set_fs_pwd(current->fs, &file->f_path); out_putf: @@ -552,7 +552,7 @@ asmlinkage long sys_chroot(const char __user * filename) if (error) goto out; - error = vfs_permission(&nd, MAY_EXEC); + error = vfs_permission(&nd, MAY_EXEC | MAY_ACCESS); if (error) goto dput_and_out; diff --git a/include/linux/fs.h b/include/linux/fs.h index 25998e803fc2..d8721e818b45 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -61,8 +61,7 @@ extern int dir_notify_enable; #define MAY_READ 4 #define MAY_APPEND 8 #define MAY_ACCESS 16 -#define MAY_CHDIR 32 -#define MAY_OPEN 64 +#define MAY_OPEN 32 #define FMODE_READ 1 #define FMODE_WRITE 2 diff --git a/include/linux/namei.h b/include/linux/namei.h index 768773d57857..60e35a02f6cb 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -53,7 +53,6 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; */ #define LOOKUP_OPEN (0x0100) #define LOOKUP_CREATE (0x0200) -#define LOOKUP_ACCESS (0x0400) extern int __user_walk(const char __user *, unsigned, struct nameidata *); extern int __user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *); -- cgit v1.2.3 From b77b0646ef4efe31a7449bb3d9360fd00f95433d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jul 2008 09:37:02 -0400 Subject: [PATCH] pass MAY_OPEN to vfs_permission() explicitly ... and get rid of the last "let's deduce mask from nameidata->flags" bit. 
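Illustratively, the open paths now OR MAY_OPEN into the access mask themselves rather than having permission() deduce it from nd->flags; a minimal sketch mirroring the fs/namei.c and fs/exec.c hunks below (surrounding code elided):

        /* do_filp_open(): the mask carries MAY_OPEN from the start */
        acc_mode = MAY_OPEN | ACC_MODE(flag);

        /* open_exec(): likewise passed explicitly alongside the exec check */
        err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN);
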
Signed-off-by: Al Viro --- fs/exec.c | 4 ++-- fs/namei.c | 13 ++++--------- include/linux/security.h | 7 +++---- security/capability.c | 3 +-- security/security.c | 4 ++-- security/selinux/hooks.c | 5 ++--- security/smack/smack_lsm.c | 3 +-- 7 files changed, 15 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index b8792a131533..0ba5d355c5a1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -118,7 +118,7 @@ asmlinkage long sys_uselib(const char __user * library) if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) goto exit; - error = vfs_permission(&nd, MAY_READ | MAY_EXEC); + error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN); if (error) goto exit; @@ -666,7 +666,7 @@ struct file *open_exec(const char *name) struct inode *inode = nd.path.dentry->d_inode; file = ERR_PTR(-EACCES); if (S_ISREG(inode->i_mode)) { - int err = vfs_permission(&nd, MAY_EXEC); + int err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN); file = ERR_PTR(err); if (!err) { file = nameidata_to_filp(&nd, diff --git a/fs/namei.c b/fs/namei.c index 33dcaf025c49..6b0e8e5e079e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -263,12 +263,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) /* Ordinary permission routines do not understand MAY_APPEND. */ if (inode->i_op && inode->i_op->permission) { - int extra = 0; - if (nd) { - if (nd->flags & LOOKUP_OPEN) - extra |= MAY_OPEN; - } - retval = inode->i_op->permission(inode, mask | extra); + retval = inode->i_op->permission(inode, mask); if (!retval) { /* * Exec permission on a regular file is denied if none @@ -292,7 +287,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) return retval; return security_inode_permission(inode, - mask & (MAY_READ|MAY_WRITE|MAY_EXEC), nd); + mask & (MAY_READ|MAY_WRITE|MAY_EXEC)); } /** @@ -492,7 +487,7 @@ static int exec_permission_lite(struct inode *inode, return -EACCES; ok: - return security_inode_permission(inode, MAY_EXEC, nd); + return security_inode_permission(inode, MAY_EXEC); } /* @@ -1692,7 +1687,7 @@ struct file *do_filp_open(int dfd, const char *pathname, int will_write; int flag = open_to_namei_flags(open_flag); - acc_mode = ACC_MODE(flag); + acc_mode = MAY_OPEN | ACC_MODE(flag); /* O_TRUNC implies we need access checks for write permissions */ if (flag & O_TRUNC) diff --git a/include/linux/security.h b/include/linux/security.h index f0e9adb22ac2..fd96e7f8a6f9 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1362,7 +1362,7 @@ struct security_operations { struct inode *new_dir, struct dentry *new_dentry); int (*inode_readlink) (struct dentry *dentry); int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd); - int (*inode_permission) (struct inode *inode, int mask, struct nameidata *nd); + int (*inode_permission) (struct inode *inode, int mask); int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry); void (*inode_delete) (struct inode *inode); @@ -1628,7 +1628,7 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); int security_inode_readlink(struct dentry *dentry); int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); -int security_inode_permission(struct inode *inode, int mask, struct nameidata *nd); +int security_inode_permission(struct inode *inode, int mask); int security_inode_setattr(struct dentry *dentry, struct iattr *attr); int 
security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry); void security_inode_delete(struct inode *inode); @@ -2021,8 +2021,7 @@ static inline int security_inode_follow_link(struct dentry *dentry, return 0; } -static inline int security_inode_permission(struct inode *inode, int mask, - struct nameidata *nd) +static inline int security_inode_permission(struct inode *inode, int mask) { return 0; } diff --git a/security/capability.c b/security/capability.c index 5b01c0b02422..63d10da515a5 100644 --- a/security/capability.c +++ b/security/capability.c @@ -211,8 +211,7 @@ static int cap_inode_follow_link(struct dentry *dentry, return 0; } -static int cap_inode_permission(struct inode *inode, int mask, - struct nameidata *nd) +static int cap_inode_permission(struct inode *inode, int mask) { return 0; } diff --git a/security/security.c b/security/security.c index 59f23b5918b3..78ed3ffde242 100644 --- a/security/security.c +++ b/security/security.c @@ -429,11 +429,11 @@ int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd) return security_ops->inode_follow_link(dentry, nd); } -int security_inode_permission(struct inode *inode, int mask, struct nameidata *nd) +int security_inode_permission(struct inode *inode, int mask) { if (unlikely(IS_PRIVATE(inode))) return 0; - return security_ops->inode_permission(inode, mask, nd); + return security_ops->inode_permission(inode, mask); } int security_inode_setattr(struct dentry *dentry, struct iattr *attr) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 3481cde5bf15..5ba13908b5b4 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2624,12 +2624,11 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *na return dentry_has_perm(current, NULL, dentry, FILE__READ); } -static int selinux_inode_permission(struct inode *inode, int mask, - struct nameidata *nd) +static int selinux_inode_permission(struct inode *inode, int mask) { int rc; - rc = secondary_ops->inode_permission(inode, mask, nd); + rc = secondary_ops->inode_permission(inode, mask); if (rc) return rc; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index ee5a51cbc5eb..1b40e558f983 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -522,8 +522,7 @@ static int smack_inode_rename(struct inode *old_inode, * * Returns 0 if access is permitted, -EACCES otherwise */ -static int smack_inode_permission(struct inode *inode, int mask, - struct nameidata *nd) +static int smack_inode_permission(struct inode *inode, int mask) { /* * No permission to check. Existence test. Yup, it's there. 
-- cgit v1.2.3 From 88b387824fdaecb6ba0f471acf0aadf7d24739fd Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 21 Jul 2008 18:06:36 +0800 Subject: [PATCH] vfs: use kstrdup() and check failing allocation - use kstrdup() instead of kmalloc() + memcpy() - return NULL if allocating ->mnt_devname failed - mnt_devname should be const Signed-off-by: Li Zefan Acked-by: Cyrill Gorcunov Signed-off-by: Al Viro --- fs/namespace.c | 24 +++++++++++++----------- include/linux/mount.h | 2 +- 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index c4fcf48acef8..26380f599534 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -112,9 +112,13 @@ struct vfsmount *alloc_vfsmnt(const char *name) int err; err = mnt_alloc_id(mnt); - if (err) { - kmem_cache_free(mnt_cache, mnt); - return NULL; + if (err) + goto out_free_cache; + + if (name) { + mnt->mnt_devname = kstrdup(name, GFP_KERNEL); + if (!mnt->mnt_devname) + goto out_free_id; } atomic_set(&mnt->mnt_count, 1); @@ -127,16 +131,14 @@ struct vfsmount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); atomic_set(&mnt->__mnt_writers, 0); - if (name) { - int size = strlen(name) + 1; - char *newname = kmalloc(size, GFP_KERNEL); - if (newname) { - memcpy(newname, name, size); - mnt->mnt_devname = newname; - } - } } return mnt; + +out_free_id: + mnt_free_id(mnt); +out_free_cache: + kmem_cache_free(mnt_cache, mnt); + return NULL; } /* diff --git a/include/linux/mount.h b/include/linux/mount.h index 4374d1adeb4b..b5efaa2132ab 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -47,7 +47,7 @@ struct vfsmount { struct list_head mnt_child; /* and going through their mnt_child */ int mnt_flags; /* 4 bytes hole on 64bits arches */ - char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ + const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ struct list_head mnt_list; struct list_head mnt_expire; /* link in fs-specific expiry list */ struct list_head mnt_share; /* circular list of shared mounts */ -- cgit v1.2.3 From 9767d74957450da6365c363d69e3d02d605d7375 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 1 Jul 2008 15:01:26 +0200 Subject: [patch 1/4] vfs: utimes: move owner check into inode_change_ok() Add a new ia_valid flag: ATTR_TIMES_SET, to handle the UTIMES_OMIT/UTIMES_NOW and UTIMES_NOW/UTIMES_OMIT cases. In these cases neither ATTR_MTIME_SET nor ATTR_ATIME_SET is in the flags, yet the POSIX draft specifies that permission checking is performed the same way as if one or both of the times was explicitly set to a timestamp. See the path "vfs: utimensat(): fix error checking for {UTIME_NOW,UTIME_OMIT} case" by Michael Kerrisk for the patch introducing this behavior. This is a cleanup, as well as allowing filesystems (NFS/fuse/...) to perform their own permission checking instead of the default. CC: Ulrich Drepper CC: Michael Kerrisk Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/attr.c | 2 +- fs/utimes.c | 17 ++++------------- include/linux/fs.h | 33 +++++++++++++++++---------------- 3 files changed, 22 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/fs/attr.c b/fs/attr.c index 966b73e25f82..765fc75fab3b 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -51,7 +51,7 @@ int inode_change_ok(struct inode *inode, struct iattr *attr) } /* Check for setting the inode time. 
*/ - if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) { + if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) { if (!is_owner_or_cap(inode)) goto error; } diff --git a/fs/utimes.c b/fs/utimes.c index b6b664e7145e..ecf8941ba34a 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -101,7 +101,6 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags times[1].tv_nsec == UTIME_NOW) times = NULL; - /* In most cases, the checks are done in inode_change_ok() */ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; if (times) { error = -EPERM; @@ -123,21 +122,13 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; newattrs.ia_valid |= ATTR_MTIME_SET; } - /* - * For the UTIME_OMIT/UTIME_NOW and UTIME_NOW/UTIME_OMIT - * cases, we need to make an extra check that is not done by - * inode_change_ok(). + * Tell inode_change_ok(), that this is an explicit time + * update, even if neither ATTR_ATIME_SET nor ATTR_MTIME_SET + * were used. */ - if (((times[0].tv_nsec == UTIME_NOW && - times[1].tv_nsec == UTIME_OMIT) - || - (times[0].tv_nsec == UTIME_OMIT && - times[1].tv_nsec == UTIME_NOW)) - && !is_owner_or_cap(inode)) - goto mnt_drop_write_and_out; + newattrs.ia_valid |= ATTR_TIMES_SET; } else { - /* * If times is NULL (or both times are UTIME_NOW), * then we need to check permissions, because diff --git a/include/linux/fs.h b/include/linux/fs.h index d8721e818b45..527b9e482f99 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -320,22 +320,23 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, * Attribute flags. These should be or-ed together to figure out what * has been changed! */ -#define ATTR_MODE 1 -#define ATTR_UID 2 -#define ATTR_GID 4 -#define ATTR_SIZE 8 -#define ATTR_ATIME 16 -#define ATTR_MTIME 32 -#define ATTR_CTIME 64 -#define ATTR_ATIME_SET 128 -#define ATTR_MTIME_SET 256 -#define ATTR_FORCE 512 /* Not a change, but a change it */ -#define ATTR_ATTR_FLAG 1024 -#define ATTR_KILL_SUID 2048 -#define ATTR_KILL_SGID 4096 -#define ATTR_FILE 8192 -#define ATTR_KILL_PRIV 16384 -#define ATTR_OPEN 32768 /* Truncating from open(O_TRUNC) */ +#define ATTR_MODE (1 << 0) +#define ATTR_UID (1 << 1) +#define ATTR_GID (1 << 2) +#define ATTR_SIZE (1 << 3) +#define ATTR_ATIME (1 << 4) +#define ATTR_MTIME (1 << 5) +#define ATTR_CTIME (1 << 6) +#define ATTR_ATIME_SET (1 << 7) +#define ATTR_MTIME_SET (1 << 8) +#define ATTR_FORCE (1 << 9) /* Not a change, but a change it */ +#define ATTR_ATTR_FLAG (1 << 10) +#define ATTR_KILL_SUID (1 << 11) +#define ATTR_KILL_SGID (1 << 12) +#define ATTR_FILE (1 << 13) +#define ATTR_KILL_PRIV (1 << 14) +#define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ +#define ATTR_TIMES_SET (1 << 16) /* * This is the Inode Attributes structure, used for notify_change(). It -- cgit v1.2.3 From f419a2e3b64def707e1384ee38abb77f99af5f6d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 22 Jul 2008 00:07:17 -0400 Subject: [PATCH] kill nameidata passing to permission(), rename to inode_permission() Incidentally, the name that gives hundreds of false positives on grep is not a good idea... 
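For callers the conversion is mechanical; a minimal before/after sketch, mirroring the hunks below (the inode variable is just a placeholder for whatever inode the caller already has):

        /* before: a nameidata parameter that callers almost always passed as NULL */
        error = permission(inode, MAY_WRITE | MAY_EXEC, NULL);

        /* after: no nameidata, and a name that is easy to grep for */
        error = inode_permission(inode, MAY_WRITE | MAY_EXEC);
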
Signed-off-by: Al Viro --- fs/ecryptfs/inode.c | 2 +- fs/namei.c | 22 +++++++++------------- fs/nfsd/nfsfh.c | 2 +- fs/nfsd/vfs.c | 4 ++-- fs/utimes.c | 2 +- fs/xattr.c | 2 +- include/linux/fs.h | 2 +- ipc/mqueue.c | 2 +- 8 files changed, 17 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index f25caf2b0887..89209f00f9c7 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -830,7 +830,7 @@ out: static int ecryptfs_permission(struct inode *inode, int mask) { - return permission(ecryptfs_inode_to_lower(inode), mask, NULL); + return inode_permission(ecryptfs_inode_to_lower(inode), mask); } /** diff --git a/fs/namei.c b/fs/namei.c index 396cb3e5c364..5029b93ebbd5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -227,13 +227,9 @@ int generic_permission(struct inode *inode, int mask, return -EACCES; } -int permission(struct inode *inode, int mask, struct nameidata *nd) +int inode_permission(struct inode *inode, int mask) { int retval; - struct vfsmount *mnt = NULL; - - if (nd) - mnt = nd->path.mnt; if (mask & MAY_WRITE) { umode_t mode = inode->i_mode; @@ -293,7 +289,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) */ int vfs_permission(struct nameidata *nd, int mask) { - return permission(nd->path.dentry->d_inode, mask, nd); + return inode_permission(nd->path.dentry->d_inode, mask); } /** @@ -310,7 +306,7 @@ int vfs_permission(struct nameidata *nd, int mask) */ int file_permission(struct file *file, int mask) { - return permission(file->f_path.dentry->d_inode, mask, NULL); + return inode_permission(file->f_path.dentry->d_inode, mask); } /* @@ -1262,7 +1258,7 @@ static struct dentry *lookup_hash(struct nameidata *nd) { int err; - err = permission(nd->path.dentry->d_inode, MAY_EXEC, nd); + err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC); if (err) return ERR_PTR(err); return __lookup_hash(&nd->last, nd->path.dentry, nd); @@ -1310,7 +1306,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) if (err) return ERR_PTR(err); - err = permission(base->d_inode, MAY_EXEC, NULL); + err = inode_permission(base->d_inode, MAY_EXEC); if (err) return ERR_PTR(err); return __lookup_hash(&this, base, NULL); @@ -1400,7 +1396,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir) BUG_ON(victim->d_parent->d_inode != dir); audit_inode_child(victim->d_name.name, victim, dir); - error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); + error = inode_permission(dir, MAY_WRITE | MAY_EXEC); if (error) return error; if (IS_APPEND(dir)) @@ -1437,7 +1433,7 @@ static inline int may_create(struct inode *dir, struct dentry *child, return -EEXIST; if (IS_DEADDIR(dir)) return -ENOENT; - return permission(dir,MAY_WRITE | MAY_EXEC, nd); + return inode_permission(dir, MAY_WRITE | MAY_EXEC); } /* @@ -2543,7 +2539,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, * we'll need to flip '..'. 
*/ if (new_dir != old_dir) { - error = permission(old_dentry->d_inode, MAY_WRITE, NULL); + error = inode_permission(old_dentry->d_inode, MAY_WRITE); if (error) return error; } @@ -2897,7 +2893,7 @@ EXPORT_SYMBOL(page_symlink); EXPORT_SYMBOL(page_symlink_inode_operations); EXPORT_SYMBOL(path_lookup); EXPORT_SYMBOL(vfs_path_lookup); -EXPORT_SYMBOL(permission); +EXPORT_SYMBOL(inode_permission); EXPORT_SYMBOL(vfs_permission); EXPORT_SYMBOL(file_permission); EXPORT_SYMBOL(unlock_rename); diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index f45451eb1e38..ea37c96f0445 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -51,7 +51,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) /* make sure parents give x permission to user */ int err; parent = dget_parent(tdentry); - err = permission(parent->d_inode, MAY_EXEC, NULL); + err = inode_permission(parent->d_inode, MAY_EXEC); if (err < 0) { dput(parent); break; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ad1ad59e3742..18060bed5267 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1953,12 +1953,12 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, return 0; /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */ - err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL); + err = inode_permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC)); /* Allow read access to binaries even when mode 111 */ if (err == -EACCES && S_ISREG(inode->i_mode) && acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) - err = permission(inode, MAY_EXEC, NULL); + err = inode_permission(inode, MAY_EXEC); return err? nfserrno(err) : 0; } diff --git a/fs/utimes.c b/fs/utimes.c index dad679d3a158..dc28b7826259 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -96,7 +96,7 @@ static int utimes_common(struct path *path, struct timespec *times) goto mnt_drop_write_and_out; if (!is_owner_or_cap(inode)) { - error = permission(inode, MAY_WRITE, NULL); + error = inode_permission(inode, MAY_WRITE); if (error) goto mnt_drop_write_and_out; } diff --git a/fs/xattr.c b/fs/xattr.c index 4706a8b1f495..b96222e05ba0 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -63,7 +63,7 @@ xattr_permission(struct inode *inode, const char *name, int mask) return -EPERM; } - return permission(inode, mask, NULL); + return inode_permission(inode, mask); } int diff --git a/include/linux/fs.h b/include/linux/fs.h index 527b9e482f99..9d2de4cadabd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1770,7 +1770,7 @@ extern int do_remount_sb(struct super_block *sb, int flags, extern sector_t bmap(struct inode *, sector_t); #endif extern int notify_change(struct dentry *, struct iattr *); -extern int permission(struct inode *, int, struct nameidata *); +extern int inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int, int (*check_acl)(struct inode *, int)); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 474984f9e032..96fb36cd9874 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -638,7 +638,7 @@ static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, return ERR_PTR(-EINVAL); } - if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL)) { + if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) { dput(dentry); mntput(mqueue_mnt); return ERR_PTR(-EACCES); -- cgit v1.2.3 From 2d8f30380ab8c706f4e0a8f1aaa22b5886e9ac8a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 22 Jul 2008 09:59:21 -0400 Subject: [PATCH] sanitize __user_walk_fd() et.al. * do not pass nameidata; struct path is all the callers want. 
* switch to new helpers: user_path_at(dfd, pathname, flags, &path) user_path(pathname, &path) user_lpath(pathname, &path) user_path_dir(pathname, &path) (fail if not a directory) The last 3 are trivial macro wrappers for the first one. * remove nameidata in callers. Signed-off-by: Al Viro --- arch/alpha/kernel/osf_sys.c | 10 ++-- arch/parisc/hpux/sys_hpux.c | 10 ++-- fs/coda/pioctl.c | 14 ++--- fs/compat.c | 20 +++---- fs/inotify_user.c | 22 ++++---- fs/namei.c | 36 ++++++------- fs/namespace.c | 74 +++++++++++++------------- fs/open.c | 124 +++++++++++++++++++++---------------------- fs/stat.c | 32 +++++------ fs/utimes.c | 8 +-- fs/xattr.c | 96 ++++++++++++++++----------------- fs/xfs/linux-2.6/xfs_ioctl.c | 14 +++-- include/linux/namei.h | 13 ++--- 13 files changed, 235 insertions(+), 238 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 32ca1b927307..6e943135f0e0 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -253,15 +253,15 @@ do_osf_statfs(struct dentry * dentry, struct osf_statfs __user *buffer, } asmlinkage int -osf_statfs(char __user *path, struct osf_statfs __user *buffer, unsigned long bufsiz) +osf_statfs(char __user *pathname, struct osf_statfs __user *buffer, unsigned long bufsiz) { - struct nameidata nd; + struct path path; int retval; - retval = user_path_walk(path, &nd); + retval = user_path(pathname, &path); if (!retval) { - retval = do_osf_statfs(nd.path.dentry, buffer, bufsiz); - path_put(&nd.path); + retval = do_osf_statfs(path.dentry, buffer, bufsiz); + path_put(&path); } return retval; } diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c index be255ebb609c..18072e03a019 100644 --- a/arch/parisc/hpux/sys_hpux.c +++ b/arch/parisc/hpux/sys_hpux.c @@ -210,19 +210,19 @@ static int vfs_statfs_hpux(struct dentry *dentry, struct hpux_statfs *buf) } /* hpux statfs */ -asmlinkage long hpux_statfs(const char __user *path, +asmlinkage long hpux_statfs(const char __user *pathname, struct hpux_statfs __user *buf) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (!error) { struct hpux_statfs tmp; - error = vfs_statfs_hpux(nd.path.dentry, &tmp); + error = vfs_statfs_hpux(path.dentry, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; - path_put(&nd.path); + path_put(&path); } return error; } diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index c38a98974fb0..c51365422aa8 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -49,7 +49,7 @@ static int coda_ioctl_permission(struct inode *inode, int mask) static int coda_pioctl(struct inode * inode, struct file * filp, unsigned int cmd, unsigned long user_data) { - struct nameidata nd; + struct path path; int error; struct PioctlData data; struct inode *target_inode = NULL; @@ -64,21 +64,21 @@ static int coda_pioctl(struct inode * inode, struct file * filp, * Look up the pathname. 
Note that the pathname is in * user memory, and namei takes care of this */ - if ( data.follow ) { - error = user_path_walk(data.path, &nd); + if (data.follow) { + error = user_path(data.path, &path); } else { - error = user_path_walk_link(data.path, &nd); + error = user_lpath(data.path, &path); } if ( error ) { return error; } else { - target_inode = nd.path.dentry->d_inode; + target_inode = path.dentry->d_inode; } /* return if it is not a Coda inode */ if ( target_inode->i_sb != inode->i_sb ) { - path_put(&nd.path); + path_put(&path); return -EINVAL; } @@ -87,7 +87,7 @@ static int coda_pioctl(struct inode * inode, struct file * filp, error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data); - path_put(&nd.path); + path_put(&path); return error; } diff --git a/fs/compat.c b/fs/compat.c index 106eba28ec5a..c9d1472e65c5 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -234,18 +234,18 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs * * The following statfs calls are copies of code from fs/open.c and * should be checked against those from time to time */ -asmlinkage long compat_sys_statfs(const char __user *path, struct compat_statfs __user *buf) +asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (!error) { struct kstatfs tmp; - error = vfs_statfs(nd.path.dentry, &tmp); + error = vfs_statfs(path.dentry, &tmp); if (!error) error = put_compat_statfs(buf, &tmp); - path_put(&nd.path); + path_put(&path); } return error; } @@ -299,21 +299,21 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat return 0; } -asmlinkage long compat_sys_statfs64(const char __user *path, compat_size_t sz, struct compat_statfs64 __user *buf) +asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) { - struct nameidata nd; + struct path path; int error; if (sz != sizeof(*buf)) return -EINVAL; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (!error) { struct kstatfs tmp; - error = vfs_statfs(nd.path.dentry, &tmp); + error = vfs_statfs(path.dentry, &tmp); if (!error) error = put_compat_statfs64(buf, &tmp); - path_put(&nd.path); + path_put(&path); } return error; } diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 9b99ebf28884..60249429a253 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -354,20 +354,20 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev) } /* - * find_inode - resolve a user-given path to a specific inode and return a nd + * find_inode - resolve a user-given path to a specific inode */ -static int find_inode(const char __user *dirname, struct nameidata *nd, +static int find_inode(const char __user *dirname, struct path *path, unsigned flags) { int error; - error = __user_walk(dirname, flags, nd); + error = user_path_at(AT_FDCWD, dirname, flags, path); if (error) return error; /* you can only watch an inode if you have read permissions on it */ - error = inode_permission(nd->path.dentry->d_inode, MAY_READ); + error = inode_permission(path->dentry->d_inode, MAY_READ); if (error) - path_put(&nd->path); + path_put(path); return error; } @@ -650,11 +650,11 @@ asmlinkage long sys_inotify_init(void) return sys_inotify_init1(0); } -asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) +asmlinkage long 
sys_inotify_add_watch(int fd, const char __user *pathname, u32 mask) { struct inode *inode; struct inotify_device *dev; - struct nameidata nd; + struct path path; struct file *filp; int ret, fput_needed; unsigned flags = 0; @@ -674,12 +674,12 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) if (mask & IN_ONLYDIR) flags |= LOOKUP_DIRECTORY; - ret = find_inode(path, &nd, flags); + ret = find_inode(pathname, &path, flags); if (unlikely(ret)) goto fput_and_out; - /* inode held in place by reference to nd; dev by fget on fd */ - inode = nd.path.dentry->d_inode; + /* inode held in place by reference to path; dev by fget on fd */ + inode = path.dentry->d_inode; dev = filp->private_data; mutex_lock(&dev->up_mutex); @@ -688,7 +688,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) ret = create_watch(dev, inode, mask); mutex_unlock(&dev->up_mutex); - path_put(&nd.path); + path_put(&path); fput_and_out: fput_light(filp, fput_needed); return ret; diff --git a/fs/namei.c b/fs/namei.c index 5029b93ebbd5..edb5e973f9b3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1334,24 +1334,24 @@ struct dentry *lookup_one_noperm(const char *name, struct dentry *base) return __lookup_hash(&this, base, NULL); } -int __user_walk_fd(int dfd, const char __user *name, unsigned flags, - struct nameidata *nd) +int user_path_at(int dfd, const char __user *name, unsigned flags, + struct path *path) { + struct nameidata nd; char *tmp = getname(name); int err = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - err = do_path_lookup(dfd, tmp, flags, nd); + + BUG_ON(flags & LOOKUP_PARENT); + + err = do_path_lookup(dfd, tmp, flags, &nd); putname(tmp); + if (!err) + *path = nd.path; } return err; } -int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) -{ - return __user_walk_fd(AT_FDCWD, name, flags, nd); -} - /* * It's inline, so penalty for filesystems that don't use sticky bit is * minimal. @@ -2446,7 +2446,8 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname, int flags) { struct dentry *new_dentry; - struct nameidata nd, old_nd; + struct nameidata nd; + struct path old_path; int error; char * to; @@ -2457,16 +2458,16 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname, if (IS_ERR(to)) return PTR_ERR(to); - error = __user_walk_fd(olddfd, oldname, - flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0, - &old_nd); + error = user_path_at(olddfd, oldname, + flags & AT_SYMLINK_FOLLOW ? 
LOOKUP_FOLLOW : 0, + &old_path); if (error) goto exit; error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); if (error) goto out; error = -EXDEV; - if (old_nd.path.mnt != nd.path.mnt) + if (old_path.mnt != nd.path.mnt) goto out_release; new_dentry = lookup_create(&nd, 0); error = PTR_ERR(new_dentry); @@ -2475,7 +2476,7 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname, error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; - error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry); + error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry); mnt_drop_write(nd.path.mnt); out_dput: dput(new_dentry); @@ -2484,7 +2485,7 @@ out_unlock: out_release: path_put(&nd.path); out: - path_put(&old_nd.path); + path_put(&old_path); exit: putname(to); @@ -2877,8 +2878,7 @@ const struct inode_operations page_symlink_inode_operations = { .put_link = page_put_link, }; -EXPORT_SYMBOL(__user_walk); -EXPORT_SYMBOL(__user_walk_fd); +EXPORT_SYMBOL(user_path_at); EXPORT_SYMBOL(follow_down); EXPORT_SYMBOL(follow_up); EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ diff --git a/fs/namespace.c b/fs/namespace.c index 26380f599534..411728c0c8bb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1130,27 +1130,27 @@ static int do_umount(struct vfsmount *mnt, int flags) asmlinkage long sys_umount(char __user * name, int flags) { - struct nameidata nd; + struct path path; int retval; - retval = __user_walk(name, LOOKUP_FOLLOW, &nd); + retval = user_path(name, &path); if (retval) goto out; retval = -EINVAL; - if (nd.path.dentry != nd.path.mnt->mnt_root) + if (path.dentry != path.mnt->mnt_root) goto dput_and_out; - if (!check_mnt(nd.path.mnt)) + if (!check_mnt(path.mnt)) goto dput_and_out; retval = -EPERM; if (!capable(CAP_SYS_ADMIN)) goto dput_and_out; - retval = do_umount(nd.path.mnt, flags); + retval = do_umount(path.mnt, flags); dput_and_out: /* we mustn't call path_put() as that would clear mnt_expiry_mark */ - dput(nd.path.dentry); - mntput_no_expire(nd.path.mnt); + dput(path.dentry); + mntput_no_expire(path.mnt); out: return retval; } @@ -2179,28 +2179,26 @@ asmlinkage long sys_pivot_root(const char __user * new_root, const char __user * put_old) { struct vfsmount *tmp; - struct nameidata new_nd, old_nd; - struct path parent_path, root_parent, root; + struct path new, old, parent_path, root_parent, root; int error; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, - &new_nd); + error = user_path_dir(new_root, &new); if (error) goto out0; error = -EINVAL; - if (!check_mnt(new_nd.path.mnt)) + if (!check_mnt(new.mnt)) goto out1; - error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old_nd); + error = user_path_dir(put_old, &old); if (error) goto out1; - error = security_sb_pivotroot(&old_nd.path, &new_nd.path); + error = security_sb_pivotroot(&old, &new); if (error) { - path_put(&old_nd.path); + path_put(&old); goto out1; } @@ -2209,69 +2207,69 @@ asmlinkage long sys_pivot_root(const char __user * new_root, path_get(¤t->fs->root); read_unlock(¤t->fs->lock); down_write(&namespace_sem); - mutex_lock(&old_nd.path.dentry->d_inode->i_mutex); + mutex_lock(&old.dentry->d_inode->i_mutex); error = -EINVAL; - if (IS_MNT_SHARED(old_nd.path.mnt) || - IS_MNT_SHARED(new_nd.path.mnt->mnt_parent) || + if (IS_MNT_SHARED(old.mnt) || + IS_MNT_SHARED(new.mnt->mnt_parent) || IS_MNT_SHARED(root.mnt->mnt_parent)) goto out2; if (!check_mnt(root.mnt)) goto out2; error = -ENOENT; - if 
(IS_DEADDIR(new_nd.path.dentry->d_inode)) + if (IS_DEADDIR(new.dentry->d_inode)) goto out2; - if (d_unhashed(new_nd.path.dentry) && !IS_ROOT(new_nd.path.dentry)) + if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry)) goto out2; - if (d_unhashed(old_nd.path.dentry) && !IS_ROOT(old_nd.path.dentry)) + if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry)) goto out2; error = -EBUSY; - if (new_nd.path.mnt == root.mnt || - old_nd.path.mnt == root.mnt) + if (new.mnt == root.mnt || + old.mnt == root.mnt) goto out2; /* loop, on the same file system */ error = -EINVAL; if (root.mnt->mnt_root != root.dentry) goto out2; /* not a mountpoint */ if (root.mnt->mnt_parent == root.mnt) goto out2; /* not attached */ - if (new_nd.path.mnt->mnt_root != new_nd.path.dentry) + if (new.mnt->mnt_root != new.dentry) goto out2; /* not a mountpoint */ - if (new_nd.path.mnt->mnt_parent == new_nd.path.mnt) + if (new.mnt->mnt_parent == new.mnt) goto out2; /* not attached */ /* make sure we can reach put_old from new_root */ - tmp = old_nd.path.mnt; + tmp = old.mnt; spin_lock(&vfsmount_lock); - if (tmp != new_nd.path.mnt) { + if (tmp != new.mnt) { for (;;) { if (tmp->mnt_parent == tmp) goto out3; /* already mounted on put_old */ - if (tmp->mnt_parent == new_nd.path.mnt) + if (tmp->mnt_parent == new.mnt) break; tmp = tmp->mnt_parent; } - if (!is_subdir(tmp->mnt_mountpoint, new_nd.path.dentry)) + if (!is_subdir(tmp->mnt_mountpoint, new.dentry)) goto out3; - } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry)) + } else if (!is_subdir(old.dentry, new.dentry)) goto out3; - detach_mnt(new_nd.path.mnt, &parent_path); + detach_mnt(new.mnt, &parent_path); detach_mnt(root.mnt, &root_parent); /* mount old root on put_old */ - attach_mnt(root.mnt, &old_nd.path); + attach_mnt(root.mnt, &old); /* mount new_root on / */ - attach_mnt(new_nd.path.mnt, &root_parent); + attach_mnt(new.mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); spin_unlock(&vfsmount_lock); - chroot_fs_refs(&root, &new_nd.path); - security_sb_post_pivotroot(&root, &new_nd.path); + chroot_fs_refs(&root, &new); + security_sb_post_pivotroot(&root, &new); error = 0; path_put(&root_parent); path_put(&parent_path); out2: - mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex); + mutex_unlock(&old.dentry->d_inode->i_mutex); up_write(&namespace_sem); path_put(&root); - path_put(&old_nd.path); + path_put(&old); out1: - path_put(&new_nd.path); + path_put(&new); out0: return error; out3: diff --git a/fs/open.c b/fs/open.c index e94266700eda..3fe1a6857c75 100644 --- a/fs/open.c +++ b/fs/open.c @@ -122,37 +122,37 @@ static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) return 0; } -asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf) +asmlinkage long sys_statfs(const char __user *pathname, struct statfs __user * buf) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (!error) { struct statfs tmp; - error = vfs_statfs_native(nd.path.dentry, &tmp); + error = vfs_statfs_native(path.dentry, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; - path_put(&nd.path); + path_put(&path); } return error; } -asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf) +asmlinkage long sys_statfs64(const char __user *pathname, size_t sz, struct statfs64 __user *buf) { - struct nameidata nd; + struct path path; long error; if (sz != sizeof(*buf)) return -EINVAL; - error = user_path_walk(path, 
&nd); + error = user_path(pathname, &path); if (!error) { struct statfs64 tmp; - error = vfs_statfs64(nd.path.dentry, &tmp); + error = vfs_statfs64(path.dentry, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; - path_put(&nd.path); + path_put(&path); } return error; } @@ -223,20 +223,20 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, return err; } -static long do_sys_truncate(const char __user * path, loff_t length) +static long do_sys_truncate(const char __user *pathname, loff_t length) { - struct nameidata nd; - struct inode * inode; + struct path path; + struct inode *inode; int error; error = -EINVAL; if (length < 0) /* sorry, but loff_t says... */ goto out; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (error) goto out; - inode = nd.path.dentry->d_inode; + inode = path.dentry->d_inode; /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ error = -EISDIR; @@ -247,7 +247,7 @@ static long do_sys_truncate(const char __user * path, loff_t length) if (!S_ISREG(inode->i_mode)) goto dput_and_out; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (error) goto dput_and_out; @@ -274,15 +274,15 @@ static long do_sys_truncate(const char __user * path, loff_t length) error = locks_verify_truncate(inode, NULL, length); if (!error) { DQUOT_INIT(inode); - error = do_truncate(nd.path.dentry, length, 0, NULL); + error = do_truncate(path.dentry, length, 0, NULL); } put_write_and_out: put_write_access(inode); mnt_drop_write_and_out: - mnt_drop_write(nd.path.mnt); + mnt_drop_write(path.mnt); dput_and_out: - path_put(&nd.path); + path_put(&path); out: return error; } @@ -425,7 +425,7 @@ out: */ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) { - struct nameidata nd; + struct path path; struct inode *inode; int old_fsuid, old_fsgid; kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */ @@ -449,7 +449,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) * FIXME: There is a race here against sys_capset. The * capabilities can change yet we will restore the old * value below. We should hold task_capabilities_lock, - * but we cannot because user_path_walk can sleep. + * but we cannot because user_path_at can sleep. */ #endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */ if (current->uid) @@ -458,11 +458,11 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) old_cap = cap_set_effective(current->cap_permitted); } - res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); + res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); if (res) goto out; - inode = nd.path.dentry->d_inode; + inode = path.dentry->d_inode; if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) { /* @@ -470,7 +470,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) * with the "noexec" flag. */ res = -EACCES; - if (nd.path.mnt->mnt_flags & MNT_NOEXEC) + if (path.mnt->mnt_flags & MNT_NOEXEC) goto out_path_release; } @@ -488,11 +488,11 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) * inherently racy and know that the fs may change * state before we even see this result. 
*/ - if (__mnt_is_readonly(nd.path.mnt)) + if (__mnt_is_readonly(path.mnt)) res = -EROFS; out_path_release: - path_put(&nd.path); + path_put(&path); out: current->fsuid = old_fsuid; current->fsgid = old_fsgid; @@ -510,21 +510,21 @@ asmlinkage long sys_access(const char __user *filename, int mode) asmlinkage long sys_chdir(const char __user * filename) { - struct nameidata nd; + struct path path; int error; - error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); + error = user_path_dir(filename, &path); if (error) goto out; - error = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); + error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); if (error) goto dput_and_out; - set_fs_pwd(current->fs, &nd.path); + set_fs_pwd(current->fs, &path); dput_and_out: - path_put(&nd.path); + path_put(&path); out: return error; } @@ -557,14 +557,14 @@ out: asmlinkage long sys_chroot(const char __user * filename) { - struct nameidata nd; + struct path path; int error; - error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd); + error = user_path_dir(filename, &path); if (error) goto out; - error = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); + error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); if (error) goto dput_and_out; @@ -572,10 +572,10 @@ asmlinkage long sys_chroot(const char __user * filename) if (!capable(CAP_SYS_CHROOT)) goto dput_and_out; - set_fs_root(current->fs, &nd.path); + set_fs_root(current->fs, &path); error = 0; dput_and_out: - path_put(&nd.path); + path_put(&path); out: return error; } @@ -617,17 +617,17 @@ out: asmlinkage long sys_fchmodat(int dfd, const char __user *filename, mode_t mode) { - struct nameidata nd; - struct inode * inode; + struct path path; + struct inode *inode; int error; struct iattr newattrs; - error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); + error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); if (error) goto out; - inode = nd.path.dentry->d_inode; + inode = path.dentry->d_inode; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (error) goto dput_and_out; mutex_lock(&inode->i_mutex); @@ -635,11 +635,11 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename, mode = inode->i_mode; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - error = notify_change(nd.path.dentry, &newattrs); + error = notify_change(path.dentry, &newattrs); mutex_unlock(&inode->i_mutex); - mnt_drop_write(nd.path.mnt); + mnt_drop_write(path.mnt); dput_and_out: - path_put(&nd.path); + path_put(&path); out: return error; } @@ -676,19 +676,19 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group) asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk(filename, &nd); + error = user_path(filename, &path); if (error) goto out; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (error) goto out_release; - error = chown_common(nd.path.dentry, user, group); - mnt_drop_write(nd.path.mnt); + error = chown_common(path.dentry, user, group); + mnt_drop_write(path.mnt); out_release: - path_put(&nd.path); + path_put(&path); out: return error; } @@ -696,7 +696,7 @@ out: asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag) { - struct nameidata nd; + struct path path; int error = -EINVAL; int 
follow; @@ -704,35 +704,35 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, goto out; follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; - error = __user_walk_fd(dfd, filename, follow, &nd); + error = user_path_at(dfd, filename, follow, &path); if (error) goto out; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (error) goto out_release; - error = chown_common(nd.path.dentry, user, group); - mnt_drop_write(nd.path.mnt); + error = chown_common(path.dentry, user, group); + mnt_drop_write(path.mnt); out_release: - path_put(&nd.path); + path_put(&path); out: return error; } asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk_link(filename, &nd); + error = user_lpath(filename, &path); if (error) goto out; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (error) goto out_release; - error = chown_common(nd.path.dentry, user, group); - mnt_drop_write(nd.path.mnt); + error = chown_common(path.dentry, user, group); + mnt_drop_write(path.mnt); out_release: - path_put(&nd.path); + path_put(&path); out: return error; } diff --git a/fs/stat.c b/fs/stat.c index 9cf41f719d50..7c46fbeb8b76 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -57,13 +57,13 @@ EXPORT_SYMBOL(vfs_getattr); int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) { - struct nameidata nd; + struct path path; int error; - error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd); + error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path); if (!error) { - error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat); - path_put(&nd.path); + error = vfs_getattr(path.mnt, path.dentry, stat); + path_put(&path); } return error; } @@ -77,13 +77,13 @@ EXPORT_SYMBOL(vfs_stat); int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat) { - struct nameidata nd; + struct path path; int error; - error = __user_walk_fd(dfd, name, 0, &nd); + error = user_path_at(dfd, name, 0, &path); if (!error) { - error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat); - path_put(&nd.path); + error = vfs_getattr(path.mnt, path.dentry, stat); + path_put(&path); } return error; } @@ -291,29 +291,29 @@ asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf) return error; } -asmlinkage long sys_readlinkat(int dfd, const char __user *path, +asmlinkage long sys_readlinkat(int dfd, const char __user *pathname, char __user *buf, int bufsiz) { - struct nameidata nd; + struct path path; int error; if (bufsiz <= 0) return -EINVAL; - error = __user_walk_fd(dfd, path, 0, &nd); + error = user_path_at(dfd, pathname, 0, &path); if (!error) { - struct inode *inode = nd.path.dentry->d_inode; + struct inode *inode = path.dentry->d_inode; error = -EINVAL; if (inode->i_op && inode->i_op->readlink) { - error = security_inode_readlink(nd.path.dentry); + error = security_inode_readlink(path.dentry); if (!error) { - touch_atime(nd.path.mnt, nd.path.dentry); - error = inode->i_op->readlink(nd.path.dentry, + touch_atime(path.mnt, path.dentry); + error = inode->i_op->readlink(path.dentry, buf, bufsiz); } } - path_put(&nd.path); + path_put(&path); } return error; } diff --git a/fs/utimes.c b/fs/utimes.c index dc28b7826259..6929e3e91d05 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -152,18 +152,18 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags error = utimes_common(&file->f_path, times); fput(file); } else { - struct nameidata nd; + 
struct path path; int lookup_flags = 0; if (!(flags & AT_SYMLINK_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; - error = __user_walk_fd(dfd, filename, lookup_flags, &nd); + error = user_path_at(dfd, filename, lookup_flags, &path); if (error) goto out; - error = utimes_common(&nd.path, times); - path_put(&nd.path); + error = utimes_common(&path, times); + path_put(&path); } out: diff --git a/fs/xattr.c b/fs/xattr.c index b96222e05ba0..468377e66531 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -252,40 +252,40 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value, } asmlinkage long -sys_setxattr(const char __user *path, const char __user *name, +sys_setxattr(const char __user *pathname, const char __user *name, const void __user *value, size_t size, int flags) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (error) return error; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (!error) { - error = setxattr(nd.path.dentry, name, value, size, flags); - mnt_drop_write(nd.path.mnt); + error = setxattr(path.dentry, name, value, size, flags); + mnt_drop_write(path.mnt); } - path_put(&nd.path); + path_put(&path); return error; } asmlinkage long -sys_lsetxattr(const char __user *path, const char __user *name, +sys_lsetxattr(const char __user *pathname, const char __user *name, const void __user *value, size_t size, int flags) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk_link(path, &nd); + error = user_lpath(pathname, &path); if (error) return error; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (!error) { - error = setxattr(nd.path.dentry, name, value, size, flags); - mnt_drop_write(nd.path.mnt); + error = setxattr(path.dentry, name, value, size, flags); + mnt_drop_write(path.mnt); } - path_put(&nd.path); + path_put(&path); return error; } @@ -350,32 +350,32 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, } asmlinkage ssize_t -sys_getxattr(const char __user *path, const char __user *name, +sys_getxattr(const char __user *pathname, const char __user *name, void __user *value, size_t size) { - struct nameidata nd; + struct path path; ssize_t error; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (error) return error; - error = getxattr(nd.path.dentry, name, value, size); - path_put(&nd.path); + error = getxattr(path.dentry, name, value, size); + path_put(&path); return error; } asmlinkage ssize_t -sys_lgetxattr(const char __user *path, const char __user *name, void __user *value, +sys_lgetxattr(const char __user *pathname, const char __user *name, void __user *value, size_t size) { - struct nameidata nd; + struct path path; ssize_t error; - error = user_path_walk_link(path, &nd); + error = user_lpath(pathname, &path); if (error) return error; - error = getxattr(nd.path.dentry, name, value, size); - path_put(&nd.path); + error = getxattr(path.dentry, name, value, size); + path_put(&path); return error; } @@ -425,30 +425,30 @@ listxattr(struct dentry *d, char __user *list, size_t size) } asmlinkage ssize_t -sys_listxattr(const char __user *path, char __user *list, size_t size) +sys_listxattr(const char __user *pathname, char __user *list, size_t size) { - struct nameidata nd; + struct path path; ssize_t error; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (error) return error; - error = listxattr(nd.path.dentry, list, 
size); - path_put(&nd.path); + error = listxattr(path.dentry, list, size); + path_put(&path); return error; } asmlinkage ssize_t -sys_llistxattr(const char __user *path, char __user *list, size_t size) +sys_llistxattr(const char __user *pathname, char __user *list, size_t size) { - struct nameidata nd; + struct path path; ssize_t error; - error = user_path_walk_link(path, &nd); + error = user_lpath(pathname, &path); if (error) return error; - error = listxattr(nd.path.dentry, list, size); - path_put(&nd.path); + error = listxattr(path.dentry, list, size); + path_put(&path); return error; } @@ -486,38 +486,38 @@ removexattr(struct dentry *d, const char __user *name) } asmlinkage long -sys_removexattr(const char __user *path, const char __user *name) +sys_removexattr(const char __user *pathname, const char __user *name) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (error) return error; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (!error) { - error = removexattr(nd.path.dentry, name); - mnt_drop_write(nd.path.mnt); + error = removexattr(path.dentry, name); + mnt_drop_write(path.mnt); } - path_put(&nd.path); + path_put(&path); return error; } asmlinkage long -sys_lremovexattr(const char __user *path, const char __user *name) +sys_lremovexattr(const char __user *pathname, const char __user *name) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk_link(path, &nd); + error = user_lpath(pathname, &path); if (error) return error; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (!error) { - error = removexattr(nd.path.dentry, name); - mnt_drop_write(nd.path.mnt); + error = removexattr(path.dentry, name); + mnt_drop_write(path.mnt); } - path_put(&nd.path); + path_put(&path); return error; } diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index a42ba9d71156..01939ba2d8de 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -84,17 +84,15 @@ xfs_find_handle( switch (cmd) { case XFS_IOC_PATH_TO_FSHANDLE: case XFS_IOC_PATH_TO_HANDLE: { - struct nameidata nd; - int error; - - error = user_path_walk_link((const char __user *)hreq.path, &nd); + struct path path; + int error = user_lpath((const char __user *)hreq.path, &path); if (error) return error; - ASSERT(nd.path.dentry); - ASSERT(nd.path.dentry->d_inode); - inode = igrab(nd.path.dentry->d_inode); - path_put(&nd.path); + ASSERT(path.dentry); + ASSERT(path.dentry->d_inode); + inode = igrab(path.dentry->d_inode); + path_put(&path); break; } diff --git a/include/linux/namei.h b/include/linux/namei.h index 60e35a02f6cb..00888ff69504 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -54,12 +54,13 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_OPEN (0x0100) #define LOOKUP_CREATE (0x0200) -extern int __user_walk(const char __user *, unsigned, struct nameidata *); -extern int __user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *); -#define user_path_walk(name,nd) \ - __user_walk_fd(AT_FDCWD, name, LOOKUP_FOLLOW, nd) -#define user_path_walk_link(name,nd) \ - __user_walk_fd(AT_FDCWD, name, 0, nd) +extern int user_path_at(int, const char __user *, unsigned, struct path *); + +#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path) +#define user_lpath(name, path) user_path_at(AT_FDCWD, name, 0, path) +#define user_path_dir(name, path) \ + 
user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, path) + extern int path_lookup(const char *, unsigned, struct nameidata *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct nameidata *); -- cgit v1.2.3 From 516e0cc5646f377ab80fcc2ee639892eccb99853 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 26 Jul 2008 00:39:17 -0400 Subject: [PATCH] f_count may wrap around make it atomic_long_t; while we are at it, get rid of useless checks in affs, hfs and hpfs - ->open() always has it equal to 1, ->release() - to 0. Signed-off-by: Al Viro --- drivers/net/ppp_generic.c | 6 +++--- fs/affs/file.c | 4 ---- fs/aio.c | 6 +++--- fs/file_table.c | 10 +++++----- fs/hfs/inode.c | 4 ---- fs/hfsplus/inode.c | 4 ---- include/linux/fs.h | 6 +++--- include/net/af_unix.h | 2 +- net/sched/sch_atm.c | 4 ++-- net/unix/af_unix.c | 2 +- net/unix/garbage.c | 18 +++++++++--------- 11 files changed, 27 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 739b3ab7bccc..ddccc074a76a 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -581,12 +581,12 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (file == ppp->owner) ppp_shutdown_interface(ppp); } - if (atomic_read(&file->f_count) <= 2) { + if (atomic_long_read(&file->f_count) <= 2) { ppp_release(NULL, file); err = 0; } else - printk(KERN_DEBUG "PPPIOCDETACH file->f_count=%d\n", - atomic_read(&file->f_count)); + printk(KERN_DEBUG "PPPIOCDETACH file->f_count=%ld\n", + atomic_long_read(&file->f_count)); unlock_kernel(); return err; } diff --git a/fs/affs/file.c b/fs/affs/file.c index 6eac7bdeec94..1377b1240b6e 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -46,8 +46,6 @@ const struct inode_operations affs_file_inode_operations = { static int affs_file_open(struct inode *inode, struct file *filp) { - if (atomic_read(&filp->f_count) != 1) - return 0; pr_debug("AFFS: open(%lu,%d)\n", inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt)); atomic_inc(&AFFS_I(inode)->i_opencnt); @@ -57,8 +55,6 @@ affs_file_open(struct inode *inode, struct file *filp) static int affs_file_release(struct inode *inode, struct file *filp) { - if (atomic_read(&filp->f_count) != 0) - return 0; pr_debug("AFFS: release(%lu, %d)\n", inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt)); diff --git a/fs/aio.c b/fs/aio.c index 0051fd94b44e..f658441d5666 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -512,8 +512,8 @@ static void aio_fput_routine(struct work_struct *data) */ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) { - dprintk(KERN_DEBUG "aio_put(%p): f_count=%d\n", - req, atomic_read(&req->ki_filp->f_count)); + dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n", + req, atomic_long_read(&req->ki_filp->f_count)); assert_spin_locked(&ctx->ctx_lock); @@ -528,7 +528,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) /* Must be done under the lock to serialise against cancellation. * Call this aio_fput as it duplicates fput via the fput_work. 
*/ - if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) { + if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) { get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); diff --git a/fs/file_table.c b/fs/file_table.c index 83084225b4c3..f45a4493f9e7 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -120,7 +120,7 @@ struct file *get_empty_filp(void) tsk = current; INIT_LIST_HEAD(&f->f_u.fu_list); - atomic_set(&f->f_count, 1); + atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); f->f_uid = tsk->fsuid; f->f_gid = tsk->fsgid; @@ -219,7 +219,7 @@ EXPORT_SYMBOL(init_file); void fput(struct file *file) { - if (atomic_dec_and_test(&file->f_count)) + if (atomic_long_dec_and_test(&file->f_count)) __fput(file); } @@ -294,7 +294,7 @@ struct file *fget(unsigned int fd) rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (!atomic_inc_not_zero(&file->f_count)) { + if (!atomic_long_inc_not_zero(&file->f_count)) { /* File object ref couldn't be taken */ rcu_read_unlock(); return NULL; @@ -326,7 +326,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed) rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (atomic_inc_not_zero(&file->f_count)) + if (atomic_long_inc_not_zero(&file->f_count)) *fput_needed = 1; else /* Didn't get the reference, someone's freed */ @@ -341,7 +341,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed) void put_filp(struct file *file) { - if (atomic_dec_and_test(&file->f_count)) { + if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); file_kill(file); file_free(file); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index aa73f3fd5dd9..7e19835efa2e 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -522,8 +522,6 @@ static int hfs_file_open(struct inode *inode, struct file *file) { if (HFS_IS_RSRC(inode)) inode = HFS_I(inode)->rsrc_inode; - if (atomic_read(&file->f_count) != 1) - return 0; atomic_inc(&HFS_I(inode)->opencnt); return 0; } @@ -534,8 +532,6 @@ static int hfs_file_release(struct inode *inode, struct file *file) if (HFS_IS_RSRC(inode)) inode = HFS_I(inode)->rsrc_inode; - if (atomic_read(&file->f_count) != 0) - return 0; if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) { mutex_lock(&inode->i_mutex); hfs_file_truncate(inode); diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index d4014e3044d2..b085d64a2b67 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -254,8 +254,6 @@ static int hfsplus_file_open(struct inode *inode, struct file *file) { if (HFSPLUS_IS_RSRC(inode)) inode = HFSPLUS_I(inode).rsrc_inode; - if (atomic_read(&file->f_count) != 1) - return 0; atomic_inc(&HFSPLUS_I(inode).opencnt); return 0; } @@ -266,8 +264,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) if (HFSPLUS_IS_RSRC(inode)) inode = HFSPLUS_I(inode).rsrc_inode; - if (atomic_read(&file->f_count) != 0) - return 0; if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) { mutex_lock(&inode->i_mutex); hfsplus_file_truncate(inode); diff --git a/include/linux/fs.h b/include/linux/fs.h index 9d2de4cadabd..7676fa1c20ae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -795,7 +795,7 @@ struct file { #define f_dentry f_path.dentry #define f_vfsmnt f_path.mnt const struct file_operations *f_op; - atomic_t f_count; + atomic_long_t f_count; unsigned int f_flags; mode_t f_mode; loff_t f_pos; @@ -824,8 +824,8 @@ extern spinlock_t files_lock; #define file_list_lock() spin_lock(&files_lock); #define file_list_unlock() spin_unlock(&files_lock); -#define 
get_file(x) atomic_inc(&(x)->f_count) -#define file_count(x) atomic_read(&(x)->f_count) +#define get_file(x) atomic_long_inc(&(x)->f_count) +#define file_count(x) atomic_long_read(&(x)->f_count) #ifdef CONFIG_DEBUG_WRITECOUNT static inline void file_take_write(struct file *f) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 2dfa96b0575e..7dd29b7e461d 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -51,7 +51,7 @@ struct unix_sock { struct sock *peer; struct sock *other; struct list_head link; - atomic_t inflight; + atomic_long_t inflight; spinlock_t lock; unsigned int gc_candidate : 1; wait_queue_head_t peer_wait; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 04faa835be17..6b517b9dac5b 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -162,7 +162,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) qdisc_destroy(flow->q); tcf_destroy_chain(&flow->filter_list); if (flow->sock) { - pr_debug("atm_tc_put: f_count %d\n", + pr_debug("atm_tc_put: f_count %ld\n", file_count(flow->sock->file)); flow->vcc->pop = flow->old_pop; sockfd_put(flow->sock); @@ -259,7 +259,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, sock = sockfd_lookup(fd, &error); if (!sock) return error; /* f_count++ */ - pr_debug("atm_tc_change: f_count %d\n", file_count(sock->file)); + pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file)); if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) { error = -EPROTOTYPE; goto err_out; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 70ceb1604ad8..6e7fec74bdb3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -603,7 +603,7 @@ static struct sock * unix_create1(struct net *net, struct socket *sock) u->dentry = NULL; u->mnt = NULL; spin_lock_init(&u->lock); - atomic_set(&u->inflight, 0); + atomic_long_set(&u->inflight, 0); INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index ebdff3d877a1..2a27b84f740b 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -127,7 +127,7 @@ void unix_inflight(struct file *fp) if(s) { struct unix_sock *u = unix_sk(s); spin_lock(&unix_gc_lock); - if (atomic_inc_return(&u->inflight) == 1) { + if (atomic_long_inc_return(&u->inflight) == 1) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); } else { @@ -145,7 +145,7 @@ void unix_notinflight(struct file *fp) struct unix_sock *u = unix_sk(s); spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); - if (atomic_dec_and_test(&u->inflight)) + if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); unix_tot_inflight--; spin_unlock(&unix_gc_lock); @@ -237,17 +237,17 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), static void dec_inflight(struct unix_sock *usk) { - atomic_dec(&usk->inflight); + atomic_long_dec(&usk->inflight); } static void inc_inflight(struct unix_sock *usk) { - atomic_inc(&usk->inflight); + atomic_long_inc(&usk->inflight); } static void inc_inflight_move_tail(struct unix_sock *u) { - atomic_inc(&u->inflight); + atomic_long_inc(&u->inflight); /* * If this is still a candidate, move it to the end of the * list, so that it's checked even if it was already passed @@ -288,11 +288,11 @@ void unix_gc(void) * before the detach without atomicity guarantees. 
*/ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { - int total_refs; - int inflight_refs; + long total_refs; + long inflight_refs; total_refs = file_count(u->sk.sk_socket->file); - inflight_refs = atomic_read(&u->inflight); + inflight_refs = atomic_long_read(&u->inflight); BUG_ON(inflight_refs < 1); BUG_ON(total_refs < inflight_refs); @@ -324,7 +324,7 @@ void unix_gc(void) /* Move cursor to after the current position. */ list_move(&cursor, &u->link); - if (atomic_read(&u->inflight) > 0) { + if (atomic_long_read(&u->inflight) > 0) { list_move_tail(&u->link, &gc_inflight_list); u->gc_candidate = 0; scan_children(&u->sk, inc_inflight_move_tail, NULL); -- cgit v1.2.3 From 964bd183624c03680796b63b4ab97ee3905a806a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 26 Jul 2008 03:33:14 -0400 Subject: [PATCH] get rid of __user_path_lookup_open Signed-off-by: Al Viro --- fs/exec.c | 14 ++++++++++---- fs/namei.c | 13 ------------- include/linux/namei.h | 1 - 3 files changed, 10 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index eca58c29eded..9696bbf0f0b1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -106,11 +106,17 @@ static inline void put_binfmt(struct linux_binfmt * fmt) */ asmlinkage long sys_uselib(const char __user * library) { - struct file * file; + struct file *file; struct nameidata nd; - int error; - - error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC); + char *tmp = getname(library); + int error = PTR_ERR(tmp); + + if (!IS_ERR(tmp)) { + error = path_lookup_open(AT_FDCWD, tmp, + LOOKUP_FOLLOW, &nd, + FMODE_READ|FMODE_EXEC); + putname(tmp); + } if (error) goto out; diff --git a/fs/namei.c b/fs/namei.c index 38ceb6e06eba..a7b0a0b80128 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1193,19 +1193,6 @@ static int path_lookup_create(int dfd, const char *name, nd, open_flags, create_mode); } -int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags, - struct nameidata *nd, int open_flags) -{ - char *tmp = getname(name); - int err = PTR_ERR(tmp); - - if (!IS_ERR(tmp)) { - err = __path_lookup_intent_open(AT_FDCWD, tmp, lookup_flags, nd, open_flags, 0); - putname(tmp); - } - return err; -} - static struct dentry *__lookup_hash(struct qstr *name, struct dentry *base, struct nameidata *nd) { diff --git a/include/linux/namei.h b/include/linux/namei.h index 00888ff69504..68f8c3203c89 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -65,7 +65,6 @@ extern int path_lookup(const char *, unsigned, struct nameidata *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct nameidata *); -extern int __user_path_lookup_open(const char __user *, unsigned lookup_flags, struct nameidata *nd, int open_flags); extern int path_lookup_open(int dfd, const char *name, unsigned lookup_flags, struct nameidata *, int open_flags); extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, int (*open)(struct inode *, struct file *)); -- cgit v1.2.3 From 3f8206d496e9e9495afb1d4e70d29712b4d403c9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 26 Jul 2008 03:46:43 -0400 Subject: [PATCH] get rid of indirect users of namei.h fs.h needs path.h, not namei.h; nfs_fs.h doesn't need it at all. Several places in the tree needed direct include. 
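
To make the header dependency concrete, here is a minimal sketch (an editorial illustration, not part of the original patch; the file and function names are hypothetical). After this change a translation unit can no longer rely on <linux/fs.h> pulling in <linux/namei.h> transitively, since fs.h now only needs <linux/path.h> for the embedded struct path; any caller of the namei API has to include namei.h itself:

	/* hypothetical example.c, compiled against the post-patch headers */
	#include <linux/fs.h>		/* now pulls in <linux/path.h>, not namei.h */
	#include <linux/namei.h>	/* required directly for path_lookup()/LOOKUP_* */

	static int example_resolve(const char *name, struct nameidata *nd)
	{
		/* path_lookup() and LOOKUP_FOLLOW are declared in namei.h */
		return path_lookup(name, LOOKUP_FOLLOW, nd);
	}
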
Signed-off-by: Al Viro --- fs/nfsd/nfsctl.c | 1 + fs/ubifs/file.c | 1 + include/linux/fs.h | 2 +- include/linux/nfs_fs.h | 1 - kernel/cgroup.c | 1 + 5 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1955a2702e60..c53e65f8f3a2 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 005a3b854d96..8565e586e533 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -53,6 +53,7 @@ #include "ubifs.h" #include +#include static int read_block(struct inode *inode, void *addr, unsigned int block, struct ubifs_data_node *dn) diff --git a/include/linux/fs.h b/include/linux/fs.h index 7676fa1c20ae..8252b045e624 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -279,7 +279,7 @@ extern int dir_notify_enable; #include #include #include -#include +#include #include #include #include diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f08f9ca602af..78a5922a2f11 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 89bd6fb7894f..657f8f8d93a5 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -45,6 +45,7 @@ #include #include #include +#include #include -- cgit v1.2.3 From 510a35d4a47802f4a0028aa6bd2ca2170da5e32f Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sat, 26 Jul 2008 15:22:27 -0700 Subject: hugetlb: remove unused variable warning Remove the following warning when CONFIG_HUGETLB_PAGE is not set: ipc/shm.c: In function `shm_get_stat': ipc/shm.c:565: warning: unused variable `h' [akpm@linux-foundation.org: use tabs, not spaces] Signed-off-by: Andrea Righi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 9a71d4cc88c8..32e0ef0f6e1f 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -273,7 +273,10 @@ struct hstate {}; #define huge_page_mask(h) PAGE_MASK #define huge_page_order(h) 0 #define huge_page_shift(h) PAGE_SHIFT -#define pages_per_huge_page(h) 1 +static inline unsigned int pages_per_huge_page(struct hstate *h) +{ + return 1; +} #endif #endif /* _LINUX_HUGETLB_H */ -- cgit v1.2.3 From 9993e51c0c47ec69dce1f26c2321af6bb9165e9e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 26 Jul 2008 13:53:46 -0300 Subject: V4L/DVB (8502): videodev2.h: CodingStyle cleanups Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 378 ++++++++++++++++++++-------------------------- 1 file changed, 166 insertions(+), 212 deletions(-) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 2e66a95e8d32..cc0c8952323b 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -91,8 +91,8 @@ */ /* Four-character-code (FOURCC) */ -#define v4l2_fourcc(a,b,c,d)\ - (((__u32)(a)<<0)|((__u32)(b)<<8)|((__u32)(c)<<16)|((__u32)(d)<<24)) +#define v4l2_fourcc(a, b, c, d)\ + ((__u32)(a) | ((__u32)(b) << 8) | ((__u32)(c) << 16) | ((__u32)(d) << 24)) /* * E N U M S @@ -226,8 +226,7 @@ struct v4l2_fract { /* * D R I V E R C A P A B I L I T I E S */ -struct v4l2_capability -{ +struct v4l2_capability { __u8 driver[16]; /* i.e. "bttv" */ __u8 card[32]; /* i.e. 
"Hauppauge WinTV" */ __u8 bus_info[32]; /* "PCI:" + pci_name(pci_dev) */ @@ -259,8 +258,7 @@ struct v4l2_capability /* * V I D E O I M A G E F O R M A T */ -struct v4l2_pix_format -{ +struct v4l2_pix_format { __u32 width; __u32 height; __u32 pixelformat; @@ -272,68 +270,67 @@ struct v4l2_pix_format }; /* Pixel format FOURCC depth Description */ -#define V4L2_PIX_FMT_RGB332 v4l2_fourcc('R','G','B','1') /* 8 RGB-3-3-2 */ -#define V4L2_PIX_FMT_RGB444 v4l2_fourcc('R','4','4','4') /* 16 xxxxrrrr ggggbbbb */ -#define V4L2_PIX_FMT_RGB555 v4l2_fourcc('R','G','B','O') /* 16 RGB-5-5-5 */ -#define V4L2_PIX_FMT_RGB565 v4l2_fourcc('R','G','B','P') /* 16 RGB-5-6-5 */ -#define V4L2_PIX_FMT_RGB555X v4l2_fourcc('R','G','B','Q') /* 16 RGB-5-5-5 BE */ -#define V4L2_PIX_FMT_RGB565X v4l2_fourcc('R','G','B','R') /* 16 RGB-5-6-5 BE */ -#define V4L2_PIX_FMT_BGR24 v4l2_fourcc('B','G','R','3') /* 24 BGR-8-8-8 */ -#define V4L2_PIX_FMT_RGB24 v4l2_fourcc('R','G','B','3') /* 24 RGB-8-8-8 */ -#define V4L2_PIX_FMT_BGR32 v4l2_fourcc('B','G','R','4') /* 32 BGR-8-8-8-8 */ -#define V4L2_PIX_FMT_RGB32 v4l2_fourcc('R','G','B','4') /* 32 RGB-8-8-8-8 */ -#define V4L2_PIX_FMT_GREY v4l2_fourcc('G','R','E','Y') /* 8 Greyscale */ -#define V4L2_PIX_FMT_Y16 v4l2_fourcc('Y','1','6',' ') /* 16 Greyscale */ -#define V4L2_PIX_FMT_PAL8 v4l2_fourcc('P','A','L','8') /* 8 8-bit palette */ -#define V4L2_PIX_FMT_YVU410 v4l2_fourcc('Y','V','U','9') /* 9 YVU 4:1:0 */ -#define V4L2_PIX_FMT_YVU420 v4l2_fourcc('Y','V','1','2') /* 12 YVU 4:2:0 */ -#define V4L2_PIX_FMT_YUYV v4l2_fourcc('Y','U','Y','V') /* 16 YUV 4:2:2 */ -#define V4L2_PIX_FMT_UYVY v4l2_fourcc('U','Y','V','Y') /* 16 YUV 4:2:2 */ -#define V4L2_PIX_FMT_YUV422P v4l2_fourcc('4','2','2','P') /* 16 YVU422 planar */ -#define V4L2_PIX_FMT_YUV411P v4l2_fourcc('4','1','1','P') /* 16 YVU411 planar */ -#define V4L2_PIX_FMT_Y41P v4l2_fourcc('Y','4','1','P') /* 12 YUV 4:1:1 */ -#define V4L2_PIX_FMT_YUV444 v4l2_fourcc('Y','4','4','4') /* 16 xxxxyyyy uuuuvvvv */ -#define V4L2_PIX_FMT_YUV555 v4l2_fourcc('Y','U','V','O') /* 16 YUV-5-5-5 */ -#define V4L2_PIX_FMT_YUV565 v4l2_fourcc('Y','U','V','P') /* 16 YUV-5-6-5 */ -#define V4L2_PIX_FMT_YUV32 v4l2_fourcc('Y','U','V','4') /* 32 YUV-8-8-8-8 */ +#define V4L2_PIX_FMT_RGB332 v4l2_fourcc('R', 'G', 'B', '1') /* 8 RGB-3-3-2 */ +#define V4L2_PIX_FMT_RGB444 v4l2_fourcc('R', '4', '4', '4') /* 16 xxxxrrrr ggggbbbb */ +#define V4L2_PIX_FMT_RGB555 v4l2_fourcc('R', 'G', 'B', 'O') /* 16 RGB-5-5-5 */ +#define V4L2_PIX_FMT_RGB565 v4l2_fourcc('R', 'G', 'B', 'P') /* 16 RGB-5-6-5 */ +#define V4L2_PIX_FMT_RGB555X v4l2_fourcc('R', 'G', 'B', 'Q') /* 16 RGB-5-5-5 BE */ +#define V4L2_PIX_FMT_RGB565X v4l2_fourcc('R', 'G', 'B', 'R') /* 16 RGB-5-6-5 BE */ +#define V4L2_PIX_FMT_BGR24 v4l2_fourcc('B', 'G', 'R', '3') /* 24 BGR-8-8-8 */ +#define V4L2_PIX_FMT_RGB24 v4l2_fourcc('R', 'G', 'B', '3') /* 24 RGB-8-8-8 */ +#define V4L2_PIX_FMT_BGR32 v4l2_fourcc('B', 'G', 'R', '4') /* 32 BGR-8-8-8-8 */ +#define V4L2_PIX_FMT_RGB32 v4l2_fourcc('R', 'G', 'B', '4') /* 32 RGB-8-8-8-8 */ +#define V4L2_PIX_FMT_GREY v4l2_fourcc('G', 'R', 'E', 'Y') /* 8 Greyscale */ +#define V4L2_PIX_FMT_Y16 v4l2_fourcc('Y', '1', '6', ' ') /* 16 Greyscale */ +#define V4L2_PIX_FMT_PAL8 v4l2_fourcc('P', 'A', 'L', '8') /* 8 8-bit palette */ +#define V4L2_PIX_FMT_YVU410 v4l2_fourcc('Y', 'V', 'U', '9') /* 9 YVU 4:1:0 */ +#define V4L2_PIX_FMT_YVU420 v4l2_fourcc('Y', 'V', '1', '2') /* 12 YVU 4:2:0 */ +#define V4L2_PIX_FMT_YUYV v4l2_fourcc('Y', 'U', 'Y', 'V') /* 16 YUV 4:2:2 */ +#define V4L2_PIX_FMT_UYVY v4l2_fourcc('U', 'Y', 
'V', 'Y') /* 16 YUV 4:2:2 */ +#define V4L2_PIX_FMT_YUV422P v4l2_fourcc('4', '2', '2', 'P') /* 16 YVU422 planar */ +#define V4L2_PIX_FMT_YUV411P v4l2_fourcc('4', '1', '1', 'P') /* 16 YVU411 planar */ +#define V4L2_PIX_FMT_Y41P v4l2_fourcc('Y', '4', '1', 'P') /* 12 YUV 4:1:1 */ +#define V4L2_PIX_FMT_YUV444 v4l2_fourcc('Y', '4', '4', '4') /* 16 xxxxyyyy uuuuvvvv */ +#define V4L2_PIX_FMT_YUV555 v4l2_fourcc('Y', 'U', 'V', 'O') /* 16 YUV-5-5-5 */ +#define V4L2_PIX_FMT_YUV565 v4l2_fourcc('Y', 'U', 'V', 'P') /* 16 YUV-5-6-5 */ +#define V4L2_PIX_FMT_YUV32 v4l2_fourcc('Y', 'U', 'V', '4') /* 32 YUV-8-8-8-8 */ /* two planes -- one Y, one Cr + Cb interleaved */ -#define V4L2_PIX_FMT_NV12 v4l2_fourcc('N','V','1','2') /* 12 Y/CbCr 4:2:0 */ -#define V4L2_PIX_FMT_NV21 v4l2_fourcc('N','V','2','1') /* 12 Y/CrCb 4:2:0 */ +#define V4L2_PIX_FMT_NV12 v4l2_fourcc('N', 'V', '1', '2') /* 12 Y/CbCr 4:2:0 */ +#define V4L2_PIX_FMT_NV21 v4l2_fourcc('N', 'V', '2', '1') /* 12 Y/CrCb 4:2:0 */ /* The following formats are not defined in the V4L2 specification */ -#define V4L2_PIX_FMT_YUV410 v4l2_fourcc('Y','U','V','9') /* 9 YUV 4:1:0 */ -#define V4L2_PIX_FMT_YUV420 v4l2_fourcc('Y','U','1','2') /* 12 YUV 4:2:0 */ -#define V4L2_PIX_FMT_YYUV v4l2_fourcc('Y','Y','U','V') /* 16 YUV 4:2:2 */ -#define V4L2_PIX_FMT_HI240 v4l2_fourcc('H','I','2','4') /* 8 8-bit color */ -#define V4L2_PIX_FMT_HM12 v4l2_fourcc('H','M','1','2') /* 8 YUV 4:2:0 16x16 macroblocks */ +#define V4L2_PIX_FMT_YUV410 v4l2_fourcc('Y', 'U', 'V', '9') /* 9 YUV 4:1:0 */ +#define V4L2_PIX_FMT_YUV420 v4l2_fourcc('Y', 'U', '1', '2') /* 12 YUV 4:2:0 */ +#define V4L2_PIX_FMT_YYUV v4l2_fourcc('Y', 'Y', 'U', 'V') /* 16 YUV 4:2:2 */ +#define V4L2_PIX_FMT_HI240 v4l2_fourcc('H', 'I', '2', '4') /* 8 8-bit color */ +#define V4L2_PIX_FMT_HM12 v4l2_fourcc('H', 'M', '1', '2') /* 8 YUV 4:2:0 16x16 macroblocks */ /* see http://www.siliconimaging.com/RGB%20Bayer.htm */ -#define V4L2_PIX_FMT_SBGGR8 v4l2_fourcc('B','A','8','1') /* 8 BGBG.. GRGR.. */ -#define V4L2_PIX_FMT_SGBRG8 v4l2_fourcc('G','B','R','G') /* 8 GBGB.. RGRG.. */ -#define V4L2_PIX_FMT_SBGGR16 v4l2_fourcc('B','Y','R','2') /* 16 BGBG.. GRGR.. */ +#define V4L2_PIX_FMT_SBGGR8 v4l2_fourcc('B', 'A', '8', '1') /* 8 BGBG.. GRGR.. */ +#define V4L2_PIX_FMT_SGBRG8 v4l2_fourcc('G', 'B', 'R', 'G') /* 8 GBGB.. RGRG.. */ +#define V4L2_PIX_FMT_SBGGR16 v4l2_fourcc('B', 'Y', 'R', '2') /* 16 BGBG.. GRGR.. 
*/ /* compressed formats */ -#define V4L2_PIX_FMT_MJPEG v4l2_fourcc('M','J','P','G') /* Motion-JPEG */ -#define V4L2_PIX_FMT_JPEG v4l2_fourcc('J','P','E','G') /* JFIF JPEG */ -#define V4L2_PIX_FMT_DV v4l2_fourcc('d','v','s','d') /* 1394 */ -#define V4L2_PIX_FMT_MPEG v4l2_fourcc('M','P','E','G') /* MPEG-1/2/4 */ +#define V4L2_PIX_FMT_MJPEG v4l2_fourcc('M', 'J', 'P', 'G') /* Motion-JPEG */ +#define V4L2_PIX_FMT_JPEG v4l2_fourcc('J', 'P', 'E', 'G') /* JFIF JPEG */ +#define V4L2_PIX_FMT_DV v4l2_fourcc('d', 'v', 's', 'd') /* 1394 */ +#define V4L2_PIX_FMT_MPEG v4l2_fourcc('M', 'P', 'E', 'G') /* MPEG-1/2/4 */ /* Vendor-specific formats */ -#define V4L2_PIX_FMT_WNVA v4l2_fourcc('W','N','V','A') /* Winnov hw compress */ -#define V4L2_PIX_FMT_SN9C10X v4l2_fourcc('S','9','1','0') /* SN9C10x compression */ -#define V4L2_PIX_FMT_PWC1 v4l2_fourcc('P','W','C','1') /* pwc older webcam */ -#define V4L2_PIX_FMT_PWC2 v4l2_fourcc('P','W','C','2') /* pwc newer webcam */ -#define V4L2_PIX_FMT_ET61X251 v4l2_fourcc('E','6','2','5') /* ET61X251 compression */ -#define V4L2_PIX_FMT_SPCA501 v4l2_fourcc('S','5','0','1') /* YUYV per line */ -#define V4L2_PIX_FMT_SPCA561 v4l2_fourcc('S','5','6','1') /* compressed GBRG bayer */ -#define V4L2_PIX_FMT_PAC207 v4l2_fourcc('P','2','0','7') /* compressed BGGR bayer */ +#define V4L2_PIX_FMT_WNVA v4l2_fourcc('W', 'N', 'V', 'A') /* Winnov hw compress */ +#define V4L2_PIX_FMT_SN9C10X v4l2_fourcc('S', '9', '1', '0') /* SN9C10x compression */ +#define V4L2_PIX_FMT_PWC1 v4l2_fourcc('P', 'W', 'C', '1') /* pwc older webcam */ +#define V4L2_PIX_FMT_PWC2 v4l2_fourcc('P', 'W', 'C', '2') /* pwc newer webcam */ +#define V4L2_PIX_FMT_ET61X251 v4l2_fourcc('E', '6', '2', '5') /* ET61X251 compression */ +#define V4L2_PIX_FMT_SPCA501 v4l2_fourcc('S', '5', '0', '1') /* YUYV per line */ +#define V4L2_PIX_FMT_SPCA561 v4l2_fourcc('S', '5', '6', '1') /* compressed GBRG bayer */ +#define V4L2_PIX_FMT_PAC207 v4l2_fourcc('P', '2', '0', '7') /* compressed BGGR bayer */ /* * F O R M A T E N U M E R A T I O N */ -struct v4l2_fmtdesc -{ +struct v4l2_fmtdesc { __u32 index; /* Format number */ enum v4l2_buf_type type; /* buffer type */ __u32 flags; @@ -349,21 +346,18 @@ struct v4l2_fmtdesc /* * F R A M E S I Z E E N U M E R A T I O N */ -enum v4l2_frmsizetypes -{ +enum v4l2_frmsizetypes { V4L2_FRMSIZE_TYPE_DISCRETE = 1, V4L2_FRMSIZE_TYPE_CONTINUOUS = 2, V4L2_FRMSIZE_TYPE_STEPWISE = 3, }; -struct v4l2_frmsize_discrete -{ +struct v4l2_frmsize_discrete { __u32 width; /* Frame width [pixel] */ __u32 height; /* Frame height [pixel] */ }; -struct v4l2_frmsize_stepwise -{ +struct v4l2_frmsize_stepwise { __u32 min_width; /* Minimum frame width [pixel] */ __u32 max_width; /* Maximum frame width [pixel] */ __u32 step_width; /* Frame width step size [pixel] */ @@ -372,8 +366,7 @@ struct v4l2_frmsize_stepwise __u32 step_height; /* Frame height step size [pixel] */ }; -struct v4l2_frmsizeenum -{ +struct v4l2_frmsizeenum { __u32 index; /* Frame size number */ __u32 pixel_format; /* Pixel format */ __u32 type; /* Frame size type the device supports. 
*/ @@ -389,22 +382,19 @@ struct v4l2_frmsizeenum /* * F R A M E R A T E E N U M E R A T I O N */ -enum v4l2_frmivaltypes -{ +enum v4l2_frmivaltypes { V4L2_FRMIVAL_TYPE_DISCRETE = 1, V4L2_FRMIVAL_TYPE_CONTINUOUS = 2, V4L2_FRMIVAL_TYPE_STEPWISE = 3, }; -struct v4l2_frmival_stepwise -{ +struct v4l2_frmival_stepwise { struct v4l2_fract min; /* Minimum frame interval [s] */ struct v4l2_fract max; /* Maximum frame interval [s] */ struct v4l2_fract step; /* Frame interval step size [s] */ }; -struct v4l2_frmivalenum -{ +struct v4l2_frmivalenum { __u32 index; /* Frame format index */ __u32 pixel_format; /* Pixel format */ __u32 width; /* Frame width */ @@ -423,8 +413,7 @@ struct v4l2_frmivalenum /* * T I M E C O D E */ -struct v4l2_timecode -{ +struct v4l2_timecode { __u32 type; __u32 flags; __u8 frames; @@ -449,8 +438,7 @@ struct v4l2_timecode #define V4L2_TC_USERBITS_8BITCHARS 0x0008 /* The above is based on SMPTE timecodes */ -struct v4l2_jpegcompression -{ +struct v4l2_jpegcompression { int quality; int APPn; /* Number of APP segment to be written, @@ -482,16 +470,14 @@ struct v4l2_jpegcompression /* * M E M O R Y - M A P P I N G B U F F E R S */ -struct v4l2_requestbuffers -{ +struct v4l2_requestbuffers { __u32 count; enum v4l2_buf_type type; enum v4l2_memory memory; __u32 reserved[2]; }; -struct v4l2_buffer -{ +struct v4l2_buffer { __u32 index; enum v4l2_buf_type type; __u32 bytesused; @@ -525,13 +511,12 @@ struct v4l2_buffer /* * O V E R L A Y P R E V I E W */ -struct v4l2_framebuffer -{ +struct v4l2_framebuffer { __u32 capability; __u32 flags; /* FIXME: in theory we should pass something like PCI device + memory * region + offset instead of some physical address */ - void* base; + void *base; struct v4l2_pix_format fmt; }; /* Flags for the 'capability' field. 
Read only */ @@ -550,14 +535,12 @@ struct v4l2_framebuffer #define V4L2_FBUF_FLAG_GLOBAL_ALPHA 0x0010 #define V4L2_FBUF_FLAG_LOCAL_INV_ALPHA 0x0020 -struct v4l2_clip -{ +struct v4l2_clip { struct v4l2_rect c; struct v4l2_clip __user *next; }; -struct v4l2_window -{ +struct v4l2_window { struct v4l2_rect w; enum v4l2_field field; __u32 chromakey; @@ -570,8 +553,7 @@ struct v4l2_window /* * C A P T U R E P A R A M E T E R S */ -struct v4l2_captureparm -{ +struct v4l2_captureparm { __u32 capability; /* Supported modes */ __u32 capturemode; /* Current mode */ struct v4l2_fract timeperframe; /* Time per frame in .1us units */ @@ -584,8 +566,7 @@ struct v4l2_captureparm #define V4L2_MODE_HIGHQUALITY 0x0001 /* High quality imaging mode */ #define V4L2_CAP_TIMEPERFRAME 0x1000 /* timeperframe field is supported */ -struct v4l2_outputparm -{ +struct v4l2_outputparm { __u32 capability; /* Supported modes */ __u32 outputmode; /* Current mode */ struct v4l2_fract timeperframe; /* Time per frame in seconds */ @@ -702,8 +683,7 @@ typedef __u64 v4l2_std_id; #define V4L2_STD_ALL (V4L2_STD_525_60 |\ V4L2_STD_625_50) -struct v4l2_standard -{ +struct v4l2_standard { __u32 index; v4l2_std_id id; __u8 name[24]; @@ -715,8 +695,7 @@ struct v4l2_standard /* * V I D E O I N P U T S */ -struct v4l2_input -{ +struct v4l2_input { __u32 index; /* Which input */ __u8 name[32]; /* Label */ __u32 type; /* Type of input */ @@ -753,8 +732,7 @@ struct v4l2_input /* * V I D E O O U T P U T S */ -struct v4l2_output -{ +struct v4l2_output { __u32 index; /* Which output */ __u8 name[32]; /* Label */ __u32 type; /* Type of output */ @@ -771,14 +749,12 @@ struct v4l2_output /* * C O N T R O L S */ -struct v4l2_control -{ +struct v4l2_control { __u32 id; __s32 value; }; -struct v4l2_ext_control -{ +struct v4l2_ext_control { __u32 id; __u32 reserved2[2]; union { @@ -788,8 +764,7 @@ struct v4l2_ext_control }; } __attribute__ ((packed)); -struct v4l2_ext_controls -{ +struct v4l2_ext_controls { __u32 ctrl_class; __u32 count; __u32 error_idx; @@ -807,8 +782,7 @@ struct v4l2_ext_controls #define V4L2_CTRL_DRIVER_PRIV(id) (((id) & 0xffff) >= 0x1000) /* Used in the VIDIOC_QUERYCTRL ioctl for querying controls */ -struct v4l2_queryctrl -{ +struct v4l2_queryctrl { __u32 id; enum v4l2_ctrl_type type; __u8 name[32]; /* Whatever */ @@ -821,8 +795,7 @@ struct v4l2_queryctrl }; /* Used in the VIDIOC_QUERYMENU ioctl for querying menu items */ -struct v4l2_querymenu -{ +struct v4l2_querymenu { __u32 id; __u32 index; __u8 name[32]; /* Whatever */ @@ -1104,8 +1077,7 @@ enum v4l2_exposure_auto_type { /* * T U N I N G */ -struct v4l2_tuner -{ +struct v4l2_tuner { __u32 index; __u8 name[32]; enum v4l2_tuner_type type; @@ -1119,8 +1091,7 @@ struct v4l2_tuner __u32 reserved[4]; }; -struct v4l2_modulator -{ +struct v4l2_modulator { __u32 index; __u8 name[32]; __u32 capability; @@ -1153,8 +1124,7 @@ struct v4l2_modulator #define V4L2_TUNER_MODE_LANG1 0x0003 #define V4L2_TUNER_MODE_LANG1_LANG2 0x0004 -struct v4l2_frequency -{ +struct v4l2_frequency { __u32 tuner; enum v4l2_tuner_type type; __u32 frequency; @@ -1172,8 +1142,7 @@ struct v4l2_hw_freq_seek { /* * A U D I O */ -struct v4l2_audio -{ +struct v4l2_audio { __u32 index; __u8 name[32]; __u32 capability; @@ -1188,8 +1157,7 @@ struct v4l2_audio /* Flags for the 'mode' field */ #define V4L2_AUDMODE_AVL 0x00001 -struct v4l2_audioout -{ +struct v4l2_audioout { __u32 index; __u8 name[32]; __u32 capability; @@ -1253,8 +1221,7 @@ struct v4l2_encoder_cmd { */ /* Raw VBI */ -struct v4l2_vbi_format -{ +struct 
v4l2_vbi_format { __u32 sampling_rate; /* in 1 Hz */ __u32 offset; __u32 samples_per_line; @@ -1266,8 +1233,8 @@ struct v4l2_vbi_format }; /* VBI flags */ -#define V4L2_VBI_UNSYNC (1<< 0) -#define V4L2_VBI_INTERLACED (1<< 1) +#define V4L2_VBI_UNSYNC (1 << 0) +#define V4L2_VBI_INTERLACED (1 << 1) /* Sliced VBI * @@ -1276,8 +1243,7 @@ struct v4l2_vbi_format * notice in the definitive implementation. */ -struct v4l2_sliced_vbi_format -{ +struct v4l2_sliced_vbi_format { __u16 service_set; /* service_lines[0][...] specifies lines 0-23 (1-23 used) of the first field service_lines[1][...] specifies lines 0-23 (1-23 used) of the second field @@ -1301,8 +1267,7 @@ struct v4l2_sliced_vbi_format #define V4L2_SLICED_VBI_525 (V4L2_SLICED_CAPTION_525) #define V4L2_SLICED_VBI_625 (V4L2_SLICED_TELETEXT_B | V4L2_SLICED_VPS | V4L2_SLICED_WSS_625) -struct v4l2_sliced_vbi_cap -{ +struct v4l2_sliced_vbi_cap { __u16 service_set; /* service_lines[0][...] specifies lines 0-23 (1-23 used) of the first field service_lines[1][...] specifies lines 0-23 (1-23 used) of the second field @@ -1313,8 +1278,7 @@ struct v4l2_sliced_vbi_cap __u32 reserved[3]; /* must be 0 */ }; -struct v4l2_sliced_vbi_data -{ +struct v4l2_sliced_vbi_data { __u32 id; __u32 field; /* 0: first field, 1: second field */ __u32 line; /* 1-23 */ @@ -1328,27 +1292,23 @@ struct v4l2_sliced_vbi_data /* Stream data format */ -struct v4l2_format -{ +struct v4l2_format { enum v4l2_buf_type type; - union - { - struct v4l2_pix_format pix; // V4L2_BUF_TYPE_VIDEO_CAPTURE - struct v4l2_window win; // V4L2_BUF_TYPE_VIDEO_OVERLAY - struct v4l2_vbi_format vbi; // V4L2_BUF_TYPE_VBI_CAPTURE - struct v4l2_sliced_vbi_format sliced; // V4L2_BUF_TYPE_SLICED_VBI_CAPTURE - __u8 raw_data[200]; // user-defined + union { + struct v4l2_pix_format pix; /* V4L2_BUF_TYPE_VIDEO_CAPTURE */ + struct v4l2_window win; /* V4L2_BUF_TYPE_VIDEO_OVERLAY */ + struct v4l2_vbi_format vbi; /* V4L2_BUF_TYPE_VBI_CAPTURE */ + struct v4l2_sliced_vbi_format sliced; /* V4L2_BUF_TYPE_SLICED_VBI_CAPTURE */ + __u8 raw_data[200]; /* user-defined */ } fmt; }; /* Stream type-dependent parameters */ -struct v4l2_streamparm -{ +struct v4l2_streamparm { enum v4l2_buf_type type; - union - { + union { struct v4l2_captureparm capture; struct v4l2_outputparm output; __u8 raw_data[200]; /* user-defined */ @@ -1386,92 +1346,86 @@ struct v4l2_chip_ident { * I O C T L C O D E S F O R V I D E O D E V I C E S * */ -#define VIDIOC_QUERYCAP _IOR ('V', 0, struct v4l2_capability) -#define VIDIOC_RESERVED _IO ('V', 1) -#define VIDIOC_ENUM_FMT _IOWR ('V', 2, struct v4l2_fmtdesc) -#define VIDIOC_G_FMT _IOWR ('V', 4, struct v4l2_format) -#define VIDIOC_S_FMT _IOWR ('V', 5, struct v4l2_format) -#define VIDIOC_REQBUFS _IOWR ('V', 8, struct v4l2_requestbuffers) -#define VIDIOC_QUERYBUF _IOWR ('V', 9, struct v4l2_buffer) -#define VIDIOC_G_FBUF _IOR ('V', 10, struct v4l2_framebuffer) -#define VIDIOC_S_FBUF _IOW ('V', 11, struct v4l2_framebuffer) -#define VIDIOC_OVERLAY _IOW ('V', 14, int) -#define VIDIOC_QBUF _IOWR ('V', 15, struct v4l2_buffer) -#define VIDIOC_DQBUF _IOWR ('V', 17, struct v4l2_buffer) -#define VIDIOC_STREAMON _IOW ('V', 18, int) -#define VIDIOC_STREAMOFF _IOW ('V', 19, int) -#define VIDIOC_G_PARM _IOWR ('V', 21, struct v4l2_streamparm) -#define VIDIOC_S_PARM _IOWR ('V', 22, struct v4l2_streamparm) -#define VIDIOC_G_STD _IOR ('V', 23, v4l2_std_id) -#define VIDIOC_S_STD _IOW ('V', 24, v4l2_std_id) -#define VIDIOC_ENUMSTD _IOWR ('V', 25, struct v4l2_standard) -#define VIDIOC_ENUMINPUT _IOWR ('V', 26, struct 
v4l2_input) -#define VIDIOC_G_CTRL _IOWR ('V', 27, struct v4l2_control) -#define VIDIOC_S_CTRL _IOWR ('V', 28, struct v4l2_control) -#define VIDIOC_G_TUNER _IOWR ('V', 29, struct v4l2_tuner) -#define VIDIOC_S_TUNER _IOW ('V', 30, struct v4l2_tuner) -#define VIDIOC_G_AUDIO _IOR ('V', 33, struct v4l2_audio) -#define VIDIOC_S_AUDIO _IOW ('V', 34, struct v4l2_audio) -#define VIDIOC_QUERYCTRL _IOWR ('V', 36, struct v4l2_queryctrl) -#define VIDIOC_QUERYMENU _IOWR ('V', 37, struct v4l2_querymenu) -#define VIDIOC_G_INPUT _IOR ('V', 38, int) -#define VIDIOC_S_INPUT _IOWR ('V', 39, int) -#define VIDIOC_G_OUTPUT _IOR ('V', 46, int) -#define VIDIOC_S_OUTPUT _IOWR ('V', 47, int) -#define VIDIOC_ENUMOUTPUT _IOWR ('V', 48, struct v4l2_output) -#define VIDIOC_G_AUDOUT _IOR ('V', 49, struct v4l2_audioout) -#define VIDIOC_S_AUDOUT _IOW ('V', 50, struct v4l2_audioout) -#define VIDIOC_G_MODULATOR _IOWR ('V', 54, struct v4l2_modulator) -#define VIDIOC_S_MODULATOR _IOW ('V', 55, struct v4l2_modulator) -#define VIDIOC_G_FREQUENCY _IOWR ('V', 56, struct v4l2_frequency) -#define VIDIOC_S_FREQUENCY _IOW ('V', 57, struct v4l2_frequency) -#define VIDIOC_CROPCAP _IOWR ('V', 58, struct v4l2_cropcap) -#define VIDIOC_G_CROP _IOWR ('V', 59, struct v4l2_crop) -#define VIDIOC_S_CROP _IOW ('V', 60, struct v4l2_crop) -#define VIDIOC_G_JPEGCOMP _IOR ('V', 61, struct v4l2_jpegcompression) -#define VIDIOC_S_JPEGCOMP _IOW ('V', 62, struct v4l2_jpegcompression) -#define VIDIOC_QUERYSTD _IOR ('V', 63, v4l2_std_id) -#define VIDIOC_TRY_FMT _IOWR ('V', 64, struct v4l2_format) -#define VIDIOC_ENUMAUDIO _IOWR ('V', 65, struct v4l2_audio) -#define VIDIOC_ENUMAUDOUT _IOWR ('V', 66, struct v4l2_audioout) -#define VIDIOC_G_PRIORITY _IOR ('V', 67, enum v4l2_priority) -#define VIDIOC_S_PRIORITY _IOW ('V', 68, enum v4l2_priority) -#define VIDIOC_G_SLICED_VBI_CAP _IOWR ('V', 69, struct v4l2_sliced_vbi_cap) -#define VIDIOC_LOG_STATUS _IO ('V', 70) -#define VIDIOC_G_EXT_CTRLS _IOWR ('V', 71, struct v4l2_ext_controls) -#define VIDIOC_S_EXT_CTRLS _IOWR ('V', 72, struct v4l2_ext_controls) -#define VIDIOC_TRY_EXT_CTRLS _IOWR ('V', 73, struct v4l2_ext_controls) +#define VIDIOC_QUERYCAP _IOR('V', 0, struct v4l2_capability) +#define VIDIOC_RESERVED _IO('V', 1) +#define VIDIOC_ENUM_FMT _IOWR('V', 2, struct v4l2_fmtdesc) +#define VIDIOC_G_FMT _IOWR('V', 4, struct v4l2_format) +#define VIDIOC_S_FMT _IOWR('V', 5, struct v4l2_format) +#define VIDIOC_REQBUFS _IOWR('V', 8, struct v4l2_requestbuffers) +#define VIDIOC_QUERYBUF _IOWR('V', 9, struct v4l2_buffer) +#define VIDIOC_G_FBUF _IOR('V', 10, struct v4l2_framebuffer) +#define VIDIOC_S_FBUF _IOW('V', 11, struct v4l2_framebuffer) +#define VIDIOC_OVERLAY _IOW('V', 14, int) +#define VIDIOC_QBUF _IOWR('V', 15, struct v4l2_buffer) +#define VIDIOC_DQBUF _IOWR('V', 17, struct v4l2_buffer) +#define VIDIOC_STREAMON _IOW('V', 18, int) +#define VIDIOC_STREAMOFF _IOW('V', 19, int) +#define VIDIOC_G_PARM _IOWR('V', 21, struct v4l2_streamparm) +#define VIDIOC_S_PARM _IOWR('V', 22, struct v4l2_streamparm) +#define VIDIOC_G_STD _IOR('V', 23, v4l2_std_id) +#define VIDIOC_S_STD _IOW('V', 24, v4l2_std_id) +#define VIDIOC_ENUMSTD _IOWR('V', 25, struct v4l2_standard) +#define VIDIOC_ENUMINPUT _IOWR('V', 26, struct v4l2_input) +#define VIDIOC_G_CTRL _IOWR('V', 27, struct v4l2_control) +#define VIDIOC_S_CTRL _IOWR('V', 28, struct v4l2_control) +#define VIDIOC_G_TUNER _IOWR('V', 29, struct v4l2_tuner) +#define VIDIOC_S_TUNER _IOW('V', 30, struct v4l2_tuner) +#define VIDIOC_G_AUDIO _IOR('V', 33, struct v4l2_audio) +#define 
VIDIOC_S_AUDIO _IOW('V', 34, struct v4l2_audio) +#define VIDIOC_QUERYCTRL _IOWR('V', 36, struct v4l2_queryctrl) +#define VIDIOC_QUERYMENU _IOWR('V', 37, struct v4l2_querymenu) +#define VIDIOC_G_INPUT _IOR('V', 38, int) +#define VIDIOC_S_INPUT _IOWR('V', 39, int) +#define VIDIOC_G_OUTPUT _IOR('V', 46, int) +#define VIDIOC_S_OUTPUT _IOWR('V', 47, int) +#define VIDIOC_ENUMOUTPUT _IOWR('V', 48, struct v4l2_output) +#define VIDIOC_G_AUDOUT _IOR('V', 49, struct v4l2_audioout) +#define VIDIOC_S_AUDOUT _IOW('V', 50, struct v4l2_audioout) +#define VIDIOC_G_MODULATOR _IOWR('V', 54, struct v4l2_modulator) +#define VIDIOC_S_MODULATOR _IOW('V', 55, struct v4l2_modulator) +#define VIDIOC_G_FREQUENCY _IOWR('V', 56, struct v4l2_frequency) +#define VIDIOC_S_FREQUENCY _IOW('V', 57, struct v4l2_frequency) +#define VIDIOC_CROPCAP _IOWR('V', 58, struct v4l2_cropcap) +#define VIDIOC_G_CROP _IOWR('V', 59, struct v4l2_crop) +#define VIDIOC_S_CROP _IOW('V', 60, struct v4l2_crop) +#define VIDIOC_G_JPEGCOMP _IOR('V', 61, struct v4l2_jpegcompression) +#define VIDIOC_S_JPEGCOMP _IOW('V', 62, struct v4l2_jpegcompression) +#define VIDIOC_QUERYSTD _IOR('V', 63, v4l2_std_id) +#define VIDIOC_TRY_FMT _IOWR('V', 64, struct v4l2_format) +#define VIDIOC_ENUMAUDIO _IOWR('V', 65, struct v4l2_audio) +#define VIDIOC_ENUMAUDOUT _IOWR('V', 66, struct v4l2_audioout) +#define VIDIOC_G_PRIORITY _IOR('V', 67, enum v4l2_priority) +#define VIDIOC_S_PRIORITY _IOW('V', 68, enum v4l2_priority) +#define VIDIOC_G_SLICED_VBI_CAP _IOWR('V', 69, struct v4l2_sliced_vbi_cap) +#define VIDIOC_LOG_STATUS _IO('V', 70) +#define VIDIOC_G_EXT_CTRLS _IOWR('V', 71, struct v4l2_ext_controls) +#define VIDIOC_S_EXT_CTRLS _IOWR('V', 72, struct v4l2_ext_controls) +#define VIDIOC_TRY_EXT_CTRLS _IOWR('V', 73, struct v4l2_ext_controls) #if 1 -#define VIDIOC_ENUM_FRAMESIZES _IOWR ('V', 74, struct v4l2_frmsizeenum) -#define VIDIOC_ENUM_FRAMEINTERVALS _IOWR ('V', 75, struct v4l2_frmivalenum) -#define VIDIOC_G_ENC_INDEX _IOR ('V', 76, struct v4l2_enc_idx) -#define VIDIOC_ENCODER_CMD _IOWR ('V', 77, struct v4l2_encoder_cmd) -#define VIDIOC_TRY_ENCODER_CMD _IOWR ('V', 78, struct v4l2_encoder_cmd) +#define VIDIOC_ENUM_FRAMESIZES _IOWR('V', 74, struct v4l2_frmsizeenum) +#define VIDIOC_ENUM_FRAMEINTERVALS _IOWR('V', 75, struct v4l2_frmivalenum) +#define VIDIOC_G_ENC_INDEX _IOR('V', 76, struct v4l2_enc_idx) +#define VIDIOC_ENCODER_CMD _IOWR('V', 77, struct v4l2_encoder_cmd) +#define VIDIOC_TRY_ENCODER_CMD _IOWR('V', 78, struct v4l2_encoder_cmd) /* Experimental, only implemented if CONFIG_VIDEO_ADV_DEBUG is defined */ -#define VIDIOC_DBG_S_REGISTER _IOW ('V', 79, struct v4l2_register) -#define VIDIOC_DBG_G_REGISTER _IOWR ('V', 80, struct v4l2_register) +#define VIDIOC_DBG_S_REGISTER _IOW('V', 79, struct v4l2_register) +#define VIDIOC_DBG_G_REGISTER _IOWR('V', 80, struct v4l2_register) -#define VIDIOC_G_CHIP_IDENT _IOWR ('V', 81, struct v4l2_chip_ident) +#define VIDIOC_G_CHIP_IDENT _IOWR('V', 81, struct v4l2_chip_ident) #endif -#define VIDIOC_S_HW_FREQ_SEEK _IOW ('V', 82, struct v4l2_hw_freq_seek) +#define VIDIOC_S_HW_FREQ_SEEK _IOW('V', 82, struct v4l2_hw_freq_seek) #ifdef __OLD_VIDIOC_ /* for compatibility, will go away some day */ -#define VIDIOC_OVERLAY_OLD _IOWR ('V', 14, int) -#define VIDIOC_S_PARM_OLD _IOW ('V', 22, struct v4l2_streamparm) -#define VIDIOC_S_CTRL_OLD _IOW ('V', 28, struct v4l2_control) -#define VIDIOC_G_AUDIO_OLD _IOWR ('V', 33, struct v4l2_audio) -#define VIDIOC_G_AUDOUT_OLD _IOWR ('V', 49, struct v4l2_audioout) -#define VIDIOC_CROPCAP_OLD _IOR ('V', 
58, struct v4l2_cropcap) +#define VIDIOC_OVERLAY_OLD _IOWR('V', 14, int) +#define VIDIOC_S_PARM_OLD _IOW('V', 22, struct v4l2_streamparm) +#define VIDIOC_S_CTRL_OLD _IOW('V', 28, struct v4l2_control) +#define VIDIOC_G_AUDIO_OLD _IOWR('V', 33, struct v4l2_audio) +#define VIDIOC_G_AUDOUT_OLD _IOWR('V', 49, struct v4l2_audioout) +#define VIDIOC_CROPCAP_OLD _IOR('V', 58, struct v4l2_cropcap) #endif #define BASE_VIDIOC_PRIVATE 192 /* 192-255 are private */ #endif /* __LINUX_VIDEODEV2_H */ - -/* - * Local variables: - * c-basic-offset: 8 - * End: - */ -- cgit v1.2.3 From 1250ac6d4ab716dafe0ac245fd31cd3a7cbc0a98 Mon Sep 17 00:00:00 2001 From: Jean-Francois Moine Date: Sat, 26 Jul 2008 08:02:47 -0300 Subject: V4L/DVB (8518): gspca: Remove the remaining frame decoding functions from the subdrivers. SPCA505 and SPCA508 added in the pixel formats. Decode functions and associated resources removed in spca505, 506 and 508. The decode routines are now found in the V4L library. Signed-off-by: Jean-Francois Moine Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/gspca/spca505.c | 105 ++++++++++-------------------------- drivers/media/video/gspca/spca506.c | 105 ++++++++++-------------------------- drivers/media/video/gspca/spca508.c | 91 +++++++------------------------ include/linux/videodev2.h | 2 + 4 files changed, 76 insertions(+), 227 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/video/gspca/spca505.c b/drivers/media/video/gspca/spca505.c index 284d549e4d3e..32ffe5556061 100644 --- a/drivers/media/video/gspca/spca505.c +++ b/drivers/media/video/gspca/spca505.c @@ -31,10 +31,6 @@ MODULE_LICENSE("GPL"); struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ - int buflen; - unsigned char tmpbuf[640 * 480 * 3 / 2]; /* YYUV per line */ - unsigned char tmpbuf2[640 * 480 * 2]; /* YUYV */ - unsigned char brightness; char subtype; @@ -64,29 +60,29 @@ static struct ctrl sd_ctrls[] = { }; static struct v4l2_pix_format vga_mode[] = { - {160, 120, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE, - .bytesperline = 160 * 2, - .sizeimage = 160 * 120 * 2, + {160, 120, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE, + .bytesperline = 160 * 3, + .sizeimage = 160 * 120 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 5}, - {176, 144, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE, - .bytesperline = 176 * 2, - .sizeimage = 176 * 144 * 2, + {176, 144, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE, + .bytesperline = 176 * 3, + .sizeimage = 176 * 144 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 4}, - {320, 240, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE, - .bytesperline = 320 * 2, - .sizeimage = 320 * 240 * 2, + {320, 240, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE, + .bytesperline = 320 * 3, + .sizeimage = 320 * 240 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 2}, - {352, 288, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE, - .bytesperline = 352 * 2, - .sizeimage = 352 * 288 * 2, + {352, 288, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE, + .bytesperline = 352 * 3, + .sizeimage = 352 * 288 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1}, - {640, 480, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE, - .bytesperline = 640 * 2, - .sizeimage = 640 * 480 * 2, + {640, 480, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE, + .bytesperline = 640 * 3, + .sizeimage = 640 * 480 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; @@ -760,77 +756,30 @@ static void sd_close(struct gspca_dev *gspca_dev) reg_write(gspca_dev->dev, 0x05, 0x11, 0xf); } -/* convert YYUV per line to YUYV (YUV 4:2:2) */ -static void yyuv_decode(unsigned char *out, - 
unsigned char *in, - int width, - int height) -{ - unsigned char *Ui, *Vi, *yi, *yi1; - unsigned char *out1; - int i, j; - - yi = in; - for (i = height / 2; --i >= 0; ) { - out1 = out + width * 2; /* next line */ - yi1 = yi + width; - Ui = yi1 + width; - Vi = Ui + width / 2; - for (j = width / 2; --j >= 0; ) { - *out++ = 128 + *yi++; - *out++ = 128 + *Ui; - *out++ = 128 + *yi++; - *out++ = 128 + *Vi; - - *out1++ = 128 + *yi1++; - *out1++ = 128 + *Ui++; - *out1++ = 128 + *yi1++; - *out1++ = 128 + *Vi++; - } - yi += width * 2; - out = out1; - } -} - static void sd_pkt_scan(struct gspca_dev *gspca_dev, struct gspca_frame *frame, /* target */ __u8 *data, /* isoc packet */ int len) /* iso packet length */ { - struct sd *sd = (struct sd *) gspca_dev; - switch (data[0]) { case 0: /* start of frame */ - if (gspca_dev->last_packet_type == FIRST_PACKET) { - yyuv_decode(sd->tmpbuf2, sd->tmpbuf, - gspca_dev->width, - gspca_dev->height); - frame = gspca_frame_add(gspca_dev, - LAST_PACKET, - frame, - sd->tmpbuf2, - gspca_dev->width - * gspca_dev->height - * 2); - } - gspca_frame_add(gspca_dev, FIRST_PACKET, frame, - data, 0); + frame = gspca_frame_add(gspca_dev, LAST_PACKET, frame, + data, 0); data += SPCA50X_OFFSET_DATA; len -= SPCA50X_OFFSET_DATA; - if (len > 0) - memcpy(sd->tmpbuf, data, len); - else - len = 0; - sd->buflen = len; - return; + gspca_frame_add(gspca_dev, FIRST_PACKET, frame, + data, len); + break; case 0xff: /* drop */ /* gspca_dev->last_packet_type = DISCARD_PACKET; */ - return; + break; + default: + data += 1; + len -= 1; + gspca_frame_add(gspca_dev, FIRST_PACKET, frame, + data, len); + break; } - data += 1; - len -= 1; - memcpy(&sd->tmpbuf[sd->buflen], data, len); - sd->buflen += len; } static void setbrightness(struct gspca_dev *gspca_dev) diff --git a/drivers/media/video/gspca/spca506.c b/drivers/media/video/gspca/spca506.c index 2c281a0563e5..6fe715c80ad2 100644 --- a/drivers/media/video/gspca/spca506.c +++ b/drivers/media/video/gspca/spca506.c @@ -33,10 +33,6 @@ MODULE_LICENSE("GPL"); struct sd { struct gspca_dev gspca_dev; /* !! 
must be the first item */ - int buflen; - __u8 tmpbuf[640 * 480 * 3]; /* YYUV per line */ - __u8 tmpbuf2[640 * 480 * 2]; /* YUYV */ - unsigned char brightness; unsigned char contrast; unsigned char colors; @@ -115,29 +111,29 @@ static struct ctrl sd_ctrls[] = { }; static struct v4l2_pix_format vga_mode[] = { - {160, 120, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE, - .bytesperline = 160 * 2, - .sizeimage = 160 * 120 * 2, + {160, 120, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE, + .bytesperline = 160 * 3, + .sizeimage = 160 * 120 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 5}, - {176, 144, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE, - .bytesperline = 176 * 2, - .sizeimage = 176 * 144 * 2, + {176, 144, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE, + .bytesperline = 176 * 3, + .sizeimage = 176 * 144 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 4}, - {320, 240, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE, - .bytesperline = 320 * 2, - .sizeimage = 320 * 240 * 2, + {320, 240, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE, + .bytesperline = 320 * 3, + .sizeimage = 320 * 240 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 2}, - {352, 288, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE, - .bytesperline = 352 * 2, - .sizeimage = 352 * 288 * 2, + {352, 288, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE, + .bytesperline = 352 * 3, + .sizeimage = 352 * 288 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1}, - {640, 480, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE, - .bytesperline = 640 * 2, - .sizeimage = 640 * 480 * 2, + {640, 480, V4L2_PIX_FMT_SPCA505, V4L2_FIELD_NONE, + .bytesperline = 640 * 3, + .sizeimage = 640 * 480 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; @@ -572,77 +568,30 @@ static void sd_close(struct gspca_dev *gspca_dev) { } -/* convert YYUV per line to YUYV (YUV 4:2:2) */ -static void yyuv_decode(unsigned char *out, - unsigned char *in, - int width, - int height) -{ - unsigned char *Ui, *Vi, *yi, *yi1; - unsigned char *out1; - int i, j; - - yi = in; - for (i = height / 2; --i >= 0; ) { - out1 = out + width * 2; /* next line */ - yi1 = yi + width; - Ui = yi1 + width; - Vi = Ui + width / 2; - for (j = width / 2; --j >= 0; ) { - *out++ = 128 + *yi++; - *out++ = 128 + *Ui; - *out++ = 128 + *yi++; - *out++ = 128 + *Vi; - - *out1++ = 128 + *yi1++; - *out1++ = 128 + *Ui++; - *out1++ = 128 + *yi1++; - *out1++ = 128 + *Vi++; - } - yi += width * 2; - out = out1; - } -} - static void sd_pkt_scan(struct gspca_dev *gspca_dev, struct gspca_frame *frame, /* target */ __u8 *data, /* isoc packet */ int len) /* iso packet length */ { - struct sd *sd = (struct sd *) gspca_dev; - switch (data[0]) { case 0: /* start of frame */ - if (gspca_dev->last_packet_type == FIRST_PACKET) { - yyuv_decode(sd->tmpbuf2, sd->tmpbuf, - gspca_dev->width, - gspca_dev->height); - frame = gspca_frame_add(gspca_dev, - LAST_PACKET, - frame, - sd->tmpbuf2, - gspca_dev->width - * gspca_dev->height - * 2); - } - gspca_frame_add(gspca_dev, FIRST_PACKET, frame, - data, 0); + frame = gspca_frame_add(gspca_dev, LAST_PACKET, frame, + data, 0); data += SPCA50X_OFFSET_DATA; len -= SPCA50X_OFFSET_DATA; - if (len > 0) - memcpy(sd->tmpbuf, data, len); - else - len = 0; - sd->buflen = len; - return; + gspca_frame_add(gspca_dev, FIRST_PACKET, frame, + data, len); + break; case 0xff: /* drop */ /* gspca_dev->last_packet_type = DISCARD_PACKET; */ - return; + break; + default: + data += 1; + len -= 1; + gspca_frame_add(gspca_dev, FIRST_PACKET, frame, + data, len); + break; } - data += 1; - len -= 1; - memcpy(&sd->tmpbuf[sd->buflen], data, len); - sd->buflen += len; } static void 
setbrightness(struct gspca_dev *gspca_dev) diff --git a/drivers/media/video/gspca/spca508.c b/drivers/media/video/gspca/spca508.c index af531d62856c..4378e966edcc 100644 --- a/drivers/media/video/gspca/spca508.c +++ b/drivers/media/video/gspca/spca508.c @@ -30,10 +30,6 @@ MODULE_LICENSE("GPL"); struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ - int buflen; - unsigned char tmpbuf[352 * 288 * 3 / 2]; /* YUVY per line */ - unsigned char tmpbuf2[352 * 288 * 2]; /* YUYV */ - unsigned char brightness; char subtype; @@ -68,23 +64,23 @@ static struct ctrl sd_ctrls[] = { static struct v4l2_pix_format sif_mode[] = { {160, 120, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE, - .bytesperline = 160 * 2, - .sizeimage = 160 * 120 * 2, + .bytesperline = 160 * 3, + .sizeimage = 160 * 120 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 3}, {176, 144, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE, - .bytesperline = 176 * 2, - .sizeimage = 176 * 144 * 2, + .bytesperline = 176 * 3, + .sizeimage = 176 * 144 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 2}, {320, 240, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE, - .bytesperline = 320 * 2, - .sizeimage = 320 * 240 * 2, + .bytesperline = 320 * 3, + .sizeimage = 320 * 240 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1}, {352, 288, V4L2_PIX_FMT_YUYV, V4L2_FIELD_NONE, - .bytesperline = 352 * 2, - .sizeimage = 352 * 288 * 2, + .bytesperline = 352 * 3, + .sizeimage = 352 * 288 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; @@ -1567,77 +1563,30 @@ static void sd_close(struct gspca_dev *gspca_dev) { } -/* convert YUVY per line to YUYV (YUV 4:2:2) */ -static void yuvy_decode(unsigned char *out, - unsigned char *in, - int width, - int height) -{ - unsigned char *Ui, *Vi, *yi, *yi1; - unsigned char *out1; - int i, j; - - yi = in; - for (i = height / 2; --i >= 0; ) { - out1 = out + width * 2; /* next line */ - Ui = yi + width; - Vi = Ui + width / 2; - yi1 = Vi + width / 2; - for (j = width / 2; --j >= 0; ) { - *out++ = 128 + *yi++; - *out++ = 128 + *Ui; - *out++ = 128 + *yi++; - *out++ = 128 + *Vi; - - *out1++ = 128 + *yi1++; - *out1++ = 128 + *Ui++; - *out1++ = 128 + *yi1++; - *out1++ = 128 + *Vi++; - } - yi += width * 2; - out = out1; - } -} - static void sd_pkt_scan(struct gspca_dev *gspca_dev, struct gspca_frame *frame, /* target */ __u8 *data, /* isoc packet */ int len) /* iso packet length */ { - struct sd *sd = (struct sd *) gspca_dev; - switch (data[0]) { case 0: /* start of frame */ - if (gspca_dev->last_packet_type == FIRST_PACKET) { - yuvy_decode(sd->tmpbuf2, sd->tmpbuf, - gspca_dev->width, - gspca_dev->height); - frame = gspca_frame_add(gspca_dev, - LAST_PACKET, - frame, - sd->tmpbuf2, - gspca_dev->width - * gspca_dev->height - * 2); - } - gspca_frame_add(gspca_dev, FIRST_PACKET, frame, - data, 0); + frame = gspca_frame_add(gspca_dev, LAST_PACKET, frame, + data, 0); data += SPCA508_OFFSET_DATA; len -= SPCA508_OFFSET_DATA; - if (len > 0) - memcpy(sd->tmpbuf, data, len); - else - len = 0; - sd->buflen = len; - return; + gspca_frame_add(gspca_dev, FIRST_PACKET, frame, + data, len); + break; case 0xff: /* drop */ /* gspca_dev->last_packet_type = DISCARD_PACKET; */ - return; + break; + default: + data += 1; + len -= 1; + gspca_frame_add(gspca_dev, FIRST_PACKET, frame, + data, len); + break; } - data += 1; - len -= 1; - memcpy(&sd->tmpbuf[sd->buflen], data, len); - sd->buflen += len; } static void setbrightness(struct gspca_dev *gspca_dev) diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index cc0c8952323b..7d9ac046389e 100644 
--- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -324,6 +324,8 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_PWC2 v4l2_fourcc('P', 'W', 'C', '2') /* pwc newer webcam */ #define V4L2_PIX_FMT_ET61X251 v4l2_fourcc('E', '6', '2', '5') /* ET61X251 compression */ #define V4L2_PIX_FMT_SPCA501 v4l2_fourcc('S', '5', '0', '1') /* YUYV per line */ +#define V4L2_PIX_FMT_SPCA505 v4l2_fourcc('S','5','0','5') /* YYUV per line */ +#define V4L2_PIX_FMT_SPCA508 v4l2_fourcc('S','5','0','8') /* YUVY per line */ #define V4L2_PIX_FMT_SPCA561 v4l2_fourcc('S', '5', '6', '1') /* compressed GBRG bayer */ #define V4L2_PIX_FMT_PAC207 v4l2_fourcc('P', '2', '0', '7') /* compressed BGGR bayer */ -- cgit v1.2.3 From c1d7f4f1648cb8efd87f1b9560c40af2297e7c05 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 26 Jul 2008 08:33:47 -0300 Subject: V4L/DVB (8524): videodev: copy the VID_TYPE defines to videodev.h The VID_TYPE defines are V4L1 specific, so copy them back to videodev.h. In videodev2.h ensure that they are not used in the kernel (you need to include videodev.h instead) and mark them are deprecated. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev.h | 15 +++++++++++++++ include/linux/videodev2.h | 6 ++++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/videodev.h b/include/linux/videodev.h index 9385a566aed8..15a653d41132 100644 --- a/include/linux/videodev.h +++ b/include/linux/videodev.h @@ -17,6 +17,21 @@ #if defined(CONFIG_VIDEO_V4L1_COMPAT) || !defined (__KERNEL__) +#define VID_TYPE_CAPTURE 1 /* Can capture */ +#define VID_TYPE_TUNER 2 /* Can tune */ +#define VID_TYPE_TELETEXT 4 /* Does teletext */ +#define VID_TYPE_OVERLAY 8 /* Overlay onto frame buffer */ +#define VID_TYPE_CHROMAKEY 16 /* Overlay by chromakey */ +#define VID_TYPE_CLIPPING 32 /* Can clip */ +#define VID_TYPE_FRAMERAM 64 /* Uses the frame buffer memory */ +#define VID_TYPE_SCALES 128 /* Scalable */ +#define VID_TYPE_MONOCHROME 256 /* Monochrome only */ +#define VID_TYPE_SUBCAPTURE 512 /* Can capture subareas of the image */ +#define VID_TYPE_MPEG_DECODER 1024 /* Can decode MPEG streams */ +#define VID_TYPE_MPEG_ENCODER 2048 /* Can encode MPEG streams */ +#define VID_TYPE_MJPEG_DECODER 4096 /* Can decode MJPEG streams */ +#define VID_TYPE_MJPEG_ENCODER 8192 /* Can encode MJPEG streams */ + struct video_capability { char name[32]; diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 7d9ac046389e..f7195351a1e7 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -71,6 +71,11 @@ */ #define VIDEO_MAX_FRAME 32 +#ifndef __KERNEL__ + +/* These defines are V4L1 specific and should not be used with the V4L2 API! + They will be removed from this header in the future. 
*/ + #define VID_TYPE_CAPTURE 1 /* Can capture */ #define VID_TYPE_TUNER 2 /* Can tune */ #define VID_TYPE_TELETEXT 4 /* Does teletext */ @@ -85,6 +90,7 @@ #define VID_TYPE_MPEG_ENCODER 2048 /* Can encode MPEG streams */ #define VID_TYPE_MJPEG_DECODER 4096 /* Can decode MJPEG streams */ #define VID_TYPE_MJPEG_ENCODER 8192 /* Can encode MJPEG streams */ +#endif /* * M I S C E L L A N E O U S -- cgit v1.2.3 From 9fa0f6db3a201bef49f28e69f80802559a38586b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 27 Jul 2008 08:55:17 -0300 Subject: V4L/DVB (8522): videodev2: Fix merge conflict Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index f7195351a1e7..e466bd54a50e 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -330,8 +330,8 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_PWC2 v4l2_fourcc('P', 'W', 'C', '2') /* pwc newer webcam */ #define V4L2_PIX_FMT_ET61X251 v4l2_fourcc('E', '6', '2', '5') /* ET61X251 compression */ #define V4L2_PIX_FMT_SPCA501 v4l2_fourcc('S', '5', '0', '1') /* YUYV per line */ -#define V4L2_PIX_FMT_SPCA505 v4l2_fourcc('S','5','0','5') /* YYUV per line */ -#define V4L2_PIX_FMT_SPCA508 v4l2_fourcc('S','5','0','8') /* YUVY per line */ +#define V4L2_PIX_FMT_SPCA505 v4l2_fourcc('S', '5', '0', '5') /* YYUV per line */ +#define V4L2_PIX_FMT_SPCA508 v4l2_fourcc('S', '5', '0', '8') /* YUVY per line */ #define V4L2_PIX_FMT_SPCA561 v4l2_fourcc('S', '5', '6', '1') /* compressed GBRG bayer */ #define V4L2_PIX_FMT_PAC207 v4l2_fourcc('P', '2', '0', '7') /* compressed BGGR bayer */ -- cgit v1.2.3 From 5995477ab7f3522c497c9c4a1c55373e9d655574 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sun, 27 Jul 2008 17:29:15 +0200 Subject: task IO accounting: improve code readability Put all i/o statistics in struct proc_io_accounting and use inline functions to initialize and increment statistics, removing a lot of single variable assignments. This also reduces the kernel size as following (with CONFIG_TASK_XACCT=y and CONFIG_TASK_IO_ACCOUNTING=y). 
text data bss dec hex filename 11651 0 0 11651 2d83 kernel/exit.o.before 11619 0 0 11619 2d63 kernel/exit.o.after 10886 132 136 11154 2b92 kernel/fork.o.before 10758 132 136 11026 2b12 kernel/fork.o.after 3082029 807968 4818600 8708597 84e1f5 vmlinux.o.before 3081869 807968 4818600 8708437 84e155 vmlinux.o.after Signed-off-by: Andrea Righi Acked-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/proc/base.c | 57 ++++++++++------------------------ include/linux/sched.h | 19 ++++-------- include/linux/task_io_accounting.h | 27 ++++++++++++++-- include/linux/task_io_accounting_ops.h | 56 +++++++++++++++++++++++++++------ kernel/exit.c | 30 ++---------------- kernel/fork.c | 15 ++------- kernel/tsacct.c | 14 ++++----- 7 files changed, 104 insertions(+), 114 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/base.c b/fs/proc/base.c index e74308bdabd3..3d94906c7aa8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -2402,44 +2403,17 @@ static int proc_base_fill_cache(struct file *filp, void *dirent, #ifdef CONFIG_TASK_IO_ACCOUNTING static int do_io_accounting(struct task_struct *task, char *buffer, int whole) { - u64 rchar, wchar, syscr, syscw; - struct task_io_accounting ioac; - - rchar = task->rchar; - wchar = task->wchar; - syscr = task->syscr; - syscw = task->syscw; - memcpy(&ioac, &task->ioac, sizeof(ioac)); - - if (whole) { - unsigned long flags; - - if (lock_task_sighand(task, &flags)) { - struct signal_struct *sig = task->signal; - struct task_struct *t = task; - - rchar += sig->rchar; - wchar += sig->wchar; - syscr += sig->syscr; - syscw += sig->syscw; - - ioac.read_bytes += sig->ioac.read_bytes; - ioac.write_bytes += sig->ioac.write_bytes; - ioac.cancelled_write_bytes += - sig->ioac.cancelled_write_bytes; - while_each_thread(task, t) { - rchar += t->rchar; - wchar += t->wchar; - syscr += t->syscr; - syscw += t->syscw; - - ioac.read_bytes += t->ioac.read_bytes; - ioac.write_bytes += t->ioac.write_bytes; - ioac.cancelled_write_bytes += - t->ioac.cancelled_write_bytes; - } - unlock_task_sighand(task, &flags); - } + struct proc_io_accounting acct = task->ioac; + unsigned long flags; + + if (whole && lock_task_sighand(task, &flags)) { + struct task_struct *t = task; + + task_io_accounting_add(&acct, &task->signal->ioac); + while_each_thread(task, t) + task_io_accounting_add(&acct, &t->ioac); + + unlock_task_sighand(task, &flags); } return sprintf(buffer, "rchar: %llu\n" @@ -2449,9 +2423,10 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole) "read_bytes: %llu\n" "write_bytes: %llu\n" "cancelled_write_bytes: %llu\n", - rchar, wchar, syscr, syscw, - ioac.read_bytes, ioac.write_bytes, - ioac.cancelled_write_bytes); + acct.chr.rchar, acct.chr.wchar, + acct.chr.syscr, acct.chr.syscw, + acct.blk.read_bytes, acct.blk.write_bytes, + acct.blk.cancelled_write_bytes); } static int proc_tid_io_accounting(struct task_struct *task, char *buffer) diff --git a/include/linux/sched.h b/include/linux/sched.h index f59318a0099b..034c1ca6b332 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -505,10 +505,7 @@ struct signal_struct { unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; -#ifdef CONFIG_TASK_XACCT - u64 rchar, wchar, syscr, syscw; -#endif - struct task_io_accounting ioac; + struct proc_io_accounting ioac; /* * Cumulative ns of scheduled CPU time for dead threads 
in the @@ -1256,11 +1253,7 @@ struct task_struct { unsigned long ptrace_message; siginfo_t *last_siginfo; /* For ptrace use. */ -#ifdef CONFIG_TASK_XACCT -/* i/o counters(bytes read/written, #syscalls */ - u64 rchar, wchar, syscr, syscw; -#endif - struct task_io_accounting ioac; + struct proc_io_accounting ioac; #if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ @@ -2190,22 +2183,22 @@ extern long sched_group_rt_period(struct task_group *tg); #ifdef CONFIG_TASK_XACCT static inline void add_rchar(struct task_struct *tsk, ssize_t amt) { - tsk->rchar += amt; + tsk->ioac.chr.rchar += amt; } static inline void add_wchar(struct task_struct *tsk, ssize_t amt) { - tsk->wchar += amt; + tsk->ioac.chr.wchar += amt; } static inline void inc_syscr(struct task_struct *tsk) { - tsk->syscr++; + tsk->ioac.chr.syscr++; } static inline void inc_syscw(struct task_struct *tsk) { - tsk->syscw++; + tsk->ioac.chr.syscw++; } #else static inline void add_rchar(struct task_struct *tsk, ssize_t amt) diff --git a/include/linux/task_io_accounting.h b/include/linux/task_io_accounting.h index 44d00e9cceea..165390f8b936 100644 --- a/include/linux/task_io_accounting.h +++ b/include/linux/task_io_accounting.h @@ -1,5 +1,5 @@ /* - * task_io_accounting: a structure which is used for recording a single task's + * proc_io_accounting: a structure which is used for recording a single task's * IO statistics. * * Don't include this header file directly - it is designed to be dragged in via @@ -8,6 +8,22 @@ * Blame akpm@osdl.org for all this. */ +#ifdef CONFIG_TASK_XACCT +struct task_chr_io_accounting { + /* bytes read */ + u64 rchar; + /* bytes written */ + u64 wchar; + /* # of read syscalls */ + u64 syscr; + /* # of write syscalls */ + u64 syscw; +}; +#else /* CONFIG_TASK_XACCT */ +struct task_chr_io_accounting { +}; +#endif /* CONFIG_TASK_XACCT */ + #ifdef CONFIG_TASK_IO_ACCOUNTING struct task_io_accounting { /* @@ -31,7 +47,12 @@ struct task_io_accounting { */ u64 cancelled_write_bytes; }; -#else +#else /* CONFIG_TASK_IO_ACCOUNTING */ struct task_io_accounting { }; -#endif +#endif /* CONFIG_TASK_IO_ACCOUNTING */ + +struct proc_io_accounting { + struct task_chr_io_accounting chr; + struct task_io_accounting blk; +}; diff --git a/include/linux/task_io_accounting_ops.h b/include/linux/task_io_accounting_ops.h index ff46c6fad79d..e6f958ebe97f 100644 --- a/include/linux/task_io_accounting_ops.h +++ b/include/linux/task_io_accounting_ops.h @@ -9,7 +9,7 @@ #ifdef CONFIG_TASK_IO_ACCOUNTING static inline void task_io_account_read(size_t bytes) { - current->ioac.read_bytes += bytes; + current->ioac.blk.read_bytes += bytes; } /* @@ -18,12 +18,12 @@ static inline void task_io_account_read(size_t bytes) */ static inline unsigned long task_io_get_inblock(const struct task_struct *p) { - return p->ioac.read_bytes >> 9; + return p->ioac.blk.read_bytes >> 9; } static inline void task_io_account_write(size_t bytes) { - current->ioac.write_bytes += bytes; + current->ioac.blk.write_bytes += bytes; } /* @@ -32,17 +32,25 @@ static inline void task_io_account_write(size_t bytes) */ static inline unsigned long task_io_get_oublock(const struct task_struct *p) { - return p->ioac.write_bytes >> 9; + return p->ioac.blk.write_bytes >> 9; } static inline void task_io_account_cancelled_write(size_t bytes) { - current->ioac.cancelled_write_bytes += bytes; + current->ioac.blk.cancelled_write_bytes += bytes; } -static inline void task_io_accounting_init(struct task_struct *tsk) 
+static inline void task_io_accounting_init(struct proc_io_accounting *ioac) { - memset(&tsk->ioac, 0, sizeof(tsk->ioac)); + memset(ioac, 0, sizeof(*ioac)); +} + +static inline void task_blk_io_accounting_add(struct proc_io_accounting *dst, + struct proc_io_accounting *src) +{ + dst->blk.read_bytes += src->blk.read_bytes; + dst->blk.write_bytes += src->blk.write_bytes; + dst->blk.cancelled_write_bytes += src->blk.cancelled_write_bytes; } #else @@ -69,9 +77,37 @@ static inline void task_io_account_cancelled_write(size_t bytes) { } -static inline void task_io_accounting_init(struct task_struct *tsk) +static inline void task_io_accounting_init(struct proc_io_accounting *ioac) +{ +} + +static inline void task_blk_io_accounting_add(struct proc_io_accounting *dst, + struct proc_io_accounting *src) { } -#endif /* CONFIG_TASK_IO_ACCOUNTING */ -#endif /* __TASK_IO_ACCOUNTING_OPS_INCLUDED */ +#endif /* CONFIG_TASK_IO_ACCOUNTING */ + +#ifdef CONFIG_TASK_XACCT +static inline void task_chr_io_accounting_add(struct proc_io_accounting *dst, + struct proc_io_accounting *src) +{ + dst->chr.rchar += src->chr.rchar; + dst->chr.wchar += src->chr.wchar; + dst->chr.syscr += src->chr.syscr; + dst->chr.syscw += src->chr.syscw; +} +#else +static inline void task_chr_io_accounting_add(struct proc_io_accounting *dst, + struct proc_io_accounting *src) +{ +} +#endif /* CONFIG_TASK_XACCT */ + +static inline void task_io_accounting_add(struct proc_io_accounting *dst, + struct proc_io_accounting *src) +{ + task_chr_io_accounting_add(dst, src); + task_blk_io_accounting_add(dst, src); +} +#endif /* __TASK_IO_ACCOUNTING_OPS_INCLUDED */ diff --git a/kernel/exit.c b/kernel/exit.c index 0caf590548a0..eb4d6470d1d0 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -121,18 +121,7 @@ static void __exit_signal(struct task_struct *tsk) sig->nivcsw += tsk->nivcsw; sig->inblock += task_io_get_inblock(tsk); sig->oublock += task_io_get_oublock(tsk); -#ifdef CONFIG_TASK_XACCT - sig->rchar += tsk->rchar; - sig->wchar += tsk->wchar; - sig->syscr += tsk->syscr; - sig->syscw += tsk->syscw; -#endif /* CONFIG_TASK_XACCT */ -#ifdef CONFIG_TASK_IO_ACCOUNTING - sig->ioac.read_bytes += tsk->ioac.read_bytes; - sig->ioac.write_bytes += tsk->ioac.write_bytes; - sig->ioac.cancelled_write_bytes += - tsk->ioac.cancelled_write_bytes; -#endif /* CONFIG_TASK_IO_ACCOUNTING */ + task_io_accounting_add(&sig->ioac, &tsk->ioac); sig->sum_sched_runtime += tsk->se.sum_exec_runtime; sig = NULL; /* Marker for below. 
*/ } @@ -1363,21 +1352,8 @@ static int wait_task_zombie(struct task_struct *p, int options, psig->coublock += task_io_get_oublock(p) + sig->oublock + sig->coublock; -#ifdef CONFIG_TASK_XACCT - psig->rchar += p->rchar + sig->rchar; - psig->wchar += p->wchar + sig->wchar; - psig->syscr += p->syscr + sig->syscr; - psig->syscw += p->syscw + sig->syscw; -#endif /* CONFIG_TASK_XACCT */ -#ifdef CONFIG_TASK_IO_ACCOUNTING - psig->ioac.read_bytes += - p->ioac.read_bytes + sig->ioac.read_bytes; - psig->ioac.write_bytes += - p->ioac.write_bytes + sig->ioac.write_bytes; - psig->ioac.cancelled_write_bytes += - p->ioac.cancelled_write_bytes + - sig->ioac.cancelled_write_bytes; -#endif /* CONFIG_TASK_IO_ACCOUNTING */ + task_io_accounting_add(&psig->ioac, &p->ioac); + task_io_accounting_add(&psig->ioac, &sig->ioac); spin_unlock_irq(&p->parent->sighand->siglock); } diff --git a/kernel/fork.c b/kernel/fork.c index 5e050c1317c4..8214ba7c8bb1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -806,12 +806,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; -#ifdef CONFIG_TASK_XACCT - sig->rchar = sig->wchar = sig->syscr = sig->syscw = 0; -#endif -#ifdef CONFIG_TASK_IO_ACCOUNTING - memset(&sig->ioac, 0, sizeof(sig->ioac)); -#endif + task_io_accounting_init(&sig->ioac); sig->sum_sched_runtime = 0; INIT_LIST_HEAD(&sig->cpu_timers[0]); INIT_LIST_HEAD(&sig->cpu_timers[1]); @@ -994,13 +989,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->last_switch_timestamp = 0; #endif -#ifdef CONFIG_TASK_XACCT - p->rchar = 0; /* I/O counter: bytes read */ - p->wchar = 0; /* I/O counter: bytes written */ - p->syscr = 0; /* I/O counter: read syscalls */ - p->syscw = 0; /* I/O counter: write syscalls */ -#endif - task_io_accounting_init(p); + task_io_accounting_init(&p->ioac); acct_clear_integrals(p); p->it_virt_expires = cputime_zero; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 3da47ccdc5e5..f9cd2561689c 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -94,14 +94,14 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) stats->hiwater_vm = mm->hiwater_vm * PAGE_SIZE / KB; mmput(mm); } - stats->read_char = p->rchar; - stats->write_char = p->wchar; - stats->read_syscalls = p->syscr; - stats->write_syscalls = p->syscw; + stats->read_char = p->ioac.chr.rchar; + stats->write_char = p->ioac.chr.wchar; + stats->read_syscalls = p->ioac.chr.syscr; + stats->write_syscalls = p->ioac.chr.syscw; #ifdef CONFIG_TASK_IO_ACCOUNTING - stats->read_bytes = p->ioac.read_bytes; - stats->write_bytes = p->ioac.write_bytes; - stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes; + stats->read_bytes = p->ioac.blk.read_bytes; + stats->write_bytes = p->ioac.blk.write_bytes; + stats->cancelled_write_bytes = p->ioac.blk.cancelled_write_bytes; #else stats->read_bytes = 0; stats->write_bytes = 0; -- cgit v1.2.3 From 940389b8afad6495211614c13eb91ef7001773ec Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 28 Jul 2008 00:48:12 +0200 Subject: task IO accounting: move all IO statistics in struct task_io_accounting Simplify the code of include/linux/task_io_accounting.h. It is also more reasonable to have all the task i/o-related statistics in a single struct (task_io_accounting). 
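For illustration only, a minimal userspace sketch (not from this patch series) that reads the consolidated counters back out of /proc/<pid>/io, assuming a kernel built with CONFIG_TASK_IO_ACCOUNTING=y and assuming nothing beyond the "field: value" line format produced by do_io_accounting() earlier in the series:

#include <stdio.h>

int main(void)
{
	char key[32];
	unsigned long long val;
	FILE *f = fopen("/proc/self/io", "r");

	if (!f) {
		perror("fopen /proc/self/io");
		return 1;
	}
	/* Each line looks like "rchar: 323934931"; the field names match
	 * the sprintf() format strings used by do_io_accounting(). */
	while (fscanf(f, "%31[^:]: %llu\n", key, &val) == 2)
		printf("%-24s %llu\n", key, val);
	fclose(f);
	return 0;
}
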
Signed-off-by: Andrea Righi Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/proc/base.c | 10 +++---- include/linux/sched.h | 12 ++++----- include/linux/task_io_accounting.h | 17 ++---------- include/linux/task_io_accounting_ops.h | 48 +++++++++++++++++----------------- kernel/tsacct.c | 14 +++++----- 5 files changed, 44 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/base.c b/fs/proc/base.c index 3d94906c7aa8..01ed610f9b87 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2403,7 +2403,7 @@ static int proc_base_fill_cache(struct file *filp, void *dirent, #ifdef CONFIG_TASK_IO_ACCOUNTING static int do_io_accounting(struct task_struct *task, char *buffer, int whole) { - struct proc_io_accounting acct = task->ioac; + struct task_io_accounting acct = task->ioac; unsigned long flags; if (whole && lock_task_sighand(task, &flags)) { @@ -2423,10 +2423,10 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole) "read_bytes: %llu\n" "write_bytes: %llu\n" "cancelled_write_bytes: %llu\n", - acct.chr.rchar, acct.chr.wchar, - acct.chr.syscr, acct.chr.syscw, - acct.blk.read_bytes, acct.blk.write_bytes, - acct.blk.cancelled_write_bytes); + acct.rchar, acct.wchar, + acct.syscr, acct.syscw, + acct.read_bytes, acct.write_bytes, + acct.cancelled_write_bytes); } static int proc_tid_io_accounting(struct task_struct *task, char *buffer) diff --git a/include/linux/sched.h b/include/linux/sched.h index 034c1ca6b332..5270d449ff9d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -505,7 +505,7 @@ struct signal_struct { unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; - struct proc_io_accounting ioac; + struct task_io_accounting ioac; /* * Cumulative ns of scheduled CPU time for dead threads in the @@ -1253,7 +1253,7 @@ struct task_struct { unsigned long ptrace_message; siginfo_t *last_siginfo; /* For ptrace use. */ - struct proc_io_accounting ioac; + struct task_io_accounting ioac; #if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ @@ -2183,22 +2183,22 @@ extern long sched_group_rt_period(struct task_group *tg); #ifdef CONFIG_TASK_XACCT static inline void add_rchar(struct task_struct *tsk, ssize_t amt) { - tsk->ioac.chr.rchar += amt; + tsk->ioac.rchar += amt; } static inline void add_wchar(struct task_struct *tsk, ssize_t amt) { - tsk->ioac.chr.wchar += amt; + tsk->ioac.wchar += amt; } static inline void inc_syscr(struct task_struct *tsk) { - tsk->ioac.chr.syscr++; + tsk->ioac.syscr++; } static inline void inc_syscw(struct task_struct *tsk) { - tsk->ioac.chr.syscw++; + tsk->ioac.syscw++; } #else static inline void add_rchar(struct task_struct *tsk, ssize_t amt) diff --git a/include/linux/task_io_accounting.h b/include/linux/task_io_accounting.h index 165390f8b936..5e88afc9a2fb 100644 --- a/include/linux/task_io_accounting.h +++ b/include/linux/task_io_accounting.h @@ -1,5 +1,5 @@ /* - * proc_io_accounting: a structure which is used for recording a single task's + * task_io_accounting: a structure which is used for recording a single task's * IO statistics. * * Don't include this header file directly - it is designed to be dragged in via @@ -8,8 +8,8 @@ * Blame akpm@osdl.org for all this. 
*/ +struct task_io_accounting { #ifdef CONFIG_TASK_XACCT -struct task_chr_io_accounting { /* bytes read */ u64 rchar; /* bytes written */ @@ -18,14 +18,9 @@ struct task_chr_io_accounting { u64 syscr; /* # of write syscalls */ u64 syscw; -}; -#else /* CONFIG_TASK_XACCT */ -struct task_chr_io_accounting { -}; #endif /* CONFIG_TASK_XACCT */ #ifdef CONFIG_TASK_IO_ACCOUNTING -struct task_io_accounting { /* * The number of bytes which this task has caused to be read from * storage. @@ -46,13 +41,5 @@ struct task_io_accounting { * information loss in doing that. */ u64 cancelled_write_bytes; -}; -#else /* CONFIG_TASK_IO_ACCOUNTING */ -struct task_io_accounting { -}; #endif /* CONFIG_TASK_IO_ACCOUNTING */ - -struct proc_io_accounting { - struct task_chr_io_accounting chr; - struct task_io_accounting blk; }; diff --git a/include/linux/task_io_accounting_ops.h b/include/linux/task_io_accounting_ops.h index e6f958ebe97f..4d090f9ee608 100644 --- a/include/linux/task_io_accounting_ops.h +++ b/include/linux/task_io_accounting_ops.h @@ -9,7 +9,7 @@ #ifdef CONFIG_TASK_IO_ACCOUNTING static inline void task_io_account_read(size_t bytes) { - current->ioac.blk.read_bytes += bytes; + current->ioac.read_bytes += bytes; } /* @@ -18,12 +18,12 @@ static inline void task_io_account_read(size_t bytes) */ static inline unsigned long task_io_get_inblock(const struct task_struct *p) { - return p->ioac.blk.read_bytes >> 9; + return p->ioac.read_bytes >> 9; } static inline void task_io_account_write(size_t bytes) { - current->ioac.blk.write_bytes += bytes; + current->ioac.write_bytes += bytes; } /* @@ -32,25 +32,25 @@ static inline void task_io_account_write(size_t bytes) */ static inline unsigned long task_io_get_oublock(const struct task_struct *p) { - return p->ioac.blk.write_bytes >> 9; + return p->ioac.write_bytes >> 9; } static inline void task_io_account_cancelled_write(size_t bytes) { - current->ioac.blk.cancelled_write_bytes += bytes; + current->ioac.cancelled_write_bytes += bytes; } -static inline void task_io_accounting_init(struct proc_io_accounting *ioac) +static inline void task_io_accounting_init(struct task_io_accounting *ioac) { memset(ioac, 0, sizeof(*ioac)); } -static inline void task_blk_io_accounting_add(struct proc_io_accounting *dst, - struct proc_io_accounting *src) +static inline void task_blk_io_accounting_add(struct task_io_accounting *dst, + struct task_io_accounting *src) { - dst->blk.read_bytes += src->blk.read_bytes; - dst->blk.write_bytes += src->blk.write_bytes; - dst->blk.cancelled_write_bytes += src->blk.cancelled_write_bytes; + dst->read_bytes += src->read_bytes; + dst->write_bytes += src->write_bytes; + dst->cancelled_write_bytes += src->cancelled_write_bytes; } #else @@ -77,35 +77,35 @@ static inline void task_io_account_cancelled_write(size_t bytes) { } -static inline void task_io_accounting_init(struct proc_io_accounting *ioac) +static inline void task_io_accounting_init(struct task_io_accounting *ioac) { } -static inline void task_blk_io_accounting_add(struct proc_io_accounting *dst, - struct proc_io_accounting *src) +static inline void task_blk_io_accounting_add(struct task_io_accounting *dst, + struct task_io_accounting *src) { } #endif /* CONFIG_TASK_IO_ACCOUNTING */ #ifdef CONFIG_TASK_XACCT -static inline void task_chr_io_accounting_add(struct proc_io_accounting *dst, - struct proc_io_accounting *src) +static inline void task_chr_io_accounting_add(struct task_io_accounting *dst, + struct task_io_accounting *src) { - dst->chr.rchar += src->chr.rchar; - dst->chr.wchar += 
src->chr.wchar; - dst->chr.syscr += src->chr.syscr; - dst->chr.syscw += src->chr.syscw; + dst->rchar += src->rchar; + dst->wchar += src->wchar; + dst->syscr += src->syscr; + dst->syscw += src->syscw; } #else -static inline void task_chr_io_accounting_add(struct proc_io_accounting *dst, - struct proc_io_accounting *src) +static inline void task_chr_io_accounting_add(struct task_io_accounting *dst, + struct task_io_accounting *src) { } #endif /* CONFIG_TASK_XACCT */ -static inline void task_io_accounting_add(struct proc_io_accounting *dst, - struct proc_io_accounting *src) +static inline void task_io_accounting_add(struct task_io_accounting *dst, + struct task_io_accounting *src) { task_chr_io_accounting_add(dst, src); task_blk_io_accounting_add(dst, src); diff --git a/kernel/tsacct.c b/kernel/tsacct.c index f9cd2561689c..8ebcd8532dfb 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -94,14 +94,14 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) stats->hiwater_vm = mm->hiwater_vm * PAGE_SIZE / KB; mmput(mm); } - stats->read_char = p->ioac.chr.rchar; - stats->write_char = p->ioac.chr.wchar; - stats->read_syscalls = p->ioac.chr.syscr; - stats->write_syscalls = p->ioac.chr.syscw; + stats->read_char = p->ioac.rchar; + stats->write_char = p->ioac.wchar; + stats->read_syscalls = p->ioac.syscr; + stats->write_syscalls = p->ioac.syscw; #ifdef CONFIG_TASK_IO_ACCOUNTING - stats->read_bytes = p->ioac.blk.read_bytes; - stats->write_bytes = p->ioac.blk.write_bytes; - stats->cancelled_write_bytes = p->ioac.blk.cancelled_write_bytes; + stats->read_bytes = p->ioac.read_bytes; + stats->write_bytes = p->ioac.write_bytes; + stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes; #else stats->read_bytes = 0; stats->write_bytes = 0; -- cgit v1.2.3 From 5c2aed622571ac7c3c6ec182d6d3c318e4b45c8b Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 28 Feb 2008 11:33:03 -0500 Subject: stop_machine: add ALL_CPUS option -allow stop_mahcine_run() to call a function on all cpus. Calling stop_machine_run() with a 'ALL_CPUS' invokes this new behavior. stop_machine_run() proceeds as normal until the calling cpu has invoked 'fn'. Then, we tell all the other cpus to call 'fn'. Signed-off-by: Jason Baron Signed-off-by: Mathieu Desnoyers Signed-off-by: Rusty Russell CC: Adrian Bunk CC: Andi Kleen CC: Alexey Dobriyan CC: Christoph Hellwig CC: mingo@elte.hu CC: akpm@osdl.org --- include/linux/stop_machine.h | 8 +++++++- kernel/stop_machine.c | 32 +++++++++++++++++++++++++------- 2 files changed, 32 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 5bfc553bdb21..18af011c13af 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -8,11 +8,17 @@ #include #if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP) + +#define ALL_CPUS ~0U + /** * stop_machine_run: freeze the machine on all CPUs and run this function * @fn: the function to run * @data: the data ptr for the @fn() - * @cpu: the cpu to run @fn() on (or any, if @cpu == NR_CPUS. 
+ * @cpu: if @cpu == n, run @fn() on cpu n + * if @cpu == NR_CPUS, run @fn() on any cpu + * if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then + * concurrently on all the other cpus * * Description: This causes a thread to be scheduled on every other cpu, * each of which disables interrupts, and finally interrupts are disabled diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 738b411ff2d3..a473bd0cb71b 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -22,9 +22,17 @@ enum stopmachine_state { STOPMACHINE_WAIT, STOPMACHINE_PREPARE, STOPMACHINE_DISABLE_IRQ, + STOPMACHINE_RUN, STOPMACHINE_EXIT, }; +struct stop_machine_data { + int (*fn)(void *); + void *data; + struct completion done; + int run_all; +} smdata; + static enum stopmachine_state stopmachine_state; static unsigned int stopmachine_num_threads; static atomic_t stopmachine_thread_ack; @@ -33,6 +41,7 @@ static int stopmachine(void *cpu) { int irqs_disabled = 0; int prepared = 0; + int ran = 0; cpumask_of_cpu_ptr(cpumask, (int)(long)cpu); set_cpus_allowed_ptr(current, cpumask); @@ -58,6 +67,11 @@ static int stopmachine(void *cpu) prepared = 1; smp_mb(); /* Must read state first. */ atomic_inc(&stopmachine_thread_ack); + } else if (stopmachine_state == STOPMACHINE_RUN && !ran) { + smdata.fn(smdata.data); + ran = 1; + smp_mb(); /* Must read state first. */ + atomic_inc(&stopmachine_thread_ack); } /* Yield in first stage: migration threads need to * help our sisters onto their CPUs. */ @@ -136,11 +150,10 @@ static void restart_machine(void) preempt_enable_no_resched(); } -struct stop_machine_data { - int (*fn)(void *); - void *data; - struct completion done; -}; +static void run_other_cpus(void) +{ + stopmachine_set_state(STOPMACHINE_RUN); +} static int do_stop(void *_smdata) { @@ -150,6 +163,8 @@ static int do_stop(void *_smdata) ret = stop_machine(); if (ret == 0) { ret = smdata->fn(smdata->data); + if (smdata->run_all) + run_other_cpus(); restart_machine(); } @@ -173,14 +188,17 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, struct stop_machine_data smdata; struct task_struct *p; + mutex_lock(&stopmachine_mutex); + smdata.fn = fn; smdata.data = data; + smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0; init_completion(&smdata.done); - mutex_lock(&stopmachine_mutex); + smp_wmb(); /* make sure other cpus see smdata updates */ /* If they don't care which CPU fn runs on, bind to any online one. */ - if (cpu == NR_CPUS) + if (cpu == NR_CPUS || cpu == ALL_CPUS) cpu = raw_smp_processor_id(); p = kthread_create(do_stop, &smdata, "kstopmachine"); -- cgit v1.2.3 From ffdb5976c47609c862917d4c186ecbb5706d2dda Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 28 Jul 2008 12:16:28 -0500 Subject: Simplify stop_machine stop_machine creates a kthread which creates kernel threads. We can create those threads directly and simplify things a little. Some care must be taken with CPU hotunplug, which has special needs, but that code seems more robust than it was in the past. 
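For context, a hedged sketch of how a caller might use this interface; hypothetical code, not taken from the series, written to follow the stop_machine_run() kernel-doc quoted above. The callback runs while every online CPU is quiesced with interrupts off, and its return value is handed back to the caller.

#include <linux/stop_machine.h>
#include <linux/threads.h>
#include <linux/errno.h>

/* Hypothetical helper: runs while all other CPUs are parked in the
 * stop_machine threads, so no CPU is inside a preempt-disabled region
 * or an interrupt handler while it executes. */
static int apply_update(void *data)
{
	int *new_value = data;

	/* ... update global state that must never be seen half-written ... */
	return *new_value >= 0 ? 0 : -EINVAL;
}

static int do_update(int value)
{
	/* NR_CPUS means "run on any online CPU"; per the ALL_CPUS patch
	 * earlier in the series, passing ALL_CPUS would instead run
	 * apply_update() on every online CPU. */
	return stop_machine_run(apply_update, &value, NR_CPUS);
}

The CPU-hotplug path in kernel/cpu.c below uses the __stop_machine_run() variant in the same way, checking the returned int directly instead of stopping a separate kthread.
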
Signed-off-by: Rusty Russell Acked-by: Christian Borntraeger --- include/linux/stop_machine.h | 20 ++- kernel/cpu.c | 13 +- kernel/stop_machine.c | 293 ++++++++++++++++++------------------------- 3 files changed, 136 insertions(+), 190 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 18af011c13af..36c2c7284eb3 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -17,13 +17,12 @@ * @data: the data ptr for the @fn() * @cpu: if @cpu == n, run @fn() on cpu n * if @cpu == NR_CPUS, run @fn() on any cpu - * if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then - * concurrently on all the other cpus + * if @cpu == ALL_CPUS, run @fn() on every online CPU. * - * Description: This causes a thread to be scheduled on every other cpu, - * each of which disables interrupts, and finally interrupts are disabled - * on the current CPU. The result is that noone is holding a spinlock - * or inside any other preempt-disabled region when @fn() runs. + * Description: This causes a thread to be scheduled on every cpu, + * each of which disables interrupts. The result is that noone is + * holding a spinlock or inside any other preempt-disabled region when + * @fn() runs. * * This can be thought of as a very heavy write lock, equivalent to * grabbing every spinlock in the kernel. */ @@ -35,13 +34,10 @@ int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu); * @data: the data ptr for the @fn * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS. * - * Description: This is a special version of the above, which returns the - * thread which has run @fn(): kthread_stop will return the return value - * of @fn(). Used by hotplug cpu. + * Description: This is a special version of the above, which assumes cpus + * won't come or go while it's being called. Used by hotplug cpu. */ -struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, - unsigned int cpu); - +int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu); #else static inline int stop_machine_run(int (*fn)(void *), void *data, diff --git a/kernel/cpu.c b/kernel/cpu.c index 10ba5f1004a5..cf79bb911371 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -216,7 +216,6 @@ static int __ref take_cpu_down(void *_param) static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) { int err, nr_calls = 0; - struct task_struct *p; cpumask_t old_allowed, tmp; void *hcpu = (void *)(long)cpu; unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; @@ -250,19 +249,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) cpu_clear(cpu, tmp); set_cpus_allowed_ptr(current, &tmp); - p = __stop_machine_run(take_cpu_down, &tcd_param, cpu); + err = __stop_machine_run(take_cpu_down, &tcd_param, cpu); - if (IS_ERR(p) || cpu_online(cpu)) { + if (err || cpu_online(cpu)) { /* CPU didn't die: tell everyone. Can't complain. */ if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, hcpu) == NOTIFY_BAD) BUG(); - if (IS_ERR(p)) { - err = PTR_ERR(p); - goto out_allowed; - } - goto out_thread; + goto out_allowed; } /* Wait for it to sleep (leaving idle task). 
*/ @@ -279,8 +274,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) check_for_tasks(cpu); -out_thread: - err = kthread_stop(p); out_allowed: set_cpus_allowed_ptr(current, &old_allowed); out_release: diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index a473bd0cb71b..35882dccc943 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -1,4 +1,4 @@ -/* Copyright 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation. +/* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation. * GPL v2 and any later version. */ #include @@ -13,220 +13,177 @@ #include #include -/* Since we effect priority and affinity (both of which are visible - * to, and settable by outside processes) we do indirection via a - * kthread. */ - -/* Thread to stop each CPU in user context. */ +/* This controls the threads on each CPU. */ enum stopmachine_state { - STOPMACHINE_WAIT, + /* Dummy starting state for thread. */ + STOPMACHINE_NONE, + /* Awaiting everyone to be scheduled. */ STOPMACHINE_PREPARE, + /* Disable interrupts. */ STOPMACHINE_DISABLE_IRQ, + /* Run the function */ STOPMACHINE_RUN, + /* Exit */ STOPMACHINE_EXIT, }; +static enum stopmachine_state state; struct stop_machine_data { int (*fn)(void *); void *data; - struct completion done; - int run_all; -} smdata; + int fnret; +}; -static enum stopmachine_state stopmachine_state; -static unsigned int stopmachine_num_threads; -static atomic_t stopmachine_thread_ack; +/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ +static unsigned int num_threads; +static atomic_t thread_ack; +static struct completion finished; +static DEFINE_MUTEX(lock); -static int stopmachine(void *cpu) +static void set_state(enum stopmachine_state newstate) { - int irqs_disabled = 0; - int prepared = 0; - int ran = 0; - cpumask_of_cpu_ptr(cpumask, (int)(long)cpu); - - set_cpus_allowed_ptr(current, cpumask); - - /* Ack: we are alive */ - smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */ - atomic_inc(&stopmachine_thread_ack); - - /* Simple state machine */ - while (stopmachine_state != STOPMACHINE_EXIT) { - if (stopmachine_state == STOPMACHINE_DISABLE_IRQ - && !irqs_disabled) { - local_irq_disable(); - hard_irq_disable(); - irqs_disabled = 1; - /* Ack: irqs disabled. */ - smp_mb(); /* Must read state first. */ - atomic_inc(&stopmachine_thread_ack); - } else if (stopmachine_state == STOPMACHINE_PREPARE - && !prepared) { - /* Everyone is in place, hold CPU. */ - preempt_disable(); - prepared = 1; - smp_mb(); /* Must read state first. */ - atomic_inc(&stopmachine_thread_ack); - } else if (stopmachine_state == STOPMACHINE_RUN && !ran) { - smdata.fn(smdata.data); - ran = 1; - smp_mb(); /* Must read state first. */ - atomic_inc(&stopmachine_thread_ack); - } - /* Yield in first stage: migration threads need to - * help our sisters onto their CPUs. */ - if (!prepared && !irqs_disabled) - yield(); - cpu_relax(); - } - - /* Ack: we are exiting. */ - smp_mb(); /* Must read state first. */ - atomic_inc(&stopmachine_thread_ack); - - if (irqs_disabled) - local_irq_enable(); - if (prepared) - preempt_enable(); - - return 0; + /* Reset ack counter. */ + atomic_set(&thread_ack, num_threads); + smp_wmb(); + state = newstate; } -/* Change the thread state */ -static void stopmachine_set_state(enum stopmachine_state state) +/* Last one to ack a state moves to the next state. 
*/ +static void ack_state(void) { - atomic_set(&stopmachine_thread_ack, 0); - smp_wmb(); - stopmachine_state = state; - while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) - cpu_relax(); + if (atomic_dec_and_test(&thread_ack)) { + /* If we're the last one to ack the EXIT, we're finished. */ + if (state == STOPMACHINE_EXIT) + complete(&finished); + else + set_state(state + 1); + } } -static int stop_machine(void) +/* This is the actual thread which stops the CPU. It exits by itself rather + * than waiting for kthread_stop(), because it's easier for hotplug CPU. */ +static int stop_cpu(struct stop_machine_data *smdata) { - int i, ret = 0; - - atomic_set(&stopmachine_thread_ack, 0); - stopmachine_num_threads = 0; - stopmachine_state = STOPMACHINE_WAIT; + enum stopmachine_state curstate = STOPMACHINE_NONE; + int uninitialized_var(ret); - for_each_online_cpu(i) { - if (i == raw_smp_processor_id()) - continue; - ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL); - if (ret < 0) - break; - stopmachine_num_threads++; - } - - /* Wait for them all to come to life. */ - while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) { - yield(); + /* Simple state machine */ + do { + /* Chill out and ensure we re-read stopmachine_state. */ cpu_relax(); - } - - /* If some failed, kill them all. */ - if (ret < 0) { - stopmachine_set_state(STOPMACHINE_EXIT); - return ret; - } - - /* Now they are all started, make them hold the CPUs, ready. */ - preempt_disable(); - stopmachine_set_state(STOPMACHINE_PREPARE); - - /* Make them disable irqs. */ - local_irq_disable(); - hard_irq_disable(); - stopmachine_set_state(STOPMACHINE_DISABLE_IRQ); - - return 0; -} + if (state != curstate) { + curstate = state; + switch (curstate) { + case STOPMACHINE_DISABLE_IRQ: + local_irq_disable(); + hard_irq_disable(); + break; + case STOPMACHINE_RUN: + /* |= allows error detection if functions on + * multiple CPUs. */ + smdata->fnret |= smdata->fn(smdata->data); + break; + default: + break; + } + ack_state(); + } + } while (curstate != STOPMACHINE_EXIT); -static void restart_machine(void) -{ - stopmachine_set_state(STOPMACHINE_EXIT); local_irq_enable(); - preempt_enable_no_resched(); + do_exit(0); } -static void run_other_cpus(void) +/* Callback for CPUs which aren't supposed to do anything. */ +static int chill(void *unused) { - stopmachine_set_state(STOPMACHINE_RUN); + return 0; } -static int do_stop(void *_smdata) +int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) { - struct stop_machine_data *smdata = _smdata; - int ret; + int i, err; + struct stop_machine_data active, idle; + struct task_struct **threads; + + active.fn = fn; + active.data = data; + active.fnret = 0; + idle.fn = chill; + idle.data = NULL; + + /* If they don't care which cpu fn runs on, just pick one. */ + if (cpu == NR_CPUS) + cpu = any_online_cpu(cpu_online_map); + + /* This could be too big for stack on large machines. */ + threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL); + if (!threads) + return -ENOMEM; + + /* Set up initial state. 
*/ + mutex_lock(&lock); + init_completion(&finished); + num_threads = num_online_cpus(); + set_state(STOPMACHINE_PREPARE); - ret = stop_machine(); - if (ret == 0) { - ret = smdata->fn(smdata->data); - if (smdata->run_all) - run_other_cpus(); - restart_machine(); - } + for_each_online_cpu(i) { + struct stop_machine_data *smdata; + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; - /* We're done: you can kthread_stop us now */ - complete(&smdata->done); + if (cpu == ALL_CPUS || i == cpu) + smdata = &active; + else + smdata = &idle; + + threads[i] = kthread_create((void *)stop_cpu, smdata, "kstop%u", + i); + if (IS_ERR(threads[i])) { + err = PTR_ERR(threads[i]); + threads[i] = NULL; + goto kill_threads; + } - /* Wait for kthread_stop */ - set_current_state(TASK_INTERRUPTIBLE); - while (!kthread_should_stop()) { - schedule(); - set_current_state(TASK_INTERRUPTIBLE); - } - __set_current_state(TASK_RUNNING); - return ret; -} + /* Place it onto correct cpu. */ + kthread_bind(threads[i], i); -struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, - unsigned int cpu) -{ - static DEFINE_MUTEX(stopmachine_mutex); - struct stop_machine_data smdata; - struct task_struct *p; + /* Make it highest prio. */ + if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, ¶m)) + BUG(); + } - mutex_lock(&stopmachine_mutex); + /* We've created all the threads. Wake them all: hold this CPU so one + * doesn't hit this CPU until we're ready. */ + cpu = get_cpu(); + for_each_online_cpu(i) + wake_up_process(threads[i]); - smdata.fn = fn; - smdata.data = data; - smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0; - init_completion(&smdata.done); + /* This will release the thread on our CPU. */ + put_cpu(); + wait_for_completion(&finished); + mutex_unlock(&lock); - smp_wmb(); /* make sure other cpus see smdata updates */ + kfree(threads); - /* If they don't care which CPU fn runs on, bind to any online one. */ - if (cpu == NR_CPUS || cpu == ALL_CPUS) - cpu = raw_smp_processor_id(); + return active.fnret; - p = kthread_create(do_stop, &smdata, "kstopmachine"); - if (!IS_ERR(p)) { - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; +kill_threads: + for_each_online_cpu(i) + if (threads[i]) + kthread_stop(threads[i]); + mutex_unlock(&lock); - /* One high-prio thread per cpu. We'll do this one. */ - sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); - kthread_bind(p, cpu); - wake_up_process(p); - wait_for_completion(&smdata.done); - } - mutex_unlock(&stopmachine_mutex); - return p; + kfree(threads); + return err; } int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) { - struct task_struct *p; int ret; /* No CPUs can come up or down during this. */ get_online_cpus(); - p = __stop_machine_run(fn, data, cpu); - if (!IS_ERR(p)) - ret = kthread_stop(p); - else - ret = PTR_ERR(p); + ret = __stop_machine_run(fn, data, cpu); put_online_cpus(); return ret; -- cgit v1.2.3 From eeec4fad963490821348a331cca6102ae1c4a7a3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 28 Jul 2008 12:16:30 -0500 Subject: stop_machine(): stop_machine_run() changed to use cpu mask Instead of a "cpu" arg with magic values NR_CPUS (any cpu) and ~0 (all cpus), pass a cpumask_t. Allow NULL for the common case (where we don't care which CPU the function is run on): temporary cpumask_t's are usually considered bad for stack space. This deprecates stop_machine_run, to be removed soon when all the callers are dead. 
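To illustrate the new calling convention, a minimal sketch assuming the post-patch prototypes; patch_text() and the two callers are invented names, not code from this patch:

  #include <linux/cpumask.h>
  #include <linux/stop_machine.h>

  /* Hypothetical callback run while the machine is stopped. */
  static int patch_text(void *data)
  {
          return 0;
  }

  static int apply_patch_anywhere(void)
  {
          /* Common case: we don't care which CPU runs fn(), so pass
           * NULL instead of building a cpumask_t on the stack. */
          return stop_machine(patch_text, NULL, NULL);
  }

  static int apply_patch_on(unsigned int cpu)
  {
          /* Run fn() on one specific CPU, mirroring the deprecated
           * stop_machine_run() wrapper below. */
          cpumask_t mask = cpumask_of_cpu(cpu);

          return stop_machine(patch_text, NULL, &mask);
  }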
Signed-off-by: Rusty Russell --- include/linux/stop_machine.h | 34 ++++++++++++++++++++++++---------- kernel/cpu.c | 3 ++- kernel/stop_machine.c | 27 +++++++++++++-------------- 3 files changed, 39 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 36c2c7284eb3..f1cb0ba6d715 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -5,19 +5,19 @@ (and more). So the "read" side to such a lock is anything which diables preeempt. */ #include +#include #include #if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP) +/* Deprecated, but useful for transition. */ #define ALL_CPUS ~0U /** - * stop_machine_run: freeze the machine on all CPUs and run this function + * stop_machine: freeze the machine on all CPUs and run this function * @fn: the function to run * @data: the data ptr for the @fn() - * @cpu: if @cpu == n, run @fn() on cpu n - * if @cpu == NR_CPUS, run @fn() on any cpu - * if @cpu == ALL_CPUS, run @fn() on every online CPU. + * @cpus: the cpus to run the @fn() on (NULL = any online cpu) * * Description: This causes a thread to be scheduled on every cpu, * each of which disables interrupts. The result is that noone is @@ -26,22 +26,22 @@ * * This can be thought of as a very heavy write lock, equivalent to * grabbing every spinlock in the kernel. */ -int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu); +int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus); /** - * __stop_machine_run: freeze the machine on all CPUs and run this function + * __stop_machine: freeze the machine on all CPUs and run this function * @fn: the function to run * @data: the data ptr for the @fn - * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS. + * @cpus: the cpus to run the @fn() on (NULL = any online cpu) * * Description: This is a special version of the above, which assumes cpus * won't come or go while it's being called. Used by hotplug cpu. */ -int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu); +int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus); #else -static inline int stop_machine_run(int (*fn)(void *), void *data, - unsigned int cpu) +static inline int stop_machine(int (*fn)(void *), void *data, + const cpumask_t *cpus) { int ret; local_irq_disable(); @@ -50,4 +50,18 @@ static inline int stop_machine_run(int (*fn)(void *), void *data, return ret; } #endif /* CONFIG_SMP */ + +static inline int __deprecated stop_machine_run(int (*fn)(void *), void *data, + unsigned int cpu) +{ + /* If they don't care which cpu fn runs on, just pick one. */ + if (cpu == NR_CPUS) + return stop_machine(fn, data, NULL); + else if (cpu == ~0U) + return stop_machine(fn, data, &cpu_possible_map); + else { + cpumask_t cpus = cpumask_of_cpu(cpu); + return stop_machine(fn, data, &cpus); + } +} #endif /* _LINUX_STOP_MACHINE */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 53cf508f975a..29510d68338a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -248,8 +248,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) cpus_setall(tmp); cpu_clear(cpu, tmp); set_cpus_allowed_ptr(current, &tmp); + tmp = cpumask_of_cpu(cpu); - err = __stop_machine_run(take_cpu_down, &tcd_param, cpu); + err = __stop_machine(take_cpu_down, &tcd_param, &tmp); if (err) { /* CPU didn't die: tell everyone. Can't complain. 
*/ if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 35882dccc943..e446c7c7d6a9 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -100,7 +100,7 @@ static int chill(void *unused) return 0; } -int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) +int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) { int i, err; struct stop_machine_data active, idle; @@ -112,10 +112,6 @@ int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) idle.fn = chill; idle.data = NULL; - /* If they don't care which cpu fn runs on, just pick one. */ - if (cpu == NR_CPUS) - cpu = any_online_cpu(cpu_online_map); - /* This could be too big for stack on large machines. */ threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL); if (!threads) @@ -128,13 +124,16 @@ int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) set_state(STOPMACHINE_PREPARE); for_each_online_cpu(i) { - struct stop_machine_data *smdata; + struct stop_machine_data *smdata = &idle; struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; - if (cpu == ALL_CPUS || i == cpu) - smdata = &active; - else - smdata = &idle; + if (!cpus) { + if (i == first_cpu(cpu_online_map)) + smdata = &active; + } else { + if (cpu_isset(i, *cpus)) + smdata = &active; + } threads[i] = kthread_create((void *)stop_cpu, smdata, "kstop%u", i); @@ -154,7 +153,7 @@ int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) /* We've created all the threads. Wake them all: hold this CPU so one * doesn't hit this CPU until we're ready. */ - cpu = get_cpu(); + get_cpu(); for_each_online_cpu(i) wake_up_process(threads[i]); @@ -177,15 +176,15 @@ kill_threads: return err; } -int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) +int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) { int ret; /* No CPUs can come up or down during this. */ get_online_cpus(); - ret = __stop_machine_run(fn, data, cpu); + ret = __stop_machine(fn, data, cpus); put_online_cpus(); return ret; } -EXPORT_SYMBOL_GPL(stop_machine_run); +EXPORT_SYMBOL_GPL(stop_machine); -- cgit v1.2.3 From 9403540c0653122ca34884a180439ddbfcbcb524 Mon Sep 17 00:00:00 2001 From: Barry Naujok Date: Wed, 21 May 2008 16:50:46 +1000 Subject: dcache: Add case-insensitive support d_ci_add() routine This add a dcache entry to the dcache for lookup, but changing the name that is associated with the entry rather than the one passed in to the lookup routine. First, it sees if the case-exact match already exists in the dcache and uses it if one exists. Otherwise, it allocates a new node with the new name and splices it into the dcache. Original code from ntfs_lookup in fs/ntfs/namei.c by Anton Altaparmakov. 
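A hedged sketch of how a case-insensitive filesystem's ->lookup() might use the new helper; myfs_lookup() and myfs_find_entry() are invented names, the latter standing in for whatever directory scan returns the inode (with a reference held) together with the case-exact on-disk name:

  #include <linux/fs.h>
  #include <linux/dcache.h>

  static struct dentry *myfs_lookup(struct inode *dir, struct dentry *dentry,
                                    struct nameidata *nd)
  {
          struct qstr exact_name;
          struct inode *inode;

          /* Hypothetical helper: case-insensitive directory scan. */
          inode = myfs_find_entry(dir, &dentry->d_name, &exact_name);
          if (!inode) {
                  d_add(dentry, NULL);    /* cache the negative result */
                  return NULL;
          }

          /* d_add_ci() consumes the inode reference and returns either
           * the already-cached case-exact dentry or a freshly spliced
           * one, so only the on-disk name ends up in the dcache. */
          return d_add_ci(inode, dentry, &exact_name);
  }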
Signed-off-by: Barry Naujok Signed-off-by: Anton Altaparmakov Acked-by: Christoph Hellwig --- fs/dcache.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/dcache.h | 1 + 2 files changed, 103 insertions(+) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index f2584d22cb45..101663d15e9f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1220,6 +1220,107 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) return new; } +/** + * d_add_ci - lookup or allocate new dentry with case-exact name + * @inode: the inode case-insensitive lookup has found + * @dentry: the negative dentry that was passed to the parent's lookup func + * @name: the case-exact name to be associated with the returned dentry + * + * This is to avoid filling the dcache with case-insensitive names to the + * same inode, only the actual correct case is stored in the dcache for + * case-insensitive filesystems. + * + * For a case-insensitive lookup match and if the the case-exact dentry + * already exists in in the dcache, use it and return it. + * + * If no entry exists with the exact case name, allocate new dentry with + * the exact case, and return the spliced entry. + */ +struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry, + struct qstr *name) +{ + int error; + struct dentry *found; + struct dentry *new; + + /* Does a dentry matching the name exist already? */ + found = d_hash_and_lookup(dentry->d_parent, name); + /* If not, create it now and return */ + if (!found) { + new = d_alloc(dentry->d_parent, name); + if (!new) { + error = -ENOMEM; + goto err_out; + } + found = d_splice_alias(inode, new); + if (found) { + dput(new); + return found; + } + return new; + } + /* Matching dentry exists, check if it is negative. */ + if (found->d_inode) { + if (unlikely(found->d_inode != inode)) { + /* This can't happen because bad inodes are unhashed. */ + BUG_ON(!is_bad_inode(inode)); + BUG_ON(!is_bad_inode(found->d_inode)); + } + /* + * Already have the inode and the dentry attached, decrement + * the reference count to balance the iget() done + * earlier on. We found the dentry using d_lookup() so it + * cannot be disconnected and thus we do not need to worry + * about any NFS/disconnectedness issues here. + */ + iput(inode); + return found; + } + /* + * Negative dentry: instantiate it unless the inode is a directory and + * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED), + * in which case d_move() that in place of the found dentry. + */ + if (!S_ISDIR(inode->i_mode)) { + /* Not a directory; everything is easy. */ + d_instantiate(found, inode); + return found; + } + spin_lock(&dcache_lock); + if (list_empty(&inode->i_dentry)) { + /* + * Directory without a 'disconnected' dentry; we need to do + * d_instantiate() by hand because it takes dcache_lock which + * we already hold. + */ + list_add(&found->d_alias, &inode->i_dentry); + found->d_inode = inode; + spin_unlock(&dcache_lock); + security_d_instantiate(found, inode); + return found; + } + /* + * Directory with a 'disconnected' dentry; get a reference to the + * 'disconnected' dentry. + */ + new = list_entry(inode->i_dentry.next, struct dentry, d_alias); + dget_locked(new); + spin_unlock(&dcache_lock); + /* Do security vodoo. */ + security_d_instantiate(found, inode); + /* Move new in place of found. */ + d_move(new, found); + /* Balance the iget() we did above. */ + iput(inode); + /* Throw away found. */ + dput(found); + /* Use new as the actual dentry. 
*/ + return new; + +err_out: + iput(inode); + return ERR_PTR(error); +} /** * d_lookup - search for a dentry @@ -2254,6 +2355,7 @@ EXPORT_SYMBOL(d_path); EXPORT_SYMBOL(d_prune_aliases); EXPORT_SYMBOL(d_rehash); EXPORT_SYMBOL(d_splice_alias); +EXPORT_SYMBOL(d_add_ci); EXPORT_SYMBOL(d_validate); EXPORT_SYMBOL(dget_locked); EXPORT_SYMBOL(dput); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 98202c672fde..07aa198f19ed 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -230,6 +230,7 @@ extern void d_delete(struct dentry *); extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_anon(struct inode *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); +extern struct dentry * d_add_ci(struct inode *, struct dentry *, struct qstr *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); extern void shrink_dcache_for_umount(struct super_block *); -- cgit v1.2.3 From 306cfd630a4d121cf4e08b894d8b4c4cf106e57e Mon Sep 17 00:00:00 2001 From: Adrian McMenamin Date: Sun, 15 Jun 2008 20:48:09 +0100 Subject: maple: tidy maple_driver code by removing redundant connect/disconnect The connect and disconnect functions are unnecessary - everything they do can be accomplished in the initial probe - so remove them. Signed-off-by: Adrian McMenamin Signed-off-by: Paul Mundt --- include/linux/maple.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/maple.h b/include/linux/maple.h index d31e36ebb436..523a286bb477 100644 --- a/include/linux/maple.h +++ b/include/linux/maple.h @@ -61,8 +61,6 @@ struct maple_device { struct maple_driver { unsigned long function; - int (*connect) (struct maple_device * dev); - void (*disconnect) (struct maple_device * dev); struct device_driver drv; }; -- cgit v1.2.3 From 7f71ac9374fec066e428892a68db158946cee1fb Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 28 Jul 2008 18:29:09 +0200 Subject: mfd: Coding style fixes Fix some coding style fixes in the mfd core driver. 
Signed-off-by: Ben Dooks Signed-off-by: Samuel Ortiz --- drivers/mfd/mfd-core.c | 15 +++++++-------- include/linux/mfd/core.h | 17 ++++++++--------- 2 files changed, 15 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index 4dc861a7ac56..50207700140c 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -16,9 +16,9 @@ #include static int mfd_add_device(struct platform_device *parent, - const struct mfd_cell *cell, - struct resource *mem_base, - int irq_base) + const struct mfd_cell *cell, + struct resource *mem_base, + int irq_base) { struct resource res[cell->num_resources]; struct platform_device *pdev; @@ -75,11 +75,10 @@ fail_alloc: return ret; } -int mfd_add_devices( - struct platform_device *parent, - const struct mfd_cell *cells, int n_devs, - struct resource *mem_base, - int irq_base) +int mfd_add_devices(struct platform_device *parent, + const struct mfd_cell *cells, int n_devs, + struct resource *mem_base, + int irq_base) { int i; int ret = 0; diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index bb3dd0545928..b7cbb9968339 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -1,5 +1,3 @@ -#ifndef MFD_CORE_H -#define MFD_CORE_H /* * drivers/mfd/mfd-core.h * @@ -13,6 +11,9 @@ * */ +#ifndef MFD_CORE_H +#define MFD_CORE_H + #include /* @@ -38,17 +39,15 @@ struct mfd_cell { const struct resource *resources; }; -static inline struct mfd_cell * -mfd_get_cell(struct platform_device *pdev) +static inline struct mfd_cell *mfd_get_cell(struct platform_device *pdev) { return (struct mfd_cell *)pdev->dev.platform_data; } -extern int mfd_add_devices( - struct platform_device *parent, - const struct mfd_cell *cells, int n_devs, - struct resource *mem_base, - int irq_base); +extern int mfd_add_devices(struct platform_device *parent, + const struct mfd_cell *cells, int n_devs, + struct resource *mem_base, + int irq_base); extern void mfd_remove_devices(struct platform_device *parent); -- cgit v1.2.3 From e56b3bc7942982ac2589c942fb345e38bc7a341a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 28 Jul 2008 11:32:33 -0700 Subject: cpu masks: optimize and clean up cpumask_of_cpu() Clean up and optimize cpumask_of_cpu(), by sharing all the zero words. Instead of stupidly generating all possible i=0...NR_CPUS 2^i patterns creating a huge array of constant bitmasks, realize that the zero words can be shared. In other words, on a 64-bit architecture, we only ever need 64 of these arrays - with a different bit set in one single world (with enough zero words around it so that we can create any bitmask by just offsetting in that big array). And then we just put enough zeroes around it that we can point every single cpumask to be one of those things. So when we have 4k CPU's, instead of having 4k arrays (of 4k bits each, with one bit set in each array - 2MB memory total), we have exactly 64 arrays instead, each 8k bits in size (64kB total). And then we just point cpumask(n) to the right position (which we can calculate dynamically). 
Once we have the right arrays, getting "cpumask(n)" ends up being: static inline const cpumask_t *get_cpu_mask(unsigned int cpu) { const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; p -= cpu / BITS_PER_LONG; return (const cpumask_t *)p; } This brings other advantages and simplifications as well: - we are not wasting memory that is just filled with a single bit in various different places - we don't need all those games to re-create the arrays in some dense format, because they're already going to be dense enough. if we compile a kernel for up to 4k CPU's, "wasting" that 64kB of memory is a non-issue (especially since by doing this "overlapping" trick we probably get better cache behaviour anyway). [ mingo@elte.hu: Converted Linus's mails into a commit. See: http://lkml.org/lkml/2008/7/27/156 http://lkml.org/lkml/2008/7/28/320 Also applied a family filter - which also has the side-effect of leaving out the bits where Linus calls me an idio... Oh, never mind ;-) ] Signed-off-by: Ingo Molnar Cc: Rusty Russell Cc: Andrew Morton Cc: Al Viro Cc: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 23 -------- include/linux/cpumask.h | 26 ++++++++- kernel/cpu.c | 128 +++++++---------------------------------- 3 files changed, 43 insertions(+), 134 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 1cd53dfcd309..76e305e064f9 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -80,26 +80,6 @@ static void __init setup_per_cpu_maps(void) #endif } -#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP -/* - * Replace static cpumask_of_cpu_map in the initdata section, - * with one that's allocated sized by the possible number of cpus. - * - * (requires nr_cpu_ids to be initialized) - */ -static void __init setup_cpumask_of_cpu(void) -{ - int i; - - /* alloc_bootmem zeroes memory */ - cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids); - for (i = 0; i < nr_cpu_ids; i++) - cpu_set(i, cpumask_of_cpu_map[i]); -} -#else -static inline void setup_cpumask_of_cpu(void) { } -#endif - #ifdef CONFIG_X86_32 /* * Great future not-so-futuristic plan: make i386 and x86_64 do it @@ -199,9 +179,6 @@ void __init setup_per_cpu_areas(void) /* Setup node to cpumask map */ setup_node_to_cpumask_map(); - - /* Setup cpumask_of_cpu map */ - setup_cpumask_of_cpu(); } #endif diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 8fa3b6d4a320..96d0509fb8d8 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -265,10 +265,30 @@ static inline void __cpus_shift_left(cpumask_t *dstp, bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); } +/* + * Special-case data structure for "single bit set only" constant CPU masks. + * + * We pre-generate all the 64 (or 32) possible bit positions, with enough + * padding to the left and the right, and return the constant pointer + * appropriately offset. 
+ */ +extern const unsigned long + cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; + +static inline const cpumask_t *get_cpu_mask(unsigned int cpu) +{ + const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; + p -= cpu / BITS_PER_LONG; + return (const cpumask_t *)p; +} + +/* + * In cases where we take the address of the cpumask immediately, + * gcc optimizes it out (it's a constant) and there's no huge stack + * variable created: + */ +#define cpumask_of_cpu(cpu) ({ *get_cpu_mask(cpu); }) -/* cpumask_of_cpu_map[] is in kernel/cpu.c */ -extern const cpumask_t *cpumask_of_cpu_map; -#define cpumask_of_cpu(cpu) (cpumask_of_cpu_map[cpu]) #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) diff --git a/kernel/cpu.c b/kernel/cpu.c index a35d8995dc8c..06a8358bb418 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -462,115 +462,27 @@ out: #endif /* CONFIG_SMP */ -/* 64 bits of zeros, for initializers. */ -#if BITS_PER_LONG == 32 -#define Z64 0, 0 -#else -#define Z64 0 -#endif +/* + * cpu_bit_bitmap[] is a special, "compressed" data structure that + * represents all NR_CPUS bits binary values of 1< 4 - CMI0(4), CMI0(5), CMI0(6), CMI0(7), -#endif -#if NR_CPUS > 8 - CMI0(8), CMI0(9), CMI0(10), CMI0(11), - CMI0(12), CMI0(13), CMI0(14), CMI0(15), -#endif -#if NR_CPUS > 16 - CMI0(16), CMI0(17), CMI0(18), CMI0(19), - CMI0(20), CMI0(21), CMI0(22), CMI0(23), - CMI0(24), CMI0(25), CMI0(26), CMI0(27), - CMI0(28), CMI0(29), CMI0(30), CMI0(31), -#endif -#if NR_CPUS > 32 -#if BITS_PER_LONG == 32 - CMI(32, 0), CMI(33, 0), CMI(34, 0), CMI(35, 0), - CMI(36, 0), CMI(37, 0), CMI(38, 0), CMI(39, 0), - CMI(40, 0), CMI(41, 0), CMI(42, 0), CMI(43, 0), - CMI(44, 0), CMI(45, 0), CMI(46, 0), CMI(47, 0), - CMI(48, 0), CMI(49, 0), CMI(50, 0), CMI(51, 0), - CMI(52, 0), CMI(53, 0), CMI(54, 0), CMI(55, 0), - CMI(56, 0), CMI(57, 0), CMI(58, 0), CMI(59, 0), - CMI(60, 0), CMI(61, 0), CMI(62, 0), CMI(63, 0), -#else - CMI0(32), CMI0(33), CMI0(34), CMI0(35), - CMI0(36), CMI0(37), CMI0(38), CMI0(39), - CMI0(40), CMI0(41), CMI0(42), CMI0(43), - CMI0(44), CMI0(45), CMI0(46), CMI0(47), - CMI0(48), CMI0(49), CMI0(50), CMI0(51), - CMI0(52), CMI0(53), CMI0(54), CMI0(55), - CMI0(56), CMI0(57), CMI0(58), CMI0(59), - CMI0(60), CMI0(61), CMI0(62), CMI0(63), -#endif /* BITS_PER_LONG == 64 */ -#endif -#if NR_CPUS > 64 - CMI64(64, Z64), -#endif -#if NR_CPUS > 128 - CMI64(128, Z64, Z64), CMI64(192, Z64, Z64, Z64), -#endif -#if NR_CPUS > 256 - CMI256(256, Z256), -#endif -#if NR_CPUS > 512 - CMI256(512, Z256, Z256), CMI256(768, Z256, Z256, Z256), -#endif -#if NR_CPUS > 1024 - CMI1024(1024, Z1024), -#endif -#if NR_CPUS > 2048 - CMI1024(2048, Z1024, Z1024), CMI1024(3072, Z1024, Z1024, Z1024), -#endif -#if NR_CPUS > 4096 -#error NR_CPUS too big. Fix initializers or set CONFIG_HAVE_CPUMASK_OF_CPU_MAP +const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = { + + MASK_DECLARE_8(0), MASK_DECLARE_8(8), + MASK_DECLARE_8(16), MASK_DECLARE_8(24), +#if BITS_PER_LONG > 32 + MASK_DECLARE_8(32), MASK_DECLARE_8(40), + MASK_DECLARE_8(48), MASK_DECLARE_8(56), #endif }; - -const cpumask_t *cpumask_of_cpu_map = cpumask_map; - -EXPORT_SYMBOL_GPL(cpumask_of_cpu_map); +EXPORT_SYMBOL_GPL(cpu_bit_bitmap); -- cgit v1.2.3 From 5fde244d39b88625ac578d83e6625138714de031 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 23 Jul 2008 10:32:24 +0800 Subject: PCI: disable ASPM per ACPI FADT setting The ACPI FADT table includes an ASPM control bit. 
If the bit is set, do not enable ASPM since it may indicate that the platform doesn't actually support the feature. Tested-by: Jack Howarth Signed-off-by: Shaohua Li Signed-off-by: Jesse Barnes --- drivers/pci/pci-acpi.c | 7 +++++++ drivers/pci/pcie/aspm.c | 5 +++++ include/acpi/actbl.h | 1 + include/linux/pci-aspm.h | 5 +++++ 4 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 7764768b6a0e..89a2f0fa10f9 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -372,6 +373,12 @@ static int __init acpi_pci_init(void) printk(KERN_INFO"ACPI FADT declares the system doesn't support MSI, so disable it\n"); pci_no_msi(); } + + if (acpi_gbl_FADT.boot_flags & BAF_PCIE_ASPM_CONTROL) { + printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n"); + pcie_no_aspm(); + } + ret = register_acpi_bus_type(&acpi_pci_bus); if (ret) return 0; diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index f82495583e63..759c51a4e399 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -808,6 +808,11 @@ static int __init pcie_aspm_disable(char *str) __setup("pcie_noaspm", pcie_aspm_disable); +void pcie_no_aspm(void) +{ + aspm_disabled = 1; +} + #ifdef CONFIG_ACPI #include #include diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 1ebbe883f786..13a3d9ad92db 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -277,6 +277,7 @@ enum acpi_prefered_pm_profiles { #define BAF_LEGACY_DEVICES 0x0001 #define BAF_8042_KEYBOARD_CONTROLLER 0x0002 #define BAF_MSI_NOT_SUPPORTED 0x0008 +#define BAF_PCIE_ASPM_CONTROL 0x0010 #define FADT2_REVISION_ID 3 #define FADT2_MINUS_REVISION_ID 2 diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h index a1a1e618e996..91ba0b338b47 100644 --- a/include/linux/pci-aspm.h +++ b/include/linux/pci-aspm.h @@ -27,6 +27,7 @@ extern void pcie_aspm_init_link_state(struct pci_dev *pdev); extern void pcie_aspm_exit_link_state(struct pci_dev *pdev); extern void pcie_aspm_pm_state_change(struct pci_dev *pdev); extern void pci_disable_link_state(struct pci_dev *pdev, int state); +extern void pcie_no_aspm(void); #else static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { @@ -40,6 +41,10 @@ static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) static inline void pci_disable_link_state(struct pci_dev *pdev, int state) { } + +static inline void pcie_no_aspm(void) +{ +} #endif #ifdef CONFIG_PCIEASPM_DEBUG /* this depends on CONFIG_PCIEASPM */ -- cgit v1.2.3 From 149e16372a2066c5474d8a8db9b252afd57eb427 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 23 Jul 2008 10:32:31 +0800 Subject: PCI: disable ASPM on pre-1.1 PCIe devices Disable ASPM on pre-1.1 PCIe devices, as many of them don't implement it correctly. 
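For reference, the heuristic the patch applies inside the ASPM core can be sketched as follows; the function name is invented and this is only an illustration of the check, not code from the patch:

  #include <linux/pci.h>
  #include <linux/pci_regs.h>

  /* PCIe 1.1 added Role-Based Error Reporting, so a set
   * PCI_EXP_DEVCAP_RBER bit is taken as evidence that the function is
   * at least a 1.1 device (following what Windows does). */
  static int pcie_dev_is_1_1(struct pci_dev *pdev)
  {
          int pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
          u32 devcap;

          if (!pos)
                  return 0;       /* not a PCI Express function */

          pci_read_config_dword(pdev, pos + PCI_EXP_DEVCAP, &devcap);
          return !!(devcap & PCI_EXP_DEVCAP_RBER);
  }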
Tested-by: Jack Howarth Signed-off-by: Shaohua Li Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aspm.c | 13 +++++++++++++ drivers/pci/probe.c | 3 ++- include/linux/pci_regs.h | 1 + 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 759c51a4e399..704605298c5e 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -510,6 +510,7 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev) { struct pci_dev *child_dev; int child_pos; + u32 reg32; /* * Some functions in a slot might not all be PCIE functions, very @@ -519,6 +520,18 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev) child_pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP); if (!child_pos) return -EINVAL; + + /* + * Disable ASPM for pre-1.1 PCIe device, we follow MS to use + * RBER bit to determine if a function is 1.1 version device + */ + pci_read_config_dword(child_dev, child_pos + PCI_EXP_DEVCAP, + ®32); + if (!(reg32 & PCI_EXP_DEVCAP_RBER)) { + printk("Pre-1.1 PCIe device detected, " + "disable ASPM for %s\n", pci_name(pdev)); + return -EINVAL; + } } return 0; } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 203630065839..7098dfb07449 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1057,7 +1057,8 @@ int pci_scan_slot(struct pci_bus *bus, int devfn) } } - if (bus->self) + /* only one slot has pcie device */ + if (bus->self && nr) pcie_aspm_init_link_state(bus->self); return nr; diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 19958b929905..450684f7eaac 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -374,6 +374,7 @@ #define PCI_EXP_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */ #define PCI_EXP_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */ #define PCI_EXP_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */ +#define PCI_EXP_DEVCAP_RBER 0x8000 /* Role-Based Error Reporting */ #define PCI_EXP_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */ #define PCI_EXP_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */ #define PCI_EXP_DEVCTL 8 /* Device Control */ -- cgit v1.2.3 From 979b1791e5b8f8b556faeec4c48339e7ed63af9f Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 24 Jul 2008 17:18:38 +0100 Subject: PCI: add D3 power state avoidance quirk Libata has some hacks to deal with certain controllers going silly in D3 state. The right way to handle this is to keep a PCI device flag for such devices. That can then be generalised for no ATA devices with power problems. 
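As an illustration of how the new flag is meant to be used, a hypothetical quirk for another affected device might look like this (the vendor and device IDs are placeholders, not a real entry from the patch):

  #include <linux/init.h>
  #include <linux/pci.h>

  /* Any other device known to lose its configuration in D3 can now be
   * handled with a one-line quirk instead of driver-local hacks. */
  static void __devinit quirk_mydev_no_d3(struct pci_dev *pdev)
  {
          pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3;
  }
  DECLARE_PCI_FIXUP_EARLY(0x1234, 0x5678, quirk_mydev_no_d3);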
Signed-off-by: Alan Cox Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 4 ++++ drivers/pci/quirks.c | 13 +++++++++++++ include/linux/pci.h | 2 ++ 3 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index c95f77d65718..0a3d856833fc 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -572,6 +572,10 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) if (!ret) pci_update_current_state(dev); } + /* This device is quirked not to be put into D3, so + don't put it in D3 */ + if (state == PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3)) + return 0; error = pci_raw_set_power_state(dev, state); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 12d489395fad..0fb365074288 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -923,6 +923,19 @@ static void __init quirk_ide_samemode(struct pci_dev *pdev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_10, quirk_ide_samemode); +/* + * Some ATA devices break if put into D3 + */ + +static void __devinit quirk_no_ata_d3(struct pci_dev *pdev) +{ + /* Quirk the legacy ATA devices only. The AHCI ones are ok */ + if ((pdev->class >> 8) == PCI_CLASS_STORAGE_IDE) + pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_ANY_ID, quirk_no_ata_d3); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, PCI_ANY_ID, quirk_no_ata_d3); + /* This was originally an Alpha specific thing, but it really fits here. * The i82375 PCI/EISA bridge appears as non-classified. Fix that. */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 1d296d31abe0..825be3878f68 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -124,6 +124,8 @@ enum pci_dev_flags { * generation too. */ PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1, + /* Device configuration is irrevocably lost if disabled into D3 */ + PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2, }; typedef unsigned short __bitwise pci_bus_flags_t; -- cgit v1.2.3 From 56edb58be157a06dc147a988af3588059556d392 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 29 Jul 2008 01:23:32 +0200 Subject: mfd: add platform_data to mfd_cell Adding platform_data to mfd_cell allows passing of platform data directly to the platform_device created for each cell and thus reuse of existing drivers. On the other side it can be used as a hook to mfd_cell itself removing the need in mfd_get_cell method. 
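A hedged sketch of how a parent MFD driver might use the new fields; the "my-leds" cell, struct my_led_pdata and the probe function are invented. The point is that platform_data reaches the child platform device unchanged, so an existing platform driver that reads dev.platform_data can be reused as-is:

  #include <linux/init.h>
  #include <linux/kernel.h>
  #include <linux/platform_device.h>
  #include <linux/mfd/core.h>

  struct my_led_pdata {
          int gpio;
  };

  static struct my_led_pdata my_led_pdata = {
          .gpio = 42,             /* placeholder value */
  };

  static struct mfd_cell my_cells[] = {
          {
                  .name           = "my-leds",
                  .platform_data  = &my_led_pdata,
                  .data_size      = sizeof(my_led_pdata),
          },
  };

  static int __devinit my_mfd_probe(struct platform_device *pdev)
  {
          return mfd_add_devices(pdev, my_cells, ARRAY_SIZE(my_cells),
                                 NULL, 0);
  }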
Signed-off-by: Mike Rapoport Acked-by: Dmitry Baryshkov Signed-off-by: Samuel Ortiz --- drivers/mfd/mfd-core.c | 2 +- drivers/mfd/tc6393xb.c | 4 ++++ include/linux/mfd/core.h | 13 +++++++------ 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index 50207700140c..ad4e4d16a36a 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -32,7 +32,7 @@ static int mfd_add_device(struct platform_device *parent, pdev->dev.parent = &parent->dev; ret = platform_device_add_data(pdev, - cell, sizeof(struct mfd_cell)); + cell->platform_data, cell->data_size); if (ret) goto fail_device; diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c index 94e55e8e7ce6..9908aaa4881a 100644 --- a/drivers/mfd/tc6393xb.c +++ b/drivers/mfd/tc6393xb.c @@ -466,6 +466,10 @@ static int __devinit tc6393xb_probe(struct platform_device *dev) tc6393xb_attach_irq(dev); tc6393xb_cells[TC6393XB_CELL_NAND].driver_data = tcpd->nand_data; + tc6393xb_cells[TC6393XB_CELL_NAND].platform_data = + &tc6393xb_cells[TC6393XB_CELL_NAND]; + tc6393xb_cells[TC6393XB_CELL_NAND].data_size = + sizeof(tc6393xb_cells[TC6393XB_CELL_NAND]); retval = mfd_add_devices(dev, tc6393xb_cells, ARRAY_SIZE(tc6393xb_cells), diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index b7cbb9968339..ea45d4a5a2ac 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -29,7 +29,13 @@ struct mfd_cell { int (*suspend)(struct platform_device *dev); int (*resume)(struct platform_device *dev); - void *driver_data; /* driver-specific data */ + /* driver-specific data for MFD-aware "cell" drivers */ + void *driver_data; + + /* platform_data can be used to either pass data to "generic" + driver or as a hook to mfd_cell for the "cell" drivers */ + void *platform_data; + size_t data_size; /* * This resources can be specified relatievly to the parent device. @@ -39,11 +45,6 @@ struct mfd_cell { const struct resource *resources; }; -static inline struct mfd_cell *mfd_get_cell(struct platform_device *pdev) -{ - return (struct mfd_cell *)pdev->dev.platform_data; -} - extern int mfd_add_devices(struct platform_device *parent, const struct mfd_cell *cells, int n_devs, struct resource *mem_base, -- cgit v1.2.3 From 6beeac76f5f96590fb751af5e138fbc3f62e8460 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 28 Jul 2008 15:46:22 -0700 Subject: mmu-notifiers: add list_del_init_rcu() Introduce list_del_init_rcu() and document it. Signed-off-by: Andrea Arcangeli Acked-by: Linus Torvalds Cc: "Paul E. McKenney" Cc: Ingo Molnar Cc: Christoph Lameter Cc: Jack Steiner Cc: Robin Holt Cc: Nick Piggin Cc: Peter Zijlstra Cc: Kanoj Sarcar Cc: Roland Dreier Cc: Steve Wise Cc: Avi Kivity Cc: Hugh Dickins Cc: Rusty Russell Cc: Anthony Liguori Cc: Chris Wright Cc: Marcelo Tosatti Cc: Eric Dumazet Cc: "Paul E. McKenney" Cc: Izik Eidus Cc: Anthony Liguori Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rculist.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index b0f39be08b6c..eb4443c7e05b 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -97,6 +97,34 @@ static inline void list_del_rcu(struct list_head *entry) entry->prev = LIST_POISON2; } +/** + * hlist_del_init_rcu - deletes entry from hash list with re-initialization + * @n: the element to delete from the hash list. 
+ * + * Note: list_unhashed() on the node return true after this. It is + * useful for RCU based read lockfree traversal if the writer side + * must know if the list entry is still hashed or already unhashed. + * + * In particular, it means that we can not poison the forward pointers + * that may still be used for walking the hash list and we can only + * zero the pprev pointer so list_unhashed() will return true after + * this. + * + * The caller must take whatever precautions are necessary (such as + * holding appropriate locks) to avoid racing with another + * list-mutation primitive, such as hlist_add_head_rcu() or + * hlist_del_rcu(), running on this same list. However, it is + * perfectly legal to run concurrently with the _rcu list-traversal + * primitives, such as hlist_for_each_entry_rcu(). + */ +static inline void hlist_del_init_rcu(struct hlist_node *n) +{ + if (!hlist_unhashed(n)) { + __hlist_del(n); + n->pprev = NULL; + } +} + /** * list_replace_rcu - replace old entry by new one * @old : the element to be replaced -- cgit v1.2.3 From 7906d00cd1f687268f0a3599442d113767795ae6 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 28 Jul 2008 15:46:26 -0700 Subject: mmu-notifiers: add mm_take_all_locks() operation mm_take_all_locks holds off reclaim from an entire mm_struct. This allows mmu notifiers to register into the mm at any time with the guarantee that no mmu operation is in progress on the mm. This operation locks against the VM for all pte/vma/mm related operations that could ever happen on a certain mm. This includes vmtruncate, try_to_unmap, and all page faults. The caller must take the mmap_sem in write mode before calling mm_take_all_locks(). The caller isn't allowed to release the mmap_sem until mm_drop_all_locks() returns. mmap_sem in write mode is required in order to block all operations that could modify pagetables and free pages without need of altering the vma layout (for example populate_range() with nonlinear vmas). It's also needed in write mode to avoid new anon_vmas to be associated with existing vmas. A single task can't take more than one mm_take_all_locks() in a row or it would deadlock. mm_take_all_locks() and mm_drop_all_locks are expensive operations that may have to take thousand of locks. mm_take_all_locks() can fail if it's interrupted by signals. When mmu_notifier_register returns, we must be sure that the driver is notified if some task is in the middle of a vmtruncate for the 'mm' where the mmu notifier was registered (mmu_notifier_invalidate_range_start/end is run around the vmtruncation but mmu_notifier_register can run after mmu_notifier_invalidate_range_start and before mmu_notifier_invalidate_range_end). Same problem for rmap paths. And we've to remove page pinning to avoid replicating the tlb_gather logic inside KVM (and GRU doesn't work well with page pinning regardless of needing tlb_gather), so without mm_take_all_locks when vmtruncate frees the page, kvm would have no way to notice that it mapped into sptes a page that is going into the freelist without a chance of any further mmu_notifier notification. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Andrea Arcangeli Acked-by: Linus Torvalds Cc: Christoph Lameter Cc: Jack Steiner Cc: Robin Holt Cc: Nick Piggin Cc: Peter Zijlstra Cc: Kanoj Sarcar Cc: Roland Dreier Cc: Steve Wise Cc: Avi Kivity Cc: Hugh Dickins Cc: Rusty Russell Cc: Anthony Liguori Cc: Chris Wright Cc: Marcelo Tosatti Cc: Eric Dumazet Cc: "Paul E. 
McKenney" Cc: Izik Eidus Cc: Anthony Liguori Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 + include/linux/pagemap.h | 1 + include/linux/rmap.h | 8 +++ mm/mmap.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 170 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6e695eaab4ce..866a3dbe5c75 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1104,6 +1104,9 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff); extern void exit_mmap(struct mm_struct *); +extern int mm_take_all_locks(struct mm_struct *mm); +extern void mm_drop_all_locks(struct mm_struct *mm); + #ifdef CONFIG_PROC_FS /* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */ extern void added_exe_file_vma(struct mm_struct *mm); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a81d81890422..a39b38ccdc97 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -20,6 +20,7 @@ */ #define AS_EIO (__GFP_BITS_SHIFT + 0) /* IO error on async write */ #define AS_ENOSPC (__GFP_BITS_SHIFT + 1) /* ENOSPC on async write */ +#define AS_MM_ALL_LOCKS (__GFP_BITS_SHIFT + 2) /* under mm_take_all_locks() */ static inline void mapping_set_error(struct address_space *mapping, int error) { diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 1383692ac5bd..69407f85e10b 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -26,6 +26,14 @@ */ struct anon_vma { spinlock_t lock; /* Serialize access to vma list */ + /* + * NOTE: the LSB of the head.next is set by + * mm_take_all_locks() _after_ taking the above lock. So the + * head must only be read/written after taking the above lock + * to be sure to see a valid next pointer. The LSB bit itself + * is serialized by a system wide lock only visible to + * mm_take_all_locks() (mm_all_locks_mutex). + */ struct list_head head; /* List of private "related" vmas */ }; diff --git a/mm/mmap.c b/mm/mmap.c index 5e0cc99e9cd5..e5f9cb83d6d4 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2268,3 +2268,161 @@ int install_special_mapping(struct mm_struct *mm, return 0; } + +static DEFINE_MUTEX(mm_all_locks_mutex); + +static void vm_lock_anon_vma(struct anon_vma *anon_vma) +{ + if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) { + /* + * The LSB of head.next can't change from under us + * because we hold the mm_all_locks_mutex. + */ + spin_lock(&anon_vma->lock); + /* + * We can safely modify head.next after taking the + * anon_vma->lock. If some other vma in this mm shares + * the same anon_vma we won't take it again. + * + * No need of atomic instructions here, head.next + * can't change from under us thanks to the + * anon_vma->lock. + */ + if (__test_and_set_bit(0, (unsigned long *) + &anon_vma->head.next)) + BUG(); + } +} + +static void vm_lock_mapping(struct address_space *mapping) +{ + if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) { + /* + * AS_MM_ALL_LOCKS can't change from under us because + * we hold the mm_all_locks_mutex. + * + * Operations on ->flags have to be atomic because + * even if AS_MM_ALL_LOCKS is stable thanks to the + * mm_all_locks_mutex, there may be other cpus + * changing other bitflags in parallel to us. 
+ */ + if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags)) + BUG(); + spin_lock(&mapping->i_mmap_lock); + } +} + +/* + * This operation locks against the VM for all pte/vma/mm related + * operations that could ever happen on a certain mm. This includes + * vmtruncate, try_to_unmap, and all page faults. + * + * The caller must take the mmap_sem in write mode before calling + * mm_take_all_locks(). The caller isn't allowed to release the + * mmap_sem until mm_drop_all_locks() returns. + * + * mmap_sem in write mode is required in order to block all operations + * that could modify pagetables and free pages without need of + * altering the vma layout (for example populate_range() with + * nonlinear vmas). It's also needed in write mode to avoid new + * anon_vmas to be associated with existing vmas. + * + * A single task can't take more than one mm_take_all_locks() in a row + * or it would deadlock. + * + * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in + * mapping->flags avoid to take the same lock twice, if more than one + * vma in this mm is backed by the same anon_vma or address_space. + * + * We can take all the locks in random order because the VM code + * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never + * takes more than one of them in a row. Secondly we're protected + * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex. + * + * mm_take_all_locks() and mm_drop_all_locks are expensive operations + * that may have to take thousand of locks. + * + * mm_take_all_locks() can fail if it's interrupted by signals. + */ +int mm_take_all_locks(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + int ret = -EINTR; + + BUG_ON(down_read_trylock(&mm->mmap_sem)); + + mutex_lock(&mm_all_locks_mutex); + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (signal_pending(current)) + goto out_unlock; + if (vma->anon_vma) + vm_lock_anon_vma(vma->anon_vma); + if (vma->vm_file && vma->vm_file->f_mapping) + vm_lock_mapping(vma->vm_file->f_mapping); + } + ret = 0; + +out_unlock: + if (ret) + mm_drop_all_locks(mm); + + return ret; +} + +static void vm_unlock_anon_vma(struct anon_vma *anon_vma) +{ + if (test_bit(0, (unsigned long *) &anon_vma->head.next)) { + /* + * The LSB of head.next can't change to 0 from under + * us because we hold the mm_all_locks_mutex. + * + * We must however clear the bitflag before unlocking + * the vma so the users using the anon_vma->head will + * never see our bitflag. + * + * No need of atomic instructions here, head.next + * can't change from under us until we release the + * anon_vma->lock. + */ + if (!__test_and_clear_bit(0, (unsigned long *) + &anon_vma->head.next)) + BUG(); + spin_unlock(&anon_vma->lock); + } +} + +static void vm_unlock_mapping(struct address_space *mapping) +{ + if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) { + /* + * AS_MM_ALL_LOCKS can't change to 0 from under us + * because we hold the mm_all_locks_mutex. + */ + spin_unlock(&mapping->i_mmap_lock); + if (!test_and_clear_bit(AS_MM_ALL_LOCKS, + &mapping->flags)) + BUG(); + } +} + +/* + * The mmap_sem cannot be released by the caller until + * mm_drop_all_locks() returns. 
+ */ +void mm_drop_all_locks(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + + BUG_ON(down_read_trylock(&mm->mmap_sem)); + BUG_ON(!mutex_is_locked(&mm_all_locks_mutex)); + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (vma->anon_vma) + vm_unlock_anon_vma(vma->anon_vma); + if (vma->vm_file && vma->vm_file->f_mapping) + vm_unlock_mapping(vma->vm_file->f_mapping); + } + + mutex_unlock(&mm_all_locks_mutex); +} -- cgit v1.2.3 From cddb8a5c14aa89810b40495d94d3d2a0faee6619 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 28 Jul 2008 15:46:29 -0700 Subject: mmu-notifiers: core With KVM/GFP/XPMEM there isn't just the primary CPU MMU pointing to pages. There are secondary MMUs (with secondary sptes and secondary tlbs) too. sptes in the kvm case are shadow pagetables, but when I say spte in mmu-notifier context, I mean "secondary pte". In GRU case there's no actual secondary pte and there's only a secondary tlb because the GRU secondary MMU has no knowledge about sptes and every secondary tlb miss event in the MMU always generates a page fault that has to be resolved by the CPU (this is not the case of KVM where the a secondary tlb miss will walk sptes in hardware and it will refill the secondary tlb transparently to software if the corresponding spte is present). The same way zap_page_range has to invalidate the pte before freeing the page, the spte (and secondary tlb) must also be invalidated before any page is freed and reused. Currently we take a page_count pin on every page mapped by sptes, but that means the pages can't be swapped whenever they're mapped by any spte because they're part of the guest working set. Furthermore a spte unmap event can immediately lead to a page to be freed when the pin is released (so requiring the same complex and relatively slow tlb_gather smp safe logic we have in zap_page_range and that can be avoided completely if the spte unmap event doesn't require an unpin of the page previously mapped in the secondary MMU). The mmu notifiers allow kvm/GRU/XPMEM to attach to the tsk->mm and know when the VM is swapping or freeing or doing anything on the primary MMU so that the secondary MMU code can drop sptes before the pages are freed, avoiding all page pinning and allowing 100% reliable swapping of guest physical address space. Furthermore it avoids the code that teardown the mappings of the secondary MMU, to implement a logic like tlb_gather in zap_page_range that would require many IPI to flush other cpu tlbs, for each fixed number of spte unmapped. To make an example: if what happens on the primary MMU is a protection downgrade (from writeable to wrprotect) the secondary MMU mappings will be invalidated, and the next secondary-mmu-page-fault will call get_user_pages and trigger a do_wp_page through get_user_pages if it called get_user_pages with write=1, and it'll re-establishing an updated spte or secondary-tlb-mapping on the copied page. Or it will setup a readonly spte or readonly tlb mapping if it's a guest-read, if it calls get_user_pages with write=0. This is just an example. This allows to map any page pointed by any pte (and in turn visible in the primary CPU MMU), into a secondary MMU (be it a pure tlb like GRU, or an full MMU with both sptes and secondary-tlb like the shadow-pagetable layer with kvm), or a remote DMA in software like XPMEM (hence needing of schedule in XPMEM code to send the invalidate to the remote node, while no need to schedule in kvm/gru as it's an immediate event like invalidating primary-mmu pte). 
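To make the callback contract concrete, here is a hedged sketch of the driver side; every "my_" name is invented, and only a subset of the callbacks is filled in (the notifier core checks each hook for NULL before calling it):

  #include <linux/mmu_notifier.h>

  /* Hypothetical: tear down sptes / device TLB entries for the range. */
  static void my_flush_secondary_tlb(unsigned long start, unsigned long end)
  {
  }

  static void my_invalidate_range_start(struct mmu_notifier *mn,
                                        struct mm_struct *mm,
                                        unsigned long start,
                                        unsigned long end)
  {
          /* Drop secondary mappings before the primary MMU frees pages. */
          my_flush_secondary_tlb(start, end);
  }

  static void my_release(struct mmu_notifier *mn, struct mm_struct *mm)
  {
          /* Last thread of the mm is exiting: drop everything. */
          my_flush_secondary_tlb(0, ~0UL);
  }

  static const struct mmu_notifier_ops my_mmu_notifier_ops = {
          .release                = my_release,
          .invalidate_range_start = my_invalidate_range_start,
  };

  static struct mmu_notifier my_notifier = {
          .ops = &my_mmu_notifier_ops,
  };

  /* Registration can fail with -EINTR because mm_take_all_locks() is
   * interruptible, just like the kvm example further down. */
  static int my_attach(struct mm_struct *mm)
  {
          return mmu_notifier_register(&my_notifier, mm);
  }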
At least for KVM, without this patch it's impossible to swap guests reliably. And having this feature and removing the page pin allows several other optimizations that simplify life considerably.

Dependencies:

1) mm_take_all_locks() to register the mmu notifier when the whole VM isn't doing anything with "mm". This allows mmu notifier users to keep track of whether the VM is in the middle of the invalidate_range_begin/end critical section with an atomic counter increased in range_begin and decreased in range_end. No secondary MMU page fault is allowed to map any spte or secondary tlb reference while the VM is in the middle of range_begin/end, as any page returned by get_user_pages in that critical section could later be freed immediately without any further ->invalidate_page notification (invalidate_range_begin/end works on ranges and ->invalidate_page isn't called immediately before freeing the page). To stop all page freeing and pagetable overwrites, the mmap_sem must be taken in write mode and all other anon_vma/i_mmap locks must be taken too.

2) It'd be a waste to add branches in the VM if nobody could possibly run KVM/GRU/XPMEM on the kernel, so mmu notifiers will only be enabled if CONFIG_KVM=m/y. In the current kernel kvm won't yet take advantage of mmu notifiers, but this already allows compiling a KVM external module against a kernel with mmu notifiers enabled, and from the next pull from kvm.git we'll start using them. And GRU/XPMEM will also be able to continue development by enabling KVM=m in their config, until they submit all GRU/XPMEM GPLv2 code to the mainline kernel. Then they can also enable MMU_NOTIFIERS in the same way KVM does it (even if KVM=n). This guarantees nobody selects MMU_NOTIFIER=y if KVM and GRU and XPMEM are all =n.

The mmu_notifier_register call can fail because mm_take_all_locks may be interrupted by a signal and return -EINTR. Because mmu_notifier_register is used when a driver starts up, a failure can be handled gracefully. Here is an example of the change applied to kvm to register the mmu notifiers. Usually when a driver starts up, other allocations are required anyway and -ENOMEM failure paths exist already.

	struct kvm *kvm_arch_create_vm(void)
	{
		struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
+		int err;

		if (!kvm)
			return ERR_PTR(-ENOMEM);

		INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);

+		kvm->arch.mmu_notifier.ops = &kvm_mmu_notifier_ops;
+		err = mmu_notifier_register(&kvm->arch.mmu_notifier, current->mm);
+		if (err) {
+			kfree(kvm);
+			return ERR_PTR(err);
+		}
+
		return kvm;
	}

mmu_notifier_unregister returns void and it's reliable.

The patch also adds a few needed but missing includes that would prevent the kernel from compiling after these changes on non-x86 archs (x86 didn't need them by luck).

[akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: fix mm/filemap_xip.c build] [akpm@linux-foundation.org: fix mm/mmu_notifier.c build] Signed-off-by: Andrea Arcangeli Signed-off-by: Nick Piggin Signed-off-by: Christoph Lameter Cc: Jack Steiner Cc: Robin Holt Cc: Nick Piggin Cc: Peter Zijlstra Cc: Kanoj Sarcar Cc: Roland Dreier Cc: Steve Wise Cc: Avi Kivity Cc: Hugh Dickins Cc: Rusty Russell Cc: Anthony Liguori Cc: Chris Wright Cc: Marcelo Tosatti Cc: Eric Dumazet Cc: "Paul E.
McKenney" Cc: Izik Eidus Cc: Anthony Liguori Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kvm/Kconfig | 1 + include/linux/mm_types.h | 4 + include/linux/mmu_notifier.h | 279 +++++++++++++++++++++++++++++++++++++++++++ kernel/fork.c | 3 + mm/Kconfig | 3 + mm/Makefile | 1 + mm/filemap_xip.c | 3 +- mm/fremap.c | 3 + mm/hugetlb.c | 3 + mm/memory.c | 35 +++++- mm/mmap.c | 2 + mm/mmu_notifier.c | 277 ++++++++++++++++++++++++++++++++++++++++++ mm/mprotect.c | 3 + mm/mremap.c | 6 + mm/rmap.c | 13 +- 15 files changed, 623 insertions(+), 13 deletions(-) create mode 100644 include/linux/mmu_notifier.h create mode 100644 mm/mmu_notifier.c (limited to 'include/linux') diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 8d45fabc5f3b..ce3251ce5504 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM select PREEMPT_NOTIFIERS + select MMU_NOTIFIER select ANON_INODES ---help--- Support hosting fully virtualized guest machines using hardware diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 746f975b58ef..386edbe2cb4e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -253,6 +254,9 @@ struct mm_struct { struct file *exe_file; unsigned long num_exe_file_vmas; #endif +#ifdef CONFIG_MMU_NOTIFIER + struct mmu_notifier_mm *mmu_notifier_mm; +#endif }; #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h new file mode 100644 index 000000000000..b77486d152cd --- /dev/null +++ b/include/linux/mmu_notifier.h @@ -0,0 +1,279 @@ +#ifndef _LINUX_MMU_NOTIFIER_H +#define _LINUX_MMU_NOTIFIER_H + +#include +#include +#include + +struct mmu_notifier; +struct mmu_notifier_ops; + +#ifdef CONFIG_MMU_NOTIFIER + +/* + * The mmu notifier_mm structure is allocated and installed in + * mm->mmu_notifier_mm inside the mm_take_all_locks() protected + * critical section and it's released only when mm_count reaches zero + * in mmdrop(). + */ +struct mmu_notifier_mm { + /* all mmu notifiers registerd in this mm are queued in this list */ + struct hlist_head list; + /* to serialize the list modifications and hlist_unhashed */ + spinlock_t lock; +}; + +struct mmu_notifier_ops { + /* + * Called either by mmu_notifier_unregister or when the mm is + * being destroyed by exit_mmap, always before all pages are + * freed. This can run concurrently with other mmu notifier + * methods (the ones invoked outside the mm context) and it + * should tear down all secondary mmu mappings and freeze the + * secondary mmu. If this method isn't implemented you've to + * be sure that nothing could possibly write to the pages + * through the secondary mmu by the time the last thread with + * tsk->mm == mm exits. + * + * As side note: the pages freed after ->release returns could + * be immediately reallocated by the gart at an alias physical + * address with a different cache model, so if ->release isn't + * implemented because all _software_ driven memory accesses + * through the secondary mmu are terminated by the time the + * last thread of this mm quits, you've also to be sure that + * speculative _hardware_ operations can't allocate dirty + * cachelines in the cpu that could not be snooped and made + * coherent with the other read and write operations happening + * through the gart alias address, so leading to memory + * corruption. 
+ */ + void (*release)(struct mmu_notifier *mn, + struct mm_struct *mm); + + /* + * clear_flush_young is called after the VM is + * test-and-clearing the young/accessed bitflag in the + * pte. This way the VM will provide proper aging to the + * accesses to the page through the secondary MMUs and not + * only to the ones through the Linux pte. + */ + int (*clear_flush_young)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address); + + /* + * Before this is invoked any secondary MMU is still ok to + * read/write to the page previously pointed to by the Linux + * pte because the page hasn't been freed yet and it won't be + * freed until this returns. If required set_page_dirty has to + * be called internally to this method. + */ + void (*invalidate_page)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address); + + /* + * invalidate_range_start() and invalidate_range_end() must be + * paired and are called only when the mmap_sem and/or the + * locks protecting the reverse maps are held. The subsystem + * must guarantee that no additional references are taken to + * the pages in the range established between the call to + * invalidate_range_start() and the matching call to + * invalidate_range_end(). + * + * Invalidation of multiple concurrent ranges may be + * optionally permitted by the driver. Either way the + * establishment of sptes is forbidden in the range passed to + * invalidate_range_begin/end for the whole duration of the + * invalidate_range_begin/end critical section. + * + * invalidate_range_start() is called when all pages in the + * range are still mapped and have at least a refcount of one. + * + * invalidate_range_end() is called when all pages in the + * range have been unmapped and the pages have been freed by + * the VM. + * + * The VM will remove the page table entries and potentially + * the page between invalidate_range_start() and + * invalidate_range_end(). If the page must not be freed + * because of pending I/O or other circumstances then the + * invalidate_range_start() callback (or the initial mapping + * by the driver) must make sure that the refcount is kept + * elevated. + * + * If the driver increases the refcount when the pages are + * initially mapped into an address space then either + * invalidate_range_start() or invalidate_range_end() may + * decrease the refcount. If the refcount is decreased on + * invalidate_range_start() then the VM can free pages as page + * table entries are removed. If the refcount is only + * droppped on invalidate_range_end() then the driver itself + * will drop the last refcount but it must take care to flush + * any secondary tlb before doing the final free on the + * page. Pages will no longer be referenced by the linux + * address space but may still be referenced by sptes until + * the last refcount is dropped. + */ + void (*invalidate_range_start)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end); + void (*invalidate_range_end)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end); +}; + +/* + * The notifier chains are protected by mmap_sem and/or the reverse map + * semaphores. Notifier chains are only changed when all reverse maps and + * the mmap_sem locks are taken. + * + * Therefore notifier chains can only be traversed when either + * + * 1. mmap_sem is held. + * 2. One of the reverse map locks is held (i_mmap_lock or anon_vma->lock). + * 3. 
No other concurrent thread can access the list (release) + */ +struct mmu_notifier { + struct hlist_node hlist; + const struct mmu_notifier_ops *ops; +}; + +static inline int mm_has_notifiers(struct mm_struct *mm) +{ + return unlikely(mm->mmu_notifier_mm); +} + +extern int mmu_notifier_register(struct mmu_notifier *mn, + struct mm_struct *mm); +extern int __mmu_notifier_register(struct mmu_notifier *mn, + struct mm_struct *mm); +extern void mmu_notifier_unregister(struct mmu_notifier *mn, + struct mm_struct *mm); +extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); +extern void __mmu_notifier_release(struct mm_struct *mm); +extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, + unsigned long address); +extern void __mmu_notifier_invalidate_page(struct mm_struct *mm, + unsigned long address); +extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end); +extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, + unsigned long start, unsigned long end); + +static inline void mmu_notifier_release(struct mm_struct *mm) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_release(mm); +} + +static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, + unsigned long address) +{ + if (mm_has_notifiers(mm)) + return __mmu_notifier_clear_flush_young(mm, address); + return 0; +} + +static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, + unsigned long address) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_invalidate_page(mm, address); +} + +static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_invalidate_range_start(mm, start, end); +} + +static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_invalidate_range_end(mm, start, end); +} + +static inline void mmu_notifier_mm_init(struct mm_struct *mm) +{ + mm->mmu_notifier_mm = NULL; +} + +static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_mm_destroy(mm); +} + +/* + * These two macros will sometime replace ptep_clear_flush. + * ptep_clear_flush is impleemnted as macro itself, so this also is + * implemented as a macro until ptep_clear_flush will converted to an + * inline function, to diminish the risk of compilation failure. The + * invalidate_page method over time can be moved outside the PT lock + * and these two macros can be later removed. 
+ */ +#define ptep_clear_flush_notify(__vma, __address, __ptep) \ +({ \ + pte_t __pte; \ + struct vm_area_struct *___vma = __vma; \ + unsigned long ___address = __address; \ + __pte = ptep_clear_flush(___vma, ___address, __ptep); \ + mmu_notifier_invalidate_page(___vma->vm_mm, ___address); \ + __pte; \ +}) + +#define ptep_clear_flush_young_notify(__vma, __address, __ptep) \ +({ \ + int __young; \ + struct vm_area_struct *___vma = __vma; \ + unsigned long ___address = __address; \ + __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ + __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ + ___address); \ + __young; \ +}) + +#else /* CONFIG_MMU_NOTIFIER */ + +static inline void mmu_notifier_release(struct mm_struct *mm) +{ +} + +static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, + unsigned long address) +{ + return 0; +} + +static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, + unsigned long address) +{ +} + +static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ +} + +static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ +} + +static inline void mmu_notifier_mm_init(struct mm_struct *mm) +{ +} + +static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) +{ +} + +#define ptep_clear_flush_young_notify ptep_clear_flush_young +#define ptep_clear_flush_notify ptep_clear_flush + +#endif /* CONFIG_MMU_NOTIFIER */ + +#endif /* _LINUX_MMU_NOTIFIER_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 8214ba7c8bb1..7ce2ebe84796 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -414,6 +415,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; + mmu_notifier_mm_init(mm); return mm; } @@ -446,6 +448,7 @@ void __mmdrop(struct mm_struct *mm) BUG_ON(mm == &init_mm); mm_free_pgd(mm); destroy_context(mm); + mmu_notifier_mm_destroy(mm); free_mm(mm); } EXPORT_SYMBOL_GPL(__mmdrop); diff --git a/mm/Kconfig b/mm/Kconfig index efee5d379df4..446c6588c753 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -208,3 +208,6 @@ config NR_QUICK config VIRT_TO_BUS def_bool y depends on !ARCH_NO_VIRT_TO_BUS + +config MMU_NOTIFIER + bool diff --git a/mm/Makefile b/mm/Makefile index 06ca2381fef1..da4ccf015aea 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_SHMEM) += shmem.o obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o obj-$(CONFIG_SLOB) += slob.o +obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_SLUB) += slub.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 98a3f31ccd6a..380ab402d711 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -188,7 +189,7 @@ __xip_unmap (struct address_space * mapping, if (pte) { /* Nuke the page table entry. 
*/ flush_cache_page(vma, address, pte_pfn(*pte)); - pteval = ptep_clear_flush(vma, address, pte); + pteval = ptep_clear_flush_notify(vma, address, pte); page_remove_rmap(page, vma); dec_mm_counter(mm, file_rss); BUG_ON(pte_dirty(pteval)); diff --git a/mm/fremap.c b/mm/fremap.c index 07a9c82ce1a3..7881638e4a12 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -214,7 +215,9 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, spin_unlock(&mapping->i_mmap_lock); } + mmu_notifier_invalidate_range_start(mm, start, start + size); err = populate_range(mm, vma, start, size, pgoff); + mmu_notifier_invalidate_range_end(mm, start, start + size); if (!err && !(flags & MAP_NONBLOCK)) { if (unlikely(has_write_lock)) { downgrade_write(&mm->mmap_sem); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3be79dc18c5c..80eb0d31d0d3 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -1672,6 +1673,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, BUG_ON(start & ~huge_page_mask(h)); BUG_ON(end & ~huge_page_mask(h)); + mmu_notifier_invalidate_range_start(mm, start, end); spin_lock(&mm->page_table_lock); for (address = start; address < end; address += sz) { ptep = huge_pte_offset(mm, address); @@ -1713,6 +1715,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, } spin_unlock(&mm->page_table_lock); flush_tlb_range(vma, start, end); + mmu_notifier_invalidate_range_end(mm, start, end); list_for_each_entry_safe(page, tmp, &page_list, lru) { list_del(&page->lru); put_page(page); diff --git a/mm/memory.c b/mm/memory.c index a8ca04faaea6..67f0ab9077d9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -652,6 +653,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, unsigned long next; unsigned long addr = vma->vm_start; unsigned long end = vma->vm_end; + int ret; /* * Don't copy ptes where a page fault will fill them correctly. @@ -667,17 +669,33 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst_mm, src_mm, vma); + /* + * We need to invalidate the secondary MMU mappings only when + * there could be a permission downgrade on the ptes of the + * parent mm. And a permission downgrade will only happen if + * is_cow_mapping() returns true. + */ + if (is_cow_mapping(vma->vm_flags)) + mmu_notifier_invalidate_range_start(src_mm, addr, end); + + ret = 0; dst_pgd = pgd_offset(dst_mm, addr); src_pgd = pgd_offset(src_mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(src_pgd)) continue; - if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd, - vma, addr, next)) - return -ENOMEM; + if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd, + vma, addr, next))) { + ret = -ENOMEM; + break; + } } while (dst_pgd++, src_pgd++, addr = next, addr != end); - return 0; + + if (is_cow_mapping(vma->vm_flags)) + mmu_notifier_invalidate_range_end(src_mm, + vma->vm_start, end); + return ret; } static unsigned long zap_pte_range(struct mmu_gather *tlb, @@ -881,7 +899,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, unsigned long start = start_addr; spinlock_t *i_mmap_lock = details? 
details->i_mmap_lock: NULL; int fullmm = (*tlbp)->fullmm; + struct mm_struct *mm = vma->vm_mm; + mmu_notifier_invalidate_range_start(mm, start_addr, end_addr); for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) { unsigned long end; @@ -946,6 +966,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, } } out: + mmu_notifier_invalidate_range_end(mm, start_addr, end_addr); return start; /* which is now the end (or restart) address */ } @@ -1616,10 +1637,11 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, { pgd_t *pgd; unsigned long next; - unsigned long end = addr + size; + unsigned long start = addr, end = addr + size; int err; BUG_ON(addr >= end); + mmu_notifier_invalidate_range_start(mm, start, end); pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); @@ -1627,6 +1649,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, if (err) break; } while (pgd++, addr = next, addr != end); + mmu_notifier_invalidate_range_end(mm, start, end); return err; } EXPORT_SYMBOL_GPL(apply_to_page_range); @@ -1839,7 +1862,7 @@ gotten: * seen in the presence of one thread doing SMC and another * thread doing COW. */ - ptep_clear_flush(vma, address, page_table); + ptep_clear_flush_notify(vma, address, page_table); set_pte_at(mm, address, page_table, entry); update_mmu_cache(vma, address, entry); lru_cache_add_active(new_page); diff --git a/mm/mmap.c b/mm/mmap.c index e5f9cb83d6d4..245c3d69067b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -2061,6 +2062,7 @@ void exit_mmap(struct mm_struct *mm) /* mm's last user has gone, and its about to be pulled down */ arch_exit_mmap(mm); + mmu_notifier_release(mm); lru_add_drain(); flush_cache_mm(mm); diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c new file mode 100644 index 000000000000..5f4ef0250bee --- /dev/null +++ b/mm/mmu_notifier.c @@ -0,0 +1,277 @@ +/* + * linux/mm/mmu_notifier.c + * + * Copyright (C) 2008 Qumranet, Inc. + * Copyright (C) 2008 SGI + * Christoph Lameter + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * This function can't run concurrently against mmu_notifier_register + * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap + * runs with mm_users == 0. Other tasks may still invoke mmu notifiers + * in parallel despite there being no task using this mm any more, + * through the vmas outside of the exit_mmap context, such as with + * vmtruncate. This serializes against mmu_notifier_unregister with + * the mmu_notifier_mm->lock in addition to RCU and it serializes + * against the other mmu notifiers with RCU. struct mmu_notifier_mm + * can't go away from under us as exit_mmap holds an mm_count pin + * itself. + */ +void __mmu_notifier_release(struct mm_struct *mm) +{ + struct mmu_notifier *mn; + + spin_lock(&mm->mmu_notifier_mm->lock); + while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { + mn = hlist_entry(mm->mmu_notifier_mm->list.first, + struct mmu_notifier, + hlist); + /* + * We arrived before mmu_notifier_unregister so + * mmu_notifier_unregister will do nothing other than + * to wait ->release to finish and + * mmu_notifier_unregister to return. + */ + hlist_del_init_rcu(&mn->hlist); + /* + * RCU here will block mmu_notifier_unregister until + * ->release returns. 
+ */ + rcu_read_lock(); + spin_unlock(&mm->mmu_notifier_mm->lock); + /* + * if ->release runs before mmu_notifier_unregister it + * must be handled as it's the only way for the driver + * to flush all existing sptes and stop the driver + * from establishing any more sptes before all the + * pages in the mm are freed. + */ + if (mn->ops->release) + mn->ops->release(mn, mm); + rcu_read_unlock(); + spin_lock(&mm->mmu_notifier_mm->lock); + } + spin_unlock(&mm->mmu_notifier_mm->lock); + + /* + * synchronize_rcu here prevents mmu_notifier_release to + * return to exit_mmap (which would proceed freeing all pages + * in the mm) until the ->release method returns, if it was + * invoked by mmu_notifier_unregister. + * + * The mmu_notifier_mm can't go away from under us because one + * mm_count is hold by exit_mmap. + */ + synchronize_rcu(); +} + +/* + * If no young bitflag is supported by the hardware, ->clear_flush_young can + * unmap the address and return 1 or 0 depending if the mapping previously + * existed or not. + */ +int __mmu_notifier_clear_flush_young(struct mm_struct *mm, + unsigned long address) +{ + struct mmu_notifier *mn; + struct hlist_node *n; + int young = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + if (mn->ops->clear_flush_young) + young |= mn->ops->clear_flush_young(mn, mm, address); + } + rcu_read_unlock(); + + return young; +} + +void __mmu_notifier_invalidate_page(struct mm_struct *mm, + unsigned long address) +{ + struct mmu_notifier *mn; + struct hlist_node *n; + + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + if (mn->ops->invalidate_page) + mn->ops->invalidate_page(mn, mm, address); + } + rcu_read_unlock(); +} + +void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct mmu_notifier *mn; + struct hlist_node *n; + + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + if (mn->ops->invalidate_range_start) + mn->ops->invalidate_range_start(mn, mm, start, end); + } + rcu_read_unlock(); +} + +void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct mmu_notifier *mn; + struct hlist_node *n; + + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + if (mn->ops->invalidate_range_end) + mn->ops->invalidate_range_end(mn, mm, start, end); + } + rcu_read_unlock(); +} + +static int do_mmu_notifier_register(struct mmu_notifier *mn, + struct mm_struct *mm, + int take_mmap_sem) +{ + struct mmu_notifier_mm *mmu_notifier_mm; + int ret; + + BUG_ON(atomic_read(&mm->mm_users) <= 0); + + ret = -ENOMEM; + mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL); + if (unlikely(!mmu_notifier_mm)) + goto out; + + if (take_mmap_sem) + down_write(&mm->mmap_sem); + ret = mm_take_all_locks(mm); + if (unlikely(ret)) + goto out_cleanup; + + if (!mm_has_notifiers(mm)) { + INIT_HLIST_HEAD(&mmu_notifier_mm->list); + spin_lock_init(&mmu_notifier_mm->lock); + mm->mmu_notifier_mm = mmu_notifier_mm; + mmu_notifier_mm = NULL; + } + atomic_inc(&mm->mm_count); + + /* + * Serialize the update against mmu_notifier_unregister. A + * side note: mmu_notifier_release can't run concurrently with + * us because we hold the mm_users pin (either implicitly as + * current->mm or explicitly with get_task_mm() or similar). + * We can't race against any other mmu notifier method either + * thanks to mm_take_all_locks(). 
+ */ + spin_lock(&mm->mmu_notifier_mm->lock); + hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list); + spin_unlock(&mm->mmu_notifier_mm->lock); + + mm_drop_all_locks(mm); +out_cleanup: + if (take_mmap_sem) + up_write(&mm->mmap_sem); + /* kfree() does nothing if mmu_notifier_mm is NULL */ + kfree(mmu_notifier_mm); +out: + BUG_ON(atomic_read(&mm->mm_users) <= 0); + return ret; +} + +/* + * Must not hold mmap_sem nor any other VM related lock when calling + * this registration function. Must also ensure mm_users can't go down + * to zero while this runs to avoid races with mmu_notifier_release, + * so mm has to be current->mm or the mm should be pinned safely such + * as with get_task_mm(). If the mm is not current->mm, the mm_users + * pin should be released by calling mmput after mmu_notifier_register + * returns. mmu_notifier_unregister must be always called to + * unregister the notifier. mm_count is automatically pinned to allow + * mmu_notifier_unregister to safely run at any time later, before or + * after exit_mmap. ->release will always be called before exit_mmap + * frees the pages. + */ +int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) +{ + return do_mmu_notifier_register(mn, mm, 1); +} +EXPORT_SYMBOL_GPL(mmu_notifier_register); + +/* + * Same as mmu_notifier_register but here the caller must hold the + * mmap_sem in write mode. + */ +int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) +{ + return do_mmu_notifier_register(mn, mm, 0); +} +EXPORT_SYMBOL_GPL(__mmu_notifier_register); + +/* this is called after the last mmu_notifier_unregister() returned */ +void __mmu_notifier_mm_destroy(struct mm_struct *mm) +{ + BUG_ON(!hlist_empty(&mm->mmu_notifier_mm->list)); + kfree(mm->mmu_notifier_mm); + mm->mmu_notifier_mm = LIST_POISON1; /* debug */ +} + +/* + * This releases the mm_count pin automatically and frees the mm + * structure if it was the last user of it. It serializes against + * running mmu notifiers with RCU and against mmu_notifier_unregister + * with the unregister lock + RCU. All sptes must be dropped before + * calling mmu_notifier_unregister. ->release or any other notifier + * method may be invoked concurrently with mmu_notifier_unregister, + * and only after mmu_notifier_unregister returned we're guaranteed + * that ->release or any other method can't run anymore. + */ +void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) +{ + BUG_ON(atomic_read(&mm->mm_count) <= 0); + + spin_lock(&mm->mmu_notifier_mm->lock); + if (!hlist_unhashed(&mn->hlist)) { + hlist_del_rcu(&mn->hlist); + + /* + * RCU here will force exit_mmap to wait ->release to finish + * before freeing the pages. + */ + rcu_read_lock(); + spin_unlock(&mm->mmu_notifier_mm->lock); + /* + * exit_mmap will block in mmu_notifier_release to + * guarantee ->release is called before freeing the + * pages. + */ + if (mn->ops->release) + mn->ops->release(mn, mm); + rcu_read_unlock(); + } else + spin_unlock(&mm->mmu_notifier_mm->lock); + + /* + * Wait any running method to finish, of course including + * ->release if it was run by mmu_notifier_relase instead of us. 
+ */ + synchronize_rcu(); + + BUG_ON(atomic_read(&mm->mm_count) <= 0); + + mmdrop(mm); +} +EXPORT_SYMBOL_GPL(mmu_notifier_unregister); diff --git a/mm/mprotect.c b/mm/mprotect.c index abd645a3b0a0..fded06f923f4 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -203,10 +204,12 @@ success: dirty_accountable = 1; } + mmu_notifier_invalidate_range_start(mm, start, end); if (is_vm_hugetlb_page(vma)) hugetlb_change_protection(vma, start, end, vma->vm_page_prot); else change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable); + mmu_notifier_invalidate_range_end(mm, start, end); vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); vm_stat_account(mm, newflags, vma->vm_file, nrpages); return 0; diff --git a/mm/mremap.c b/mm/mremap.c index 08e3c7f2bd15..1a7743923c8c 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -74,7 +75,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; spinlock_t *old_ptl, *new_ptl; + unsigned long old_start; + old_start = old_addr; + mmu_notifier_invalidate_range_start(vma->vm_mm, + old_start, old_end); if (vma->vm_file) { /* * Subtle point from Rajesh Venkatasubramanian: before @@ -116,6 +121,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, pte_unmap_unlock(old_pte - 1, old_ptl); if (mapping) spin_unlock(&mapping->i_mmap_lock); + mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end); } #define LATENCY_LIMIT (64 * PAGE_SIZE) diff --git a/mm/rmap.c b/mm/rmap.c index 39ae5a9bf382..99bc3f9cd796 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -287,7 +288,7 @@ static int page_referenced_one(struct page *page, if (vma->vm_flags & VM_LOCKED) { referenced++; *mapcount = 1; /* break early from loop */ - } else if (ptep_clear_flush_young(vma, address, pte)) + } else if (ptep_clear_flush_young_notify(vma, address, pte)) referenced++; /* Pretend the page is referenced if the task has the @@ -457,7 +458,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma) pte_t entry; flush_cache_page(vma, address, pte_pfn(*pte)); - entry = ptep_clear_flush(vma, address, pte); + entry = ptep_clear_flush_notify(vma, address, pte); entry = pte_wrprotect(entry); entry = pte_mkclean(entry); set_pte_at(mm, address, pte, entry); @@ -705,14 +706,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * skipped over this mm) then we should reactivate it. */ if (!migration && ((vma->vm_flags & VM_LOCKED) || - (ptep_clear_flush_young(vma, address, pte)))) { + (ptep_clear_flush_young_notify(vma, address, pte)))) { ret = SWAP_FAIL; goto out_unmap; } /* Nuke the page table entry. */ flush_cache_page(vma, address, page_to_pfn(page)); - pteval = ptep_clear_flush(vma, address, pte); + pteval = ptep_clear_flush_notify(vma, address, pte); /* Move the dirty bit to the physical page now the pte is gone. */ if (pte_dirty(pteval)) @@ -837,12 +838,12 @@ static void try_to_unmap_cluster(unsigned long cursor, page = vm_normal_page(vma, address, *pte); BUG_ON(!page || PageAnon(page)); - if (ptep_clear_flush_young(vma, address, pte)) + if (ptep_clear_flush_young_notify(vma, address, pte)) continue; /* Nuke the page table entry. 
 */
	flush_cache_page(vma, address, pte_pfn(*pte));
-	pteval = ptep_clear_flush(vma, address, pte);
+	pteval = ptep_clear_flush_notify(vma, address, pte);

	/* If nonlinear, store the file page offset in the pte. */
	if (page->index != linear_page_index(vma, address))
-- cgit v1.2.3

From 8ab22b9abb5c55413802e4adc9aa6223324547c3 Mon Sep 17 00:00:00 2001
From: Hisashi Hifumi
Date: Mon, 28 Jul 2008 15:46:36 -0700
Subject: vfs: pagecache usage optimization for pagesize!=blocksize

When we read some part of a file through the pagecache, if there is a pagecache page with the corresponding index but the page is not uptodate, read IO is issued and the page becomes uptodate.

I think this is fine for a pagesize == blocksize environment, but there is room for improvement when pagesize != blocksize. In that case a page can have multiple buffers, and even if the page is not uptodate, some of its buffers can be. So I suggest that when all the buffers which correspond to the part of the file we want to read are uptodate, we use the pagecache and copy the data from it to the user buffer even though the page itself is not uptodate. This can reduce read IO and improve system throughput.

I wrote a benchmark program and got the following results with it. The benchmark does:

1: mount and open a test file.
2: create a 512MB file.
3: close the file and umount.
4: mount and open the test file again.
5: pwrite randomly 300000 times on the test file; the offset is aligned to the IO size (1024 bytes).
6: measure the time of 100000 random preads on the test file.

The result was:

	2.6.26			330 sec
	2.6.26-patched		226 sec

Arch: i386, Filesystem: ext3, Blocksize: 1024 bytes, Memory: 1GB

On ext3/4 a file is written through buffers/blocks, so random read/write mixed workloads, or random reads after random writes, are optimized by this patch in a pagesize != blocksize environment. This test result shows that.
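To make the per-buffer idea concrete, here is an editorial toy model rather than code from the patch: think of a 4096-byte page backed by four 1024-byte buffers, so a 1024-byte pread at page offset 2048 only needs the third buffer to be uptodate and can be served from the pagecache even while the other three buffers are stale. The array and the function name below are invented for illustration.

	/*
	 * Toy model of the per-buffer check: uptodate[] stands in for the
	 * buffer_head uptodate bits of one 4096-byte page with 1024-byte
	 * blocks; [from, from + count) is the byte range being read.
	 */
	static int range_is_uptodate(const int uptodate[4],
				     unsigned int from, unsigned int count)
	{
		unsigned int blocksize = 1024;
		unsigned int to = from + count;
		unsigned int start = 0;
		int i;

		for (i = 0; i < 4; i++, start += blocksize) {
			unsigned int end = start + blocksize;

			/* only buffers overlapping [from, to) matter */
			if (end > from && start < to && !uptodate[i])
				return 0;	/* a needed buffer is stale */
		}
		return 1;	/* every overlapping buffer is uptodate */
	}

block_is_partially_uptodate() in the fs/buffer.c hunk below implements the real version of this walk over the page's buffer_head ring, and it is what the new ->is_partially_uptodate address_space operation points at.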
The benchmark program is as follows: #include #include #include #include #include #include #include #include #include #define LEN 1024 #define LOOP 1024*512 /* 512MB */ main(void) { unsigned long i, offset, filesize; int fd; char buf[LEN]; time_t t1, t2; if (mount("/dev/sda1", "/root/test1/", "ext3", 0, 0) < 0) { perror("cannot mount\n"); exit(1); } memset(buf, 0, LEN); fd = open("/root/test1/testfile", O_CREAT|O_RDWR|O_TRUNC); if (fd < 0) { perror("cannot open file\n"); exit(1); } for (i = 0; i < LOOP; i++) write(fd, buf, LEN); close(fd); if (umount("/root/test1/") < 0) { perror("cannot umount\n"); exit(1); } if (mount("/dev/sda1", "/root/test1/", "ext3", 0, 0) < 0) { perror("cannot mount\n"); exit(1); } fd = open("/root/test1/testfile", O_RDWR); if (fd < 0) { perror("cannot open file\n"); exit(1); } filesize = LEN * LOOP; for (i = 0; i < 300000; i++){ offset = (random() % filesize) & (~(LEN - 1)); pwrite(fd, buf, LEN, offset); } printf("start test\n"); time(&t1); for (i = 0; i < 100000; i++){ offset = (random() % filesize) & (~(LEN - 1)); pread(fd, buf, LEN, offset); } time(&t2); printf("%ld sec\n", t2-t1); close(fd); if (umount("/root/test1/") < 0) { perror("cannot umount\n"); exit(1); } } Signed-off-by: Hisashi Hifumi Cc: Nick Piggin Cc: Christoph Hellwig Cc: Jan Kara Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 46 +++++++++++++++++++++++ fs/ext2/inode.c | 1 + fs/ext3/inode.c | 67 +++++++++++++++++---------------- fs/ext4/inode.c | 92 +++++++++++++++++++++++---------------------- include/linux/buffer_head.h | 2 + include/linux/fs.h | 44 +++++++++++----------- mm/filemap.c | 14 ++++++- 7 files changed, 167 insertions(+), 99 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index f95805019639..ca12a6bb82b1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2095,6 +2095,52 @@ int generic_write_end(struct file *file, struct address_space *mapping, } EXPORT_SYMBOL(generic_write_end); +/* + * block_is_partially_uptodate checks whether buffers within a page are + * uptodate or not. + * + * Returns true if all buffers which correspond to a file portion + * we want to read are uptodate. + */ +int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, + unsigned long from) +{ + struct inode *inode = page->mapping->host; + unsigned block_start, block_end, blocksize; + unsigned to; + struct buffer_head *bh, *head; + int ret = 1; + + if (!page_has_buffers(page)) + return 0; + + blocksize = 1 << inode->i_blkbits; + to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); + to = from + to; + if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) + return 0; + + head = page_buffers(page); + bh = head; + block_start = 0; + do { + block_end = block_start + blocksize; + if (block_end > from && block_start < to) { + if (!buffer_uptodate(bh)) { + ret = 0; + break; + } + if (block_end >= to) + break; + } + block_start = block_end; + bh = bh->b_this_page; + } while (bh != head); + + return ret; +} +EXPORT_SYMBOL(block_is_partially_uptodate); + /* * Generic "read page" function for block devices that have the normal * get_block functionality. This is most of the block device filesystems. 
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 384fc0d1dd74..991d6dfeb51f 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -791,6 +791,7 @@ const struct address_space_operations ext2_aops = { .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; const struct address_space_operations ext2_aops_xip = { diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 3bf07d70b914..507d8689b111 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1767,44 +1767,47 @@ static int ext3_journalled_set_page_dirty(struct page *page) } static const struct address_space_operations ext3_ordered_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_ordered_writepage, - .sync_page = block_sync_page, - .write_begin = ext3_write_begin, - .write_end = ext3_ordered_write_end, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, - .direct_IO = ext3_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext3_readpage, + .readpages = ext3_readpages, + .writepage = ext3_ordered_writepage, + .sync_page = block_sync_page, + .write_begin = ext3_write_begin, + .write_end = ext3_ordered_write_end, + .bmap = ext3_bmap, + .invalidatepage = ext3_invalidatepage, + .releasepage = ext3_releasepage, + .direct_IO = ext3_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext3_writeback_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_writeback_writepage, - .sync_page = block_sync_page, - .write_begin = ext3_write_begin, - .write_end = ext3_writeback_write_end, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, - .direct_IO = ext3_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext3_readpage, + .readpages = ext3_readpages, + .writepage = ext3_writeback_writepage, + .sync_page = block_sync_page, + .write_begin = ext3_write_begin, + .write_end = ext3_writeback_write_end, + .bmap = ext3_bmap, + .invalidatepage = ext3_invalidatepage, + .releasepage = ext3_releasepage, + .direct_IO = ext3_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext3_journalled_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_journalled_writepage, - .sync_page = block_sync_page, - .write_begin = ext3_write_begin, - .write_end = ext3_journalled_write_end, - .set_page_dirty = ext3_journalled_set_page_dirty, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, + .readpage = ext3_readpage, + .readpages = ext3_readpages, + .writepage = ext3_journalled_writepage, + .sync_page = block_sync_page, + .write_begin = ext3_write_begin, + .write_end = ext3_journalled_write_end, + .set_page_dirty = ext3_journalled_set_page_dirty, + .bmap = ext3_bmap, + .invalidatepage = ext3_invalidatepage, + .releasepage = ext3_releasepage, + .is_partially_uptodate = block_is_partially_uptodate, }; void ext3_set_aops(struct inode *inode) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8ca2763df091..9843b046c235 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2806,59 +2806,63 @@ static int ext4_journalled_set_page_dirty(struct page *page) } static const struct address_space_operations ext4_ordered_aops = { - 
.readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_normal_writepage, - .sync_page = block_sync_page, - .write_begin = ext4_write_begin, - .write_end = ext4_ordered_write_end, - .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, - .releasepage = ext4_releasepage, - .direct_IO = ext4_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_normal_writepage, + .sync_page = block_sync_page, + .write_begin = ext4_write_begin, + .write_end = ext4_ordered_write_end, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, + .direct_IO = ext4_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext4_writeback_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_normal_writepage, - .sync_page = block_sync_page, - .write_begin = ext4_write_begin, - .write_end = ext4_writeback_write_end, - .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, - .releasepage = ext4_releasepage, - .direct_IO = ext4_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_normal_writepage, + .sync_page = block_sync_page, + .write_begin = ext4_write_begin, + .write_end = ext4_writeback_write_end, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, + .direct_IO = ext4_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext4_journalled_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_journalled_writepage, - .sync_page = block_sync_page, - .write_begin = ext4_write_begin, - .write_end = ext4_journalled_write_end, - .set_page_dirty = ext4_journalled_set_page_dirty, - .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, - .releasepage = ext4_releasepage, + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_journalled_writepage, + .sync_page = block_sync_page, + .write_begin = ext4_write_begin, + .write_end = ext4_journalled_write_end, + .set_page_dirty = ext4_journalled_set_page_dirty, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext4_da_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_da_writepage, - .writepages = ext4_da_writepages, - .sync_page = block_sync_page, - .write_begin = ext4_da_write_begin, - .write_end = ext4_da_write_end, - .bmap = ext4_bmap, - .invalidatepage = ext4_da_invalidatepage, - .releasepage = ext4_releasepage, - .direct_IO = ext4_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_da_writepage, + .writepages = ext4_da_writepages, + .sync_page = block_sync_page, + .write_begin = ext4_da_write_begin, + .write_end = ext4_da_write_end, + .bmap = ext4_bmap, + .invalidatepage = ext4_da_invalidatepage, + .releasepage = ext4_releasepage, + .direct_IO = ext4_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; void ext4_set_aops(struct inode *inode) diff --git a/include/linux/buffer_head.h 
b/include/linux/buffer_head.h index 82aa36c53ea7..50cfe8ceb478 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -205,6 +205,8 @@ void block_invalidatepage(struct page *page, unsigned long offset); int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); int block_read_full_page(struct page*, get_block_t*); +int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, + unsigned long from); int block_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t*); diff --git a/include/linux/fs.h b/include/linux/fs.h index 8252b045e624..580b513668fe 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -443,6 +443,27 @@ static inline size_t iov_iter_count(struct iov_iter *i) return i->count; } +/* + * "descriptor" for what we're up to with a read. + * This allows us to use the same read code yet + * have multiple different users of the data that + * we read from a file. + * + * The simplest case just copies the data to user + * mode. + */ +typedef struct { + size_t written; + size_t count; + union { + char __user *buf; + void *data; + } arg; + int error; +} read_descriptor_t; + +typedef int (*read_actor_t)(read_descriptor_t *, struct page *, + unsigned long, unsigned long); struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); @@ -484,6 +505,8 @@ struct address_space_operations { int (*migratepage) (struct address_space *, struct page *, struct page *); int (*launder_page) (struct page *); + int (*is_partially_uptodate) (struct page *, read_descriptor_t *, + unsigned long); }; /* @@ -1198,27 +1221,6 @@ struct block_device_operations { struct module *owner; }; -/* - * "descriptor" for what we're up to with a read. - * This allows us to use the same read code yet - * have multiple different users of the data that - * we read from a file. - * - * The simplest case just copies the data to user - * mode. - */ -typedef struct { - size_t written; - size_t count; - union { - char __user * buf; - void *data; - } arg; - int error; -} read_descriptor_t; - -typedef int (*read_actor_t)(read_descriptor_t *, struct page *, unsigned long, unsigned long); - /* These macros are for out of kernel modules to test that * the kernel supports the unlocked_ioctl and compat_ioctl * fields in struct file_operations. */ diff --git a/mm/filemap.c b/mm/filemap.c index 5de7633e1dbe..42bbc6909ba4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1023,8 +1023,17 @@ find_page: ra, filp, page, index, last_index - index); } - if (!PageUptodate(page)) - goto page_not_up_to_date; + if (!PageUptodate(page)) { + if (inode->i_blkbits == PAGE_CACHE_SHIFT || + !mapping->a_ops->is_partially_uptodate) + goto page_not_up_to_date; + if (TestSetPageLocked(page)) + goto page_not_up_to_date; + if (!mapping->a_ops->is_partially_uptodate(page, + desc, offset)) + goto page_not_up_to_date_locked; + unlock_page(page); + } page_ok: /* * i_size must be checked after we know the page is Uptodate. @@ -1094,6 +1103,7 @@ page_not_up_to_date: if (lock_page_killable(page)) goto readpage_eio; +page_not_up_to_date_locked: /* Did it get truncated before we got the lock? 
*/ if (!page->mapping) { unlock_page(page); -- cgit v1.2.3 From 424f525a1241351da947fb48a938128ddd774511 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 29 Jul 2008 01:30:26 +0200 Subject: mfd: accept pure device as a parent, not only platform_device Signed-off-by: Dmitry Baryshkov Signed-off-by: Samuel Ortiz --- drivers/mfd/mfd-core.c | 14 +++++++------- drivers/mfd/tc6393xb.c | 4 ++-- include/linux/mfd/core.h | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index ad4e4d16a36a..9c9c126ed334 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -15,7 +15,7 @@ #include #include -static int mfd_add_device(struct platform_device *parent, +static int mfd_add_device(struct device *parent, int id, const struct mfd_cell *cell, struct resource *mem_base, int irq_base) @@ -25,11 +25,11 @@ static int mfd_add_device(struct platform_device *parent, int ret = -ENOMEM; int r; - pdev = platform_device_alloc(cell->name, parent->id); + pdev = platform_device_alloc(cell->name, id); if (!pdev) goto fail_alloc; - pdev->dev.parent = &parent->dev; + pdev->dev.parent = parent; ret = platform_device_add_data(pdev, cell->platform_data, cell->data_size); @@ -75,7 +75,7 @@ fail_alloc: return ret; } -int mfd_add_devices(struct platform_device *parent, +int mfd_add_devices(struct device *parent, int id, const struct mfd_cell *cells, int n_devs, struct resource *mem_base, int irq_base) @@ -84,7 +84,7 @@ int mfd_add_devices(struct platform_device *parent, int ret = 0; for (i = 0; i < n_devs; i++) { - ret = mfd_add_device(parent, cells + i, mem_base, irq_base); + ret = mfd_add_device(parent, id, cells + i, mem_base, irq_base); if (ret) break; } @@ -102,9 +102,9 @@ static int mfd_remove_devices_fn(struct device *dev, void *unused) return 0; } -void mfd_remove_devices(struct platform_device *parent) +void mfd_remove_devices(struct device *parent) { - device_for_each_child(&parent->dev, NULL, mfd_remove_devices_fn); + device_for_each_child(parent, NULL, mfd_remove_devices_fn); } EXPORT_SYMBOL(mfd_remove_devices); diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c index 9908aaa4881a..f4fd797c1590 100644 --- a/drivers/mfd/tc6393xb.c +++ b/drivers/mfd/tc6393xb.c @@ -471,7 +471,7 @@ static int __devinit tc6393xb_probe(struct platform_device *dev) tc6393xb_cells[TC6393XB_CELL_NAND].data_size = sizeof(tc6393xb_cells[TC6393XB_CELL_NAND]); - retval = mfd_add_devices(dev, + retval = mfd_add_devices(&dev->dev, dev->id, tc6393xb_cells, ARRAY_SIZE(tc6393xb_cells), iomem, tcpd->irq_base); @@ -505,7 +505,7 @@ static int __devexit tc6393xb_remove(struct platform_device *dev) struct tc6393xb *tc6393xb = platform_get_drvdata(dev); int ret; - mfd_remove_devices(dev); + mfd_remove_devices(&dev->dev); if (tc6393xb->irq) tc6393xb_detach_irq(dev); diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index ea45d4a5a2ac..49ef857cdb2d 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -45,11 +45,11 @@ struct mfd_cell { const struct resource *resources; }; -extern int mfd_add_devices(struct platform_device *parent, +extern int mfd_add_devices(struct device *parent, int id, const struct mfd_cell *cells, int n_devs, struct resource *mem_base, int irq_base); -extern void mfd_remove_devices(struct platform_device *parent); +extern void mfd_remove_devices(struct device *parent); #endif -- cgit v1.2.3 From e930bffe95e1e886a1ede80726ea38df5838d067 Mon Sep 17 00:00:00 2001 From: Andrea 
Arcangeli Date: Fri, 25 Jul 2008 16:24:52 +0200 Subject: KVM: Synchronize guest physical memory map to host virtual memory map Synchronize changes to host virtual addresses which are part of a KVM memory slot to the KVM shadow mmu. This allows pte operations like swapping, page migration, and madvise() to transparently work with KVM. Signed-off-by: Andrea Arcangeli Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 100 +++++++++++++++++++++++++++++++++ arch/x86/kvm/paging_tmpl.h | 12 ++++ include/asm-x86/kvm_host.h | 6 ++ include/linux/kvm_host.h | 24 ++++++++ virt/kvm/kvm_main.c | 135 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 277 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2fa231923cf7..0bfe2bd305eb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -653,6 +653,84 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) account_shadowed(kvm, gfn); } +static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) +{ + u64 *spte; + int need_tlb_flush = 0; + + while ((spte = rmap_next(kvm, rmapp, NULL))) { + BUG_ON(!(*spte & PT_PRESENT_MASK)); + rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); + rmap_remove(kvm, spte); + set_shadow_pte(spte, shadow_trap_nonpresent_pte); + need_tlb_flush = 1; + } + return need_tlb_flush; +} + +static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, + int (*handler)(struct kvm *kvm, unsigned long *rmapp)) +{ + int i; + int retval = 0; + + /* + * If mmap_sem isn't taken, we can look the memslots with only + * the mmu_lock by skipping over the slots with userspace_addr == 0. + */ + for (i = 0; i < kvm->nmemslots; i++) { + struct kvm_memory_slot *memslot = &kvm->memslots[i]; + unsigned long start = memslot->userspace_addr; + unsigned long end; + + /* mmu_lock protects userspace_addr */ + if (!start) + continue; + + end = start + (memslot->npages << PAGE_SHIFT); + if (hva >= start && hva < end) { + gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; + retval |= handler(kvm, &memslot->rmap[gfn_offset]); + retval |= handler(kvm, + &memslot->lpage_info[ + gfn_offset / + KVM_PAGES_PER_HPAGE].rmap_pde); + } + } + + return retval; +} + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); +} + +static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) +{ + u64 *spte; + int young = 0; + + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + int _young; + u64 _spte = *spte; + BUG_ON(!(_spte & PT_PRESENT_MASK)); + _young = _spte & PT_ACCESSED_MASK; + if (_young) { + young = 1; + clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); + } + spte = rmap_next(kvm, rmapp, spte); + } + return young; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + return kvm_handle_hva(kvm, hva, kvm_age_rmapp); +} + #ifdef MMU_DEBUG static int is_empty_shadow_page(u64 *spt) { @@ -1203,6 +1281,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) int r; int largepage = 0; pfn_t pfn; + unsigned long mmu_seq; down_read(¤t->mm->mmap_sem); if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { @@ -1210,6 +1289,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) largepage = 1; } + mmu_seq = vcpu->kvm->mmu_notifier_seq; + /* implicit mb(), we'll read before PT lock is unlocked */ pfn = gfn_to_pfn(vcpu->kvm, gfn); up_read(¤t->mm->mmap_sem); @@ -1220,6 +1301,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) } 
spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu, mmu_seq)) + goto out_unlock; kvm_mmu_free_some_pages(vcpu); r = __direct_map(vcpu, v, write, largepage, gfn, pfn, PT32E_ROOT_LEVEL); @@ -1227,6 +1310,11 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) return r; + +out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + return 0; } @@ -1345,6 +1433,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, int r; int largepage = 0; gfn_t gfn = gpa >> PAGE_SHIFT; + unsigned long mmu_seq; ASSERT(vcpu); ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); @@ -1358,6 +1447,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, gfn &= ~(KVM_PAGES_PER_HPAGE-1); largepage = 1; } + mmu_seq = vcpu->kvm->mmu_notifier_seq; + /* implicit mb(), we'll read before PT lock is unlocked */ pfn = gfn_to_pfn(vcpu->kvm, gfn); up_read(¤t->mm->mmap_sem); if (is_error_pfn(pfn)) { @@ -1365,12 +1456,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, return 1; } spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu, mmu_seq)) + goto out_unlock; kvm_mmu_free_some_pages(vcpu); r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); spin_unlock(&vcpu->kvm->mmu_lock); return r; + +out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + return 0; } static void nonpaging_free(struct kvm_vcpu *vcpu) @@ -1670,6 +1768,8 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gfn &= ~(KVM_PAGES_PER_HPAGE-1); vcpu->arch.update_pte.largepage = 1; } + vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; + /* implicit mb(), we'll read before PT lock is unlocked */ pfn = gfn_to_pfn(vcpu->kvm, gfn); up_read(¤t->mm->mmap_sem); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 4d918220baeb..f72ac1fa35f0 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -263,6 +263,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, pfn = vcpu->arch.update_pte.pfn; if (is_error_pfn(pfn)) return; + if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) + return; kvm_get_pfn(pfn); mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), @@ -380,6 +382,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, int r; pfn_t pfn; int largepage = 0; + unsigned long mmu_seq; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); kvm_mmu_audit(vcpu, "pre page fault"); @@ -413,6 +416,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, largepage = 1; } } + mmu_seq = vcpu->kvm->mmu_notifier_seq; + /* implicit mb(), we'll read before PT lock is unlocked */ pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); up_read(¤t->mm->mmap_sem); @@ -424,6 +429,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, } spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu, mmu_seq)) + goto out_unlock; kvm_mmu_free_some_pages(vcpu); shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, largepage, &write_pt, pfn); @@ -439,6 +446,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, spin_unlock(&vcpu->kvm->mmu_lock); return write_pt; + +out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + return 0; } static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) diff --git a/include/asm-x86/kvm_host.h 
b/include/asm-x86/kvm_host.h index bc34dc21f178..0f3c53114614 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -251,6 +252,7 @@ struct kvm_vcpu_arch { gfn_t gfn; /* presumed gfn during guest pte update */ pfn_t pfn; /* pfn corresponding to that gfn */ int largepage; + unsigned long mmu_seq; } update_pte; struct i387_fxsave_struct host_fx_image; @@ -729,4 +731,8 @@ asmlinkage void kvm_handle_fault_on_reboot(void); KVM_EX_ENTRY " 666b, 667b \n\t" \ ".popsection" +#define KVM_ARCH_WANT_MMU_NOTIFIER +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +int kvm_age_hva(struct kvm *kvm, unsigned long hva); + #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 07d68a8ae8e9..8525afc53107 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -121,6 +121,12 @@ struct kvm { struct kvm_coalesced_mmio_dev *coalesced_mmio_dev; struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; #endif + +#ifdef KVM_ARCH_WANT_MMU_NOTIFIER + struct mmu_notifier mmu_notifier; + unsigned long mmu_notifier_seq; + long mmu_notifier_count; +#endif }; /* The guest did something we don't support. */ @@ -332,4 +338,22 @@ int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) #define kvm_trace_cleanup() ((void)0) #endif +#ifdef KVM_ARCH_WANT_MMU_NOTIFIER +static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq) +{ + if (unlikely(vcpu->kvm->mmu_notifier_count)) + return 1; + /* + * Both reads happen under the mmu_lock and both values are + * modified under mmu_lock, so there's no need of smb_rmb() + * here in between, otherwise mmu_notifier_count should be + * read before mmu_notifier_seq, see + * mmu_notifier_invalidate_range_end write side. + */ + if (vcpu->kvm->mmu_notifier_seq != mmu_seq) + return 1; + return 0; +} +#endif + #endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3735212cd3f8..7dd9b0b85e4e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -192,6 +192,123 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); +#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) +static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) +{ + return container_of(mn, struct kvm, mmu_notifier); +} + +static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + int need_tlb_flush; + + /* + * When ->invalidate_page runs, the linux pte has been zapped + * already but the page is still allocated until + * ->invalidate_page returns. So if we increase the sequence + * here the kvm page fault will notice if the spte can't be + * established because the page is going to be freed. If + * instead the kvm page fault establishes the spte before + * ->invalidate_page runs, kvm_unmap_hva will release it + * before returning. + * + * The sequence increase only need to be seen at spin_unlock + * time, and not at spin_lock time. + * + * Increasing the sequence after the spin_unlock would be + * unsafe because the kvm page fault could then establish the + * pte after kvm_unmap_hva returned, without noticing the page + * is going to be freed. 
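Condensing the fault-path hunks from mmu.c and paging_tmpl.h above: each page fault samples mmu_notifier_seq before resolving the gfn to a pfn, then re-checks it under mmu_lock before installing the shadow pte, releasing the page and letting the guest refault if an invalidate ran in between. A stripped-down sketch of that pattern (not literal code from the patch; the largepage and mmap_sem handling are omitted):

	unsigned long mmu_seq;
	pfn_t pfn;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	/* implicit mb(), we'll read before PT lock is unlocked */
	pfn = gfn_to_pfn(vcpu->kvm, gfn);

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu, mmu_seq))
		goto out_unlock;	/* an invalidate raced with us */
	/* ... safe to establish the spte here, e.g. via __direct_map() ... */
	spin_unlock(&vcpu->kvm->mmu_lock);
	return r;

out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return 0;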
+ */ + spin_lock(&kvm->mmu_lock); + kvm->mmu_notifier_seq++; + need_tlb_flush = kvm_unmap_hva(kvm, address); + spin_unlock(&kvm->mmu_lock); + + /* we've to flush the tlb before the pages can be freed */ + if (need_tlb_flush) + kvm_flush_remote_tlbs(kvm); + +} + +static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + int need_tlb_flush = 0; + + spin_lock(&kvm->mmu_lock); + /* + * The count increase must become visible at unlock time as no + * spte can be established without taking the mmu_lock and + * count is also read inside the mmu_lock critical section. + */ + kvm->mmu_notifier_count++; + for (; start < end; start += PAGE_SIZE) + need_tlb_flush |= kvm_unmap_hva(kvm, start); + spin_unlock(&kvm->mmu_lock); + + /* we've to flush the tlb before the pages can be freed */ + if (need_tlb_flush) + kvm_flush_remote_tlbs(kvm); +} + +static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + + spin_lock(&kvm->mmu_lock); + /* + * This sequence increase will notify the kvm page fault that + * the page that is going to be mapped in the spte could have + * been freed. + */ + kvm->mmu_notifier_seq++; + /* + * The above sequence increase must be visible before the + * below count decrease but both values are read by the kvm + * page fault under mmu_lock spinlock so we don't need to add + * a smb_wmb() here in between the two. + */ + kvm->mmu_notifier_count--; + spin_unlock(&kvm->mmu_lock); + + BUG_ON(kvm->mmu_notifier_count < 0); +} + +static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + int young; + + spin_lock(&kvm->mmu_lock); + young = kvm_age_hva(kvm, address); + spin_unlock(&kvm->mmu_lock); + + if (young) + kvm_flush_remote_tlbs(kvm); + + return young; +} + +static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { + .invalidate_page = kvm_mmu_notifier_invalidate_page, + .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, + .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, + .clear_flush_young = kvm_mmu_notifier_clear_flush_young, +}; +#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ + static struct kvm *kvm_create_vm(void) { struct kvm *kvm = kvm_arch_create_vm(); @@ -212,6 +329,21 @@ static struct kvm *kvm_create_vm(void) (struct kvm_coalesced_mmio_ring *)page_address(page); #endif +#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) + { + int err; + kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; + err = mmu_notifier_register(&kvm->mmu_notifier, current->mm); + if (err) { +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + put_page(page); +#endif + kfree(kvm); + return ERR_PTR(err); + } + } +#endif + kvm->mm = current->mm; atomic_inc(&kvm->mm->mm_count); spin_lock_init(&kvm->mmu_lock); @@ -271,6 +403,9 @@ static void kvm_destroy_vm(struct kvm *kvm) #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET if (kvm->coalesced_mmio_ring != NULL) free_page((unsigned long)kvm->coalesced_mmio_ring); +#endif +#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) + mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); #endif kvm_arch_destroy_vm(kvm); mmdrop(mm); -- cgit v1.2.3 From ed8486243379ef3e6c61363df915882945c0eaec Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: 
Tue, 29 Jul 2008 11:30:57 +0300 Subject: KVM: Advertise synchronized mmu support to userspace Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 1 + include/linux/kvm.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c7b01efe0646..0d682fc6aeb3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -883,6 +883,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: + case KVM_CAP_SYNC_MMU: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 0ea064cbfbc8..69511f74f912 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -371,6 +371,7 @@ struct kvm_trace_rec { #define KVM_CAP_PV_MMU 13 #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 +#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ /* * ioctls for VM fds -- cgit v1.2.3 From 8978b74253280d59e97cf49a3ec2c0cbccd5b801 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 29 Jul 2008 13:38:53 +0900 Subject: generic, x86: fix add iommu_num_pages helper function This IOMMU helper function doesn't work for some architectures: http://marc.info/?l=linux-kernel&m=121699304403202&w=2 It also breaks POWER and SPARC builds: http://marc.info/?l=linux-kernel&m=121730388001890&w=2 Currently, only x86 IOMMUs use this so let's move it to x86 for now. Reported-by: Stephen Rothwell Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 8 ++++++++ include/asm-x86/iommu.h | 2 ++ include/linux/iommu-helper.h | 1 - lib/iommu-helper.c | 8 -------- 4 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 8dbffb846de9..87d4d6964ec2 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void) pci_swiotlb_init(); } + +unsigned long iommu_num_pages(unsigned long addr, unsigned long len) +{ + unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); + + return size >> PAGE_SHIFT; +} +EXPORT_SYMBOL(iommu_num_pages); #endif /* diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index ecc8061904a9..5f888cc5be49 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h @@ -7,6 +7,8 @@ extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; +extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); + #ifdef CONFIG_GART_IOMMU extern int gart_iommu_aperture; extern int gart_iommu_aperture_allowed; diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index f8598f583944..c975caf75385 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -8,4 +8,3 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long align_mask); extern void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr); -extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c index 889ddce2021e..a3b8d4c3f77a 100644 --- a/lib/iommu-helper.c +++ b/lib/iommu-helper.c @@ -80,11 +80,3 @@ void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr) } } EXPORT_SYMBOL(iommu_area_free); - -unsigned long iommu_num_pages(unsigned long addr, unsigned long len) -{ - unsigned long size = roundup((addr & 
~PAGE_MASK) + len, PAGE_SIZE); - - return size >> PAGE_SHIFT; -} -EXPORT_SYMBOL(iommu_num_pages); -- cgit v1.2.3 From 1795cf48b322b4d19230a40dbe7181acedd34a94 Mon Sep 17 00:00:00 2001 From: Adrian McMenamin Date: Tue, 29 Jul 2008 22:10:56 +0900 Subject: sh/maple: clean maple bus code This patch cleans up the handling of the maple bus queue to remove the risk of races when adding packets. It also removes references to the redundant connect and disconnect functions. Signed-off-by: Adrian McMenamin Signed-off-by: Paul Mundt --- drivers/sh/maple/maple.c | 265 ++++++++++++++++++++++++++++++++--------------- include/linux/maple.h | 6 +- 2 files changed, 189 insertions(+), 82 deletions(-) (limited to 'include/linux') diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c index 617efb1640b1..be97789fa5fd 100644 --- a/drivers/sh/maple/maple.c +++ b/drivers/sh/maple/maple.c @@ -24,13 +24,12 @@ #include #include #include +#include #include #include #include -#include -#include -#include -#include +#include +#include MODULE_AUTHOR("Yaegshi Takeshi, Paul Mundt, M.R. Brown, Adrian McMenamin"); MODULE_DESCRIPTION("Maple bus driver for Dreamcast"); @@ -46,14 +45,15 @@ static DECLARE_WORK(maple_vblank_process, maple_vblank_handler); static LIST_HEAD(maple_waitq); static LIST_HEAD(maple_sentq); -static DEFINE_MUTEX(maple_list_lock); +/* mutex to protect queue of waiting packets */ +static DEFINE_MUTEX(maple_wlist_lock); static struct maple_driver maple_dummy_driver; static struct device maple_bus; static int subdevice_map[MAPLE_PORTS]; static unsigned long *maple_sendbuf, *maple_sendptr, *maple_lastptr; static unsigned long maple_pnp_time; -static int started, scanning, liststatus, fullscan; +static int started, scanning, fullscan; static struct kmem_cache *maple_queue_cache; struct maple_device_specify { @@ -129,35 +129,124 @@ static void maple_release_device(struct device *dev) kfree(mdev); } -/** +/* * maple_add_packet - add a single instruction to the queue - * @mq: instruction to add to waiting queue + * @mdev - maple device + * @function - function on device being queried + * @command - maple command to add + * @length - length of command string (in 32 bit words) + * @data - remainder of command string */ -void maple_add_packet(struct mapleq *mq) +int maple_add_packet(struct maple_device *mdev, u32 function, u32 command, + size_t length, void *data) { - mutex_lock(&maple_list_lock); - list_add(&mq->list, &maple_waitq); - mutex_unlock(&maple_list_lock); + int locking, ret = 0; + void *sendbuf = NULL; + + mutex_lock(&maple_wlist_lock); + /* bounce if device already locked */ + locking = mutex_is_locked(&mdev->mq->mutex); + if (locking) { + ret = -EBUSY; + goto out; + } + + mutex_lock(&mdev->mq->mutex); + + if (length) { + sendbuf = kmalloc(length * 4, GFP_KERNEL); + if (!sendbuf) { + mutex_unlock(&mdev->mq->mutex); + ret = -ENOMEM; + goto out; + } + ((__be32 *)sendbuf)[0] = cpu_to_be32(function); + } + + mdev->mq->command = command; + mdev->mq->length = length; + if (length > 1) + memcpy(sendbuf + 4, data, (length - 1) * 4); + mdev->mq->sendbuf = sendbuf; + + list_add(&mdev->mq->list, &maple_waitq); +out: + mutex_unlock(&maple_wlist_lock); + return ret; } EXPORT_SYMBOL_GPL(maple_add_packet); +/* + * maple_add_packet_sleeps - add a single instruction to the queue + * - waits for lock to be free + * @mdev - maple device + * @function - function on device being queried + * @command - maple command to add + * @length - length of command string (in 32 bit words) + * @data - remainder of command 
string + */ +int maple_add_packet_sleeps(struct maple_device *mdev, u32 function, + u32 command, size_t length, void *data) +{ + int locking, ret = 0; + void *sendbuf = NULL; + + locking = mutex_lock_interruptible(&mdev->mq->mutex); + if (locking) { + ret = -EIO; + goto out; + } + + if (length) { + sendbuf = kmalloc(length * 4, GFP_KERNEL); + if (!sendbuf) { + mutex_unlock(&mdev->mq->mutex); + ret = -ENOMEM; + goto out; + } + ((__be32 *)sendbuf)[0] = cpu_to_be32(function); + } + + mdev->mq->command = command; + mdev->mq->length = length; + if (length > 1) + memcpy(sendbuf + 4, data, (length - 1) * 4); + mdev->mq->sendbuf = sendbuf; + + mutex_lock(&maple_wlist_lock); + list_add(&mdev->mq->list, &maple_waitq); + mutex_unlock(&maple_wlist_lock); +out: + return ret; +} +EXPORT_SYMBOL_GPL(maple_add_packet_sleeps); + static struct mapleq *maple_allocq(struct maple_device *mdev) { struct mapleq *mq; mq = kmalloc(sizeof(*mq), GFP_KERNEL); if (!mq) - return NULL; + goto failed_nomem; mq->dev = mdev; mq->recvbufdcsp = kmem_cache_zalloc(maple_queue_cache, GFP_KERNEL); mq->recvbuf = (void *) P2SEGADDR(mq->recvbufdcsp); - if (!mq->recvbuf) { - kfree(mq); - return NULL; - } + if (!mq->recvbuf) + goto failed_p2; + /* + * most devices do not need the mutex - but + * anything that injects block reads or writes + * will rely on it + */ + mutex_init(&mq->mutex); return mq; + +failed_p2: + kfree(mq); +failed_nomem: + return NULL; } static struct maple_device *maple_alloc_dev(int port, int unit) @@ -178,7 +267,6 @@ static struct maple_device *maple_alloc_dev(int port, int unit) } mdev->dev.bus = &maple_bus_type; mdev->dev.parent = &maple_bus; - mdev->function = 0; return mdev; } @@ -216,7 +304,6 @@ static void maple_build_block(struct mapleq *mq) *maple_sendptr++ = PHYSADDR(mq->recvbuf); *maple_sendptr++ = mq->command | (to << 8) | (from << 16) | (len << 24); - while (len-- > 0) *maple_sendptr++ = *lsendbuf++; } @@ -224,22 +311,27 @@ static void maple_build_block(struct mapleq *mq) /* build up command queue */ static void maple_send(void) { - int i; - int maple_packets; + int i, maple_packets = 0; struct mapleq *mq, *nmq; if (!list_empty(&maple_sentq)) return; - if (list_empty(&maple_waitq) || !maple_dma_done()) + mutex_lock(&maple_wlist_lock); + if (list_empty(&maple_waitq) || !maple_dma_done()) { + mutex_unlock(&maple_wlist_lock); return; - maple_packets = 0; - maple_sendptr = maple_lastptr = maple_sendbuf; + } + mutex_unlock(&maple_wlist_lock); + maple_lastptr = maple_sendbuf; + maple_sendptr = maple_sendbuf; + mutex_lock(&maple_wlist_lock); list_for_each_entry_safe(mq, nmq, &maple_waitq, list) { maple_build_block(mq); list_move(&mq->list, &maple_sentq); if (maple_packets++ > MAPLE_MAXPACKETS) break; } + mutex_unlock(&maple_wlist_lock); if (maple_packets > 0) { for (i = 0; i < (1 << MAPLE_DMA_PAGES); i++) dma_cache_sync(0, maple_sendbuf + i * PAGE_SIZE, @@ -247,7 +339,8 @@ static void maple_send(void) } } -static int attach_matching_maple_driver(struct device_driver *driver, +/* check if there is a driver registered likely to match this device */ +static int check_matching_maple_driver(struct device_driver *driver, void *devptr) { struct maple_driver *maple_drv; @@ -255,12 +348,8 @@ static int attach_matching_maple_driver(struct device_driver *driver, mdev = devptr; maple_drv = to_maple_driver(driver); - if (mdev->devinfo.function & be32_to_cpu(maple_drv->function)) { - if (maple_drv->connect(mdev) == 0) { - mdev->driver = maple_drv; - return 1; - } - } + if (mdev->devinfo.function & 
cpu_to_be32(maple_drv->function)) + return 1; return 0; } @@ -268,11 +357,6 @@ static void maple_detach_driver(struct maple_device *mdev) { if (!mdev) return; - if (mdev->driver) { - if (mdev->driver->disconnect) - mdev->driver->disconnect(mdev); - } - mdev->driver = NULL; device_unregister(&mdev->dev); mdev = NULL; } @@ -328,8 +412,8 @@ static void maple_attach_driver(struct maple_device *mdev) mdev->port, mdev->unit, function); matched = - bus_for_each_drv(&maple_bus_type, NULL, mdev, - attach_matching_maple_driver); + bus_for_each_drv(&maple_bus_type, NULL, mdev, + check_matching_maple_driver); if (matched == 0) { /* Driver does not exist yet */ @@ -373,45 +457,48 @@ static int detach_maple_device(struct device *device, void *portptr) static int setup_maple_commands(struct device *device, void *ignored) { + int add; struct maple_device *maple_dev = to_maple_dev(device); if ((maple_dev->interval > 0) && time_after(jiffies, maple_dev->when)) { - maple_dev->when = jiffies + maple_dev->interval; - maple_dev->mq->command = MAPLE_COMMAND_GETCOND; - maple_dev->mq->sendbuf = &maple_dev->function; - maple_dev->mq->length = 1; - maple_add_packet(maple_dev->mq); - liststatus++; + /* bounce if we cannot lock */ + add = maple_add_packet(maple_dev, + be32_to_cpu(maple_dev->devinfo.function), + MAPLE_COMMAND_GETCOND, 1, NULL); + if (!add) + maple_dev->when = jiffies + maple_dev->interval; } else { - if (time_after(jiffies, maple_pnp_time)) { - maple_dev->mq->command = MAPLE_COMMAND_DEVINFO; - maple_dev->mq->length = 0; - maple_add_packet(maple_dev->mq); - liststatus++; - } + if (time_after(jiffies, maple_pnp_time)) + /* This will also bounce */ + maple_add_packet(maple_dev, 0, + MAPLE_COMMAND_DEVINFO, 0, NULL); } - return 0; } /* VBLANK bottom half - implemented via workqueue */ static void maple_vblank_handler(struct work_struct *work) { - if (!maple_dma_done()) - return; - if (!list_empty(&maple_sentq)) + if (!list_empty(&maple_sentq) || !maple_dma_done()) return; + ctrl_outl(0, MAPLE_ENABLE); - liststatus = 0; + bus_for_each_dev(&maple_bus_type, NULL, NULL, setup_maple_commands); + if (time_after(jiffies, maple_pnp_time)) maple_pnp_time = jiffies + MAPLE_PNP_INTERVAL; - if (liststatus && list_empty(&maple_sentq)) { - INIT_LIST_HEAD(&maple_sentq); + + mutex_lock(&maple_wlist_lock); + if (!list_empty(&maple_waitq) && list_empty(&maple_sentq)) { + mutex_unlock(&maple_wlist_lock); maple_send(); + } else { + mutex_unlock(&maple_wlist_lock); } + maplebus_dma_reset(); } @@ -422,8 +509,8 @@ static void maple_map_subunits(struct maple_device *mdev, int submask) struct maple_device *mdev_add; struct maple_device_specify ds; + ds.port = mdev->port; for (k = 0; k < 5; k++) { - ds.port = mdev->port; ds.unit = k + 1; retval = bus_for_each_dev(&maple_bus_type, NULL, &ds, @@ -437,9 +524,9 @@ static void maple_map_subunits(struct maple_device *mdev, int submask) mdev_add = maple_alloc_dev(mdev->port, k + 1); if (!mdev_add) return; - mdev_add->mq->command = MAPLE_COMMAND_DEVINFO; - mdev_add->mq->length = 0; - maple_add_packet(mdev_add->mq); + maple_add_packet(mdev_add, 0, MAPLE_COMMAND_DEVINFO, + 0, NULL); + /* mark that we are checking sub devices */ scanning = 1; } submask = submask >> 1; @@ -505,6 +592,28 @@ static void maple_response_devinfo(struct maple_device *mdev, } } +static void maple_port_rescan(void) +{ + int i; + struct maple_device *mdev; + + fullscan = 1; + for (i = 0; i < MAPLE_PORTS; i++) { + if (checked[i] == false) { + fullscan = 0; + mdev = baseunits[i]; + /* + * test lock in case scan has failed 
+ * but device is still locked + */ + if (mutex_is_locked(&mdev->mq->mutex)) + mutex_unlock(&mdev->mq->mutex); + maple_add_packet(mdev, 0, MAPLE_COMMAND_DEVINFO, + 0, NULL); + } + } +} + /* maple dma end bottom half - implemented via workqueue */ static void maple_dma_handler(struct work_struct *work) { @@ -512,7 +621,6 @@ static void maple_dma_handler(struct work_struct *work) struct maple_device *dev; char *recvbuf; enum maple_code code; - int i; if (!maple_dma_done()) return; @@ -522,6 +630,10 @@ static void maple_dma_handler(struct work_struct *work) recvbuf = mq->recvbuf; code = recvbuf[0]; dev = mq->dev; + kfree(mq->sendbuf); + mutex_unlock(&mq->mutex); + list_del_init(&mq->list); + switch (code) { case MAPLE_RESPONSE_NONE: maple_response_none(dev, mq); @@ -558,26 +670,16 @@ static void maple_dma_handler(struct work_struct *work) break; } } - INIT_LIST_HEAD(&maple_sentq); + /* if scanning is 1 then we have subdevices to check */ if (scanning == 1) { maple_send(); scanning = 2; } else scanning = 0; - - if (!fullscan) { - fullscan = 1; - for (i = 0; i < MAPLE_PORTS; i++) { - if (checked[i] == false) { - fullscan = 0; - dev = baseunits[i]; - dev->mq->command = - MAPLE_COMMAND_DEVINFO; - dev->mq->length = 0; - maple_add_packet(dev->mq); - } - } - } + /*check if we have actually tested all ports yet */ + if (!fullscan) + maple_port_rescan(); + /* mark that we have been through the first scan */ if (started == 0) started = 1; } @@ -631,7 +733,7 @@ static int match_maple_bus_driver(struct device *devptr, if (maple_dev->devinfo.function == 0xFFFFFFFF) return 0; else if (maple_dev->devinfo.function & - be32_to_cpu(maple_drv->function)) + cpu_to_be32(maple_drv->function)) return 1; return 0; } @@ -713,6 +815,9 @@ static int __init maple_bus_init(void) if (!maple_queue_cache) goto cleanup_bothirqs; + INIT_LIST_HEAD(&maple_waitq); + INIT_LIST_HEAD(&maple_sentq); + /* setup maple ports */ for (i = 0; i < MAPLE_PORTS; i++) { checked[i] = false; @@ -723,9 +828,7 @@ static int __init maple_bus_init(void) maple_free_dev(mdev[i]); goto cleanup_cache; } - mdev[i]->mq->command = MAPLE_COMMAND_DEVINFO; - mdev[i]->mq->length = 0; - maple_add_packet(mdev[i]->mq); + maple_add_packet(mdev[i], 0, MAPLE_COMMAND_DEVINFO, 0, NULL); subdevice_map[i] = 0; } diff --git a/include/linux/maple.h b/include/linux/maple.h index 523a286bb477..c853b1066018 100644 --- a/include/linux/maple.h +++ b/include/linux/maple.h @@ -2,6 +2,7 @@ #define __LINUX_MAPLE_H #include +#include extern struct bus_type maple_bus_type; @@ -33,6 +34,7 @@ struct mapleq { void *sendbuf, *recvbuf, *recvbufdcsp; unsigned char length; enum maple_code command; + struct mutex mutex; }; struct maple_devinfo { @@ -69,7 +71,9 @@ void maple_getcond_callback(struct maple_device *dev, unsigned long interval, unsigned long function); int maple_driver_register(struct device_driver *drv); -void maple_add_packet(struct mapleq *mq); +int maple_add_packet_sleeps(struct maple_device *mdev, u32 function, + u32 command, u32 length, void *data); +void maple_clear_dev(struct maple_device *mdev); #define to_maple_dev(n) container_of(n, struct maple_device, dev) #define to_maple_driver(n) container_of(n, struct maple_driver, drv) -- cgit v1.2.3 From f1b23361a0f15497d4c6795a2935b2e98064ddfb Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Mon, 21 Jul 2008 21:18:19 -0300 Subject: rfkill: document the rfkill struct locking (v2) Reorder fields in struct rfkill and add comments to make it clear which fields are protected by rfkill->mutex. 
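Concretely, the rule being documented is that rfkill->mutex both serializes the toggle_radio() callback and guards rfkill->state, so a state transition looks roughly like the sketch below. This is only an illustration of the locking rule, not code from the patch; new_state is a local variable invented for the example, and the fields and callback it touches are the ones visible in the diff that follows.

	mutex_lock(&rfkill->mutex);
	if (rfkill->state != new_state) {
		/* callback invocation and state update both happen under the mutex */
		int err = rfkill->toggle_radio(rfkill->data, new_state);
		if (!err)
			rfkill->state = new_state;
	}
	mutex_unlock(&rfkill->mutex);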
Signed-off-by: Henrique de Moraes Holschuh Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- include/linux/rfkill.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index c5f6e54ec6ae..741d1a62cc3f 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -68,7 +68,8 @@ enum rfkill_state { * @user_claim_unsupported: Whether the hardware supports exclusive * RF-kill control by userspace. Set this before registering. * @user_claim: Set when the switch is controlled exlusively by userspace. - * @mutex: Guards switch state transitions + * @mutex: Guards switch state transitions. It serializes callbacks + * and also protects the state. * @data: Pointer to the RF button drivers private data which will be * passed along when toggling radio state. * @toggle_radio(): Mandatory handler to control state of the radio. @@ -89,12 +90,13 @@ struct rfkill { const char *name; enum rfkill_type type; - enum rfkill_state state; bool user_claim_unsupported; bool user_claim; + /* the mutex serializes callbacks and also protects + * the state */ struct mutex mutex; - + enum rfkill_state state; void *data; int (*toggle_radio)(void *data, enum rfkill_state state); int (*get_state)(void *data, enum rfkill_state *state); -- cgit v1.2.3 From d0f09804144fd9471a13cf4d80e66842c7fa114f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 29 Jul 2008 11:32:07 +0200 Subject: mac80211: partially fix skb->cb use This patch fixes mac80211 to not use the skb->cb over the queue step from virtual interfaces to the master. The patch also, for now, disables aggregation because that would still require requeuing, will fix that in a separate patch. There are two other places (software requeue and powersaving stations) where requeue can happen, but that is not currently used by any drivers/not possible to use respectively. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- drivers/net/wireless/ath5k/base.c | 2 +- drivers/net/wireless/b43/xmit.c | 2 +- drivers/net/wireless/b43legacy/xmit.c | 2 +- drivers/net/wireless/iwlwifi/iwl-tx.c | 2 +- drivers/net/wireless/iwlwifi/iwl3945-base.c | 2 +- drivers/net/wireless/rt2x00/rt2x00mac.c | 2 +- include/linux/skbuff.h | 5 ++- include/net/mac80211.h | 6 ---- net/core/skbuff.c | 3 ++ net/mac80211/main.c | 8 +---- net/mac80211/mlme.c | 8 ++--- net/mac80211/tx.c | 47 +++++++++++++---------------- net/mac80211/wme.c | 3 ++ 13 files changed, 40 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c index 1106d1c06298..ff3fad794b61 100644 --- a/drivers/net/wireless/ath5k/base.c +++ b/drivers/net/wireless/ath5k/base.c @@ -1237,7 +1237,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf) pktlen = skb->len; - if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) { + if (info->control.hw_key) { keyidx = info->control.hw_key->hw_key_idx; pktlen += info->control.icv_len; } diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c index 8d54502222a6..9dda8169f7cc 100644 --- a/drivers/net/wireless/b43/xmit.c +++ b/drivers/net/wireless/b43/xmit.c @@ -192,7 +192,7 @@ int b43_generate_txhdr(struct b43_wldev *dev, const struct b43_phy *phy = &dev->phy; const struct ieee80211_hdr *wlhdr = (const struct ieee80211_hdr *)fragment_data; - int use_encryption = (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)); + int use_encryption = !!info->control.hw_key; __le16 fctl = wlhdr->frame_control; struct ieee80211_rate *fbrate; u8 rate, rate_fb; diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c index e969ed8d412d..68e1f8c78727 100644 --- a/drivers/net/wireless/b43legacy/xmit.c +++ b/drivers/net/wireless/b43legacy/xmit.c @@ -192,7 +192,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev, u16 cookie) { const struct ieee80211_hdr *wlhdr; - int use_encryption = (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)); + int use_encryption = !!info->control.hw_key; u16 fctl; u8 rate; struct ieee80211_rate *rate_fb; diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index 9b50b1052b09..f72cd0bf6aa3 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -906,7 +906,7 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) * first entry */ iwl_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len); - if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) + if (info->control.hw_key) iwl_tx_cmd_build_hwcrypto(priv, info, tx_cmd, skb, sta_id); /* Set up TFD's 2nd entry to point directly to remainder of skb, diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 05121f395c4e..7c82ecfa30a4 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -2667,7 +2667,7 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv, struct sk_buff *skb) * first entry */ iwl3945_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len); - if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) + if (info->control.hw_key) iwl3945_build_tx_cmd_hwcrypto(priv, info, out_cmd, skb, 0); /* Set up TFD's 2nd entry to point directly to remainder of skb, diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index 042ab00d8bd2..c3ee4ecba792 100644 --- 
a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -63,7 +63,7 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev, */ memcpy(skb->cb, frag_skb->cb, sizeof(skb->cb)); rts_info = IEEE80211_SKB_CB(skb); - rts_info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + rts_info->control.hw_key = NULL; rts_info->flags &= ~IEEE80211_TX_CTL_USE_RTS_CTS; rts_info->flags &= ~IEEE80211_TX_CTL_USE_CTS_PROTECT; rts_info->flags &= ~IEEE80211_TX_CTL_REQ_TX_STATUS; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7ea44f6621f2..a640385e0598 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -316,7 +316,10 @@ struct sk_buff { #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif - /* 14 bit hole */ +#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) + __u8 do_not_encrypt:1; +#endif + /* 0/13/14 bit hole */ #ifdef CONFIG_NET_DMA dma_cookie_t dma_cookie; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 74487f268237..b52721008be8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -206,8 +206,6 @@ struct ieee80211_bss_conf { * These flags are used with the @flags member of &ieee80211_tx_info. * * @IEEE80211_TX_CTL_REQ_TX_STATUS: request TX status callback for this frame. - * @IEEE80211_TX_CTL_DO_NOT_ENCRYPT: send this frame without encryption; - * e.g., for EAPOL frame * @IEEE80211_TX_CTL_USE_RTS_CTS: use RTS-CTS before sending frame * @IEEE80211_TX_CTL_USE_CTS_PROTECT: use CTS protection for the frame (e.g., * for combined 802.11g / 802.11b networks) @@ -220,7 +218,6 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_SHORT_PREAMBLE: TBD * @IEEE80211_TX_CTL_LONG_RETRY_LIMIT: this frame should be send using the * through set_retry_limit configured long retry value - * @IEEE80211_TX_CTL_EAPOL_FRAME: internal to mac80211 * @IEEE80211_TX_CTL_SEND_AFTER_DTIM: send this frame after DTIM beacon * @IEEE80211_TX_CTL_AMPDU: this frame should be sent as part of an A-MPDU * @IEEE80211_TX_CTL_OFDM_HT: this frame can be sent in HT OFDM rates. 
number @@ -253,7 +250,6 @@ struct ieee80211_bss_conf { */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), - IEEE80211_TX_CTL_DO_NOT_ENCRYPT = BIT(1), IEEE80211_TX_CTL_USE_RTS_CTS = BIT(2), IEEE80211_TX_CTL_USE_CTS_PROTECT = BIT(3), IEEE80211_TX_CTL_NO_ACK = BIT(4), @@ -263,7 +259,6 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_FIRST_FRAGMENT = BIT(8), IEEE80211_TX_CTL_SHORT_PREAMBLE = BIT(9), IEEE80211_TX_CTL_LONG_RETRY_LIMIT = BIT(10), - IEEE80211_TX_CTL_EAPOL_FRAME = BIT(11), IEEE80211_TX_CTL_SEND_AFTER_DTIM = BIT(12), IEEE80211_TX_CTL_AMPDU = BIT(13), IEEE80211_TX_CTL_OFDM_HT = BIT(14), @@ -323,7 +318,6 @@ struct ieee80211_tx_info { struct ieee80211_vif *vif; struct ieee80211_key_conf *hw_key; unsigned long jiffies; - int ifindex; u16 aid; s8 rts_cts_rate_idx, alt_retry_rate_idx; u8 retry_limit; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e0c92274189..84640172d65d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -485,6 +485,9 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(head); C(data); C(truesize); +#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) + C(do_not_encrypt); +#endif atomic_set(&n->users, 1); atomic_inc(&(skb_shinfo(skb)->dataref)); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index b5830f7055cf..a4c5b90de769 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1233,18 +1233,12 @@ static void ieee80211_tasklet_handler(unsigned long data) /* Remove added headers (e.g., QoS control), encryption header/MIC, etc. to * make a prepared TX frame (one that has been given to hw) to look like brand * new IEEE 802.11 frame that is ready to go through TX processing again. - * Also, tx_packet_data in cb is restored from tx_control. 
*/ + */ static void ieee80211_remove_tx_extra(struct ieee80211_local *local, struct ieee80211_key *key, struct sk_buff *skb) { int hdrlen, iv_len, mic_len; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - - info->flags &= IEEE80211_TX_CTL_REQ_TX_STATUS | - IEEE80211_TX_CTL_DO_NOT_ENCRYPT | - IEEE80211_TX_CTL_REQUEUE | - IEEE80211_TX_CTL_EAPOL_FRAME; hdrlen = ieee80211_get_hdrlen_from_skb(skb); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d7c371e36bf0..35eb767cbcbe 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -606,7 +606,6 @@ void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb, int encrypt) { struct ieee80211_sub_if_data *sdata; - struct ieee80211_tx_info *info; sdata = IEEE80211_DEV_TO_SUB_IF(dev); skb->dev = sdata->local->mdev; @@ -614,11 +613,8 @@ void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb, skb_set_network_header(skb, 0); skb_set_transport_header(skb, 0); - info = IEEE80211_SKB_CB(skb); - memset(info, 0, sizeof(struct ieee80211_tx_info)); - info->control.ifindex = sdata->dev->ifindex; - if (!encrypt) - info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + skb->iif = sdata->dev->ifindex; + skb->do_not_encrypt = !encrypt; dev_queue_xmit(skb); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index c5f78059c6ca..69019e943873 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -439,14 +439,14 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); u16 fc = tx->fc; - if (unlikely(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) + if (unlikely(tx->skb->do_not_encrypt)) tx->key = NULL; else if (tx->sta && (key = rcu_dereference(tx->sta->key))) tx->key = key; else if ((key = rcu_dereference(tx->sdata->default_key))) tx->key = key; else if (tx->sdata->drop_unencrypted && - !(info->flags & IEEE80211_TX_CTL_EAPOL_FRAME) && + (tx->skb->protocol != cpu_to_be16(ETH_P_PAE)) && !(info->flags & IEEE80211_TX_CTL_INJECTED)) { I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); return TX_DROP; @@ -476,7 +476,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) } if (!tx->key || !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) - info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + tx->skb->do_not_encrypt = 1; return TX_CONTINUE; } @@ -732,6 +732,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) memcpy(skb_put(frag, copylen), pos, copylen); memcpy(frag->cb, first->cb, sizeof(frag->cb)); skb_copy_queue_mapping(frag, first); + frag->do_not_encrypt = first->do_not_encrypt; pos += copylen; left -= copylen; @@ -852,7 +853,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, sband = tx->local->hw.wiphy->bands[tx->channel->band]; - info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + skb->do_not_encrypt = 1; info->flags |= IEEE80211_TX_CTL_INJECTED; tx->flags &= ~IEEE80211_TX_FRAGMENTED; @@ -925,8 +926,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, skb_trim(skb, skb->len - FCS_LEN); } if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP) - info->flags &= - ~IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + tx->skb->do_not_encrypt = 0; if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) tx->flags |= IEEE80211_TX_FRAGMENTED; break; @@ -1042,10 +1042,9 @@ static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx, struct sk_buff *skb, struct net_device *mdev) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct net_device *dev; - dev = dev_get_by_index(&init_net, info->control.ifindex); + dev = dev_get_by_index(&init_net, 
skb->iif); if (unlikely(dev && !is_ieee80211_device(dev, mdev))) { dev_put(dev); dev = NULL; @@ -1306,8 +1305,8 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, bool may_encrypt; int ret; - if (info->control.ifindex) - odev = dev_get_by_index(&init_net, info->control.ifindex); + if (skb->iif) + odev = dev_get_by_index(&init_net, skb->iif); if (unlikely(odev && !is_ieee80211_device(odev, dev))) { dev_put(odev); odev = NULL; @@ -1321,9 +1320,13 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, return 0; } + memset(info, 0, sizeof(*info)); + + info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + osdata = IEEE80211_DEV_TO_SUB_IF(odev); - may_encrypt = !(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT); + may_encrypt = !skb->do_not_encrypt; headroom = osdata->local->tx_headroom; if (may_encrypt) @@ -1348,7 +1351,6 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_radiotap_header *prthdr = (struct ieee80211_radiotap_header *)skb->data; u16 len_rthdr; @@ -1371,11 +1373,11 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, skb->dev = local->mdev; /* needed because we set skb device to master */ - info->control.ifindex = dev->ifindex; + skb->iif = dev->ifindex; - info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; - /* Interfaces should always request a status report */ - info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + /* sometimes we do encrypt injected frames, will be fixed + * up in radiotap parser if not wanted */ + skb->do_not_encrypt = 0; /* * fix up the pointers accounting for the radiotap @@ -1419,7 +1421,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_info *info; struct ieee80211_sub_if_data *sdata; int ret = 1, head_need; u16 ethertype, hdrlen, meshhdrlen = 0; @@ -1645,14 +1646,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, nh_pos += hdrlen; h_pos += hdrlen; - info = IEEE80211_SKB_CB(skb); - memset(info, 0, sizeof(*info)); - info->control.ifindex = dev->ifindex; - if (ethertype == ETH_P_PAE) - info->flags |= IEEE80211_TX_CTL_EAPOL_FRAME; - - /* Interfaces should always request a status report */ - info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + skb->iif = dev->ifindex; skb->dev = local->mdev; dev->stats.tx_packets++; @@ -1922,6 +1916,8 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, info = IEEE80211_SKB_CB(skb); + skb->do_not_encrypt = 1; + info->band = band; rate_control_get_rate(local->mdev, sband, skb, &rsel); @@ -1940,7 +1936,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, info->tx_rate_idx = rsel.rate_idx; info->flags |= IEEE80211_TX_CTL_NO_ACK; - info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; if (sdata->bss_conf.use_short_preamble && diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 07edda0b8a5c..28437f0001db 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -188,6 +188,9 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, { int i; + /* XXX: currently broken due to cb/requeue use */ + return -EPERM; + /* prepare the filter and save it for the SW queue * matching the received HW queue */ -- cgit v1.2.3 From ce0ad7f0952581ba75ab6aee55bb1ed9bb22cf4f Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 30 Jul 2008 15:23:13 
+1000 Subject: powerpc/mm: Lockless get_user_pages_fast() for 64-bit (v3) Implement lockless get_user_pages_fast for 64-bit powerpc. Page table existence is guaranteed with RCU, and speculative page references are used to take a reference to the pages without having a prior existence guarantee on them. Signed-off-by: Nick Piggin Signed-off-by: Dave Kleikamp Signed-off-by: Andrew Morton Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 3 + arch/powerpc/mm/Makefile | 3 +- arch/powerpc/mm/gup.c | 280 ++++++++++++++++++++++++++++++++++++ include/asm-powerpc/pgtable-ppc64.h | 2 + include/linux/pagemap.h | 23 +++ 5 files changed, 310 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/mm/gup.c (limited to 'include/linux') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 587da5e0990f..63c9cafda9c4 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -42,6 +42,9 @@ config GENERIC_HARDIRQS bool default y +config HAVE_GET_USER_PAGES_FAST + def_bool PPC64 + config HAVE_SETUP_PER_CPU_AREA def_bool PPC64 diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 1c00e0196f6c..e7392b45a5ef 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -12,7 +12,8 @@ obj-y := fault.o mem.o \ mmu_context_$(CONFIG_WORD_SIZE).o hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC64) += hash_utils_64.o \ - slb_low.o slb.o stab.o mmap.o $(hash-y) + slb_low.o slb.o stab.o \ + gup.o mmap.o $(hash-y) obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ tlb_$(CONFIG_WORD_SIZE).o diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c new file mode 100644 index 000000000000..9fdf4d6335e4 --- /dev/null +++ b/arch/powerpc/mm/gup.c @@ -0,0 +1,280 @@ +/* + * Lockless get_user_pages_fast for powerpc + * + * Copyright (C) 2008 Nick Piggin + * Copyright (C) 2008 Novell Inc. + */ +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include + +/* + * The performance critical leaf functions are made noinline otherwise gcc + * inlines everything into a single function which results in too much + * register pressure. 
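From a caller's point of view, the new entry point behaves like get_user_pages() on the current mm but without taking mmap_sem on the fast path. A hedged usage sketch (illustrative only; uaddr is an arbitrary user virtual address, not something defined by this patch):

	struct page *pages[16];
	int got, i;

	/* pin up to 16 pages at uaddr for writing, in the current task's mm */
	got = get_user_pages_fast(uaddr, 16, 1, pages);
	if (got > 0) {
		/* ... operate on the pinned pages ... */
		for (i = 0; i < got; i++)
			put_page(pages[i]);
	}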
+ */ +static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask, result; + pte_t *ptep; + + result = _PAGE_PRESENT|_PAGE_USER; + if (write) + result |= _PAGE_RW; + mask = result | _PAGE_SPECIAL; + + ptep = pte_offset_kernel(&pmd, addr); + do { + pte_t pte = *ptep; + struct page *page; + + if ((pte_val(pte) & mask) != result) + return 0; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); + if (!page_cache_get_speculative(page)) + return 0; + if (unlikely(pte != *ptep)) { + put_page(page); + return 0; + } + pages[*nr] = page; + (*nr)++; + + } while (ptep++, addr += PAGE_SIZE, addr != end); + + return 1; +} + +#ifdef CONFIG_HUGETLB_PAGE +static noinline int gup_huge_pte(pte_t *ptep, struct hstate *hstate, + unsigned long *addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long mask; + unsigned long pte_end; + struct page *head, *page; + pte_t pte; + int refs; + + pte_end = (*addr + huge_page_size(hstate)) & huge_page_mask(hstate); + if (pte_end < end) + end = pte_end; + + pte = *ptep; + mask = _PAGE_PRESENT|_PAGE_USER; + if (write) + mask |= _PAGE_RW; + if ((pte_val(pte) & mask) != mask) + return 0; + /* hugepages are never "special" */ + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + + refs = 0; + head = pte_page(pte); + page = head + ((*addr & ~huge_page_mask(hstate)) >> PAGE_SHIFT); + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (*addr += PAGE_SIZE, *addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + if (unlikely(pte != *ptep)) { + /* Could be optimized better */ + while (*nr) { + put_page(page); + (*nr)--; + } + } + + return 1; +} +#endif /* CONFIG_HUGETLB_PAGE */ + +static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pmd_t *pmdp; + + pmdp = pmd_offset(&pud, addr); + do { + pmd_t pmd = *pmdp; + + next = pmd_addr_end(addr, end); + if (pmd_none(pmd)) + return 0; + if (!gup_pte_range(pmd, addr, next, write, pages, nr)) + return 0; + } while (pmdp++, addr = next, addr != end); + + return 1; +} + +static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pud_t *pudp; + + pudp = pud_offset(&pgd, addr); + do { + pud_t pud = *pudp; + + next = pud_addr_end(addr, end); + if (pud_none(pud)) + return 0; + if (!gup_pmd_range(pud, addr, next, write, pages, nr)) + return 0; + } while (pudp++, addr = next, addr != end); + + return 1; +} + +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + pgd_t *pgdp; + int psize, nr = 0; + unsigned int shift; + + pr_debug("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + + if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, + start, len))) + goto slow_irqon; + + pr_debug(" aligned: %lx .. 
%lx\n", start, end); + +#ifdef CONFIG_HUGETLB_PAGE + /* We bail out on slice boundary crossing when hugetlb is + * enabled in order to not have to deal with two different + * page table formats + */ + if (addr < SLICE_LOW_TOP) { + if (end > SLICE_LOW_TOP) + goto slow_irqon; + + if (unlikely(GET_LOW_SLICE_INDEX(addr) != + GET_LOW_SLICE_INDEX(end - 1))) + goto slow_irqon; + } else { + if (unlikely(GET_HIGH_SLICE_INDEX(addr) != + GET_HIGH_SLICE_INDEX(end - 1))) + goto slow_irqon; + } +#endif /* CONFIG_HUGETLB_PAGE */ + + /* + * XXX: batch / limit 'nr', to avoid large irq off latency + * needs some instrumenting to determine the common sizes used by + * important workloads (eg. DB2), and whether limiting the batch size + * will decrease performance. + * + * It seems like we're in the clear for the moment. Direct-IO is + * the main guy that batches up lots of get_user_pages, and even + * they are limited to 64-at-a-time which is not so many. + */ + /* + * This doesn't prevent pagetable teardown, but does prevent + * the pagetables from being freed on powerpc. + * + * So long as we atomically load page table pointers versus teardown, + * we can follow the address down to the the page and take a ref on it. + */ + local_irq_disable(); + + psize = get_slice_psize(mm, addr); + shift = mmu_psize_defs[psize].shift; + +#ifdef CONFIG_HUGETLB_PAGE + if (unlikely(mmu_huge_psizes[psize])) { + pte_t *ptep; + unsigned long a = addr; + unsigned long sz = ((1UL) << shift); + struct hstate *hstate = size_to_hstate(sz); + + BUG_ON(!hstate); + /* + * XXX: could be optimized to avoid hstate + * lookup entirely (just use shift) + */ + + do { + VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, a)].shift); + ptep = huge_pte_offset(mm, a); + pr_debug(" %016lx: huge ptep %p\n", a, ptep); + if (!ptep || !gup_huge_pte(ptep, hstate, &a, end, write, pages, + &nr)) + goto slow; + } while (a != end); + } else +#endif /* CONFIG_HUGETLB_PAGE */ + { + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, addr)].shift); + pr_debug(" %016lx: normal pgd %p\n", addr, (void *)pgd); + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + goto slow; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + goto slow; + } while (pgdp++, addr = next, addr != end); + } + local_irq_enable(); + + VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); + return nr; + + { + int ret; + +slow: + local_irq_enable(); +slow_irqon: + pr_debug(" slow path ! 
nr = %d\n", nr); + + /* Try to get the remaining pages with get_user_pages */ + start += nr << PAGE_SHIFT; + pages += nr; + + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, start, + (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); + up_read(&mm->mmap_sem); + + /* Have to be a bit careful with return values */ + if (nr > 0) { + if (ret < 0) + ret = nr; + else + ret += nr; + } + + return ret; + } +} diff --git a/include/asm-powerpc/pgtable-ppc64.h b/include/asm-powerpc/pgtable-ppc64.h index 5fc78c0be302..74c6f380b805 100644 --- a/include/asm-powerpc/pgtable-ppc64.h +++ b/include/asm-powerpc/pgtable-ppc64.h @@ -461,6 +461,8 @@ void pgtable_cache_init(void); return pt; } +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long address); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a39b38ccdc97..69ed3cb1197a 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -143,6 +143,29 @@ static inline int page_cache_get_speculative(struct page *page) return 1; } +/* + * Same as above, but add instead of inc (could just be merged) + */ +static inline int page_cache_add_speculative(struct page *page, int count) +{ + VM_BUG_ON(in_interrupt()); + +#if !defined(CONFIG_SMP) && defined(CONFIG_CLASSIC_RCU) +# ifdef CONFIG_PREEMPT + VM_BUG_ON(!in_atomic()); +# endif + VM_BUG_ON(page_count(page) == 0); + atomic_add(count, &page->_count); + +#else + if (unlikely(!atomic_add_unless(&page->_count, count, 0))) + return 0; +#endif + VM_BUG_ON(PageCompound(page) && page != compound_head(page)); + + return 1; +} + static inline int page_freeze_refs(struct page *page, int count) { return likely(atomic_cmpxchg(&page->_count, count, 0) == count); -- cgit v1.2.3 From e2ce4eaa76214f65a3f328ec5b45c30248115768 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Wed, 30 Apr 2008 15:10:07 +0100 Subject: regulator: consumer device interface Add support to allow consumer device drivers to control their regulator power supply. This uses a similar API to the kernel clock interface in that consumer drivers can get and put a regulator (like they can with clocks atm) and get/set voltage, current limit, mode, enable and disable. This should allow consumers complete control over their supply voltage and current limit. This also compiles out if not in use so drivers can be reused in systems with no regulator based power control. Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 284 +++++++++++++++++++++++++++++++++++++ 1 file changed, 284 insertions(+) create mode 100644 include/linux/regulator/consumer.h (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h new file mode 100644 index 000000000000..afdc4558bb94 --- /dev/null +++ b/include/linux/regulator/consumer.h @@ -0,0 +1,284 @@ +/* + * consumer.h -- SoC Regulator consumer support. + * + * Copyright (C) 2007, 2008 Wolfson Microelectronics PLC. + * + * Author: Liam Girdwood + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Regulator Consumer Interface. + * + * A Power Management Regulator framework for SoC based devices. + * Features:- + * o Voltage and current level control. + * o Operating mode control. + * o Regulator status. 
+ * o sysfs entries for showing client devices and status + * + * EXPERIMENTAL FEATURES: + * Dynamic Regulator operating Mode Switching (DRMS) - allows regulators + * to use most efficient operating mode depending upon voltage and load and + * is transparent to client drivers. + * + * e.g. Devices x,y,z share regulator r. Device x and y draw 20mA each during + * IO and 1mA at idle. Device z draws 100mA when under load and 5mA when + * idling. Regulator r has > 90% efficiency in NORMAL mode at loads > 100mA + * but this drops rapidly to 60% when below 100mA. Regulator r has > 90% + * efficiency in IDLE mode at loads < 10mA. Thus regulator r will operate + * in normal mode for loads > 10mA and in IDLE mode for load <= 10mA. + * + */ + +#ifndef __LINUX_REGULATOR_CONSUMER_H_ +#define __LINUX_REGULATOR_CONSUMER_H_ + +/* + * Regulator operating modes. + * + * Regulators can run in a variety of different operating modes depending on + * output load. This allows further system power savings by selecting the + * best (and most efficient) regulator mode for a desired load. + * + * Most drivers will only care about NORMAL. The modes below are generic and + * will probably not match the naming convention of your regulator data sheet + * but should match the use cases in the datasheet. + * + * In order of power efficiency (least efficient at top). + * + * Mode Description + * FAST Regulator can handle fast changes in it's load. + * e.g. useful in CPU voltage & frequency scaling where + * load can quickly increase with CPU frequency increases. + * + * NORMAL Normal regulator power supply mode. Most drivers will + * use this mode. + * + * IDLE Regulator runs in a more efficient mode for light + * loads. Can be used for devices that have a low power + * requirement during periods of inactivity. This mode + * may be more noisy than NORMAL and may not be able + * to handle fast load switching. + * + * STANDBY Regulator runs in the most efficient mode for very + * light loads. Can be used by devices when they are + * in a sleep/standby state. This mode is likely to be + * the most noisy and may not be able to handle fast load + * switching. + * + * NOTE: Most regulators will only support a subset of these modes. Some + * will only just support NORMAL. + * + * These modes can be OR'ed together to make up a mask of valid register modes. + */ + +#define REGULATOR_MODE_FAST 0x1 +#define REGULATOR_MODE_NORMAL 0x2 +#define REGULATOR_MODE_IDLE 0x4 +#define REGULATOR_MODE_STANDBY 0x8 + +/* + * Regulator notifier events. + * + * UNDER_VOLTAGE Regulator output is under voltage. + * OVER_CURRENT Regulator output current is too high. + * REGULATION_OUT Regulator output is out of regulation. + * FAIL Regulator output has failed. + * OVER_TEMP Regulator over temp. + * FORCE_DISABLE Regulator shut down by software. + * + * NOTE: These events can be OR'ed together when passed into handler. + */ + +#define REGULATOR_EVENT_UNDER_VOLTAGE 0x01 +#define REGULATOR_EVENT_OVER_CURRENT 0x02 +#define REGULATOR_EVENT_REGULATION_OUT 0x04 +#define REGULATOR_EVENT_FAIL 0x08 +#define REGULATOR_EVENT_OVER_TEMP 0x10 +#define REGULATOR_EVENT_FORCE_DISABLE 0x20 + +struct regulator; + +/** + * struct regulator_bulk_data - Data used for bulk regulator operations. + * + * @supply The name of the supply. Initialised by the user before + * using the bulk regulator APIs. + * @consumer The regulator consumer for the supply. This will be managed + * by the bulk API. 
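Tying the consumer interface together from a client driver's perspective, the typical flow is get, configure, enable, and eventually disable and put. A hedged sketch using the calls declared later in this header; the device pointer and the "vcc_sdio" supply name are invented for the example, and ERR_PTR-style error returns are assumed:

	struct regulator *reg;

	reg = regulator_get(dev, "vcc_sdio");
	if (IS_ERR(reg))
		return PTR_ERR(reg);

	/* request 3.3V (arguments are in microvolts) and power the device up */
	regulator_set_voltage(reg, 3300000, 3300000);
	regulator_enable(reg);

	/* ... later, on remove or suspend ... */
	regulator_disable(reg);
	regulator_put(reg);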
+ * + * The regulator APIs provide a series of regulator_bulk_() API calls as + * a convenience to consumers which require multiple supplies. This + * structure is used to manage data for these calls. + */ +struct regulator_bulk_data { + const char *supply; + struct regulator *consumer; +}; + +#if defined(CONFIG_REGULATOR) + +/* regulator get and put */ +struct regulator *__must_check regulator_get(struct device *dev, + const char *id); +void regulator_put(struct regulator *regulator); + +/* regulator output control and status */ +int regulator_enable(struct regulator *regulator); +int regulator_disable(struct regulator *regulator); +int regulator_force_disable(struct regulator *regulator); +int regulator_is_enabled(struct regulator *regulator); + +int regulator_bulk_get(struct device *dev, int num_consumers, + struct regulator_bulk_data *consumers); +int regulator_bulk_enable(int num_consumers, + struct regulator_bulk_data *consumers); +int regulator_bulk_disable(int num_consumers, + struct regulator_bulk_data *consumers); +void regulator_bulk_free(int num_consumers, + struct regulator_bulk_data *consumers); + +int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV); +int regulator_get_voltage(struct regulator *regulator); +int regulator_set_current_limit(struct regulator *regulator, + int min_uA, int max_uA); +int regulator_get_current_limit(struct regulator *regulator); + +int regulator_set_mode(struct regulator *regulator, unsigned int mode); +unsigned int regulator_get_mode(struct regulator *regulator); +int regulator_set_optimum_mode(struct regulator *regulator, int load_uA); + +/* regulator notifier block */ +int regulator_register_notifier(struct regulator *regulator, + struct notifier_block *nb); +int regulator_unregister_notifier(struct regulator *regulator, + struct notifier_block *nb); + +/* driver data - core doesn't touch */ +void *regulator_get_drvdata(struct regulator *regulator); +void regulator_set_drvdata(struct regulator *regulator, void *data); + +#else + +/* + * Make sure client drivers will still build on systems with no software + * controllable voltage or current regulators. + */ +static inline struct regulator *__must_check regulator_get(struct device *dev, + const char *id) +{ + /* Nothing except the stubbed out regulator API should be + * looking at the value except to check if it is an error + * value so the actual return value doesn't matter. 
+ */ + return (struct regulator *)id; +} +static inline void regulator_put(struct regulator *regulator) +{ +} + +static inline int regulator_enable(struct regulator *regulator) +{ + return 0; +} + +static inline int regulator_disable(struct regulator *regulator) +{ + return 0; +} + +static inline int regulator_is_enabled(struct regulator *regulator) +{ + return 1; +} + +static inline int regulator_bulk_get(struct device *dev, + int num_consumers, + struct regulator_bulk_data *consumers) +{ + return 0; +} + +static inline int regulator_bulk_enable(int num_consumers, + struct regulator_bulk_data *consumers) +{ + return 0; +} + +static inline int regulator_bulk_disable(int num_consumers, + struct regulator_bulk_data *consumers) +{ + return 0; +} + +static inline void regulator_bulk_free(int num_consumers, + struct regulator_bulk_data *consumers) +{ +} + +static inline int regulator_set_voltage(struct regulator *regulator, + int min_uV, int max_uV) +{ + return 0; +} + +static inline int regulator_get_voltage(struct regulator *regulator) +{ + return 0; +} + +static inline int regulator_set_current_limit(struct regulator *regulator, + int min_uA, int max_uA) +{ + return 0; +} + +static inline int regulator_get_current_limit(struct regulator *regulator) +{ + return 0; +} + +static inline int regulator_set_mode(struct regulator *regulator, + unsigned int mode) +{ + return 0; +} + +static inline unsigned int regulator_get_mode(struct regulator *regulator) +{ + return REGULATOR_MODE_NORMAL; +} + +static inline int regulator_set_optimum_mode(struct regulator *regulator, + int load_uA) +{ + return REGULATOR_MODE_NORMAL; +} + +static inline int regulator_register_notifier(struct regulator *regulator, + struct notifier_block *nb) +{ + return 0; +} + +static inline int regulator_unregister_notifier(struct regulator *regulator, + struct notifier_block *nb) +{ + return 0; +} + +static inline void *regulator_get_drvdata(struct regulator *regulator) +{ + return NULL; +} + +static inline void regulator_set_drvdata(struct regulator *regulator, + void *data) +{ +} + +#endif + +#endif -- cgit v1.2.3 From 571a354b1542a274d88617e1f6703f3fe7a517f1 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Wed, 30 Apr 2008 15:42:28 +0100 Subject: regulator: regulator driver interface This allows regulator drivers to register their regulators and provide operations to the core. It also has a notifier call chain for propagating regulator events to clients. Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 99 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 include/linux/regulator/driver.h (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h new file mode 100644 index 000000000000..1d712c7172a2 --- /dev/null +++ b/include/linux/regulator/driver.h @@ -0,0 +1,99 @@ +/* + * driver.h -- SoC Regulator driver support. + * + * Copyright (C) 2007, 2008 Wolfson Microelectronics PLC. + * + * Author: Liam Girdwood + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Regulator Driver Interface. + */ + +#ifndef __LINUX_REGULATOR_DRIVER_H_ +#define __LINUX_REGULATOR_DRIVER_H_ + +#include +#include + +struct regulator_constraints; +struct regulator_dev; + +/** + * struct regulator_ops - regulator operations. 
+ * + * This struct describes regulator operations. + */ +struct regulator_ops { + + /* get/set regulator voltage */ + int (*set_voltage) (struct regulator_dev *, int min_uV, int max_uV); + int (*get_voltage) (struct regulator_dev *); + + /* get/set regulator current */ + int (*set_current_limit) (struct regulator_dev *, + int min_uA, int max_uA); + int (*get_current_limit) (struct regulator_dev *); + + /* enable/disable regulator */ + int (*enable) (struct regulator_dev *); + int (*disable) (struct regulator_dev *); + int (*is_enabled) (struct regulator_dev *); + + /* get/set regulator operating mode (defined in regulator.h) */ + int (*set_mode) (struct regulator_dev *, unsigned int mode); + unsigned int (*get_mode) (struct regulator_dev *); + + /* get most efficient regulator operating mode for load */ + unsigned int (*get_optimum_mode) (struct regulator_dev *, int input_uV, + int output_uV, int load_uA); + + /* the operations below are for configuration of regulator state when + * it's parent PMIC enters a global STANBY/HIBERNATE state */ + + /* set regulator suspend voltage */ + int (*set_suspend_voltage) (struct regulator_dev *, int uV); + + /* enable/disable regulator in suspend state */ + int (*set_suspend_enable) (struct regulator_dev *); + int (*set_suspend_disable) (struct regulator_dev *); + + /* set regulator suspend operating mode (defined in regulator.h) */ + int (*set_suspend_mode) (struct regulator_dev *, unsigned int mode); +}; + +/* + * Regulators can either control voltage or current. + */ +enum regulator_type { + REGULATOR_VOLTAGE, + REGULATOR_CURRENT, +}; + +/** + * struct regulator_desc - Regulator descriptor + * + */ +struct regulator_desc { + const char *name; + int id; + struct regulator_ops *ops; + int irq; + enum regulator_type type; + struct module *owner; +}; + + +struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, + void *reg_data); +void regulator_unregister(struct regulator_dev *rdev); + +int regulator_notifier_call_chain(struct regulator_dev *rdev, + unsigned long event, void *data); + +void *rdev_get_drvdata(struct regulator_dev *rdev); +int rdev_get_id(struct regulator_dev *rdev); + +#endif -- cgit v1.2.3 From 4c1184e85cb381121a5273ea20ad31ca3faa0a4f Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Wed, 30 Apr 2008 15:46:09 +0100 Subject: regulator: machine driver interface This interface is for machine specific code and allows the creation of voltage/current domains (with constraints) for each regulator. It can provide regulator constraints that will prevent device damage through overvoltage or over current caused by buggy client drivers. It also allows the creation of a regulator tree whereby some regulators are supplied by others (similar to a clock tree). Signed-off-by: Liam Girdwood Signed-off-by: Philipp Zabel Signed-off-by: Mark Brown --- include/linux/regulator/machine.h | 104 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 include/linux/regulator/machine.h (limited to 'include/linux') diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h new file mode 100644 index 000000000000..11e737dbfcf2 --- /dev/null +++ b/include/linux/regulator/machine.h @@ -0,0 +1,104 @@ +/* + * machine.h -- SoC Regulator support, machine/board driver API. + * + * Copyright (C) 2007, 2008 Wolfson Microelectronics PLC. 
+ * + * Author: Liam Girdwood + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Regulator Machine/Board Interface. + */ + +#ifndef __LINUX_REGULATOR_MACHINE_H_ +#define __LINUX_REGULATOR_MACHINE_H_ + +#include +#include + +struct regulator; + +/* + * Regulator operation constraint flags. These flags are used to enable + * certain regulator operations and can be OR'ed together. + * + * VOLTAGE: Regulator output voltage can be changed by software on this + * board/machine. + * CURRENT: Regulator output current can be changed by software on this + * board/machine. + * MODE: Regulator operating mode can be changed by software on this + * board/machine. + * STATUS: Regulator can be enabled and disabled. + * DRMS: Dynamic Regulator Mode Switching is enabled for this regulator. + */ + +#define REGULATOR_CHANGE_VOLTAGE 0x1 +#define REGULATOR_CHANGE_CURRENT 0x2 +#define REGULATOR_CHANGE_MODE 0x4 +#define REGULATOR_CHANGE_STATUS 0x8 +#define REGULATOR_CHANGE_DRMS 0x10 + +/** + * struct regulator_state - regulator state during low power syatem states + * + * This describes a regulators state during a system wide low power state. + */ +struct regulator_state { + int uV; /* suspend voltage */ + unsigned int mode; /* suspend regulator operating mode */ + int enabled; /* is regulator enabled in this suspend state */ +}; + +/** + * struct regulation_constraints - regulator operating constraints. + * + * This struct describes regulator and board/machine specific constraints. + */ +struct regulation_constraints { + + char *name; + + /* voltage output range (inclusive) - for voltage control */ + int min_uV; + int max_uV; + + /* current output range (inclusive) - for current control */ + int min_uA; + int max_uA; + + /* valid regulator operating modes for this machine */ + unsigned int valid_modes_mask; + + /* valid operations for regulator on this machine */ + unsigned int valid_ops_mask; + + /* regulator input voltage - only if supply is another regulator */ + int input_uV; + + /* regulator suspend states for global PMIC STANDBY/HIBERNATE */ + struct regulator_state state_disk; + struct regulator_state state_mem; + struct regulator_state state_standby; + suspend_state_t initial_state; /* suspend state to set at init */ + + /* constriant flags */ + unsigned always_on:1; /* regulator never off when system is on */ + unsigned boot_on:1; /* bootloader/firmware enabled regulator */ + unsigned apply_uV:1; /* apply uV constraint iff min == max */ +}; + +int regulator_set_supply(const char *regulator, const char *regulator_supply); + +const char *regulator_get_supply(const char *regulator); + +int regulator_set_machine_constraints(const char *regulator, + struct regulation_constraints *constraints); + +int regulator_set_device_supply(const char *regulator, struct device *dev, + const char *supply); + +int regulator_suspend_prepare(suspend_state_t state); + +#endif -- cgit v1.2.3 From 48d335ba3164ce99cb8847513d0e3b6ee604eb20 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 30 Apr 2008 15:50:21 +0100 Subject: regulator: fixed regulator interface This patch adds support for fixed regulators. This class of regulator is not software controllable but can coexist on machines with software controlable regulators. 
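For illustration only (not part of this patch), a board file might describe an always-on 3.3V rail with the new platform data roughly as below. The rail name, variable names and the platform device/driver name are assumptions made up for this sketch, not taken from the series:

#include <linux/platform_device.h>
#include <linux/regulator/fixed.h>

static struct fixed_voltage_config board_vcc_3v3 = {
	.supply_name	= "vcc_3v3",	/* illustrative supply name */
	.microvolts	= 3300000,	/* fixed 3.3V output */
};

static struct platform_device board_vcc_3v3_dev = {
	.name	= "fixed-regulator",	/* assumed driver name, not from this patch */
	.id	= 0,
	.dev	= {
		.platform_data	= &board_vcc_3v3,
	},
};

Consumer drivers would then see this supply through the usual regulator_get()/regulator_enable() calls, even though nothing about it can be changed at run time.
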
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- include/linux/regulator/fixed.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/linux/regulator/fixed.h (limited to 'include/linux') diff --git a/include/linux/regulator/fixed.h b/include/linux/regulator/fixed.h new file mode 100644 index 000000000000..1387a5d2190e --- /dev/null +++ b/include/linux/regulator/fixed.h @@ -0,0 +1,22 @@ +/* + * fixed.h + * + * Copyright 2008 Wolfson Microelectronics PLC. + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + */ + +#ifndef __REGULATOR_FIXED_H +#define __REGULATOR_FIXED_H + +struct fixed_voltage_config { + const char *supply_name; + int microvolts; +}; + +#endif -- cgit v1.2.3 From 0eb5d5ab3ec99bfd22ff16797d95835369ffb25b Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 11 Jul 2008 17:28:06 +0200 Subject: regulator: TI bq24022 Li-Ion Charger driver This adds a regulator driver for the TI bq24022 Single-Chip Li-Ion Charger with its nCE and ISET2 pins connected to GPIOs. Signed-off-by: Philipp Zabel Signed-off-by: Liam Girdwood --- drivers/regulator/Kconfig | 10 +++ drivers/regulator/Makefile | 2 + drivers/regulator/bq24022.c | 167 ++++++++++++++++++++++++++++++++++++++ include/linux/regulator/bq24022.h | 21 +++++ 4 files changed, 200 insertions(+) create mode 100644 drivers/regulator/bq24022.c create mode 100644 include/linux/regulator/bq24022.h (limited to 'include/linux') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 84f89ecce69e..a656128f1fdd 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -46,4 +46,14 @@ config REGULATOR_VIRTUAL_CONSUMER If unsure, say no. +config REGULATOR_BQ24022 + tristate "TI bq24022 Dual Input 1-Cell Li-Ion Charger IC" + default n + select REGULATOR + help + This driver controls a TI bq24022 Charger attached via + GPIOs. The provided current regulator can enable/disable + charging select between 100 mA and 500 mA charging current + limit. + endmenu diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 29528b78c8de..ac2c64efe65c 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -7,4 +7,6 @@ obj-$(CONFIG_REGULATOR) += core.o obj-$(CONFIG_REGULATOR_FIXED_VOLTAGE) += fixed.o obj-$(CONFIG_REGULATOR_VIRTUAL_CONSUMER) += virtual.o +obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o + ccflags-$(CONFIG_REGULATOR_DEBUG) += -DDEBUG diff --git a/drivers/regulator/bq24022.c b/drivers/regulator/bq24022.c new file mode 100644 index 000000000000..263699d6152d --- /dev/null +++ b/drivers/regulator/bq24022.c @@ -0,0 +1,167 @@ +/* + * Support for TI bq24022 (bqTINY-II) Dual Input (USB/AC Adpater) + * 1-Cell Li-Ion Charger connected via GPIOs. + * + * Copyright (c) 2008 Philipp Zabel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include + +static int bq24022_set_current_limit(struct regulator_dev *rdev, + int min_uA, int max_uA) +{ + struct platform_device *pdev = rdev_get_drvdata(rdev); + struct bq24022_mach_info *pdata = pdev->dev.platform_data; + + dev_dbg(&pdev->dev, "setting current limit to %s mA\n", + max_uA >= 500000 ? "500" : "100"); + + /* REVISIT: maybe return error if min_uA != 0 ? */ + gpio_set_value(pdata->gpio_iset2, max_uA >= 500000); + return 0; +} + +static int bq24022_get_current_limit(struct regulator_dev *rdev) +{ + struct platform_device *pdev = rdev_get_drvdata(rdev); + struct bq24022_mach_info *pdata = pdev->dev.platform_data; + + return gpio_get_value(pdata->gpio_iset2) ? 500000 : 100000; +} + +static int bq24022_enable(struct regulator_dev *rdev) +{ + struct platform_device *pdev = rdev_get_drvdata(rdev); + struct bq24022_mach_info *pdata = pdev->dev.platform_data; + + dev_dbg(&pdev->dev, "enabling charger\n"); + + gpio_set_value(pdata->gpio_nce, 0); + return 0; +} + +static int bq24022_disable(struct regulator_dev *rdev) +{ + struct platform_device *pdev = rdev_get_drvdata(rdev); + struct bq24022_mach_info *pdata = pdev->dev.platform_data; + + dev_dbg(&pdev->dev, "disabling charger\n"); + + gpio_set_value(pdata->gpio_nce, 1); + return 0; +} + +static int bq24022_is_enabled(struct regulator_dev *rdev) +{ + struct platform_device *pdev = rdev_get_drvdata(rdev); + struct bq24022_mach_info *pdata = pdev->dev.platform_data; + + return !gpio_get_value(pdata->gpio_nce); +} + +static struct regulator_ops bq24022_ops = { + .set_current_limit = bq24022_set_current_limit, + .get_current_limit = bq24022_get_current_limit, + .enable = bq24022_enable, + .disable = bq24022_disable, + .is_enabled = bq24022_is_enabled, +}; + +static struct regulator_desc bq24022_desc = { + .name = "bq24022", + .ops = &bq24022_ops, + .type = REGULATOR_CURRENT, +}; + +static int __init bq24022_probe(struct platform_device *pdev) +{ + struct bq24022_mach_info *pdata = pdev->dev.platform_data; + struct regulator_dev *bq24022; + int ret; + + if (!pdata || !pdata->gpio_nce || !pdata->gpio_iset2) + return -EINVAL; + + ret = gpio_request(pdata->gpio_nce, "ncharge_en"); + if (ret) { + dev_dbg(&pdev->dev, "couldn't request nCE GPIO: %d\n", + pdata->gpio_nce); + goto err_ce; + } + ret = gpio_request(pdata->gpio_iset2, "charge_mode"); + if (ret) { + dev_dbg(&pdev->dev, "couldn't request ISET2 GPIO: %d\n", + pdata->gpio_iset2); + goto err_iset2; + } + ret = gpio_direction_output(pdata->gpio_iset2, 0); + ret = gpio_direction_output(pdata->gpio_nce, 1); + + bq24022 = regulator_register(&bq24022_desc, pdev); + if (IS_ERR(bq24022)) { + dev_dbg(&pdev->dev, "couldn't register regulator\n"); + ret = PTR_ERR(bq24022); + goto err_reg; + } + platform_set_drvdata(pdev, bq24022); + dev_dbg(&pdev->dev, "registered regulator\n"); + + return 0; +err_reg: + gpio_free(pdata->gpio_iset2); +err_iset2: + gpio_free(pdata->gpio_nce); +err_ce: + return ret; +} + +static int __devexit bq24022_remove(struct platform_device *pdev) +{ + struct bq24022_mach_info *pdata = pdev->dev.platform_data; + struct regulator_dev *bq24022 = platform_get_drvdata(pdev); + + regulator_unregister(bq24022); + gpio_free(pdata->gpio_iset2); + gpio_free(pdata->gpio_nce); + + return 0; +} + +static struct platform_driver bq24022_driver = { + .driver = { + .name = "bq24022", + }, + .remove = __devexit_p(bq24022_remove), +}; + +static int __init bq24022_init(void) +{ + return 
platform_driver_probe(&bq24022_driver, bq24022_probe); +} + +static void __exit bq24022_exit(void) +{ + platform_driver_unregister(&bq24022_driver); +} + +/* + * make sure this is probed before gpio_vbus and pda_power, + * but after asic3 or other GPIO expander drivers. + */ +subsys_initcall(bq24022_init); +module_exit(bq24022_exit); + +MODULE_AUTHOR("Philipp Zabel"); +MODULE_DESCRIPTION("TI bq24022 Li-Ion Charger driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/regulator/bq24022.h b/include/linux/regulator/bq24022.h new file mode 100644 index 000000000000..e84b0a9feda5 --- /dev/null +++ b/include/linux/regulator/bq24022.h @@ -0,0 +1,21 @@ +/* + * Support for TI bq24022 (bqTINY-II) Dual Input (USB/AC Adpater) + * 1-Cell Li-Ion Charger connected via GPIOs. + * + * Copyright (c) 2008 Philipp Zabel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +/** + * bq24022_mach_info - platform data for bq24022 + * @gpio_nce: GPIO line connected to the nCE pin, used to enable / disable charging + * @gpio_iset2: GPIO line connected to the ISET2 pin, used to limit charging current to 100 mA / 500 mA + */ +struct bq24022_mach_info { + int gpio_nce; + int gpio_iset2; +}; -- cgit v1.2.3 From 785957d3e8c6fb37b18bf671923a76dbd8240025 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 30 Jul 2008 03:03:15 -0700 Subject: tcp: MD5: Use MIB counter instead of warning for MD5 mismatch. From a report by Matti Aarnio, and preliminary patch by Adam Langley. Signed-off-by: David S. Miller --- include/linux/snmp.h | 2 ++ net/ipv4/proc.c | 2 ++ net/ipv4/tcp_ipv4.c | 10 ++-------- net/ipv6/tcp_ipv6.c | 27 ++++++++------------------- 4 files changed, 14 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 5df62ef1280c..7a6e6bba4a71 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -214,6 +214,8 @@ enum LINUX_MIB_TCPDSACKIGNOREDOLD, /* TCPSACKIgnoredOld */ LINUX_MIB_TCPDSACKIGNOREDNOUNDO, /* TCPSACKIgnoredNoUndo */ LINUX_MIB_TCPSPURIOUSRTOS, /* TCPSpuriousRTOs */ + LINUX_MIB_TCPMD5NOTFOUND, /* TCPMD5NotFound */ + LINUX_MIB_TCPMD5UNEXPECTED, /* TCPMD5Unexpected */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 834356ea99df..8f5a403f6f6b 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -232,6 +232,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD), SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO), SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), + SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND), + SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a2b06d0cc26b..b3875c0d83c7 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1116,18 +1116,12 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) return 0; if (hash_expected && !hash_location) { - LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found " - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); return 1; } if (!hash_expected && hash_location) { - LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found 
" - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); return 1; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cff778b23a7f..1db45216b232 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -849,28 +849,17 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr); hash_location = tcp_parse_md5sig_option(th); - /* do we have a hash as expected? */ - if (!hash_expected) { - if (!hash_location) - return 0; - if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash NOT expected but found " - "(" NIP6_FMT ", %u)->" - "(" NIP6_FMT ", %u)\n", - NIP6(ip6h->saddr), ntohs(th->source), - NIP6(ip6h->daddr), ntohs(th->dest)); - } + /* We've parsed the options - do we have a hash? */ + if (!hash_expected && !hash_location) + return 0; + + if (hash_expected && !hash_location) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); return 1; } - if (!hash_location) { - if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash expected but NOT found " - "(" NIP6_FMT ", %u)->" - "(" NIP6_FMT ", %u)\n", - NIP6(ip6h->saddr), ntohs(th->source), - NIP6(ip6h->daddr), ntohs(th->dest)); - } + if (!hash_expected && hash_location) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); return 1; } -- cgit v1.2.3 From 96d8b647cfff90c8ff07863866aacdcd9d13cead Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 29 Jul 2008 13:54:11 +0100 Subject: [MTD] [NAND] fix subpage read for small page NAND Current implementation of subpage read feature for NAND has issues with small page devices. Small page NAND do not support RNDOUT command. So subpage feature is not applicable for them. This patch disables support of subpage for small page NAND. The code is verified on nandsim(SP NAND simulation) and on LP NAND devices. Thanks a lot to Artem for finding this issue. Signed-off-by: Alexey Korolev Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/nand.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 83f678702dff..81774e5facf4 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -177,7 +177,9 @@ typedef enum { #define NAND_MUST_PAD(chip) (!(chip->options & NAND_NO_PADDING)) #define NAND_HAS_CACHEPROG(chip) ((chip->options & NAND_CACHEPRG)) #define NAND_HAS_COPYBACK(chip) ((chip->options & NAND_COPYBACK)) -#define NAND_SUBPAGE_READ(chip) ((chip->ecc.mode == NAND_ECC_SOFT)) +/* Large page NAND with SOFT_ECC should support subpage reads */ +#define NAND_SUBPAGE_READ(chip) ((chip->ecc.mode == NAND_ECC_SOFT) \ + && (chip->page_shift > 9)) /* Mask to zero out the chip options, which come from the id table */ #define NAND_CHIPOPTIONS_MSK (0x0000ffff & ~NAND_NO_AUTOINCR) -- cgit v1.2.3 From 95b1bc20532c18e3f19cd460c8350350c84ffbb2 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 29 Jul 2008 22:28:12 -0700 Subject: [MTD] MTD_DEBUG always does compile-time typechecks The current style for debug messages is to ensure they're always parsed by the compiler and then subjected to dead code removal. That way builds won't break only when debug options get enabled, which is common when they are stripped out early by CPP. This patch makes CONFIG_MTD_DEBUG adopt that convention. 
Signed-off-by: David Brownell Signed-off-by: David Woodhouse --- include/linux/mtd/mtd.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 4ed40caff4e5..922636548558 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -272,7 +272,11 @@ static inline void mtd_erase_callback(struct erase_info *instr) printk(KERN_INFO args); \ } while(0) #else /* CONFIG_MTD_DEBUG */ -#define DEBUG(n, args...) do { } while(0) +#define DEBUG(n, args...) \ + do { \ + if (0) \ + printk(KERN_INFO args); \ + } while(0) #endif /* CONFIG_MTD_DEBUG */ -- cgit v1.2.3 From 1a4e564b7db999fbe5d88318c96ac8747699d417 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 29 Jul 2008 22:32:57 -0700 Subject: resource: add resource_size() Avoid one-off errors by introducing a resource_size() function. Signed-off-by: Magnus Damm Cc: Ben Dooks Cc: Jean Delvare Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ioport.h | 4 ++++ kernel/resource.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 2cd07cc29687..22d2115458c6 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -118,6 +118,10 @@ extern int allocate_resource(struct resource *root, struct resource *new, int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size); resource_size_t resource_alignment(struct resource *res); +static inline resource_size_t resource_size(struct resource *res) +{ + return res->end - res->start + 1; +} /* Convenience shorthand with allocation */ #define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name)) diff --git a/kernel/resource.c b/kernel/resource.c index 74af2d7cb5a1..f5b518eabefe 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -490,7 +490,7 @@ resource_size_t resource_alignment(struct resource *res) { switch (res->flags & (IORESOURCE_SIZEALIGN | IORESOURCE_STARTALIGN)) { case IORESOURCE_SIZEALIGN: - return res->end - res->start + 1; + return resource_size(res); case IORESOURCE_STARTALIGN: return res->start; default: -- cgit v1.2.3 From a1531acd43310a7e4571d52e8846640667f4c74b Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Tue, 29 Jul 2008 22:32:58 -0700 Subject: cpufreq acpi: only call _PPC after cpufreq ACPI init funcs got called already Ingo Molnar provided a fix to not call _PPC at processor driver initialization time in "[PATCH] ACPI: fix cpufreq regression" (git commit e4233dec749a3519069d9390561b5636a75c7579) But it can still happen that _PPC is called at processor driver initialization time. This patch should make sure that this is not possible anymore. 
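In short, ignore_ppc now starts out at -1 ("cpufreq low level driver not initialized yet") and cpufreq_add_dev() emits a new CPUFREQ_START notifier event once a policy has been set up; only then does the ACPI notifier start honouring _PPC. A simplified sketch of the guard, condensed from the hunks below rather than quoted literally:

static int ignore_ppc = -1;	/* -1: cpufreq low level driver not up yet */

static int example_ppc_notifier(struct notifier_block *nb,
				unsigned long event, void *data)
{
	if (event == CPUFREQ_START && ignore_ppc <= 0) {
		ignore_ppc = 0;		/* from now on, consider _PPC */
		return 0;
	}

	if (ignore_ppc)			/* still -1, or forced to 1 by the user */
		return 0;

	/* ... safe to evaluate _PPC here ... */
	return 0;
}
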
Signed-off-by: Thomas Renninger Cc: Andi Kleen Cc: Len Brown Cc: Dave Jones Cc: Ingo Molnar Cc: Venkatesh Pallipadi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c | 6 ++++++ drivers/acpi/processor_perflib.c | 15 +++++++++++++-- drivers/cpufreq/cpufreq.c | 3 +++ include/linux/cpufreq.h | 1 + 4 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c index 69288f653144..3233fe84d158 100644 --- a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c +++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c @@ -96,6 +96,12 @@ static int pmi_notifier(struct notifier_block *nb, struct cpufreq_frequency_table *cbe_freqs; u8 node; + /* Should this really be called for CPUFREQ_ADJUST, CPUFREQ_INCOMPATIBLE + * and CPUFREQ_NOTIFY policy events?) + */ + if (event == CPUFREQ_START) + return 0; + cbe_freqs = cpufreq_frequency_get_table(policy->cpu); node = cbe_cpu_to_node(policy->cpu); diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index b4749969c6b4..e98071a64810 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -64,7 +64,13 @@ static DEFINE_MUTEX(performance_mutex); * policy is adjusted accordingly. */ -static unsigned int ignore_ppc = 0; +/* ignore_ppc: + * -1 -> cpufreq low level drivers not initialized -> _PSS, etc. not called yet + * ignore _PPC + * 0 -> cpufreq low level drivers initialized -> consider _PPC values + * 1 -> ignore _PPC totally -> forced by user through boot param + */ +static unsigned int ignore_ppc = -1; module_param(ignore_ppc, uint, 0644); MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly" \ "limited by BIOS, this should help"); @@ -72,7 +78,7 @@ MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly" \ #define PPC_REGISTERED 1 #define PPC_IN_USE 2 -static int acpi_processor_ppc_status = 0; +static int acpi_processor_ppc_status; static int acpi_processor_ppc_notifier(struct notifier_block *nb, unsigned long event, void *data) @@ -81,6 +87,11 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb, struct acpi_processor *pr; unsigned int ppc = 0; + if (event == CPUFREQ_START && ignore_ppc <= 0) { + ignore_ppc = 0; + return 0; + } + if (ignore_ppc) return 0; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8d6a3ff02672..8a67f16987db 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -825,6 +825,9 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) policy->user_policy.min = policy->cpuinfo.min_freq; policy->user_policy.max = policy->cpuinfo.max_freq; + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_START, policy); + #ifdef CONFIG_SMP #ifdef CONFIG_HOTPLUG_CPU diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 2270ca5ec631..6fd5668aa572 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -106,6 +106,7 @@ struct cpufreq_policy { #define CPUFREQ_ADJUST (0) #define CPUFREQ_INCOMPATIBLE (1) #define CPUFREQ_NOTIFY (2) +#define CPUFREQ_START (3) #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */ #define CPUFREQ_SHARED_TYPE_HW (1) /* HW does needed coordination */ -- cgit v1.2.3 From 1d1958f05095a7e9ecbba86235122784a3d1b561 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 29 Jul 2008 22:33:16 -0700 Subject: mm: remove find_max_pfn_with_active_regions It has no user now Also 
print out info about adding/removing active regions. Signed-off-by: Yinghai Lu Acked-by: Mel Gorman Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - mm/page_alloc.c | 17 ----------------- 2 files changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 866a3dbe5c75..5e2c8af49998 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1041,7 +1041,6 @@ extern unsigned long absent_pages_in_range(unsigned long start_pfn, extern void get_pfn_range_for_nid(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn); extern unsigned long find_min_pfn_with_active_regions(void); -extern unsigned long find_max_pfn_with_active_regions(void); extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); typedef int (*work_fn_t)(unsigned long, unsigned long, void *); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3cf3d05b6bd4..401d104d2bb6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3753,23 +3753,6 @@ unsigned long __init find_min_pfn_with_active_regions(void) return find_min_pfn_for_node(MAX_NUMNODES); } -/** - * find_max_pfn_with_active_regions - Find the maximum PFN registered - * - * It returns the maximum PFN based on information provided via - * add_active_range(). - */ -unsigned long __init find_max_pfn_with_active_regions(void) -{ - int i; - unsigned long max_pfn = 0; - - for (i = 0; i < nr_nodemap_entries; i++) - max_pfn = max(max_pfn, early_node_map[i].end_pfn); - - return max_pfn; -} - /* * early_calculate_totalpages() * Sum pages in active regions for movable zone. -- cgit v1.2.3 From 3f1712bac586069d6c891a8201457283b27e8abe Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 29 Jul 2008 22:33:32 -0700 Subject: print_ip_sym(): use %pS Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kallsyms.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 57aefa160a92..b96144887444 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -108,8 +108,7 @@ static inline void print_fn_descriptor_symbol(const char *fmt, void *addr) static inline void print_ip_sym(unsigned long ip) { - printk("[<%p>]", (void *) ip); - print_symbol(" %s\n", ip); + printk("[<%p>] %pS\n", (void *) ip, (void *) ip); } #endif /*_LINUX_KALLSYMS_H*/ -- cgit v1.2.3 From 2c203003f64de5fe55ae35712942100d270667fa Mon Sep 17 00:00:00 2001 From: Jerome Arbez-Gindre Date: Tue, 29 Jul 2008 22:33:33 -0700 Subject: connector: add a BlackBoard user to connector Add a BlackBoard user to connector. BlackBoard is part of the TSP GPL sampling framework (http://savannah.nongnu.org/p/tsp) [akpm@linux-foundation.org: add comment] Signed-off-by: Jerome Arbez-Gindre Acked-by: Evgeniy Polyakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/connector.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/connector.h b/include/linux/connector.h index 96a89d3d6727..5c7f9468f753 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -38,8 +38,9 @@ #define CN_W1_VAL 0x1 #define CN_IDX_V86D 0x4 #define CN_VAL_V86D_UVESAFB 0x1 +#define CN_IDX_BB 0x5 /* BlackBoard, from the TSP GPL sampling framework */ -#define CN_NETLINK_USERS 5 +#define CN_NETLINK_USERS 6 /* * Maximum connector's message size. 
-- cgit v1.2.3 From 204b885e7322656284626949e51f292fe61313fa Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 29 Jul 2008 22:33:42 -0700 Subject: introduce lower_32_bits() macro The file kernel.h contains the upper_32_bits macro. This patch adds the other part, the lower_32_bits macro. Its first use will be in the driver for AMD IOMMU. Cc: H. Peter Anvin Signed-off-by: Joerg Roedel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index fdbbf72ca2eb..aaa998f65c7a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -75,6 +75,12 @@ extern const char linux_proc_banner[]; */ #define upper_32_bits(n) ((u32)(((n) >> 16) >> 16)) +/** + * lower_32_bits - return bits 0-31 of a number + * @n: the number we're accessing + */ +#define lower_32_bits(n) ((u32)(n)) + #define KERN_EMERG "<0>" /* system is unusable */ #define KERN_ALERT "<1>" /* action must be taken immediately */ #define KERN_CRIT "<2>" /* critical conditions */ -- cgit v1.2.3 From c627f9cc046c7cd93b4525d89377fb409e170a18 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Tue, 29 Jul 2008 22:33:53 -0700 Subject: mm: add zap_vma_ptes(): a library function to unmap driver ptes zap_vma_ptes() is intended to be used by drivers to unmap ptes assigned to the driver private vmas. This interface is similar to zap_page_range() but is less general & less likely to be abused. Needed by the GRU driver. Signed-off-by: Jack Steiner Cc: Nick Piggin Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ mm/memory.c | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5e2c8af49998..335288bff1b7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -744,6 +744,8 @@ struct zap_details { struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); +int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, + unsigned long size); unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *); unsigned long unmap_vmas(struct mmu_gather **tlb, diff --git a/mm/memory.c b/mm/memory.c index 67f0ab9077d9..6793b9c68107 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -994,6 +994,29 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, return end; } +/** + * zap_vma_ptes - remove ptes mapping the vma + * @vma: vm_area_struct holding ptes to be zapped + * @address: starting address of pages to zap + * @size: number of bytes to zap + * + * This function only unmaps ptes assigned to VM_PFNMAP vmas. + * + * The entire address range must be fully contained within the vma. + * + * Returns 0 if successful. + */ +int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, + unsigned long size) +{ + if (address < vma->vm_start || address + size > vma->vm_end || + !(vma->vm_flags & VM_PFNMAP)) + return -1; + zap_page_range(vma, address, size, NULL); + return 0; +} +EXPORT_SYMBOL_GPL(zap_vma_ptes); + /* * Do a quick page-table lookup for a single page. */ -- cgit v1.2.3 From 3dd730f2b49f101b90d283c3efc4e6cd826dd8f6 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 29 Jul 2008 16:07:37 +1000 Subject: cpumask: statement expressions confuse some versions of gcc when you take the address of the result. 
Noticed on a sparc64 compile using a version 3.4.5 cross compiler. kernel/time/tick-common.c: In function `tick_check_new_device': kernel/time/tick-common.c:210: error: invalid lvalue in unary `&' ... Just make it a regular expression. Signed-off-by: Stephen Rothwell Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 96d0509fb8d8..d3219d73f8e6 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -287,7 +287,7 @@ static inline const cpumask_t *get_cpu_mask(unsigned int cpu) * gcc optimizes it out (it's a constant) and there's no huge stack * variable created: */ -#define cpumask_of_cpu(cpu) ({ *get_cpu_mask(cpu); }) +#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu)) #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) -- cgit v1.2.3 From 1f938d060a7bc01b5f82d46db3e38cd501b445a6 Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Mon, 21 Jul 2008 00:06:19 +0400 Subject: libata.h: replace __FUNCTION__ with __func__ Signed-off-by: Alexander Beregalov Signed-off-by: Jeff Garzik --- include/linux/libata.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 5b247b8a6b3b..d4b8e5fa3e8b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -60,9 +60,9 @@ /* note: prints function name for you */ #ifdef ATA_DEBUG -#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args) +#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args) #ifdef ATA_VERBOSE_DEBUG -#define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args) +#define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args) #else #define VPRINTK(fmt, args...) #endif /* ATA_VERBOSE_DEBUG */ @@ -71,7 +71,7 @@ #define VPRINTK(fmt, args...) #endif /* ATA_DEBUG */ -#define BPRINTK(fmt, args...) if (ap->flags & ATA_FLAG_DEBUGMSG) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args) +#define BPRINTK(fmt, args...) if (ap->flags & ATA_FLAG_DEBUGMSG) printk(KERN_ERR "%s: " fmt, __func__, ## args) /* NEW: debug levels */ #define HAVE_LIBATA_MSG 1 -- cgit v1.2.3 From 963e4975c6f93c148ca809d986d412201df9af89 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 24 Jul 2008 17:16:06 +0100 Subject: pata_it821x: Driver updates and reworking - Add support for the RDC 1010 variant - Rework the core library to have a read_id method. This allows the hacky bits of it821x to go and prepares us for pata_hd - Switch from WARN to BUG in ata_id_string as it will reboot if you get it wrong so WARN won't be seen - Allow the issue of command 0xFC on the 821x. This is needed to query rebuild status. 
- Tidy up printk formatting - Do more ident rewriting on RAID volumes to handle firmware provided ident data which is rather wonky - Report the firmware revision and device layout in RAID mode - Don't try and disable raid on the 8211 or RDC - they don't have the relevant bits Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 31 +++++- drivers/ata/pata_it821x.c | 270 ++++++++++++++++++++++++++++++++++++++++------ include/linux/libata.h | 3 + 3 files changed, 265 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index f69d1548b562..5ba96c5052c8 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1132,6 +1132,8 @@ void ata_id_string(const u16 *id, unsigned char *s, { unsigned int c; + BUG_ON(len & 1); + while (len > 0) { c = id[ofs] >> 8; *s = c; @@ -1165,8 +1167,6 @@ void ata_id_c_string(const u16 *id, unsigned char *s, { unsigned char *p; - WARN_ON(!(len & 1)); - ata_id_string(id, s, ofs, len - 1); p = s + strnlen(s, len - 1); @@ -1885,6 +1885,23 @@ static u32 ata_pio_mask_no_iordy(const struct ata_device *adev) return 3 << ATA_SHIFT_PIO; } +/** + * ata_do_dev_read_id - default ID read method + * @dev: device + * @tf: proposed taskfile + * @id: data buffer + * + * Issue the identify taskfile and hand back the buffer containing + * identify data. For some RAID controllers and for pre ATA devices + * this function is wrapped or replaced by the driver + */ +unsigned int ata_do_dev_read_id(struct ata_device *dev, + struct ata_taskfile *tf, u16 *id) +{ + return ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, + id, sizeof(id[0]) * ATA_ID_WORDS, 0); +} + /** * ata_dev_read_id - Read ID data from the specified device * @dev: target device @@ -1920,7 +1937,7 @@ int ata_dev_read_id(struct ata_device *dev, unsigned int *p_class, if (ata_msg_ctl(ap)) ata_dev_printk(dev, KERN_DEBUG, "%s: ENTER\n", __func__); - retry: +retry: ata_tf_init(dev, &tf); switch (class) { @@ -1948,8 +1965,11 @@ int ata_dev_read_id(struct ata_device *dev, unsigned int *p_class, */ tf.flags |= ATA_TFLAG_POLLING; - err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE, - id, sizeof(id[0]) * ATA_ID_WORDS, 0); + if (ap->ops->read_id) + err_mask = ap->ops->read_id(dev, &tf, id); + else + err_mask = ata_do_dev_read_id(dev, &tf, id); + if (err_mask) { if (err_mask & AC_ERR_NODEV_HINT) { ata_dev_printk(dev, KERN_DEBUG, @@ -6283,6 +6303,7 @@ EXPORT_SYMBOL_GPL(ata_host_resume); #endif /* CONFIG_PM */ EXPORT_SYMBOL_GPL(ata_id_string); EXPORT_SYMBOL_GPL(ata_id_c_string); +EXPORT_SYMBOL_GPL(ata_do_dev_read_id); EXPORT_SYMBOL_GPL(ata_scsi_simulate); EXPORT_SYMBOL_GPL(ata_pio_need_iordy); diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c index e10816931b2f..27843c70eb9d 100644 --- a/drivers/ata/pata_it821x.c +++ b/drivers/ata/pata_it821x.c @@ -80,7 +80,7 @@ #define DRV_NAME "pata_it821x" -#define DRV_VERSION "0.3.8" +#define DRV_VERSION "0.4.0" struct it821x_dev { @@ -425,6 +425,8 @@ static unsigned int it821x_smart_qc_issue(struct ata_queued_cmd *qc) case ATA_CMD_WRITE_MULTI: case ATA_CMD_WRITE_MULTI_EXT: case ATA_CMD_ID_ATA: + case ATA_CMD_INIT_DEV_PARAMS: + case 0xFC: /* Internal 'report rebuild state' */ /* Arguably should just no-op this one */ case ATA_CMD_SET_FEATURES: return ata_sff_qc_issue(qc); @@ -509,7 +511,7 @@ static void it821x_dev_config(struct ata_device *adev) if (strstr(model_num, "Integrated Technology Express")) { /* RAID mode */ - printk(KERN_INFO "IT821x %sRAID%d 
volume", + ata_dev_printk(adev, KERN_INFO, "%sRAID%d volume", adev->id[147]?"Bootable ":"", adev->id[129]); if (adev->id[129] != 1) @@ -519,37 +521,51 @@ static void it821x_dev_config(struct ata_device *adev) /* This is a controller firmware triggered funny, don't report the drive faulty! */ adev->horkage &= ~ATA_HORKAGE_DIAGNOSTIC; + /* No HPA in 'smart' mode */ + adev->horkage |= ATA_HORKAGE_BROKEN_HPA; } /** - * it821x_ident_hack - Hack identify data up - * @ap: Port + * it821x_read_id - Hack identify data up + * @adev: device to read + * @tf: proposed taskfile + * @id: buffer for returned ident data * - * Walk the devices on this firmware driven port and slightly + * Query the devices on this firmware driven port and slightly * mash the identify data to stop us and common tools trying to * use features not firmware supported. The firmware itself does * some masking (eg SMART) but not enough. - * - * This is a bit of an abuse of the cable method, but it is the - * only method called at the right time. We could modify the libata - * core specifically for ident hacking but while we have one offender - * it seems better to keep the fallout localised. */ -static int it821x_ident_hack(struct ata_port *ap) +static unsigned int it821x_read_id(struct ata_device *adev, + struct ata_taskfile *tf, u16 *id) { - struct ata_device *adev; - ata_link_for_each_dev(adev, &ap->link) { - if (ata_dev_enabled(adev)) { - adev->id[84] &= ~(1 << 6); /* No FUA */ - adev->id[85] &= ~(1 << 10); /* No HPA */ - adev->id[76] = 0; /* No NCQ/AN etc */ - } + unsigned int err_mask; + unsigned char model_num[ATA_ID_PROD_LEN + 1]; + + err_mask = ata_do_dev_read_id(adev, tf, id); + if (err_mask) + return err_mask; + ata_id_c_string(id, model_num, ATA_ID_PROD, sizeof(model_num)); + + id[83] &= ~(1 << 12); /* Cache flush is firmware handled */ + id[83] &= ~(1 << 13); /* Ditto for LBA48 flushes */ + id[84] &= ~(1 << 6); /* No FUA */ + id[85] &= ~(1 << 10); /* No HPA */ + id[76] = 0; /* No NCQ/AN etc */ + + if (strstr(model_num, "Integrated Technology Express")) { + /* Set feature bits the firmware neglects */ + id[49] |= 0x0300; /* LBA, DMA */ + id[82] |= 0x0400; /* LBA48 */ + id[83] &= 0x7FFF; + id[83] |= 0x4000; /* Word 83 is valid */ + id[86] |= 0x0400; /* LBA48 on */ + id[ATA_ID_MAJOR_VER] |= 0x1F; } - return ata_cable_unknown(ap); + return err_mask; } - /** * it821x_check_atapi_dma - ATAPI DMA handler * @qc: Command we are about to issue @@ -577,6 +593,136 @@ static int it821x_check_atapi_dma(struct ata_queued_cmd *qc) return 0; } +/** + * it821x_display_disk - display disk setup + * @n: Device number + * @buf: Buffer block from firmware + * + * Produce a nice informative display of the device setup as provided + * by the firmware. 
+ */ + +static void it821x_display_disk(int n, u8 *buf) +{ + unsigned char id[41]; + int mode = 0; + char *mtype; + char mbuf[8]; + char *cbl = "(40 wire cable)"; + + static const char *types[5] = { + "RAID0", "RAID1" "RAID 0+1", "JBOD", "DISK" + }; + + if (buf[52] > 4) /* No Disk */ + return; + + ata_id_c_string((u16 *)buf, id, 0, 41); + + if (buf[51]) { + mode = ffs(buf[51]); + mtype = "UDMA"; + } else if (buf[49]) { + mode = ffs(buf[49]); + mtype = "MWDMA"; + } + + if (buf[76]) + cbl = ""; + + if (mode) + snprintf(mbuf, 8, "%5s%d", mtype, mode - 1); + else + strcpy(mbuf, "PIO"); + if (buf[52] == 4) + printk(KERN_INFO "%d: %-6s %-8s %s %s\n", + n, mbuf, types[buf[52]], id, cbl); + else + printk(KERN_INFO "%d: %-6s %-8s Volume: %1d %s %s\n", + n, mbuf, types[buf[52]], buf[53], id, cbl); + if (buf[125] < 100) + printk(KERN_INFO "%d: Rebuilding: %d%%\n", n, buf[125]); +} + +/** + * it821x_firmware_command - issue firmware command + * @ap: IT821x port to interrogate + * @cmd: command + * @len: length + * + * Issue firmware commands expecting data back from the controller. We + * use this to issue commands that do not go via the normal paths. Other + * commands such as 0xFC can be issued normally. + */ + +static u8 *it821x_firmware_command(struct ata_port *ap, u8 cmd, int len) +{ + u8 status; + int n = 0; + u16 *buf = kmalloc(len, GFP_KERNEL); + if (buf == NULL) { + printk(KERN_ERR "it821x_firmware_command: Out of memory\n"); + return NULL; + } + /* This isn't quite a normal ATA command as we are talking to the + firmware not the drives */ + ap->ctl |= ATA_NIEN; + iowrite8(ap->ctl, ap->ioaddr.ctl_addr); + ata_wait_idle(ap); + iowrite8(ATA_DEVICE_OBS, ap->ioaddr.device_addr); + iowrite8(cmd, ap->ioaddr.command_addr); + udelay(1); + /* This should be almost immediate but a little paranoia goes a long + way. */ + while(n++ < 10) { + status = ioread8(ap->ioaddr.status_addr); + if (status & ATA_ERR) { + kfree(buf); + printk(KERN_ERR "it821x_firmware_command: rejected\n"); + return NULL; + } + if (status & ATA_DRQ) { + ioread16_rep(ap->ioaddr.data_addr, buf, len/2); + return (u8 *)buf; + } + mdelay(1); + } + kfree(buf); + printk(KERN_ERR "it821x_firmware_command: timeout\n"); + return NULL; +} + +/** + * it821x_probe_firmware - firmware reporting/setup + * @ap: IT821x port being probed + * + * Probe the firmware of the controller by issuing firmware command + * 0xFA and analysing the returned data. + */ + +static void it821x_probe_firmware(struct ata_port *ap) +{ + u8 *buf; + int i; + + /* This is a bit ugly as we can't just issue a task file to a device + as this is controller magic */ + + buf = it821x_firmware_command(ap, 0xFA, 512); + + if (buf != NULL) { + printk(KERN_INFO "pata_it821x: Firmware %02X/%02X/%02X%02X\n", + buf[505], + buf[506], + buf[507], + buf[508]); + for (i = 0; i < 4; i++) + it821x_display_disk(i, buf + 128 * i); + kfree(buf); + } +} + + /** * it821x_port_start - port setup @@ -610,6 +756,8 @@ static int it821x_port_start(struct ata_port *ap) /* Long I/O's although allowed in LBA48 space cause the onboard firmware to enter the twighlight zone */ /* No ATAPI DMA in this mode either */ + if (ap->port_no == 0) + it821x_probe_firmware(ap); } /* Pull the current clocks from 0x50 */ if (conf & (1 << (1 + ap->port_no))) @@ -631,6 +779,25 @@ static int it821x_port_start(struct ata_port *ap) return 0; } +/** + * it821x_rdc_cable - Cable detect for RDC1010 + * @ap: port we are checking + * + * Return the RDC1010 cable type. 
Unlike the IT821x we know how to do + * this and can do host side cable detect + */ + +static int it821x_rdc_cable(struct ata_port *ap) +{ + u16 r40; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + pci_read_config_word(pdev, 0x40, &r40); + if (r40 & (1 << (2 + ap->port_no))) + return ATA_CBL_PATA40; + return ATA_CBL_PATA80; +} + static struct scsi_host_template it821x_sht = { ATA_BMDMA_SHT(DRV_NAME), }; @@ -641,9 +808,10 @@ static struct ata_port_operations it821x_smart_port_ops = { .check_atapi_dma= it821x_check_atapi_dma, .qc_issue = it821x_smart_qc_issue, - .cable_detect = it821x_ident_hack, + .cable_detect = ata_cable_80wire, .set_mode = it821x_smart_set_mode, .dev_config = it821x_dev_config, + .read_id = it821x_read_id, .port_start = it821x_port_start, }; @@ -664,8 +832,29 @@ static struct ata_port_operations it821x_passthru_port_ops = { .port_start = it821x_port_start, }; +static struct ata_port_operations it821x_rdc_port_ops = { + .inherits = &ata_bmdma_port_ops, + + .check_atapi_dma= it821x_check_atapi_dma, + .sff_dev_select = it821x_passthru_dev_select, + .bmdma_start = it821x_passthru_bmdma_start, + .bmdma_stop = it821x_passthru_bmdma_stop, + .qc_issue = it821x_passthru_qc_issue, + + .cable_detect = it821x_rdc_cable, + .set_piomode = it821x_passthru_set_piomode, + .set_dmamode = it821x_passthru_set_dmamode, + + .port_start = it821x_port_start, +}; + static void it821x_disable_raid(struct pci_dev *pdev) { + /* Neither the RDC nor the IT8211 */ + if (pdev->vendor != PCI_VENDOR_ID_ITE || + pdev->device != PCI_DEVICE_ID_ITE_8212) + return; + /* Reset local CPU, and set BIOS not ready */ pci_write_config_byte(pdev, 0x5E, 0x01); @@ -690,6 +879,7 @@ static int it821x_init_one(struct pci_dev *pdev, const struct pci_device_id *id) .flags = ATA_FLAG_SLAVE_POSS, .pio_mask = 0x1f, .mwdma_mask = 0x07, + .udma_mask = ATA_UDMA6, .port_ops = &it821x_smart_port_ops }; static const struct ata_port_info info_passthru = { @@ -699,6 +889,13 @@ static int it821x_init_one(struct pci_dev *pdev, const struct pci_device_id *id) .udma_mask = ATA_UDMA6, .port_ops = &it821x_passthru_port_ops }; + static const struct ata_port_info info_rdc = { + .flags = ATA_FLAG_SLAVE_POSS, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + /* No UDMA */ + .port_ops = &it821x_rdc_port_ops + }; const struct ata_port_info *ppi[] = { NULL, NULL }; static char *mode[2] = { "pass through", "smart" }; @@ -707,21 +904,25 @@ static int it821x_init_one(struct pci_dev *pdev, const struct pci_device_id *id) rc = pcim_enable_device(pdev); if (rc) return rc; + + if (pdev->vendor == PCI_VENDOR_ID_RDC) { + ppi[0] = &info_rdc; + } else { + /* Force the card into bypass mode if so requested */ + if (it8212_noraid) { + printk(KERN_INFO DRV_NAME ": forcing bypass mode.\n"); + it821x_disable_raid(pdev); + } + pci_read_config_byte(pdev, 0x50, &conf); + conf &= 1; - /* Force the card into bypass mode if so requested */ - if (it8212_noraid) { - printk(KERN_INFO DRV_NAME ": forcing bypass mode.\n"); - it821x_disable_raid(pdev); + printk(KERN_INFO DRV_NAME": controller in %s mode.\n", + mode[conf]); + if (conf == 0) + ppi[0] = &info_passthru; + else + ppi[0] = &info_smart; } - pci_read_config_byte(pdev, 0x50, &conf); - conf &= 1; - - printk(KERN_INFO DRV_NAME ": controller in %s mode.\n", mode[conf]); - if (conf == 0) - ppi[0] = &info_passthru; - else - ppi[0] = &info_smart; - return ata_pci_sff_init_one(pdev, ppi, &it821x_sht, NULL); } @@ -745,6 +946,7 @@ static int it821x_reinit_one(struct pci_dev *pdev) static const struct pci_device_id it821x[] 
= { { PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8211), }, { PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8212), }, + { PCI_VDEVICE(RDC, 0x1010), }, { }, }; diff --git a/include/linux/libata.h b/include/linux/libata.h index d4b8e5fa3e8b..06b80337303b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -750,6 +750,7 @@ struct ata_port_operations { void (*set_piomode)(struct ata_port *ap, struct ata_device *dev); void (*set_dmamode)(struct ata_port *ap, struct ata_device *dev); int (*set_mode)(struct ata_link *link, struct ata_device **r_failed_dev); + unsigned int (*read_id)(struct ata_device *dev, struct ata_taskfile *tf, u16 *id); void (*dev_config)(struct ata_device *dev); @@ -951,6 +952,8 @@ extern void ata_id_string(const u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); extern void ata_id_c_string(const u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); +extern unsigned int ata_do_dev_read_id(struct ata_device *dev, + struct ata_taskfile *tf, u16 *id); extern void ata_qc_complete(struct ata_queued_cmd *qc); extern int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active); extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd, -- cgit v1.2.3 From ae375044d31075a31de5a839e07ded7f67b660aa Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 31 Jul 2008 00:38:01 -0700 Subject: netfilter: nf_conntrack_tcp: decrease timeouts while data in unacknowledged In order to time out dead connections quicker, keep track of outstanding data and cap the timeout. Suggested by Herbert Xu. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_tcp.h | 3 +++ net/netfilter/nf_conntrack_proto_tcp.c | 29 ++++++++++++++++++++++++----- 2 files changed, 27 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h index 22ce29995f13..a049df4f2236 100644 --- a/include/linux/netfilter/nf_conntrack_tcp.h +++ b/include/linux/netfilter/nf_conntrack_tcp.h @@ -30,6 +30,9 @@ enum tcp_conntrack { /* Be liberal in window checking */ #define IP_CT_TCP_FLAG_BE_LIBERAL 0x08 +/* Has unacknowledged data */ +#define IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED 0x10 + struct nf_ct_tcp_flags { u_int8_t flags; u_int8_t mask; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 420a10d8eb1e..6f61261888ef 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -67,7 +67,8 @@ static const char *const tcp_conntrack_names[] = { /* RFC1122 says the R2 limit should be at least 100 seconds. Linux uses 15 packets as limit, which corresponds to ~13-30min depending on RTO. */ -static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; +static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; +static unsigned int nf_ct_tcp_timeout_unacknowledged __read_mostly = 5 MINS; static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = { [TCP_CONNTRACK_SYN_SENT] = 2 MINS, @@ -625,8 +626,10 @@ static bool tcp_in_window(const struct nf_conn *ct, swin = win + (sack - ack); if (sender->td_maxwin < swin) sender->td_maxwin = swin; - if (after(end, sender->td_end)) + if (after(end, sender->td_end)) { sender->td_end = end; + sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; + } /* * Update receiver data. 
*/ @@ -637,6 +640,8 @@ static bool tcp_in_window(const struct nf_conn *ct, if (win == 0) receiver->td_maxend++; } + if (ack == receiver->td_end) + receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; /* * Check retransmissions. @@ -951,9 +956,16 @@ static int tcp_packet(struct nf_conn *ct, if (old_state != new_state && new_state == TCP_CONNTRACK_FIN_WAIT) ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; - timeout = ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans - && tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans - ? nf_ct_tcp_timeout_max_retrans : tcp_timeouts[new_state]; + + if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && + tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans) + timeout = nf_ct_tcp_timeout_max_retrans; + else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & + IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && + tcp_timeouts[new_state] > nf_ct_tcp_timeout_unacknowledged) + timeout = nf_ct_tcp_timeout_unacknowledged; + else + timeout = tcp_timeouts[new_state]; write_unlock_bh(&tcp_lock); nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); @@ -1235,6 +1247,13 @@ static struct ctl_table tcp_sysctl_table[] = { .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, + { + .procname = "nf_conntrack_tcp_timeout_unacknowledged", + .data = &nf_ct_tcp_timeout_unacknowledged, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, { .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE, .procname = "nf_conntrack_tcp_loose", -- cgit v1.2.3 From dacdd0e04768da1fd2b24a6ee274c582b40d0c5b Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 17 Jul 2008 16:54:19 -0700 Subject: [PATCH] configfs: Include linux/err.h in linux/configfs.h We now use PTR_ERR() in the ->make_item() and ->make_group() operations. Folks including configfs.h need err.h. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/configfs/dir.c | 2 +- include/linux/configfs.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 179589be063a..2495f23e33f4 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1094,7 +1094,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) kfree(name); if (ret) { /* - * If item == NULL, then link_obj() was never called. + * If ret != 0, then link_obj() was never called. * There are no extra references to clean up. */ goto out_put; diff --git a/include/linux/configfs.h b/include/linux/configfs.h index d62c19ff041c..0a5491baf0bc 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -40,6 +40,7 @@ #include #include #include +#include #include -- cgit v1.2.3 From ecb3d28c7edd58b54f16838c434b342ba9195bec Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 18 Jun 2008 19:29:05 -0700 Subject: [PATCH] configfs: Convenience macros for attribute definition. Sysfs has the _ATTR() and _ATTR_RO() macros to make defining extended form attributes easier. configfs should have something similiar. - _CONFIGFS_ATTR() and _CONFIGFS_ATTR_RO() are the counterparts to the sysfs macros. - CONFIGFS_ATTR_STRUCT() creates the extended form attribute structure. - CONFIGFS_ATTR_OPS() defines the show_attribute()/store_attribute() operations that call the show()/store() operations of the extended form configfs_attributes. 
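[Editorial illustration, not part of the patch: a minimal sketch of how the new helpers fit together for a hypothetical item type named "foo"; the complete, authoritative usage is in configfs_example_macros.c added below.]

/* Sketch only: assumes a config_item-embedding type named "foo". */
struct foo {
	struct config_item item;
	int storeme;
};

static inline struct foo *to_foo(struct config_item *item)
{
	return item ? container_of(item, struct foo, item) : NULL;
}

/* Declares struct foo_attribute with show()/store() taking a struct foo. */
CONFIGFS_ATTR_STRUCT(foo);

static ssize_t foo_storeme_read(struct foo *foo, char *page)
{
	return sprintf(page, "%d\n", foo->storeme);
}

/* Read-only attribute built with the _RO initializer. */
static struct foo_attribute foo_attr_storeme =
	__CONFIGFS_ATTR_RO(storeme, foo_storeme_read);

/* Generates foo_attr_show()/foo_attr_store(), which dispatch to the
 * per-attribute show()/store() via to_foo(). */
CONFIGFS_ATTR_OPS(foo);

static struct configfs_item_operations foo_item_ops = {
	.show_attribute		= foo_attr_show,
	.store_attribute	= foo_attr_store,
};

The generated foo_attr_show()/foo_attr_store() wrappers mirror the open-coded versions kept in configfs_example_explicit.c; the attribute still has to be listed (as &foo_attr_storeme.attr) in the item type's ct_attrs array as usual.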
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- Documentation/filesystems/configfs/configfs.txt | 17 +- .../filesystems/configfs/configfs_example.c | 485 --------------------- .../configfs/configfs_example_explicit.c | 485 +++++++++++++++++++++ .../filesystems/configfs/configfs_example_macros.c | 448 +++++++++++++++++++ include/linux/configfs.h | 67 ++- 5 files changed, 1012 insertions(+), 490 deletions(-) delete mode 100644 Documentation/filesystems/configfs/configfs_example.c create mode 100644 Documentation/filesystems/configfs/configfs_example_explicit.c create mode 100644 Documentation/filesystems/configfs/configfs_example_macros.c (limited to 'include/linux') diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt index 44c97e6accb2..fabcb0e00f25 100644 --- a/Documentation/filesystems/configfs/configfs.txt +++ b/Documentation/filesystems/configfs/configfs.txt @@ -311,9 +311,20 @@ the subsystem must be ready for it. [An Example] The best example of these basic concepts is the simple_children -subsystem/group and the simple_child item in configfs_example.c It -shows a trivial object displaying and storing an attribute, and a simple -group creating and destroying these children. +subsystem/group and the simple_child item in configfs_example_explicit.c +and configfs_example_macros.c. It shows a trivial object displaying and +storing an attribute, and a simple group creating and destroying these +children. + +The only difference between configfs_example_explicit.c and +configfs_example_macros.c is how the attributes of the childless item +are defined. The childless item has extended attributes, each with +their own show()/store() operation. This follows a convention commonly +used in sysfs. configfs_example_explicit.c creates these attributes +by explicitly defining the structures involved. Conversely +configfs_example_macros.c uses some convenience macros from configfs.h +to define the attributes. These macros are similar to their sysfs +counterparts. [Hierarchy Navigation and the Subsystem Mutex] diff --git a/Documentation/filesystems/configfs/configfs_example.c b/Documentation/filesystems/configfs/configfs_example.c deleted file mode 100644 index 039648791701..000000000000 --- a/Documentation/filesystems/configfs/configfs_example.c +++ /dev/null @@ -1,485 +0,0 @@ -/* - * vim: noexpandtab ts=8 sts=0 sw=8: - * - * configfs_example.c - This file is a demonstration module containing - * a number of configfs subsystems. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * - * Based on sysfs: - * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel - * - * configfs Copyright (C) 2005 Oracle. All rights reserved. - */ - -#include -#include -#include - -#include - - - -/* - * 01-childless - * - * This first example is a childless subsystem. 
It cannot create - * any config_items. It just has attributes. - * - * Note that we are enclosing the configfs_subsystem inside a container. - * This is not necessary if a subsystem has no attributes directly - * on the subsystem. See the next example, 02-simple-children, for - * such a subsystem. - */ - -struct childless { - struct configfs_subsystem subsys; - int showme; - int storeme; -}; - -struct childless_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct childless *, char *); - ssize_t (*store)(struct childless *, const char *, size_t); -}; - -static inline struct childless *to_childless(struct config_item *item) -{ - return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL; -} - -static ssize_t childless_showme_read(struct childless *childless, - char *page) -{ - ssize_t pos; - - pos = sprintf(page, "%d\n", childless->showme); - childless->showme++; - - return pos; -} - -static ssize_t childless_storeme_read(struct childless *childless, - char *page) -{ - return sprintf(page, "%d\n", childless->storeme); -} - -static ssize_t childless_storeme_write(struct childless *childless, - const char *page, - size_t count) -{ - unsigned long tmp; - char *p = (char *) page; - - tmp = simple_strtoul(p, &p, 10); - if (!p || (*p && (*p != '\n'))) - return -EINVAL; - - if (tmp > INT_MAX) - return -ERANGE; - - childless->storeme = tmp; - - return count; -} - -static ssize_t childless_description_read(struct childless *childless, - char *page) -{ - return sprintf(page, -"[01-childless]\n" -"\n" -"The childless subsystem is the simplest possible subsystem in\n" -"configfs. It does not support the creation of child config_items.\n" -"It only has a few attributes. In fact, it isn't much different\n" -"than a directory in /proc.\n"); -} - -static struct childless_attribute childless_attr_showme = { - .attr = { .ca_owner = THIS_MODULE, .ca_name = "showme", .ca_mode = S_IRUGO }, - .show = childless_showme_read, -}; -static struct childless_attribute childless_attr_storeme = { - .attr = { .ca_owner = THIS_MODULE, .ca_name = "storeme", .ca_mode = S_IRUGO | S_IWUSR }, - .show = childless_storeme_read, - .store = childless_storeme_write, -}; -static struct childless_attribute childless_attr_description = { - .attr = { .ca_owner = THIS_MODULE, .ca_name = "description", .ca_mode = S_IRUGO }, - .show = childless_description_read, -}; - -static struct configfs_attribute *childless_attrs[] = { - &childless_attr_showme.attr, - &childless_attr_storeme.attr, - &childless_attr_description.attr, - NULL, -}; - -static ssize_t childless_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - struct childless *childless = to_childless(item); - struct childless_attribute *childless_attr = - container_of(attr, struct childless_attribute, attr); - ssize_t ret = 0; - - if (childless_attr->show) - ret = childless_attr->show(childless, page); - return ret; -} - -static ssize_t childless_attr_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct childless *childless = to_childless(item); - struct childless_attribute *childless_attr = - container_of(attr, struct childless_attribute, attr); - ssize_t ret = -EINVAL; - - if (childless_attr->store) - ret = childless_attr->store(childless, page, count); - return ret; -} - -static struct configfs_item_operations childless_item_ops = { - .show_attribute = childless_attr_show, - .store_attribute = childless_attr_store, -}; - 
-static struct config_item_type childless_type = { - .ct_item_ops = &childless_item_ops, - .ct_attrs = childless_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct childless childless_subsys = { - .subsys = { - .su_group = { - .cg_item = { - .ci_namebuf = "01-childless", - .ci_type = &childless_type, - }, - }, - }, -}; - - -/* ----------------------------------------------------------------- */ - -/* - * 02-simple-children - * - * This example merely has a simple one-attribute child. Note that - * there is no extra attribute structure, as the child's attribute is - * known from the get-go. Also, there is no container for the - * subsystem, as it has no attributes of its own. - */ - -struct simple_child { - struct config_item item; - int storeme; -}; - -static inline struct simple_child *to_simple_child(struct config_item *item) -{ - return item ? container_of(item, struct simple_child, item) : NULL; -} - -static struct configfs_attribute simple_child_attr_storeme = { - .ca_owner = THIS_MODULE, - .ca_name = "storeme", - .ca_mode = S_IRUGO | S_IWUSR, -}; - -static struct configfs_attribute *simple_child_attrs[] = { - &simple_child_attr_storeme, - NULL, -}; - -static ssize_t simple_child_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - ssize_t count; - struct simple_child *simple_child = to_simple_child(item); - - count = sprintf(page, "%d\n", simple_child->storeme); - - return count; -} - -static ssize_t simple_child_attr_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct simple_child *simple_child = to_simple_child(item); - unsigned long tmp; - char *p = (char *) page; - - tmp = simple_strtoul(p, &p, 10); - if (!p || (*p && (*p != '\n'))) - return -EINVAL; - - if (tmp > INT_MAX) - return -ERANGE; - - simple_child->storeme = tmp; - - return count; -} - -static void simple_child_release(struct config_item *item) -{ - kfree(to_simple_child(item)); -} - -static struct configfs_item_operations simple_child_item_ops = { - .release = simple_child_release, - .show_attribute = simple_child_attr_show, - .store_attribute = simple_child_attr_store, -}; - -static struct config_item_type simple_child_type = { - .ct_item_ops = &simple_child_item_ops, - .ct_attrs = simple_child_attrs, - .ct_owner = THIS_MODULE, -}; - - -struct simple_children { - struct config_group group; -}; - -static inline struct simple_children *to_simple_children(struct config_item *item) -{ - return item ? container_of(to_config_group(item), struct simple_children, group) : NULL; -} - -static struct config_item *simple_children_make_item(struct config_group *group, const char *name) -{ - struct simple_child *simple_child; - - simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); - if (!simple_child) - return ERR_PTR(-ENOMEM); - - - config_item_init_type_name(&simple_child->item, name, - &simple_child_type); - - simple_child->storeme = 0; - - return &simple_child->item; -} - -static struct configfs_attribute simple_children_attr_description = { - .ca_owner = THIS_MODULE, - .ca_name = "description", - .ca_mode = S_IRUGO, -}; - -static struct configfs_attribute *simple_children_attrs[] = { - &simple_children_attr_description, - NULL, -}; - -static ssize_t simple_children_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - return sprintf(page, -"[02-simple-children]\n" -"\n" -"This subsystem allows the creation of child config_items. 
These\n" -"items have only one attribute that is readable and writeable.\n"); -} - -static void simple_children_release(struct config_item *item) -{ - kfree(to_simple_children(item)); -} - -static struct configfs_item_operations simple_children_item_ops = { - .release = simple_children_release, - .show_attribute = simple_children_attr_show, -}; - -/* - * Note that, since no extra work is required on ->drop_item(), - * no ->drop_item() is provided. - */ -static struct configfs_group_operations simple_children_group_ops = { - .make_item = simple_children_make_item, -}; - -static struct config_item_type simple_children_type = { - .ct_item_ops = &simple_children_item_ops, - .ct_group_ops = &simple_children_group_ops, - .ct_attrs = simple_children_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct configfs_subsystem simple_children_subsys = { - .su_group = { - .cg_item = { - .ci_namebuf = "02-simple-children", - .ci_type = &simple_children_type, - }, - }, -}; - - -/* ----------------------------------------------------------------- */ - -/* - * 03-group-children - * - * This example reuses the simple_children group from above. However, - * the simple_children group is not the subsystem itself, it is a - * child of the subsystem. Creation of a group in the subsystem creates - * a new simple_children group. That group can then have simple_child - * children of its own. - */ - -static struct config_group *group_children_make_group(struct config_group *group, const char *name) -{ - struct simple_children *simple_children; - - simple_children = kzalloc(sizeof(struct simple_children), - GFP_KERNEL); - if (!simple_children) - return ERR_PTR(-ENOMEM); - - - config_group_init_type_name(&simple_children->group, name, - &simple_children_type); - - return &simple_children->group; -} - -static struct configfs_attribute group_children_attr_description = { - .ca_owner = THIS_MODULE, - .ca_name = "description", - .ca_mode = S_IRUGO, -}; - -static struct configfs_attribute *group_children_attrs[] = { - &group_children_attr_description, - NULL, -}; - -static ssize_t group_children_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - return sprintf(page, -"[03-group-children]\n" -"\n" -"This subsystem allows the creation of child config_groups. These\n" -"groups are like the subsystem simple-children.\n"); -} - -static struct configfs_item_operations group_children_item_ops = { - .show_attribute = group_children_attr_show, -}; - -/* - * Note that, since no extra work is required on ->drop_item(), - * no ->drop_item() is provided. - */ -static struct configfs_group_operations group_children_group_ops = { - .make_group = group_children_make_group, -}; - -static struct config_item_type group_children_type = { - .ct_item_ops = &group_children_item_ops, - .ct_group_ops = &group_children_group_ops, - .ct_attrs = group_children_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct configfs_subsystem group_children_subsys = { - .su_group = { - .cg_item = { - .ci_namebuf = "03-group-children", - .ci_type = &group_children_type, - }, - }, -}; - -/* ----------------------------------------------------------------- */ - -/* - * We're now done with our subsystem definitions. - * For convenience in this module, here's a list of them all. It - * allows the init function to easily register them. Most modules - * will only have one subsystem, and will only call register_subsystem - * on it directly. 
- */ -static struct configfs_subsystem *example_subsys[] = { - &childless_subsys.subsys, - &simple_children_subsys, - &group_children_subsys, - NULL, -}; - -static int __init configfs_example_init(void) -{ - int ret; - int i; - struct configfs_subsystem *subsys; - - for (i = 0; example_subsys[i]; i++) { - subsys = example_subsys[i]; - - config_group_init(&subsys->su_group); - mutex_init(&subsys->su_mutex); - ret = configfs_register_subsystem(subsys); - if (ret) { - printk(KERN_ERR "Error %d while registering subsystem %s\n", - ret, - subsys->su_group.cg_item.ci_namebuf); - goto out_unregister; - } - } - - return 0; - -out_unregister: - for (; i >= 0; i--) { - configfs_unregister_subsystem(example_subsys[i]); - } - - return ret; -} - -static void __exit configfs_example_exit(void) -{ - int i; - - for (i = 0; example_subsys[i]; i++) { - configfs_unregister_subsystem(example_subsys[i]); - } -} - -module_init(configfs_example_init); -module_exit(configfs_example_exit); -MODULE_LICENSE("GPL"); diff --git a/Documentation/filesystems/configfs/configfs_example_explicit.c b/Documentation/filesystems/configfs/configfs_example_explicit.c new file mode 100644 index 000000000000..d428cc9f07f3 --- /dev/null +++ b/Documentation/filesystems/configfs/configfs_example_explicit.c @@ -0,0 +1,485 @@ +/* + * vim: noexpandtab ts=8 sts=0 sw=8: + * + * configfs_example_explicit.c - This file is a demonstration module + * containing a number of configfs subsystems. It explicitly defines + * each structure without using the helper macros defined in + * configfs.h. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on sysfs: + * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel + * + * configfs Copyright (C) 2005 Oracle. All rights reserved. + */ + +#include +#include +#include + +#include + + + +/* + * 01-childless + * + * This first example is a childless subsystem. It cannot create + * any config_items. It just has attributes. + * + * Note that we are enclosing the configfs_subsystem inside a container. + * This is not necessary if a subsystem has no attributes directly + * on the subsystem. See the next example, 02-simple-children, for + * such a subsystem. + */ + +struct childless { + struct configfs_subsystem subsys; + int showme; + int storeme; +}; + +struct childless_attribute { + struct configfs_attribute attr; + ssize_t (*show)(struct childless *, char *); + ssize_t (*store)(struct childless *, const char *, size_t); +}; + +static inline struct childless *to_childless(struct config_item *item) +{ + return item ? 
container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL; +} + +static ssize_t childless_showme_read(struct childless *childless, + char *page) +{ + ssize_t pos; + + pos = sprintf(page, "%d\n", childless->showme); + childless->showme++; + + return pos; +} + +static ssize_t childless_storeme_read(struct childless *childless, + char *page) +{ + return sprintf(page, "%d\n", childless->storeme); +} + +static ssize_t childless_storeme_write(struct childless *childless, + const char *page, + size_t count) +{ + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + + childless->storeme = tmp; + + return count; +} + +static ssize_t childless_description_read(struct childless *childless, + char *page) +{ + return sprintf(page, +"[01-childless]\n" +"\n" +"The childless subsystem is the simplest possible subsystem in\n" +"configfs. It does not support the creation of child config_items.\n" +"It only has a few attributes. In fact, it isn't much different\n" +"than a directory in /proc.\n"); +} + +static struct childless_attribute childless_attr_showme = { + .attr = { .ca_owner = THIS_MODULE, .ca_name = "showme", .ca_mode = S_IRUGO }, + .show = childless_showme_read, +}; +static struct childless_attribute childless_attr_storeme = { + .attr = { .ca_owner = THIS_MODULE, .ca_name = "storeme", .ca_mode = S_IRUGO | S_IWUSR }, + .show = childless_storeme_read, + .store = childless_storeme_write, +}; +static struct childless_attribute childless_attr_description = { + .attr = { .ca_owner = THIS_MODULE, .ca_name = "description", .ca_mode = S_IRUGO }, + .show = childless_description_read, +}; + +static struct configfs_attribute *childless_attrs[] = { + &childless_attr_showme.attr, + &childless_attr_storeme.attr, + &childless_attr_description.attr, + NULL, +}; + +static ssize_t childless_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + struct childless *childless = to_childless(item); + struct childless_attribute *childless_attr = + container_of(attr, struct childless_attribute, attr); + ssize_t ret = 0; + + if (childless_attr->show) + ret = childless_attr->show(childless, page); + return ret; +} + +static ssize_t childless_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *page, size_t count) +{ + struct childless *childless = to_childless(item); + struct childless_attribute *childless_attr = + container_of(attr, struct childless_attribute, attr); + ssize_t ret = -EINVAL; + + if (childless_attr->store) + ret = childless_attr->store(childless, page, count); + return ret; +} + +static struct configfs_item_operations childless_item_ops = { + .show_attribute = childless_attr_show, + .store_attribute = childless_attr_store, +}; + +static struct config_item_type childless_type = { + .ct_item_ops = &childless_item_ops, + .ct_attrs = childless_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct childless childless_subsys = { + .subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "01-childless", + .ci_type = &childless_type, + }, + }, + }, +}; + + +/* ----------------------------------------------------------------- */ + +/* + * 02-simple-children + * + * This example merely has a simple one-attribute child. Note that + * there is no extra attribute structure, as the child's attribute is + * known from the get-go. 
Also, there is no container for the + * subsystem, as it has no attributes of its own. + */ + +struct simple_child { + struct config_item item; + int storeme; +}; + +static inline struct simple_child *to_simple_child(struct config_item *item) +{ + return item ? container_of(item, struct simple_child, item) : NULL; +} + +static struct configfs_attribute simple_child_attr_storeme = { + .ca_owner = THIS_MODULE, + .ca_name = "storeme", + .ca_mode = S_IRUGO | S_IWUSR, +}; + +static struct configfs_attribute *simple_child_attrs[] = { + &simple_child_attr_storeme, + NULL, +}; + +static ssize_t simple_child_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + ssize_t count; + struct simple_child *simple_child = to_simple_child(item); + + count = sprintf(page, "%d\n", simple_child->storeme); + + return count; +} + +static ssize_t simple_child_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *page, size_t count) +{ + struct simple_child *simple_child = to_simple_child(item); + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + + simple_child->storeme = tmp; + + return count; +} + +static void simple_child_release(struct config_item *item) +{ + kfree(to_simple_child(item)); +} + +static struct configfs_item_operations simple_child_item_ops = { + .release = simple_child_release, + .show_attribute = simple_child_attr_show, + .store_attribute = simple_child_attr_store, +}; + +static struct config_item_type simple_child_type = { + .ct_item_ops = &simple_child_item_ops, + .ct_attrs = simple_child_attrs, + .ct_owner = THIS_MODULE, +}; + + +struct simple_children { + struct config_group group; +}; + +static inline struct simple_children *to_simple_children(struct config_item *item) +{ + return item ? container_of(to_config_group(item), struct simple_children, group) : NULL; +} + +static struct config_item *simple_children_make_item(struct config_group *group, const char *name) +{ + struct simple_child *simple_child; + + simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); + if (!simple_child) + return ERR_PTR(-ENOMEM); + + config_item_init_type_name(&simple_child->item, name, + &simple_child_type); + + simple_child->storeme = 0; + + return &simple_child->item; +} + +static struct configfs_attribute simple_children_attr_description = { + .ca_owner = THIS_MODULE, + .ca_name = "description", + .ca_mode = S_IRUGO, +}; + +static struct configfs_attribute *simple_children_attrs[] = { + &simple_children_attr_description, + NULL, +}; + +static ssize_t simple_children_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + return sprintf(page, +"[02-simple-children]\n" +"\n" +"This subsystem allows the creation of child config_items. These\n" +"items have only one attribute that is readable and writeable.\n"); +} + +static void simple_children_release(struct config_item *item) +{ + kfree(to_simple_children(item)); +} + +static struct configfs_item_operations simple_children_item_ops = { + .release = simple_children_release, + .show_attribute = simple_children_attr_show, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. 
+ */ +static struct configfs_group_operations simple_children_group_ops = { + .make_item = simple_children_make_item, +}; + +static struct config_item_type simple_children_type = { + .ct_item_ops = &simple_children_item_ops, + .ct_group_ops = &simple_children_group_ops, + .ct_attrs = simple_children_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem simple_children_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "02-simple-children", + .ci_type = &simple_children_type, + }, + }, +}; + + +/* ----------------------------------------------------------------- */ + +/* + * 03-group-children + * + * This example reuses the simple_children group from above. However, + * the simple_children group is not the subsystem itself, it is a + * child of the subsystem. Creation of a group in the subsystem creates + * a new simple_children group. That group can then have simple_child + * children of its own. + */ + +static struct config_group *group_children_make_group(struct config_group *group, const char *name) +{ + struct simple_children *simple_children; + + simple_children = kzalloc(sizeof(struct simple_children), + GFP_KERNEL); + if (!simple_children) + return ERR_PTR(-ENOMEM); + + config_group_init_type_name(&simple_children->group, name, + &simple_children_type); + + return &simple_children->group; +} + +static struct configfs_attribute group_children_attr_description = { + .ca_owner = THIS_MODULE, + .ca_name = "description", + .ca_mode = S_IRUGO, +}; + +static struct configfs_attribute *group_children_attrs[] = { + &group_children_attr_description, + NULL, +}; + +static ssize_t group_children_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + return sprintf(page, +"[03-group-children]\n" +"\n" +"This subsystem allows the creation of child config_groups. These\n" +"groups are like the subsystem simple-children.\n"); +} + +static struct configfs_item_operations group_children_item_ops = { + .show_attribute = group_children_attr_show, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations group_children_group_ops = { + .make_group = group_children_make_group, +}; + +static struct config_item_type group_children_type = { + .ct_item_ops = &group_children_item_ops, + .ct_group_ops = &group_children_group_ops, + .ct_attrs = group_children_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem group_children_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "03-group-children", + .ci_type = &group_children_type, + }, + }, +}; + +/* ----------------------------------------------------------------- */ + +/* + * We're now done with our subsystem definitions. + * For convenience in this module, here's a list of them all. It + * allows the init function to easily register them. Most modules + * will only have one subsystem, and will only call register_subsystem + * on it directly. 
+ */ +static struct configfs_subsystem *example_subsys[] = { + &childless_subsys.subsys, + &simple_children_subsys, + &group_children_subsys, + NULL, +}; + +static int __init configfs_example_init(void) +{ + int ret; + int i; + struct configfs_subsystem *subsys; + + for (i = 0; example_subsys[i]; i++) { + subsys = example_subsys[i]; + + config_group_init(&subsys->su_group); + mutex_init(&subsys->su_mutex); + ret = configfs_register_subsystem(subsys); + if (ret) { + printk(KERN_ERR "Error %d while registering subsystem %s\n", + ret, + subsys->su_group.cg_item.ci_namebuf); + goto out_unregister; + } + } + + return 0; + +out_unregister: + for (; i >= 0; i--) { + configfs_unregister_subsystem(example_subsys[i]); + } + + return ret; +} + +static void __exit configfs_example_exit(void) +{ + int i; + + for (i = 0; example_subsys[i]; i++) { + configfs_unregister_subsystem(example_subsys[i]); + } +} + +module_init(configfs_example_init); +module_exit(configfs_example_exit); +MODULE_LICENSE("GPL"); diff --git a/Documentation/filesystems/configfs/configfs_example_macros.c b/Documentation/filesystems/configfs/configfs_example_macros.c new file mode 100644 index 000000000000..d8e30a0378aa --- /dev/null +++ b/Documentation/filesystems/configfs/configfs_example_macros.c @@ -0,0 +1,448 @@ +/* + * vim: noexpandtab ts=8 sts=0 sw=8: + * + * configfs_example_macros.c - This file is a demonstration module + * containing a number of configfs subsystems. It uses the helper + * macros defined by configfs.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on sysfs: + * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel + * + * configfs Copyright (C) 2005 Oracle. All rights reserved. + */ + +#include +#include +#include + +#include + + + +/* + * 01-childless + * + * This first example is a childless subsystem. It cannot create + * any config_items. It just has attributes. + * + * Note that we are enclosing the configfs_subsystem inside a container. + * This is not necessary if a subsystem has no attributes directly + * on the subsystem. See the next example, 02-simple-children, for + * such a subsystem. + */ + +struct childless { + struct configfs_subsystem subsys; + int showme; + int storeme; +}; + +static inline struct childless *to_childless(struct config_item *item) +{ + return item ? 
container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL; +} + +CONFIGFS_ATTR_STRUCT(childless); +#define CHILDLESS_ATTR(_name, _mode, _show, _store) \ +struct childless_attribute childless_attr_##_name = __CONFIGFS_ATTR(_name, _mode, _show, _store) +#define CHILDLESS_ATTR_RO(_name, _show) \ +struct childless_attribute childless_attr_##_name = __CONFIGFS_ATTR_RO(_name, _show); + +static ssize_t childless_showme_read(struct childless *childless, + char *page) +{ + ssize_t pos; + + pos = sprintf(page, "%d\n", childless->showme); + childless->showme++; + + return pos; +} + +static ssize_t childless_storeme_read(struct childless *childless, + char *page) +{ + return sprintf(page, "%d\n", childless->storeme); +} + +static ssize_t childless_storeme_write(struct childless *childless, + const char *page, + size_t count) +{ + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + + childless->storeme = tmp; + + return count; +} + +static ssize_t childless_description_read(struct childless *childless, + char *page) +{ + return sprintf(page, +"[01-childless]\n" +"\n" +"The childless subsystem is the simplest possible subsystem in\n" +"configfs. It does not support the creation of child config_items.\n" +"It only has a few attributes. In fact, it isn't much different\n" +"than a directory in /proc.\n"); +} + +CHILDLESS_ATTR_RO(showme, childless_showme_read); +CHILDLESS_ATTR(storeme, S_IRUGO | S_IWUSR, childless_storeme_read, + childless_storeme_write); +CHILDLESS_ATTR_RO(description, childless_description_read); + +static struct configfs_attribute *childless_attrs[] = { + &childless_attr_showme.attr, + &childless_attr_storeme.attr, + &childless_attr_description.attr, + NULL, +}; + +CONFIGFS_ATTR_OPS(childless); +static struct configfs_item_operations childless_item_ops = { + .show_attribute = childless_attr_show, + .store_attribute = childless_attr_store, +}; + +static struct config_item_type childless_type = { + .ct_item_ops = &childless_item_ops, + .ct_attrs = childless_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct childless childless_subsys = { + .subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "01-childless", + .ci_type = &childless_type, + }, + }, + }, +}; + + +/* ----------------------------------------------------------------- */ + +/* + * 02-simple-children + * + * This example merely has a simple one-attribute child. Note that + * there is no extra attribute structure, as the child's attribute is + * known from the get-go. Also, there is no container for the + * subsystem, as it has no attributes of its own. + */ + +struct simple_child { + struct config_item item; + int storeme; +}; + +static inline struct simple_child *to_simple_child(struct config_item *item) +{ + return item ? 
container_of(item, struct simple_child, item) : NULL; +} + +static struct configfs_attribute simple_child_attr_storeme = { + .ca_owner = THIS_MODULE, + .ca_name = "storeme", + .ca_mode = S_IRUGO | S_IWUSR, +}; + +static struct configfs_attribute *simple_child_attrs[] = { + &simple_child_attr_storeme, + NULL, +}; + +static ssize_t simple_child_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + ssize_t count; + struct simple_child *simple_child = to_simple_child(item); + + count = sprintf(page, "%d\n", simple_child->storeme); + + return count; +} + +static ssize_t simple_child_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *page, size_t count) +{ + struct simple_child *simple_child = to_simple_child(item); + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + + simple_child->storeme = tmp; + + return count; +} + +static void simple_child_release(struct config_item *item) +{ + kfree(to_simple_child(item)); +} + +static struct configfs_item_operations simple_child_item_ops = { + .release = simple_child_release, + .show_attribute = simple_child_attr_show, + .store_attribute = simple_child_attr_store, +}; + +static struct config_item_type simple_child_type = { + .ct_item_ops = &simple_child_item_ops, + .ct_attrs = simple_child_attrs, + .ct_owner = THIS_MODULE, +}; + + +struct simple_children { + struct config_group group; +}; + +static inline struct simple_children *to_simple_children(struct config_item *item) +{ + return item ? container_of(to_config_group(item), struct simple_children, group) : NULL; +} + +static struct config_item *simple_children_make_item(struct config_group *group, const char *name) +{ + struct simple_child *simple_child; + + simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); + if (!simple_child) + return ERR_PTR(-ENOMEM); + + config_item_init_type_name(&simple_child->item, name, + &simple_child_type); + + simple_child->storeme = 0; + + return &simple_child->item; +} + +static struct configfs_attribute simple_children_attr_description = { + .ca_owner = THIS_MODULE, + .ca_name = "description", + .ca_mode = S_IRUGO, +}; + +static struct configfs_attribute *simple_children_attrs[] = { + &simple_children_attr_description, + NULL, +}; + +static ssize_t simple_children_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + return sprintf(page, +"[02-simple-children]\n" +"\n" +"This subsystem allows the creation of child config_items. These\n" +"items have only one attribute that is readable and writeable.\n"); +} + +static void simple_children_release(struct config_item *item) +{ + kfree(to_simple_children(item)); +} + +static struct configfs_item_operations simple_children_item_ops = { + .release = simple_children_release, + .show_attribute = simple_children_attr_show, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. 
+ */ +static struct configfs_group_operations simple_children_group_ops = { + .make_item = simple_children_make_item, +}; + +static struct config_item_type simple_children_type = { + .ct_item_ops = &simple_children_item_ops, + .ct_group_ops = &simple_children_group_ops, + .ct_attrs = simple_children_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem simple_children_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "02-simple-children", + .ci_type = &simple_children_type, + }, + }, +}; + + +/* ----------------------------------------------------------------- */ + +/* + * 03-group-children + * + * This example reuses the simple_children group from above. However, + * the simple_children group is not the subsystem itself, it is a + * child of the subsystem. Creation of a group in the subsystem creates + * a new simple_children group. That group can then have simple_child + * children of its own. + */ + +static struct config_group *group_children_make_group(struct config_group *group, const char *name) +{ + struct simple_children *simple_children; + + simple_children = kzalloc(sizeof(struct simple_children), + GFP_KERNEL); + if (!simple_children) + return ERR_PTR(-ENOMEM); + + config_group_init_type_name(&simple_children->group, name, + &simple_children_type); + + return &simple_children->group; +} + +static struct configfs_attribute group_children_attr_description = { + .ca_owner = THIS_MODULE, + .ca_name = "description", + .ca_mode = S_IRUGO, +}; + +static struct configfs_attribute *group_children_attrs[] = { + &group_children_attr_description, + NULL, +}; + +static ssize_t group_children_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + return sprintf(page, +"[03-group-children]\n" +"\n" +"This subsystem allows the creation of child config_groups. These\n" +"groups are like the subsystem simple-children.\n"); +} + +static struct configfs_item_operations group_children_item_ops = { + .show_attribute = group_children_attr_show, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations group_children_group_ops = { + .make_group = group_children_make_group, +}; + +static struct config_item_type group_children_type = { + .ct_item_ops = &group_children_item_ops, + .ct_group_ops = &group_children_group_ops, + .ct_attrs = group_children_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem group_children_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "03-group-children", + .ci_type = &group_children_type, + }, + }, +}; + +/* ----------------------------------------------------------------- */ + +/* + * We're now done with our subsystem definitions. + * For convenience in this module, here's a list of them all. It + * allows the init function to easily register them. Most modules + * will only have one subsystem, and will only call register_subsystem + * on it directly. 
+ */ +static struct configfs_subsystem *example_subsys[] = { + &childless_subsys.subsys, + &simple_children_subsys, + &group_children_subsys, + NULL, +}; + +static int __init configfs_example_init(void) +{ + int ret; + int i; + struct configfs_subsystem *subsys; + + for (i = 0; example_subsys[i]; i++) { + subsys = example_subsys[i]; + + config_group_init(&subsys->su_group); + mutex_init(&subsys->su_mutex); + ret = configfs_register_subsystem(subsys); + if (ret) { + printk(KERN_ERR "Error %d while registering subsystem %s\n", + ret, + subsys->su_group.cg_item.ci_namebuf); + goto out_unregister; + } + } + + return 0; + +out_unregister: + for (; i >= 0; i--) { + configfs_unregister_subsystem(example_subsys[i]); + } + + return ret; +} + +static void __exit configfs_example_exit(void) +{ + int i; + + for (i = 0; example_subsys[i]; i++) { + configfs_unregister_subsystem(example_subsys[i]); + } +} + +module_init(configfs_example_init); +module_exit(configfs_example_exit); +MODULE_LICENSE("GPL"); diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 0a5491baf0bc..7f627775c947 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -130,8 +130,25 @@ struct configfs_attribute { /* * Users often need to create attribute structures for their configurable * attributes, containing a configfs_attribute member and function pointers - * for the show() and store() operations on that attribute. They can use - * this macro (similar to sysfs' __ATTR) to make defining attributes easier. + * for the show() and store() operations on that attribute. If they don't + * need anything else on the extended attribute structure, they can use + * this macro to define it The argument _item is the name of the + * config_item structure. + */ +#define CONFIGFS_ATTR_STRUCT(_item) \ +struct _item##_attribute { \ + struct configfs_attribute attr; \ + ssize_t (*show)(struct _item *, char *); \ + ssize_t (*store)(struct _item *, const char *, size_t); \ +} + +/* + * With the extended attribute structure, users can use this macro + * (similar to sysfs' __ATTR) to make defining attributes easier. + * An example: + * #define MYITEM_ATTR(_name, _mode, _show, _store) \ + * struct myitem_attribute childless_attr_##_name = \ + * __CONFIGFS_ATTR(_name, _mode, _show, _store) */ #define __CONFIGFS_ATTR(_name, _mode, _show, _store) \ { \ @@ -143,6 +160,52 @@ struct configfs_attribute { .show = _show, \ .store = _store, \ } +/* Here is a readonly version, only requiring a show() operation */ +#define __CONFIGFS_ATTR_RO(_name, _show) \ +{ \ + .attr = { \ + .ca_name = __stringify(_name), \ + .ca_mode = 0444, \ + .ca_owner = THIS_MODULE, \ + }, \ + .show = _show, \ +} + +/* + * With these extended attributes, the simple show_attribute() and + * store_attribute() operations need to call the show() and store() of the + * attributes. This is a common pattern, so we provide a macro to define + * them. The argument _item is the name of the config_item structure. 
+ * This macro expects the attributes to be named "struct _attribute" + * and the function to_() to exist; + */ +#define CONFIGFS_ATTR_OPS(_item) \ +static ssize_t _item##_attr_show(struct config_item *item, \ + struct configfs_attribute *attr, \ + char *page) \ +{ \ + struct _item *_item = to_##_item(item); \ + struct _item##_attribute *_item##_attr = \ + container_of(attr, struct _item##_attribute, attr); \ + ssize_t ret = 0; \ + \ + if (_item##_attr->show) \ + ret = _item##_attr->show(_item, page); \ + return ret; \ +} \ +static ssize_t _item##_attr_store(struct config_item *item, \ + struct configfs_attribute *attr, \ + const char *page, size_t count) \ +{ \ + struct _item *_item = to_##_item(item); \ + struct _item##_attribute *_item##_attr = \ + container_of(attr, struct _item##_attribute, attr); \ + ssize_t ret = -EINVAL; \ + \ + if (_item##_attr->store) \ + ret = _item##_attr->store(_item, page, count); \ + return ret; \ +} /* * If allow_link() exists, the item can symlink(2) out to other -- cgit v1.2.3 From c3f26a269c2421f97f10cf8ed05d5099b573af4d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 31 Jul 2008 16:58:50 -0700 Subject: netdev: Fix lockdep warnings in multiqueue configurations. When support for multiple TX queues were added, the netif_tx_lock() routines we converted to iterate over all TX queues and grab each queue's spinlock. This causes heartburn for lockdep and it's not a healthy thing to do with lots of TX queues anyways. So modify this to use a top-level lock and a "frozen" state for the individual TX queues. Signed-off-by: David S. Miller --- drivers/net/ifb.c | 12 ++++--- include/linux/netdevice.h | 86 ++++++++++++++++++++++++++++++----------------- net/core/dev.c | 1 + net/core/netpoll.c | 1 + net/core/pktgen.c | 7 ++-- net/sched/sch_generic.c | 6 ++-- net/sched/sch_teql.c | 9 ++--- 7 files changed, 78 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 0960e69b2da4..e4fbefc8c82f 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -69,18 +69,20 @@ static void ri_tasklet(unsigned long dev) struct net_device *_dev = (struct net_device *)dev; struct ifb_private *dp = netdev_priv(_dev); struct net_device_stats *stats = &_dev->stats; + struct netdev_queue *txq; struct sk_buff *skb; + txq = netdev_get_tx_queue(_dev, 0); dp->st_task_enter++; if ((skb = skb_peek(&dp->tq)) == NULL) { dp->st_txq_refl_try++; - if (netif_tx_trylock(_dev)) { + if (__netif_tx_trylock(txq)) { dp->st_rxq_enter++; while ((skb = skb_dequeue(&dp->rq)) != NULL) { skb_queue_tail(&dp->tq, skb); dp->st_rx2tx_tran++; } - netif_tx_unlock(_dev); + __netif_tx_unlock(txq); } else { /* reschedule */ dp->st_rxq_notenter++; @@ -115,7 +117,7 @@ static void ri_tasklet(unsigned long dev) BUG(); } - if (netif_tx_trylock(_dev)) { + if (__netif_tx_trylock(txq)) { dp->st_rxq_check++; if ((skb = skb_peek(&dp->rq)) == NULL) { dp->tasklet_pending = 0; @@ -123,10 +125,10 @@ static void ri_tasklet(unsigned long dev) netif_wake_queue(_dev); } else { dp->st_rxq_rsch++; - netif_tx_unlock(_dev); + __netif_tx_unlock(txq); goto resched; } - netif_tx_unlock(_dev); + __netif_tx_unlock(txq); } else { resched: dp->tasklet_pending = 1; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b4d056ceab96..ee583f642a9f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -440,6 +440,7 @@ static inline void napi_synchronize(const struct napi_struct *n) enum netdev_queue_state_t { __QUEUE_STATE_XOFF, + 
__QUEUE_STATE_FROZEN, }; struct netdev_queue { @@ -636,7 +637,7 @@ struct net_device unsigned int real_num_tx_queues; unsigned long tx_queue_len; /* Max frames per queue allowed */ - + spinlock_t tx_global_lock; /* * One part is mostly used on xmit path (device) */ @@ -1099,6 +1100,11 @@ static inline int netif_queue_stopped(const struct net_device *dev) return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0)); } +static inline int netif_tx_queue_frozen(const struct netdev_queue *dev_queue) +{ + return test_bit(__QUEUE_STATE_FROZEN, &dev_queue->state); +} + /** * netif_running - test if up * @dev: network device @@ -1475,6 +1481,26 @@ static inline void __netif_tx_lock_bh(struct netdev_queue *txq) txq->xmit_lock_owner = smp_processor_id(); } +static inline int __netif_tx_trylock(struct netdev_queue *txq) +{ + int ok = spin_trylock(&txq->_xmit_lock); + if (likely(ok)) + txq->xmit_lock_owner = smp_processor_id(); + return ok; +} + +static inline void __netif_tx_unlock(struct netdev_queue *txq) +{ + txq->xmit_lock_owner = -1; + spin_unlock(&txq->_xmit_lock); +} + +static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) +{ + txq->xmit_lock_owner = -1; + spin_unlock_bh(&txq->_xmit_lock); +} + /** * netif_tx_lock - grab network device transmit lock * @dev: network device @@ -1484,12 +1510,23 @@ static inline void __netif_tx_lock_bh(struct netdev_queue *txq) */ static inline void netif_tx_lock(struct net_device *dev) { - int cpu = smp_processor_id(); unsigned int i; + int cpu; + spin_lock(&dev->tx_global_lock); + cpu = smp_processor_id(); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + + /* We are the only thread of execution doing a + * freeze, but we have to grab the _xmit_lock in + * order to synchronize with threads which are in + * the ->hard_start_xmit() handler and already + * checked the frozen bit. + */ __netif_tx_lock(txq, cpu); + set_bit(__QUEUE_STATE_FROZEN, &txq->state); + __netif_tx_unlock(txq); } } @@ -1499,40 +1536,22 @@ static inline void netif_tx_lock_bh(struct net_device *dev) netif_tx_lock(dev); } -static inline int __netif_tx_trylock(struct netdev_queue *txq) -{ - int ok = spin_trylock(&txq->_xmit_lock); - if (likely(ok)) - txq->xmit_lock_owner = smp_processor_id(); - return ok; -} - -static inline int netif_tx_trylock(struct net_device *dev) -{ - return __netif_tx_trylock(netdev_get_tx_queue(dev, 0)); -} - -static inline void __netif_tx_unlock(struct netdev_queue *txq) -{ - txq->xmit_lock_owner = -1; - spin_unlock(&txq->_xmit_lock); -} - -static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) -{ - txq->xmit_lock_owner = -1; - spin_unlock_bh(&txq->_xmit_lock); -} - static inline void netif_tx_unlock(struct net_device *dev) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - __netif_tx_unlock(txq); - } + /* No need to grab the _xmit_lock here. If the + * queue is not stopped for another reason, we + * force a schedule. 
+ */ + clear_bit(__QUEUE_STATE_FROZEN, &txq->state); + if (!test_bit(__QUEUE_STATE_XOFF, &txq->state)) + __netif_schedule(txq->qdisc); + } + spin_unlock(&dev->tx_global_lock); } static inline void netif_tx_unlock_bh(struct net_device *dev) @@ -1556,13 +1575,18 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) static inline void netif_tx_disable(struct net_device *dev) { unsigned int i; + int cpu; - netif_tx_lock_bh(dev); + local_bh_disable(); + cpu = smp_processor_id(); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + + __netif_tx_lock(txq, cpu); netif_tx_stop_queue(txq); + __netif_tx_unlock(txq); } - netif_tx_unlock_bh(dev); + local_bh_enable(); } static inline void netif_addr_lock(struct net_device *dev) diff --git a/net/core/dev.c b/net/core/dev.c index 63d6bcddbf46..69320a56a084 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4200,6 +4200,7 @@ static void netdev_init_queues(struct net_device *dev) { netdev_init_one_queue(dev, &dev->rx_queue, NULL); netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); + spin_lock_init(&dev->tx_global_lock); } /** diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c12720895ecf..6c7af390be0a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -70,6 +70,7 @@ static void queue_process(struct work_struct *work) local_irq_save(flags); __netif_tx_lock(txq, smp_processor_id()); if (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq) || dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); __netif_tx_unlock(txq); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index c7d484f7e1c4..3284605f2ec7 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3305,6 +3305,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) txq = netdev_get_tx_queue(odev, queue_map); if (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq) || need_resched()) { idle_start = getCurUs(); @@ -3320,7 +3321,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->idle_acc += getCurUs() - idle_start; - if (netif_tx_queue_stopped(txq)) { + if (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq)) { pkt_dev->next_tx_us = getCurUs(); /* TODO */ pkt_dev->next_tx_ns = 0; goto out; /* Try the next interface */ @@ -3352,7 +3354,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) txq = netdev_get_tx_queue(odev, queue_map); __netif_tx_lock_bh(txq); - if (!netif_tx_queue_stopped(txq)) { + if (!netif_tx_queue_stopped(txq) && + !netif_tx_queue_frozen(txq)) { atomic_inc(&(pkt_dev->skb->users)); retry_now: diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 345838a2e369..9c9cd4d94890 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -135,7 +135,8 @@ static inline int qdisc_restart(struct Qdisc *q) txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_subqueue_stopped(dev, skb)) + if (!netif_tx_queue_stopped(txq) && + !netif_tx_queue_frozen(txq)) ret = dev_hard_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); @@ -162,7 +163,8 @@ static inline int qdisc_restart(struct Qdisc *q) break; } - if (ret && netif_tx_queue_stopped(txq)) + if (ret && (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq))) ret = 0; return ret; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 537223642b6e..2c35c678563b 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -305,10 +305,11 @@ restart: switch 
(teql_resolve(skb, skb_res, slave)) { case 0: - if (netif_tx_trylock(slave)) { - if (!__netif_subqueue_stopped(slave, subq) && + if (__netif_tx_trylock(slave_txq)) { + if (!netif_tx_queue_stopped(slave_txq) && + !netif_tx_queue_frozen(slave_txq) && slave->hard_start_xmit(skb, slave) == 0) { - netif_tx_unlock(slave); + __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); master->stats.tx_packets++; @@ -316,7 +317,7 @@ restart: qdisc_pkt_len(skb); return 0; } - netif_tx_unlock(slave); + __netif_tx_unlock(slave_txq); } if (netif_queue_stopped(dev)) busy = 1; -- cgit v1.2.3 From bc4768eb081a67642c0c44c34ea597c273bdedcb Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Thu, 31 Jul 2008 20:45:24 -0700 Subject: ipvs: Move userspace definitions to include/linux/ip_vs.h Current versions of ipvsadm include "/usr/src/linux/include/net/ip_vs.h" directly. This file also contains kernel-only definitions. Normally, public definitions should live in include/linux, so this patch moves the definitions shared with userspace to a new file, "include/linux/ip_vs.h". This also removes the unused NFC_IPVS_PROPERTY bitmask, which was once used to point into skb->nfcache. To make old ipvsadms still compile with this, the old header file includes the new one. Thanks to Dave Miller and Horms for noting/adding the missing Kbuild entry for the new header file. Signed-off-by: Julius Volz Acked-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/Kbuild | 1 + include/linux/ip_vs.h | 245 ++++++++++++++++++++++++++++++++++++++++++++++++ include/net/ip_vs.h | 253 ++------------------------------------------------ 3 files changed, 254 insertions(+), 245 deletions(-) create mode 100644 include/linux/ip_vs.h (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 4c4142c5aa6e..a26f565e8189 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -97,6 +97,7 @@ header-y += ioctl.h header-y += ip6_tunnel.h header-y += ipmi_msgdefs.h header-y += ipsec.h +header-y += ip_vs.h header-y += ipx.h header-y += irda.h header-y += iso_fs.h diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h new file mode 100644 index 000000000000..ec6eb49af2d8 --- /dev/null +++ b/include/linux/ip_vs.h @@ -0,0 +1,245 @@ +/* + * IP Virtual Server + * data structure and functionality definitions + */ + +#ifndef _IP_VS_H +#define _IP_VS_H + +#include /* For __beXX types in userland */ + +#define IP_VS_VERSION_CODE 0x010201 +#define NVERSION(version) \ + (version >> 16) & 0xFF, \ + (version >> 8) & 0xFF, \ + version & 0xFF + +/* + * Virtual Service Flags + */ +#define IP_VS_SVC_F_PERSISTENT 0x0001 /* persistent port */ +#define IP_VS_SVC_F_HASHED 0x0002 /* hashed entry */ + +/* + * Destination Server Flags + */ +#define IP_VS_DEST_F_AVAILABLE 0x0001 /* server is available */ +#define IP_VS_DEST_F_OVERLOAD 0x0002 /* server is overloaded */ + +/* + * IPVS sync daemon states + */ +#define IP_VS_STATE_NONE 0x0000 /* daemon is stopped */ +#define IP_VS_STATE_MASTER 0x0001 /* started as master */ +#define IP_VS_STATE_BACKUP 0x0002 /* started as backup */ + +/* + * IPVS socket options + */ +#define IP_VS_BASE_CTL (64+1024+64) /* base */ + +#define IP_VS_SO_SET_NONE IP_VS_BASE_CTL /* just peek */ +#define IP_VS_SO_SET_INSERT (IP_VS_BASE_CTL+1) +#define IP_VS_SO_SET_ADD (IP_VS_BASE_CTL+2) +#define IP_VS_SO_SET_EDIT (IP_VS_BASE_CTL+3) +#define IP_VS_SO_SET_DEL (IP_VS_BASE_CTL+4) +#define IP_VS_SO_SET_FLUSH (IP_VS_BASE_CTL+5) +#define IP_VS_SO_SET_LIST (IP_VS_BASE_CTL+6) 
+#define IP_VS_SO_SET_ADDDEST (IP_VS_BASE_CTL+7) +#define IP_VS_SO_SET_DELDEST (IP_VS_BASE_CTL+8) +#define IP_VS_SO_SET_EDITDEST (IP_VS_BASE_CTL+9) +#define IP_VS_SO_SET_TIMEOUT (IP_VS_BASE_CTL+10) +#define IP_VS_SO_SET_STARTDAEMON (IP_VS_BASE_CTL+11) +#define IP_VS_SO_SET_STOPDAEMON (IP_VS_BASE_CTL+12) +#define IP_VS_SO_SET_RESTORE (IP_VS_BASE_CTL+13) +#define IP_VS_SO_SET_SAVE (IP_VS_BASE_CTL+14) +#define IP_VS_SO_SET_ZERO (IP_VS_BASE_CTL+15) +#define IP_VS_SO_SET_MAX IP_VS_SO_SET_ZERO + +#define IP_VS_SO_GET_VERSION IP_VS_BASE_CTL +#define IP_VS_SO_GET_INFO (IP_VS_BASE_CTL+1) +#define IP_VS_SO_GET_SERVICES (IP_VS_BASE_CTL+2) +#define IP_VS_SO_GET_SERVICE (IP_VS_BASE_CTL+3) +#define IP_VS_SO_GET_DESTS (IP_VS_BASE_CTL+4) +#define IP_VS_SO_GET_DEST (IP_VS_BASE_CTL+5) /* not used now */ +#define IP_VS_SO_GET_TIMEOUT (IP_VS_BASE_CTL+6) +#define IP_VS_SO_GET_DAEMON (IP_VS_BASE_CTL+7) +#define IP_VS_SO_GET_MAX IP_VS_SO_GET_DAEMON + + +/* + * IPVS Connection Flags + */ +#define IP_VS_CONN_F_FWD_MASK 0x0007 /* mask for the fwd methods */ +#define IP_VS_CONN_F_MASQ 0x0000 /* masquerading/NAT */ +#define IP_VS_CONN_F_LOCALNODE 0x0001 /* local node */ +#define IP_VS_CONN_F_TUNNEL 0x0002 /* tunneling */ +#define IP_VS_CONN_F_DROUTE 0x0003 /* direct routing */ +#define IP_VS_CONN_F_BYPASS 0x0004 /* cache bypass */ +#define IP_VS_CONN_F_SYNC 0x0020 /* entry created by sync */ +#define IP_VS_CONN_F_HASHED 0x0040 /* hashed entry */ +#define IP_VS_CONN_F_NOOUTPUT 0x0080 /* no output packets */ +#define IP_VS_CONN_F_INACTIVE 0x0100 /* not established */ +#define IP_VS_CONN_F_OUT_SEQ 0x0200 /* must do output seq adjust */ +#define IP_VS_CONN_F_IN_SEQ 0x0400 /* must do input seq adjust */ +#define IP_VS_CONN_F_SEQ_MASK 0x0600 /* in/out sequence mask */ +#define IP_VS_CONN_F_NO_CPORT 0x0800 /* no client port set yet */ +#define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ + +#define IP_VS_SCHEDNAME_MAXLEN 16 +#define IP_VS_IFNAME_MAXLEN 16 + + +/* + * The struct ip_vs_service_user and struct ip_vs_dest_user are + * used to set IPVS rules through setsockopt. 
+ */ +struct ip_vs_service_user { + /* virtual service addresses */ + u_int16_t protocol; + __be32 addr; /* virtual ip address */ + __be16 port; + u_int32_t fwmark; /* firwall mark of service */ + + /* virtual service options */ + char sched_name[IP_VS_SCHEDNAME_MAXLEN]; + unsigned flags; /* virtual service flags */ + unsigned timeout; /* persistent timeout in sec */ + __be32 netmask; /* persistent netmask */ +}; + + +struct ip_vs_dest_user { + /* destination server address */ + __be32 addr; + __be16 port; + + /* real server options */ + unsigned conn_flags; /* connection flags */ + int weight; /* destination weight */ + + /* thresholds for active connections */ + u_int32_t u_threshold; /* upper threshold */ + u_int32_t l_threshold; /* lower threshold */ +}; + + +/* + * IPVS statistics object (for user space) + */ +struct ip_vs_stats_user +{ + __u32 conns; /* connections scheduled */ + __u32 inpkts; /* incoming packets */ + __u32 outpkts; /* outgoing packets */ + __u64 inbytes; /* incoming bytes */ + __u64 outbytes; /* outgoing bytes */ + + __u32 cps; /* current connection rate */ + __u32 inpps; /* current in packet rate */ + __u32 outpps; /* current out packet rate */ + __u32 inbps; /* current in byte rate */ + __u32 outbps; /* current out byte rate */ +}; + + +/* The argument to IP_VS_SO_GET_INFO */ +struct ip_vs_getinfo { + /* version number */ + unsigned int version; + + /* size of connection hash table */ + unsigned int size; + + /* number of virtual services */ + unsigned int num_services; +}; + + +/* The argument to IP_VS_SO_GET_SERVICE */ +struct ip_vs_service_entry { + /* which service: user fills in these */ + u_int16_t protocol; + __be32 addr; /* virtual address */ + __be16 port; + u_int32_t fwmark; /* firwall mark of service */ + + /* service options */ + char sched_name[IP_VS_SCHEDNAME_MAXLEN]; + unsigned flags; /* virtual service flags */ + unsigned timeout; /* persistent timeout */ + __be32 netmask; /* persistent netmask */ + + /* number of real servers */ + unsigned int num_dests; + + /* statistics */ + struct ip_vs_stats_user stats; +}; + + +struct ip_vs_dest_entry { + __be32 addr; /* destination address */ + __be16 port; + unsigned conn_flags; /* connection flags */ + int weight; /* destination weight */ + + u_int32_t u_threshold; /* upper threshold */ + u_int32_t l_threshold; /* lower threshold */ + + u_int32_t activeconns; /* active connections */ + u_int32_t inactconns; /* inactive connections */ + u_int32_t persistconns; /* persistent connections */ + + /* statistics */ + struct ip_vs_stats_user stats; +}; + + +/* The argument to IP_VS_SO_GET_DESTS */ +struct ip_vs_get_dests { + /* which service: user fills in these */ + u_int16_t protocol; + __be32 addr; /* virtual address */ + __be16 port; + u_int32_t fwmark; /* firwall mark of service */ + + /* number of real servers */ + unsigned int num_dests; + + /* the real servers */ + struct ip_vs_dest_entry entrytable[0]; +}; + + +/* The argument to IP_VS_SO_GET_SERVICES */ +struct ip_vs_get_services { + /* number of virtual services */ + unsigned int num_services; + + /* service table */ + struct ip_vs_service_entry entrytable[0]; +}; + + +/* The argument to IP_VS_SO_GET_TIMEOUT */ +struct ip_vs_timeout_user { + int tcp_timeout; + int tcp_fin_timeout; + int udp_timeout; +}; + + +/* The argument to IP_VS_SO_GET_DAEMON */ +struct ip_vs_daemon_user { + /* sync daemon state (master/backup) */ + int state; + + /* multicast interface name */ + char mcast_ifn[IP_VS_IFNAME_MAXLEN]; + + /* SyncID we belong to */ + int syncid; +}; + 
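/*
 * Illustrative userspace sketch only; not part of this header or of the
 * patch. It shows the kind of consumer these definitions now serve: a
 * privileged tool opens a raw socket and queries IPVS through getsockopt(),
 * the same mechanism ipvsadm uses (error handling omitted, CAP_NET_ADMIN
 * required):
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <linux/ip_vs.h>
 *
 *	int main(void)
 *	{
 *		struct ip_vs_getinfo info;
 *		socklen_t len = sizeof(info);
 *		int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 *
 *		if (getsockopt(fd, IPPROTO_IP, IP_VS_SO_GET_INFO, &info, &len) == 0)
 *			printf("IPVS %d.%d.%d: %u services, conn table size %u\n",
 *			       NVERSION(info.version), info.num_services, info.size);
 *		return 0;
 *	}
 */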
+#endif /* _IP_VS_H */ diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 9a51ebad3f1f..cbb59ebed4ae 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -3,254 +3,17 @@ * data structure and functionality definitions */ -#ifndef _IP_VS_H -#define _IP_VS_H - -#include /* For __uXX types */ -#include /* For __beXX types in userland */ - -#include /* For ctl_path */ - -#define IP_VS_VERSION_CODE 0x010201 -#define NVERSION(version) \ - (version >> 16) & 0xFF, \ - (version >> 8) & 0xFF, \ - version & 0xFF - -/* - * Virtual Service Flags - */ -#define IP_VS_SVC_F_PERSISTENT 0x0001 /* persistent port */ -#define IP_VS_SVC_F_HASHED 0x0002 /* hashed entry */ - -/* - * Destination Server Flags - */ -#define IP_VS_DEST_F_AVAILABLE 0x0001 /* server is available */ -#define IP_VS_DEST_F_OVERLOAD 0x0002 /* server is overloaded */ - -/* - * IPVS sync daemon states - */ -#define IP_VS_STATE_NONE 0x0000 /* daemon is stopped */ -#define IP_VS_STATE_MASTER 0x0001 /* started as master */ -#define IP_VS_STATE_BACKUP 0x0002 /* started as backup */ - -/* - * IPVS socket options - */ -#define IP_VS_BASE_CTL (64+1024+64) /* base */ - -#define IP_VS_SO_SET_NONE IP_VS_BASE_CTL /* just peek */ -#define IP_VS_SO_SET_INSERT (IP_VS_BASE_CTL+1) -#define IP_VS_SO_SET_ADD (IP_VS_BASE_CTL+2) -#define IP_VS_SO_SET_EDIT (IP_VS_BASE_CTL+3) -#define IP_VS_SO_SET_DEL (IP_VS_BASE_CTL+4) -#define IP_VS_SO_SET_FLUSH (IP_VS_BASE_CTL+5) -#define IP_VS_SO_SET_LIST (IP_VS_BASE_CTL+6) -#define IP_VS_SO_SET_ADDDEST (IP_VS_BASE_CTL+7) -#define IP_VS_SO_SET_DELDEST (IP_VS_BASE_CTL+8) -#define IP_VS_SO_SET_EDITDEST (IP_VS_BASE_CTL+9) -#define IP_VS_SO_SET_TIMEOUT (IP_VS_BASE_CTL+10) -#define IP_VS_SO_SET_STARTDAEMON (IP_VS_BASE_CTL+11) -#define IP_VS_SO_SET_STOPDAEMON (IP_VS_BASE_CTL+12) -#define IP_VS_SO_SET_RESTORE (IP_VS_BASE_CTL+13) -#define IP_VS_SO_SET_SAVE (IP_VS_BASE_CTL+14) -#define IP_VS_SO_SET_ZERO (IP_VS_BASE_CTL+15) -#define IP_VS_SO_SET_MAX IP_VS_SO_SET_ZERO - -#define IP_VS_SO_GET_VERSION IP_VS_BASE_CTL -#define IP_VS_SO_GET_INFO (IP_VS_BASE_CTL+1) -#define IP_VS_SO_GET_SERVICES (IP_VS_BASE_CTL+2) -#define IP_VS_SO_GET_SERVICE (IP_VS_BASE_CTL+3) -#define IP_VS_SO_GET_DESTS (IP_VS_BASE_CTL+4) -#define IP_VS_SO_GET_DEST (IP_VS_BASE_CTL+5) /* not used now */ -#define IP_VS_SO_GET_TIMEOUT (IP_VS_BASE_CTL+6) -#define IP_VS_SO_GET_DAEMON (IP_VS_BASE_CTL+7) -#define IP_VS_SO_GET_MAX IP_VS_SO_GET_DAEMON - - -/* - * IPVS Connection Flags - */ -#define IP_VS_CONN_F_FWD_MASK 0x0007 /* mask for the fwd methods */ -#define IP_VS_CONN_F_MASQ 0x0000 /* masquerading/NAT */ -#define IP_VS_CONN_F_LOCALNODE 0x0001 /* local node */ -#define IP_VS_CONN_F_TUNNEL 0x0002 /* tunneling */ -#define IP_VS_CONN_F_DROUTE 0x0003 /* direct routing */ -#define IP_VS_CONN_F_BYPASS 0x0004 /* cache bypass */ -#define IP_VS_CONN_F_SYNC 0x0020 /* entry created by sync */ -#define IP_VS_CONN_F_HASHED 0x0040 /* hashed entry */ -#define IP_VS_CONN_F_NOOUTPUT 0x0080 /* no output packets */ -#define IP_VS_CONN_F_INACTIVE 0x0100 /* not established */ -#define IP_VS_CONN_F_OUT_SEQ 0x0200 /* must do output seq adjust */ -#define IP_VS_CONN_F_IN_SEQ 0x0400 /* must do input seq adjust */ -#define IP_VS_CONN_F_SEQ_MASK 0x0600 /* in/out sequence mask */ -#define IP_VS_CONN_F_NO_CPORT 0x0800 /* no client port set yet */ -#define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ - -/* Move it to better place one day, for now keep it unique */ -#define NFC_IPVS_PROPERTY 0x10000 - -#define IP_VS_SCHEDNAME_MAXLEN 16 -#define IP_VS_IFNAME_MAXLEN 16 - - 
-/* - * The struct ip_vs_service_user and struct ip_vs_dest_user are - * used to set IPVS rules through setsockopt. - */ -struct ip_vs_service_user { - /* virtual service addresses */ - u_int16_t protocol; - __be32 addr; /* virtual ip address */ - __be16 port; - u_int32_t fwmark; /* firwall mark of service */ - - /* virtual service options */ - char sched_name[IP_VS_SCHEDNAME_MAXLEN]; - unsigned flags; /* virtual service flags */ - unsigned timeout; /* persistent timeout in sec */ - __be32 netmask; /* persistent netmask */ -}; - - -struct ip_vs_dest_user { - /* destination server address */ - __be32 addr; - __be16 port; - - /* real server options */ - unsigned conn_flags; /* connection flags */ - int weight; /* destination weight */ - - /* thresholds for active connections */ - u_int32_t u_threshold; /* upper threshold */ - u_int32_t l_threshold; /* lower threshold */ -}; - - -/* - * IPVS statistics object (for user space) - */ -struct ip_vs_stats_user -{ - __u32 conns; /* connections scheduled */ - __u32 inpkts; /* incoming packets */ - __u32 outpkts; /* outgoing packets */ - __u64 inbytes; /* incoming bytes */ - __u64 outbytes; /* outgoing bytes */ - - __u32 cps; /* current connection rate */ - __u32 inpps; /* current in packet rate */ - __u32 outpps; /* current out packet rate */ - __u32 inbps; /* current in byte rate */ - __u32 outbps; /* current out byte rate */ -}; - - -/* The argument to IP_VS_SO_GET_INFO */ -struct ip_vs_getinfo { - /* version number */ - unsigned int version; - - /* size of connection hash table */ - unsigned int size; - - /* number of virtual services */ - unsigned int num_services; -}; - - -/* The argument to IP_VS_SO_GET_SERVICE */ -struct ip_vs_service_entry { - /* which service: user fills in these */ - u_int16_t protocol; - __be32 addr; /* virtual address */ - __be16 port; - u_int32_t fwmark; /* firwall mark of service */ - - /* service options */ - char sched_name[IP_VS_SCHEDNAME_MAXLEN]; - unsigned flags; /* virtual service flags */ - unsigned timeout; /* persistent timeout */ - __be32 netmask; /* persistent netmask */ - - /* number of real servers */ - unsigned int num_dests; - - /* statistics */ - struct ip_vs_stats_user stats; -}; - - -struct ip_vs_dest_entry { - __be32 addr; /* destination address */ - __be16 port; - unsigned conn_flags; /* connection flags */ - int weight; /* destination weight */ - - u_int32_t u_threshold; /* upper threshold */ - u_int32_t l_threshold; /* lower threshold */ - - u_int32_t activeconns; /* active connections */ - u_int32_t inactconns; /* inactive connections */ - u_int32_t persistconns; /* persistent connections */ - - /* statistics */ - struct ip_vs_stats_user stats; -}; - - -/* The argument to IP_VS_SO_GET_DESTS */ -struct ip_vs_get_dests { - /* which service: user fills in these */ - u_int16_t protocol; - __be32 addr; /* virtual address */ - __be16 port; - u_int32_t fwmark; /* firwall mark of service */ - - /* number of real servers */ - unsigned int num_dests; - - /* the real servers */ - struct ip_vs_dest_entry entrytable[0]; -}; - - -/* The argument to IP_VS_SO_GET_SERVICES */ -struct ip_vs_get_services { - /* number of virtual services */ - unsigned int num_services; - - /* service table */ - struct ip_vs_service_entry entrytable[0]; -}; - - -/* The argument to IP_VS_SO_GET_TIMEOUT */ -struct ip_vs_timeout_user { - int tcp_timeout; - int tcp_fin_timeout; - int udp_timeout; -}; - - -/* The argument to IP_VS_SO_GET_DAEMON */ -struct ip_vs_daemon_user { - /* sync daemon state (master/backup) */ - int state; - - /* 
multicast interface name */ - char mcast_ifn[IP_VS_IFNAME_MAXLEN]; - - /* SyncID we belong to */ - int syncid; -}; +#ifndef _NET_IP_VS_H +#define _NET_IP_VS_H +#include /* definitions shared with userland */ +/* old ipvsadm versions still include this file directly */ #ifdef __KERNEL__ +#include /* for __uXX types */ + +#include /* for ctl_path */ #include /* for struct list_head */ #include /* for struct rwlock_t */ #include /* for struct atomic_t */ @@ -981,4 +744,4 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) #endif /* __KERNEL__ */ -#endif /* _IP_VS_H */ +#endif /* _NET_IP_VS_H */ -- cgit v1.2.3 From 4a7b61d23505854dff7d04cc11944566cffdd0ee Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 31 Jul 2008 20:52:08 -0700 Subject: skbuff: add missing kernel-doc for do_not_encrypt Add missing kernel-doc notation to sk_buff: Warning(linux-2.6.27-rc1-git2//include/linux/skbuff.h:345): No description found for parameter 'do_not_encrypt' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a640385e0598..cfcc45b3bef0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -243,6 +243,7 @@ typedef unsigned char *sk_buff_data_t; * @tc_index: Traffic control index * @tc_verd: traffic control verdict * @ndisc_nodetype: router type (from link layer) + * @do_not_encrypt: set to prevent encryption of this frame * @dma_cookie: a cookie to one of several possible DMA operations * done by skb DMA functions * @secmark: security marking -- cgit v1.2.3 From a4b526b3ba6353cd89a38e41da48ed83b0ead16f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 1 Aug 2008 16:39:12 +0200 Subject: [S390] Optimize storage key operations for anon pages For anonymous pages without a swap cache backing the check in page_remove_rmap for the physical dirty bit in page_remove_rmap is unnecessary. The instructions that are used to check and reset the dirty bit are expensive. Removing the check noticably speeds up process exit. In addition the clearing of the dirty bit in __SetPageUptodate is pointless as well. With these two changes there is no storage key operation for an anonymous page anymore if it does not hit the swap space. The micro benchmark which repeatedly executes an empty shell script gets about 5% faster. Signed-off-by: Martin Schwidefsky --- include/linux/page-flags.h | 3 --- mm/rmap.c | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 54590a9a103e..25aaccdb2f26 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -239,9 +239,6 @@ static inline void __SetPageUptodate(struct page *page) { smp_wmb(); __set_bit(PG_uptodate, &(page)->flags); -#ifdef CONFIG_S390 - page_clear_dirty(page); -#endif } static inline void SetPageUptodate(struct page *page) diff --git a/mm/rmap.c b/mm/rmap.c index 99bc3f9cd796..94a5246a3f98 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -667,7 +667,8 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma) * Leaving it set also helps swapoff to reinstate ptes * faster for those pages still in swapcache. 
*/ - if (page_test_dirty(page)) { + if ((!PageAnon(page) || PageSwapCache(page)) && + page_test_dirty(page)) { page_clear_dirty(page); set_page_dirty(page); } -- cgit v1.2.3 From 1027abe8827b47f7e9c4ed6514fde3d44f79963c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 Jul 2008 04:13:04 -0400 Subject: [PATCH] merge locate_fd() and get_unused_fd() New primitive: alloc_fd(start, flags). get_unused_fd() and get_unused_fd_flags() become wrappers on top of it. Signed-off-by: Al Viro --- fs/fcntl.c | 87 +++++++++------------------------------------------- fs/file.c | 61 ++++++++++++++++++++++++++++++++++++ fs/open.c | 56 --------------------------------- include/linux/file.h | 3 +- 4 files changed, 77 insertions(+), 130 deletions(-) (limited to 'include/linux') diff --git a/fs/fcntl.c b/fs/fcntl.c index 61d625136813..2e40799daad6 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -49,73 +49,6 @@ static int get_close_on_exec(unsigned int fd) return res; } -/* - * locate_fd finds a free file descriptor in the open_fds fdset, - * expanding the fd arrays if necessary. Must be called with the - * file_lock held for write. - */ - -static int locate_fd(unsigned int orig_start, int cloexec) -{ - struct files_struct *files = current->files; - unsigned int newfd; - unsigned int start; - int error; - struct fdtable *fdt; - - spin_lock(&files->file_lock); -repeat: - fdt = files_fdtable(files); - /* - * Someone might have closed fd's in the range - * orig_start..fdt->next_fd - */ - start = orig_start; - if (start < files->next_fd) - start = files->next_fd; - - newfd = start; - if (start < fdt->max_fds) - newfd = find_next_zero_bit(fdt->open_fds->fds_bits, - fdt->max_fds, start); - - error = expand_files(files, newfd); - if (error < 0) - goto out; - - /* - * If we needed to expand the fs array we - * might have blocked - try again. - */ - if (error) - goto repeat; - - if (start <= files->next_fd) - files->next_fd = newfd + 1; - - FD_SET(newfd, fdt->open_fds); - if (cloexec) - FD_SET(newfd, fdt->close_on_exec); - else - FD_CLR(newfd, fdt->close_on_exec); - error = newfd; - -out: - spin_unlock(&files->file_lock); - return error; -} - -static int dupfd(struct file *file, unsigned int start, int cloexec) -{ - int fd = locate_fd(start, cloexec); - if (fd >= 0) - fd_install(fd, file); - else - fput(file); - - return fd; -} - asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) { int err = -EBADF; @@ -194,10 +127,15 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) asmlinkage long sys_dup(unsigned int fildes) { int ret = -EBADF; - struct file * file = fget(fildes); - - if (file) - ret = dupfd(file, 0, 0); + struct file *file = fget(fildes); + + if (file) { + ret = get_unused_fd(); + if (ret >= 0) + fd_install(ret, file); + else + fput(file); + } return ret; } @@ -322,8 +260,11 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_DUPFD_CLOEXEC: if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) break; - get_file(filp); - err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC); + err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0); + if (err >= 0) { + get_file(filp); + fd_install(err, filp); + } break; case F_GETFD: err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; diff --git a/fs/file.c b/fs/file.c index d8773b19fe47..f313314f996f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -6,6 +6,7 @@ * Manage the dynamic fd arrays in the process files_struct. 
*/ +#include #include #include #include @@ -432,3 +433,63 @@ struct files_struct init_files = { }, .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), }; + +/* + * allocate a file descriptor, mark it busy. + */ +int alloc_fd(unsigned start, unsigned flags) +{ + struct files_struct *files = current->files; + unsigned int fd; + int error; + struct fdtable *fdt; + + spin_lock(&files->file_lock); +repeat: + fdt = files_fdtable(files); + fd = start; + if (fd < files->next_fd) + fd = files->next_fd; + + if (fd < fdt->max_fds) + fd = find_next_zero_bit(fdt->open_fds->fds_bits, + fdt->max_fds, fd); + + error = expand_files(files, fd); + if (error < 0) + goto out; + + /* + * If we needed to expand the fs array we + * might have blocked - try again. + */ + if (error) + goto repeat; + + if (start <= files->next_fd) + files->next_fd = fd + 1; + + FD_SET(fd, fdt->open_fds); + if (flags & O_CLOEXEC) + FD_SET(fd, fdt->close_on_exec); + else + FD_CLR(fd, fdt->close_on_exec); + error = fd; +#if 1 + /* Sanity check */ + if (rcu_dereference(fdt->fd[fd]) != NULL) { + printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); + rcu_assign_pointer(fdt->fd[fd], NULL); + } +#endif + +out: + spin_unlock(&files->file_lock); + return error; +} + +int get_unused_fd(void) +{ + return alloc_fd(0, 0); +} +EXPORT_SYMBOL(get_unused_fd); diff --git a/fs/open.c b/fs/open.c index 52647be277a2..07da9359481c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -963,62 +963,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) } EXPORT_SYMBOL(dentry_open); -/* - * Find an empty file descriptor entry, and mark it busy. - */ -int get_unused_fd_flags(int flags) -{ - struct files_struct * files = current->files; - int fd, error; - struct fdtable *fdt; - - spin_lock(&files->file_lock); - -repeat: - fdt = files_fdtable(files); - fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, - files->next_fd); - - /* Do we need to expand the fd array or fd set? */ - error = expand_files(files, fd); - if (error < 0) - goto out; - - if (error) { - /* - * If we needed to expand the fs array we - * might have blocked - try again. 
- */ - goto repeat; - } - - FD_SET(fd, fdt->open_fds); - if (flags & O_CLOEXEC) - FD_SET(fd, fdt->close_on_exec); - else - FD_CLR(fd, fdt->close_on_exec); - files->next_fd = fd + 1; -#if 1 - /* Sanity check */ - if (fdt->fd[fd] != NULL) { - printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); - fdt->fd[fd] = NULL; - } -#endif - error = fd; - -out: - spin_unlock(&files->file_lock); - return error; -} - -int get_unused_fd(void) -{ - return get_unused_fd_flags(0); -} - -EXPORT_SYMBOL(get_unused_fd); - static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files); diff --git a/include/linux/file.h b/include/linux/file.h index 27c64bdc68c9..a20259e248a5 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -34,8 +34,9 @@ extern struct file *fget(unsigned int fd); extern struct file *fget_light(unsigned int fd, int *fput_needed); extern void set_close_on_exec(unsigned int fd, int flag); extern void put_filp(struct file *); +extern int alloc_fd(unsigned start, unsigned flags); extern int get_unused_fd(void); -extern int get_unused_fd_flags(int flags); +#define get_unused_fd_flags(flags) alloc_fd(0, (flags)) extern void put_unused_fd(unsigned int fd); extern void fd_install(unsigned int fd, struct file *file); -- cgit v1.2.3 From 77e69dac3cefacee939cb107ae9cd520a62338e0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Aug 2008 04:29:18 -0400 Subject: [PATCH] fix races and leaks in vfs_quota_on() users * new helper: vfs_quota_on_path(); equivalent of vfs_quota_on() sans the pathname resolution. * callers of vfs_quota_on() that do their own pathname resolution and checks based on it are switched to vfs_quota_on_path(); that way we avoid the races. * reiserfs leaked dentry/vfsmount references on several failure exits. Signed-off-by: Al Viro --- fs/dquot.c | 33 ++++++++++++++++++++------------- fs/ext3/super.c | 3 ++- fs/ext4/super.c | 3 ++- fs/reiserfs/super.c | 16 +++++++++------- include/linux/quotaops.h | 2 ++ 5 files changed, 35 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/dquot.c b/fs/dquot.c index 1346eebe74ce..8ec4d6cc7633 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1793,6 +1793,21 @@ static int vfs_quota_on_remount(struct super_block *sb, int type) return ret; } +int vfs_quota_on_path(struct super_block *sb, int type, int format_id, + struct path *path) +{ + int error = security_quota_on(path->dentry); + if (error) + return error; + /* Quota file not on the same filesystem? */ + if (path->mnt->mnt_sb != sb) + error = -EXDEV; + else + error = vfs_quota_on_inode(path->dentry->d_inode, type, + format_id); + return error; +} + /* Actual function called from quotactl() */ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, int remount) @@ -1804,19 +1819,10 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, return vfs_quota_on_remount(sb, type); error = path_lookup(path, LOOKUP_FOLLOW, &nd); - if (error < 0) - return error; - error = security_quota_on(nd.path.dentry); - if (error) - goto out_path; - /* Quota file not on the same filesystem? 
*/ - if (nd.path.mnt->mnt_sb != sb) - error = -EXDEV; - else - error = vfs_quota_on_inode(nd.path.dentry->d_inode, type, - format_id); -out_path: - path_put(&nd.path); + if (!error) { + error = vfs_quota_on_path(sb, type, format_id, &nd.path); + path_put(&nd.path); + } return error; } @@ -2185,6 +2191,7 @@ EXPORT_SYMBOL(unregister_quota_format); EXPORT_SYMBOL(dqstats); EXPORT_SYMBOL(dq_data_lock); EXPORT_SYMBOL(vfs_quota_on); +EXPORT_SYMBOL(vfs_quota_on_path); EXPORT_SYMBOL(vfs_quota_on_mount); EXPORT_SYMBOL(vfs_quota_off); EXPORT_SYMBOL(vfs_quota_sync); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 8ddced384674..f38a5afc39a1 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2810,8 +2810,9 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, journal_unlock_updates(EXT3_SB(sb)->s_journal); } + err = vfs_quota_on_path(sb, type, format_id, &nd.path); path_put(&nd.path); - return vfs_quota_on(sb, type, format_id, path, remount); + return err; } /* Read data from quotafile - avoid pagecache and such because we cannot afford diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b5479b1dff14..1e69f29a8c55 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3352,8 +3352,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); } + err = vfs_quota_on_path(sb, type, format_id, &nd.path); path_put(&nd.path); - return vfs_quota_on(sb, type, format_id, path, remount); + return err; } /* Read data from quotafile - avoid pagecache and such because we cannot afford diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 879e54d35c2d..282a13596c70 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2076,8 +2076,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, return err; /* Quotafile not on the same filesystem? */ if (nd.path.mnt->mnt_sb != sb) { - path_put(&nd.path); - return -EXDEV; + err = -EXDEV; + goto out; } inode = nd.path.dentry->d_inode; /* We must not pack tails for quota files on reiserfs for quota IO to work */ @@ -2087,8 +2087,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, reiserfs_warning(sb, "reiserfs: Unpacking tail of quota file failed" " (%d). 
Cannot turn on quotas.", err); - path_put(&nd.path); - return -EINVAL; + err = -EINVAL; + goto out; } mark_inode_dirty(inode); } @@ -2109,13 +2109,15 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, /* Just start temporary transaction and finish it */ err = journal_begin(&th, sb, 1); if (err) - return err; + goto out; err = journal_end_sync(&th, sb, 1); if (err) - return err; + goto out; } + err = vfs_quota_on_path(sb, type, format_id, &nd.path); +out: path_put(&nd.path); - return vfs_quota_on(sb, type, format_id, path, 0); + return err; } /* Read data from quotafile - avoid pagecache and such because we cannot afford diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 742187f7a05c..ca6b9b5c8d52 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -43,6 +43,8 @@ int dquot_mark_dquot_dirty(struct dquot *dquot); int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, int remount); +int vfs_quota_on_path(struct super_block *sb, int type, int format_id, + struct path *path); int vfs_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type); int vfs_quota_off(struct super_block *sb, int type, int remount); -- cgit v1.2.3 From 8d66bf5481002b0960aa49aed0987c73f5d7816c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Aug 2008 09:05:54 -0400 Subject: [PATCH] pass struct path * to do_add_mount() Signed-off-by: Al Viro --- fs/afs/mntpt.c | 2 +- fs/cifs/cifs_dfs_ref.c | 2 +- fs/namespace.c | 16 ++++++++-------- fs/nfs/namespace.c | 2 +- include/linux/mount.h | 3 ++- 5 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 2f5503902c37..78db4953a800 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -232,7 +232,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) } mntget(newmnt); - err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts); + err = do_add_mount(newmnt, &nd->path, MNT_SHRINKABLE, &afs_vfsmounts); switch (err) { case 0: path_put(&nd->path); diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index d82374c9e329..d2c8eef84f3c 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -226,7 +226,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd, int err; mntget(newmnt); - err = do_add_mount(newmnt, nd, nd->path.mnt->mnt_flags, mntlist); + err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist); switch (err) { case 0: path_put(&nd->path); diff --git a/fs/namespace.c b/fs/namespace.c index 411728c0c8bb..6e283c93b50d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1667,31 +1667,31 @@ static noinline int do_new_mount(struct nameidata *nd, char *type, int flags, if (IS_ERR(mnt)) return PTR_ERR(mnt); - return do_add_mount(mnt, nd, mnt_flags, NULL); + return do_add_mount(mnt, &nd->path, mnt_flags, NULL); } /* * add a mount into a namespace's mount tree * - provide the option of adding the new mount to an expiration list */ -int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, +int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags, struct list_head *fslist) { int err; down_write(&namespace_sem); /* Something was mounted here while we slept */ - while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path.mnt, &nd->path.dentry)) + while (d_mountpoint(path->dentry) && + follow_down(&path->mnt, &path->dentry)) ; err = -EINVAL; - if (!check_mnt(nd->path.mnt)) + if 
(!check_mnt(path->mnt)) goto unlock; /* Refuse the same filesystem on the same mount point */ err = -EBUSY; - if (nd->path.mnt->mnt_sb == newmnt->mnt_sb && - nd->path.mnt->mnt_root == nd->path.dentry) + if (path->mnt->mnt_sb == newmnt->mnt_sb && + path->mnt->mnt_root == path->dentry) goto unlock; err = -EINVAL; @@ -1699,7 +1699,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, goto unlock; newmnt->mnt_flags = mnt_flags; - if ((err = graft_tree(newmnt, &nd->path))) + if ((err = graft_tree(newmnt, path))) goto unlock; if (fslist) /* add to the specified expiration list */ diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 2f285ef76399..66df08dd1caf 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -129,7 +129,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) goto out_err; mntget(mnt); - err = do_add_mount(mnt, nd, nd->path.mnt->mnt_flags|MNT_SHRINKABLE, + err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE, &nfs_automount_list); if (err < 0) { mntput(mnt); diff --git a/include/linux/mount.h b/include/linux/mount.h index b5efaa2132ab..30a1d63b6fb5 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -105,7 +105,8 @@ extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, struct nameidata; -extern int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, +struct path; +extern int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags, struct list_head *fslist); extern void mark_mounts_for_expiry(struct list_head *mounts); -- cgit v1.2.3 From 6c5e0c4d518a37e1d5d794c14433e80284415079 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 1 Aug 2008 20:31:32 +0200 Subject: block: add a blk_plug_device_unlocked() that grabs the queue lock blk_plug_device() must be called with the queue lock held, so callers often just grab and release the lock for that purpose. Add a helper that does just that. Signed-off-by: Jens Axboe --- block/blk-core.c | 18 ++++++++++++++++++ include/linux/blkdev.h | 1 + 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index fef79ccb2a11..4889eb86a39e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -212,6 +212,24 @@ void blk_plug_device(struct request_queue *q) } EXPORT_SYMBOL(blk_plug_device); +/** + * blk_plug_device_unlocked - plug a device without queue lock held + * @q: The &struct request_queue to plug + * + * Description: + * Like @blk_plug_device(), but grabs the queue lock and disables + * interrupts. + **/ +void blk_plug_device_unlocked(struct request_queue *q) +{ + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + blk_plug_device(q); + spin_unlock_irqrestore(q->queue_lock, flags); +} +EXPORT_SYMBOL(blk_plug_device_unlocked); + /* * remove the queue from the plugged list, if present. called with * queue lock held and interrupts disabled. 
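In callers, the new helper replaces the open-coded lock/unlock pair described in the changelog. A minimal before/after sketch; the caller context (q, flags) is illustrative and not taken from this patch:

	/* before: callers open-coded the locking around blk_plug_device() */
	spin_lock_irqsave(q->queue_lock, flags);
	blk_plug_device(q);
	spin_unlock_irqrestore(q->queue_lock, flags);

	/* after: the helper takes and releases the queue lock itself */
	blk_plug_device_unlocked(q);
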
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 88d68081a0f1..e61f22be4d0e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -655,6 +655,7 @@ extern struct request *blk_get_request(struct request_queue *, int, gfp_t); extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); extern void blk_plug_device(struct request_queue *); +extern void blk_plug_device_unlocked(struct request_queue *); extern int blk_remove_plug(struct request_queue *); extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_cmd_ioctl(struct file *, struct request_queue *, -- cgit v1.2.3 From 5c7edcd7ee6b77b88252fe4096dce1a46a60c829 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 31 Jul 2008 02:04:09 -0700 Subject: tracehook: fix exit_signal=0 case My commit 2b2a1ff64afbadac842bbc58c5166962cf4f7664 introduced a regression (sorry about that) for the odd case of exit_signal=0 (e.g. clone_flags=0). This is not a normal use, but it's used by a case in the glibc test suite. Dying with exit_signal=0 sends no signal, but it's supposed to wake up a parent's blocked wait*() calls (unlike the delayed_group_leader case). This fixes tracehook_notify_death() and its caller to distinguish a "signal 0" wakeup from the delayed_group_leader case (with no wakeup). Signed-off-by: Roland McGrath Tested-by: Serge Hallyn Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 21 +++++++++++++-------- kernel/exit.c | 6 +++--- 2 files changed, 16 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index b1875582c1a1..12532839f508 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -493,16 +493,21 @@ static inline int tracehook_notify_jctl(int notify, int why) * @death_cookie: value to pass to tracehook_report_death() * @group_dead: nonzero if this was the last thread in the group to die * - * Return the signal number to send our parent with do_notify_parent(), or - * zero to send no signal and leave a zombie, or -1 to self-reap right now. + * A return value >= 0 means call do_notify_parent() with that signal + * number. Negative return value can be %DEATH_REAP to self-reap right + * now, or %DEATH_DELAYED_GROUP_LEADER to a zombie without notifying our + * parent. Note that a return value of 0 means a do_notify_parent() call + * that sends no signal, but still wakes up a parent blocked in wait*(). * * Called with write_lock_irq(&tasklist_lock) held. */ +#define DEATH_REAP -1 +#define DEATH_DELAYED_GROUP_LEADER -2 static inline int tracehook_notify_death(struct task_struct *task, void **death_cookie, int group_dead) { if (task->exit_signal == -1) - return task->ptrace ? SIGCHLD : -1; + return task->ptrace ? SIGCHLD : DEATH_REAP; /* * If something other than our normal parent is ptracing us, then @@ -512,21 +517,21 @@ static inline int tracehook_notify_death(struct task_struct *task, if (thread_group_empty(task) && !ptrace_reparented(task)) return task->exit_signal; - return task->ptrace ? SIGCHLD : 0; + return task->ptrace ? 
SIGCHLD : DEATH_DELAYED_GROUP_LEADER; } /** * tracehook_report_death - task is dead and ready to be reaped * @task: @current task now exiting - * @signal: signal number sent to parent, or 0 or -1 + * @signal: return value from tracheook_notify_death() * @death_cookie: value passed back from tracehook_notify_death() * @group_dead: nonzero if this was the last thread in the group to die * * Thread has just become a zombie or is about to self-reap. If positive, * @signal is the signal number just sent to the parent (usually %SIGCHLD). - * If @signal is -1, this thread will self-reap. If @signal is 0, this is - * a delayed_group_leader() zombie. The @death_cookie was passed back by - * tracehook_notify_death(). + * If @signal is %DEATH_REAP, this thread will self-reap. If @signal is + * %DEATH_DELAYED_GROUP_LEADER, this is a delayed_group_leader() zombie. + * The @death_cookie was passed back by tracehook_notify_death(). * * If normal reaping is not inhibited, @task->exit_state might be changing * in parallel. diff --git a/kernel/exit.c b/kernel/exit.c index eb4d6470d1d0..38ec40630149 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -911,10 +911,10 @@ static void exit_notify(struct task_struct *tsk, int group_dead) tsk->exit_signal = SIGCHLD; signal = tracehook_notify_death(tsk, &cookie, group_dead); - if (signal > 0) + if (signal >= 0) signal = do_notify_parent(tsk, signal); - tsk->exit_state = signal < 0 ? EXIT_DEAD : EXIT_ZOMBIE; + tsk->exit_state = signal == DEATH_REAP ? EXIT_DEAD : EXIT_ZOMBIE; /* mt-exec, de_thread() is waiting for us */ if (thread_group_leader(tsk) && @@ -927,7 +927,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead) tracehook_report_death(tsk, signal, cookie, group_dead); /* If the process is dead, release it - nobody will wait for it */ - if (signal < 0) + if (signal == DEATH_REAP) release_task(tsk); } -- cgit v1.2.3 From 4744b43431e8613f920c5cba88346756f53c5165 Mon Sep 17 00:00:00 2001 From: Tim Bird Date: Fri, 1 Aug 2008 14:05:50 -0700 Subject: embedded: fix vc_translate operator precedence This fixes a bug in operator precedence in the newly introduced vc_translate macro. Without this fix, the translation of some characters on the kernel console is garbled. This patch was copied to the e-mail list previously for testing. Now, all reports confirm that it works, so this is an official post for application. Signed-off-by: Tim Bird Signed-off-by: David Woodhouse --- include/linux/vt_kern.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index 14c0e91be9b5..8c8119ffee12 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -74,7 +74,7 @@ void con_protect_unimap(struct vc_data *vc, int rdonly); int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc); #define vc_translate(vc, c) ((vc)->vc_translate[(c) | \ - (vc)->vc_toggle_meta ? 0x80 : 0]) + ((vc)->vc_toggle_meta ? 0x80 : 0)]) #else #define con_set_trans_old(arg) (0) #define con_get_trans_old(arg) (-EINVAL) -- cgit v1.2.3 From ff4cc1de2401ad44ae084c3f5a9e898af0879520 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Wed, 30 Jul 2008 18:26:58 +0200 Subject: mISDN cleanup user interface The channelmap should have the same size on 32 and 64 bit systems and should not depend on endianess. Thanks to David Woodhouse for spotting this. 
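The map is now a fixed-size byte array that drivers manipulate only through the test/set/clear helpers added below. A usage sketch (the channel number is chosen arbitrarily for illustration):

	u_char map[MISDN_CHMAP_SIZE] = { 0 };

	set_channelmap(1, map);			/* mark channel 1 as present */
	if (test_channelmap(1, map))
		clear_channelmap(1, map);	/* ...and release it again */
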
Signed-off-by: Karsten Keil --- drivers/isdn/hardware/mISDN/hfcmulti.c | 6 +++--- drivers/isdn/hardware/mISDN/hfcpci.c | 2 +- drivers/isdn/mISDN/l1oip_core.c | 6 ++---- drivers/isdn/mISDN/socket.c | 4 ++-- include/linux/mISDNif.h | 32 +++++++++++++++++++++++++++----- 5 files changed, 35 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index 2649ea55a9e8..10144e871c06 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -3971,7 +3971,7 @@ open_bchannel(struct hfc_multi *hc, struct dchannel *dch, struct bchannel *bch; int ch; - if (!test_bit(rq->adr.channel, &dch->dev.channelmap[0])) + if (!test_channelmap(rq->adr.channel, dch->dev.channelmap)) return -EINVAL; if (rq->protocol == ISDN_P_NONE) return -EINVAL; @@ -4587,7 +4587,7 @@ init_e1_port(struct hfc_multi *hc, struct hm_map *m) list_add(&bch->ch.list, &dch->dev.bchannels); hc->chan[ch].bch = bch; hc->chan[ch].port = 0; - test_and_set_bit(bch->nr, &dch->dev.channelmap[0]); + set_channelmap(bch->nr, dch->dev.channelmap); } /* set optical line type */ if (port[Port_cnt] & 0x001) { @@ -4755,7 +4755,7 @@ init_multi_port(struct hfc_multi *hc, int pt) list_add(&bch->ch.list, &dch->dev.bchannels); hc->chan[i + ch].bch = bch; hc->chan[i + ch].port = pt; - test_and_set_bit(bch->nr, &dch->dev.channelmap[0]); + set_channelmap(bch->nr, dch->dev.channelmap); } /* set master clock */ if (port[Port_cnt] & 0x001) { diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index 3231814e7efa..9cf5edbb1a9b 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -2056,7 +2056,7 @@ setup_card(struct hfc_pci *card) card->dch.dev.nrbchan = 2; for (i = 0; i < 2; i++) { card->bch[i].nr = i + 1; - test_and_set_bit(i + 1, &card->dch.dev.channelmap[0]); + set_channelmap(i + 1, card->dch.dev.channelmap); card->bch[i].debug = debug; mISDN_initbchannel(&card->bch[i], MAX_DATA_MEM); card->bch[i].hw = card; diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index 155b99780c4f..e42150a57780 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -1006,8 +1006,7 @@ open_bchannel(struct l1oip *hc, struct dchannel *dch, struct channel_req *rq) struct bchannel *bch; int ch; - if (!test_bit(rq->adr.channel & 0x1f, - &dch->dev.channelmap[rq->adr.channel >> 5])) + if (!test_channelmap(rq->adr.channel, dch->dev.channelmap)) return -EINVAL; if (rq->protocol == ISDN_P_NONE) return -EINVAL; @@ -1412,8 +1411,7 @@ init_card(struct l1oip *hc, int pri, int bundle) bch->ch.nr = i + ch; list_add(&bch->ch.list, &dch->dev.bchannels); hc->chan[i + ch].bch = bch; - test_and_set_bit(bch->nr & 0x1f, - &dch->dev.channelmap[bch->nr >> 5]); + set_channelmap(bch->nr, dch->dev.channelmap); } ret = mISDN_register_device(&dch->dev, hc->name); if (ret) diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 4ba4cc364c9e..e5a20f9542d1 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -379,7 +379,7 @@ data_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) di.Bprotocols = dev->Bprotocols | get_all_Bprotocols(); di.protocol = dev->D.protocol; memcpy(di.channelmap, dev->channelmap, - MISDN_CHMAP_SIZE * 4); + sizeof(di.channelmap)); di.nrbchan = dev->nrbchan; strcpy(di.name, dev->name); if (copy_to_user((void __user *)arg, &di, sizeof(di))) @@ -637,7 +637,7 @@ 
base_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) di.Bprotocols = dev->Bprotocols | get_all_Bprotocols(); di.protocol = dev->D.protocol; memcpy(di.channelmap, dev->channelmap, - MISDN_CHMAP_SIZE * 4); + sizeof(di.channelmap)); di.nrbchan = dev->nrbchan; strcpy(di.name, dev->name); if (copy_to_user((void __user *)arg, &di, sizeof(di))) diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 5c948f337817..8f2d60da04e7 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -37,7 +37,7 @@ */ #define MISDN_MAJOR_VERSION 1 #define MISDN_MINOR_VERSION 0 -#define MISDN_RELEASE 18 +#define MISDN_RELEASE 19 /* primitives for information exchange * generell format @@ -242,7 +242,8 @@ struct mISDNhead { #define TEI_SAPI 63 #define CTRL_SAPI 0 -#define MISDN_CHMAP_SIZE 4 +#define MISDN_MAX_CHANNEL 127 +#define MISDN_CHMAP_SIZE ((MISDN_MAX_CHANNEL + 1) >> 3) #define SOL_MISDN 0 @@ -275,11 +276,32 @@ struct mISDN_devinfo { u_int Dprotocols; u_int Bprotocols; u_int protocol; - u_long channelmap[MISDN_CHMAP_SIZE]; + u_char channelmap[MISDN_CHMAP_SIZE]; u_int nrbchan; char name[MISDN_MAX_IDLEN]; }; +static inline int +test_channelmap(u_int nr, u_char *map) +{ + if (nr <= MISDN_MAX_CHANNEL) + return map[nr >> 3] & (1 << (nr & 7)); + else + return 0; +} + +static inline void +set_channelmap(u_int nr, u_char *map) +{ + map[nr >> 3] |= (1 << (nr & 7)); +} + +static inline void +clear_channelmap(u_int nr, u_char *map) +{ + map[nr >> 3] &= ~(1 << (nr & 7)); +} + /* CONTROL_CHANNEL parameters */ #define MISDN_CTRL_GETOP 0x0000 #define MISDN_CTRL_LOOP 0x0001 @@ -405,7 +427,7 @@ struct mISDNdevice { u_int Dprotocols; u_int Bprotocols; u_int nrbchan; - u_long channelmap[MISDN_CHMAP_SIZE]; + u_char channelmap[MISDN_CHMAP_SIZE]; struct list_head bchannels; struct mISDNchannel *teimgr; struct device dev; @@ -430,7 +452,7 @@ struct mISDNstack { #endif }; -/* global alloc/queue dunctions */ +/* global alloc/queue functions */ static inline struct sk_buff * mI_alloc_skb(unsigned int len, gfp_t gfp_mask) -- cgit v1.2.3 From 85ebd00334099fd5d296bcae74a66c943d46686d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 2 Aug 2008 19:12:23 +0200 Subject: Fix IHEX firmware generation/loading Fix both the IHEX firmware generation (len field always null, and EOF marker a byte too short) and loading (struct ihex_binrec needs to be packed to reflect the on-disk structure). 
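The "always null" length comes from assigning the 32-bit htonl() result to the 16-bit field: only the low 16 bits of the value survive, and on a little-endian build host those are the zero bytes. A standalone illustration of the two conversions (hypothetical demo program, not part of the patch):

	#include <stdio.h>
	#include <stdint.h>
	#include <arpa/inet.h>

	int main(void)
	{
		uint16_t len;

		len = htonl(0x0123);	/* old code: value truncates to 0 on little-endian hosts */
		printf("len via htonl(): %04x\n", len);

		len = htons(0x0123);	/* fixed code: stores the big-endian 16-bit value 0x0123 */
		printf("len via htons(): %04x\n", len);
		return 0;
	}
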
Signed-off-by: Marc Zyngier Signed-off-by: David Woodhouse --- firmware/ihex2fw.c | 6 +++--- include/linux/ihex.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/firmware/ihex2fw.c b/firmware/ihex2fw.c index 660b191ed75e..8f7fdaa9e010 100644 --- a/firmware/ihex2fw.c +++ b/firmware/ihex2fw.c @@ -250,19 +250,19 @@ static void file_record(struct ihex_binrec *record) static int output_records(int outfd) { - unsigned char zeroes[5] = {0, 0, 0, 0, 0}; + unsigned char zeroes[6] = {0, 0, 0, 0, 0, 0}; struct ihex_binrec *p = records; while (p) { uint16_t writelen = (p->len + 9) & ~3; p->addr = htonl(p->addr); - p->len = htonl(p->len); + p->len = htons(p->len); write(outfd, &p->addr, writelen); p = p->next; } /* EOF record is zero length, since we don't bother to represent the type field in the binary version */ - write(outfd, zeroes, 5); + write(outfd, zeroes, 6); return 0; } diff --git a/include/linux/ihex.h b/include/linux/ihex.h index 2baace2788a7..31d8629e75a1 100644 --- a/include/linux/ihex.h +++ b/include/linux/ihex.h @@ -18,7 +18,7 @@ struct ihex_binrec { __be32 addr; __be16 len; uint8_t data[0]; -} __attribute__((aligned(4))); +} __attribute__((packed)); /* Find the next record, taking into account the 4-byte alignment */ static inline const struct ihex_binrec * -- cgit v1.2.3 From cf368d2f9aced8adc8bd6b1f04294a71551d5fce Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Sun, 3 Aug 2008 03:03:57 +0400 Subject: drivers/video/console/promcon.c: fix build error drivers/video/console/promcon.c:158: error: implicit declaration of function 'con_protect_unimap' Introduced by commit a29ccf6f823a84d89e1c7aaaf221cf7282022024 ("embedded: fix vc_translate operator precedence"). Signed-off-by: Alexander Beregalov Cc: Tim Bird Signed-off-by: David Woodhouse --- include/linux/vt_kern.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index 8c8119ffee12..1c78d56c57e5 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -86,6 +86,7 @@ int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc); #define con_copy_unimap(d, s) (0) #define con_get_unimap(vc, ct, uct, list) (-EINVAL) #define con_free_unimap(vc) do { ; } while (0) +#define con_protect_unimap(vc, rdonly) do { ; } while (0) #define vc_translate(vc, c) (c) #endif -- cgit v1.2.3 From 63870295de9adb365cd121dab94379b8cfdf986a Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 4 Aug 2008 10:39:46 +0900 Subject: maple: Clean up maple_driver_register/unregister routines. These were completely inconsistent. Clean these up to take a maple_driver pointer directly for consistency. 
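With both entry points taking a struct maple_driver pointer, a bus driver's module glue becomes symmetric. A minimal sketch under the new interface (names are illustrative and most fields are omitted):

	static struct maple_driver example_driver = {
		/* .function and the .drv probe/remove hooks omitted for brevity */
		.drv = {
			.name	= "example_maple",
		},
	};

	static int __init example_init(void)
	{
		return maple_driver_register(&example_driver);
	}

	static void __exit example_exit(void)
	{
		maple_driver_unregister(&example_driver);
	}

	module_init(example_init);
	module_exit(example_exit);
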
Signed-off-by: Paul Mundt --- drivers/input/keyboard/maple_keyb.c | 6 +++--- drivers/sh/maple/maple.c | 37 ++++++++++++++++++++++++++----------- include/linux/maple.h | 4 +++- 3 files changed, 32 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/keyboard/maple_keyb.c b/drivers/input/keyboard/maple_keyb.c index 42f5d4ec39ab..3f5151a0fd15 100644 --- a/drivers/input/keyboard/maple_keyb.c +++ b/drivers/input/keyboard/maple_keyb.c @@ -235,17 +235,17 @@ static struct maple_driver dc_kbd_driver = { .name = "Dreamcast_keyboard", .probe = probe_maple_kbd, .remove = remove_maple_kbd, - }, + }, }; static int __init dc_kbd_init(void) { - return maple_driver_register(&dc_kbd_driver.drv); + return maple_driver_register(&dc_kbd_driver); } static void __exit dc_kbd_exit(void) { - driver_unregister(&dc_kbd_driver.drv); + maple_driver_unregister(&dc_kbd_driver); } module_init(dc_kbd_init); diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c index be97789fa5fd..a6b4dc3cfcba 100644 --- a/drivers/sh/maple/maple.c +++ b/drivers/sh/maple/maple.c @@ -2,6 +2,7 @@ * Core maple bus functionality * * Copyright (C) 2007, 2008 Adrian McMenamin + * Copyright (C) 2001 - 2008 Paul Mundt * * Based on 2.4 code by: * @@ -31,7 +32,7 @@ #include #include -MODULE_AUTHOR("Yaegshi Takeshi, Paul Mundt, M.R. Brown, Adrian McMenamin"); +MODULE_AUTHOR("Yaegashi Takeshi, Paul Mundt, M. R. Brown, Adrian McMenamin"); MODULE_DESCRIPTION("Maple bus driver for Dreamcast"); MODULE_LICENSE("GPL v2"); MODULE_SUPPORTED_DEVICE("{{SEGA, Dreamcast/Maple}}"); @@ -65,19 +66,35 @@ static bool checked[4]; static struct maple_device *baseunits[4]; /** - * maple_driver_register - register a device driver - * automatically makes the driver bus a maple bus - * @drv: the driver to be registered + * maple_driver_register - register a maple driver + * @drv: maple driver to be registered. + * + * Registers the passed in @drv, while updating the bus type. + * Devices with matching function IDs will be automatically probed. */ -int maple_driver_register(struct device_driver *drv) +int maple_driver_register(struct maple_driver *drv) { if (!drv) return -EINVAL; - drv->bus = &maple_bus_type; - return driver_register(drv); + + drv->drv.bus = &maple_bus_type; + + return driver_register(&drv->drv); } EXPORT_SYMBOL_GPL(maple_driver_register); +/** + * maple_driver_unregister - unregister a maple driver. + * @drv: maple driver to unregister. + * + * Cleans up after maple_driver_register(). To be invoked in the exit + * path of any module drivers. 
+ */ +void maple_driver_unregister(struct maple_driver *drv) +{ + driver_unregister(&drv->drv); +} + /* set hardware registers to enable next round of dma */ static void maplebus_dma_reset(void) { @@ -724,11 +741,9 @@ static int maple_get_dma_buffer(void) static int match_maple_bus_driver(struct device *devptr, struct device_driver *drvptr) { - struct maple_driver *maple_drv; - struct maple_device *maple_dev; + struct maple_driver *maple_drv = to_maple_driver(drvptr); + struct maple_device *maple_dev = to_maple_dev(devptr); - maple_drv = container_of(drvptr, struct maple_driver, drv); - maple_dev = container_of(devptr, struct maple_device, dev); /* Trap empty port case */ if (maple_dev->devinfo.function == 0xFFFFFFFF) return 0; diff --git a/include/linux/maple.h b/include/linux/maple.h index c853b1066018..b2b7ce0fb1f7 100644 --- a/include/linux/maple.h +++ b/include/linux/maple.h @@ -70,7 +70,9 @@ void maple_getcond_callback(struct maple_device *dev, void (*callback) (struct mapleq * mq), unsigned long interval, unsigned long function); -int maple_driver_register(struct device_driver *drv); +int maple_driver_register(struct maple_driver *); +void maple_driver_unregister(struct maple_driver *); + int maple_add_packet_sleeps(struct maple_device *mdev, u32 function, u32 command, u32 length, void *data); void maple_clear_dev(struct maple_device *mdev); -- cgit v1.2.3 From 617870632de6739fca0893f3e6648e9ae1bd0ddb Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 4 Aug 2008 10:58:24 +0900 Subject: maple: Kill useless private_data pointer. We can simply wrap in to the dev_set/get_drvdata(), there's no reason to track an extra level of private data on top of the struct device. Signed-off-by: Paul Mundt --- drivers/input/keyboard/maple_keyb.c | 15 ++++++++------- drivers/sh/maple/maple.c | 1 + include/linux/maple.h | 4 +++- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/keyboard/maple_keyb.c b/drivers/input/keyboard/maple_keyb.c index 3f5151a0fd15..22f17a593be7 100644 --- a/drivers/input/keyboard/maple_keyb.c +++ b/drivers/input/keyboard/maple_keyb.c @@ -139,7 +139,7 @@ static void dc_scan_kbd(struct dc_kbd *kbd) static void dc_kbd_callback(struct mapleq *mq) { struct maple_device *mapledev = mq->dev; - struct dc_kbd *kbd = mapledev->private_data; + struct dc_kbd *kbd = maple_get_drvdata(mapledev); unsigned long *buf = mq->recvbuf; /* @@ -175,8 +175,6 @@ static int probe_maple_kbd(struct device *dev) goto fail; } - mdev->private_data = kbd; - kbd->dev = idev; memcpy(kbd->keycode, dc_kbd_keycode, sizeof(kbd->keycode)); @@ -204,27 +202,30 @@ static int probe_maple_kbd(struct device *dev) MAPLE_FUNC_KEYBOARD); mdev->driver = mdrv; + + maple_set_drvdata(mdev, kbd); + return error; fail: input_free_device(idev); kfree(kbd); - mdev->private_data = NULL; + maple_set_drvdata(mdev, NULL); return error; } static int remove_maple_kbd(struct device *dev) { struct maple_device *mdev = to_maple_dev(dev); - struct dc_kbd *kbd; + struct dc_kbd *kbd = maple_get_drvdata(mdev); mutex_lock(&maple_keyb_mutex); - kbd = mdev->private_data; - mdev->private_data = NULL; input_unregister_device(kbd->dev); kfree(kbd); + maple_set_drvdata(mdev, NULL); + mutex_unlock(&maple_keyb_mutex); return 0; } diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c index a6b4dc3cfcba..be77a39f224c 100644 --- a/drivers/sh/maple/maple.c +++ b/drivers/sh/maple/maple.c @@ -94,6 +94,7 @@ void maple_driver_unregister(struct maple_driver *drv) { driver_unregister(&drv->drv); 
} +EXPORT_SYMBOL_GPL(maple_driver_unregister); /* set hardware registers to enable next round of dma */ static void maplebus_dma_reset(void) diff --git a/include/linux/maple.h b/include/linux/maple.h index b2b7ce0fb1f7..c23d3f51ba40 100644 --- a/include/linux/maple.h +++ b/include/linux/maple.h @@ -51,7 +51,6 @@ struct maple_devinfo { struct maple_device { struct maple_driver *driver; struct mapleq *mq; - void *private_data; void (*callback) (struct mapleq * mq); unsigned long when, interval, function; struct maple_devinfo devinfo; @@ -80,4 +79,7 @@ void maple_clear_dev(struct maple_device *mdev); #define to_maple_dev(n) container_of(n, struct maple_device, dev) #define to_maple_driver(n) container_of(n, struct maple_driver, drv) +#define maple_get_drvdata(d) dev_get_drvdata(&(d)->dev) +#define maple_set_drvdata(d,p) dev_set_drvdata(&(d)->dev, (p)) + #endif /* __LINUX_MAPLE_H */ -- cgit v1.2.3 From 98f7dfd86cbbd377e2cbc293529681b914296f68 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 18 Jul 2008 13:52:59 +0800 Subject: mac80211: pass dtim_period to low level driver This patch adds the dtim_period in ieee80211_bss_conf, this allows the low level driver to know the dtim_period, and to plan power save accordingly. Signed-off-by: Emmanuel Grumbach Signed-off-by: Tomas Winkler Signed-off-by: Zhu Yi Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 13 +++++++++++++ include/net/mac80211.h | 4 +++- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 11 +++++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a1630ba0b87c..7f4df7c7659d 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -506,6 +506,19 @@ struct ieee80211_channel_sw_ie { u8 count; } __attribute__ ((packed)); +/** + * struct ieee80211_tim + * + * This structure refers to "Traffic Indication Map information element" + */ +struct ieee80211_tim_ie { + u8 dtim_count; + u8 dtim_period; + u8 bitmap_ctrl; + /* variable size: 1 - 251 bytes */ + u8 virtual_map[0]; +} __attribute__ ((packed)); + struct ieee80211_mgmt { __le16 frame_control; __le16 duration; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b52721008be8..9d99f2e0a204 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -177,9 +177,10 @@ enum ieee80211_bss_change { * @aid: association ID number, valid only when @assoc is true * @use_cts_prot: use CTS protection * @use_short_preamble: use 802.11b short preamble + * @dtim_period: num of beacons before the next DTIM, for PSM * @timestamp: beacon timestamp * @beacon_int: beacon interval - * @assoc_capability: capabbilities taken from assoc resp + * @assoc_capability: capabilities taken from assoc resp * @assoc_ht: association in HT mode * @ht_conf: ht capabilities * @ht_bss_conf: ht extended capabilities @@ -191,6 +192,7 @@ struct ieee80211_bss_conf { /* erp related data */ bool use_cts_prot; bool use_short_preamble; + u8 dtim_period; u16 beacon_int; u16 assoc_capability; u64 timestamp; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a2e200f9811e..ec59345af65b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -82,6 +82,7 @@ struct ieee80211_sta_bss { u8 bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 dtim_period; u16 capability; /* host byte order */ enum ieee80211_band band; int freq; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 
acb04133a95d..591e6331c427 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -551,6 +551,7 @@ static void ieee80211_set_associated(struct net_device *dev, /* set timing information */ sdata->bss_conf.beacon_int = bss->beacon_int; sdata->bss_conf.timestamp = bss->timestamp; + sdata->bss_conf.dtim_period = bss->dtim_period; changed |= ieee80211_handle_bss_capability(sdata, bss); @@ -2688,6 +2689,16 @@ static void ieee80211_rx_bss_info(struct net_device *dev, bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int); bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info); + if (elems->tim) { + struct ieee80211_tim_ie *tim_ie = + (struct ieee80211_tim_ie *)elems->tim; + bss->dtim_period = tim_ie->dtim_period; + } + + /* set default value for buggy APs */ + if (!elems->tim || bss->dtim_period == 0) + bss->dtim_period = 1; + bss->supp_rates_len = 0; if (elems->supp_rates) { clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; -- cgit v1.2.3 From 1a3f7d98e5f50f21ce6fb1406a35531d9596c5c6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 4 Aug 2008 16:50:38 -0700 Subject: Revert "UFS: add const to parser token table" This reverts commit f9247273cb69ba101877e946d2d83044409cc8c5 (and fb2e405fc1fc8b20d9c78eaa1c7fd5a297efde43 - "fix fs/nfs/nfsroot.c compilation" - that fixed a missed conversion). The changes cause problems for at least the sparc build. Let's re-do them when the exact issues are resolved. Requested-by: Andrew Morton Requested-by: Steven Whitehouse Cc: David Miller Signed-off-by: Linus Torvalds --- fs/nfs/nfsroot.c | 2 +- fs/ufs/super.c | 2 +- include/linux/parser.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 8478fc25daee..46763d1cd397 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -127,7 +127,7 @@ enum { Opt_err }; -static match_table_t __initconst tokens = { +static match_table_t __initdata tokens = { {Opt_port, "port=%u"}, {Opt_rsize, "rsize=%u"}, {Opt_wsize, "wsize=%u"}, diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 3e30e40aa24d..3141969b456d 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs) { struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; - const struct match_token *tp = tokens; + struct match_token *tp = tokens; while (tp->token != Opt_onerror_panic && tp->token != mval) ++tp; diff --git a/include/linux/parser.h b/include/linux/parser.h index cc554ca8bc78..7dcd05075756 100644 --- a/include/linux/parser.h +++ b/include/linux/parser.h @@ -14,7 +14,7 @@ struct match_token { const char *pattern; }; -typedef const struct match_token match_table_t[]; +typedef struct match_token match_table_t[]; /* Maximum number of arguments that match_token will find in a pattern */ enum {MAX_OPT_ARGS = 3}; -- cgit v1.2.3 From 115a326c1e5cab457924356123bbfd7d783ecf9d Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Mon, 4 Aug 2008 13:56:01 -0700 Subject: tracehook: kerneldoc fix My last change to tracehook.h made it confuse the kerneldoc parser. Move the #define's before the comment so it's happy again. 
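(Illustration only, not part of this patch: scripts/kernel-doc ties a
"/** ... */" block to the very next declaration it sees, so macros sitting
between the comment and the function have to be hoisted above the comment.
A minimal sketch with made-up names:

	/* constants first, outside the kernel-doc block */
	#define EXAMPLE_DEATH_REAP	-1

	/**
	 * example_notify_death - illustrative stub only
	 * @dead: nonzero if the task is already dead
	 *
	 * Returns %EXAMPLE_DEATH_REAP when the caller should reap the task.
	 */
	static inline int example_notify_death(int dead)
	{
		return dead ? EXAMPLE_DEATH_REAP : 0;
	}

With the #defines placed between the comment and the function instead,
kernel-doc tries to treat them as the documented symbol and gets confused.)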
Signed-off-by: Roland McGrath Acked-by: Randy Dunlap Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 12532839f508..ab3ef7aefa95 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -487,6 +487,9 @@ static inline int tracehook_notify_jctl(int notify, int why) return notify || (current->ptrace & PT_PTRACED); } +#define DEATH_REAP -1 +#define DEATH_DELAYED_GROUP_LEADER -2 + /** * tracehook_notify_death - task is dead, ready to notify parent * @task: @current task now exiting @@ -501,8 +504,6 @@ static inline int tracehook_notify_jctl(int notify, int why) * * Called with write_lock_irq(&tasklist_lock) held. */ -#define DEATH_REAP -1 -#define DEATH_DELAYED_GROUP_LEADER -2 static inline int tracehook_notify_death(struct task_struct *task, void **death_cookie, int group_dead) { -- cgit v1.2.3 From 529ae9aaa08378cfe2a4350bded76f32cc8ff0ce Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 2 Aug 2008 12:01:03 +0200 Subject: mm: rename page trylock Converting page lock to new locking bitops requires a change of page flag operation naming, so we might as well convert it to something nicer (!TestSetPageLocked_Lock => trylock_page, SetPageLocked => set_page_locked). This also facilitates lockdeping of page lock. Signed-off-by: Nick Piggin Acked-by: KOSAKI Motohiro Acked-by: Peter Zijlstra Acked-by: Andrew Morton Acked-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- drivers/scsi/sg.c | 2 +- fs/afs/write.c | 2 +- fs/cifs/file.c | 2 +- fs/jbd/commit.c | 4 +-- fs/jbd2/commit.c | 2 +- fs/reiserfs/journal.c | 2 +- fs/splice.c | 2 +- fs/xfs/linux-2.6/xfs_aops.c | 4 +-- include/linux/page-flags.h | 2 +- include/linux/pagemap.h | 67 +++++++++++++++++++++++++++------------------ mm/filemap.c | 12 ++++---- mm/memory.c | 2 +- mm/migrate.c | 4 +-- mm/rmap.c | 2 +- mm/shmem.c | 4 +-- mm/swap.c | 2 +- mm/swap_state.c | 8 +++--- mm/swapfile.c | 2 +- mm/truncate.c | 4 +-- mm/vmscan.c | 4 +-- 20 files changed, 74 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index d3b8ebb83776..3d36270a8b4d 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1747,7 +1747,7 @@ st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages, */ flush_dcache_page(pages[i]); /* ?? Is locking needed? 
I don't think so */ - /* if (TestSetPageLocked(pages[i])) + /* if (!trylock_page(pages[i])) goto out_unlock; */ } diff --git a/fs/afs/write.c b/fs/afs/write.c index 9a849ad3c489..065b4e10681a 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -404,7 +404,7 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, page = pages[loop]; if (page->index > wb->last) break; - if (TestSetPageLocked(page)) + if (!trylock_page(page)) break; if (!PageDirty(page) || page_private(page) != (unsigned long) wb) { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0aac824371a5..e692c42f24b5 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1280,7 +1280,7 @@ retry: if (first < 0) lock_page(page); - else if (TestSetPageLocked(page)) + else if (!trylock_page(page)) break; if (unlikely(page->mapping != mapping)) { diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 2eccbfaa1d48..81a9ad7177ca 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -63,7 +63,7 @@ static void release_buffer_page(struct buffer_head *bh) goto nope; /* OK, it's a truncated page */ - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto nope; page_cache_get(page); @@ -446,7 +446,7 @@ void journal_commit_transaction(journal_t *journal) spin_lock(&journal->j_list_lock); } if (unlikely(!buffer_uptodate(bh))) { - if (TestSetPageLocked(bh->b_page)) { + if (!trylock_page(bh->b_page)) { spin_unlock(&journal->j_list_lock); lock_page(bh->b_page); spin_lock(&journal->j_list_lock); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index adf0395f318e..f2ad061e95ec 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -67,7 +67,7 @@ static void release_buffer_page(struct buffer_head *bh) goto nope; /* OK, it's a truncated page */ - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto nope; page_cache_get(page); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index c8f60ee183b5..ce2208b27118 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -627,7 +627,7 @@ static int journal_list_still_alive(struct super_block *s, static void release_buffer_page(struct buffer_head *bh) { struct page *page = bh->b_page; - if (!page->mapping && !TestSetPageLocked(page)) { + if (!page->mapping && trylock_page(page)) { page_cache_get(page); put_bh(bh); if (!page->mapping) diff --git a/fs/splice.c b/fs/splice.c index b30311ba8af6..1bbc6f4bb09c 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -371,7 +371,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, * for an in-flight io page */ if (flags & SPLICE_F_NONBLOCK) { - if (TestSetPageLocked(page)) { + if (!trylock_page(page)) { error = -EAGAIN; break; } diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 0b211cba1909..fa73179233ad 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -675,7 +675,7 @@ xfs_probe_cluster( } else pg_offset = PAGE_CACHE_SIZE; - if (page->index == tindex && !TestSetPageLocked(page)) { + if (page->index == tindex && trylock_page(page)) { pg_len = xfs_probe_page(page, pg_offset, mapped); unlock_page(page); } @@ -759,7 +759,7 @@ xfs_convert_page( if (page->index != tindex) goto fail; - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto fail; if (PageWriteback(page)) goto fail_unlock_page; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 25aaccdb2f26..c74d3e875314 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -163,7 +163,7 @@ static inline int Page##uname(struct page *page) \ struct page; /* forward declaration */ 
-PAGEFLAG(Locked, locked) TESTSCFLAG(Locked, locked) +TESTPAGEFLAG(Locked, locked) PAGEFLAG(Error, error) PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced) PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 69ed3cb1197a..5da31c12101c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -250,29 +250,6 @@ static inline struct page *read_mapping_page(struct address_space *mapping, return read_cache_page(mapping, index, filler, data); } -int add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); -int add_to_page_cache_lru(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); -extern void remove_from_page_cache(struct page *page); -extern void __remove_from_page_cache(struct page *page); - -/* - * Like add_to_page_cache_locked, but used to add newly allocated pages: - * the page is new, so we can just run SetPageLocked() against it. - */ -static inline int add_to_page_cache(struct page *page, - struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) -{ - int error; - - SetPageLocked(page); - error = add_to_page_cache_locked(page, mapping, offset, gfp_mask); - if (unlikely(error)) - ClearPageLocked(page); - return error; -} - /* * Return byte-offset into filesystem object for page. */ @@ -294,13 +271,28 @@ extern int __lock_page_killable(struct page *page); extern void __lock_page_nosync(struct page *page); extern void unlock_page(struct page *page); +static inline void set_page_locked(struct page *page) +{ + set_bit(PG_locked, &page->flags); +} + +static inline void clear_page_locked(struct page *page) +{ + clear_bit(PG_locked, &page->flags); +} + +static inline int trylock_page(struct page *page) +{ + return !test_and_set_bit(PG_locked, &page->flags); +} + /* * lock_page may only be called if we have the page's inode pinned. */ static inline void lock_page(struct page *page) { might_sleep(); - if (TestSetPageLocked(page)) + if (!trylock_page(page)) __lock_page(page); } @@ -312,7 +304,7 @@ static inline void lock_page(struct page *page) static inline int lock_page_killable(struct page *page) { might_sleep(); - if (TestSetPageLocked(page)) + if (!trylock_page(page)) return __lock_page_killable(page); return 0; } @@ -324,7 +316,7 @@ static inline int lock_page_killable(struct page *page) static inline void lock_page_nosync(struct page *page) { might_sleep(); - if (TestSetPageLocked(page)) + if (!trylock_page(page)) __lock_page_nosync(page); } @@ -409,4 +401,27 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size) return ret; } +int add_to_page_cache_locked(struct page *page, struct address_space *mapping, + pgoff_t index, gfp_t gfp_mask); +int add_to_page_cache_lru(struct page *page, struct address_space *mapping, + pgoff_t index, gfp_t gfp_mask); +extern void remove_from_page_cache(struct page *page); +extern void __remove_from_page_cache(struct page *page); + +/* + * Like add_to_page_cache_locked, but used to add newly allocated pages: + * the page is new, so we can just run set_page_locked() against it. 
+ */ +static inline int add_to_page_cache(struct page *page, + struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) +{ + int error; + + set_page_locked(page); + error = add_to_page_cache_locked(page, mapping, offset, gfp_mask); + if (unlikely(error)) + clear_page_locked(page); + return error; +} + #endif /* _LINUX_PAGEMAP_H */ diff --git a/mm/filemap.c b/mm/filemap.c index d97d1ad55473..54e968650855 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -558,14 +558,14 @@ EXPORT_SYMBOL(wait_on_page_bit); * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. * * The first mb is necessary to safely close the critical section opened by the - * TestSetPageLocked(), the second mb is necessary to enforce ordering between - * the clear_bit and the read of the waitqueue (to avoid SMP races with a - * parallel wait_on_page_locked()). + * test_and_set_bit() to lock the page; the second mb is necessary to enforce + * ordering between the clear_bit and the read of the waitqueue (to avoid SMP + * races with a parallel wait_on_page_locked()). */ void unlock_page(struct page *page) { smp_mb__before_clear_bit(); - if (!TestClearPageLocked(page)) + if (!test_and_clear_bit(PG_locked, &page->flags)) BUG(); smp_mb__after_clear_bit(); wake_up_page(page, PG_locked); @@ -931,7 +931,7 @@ grab_cache_page_nowait(struct address_space *mapping, pgoff_t index) struct page *page = find_get_page(mapping, index); if (page) { - if (!TestSetPageLocked(page)) + if (trylock_page(page)) return page; page_cache_release(page); return NULL; @@ -1027,7 +1027,7 @@ find_page: if (inode->i_blkbits == PAGE_CACHE_SHIFT || !mapping->a_ops->is_partially_uptodate) goto page_not_up_to_date; - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto page_not_up_to_date; if (!mapping->a_ops->is_partially_uptodate(page, desc, offset)) diff --git a/mm/memory.c b/mm/memory.c index a472bcd4b061..1002f473f497 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1789,7 +1789,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, * not dirty accountable. */ if (PageAnon(old_page)) { - if (!TestSetPageLocked(old_page)) { + if (trylock_page(old_page)) { reuse = can_share_swap_page(old_page); unlock_page(old_page); } diff --git a/mm/migrate.c b/mm/migrate.c index 153572fb60b8..2a80136b23bb 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -605,7 +605,7 @@ static int move_to_new_page(struct page *newpage, struct page *page) * establishing additional references. We are the only one * holding a reference to the new page at this point. 
*/ - if (TestSetPageLocked(newpage)) + if (!trylock_page(newpage)) BUG(); /* Prepare mapping for the new page.*/ @@ -667,7 +667,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, BUG_ON(charge); rc = -EAGAIN; - if (TestSetPageLocked(page)) { + if (!trylock_page(page)) { if (!force) goto move_newpage; lock_page(page); diff --git a/mm/rmap.c b/mm/rmap.c index 94a5246a3f98..1ea4e6fcee77 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -422,7 +422,7 @@ int page_referenced(struct page *page, int is_locked, referenced += page_referenced_anon(page, mem_cont); else if (is_locked) referenced += page_referenced_file(page, mem_cont); - else if (TestSetPageLocked(page)) + else if (!trylock_page(page)) referenced++; else { if (page->mapping) diff --git a/mm/shmem.c b/mm/shmem.c index c1e5a3b4f758..04fb4f1ab88e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1265,7 +1265,7 @@ repeat: } /* We have to do this with page locked to prevent races */ - if (TestSetPageLocked(swappage)) { + if (!trylock_page(swappage)) { shmem_swp_unmap(entry); spin_unlock(&info->lock); wait_on_page_locked(swappage); @@ -1329,7 +1329,7 @@ repeat: shmem_swp_unmap(entry); filepage = find_get_page(mapping, idx); if (filepage && - (!PageUptodate(filepage) || TestSetPageLocked(filepage))) { + (!PageUptodate(filepage) || !trylock_page(filepage))) { spin_unlock(&info->lock); wait_on_page_locked(filepage); page_cache_release(filepage); diff --git a/mm/swap.c b/mm/swap.c index 7417a2adbe50..9e0cb3118079 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -444,7 +444,7 @@ void pagevec_strip(struct pagevec *pvec) for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; - if (PagePrivate(page) && !TestSetPageLocked(page)) { + if (PagePrivate(page) && trylock_page(page)) { if (PagePrivate(page)) try_to_release_page(page, 0); unlock_page(page); diff --git a/mm/swap_state.c b/mm/swap_state.c index b8035b055129..167cf2dc8a03 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -201,7 +201,7 @@ void delete_from_swap_cache(struct page *page) */ static inline void free_swap_cache(struct page *page) { - if (PageSwapCache(page) && !TestSetPageLocked(page)) { + if (PageSwapCache(page) && trylock_page(page)) { remove_exclusive_swap_page(page); unlock_page(page); } @@ -302,9 +302,9 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, * re-using the just freed swap entry for an existing page. * May fail (-ENOMEM) if radix-tree node allocation failed. */ - SetPageLocked(new_page); + set_page_locked(new_page); err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL); - if (!err) { + if (likely(!err)) { /* * Initiate read into locked page and return. 
*/ @@ -312,7 +312,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, swap_readpage(NULL, new_page); return new_page; } - ClearPageLocked(new_page); + clear_page_locked(new_page); swap_free(entry); } while (err != -ENOMEM); diff --git a/mm/swapfile.c b/mm/swapfile.c index bb7f79641f9e..1e330f2998fa 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -403,7 +403,7 @@ void free_swap_and_cache(swp_entry_t entry) if (p) { if (swap_entry_free(p, swp_offset(entry)) == 1) { page = find_get_page(&swapper_space, entry.val); - if (page && unlikely(TestSetPageLocked(page))) { + if (page && unlikely(!trylock_page(page))) { page_cache_release(page); page = NULL; } diff --git a/mm/truncate.c b/mm/truncate.c index 894e9a70699f..250505091d37 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -187,7 +187,7 @@ void truncate_inode_pages_range(struct address_space *mapping, if (page_index > next) next = page_index; next++; - if (TestSetPageLocked(page)) + if (!trylock_page(page)) continue; if (PageWriteback(page)) { unlock_page(page); @@ -280,7 +280,7 @@ unsigned long __invalidate_mapping_pages(struct address_space *mapping, pgoff_t index; int lock_failed; - lock_failed = TestSetPageLocked(page); + lock_failed = !trylock_page(page); /* * We really shouldn't be looking at the ->index of an diff --git a/mm/vmscan.c b/mm/vmscan.c index 75be453628bf..1ff1a58e7c10 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -496,7 +496,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, page = lru_to_page(page_list); list_del(&page->lru); - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto keep; VM_BUG_ON(PageActive(page)); @@ -582,7 +582,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, * A synchronous write - probably a ramdisk. Go * ahead and try to reclaim the page. */ - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto keep; if (PageDirty(page) || PageWriteback(page)) goto keep_locked; -- cgit v1.2.3 From ca5de404ff036a29b25e9a83f6919c9f606c5841 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 2 Aug 2008 12:02:13 +0200 Subject: fs: rename buffer trylock Like the page lock change, this also requires name change, so convert the raw test_and_set bitop to a trylock. Signed-off-by: Nick Piggin Signed-off-by: Linus Torvalds --- fs/buffer.c | 4 ++-- fs/jbd/commit.c | 2 +- fs/ntfs/aops.c | 2 +- fs/ntfs/compress.c | 2 +- fs/ntfs/mft.c | 4 ++-- fs/reiserfs/inode.c | 2 +- fs/reiserfs/journal.c | 4 ++-- fs/xfs/linux-2.6/xfs_aops.c | 2 +- include/linux/buffer_head.h | 8 ++++++-- 9 files changed, 17 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 4dbe52948e8f..38653e36e225 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1720,7 +1720,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, */ if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { lock_buffer(bh); - } else if (test_set_buffer_locked(bh)) { + } else if (!trylock_buffer(bh)) { redirty_page_for_writepage(wbc, page); continue; } @@ -3000,7 +3000,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) if (rw == SWRITE || rw == SWRITE_SYNC) lock_buffer(bh); - else if (test_set_buffer_locked(bh)) + else if (!trylock_buffer(bh)) continue; if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) { diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 81a9ad7177ca..ae08c057e751 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -221,7 +221,7 @@ write_out_data: * blocking lock_buffer(). 
*/ if (buffer_dirty(bh)) { - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { BUFFER_TRACE(bh, "needs blocking lock"); spin_unlock(&journal->j_list_lock); /* Write out all data to prevent deadlocks */ diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 00e9ccde8e42..b38f944f0667 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1194,7 +1194,7 @@ lock_retry_remap: tbh = bhs[i]; if (!tbh) continue; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); /* The buffer dirty state is now irrelevant, just clean it. */ clear_buffer_dirty(tbh); diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 33ff314cc507..9669541d0119 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -665,7 +665,7 @@ lock_retry_remap: for (i = 0; i < nr_bhs; i++) { struct buffer_head *tbh = bhs[i]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) continue; if (unlikely(buffer_uptodate(tbh))) { unlock_buffer(tbh); diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 790defb847e7..17d32ca6bc35 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -586,7 +586,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { struct buffer_head *tbh = bhs[i_bhs]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); BUG_ON(!buffer_uptodate(tbh)); clear_buffer_dirty(tbh); @@ -779,7 +779,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { struct buffer_head *tbh = bhs[i_bhs]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); BUG_ON(!buffer_uptodate(tbh)); clear_buffer_dirty(tbh); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 192269698a8a..5699171212ae 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2435,7 +2435,7 @@ static int reiserfs_write_full_page(struct page *page, if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { lock_buffer(bh); } else { - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { redirty_page_for_writepage(wbc, page); continue; } diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index ce2208b27118..c21df71943a6 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -855,7 +855,7 @@ static int write_ordered_buffers(spinlock_t * lock, jh = JH_ENTRY(list->next); bh = jh->bh; get_bh(bh); - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { if (!buffer_dirty(bh)) { list_move(&jh->list, &tmp); goto loop_next; @@ -3871,7 +3871,7 @@ int reiserfs_prepare_for_journal(struct super_block *p_s_sb, { PROC_INFO_INC(p_s_sb, journal.prepare); - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { if (!wait) return 0; lock_buffer(bh); diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index fa73179233ad..fa47e43b8b41 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1104,7 +1104,7 @@ xfs_page_state_convert( * that we are writing into for the first time. 
*/ type = IOMAP_NEW; - if (!test_and_set_bit(BH_Lock, &bh->b_state)) { + if (trylock_buffer(bh)) { ASSERT(buffer_mapped(bh)); if (iomap_valid) all_bh = 1; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 50cfe8ceb478..eadaab44015f 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -115,7 +115,6 @@ BUFFER_FNS(Uptodate, uptodate) BUFFER_FNS(Dirty, dirty) TAS_BUFFER_FNS(Dirty, dirty) BUFFER_FNS(Lock, locked) -TAS_BUFFER_FNS(Lock, locked) BUFFER_FNS(Req, req) TAS_BUFFER_FNS(Req, req) BUFFER_FNS(Mapped, mapped) @@ -321,10 +320,15 @@ static inline void wait_on_buffer(struct buffer_head *bh) __wait_on_buffer(bh); } +static inline int trylock_buffer(struct buffer_head *bh) +{ + return likely(!test_and_set_bit(BH_Lock, &bh->b_state)); +} + static inline void lock_buffer(struct buffer_head *bh) { might_sleep(); - if (test_set_buffer_locked(bh)) + if (!trylock_buffer(bh)) __lock_buffer(bh); } -- cgit v1.2.3 From 378a2f090f7a478704a372a4869b8a9ac206234e Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 4 Aug 2008 22:31:03 -0700 Subject: net_sched: Add qdisc __NET_XMIT_STOLEN flag Patrick McHardy noticed: "The other problem that affects all qdiscs supporting actions is TC_ACT_QUEUED/TC_ACT_STOLEN getting mapped to NET_XMIT_SUCCESS even though the packet is not queued, corrupting upper qdiscs' qlen counters." and later explained: "The reason why it translates it at all seems to be to not increase the drops counter. Within a single qdisc this could be avoided by other means easily, upper qdiscs would still increase the counter when we return anything besides NET_XMIT_SUCCESS though. This means we need a new NET_XMIT return value to indicate this to the upper qdiscs. So I'd suggest to introduce NET_XMIT_STOLEN, return that to upper qdiscs and translate it to NET_XMIT_SUCCESS in dev_queue_xmit, similar to NET_XMIT_BYPASS." David Miller noticed: "Maybe these NET_XMIT_* values being passed around should be a set of bits. They could be composed of base meanings, combined with specific attributes. So you could say "NET_XMIT_DROP | __NET_XMIT_NO_DROP_COUNT" The attributes get masked out by the top-level ->enqueue() caller, such that the base meanings are the only thing that make their way up into the stack. If it's only about communication within the qdisc tree, let's simply code it that way." This patch is trying to realize these ideas. Signed-off-by: Jarek Poplawski Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + include/net/sch_generic.h | 14 +++++++++++++- net/sched/sch_atm.c | 12 +++++++----- net/sched/sch_cbq.c | 23 +++++++++++++++-------- net/sched/sch_dsmark.c | 8 +++++--- net/sched/sch_hfsc.c | 8 +++++--- net/sched/sch_htb.c | 18 +++++++++++------- net/sched/sch_netem.c | 3 ++- net/sched/sch_prio.c | 8 +++++--- net/sched/sch_red.c | 2 +- net/sched/sch_sfq.c | 2 +- net/sched/sch_tbf.c | 3 ++- 12 files changed, 68 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ee583f642a9f..abbf5d52ec86 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -64,6 +64,7 @@ struct wireless_dev; #define NET_XMIT_BYPASS 4 /* packet does not leave via dequeue; (TC use only - dev_queue_xmit returns this as NET_XMIT_SUCCESS) */ +#define NET_XMIT_MASK 0xFFFF /* qdisc flags in net/sch_generic.h */ /* Backlog congestion levels */ #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c5bb13065051..f15b045a85e9 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -343,6 +343,18 @@ static inline unsigned int qdisc_pkt_len(struct sk_buff *skb) return qdisc_skb_cb(skb)->pkt_len; } +#ifdef CONFIG_NET_CLS_ACT +/* additional qdisc xmit flags */ +enum net_xmit_qdisc_t { + __NET_XMIT_STOLEN = 0x00010000, +}; + +#define net_xmit_drop_count(e) ((e) & __NET_XMIT_STOLEN ? 0 : 1) + +#else +#define net_xmit_drop_count(e) (1) +#endif + static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) { #ifdef CONFIG_NET_SCHED @@ -355,7 +367,7 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch) { qdisc_skb_cb(skb)->pkt_len = skb->len; - return qdisc_enqueue(skb, sch); + return qdisc_enqueue(skb, sch) & NET_XMIT_MASK; } static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 6b517b9dac5b..27dd773481bc 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -415,7 +415,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) case TC_ACT_QUEUED: case TC_ACT_STOLEN: kfree_skb(skb); - return NET_XMIT_SUCCESS; + return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: kfree_skb(skb); goto drop; @@ -432,9 +432,11 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, flow->q); if (ret != 0) { drop: __maybe_unused - sch->qstats.drops++; - if (flow) - flow->qstats.drops++; + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + if (flow) + flow->qstats.drops++; + } return ret; } sch->bstats.bytes += qdisc_pkt_len(skb); @@ -530,7 +532,7 @@ static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch) if (!ret) { sch->q.qlen++; sch->qstats.requeues++; - } else { + } else if (net_xmit_drop_count(ret)) { sch->qstats.drops++; p->link.qstats.drops++; } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 14954bf4a683..765ae5659000 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -256,7 +256,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: - *qerr = NET_XMIT_SUCCESS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; case TC_ACT_RECLASSIFY: @@ -397,9 +397,11 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } - 
sch->qstats.drops++; - cbq_mark_toplevel(q, cl); - cl->qstats.drops++; + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + cbq_mark_toplevel(q, cl); + cl->qstats.drops++; + } return ret; } @@ -430,8 +432,10 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) cbq_activate_class(cl); return 0; } - sch->qstats.drops++; - cl->qstats.drops++; + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + cl->qstats.drops++; + } return ret; } @@ -664,13 +668,15 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) q->rx_class = NULL; if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) { + int ret; cbq_mark_toplevel(q, cl); q->rx_class = cl; cl->q->__parent = sch; - if (qdisc_enqueue(skb, cl->q) == 0) { + ret = qdisc_enqueue(skb, cl->q); + if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; sch->bstats.packets++; sch->bstats.bytes += qdisc_pkt_len(skb); @@ -678,7 +684,8 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) cbq_activate_class(cl); return 0; } - sch->qstats.drops++; + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; return 0; } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index a935676987e2..7170275d9f99 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -236,7 +236,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) case TC_ACT_QUEUED: case TC_ACT_STOLEN: kfree_skb(skb); - return NET_XMIT_SUCCESS; + return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: goto drop; @@ -254,7 +254,8 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) err = qdisc_enqueue(skb, p->q); if (err != NET_XMIT_SUCCESS) { - sch->qstats.drops++; + if (net_xmit_drop_count(err)) + sch->qstats.drops++; return err; } @@ -321,7 +322,8 @@ static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch) err = p->q->ops->requeue(skb, p->q); if (err != NET_XMIT_SUCCESS) { - sch->qstats.drops++; + if (net_xmit_drop_count(err)) + sch->qstats.drops++; return err; } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 0ae7d19dcba8..5cf9ae716118 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1166,7 +1166,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: - *qerr = NET_XMIT_SUCCESS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; } @@ -1586,8 +1586,10 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) err = qdisc_enqueue(skb, cl->qdisc); if (unlikely(err != NET_XMIT_SUCCESS)) { - cl->qstats.drops++; - sch->qstats.drops++; + if (net_xmit_drop_count(err)) { + cl->qstats.drops++; + sch->qstats.drops++; + } return err; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 75a40951c4f2..538d79b489ae 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -221,7 +221,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: - *qerr = NET_XMIT_SUCCESS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; } @@ -572,9 +572,11 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) kfree_skb(skb); return ret; #endif - } else if (qdisc_enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { - sch->qstats.drops++; - cl->qstats.drops++; + } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + cl->qstats.drops++; + } return NET_XMIT_DROP; } else { cl->bstats.packets += @@ -615,10 
+617,12 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) kfree_skb(skb); return ret; #endif - } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != + } else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) { - sch->qstats.drops++; - cl->qstats.drops++; + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + cl->qstats.drops++; + } return NET_XMIT_DROP; } else htb_activate(q, cl); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a59085700678..6cd6f2bc749e 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -240,8 +240,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) sch->q.qlen++; sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; - } else + } else if (net_xmit_drop_count(ret)) { sch->qstats.drops++; + } pr_debug("netem: enqueue ret %d\n", ret); return ret; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index f849243eb095..adb1a52b77d3 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -45,7 +45,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) switch (err) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: - *qerr = NET_XMIT_SUCCESS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; } @@ -88,7 +88,8 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) sch->q.qlen++; return NET_XMIT_SUCCESS; } - sch->qstats.drops++; + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; return ret; } @@ -114,7 +115,8 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch) sch->qstats.requeues++; return 0; } - sch->qstats.drops++; + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; return NET_XMIT_DROP; } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 3f2d1d7f3bbd..5da05839e225 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -97,7 +97,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; sch->q.qlen++; - } else { + } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; sch->qstats.drops++; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 8589da666568..3a456e1b829a 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -178,7 +178,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, switch (result) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: - *qerr = NET_XMIT_SUCCESS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return 0; } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index b296672f7632..7d3b7ff3bf07 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -135,7 +135,8 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) ret = qdisc_enqueue(skb, q->qdisc); if (ret != 0) { - sch->qstats.drops++; + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; return ret; } -- cgit v1.2.3 From cc6533e98a7f3cb7fce9d740da49195c7aa523a4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 4 Aug 2008 23:04:08 -0700 Subject: net: Kill plain NET_XMIT_BYPASS. dst_input() was doing something completely absurd, looping on skb->dst->input() if NET_XMIT_BYPASS was seen, but these functions never return such an error. And as a result plain ole' NET_XMIT_BYPASS has no more references and can be completely killed off. Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 3 --- include/net/dst.h | 12 +----------- 2 files changed, 1 insertion(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index abbf5d52ec86..488c56e649b5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -61,9 +61,6 @@ struct wireless_dev; #define NET_XMIT_DROP 1 /* skb dropped */ #define NET_XMIT_CN 2 /* congestion notification */ #define NET_XMIT_POLICED 3 /* skb is shot by police */ -#define NET_XMIT_BYPASS 4 /* packet does not leave via dequeue; - (TC use only - dev_queue_xmit - returns this as NET_XMIT_SUCCESS) */ #define NET_XMIT_MASK 0xFFFF /* qdisc flags in net/sch_generic.h */ /* Backlog congestion levels */ diff --git a/include/net/dst.h b/include/net/dst.h index c5c318a628f8..8a8b71e5f3f1 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -252,17 +252,7 @@ static inline int dst_output(struct sk_buff *skb) /* Input packet from network to transport. */ static inline int dst_input(struct sk_buff *skb) { - int err; - - for (;;) { - err = skb->dst->input(skb); - - if (likely(err == 0)) - return err; - /* Oh, Jamal... Seems, I will not forgive you this mess. :-) */ - if (unlikely(err != NET_XMIT_BYPASS)) - return err; - } + return skb->dst->input(skb); } static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) -- cgit v1.2.3 From 39b986a6c73434d122967dc86efb295ab9a28437 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 5 Aug 2008 18:16:57 +0200 Subject: ide: sanitize struct ide_port_ops documentation (take 2) v2: Add missing '@'-s. (Noticed by Randy Dunlap) Cc: Randy Dunlap Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index b846bc44a27e..b1fb15f10a00 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -509,24 +509,33 @@ struct ide_tp_ops { extern const struct ide_tp_ops default_tp_ops; +/** + * struct ide_port_ops - IDE port operations + * + * @init_dev: host specific initialization of a device + * @set_pio_mode: routine to program host for PIO mode + * @set_dma_mode: routine to program host for DMA mode + * @selectproc: tweaks hardware to select drive + * @reset_poll: chipset polling based on hba specifics + * @pre_reset: chipset specific changes to default for device-hba resets + * @resetproc: routine to reset controller after a disk reset + * @maskproc: special host masking for drive selection + * @quirkproc: check host's drive quirk list + * + * @mdma_filter: filter MDMA modes + * @udma_filter: filter UDMA modes + * + * @cable_detect: detect cable type + */ struct ide_port_ops { - /* host specific initialization of a device */ void (*init_dev)(ide_drive_t *); - /* routine to program host for PIO mode */ void (*set_pio_mode)(ide_drive_t *, const u8); - /* routine to program host for DMA mode */ void (*set_dma_mode)(ide_drive_t *, const u8); - /* tweaks hardware to select drive */ void (*selectproc)(ide_drive_t *); - /* chipset polling based on hba specifics */ int (*reset_poll)(ide_drive_t *); - /* chipset specific changes to default for device-hba resets */ void (*pre_reset)(ide_drive_t *); - /* routine to reset controller after a disk reset */ void (*resetproc)(ide_drive_t *); - /* special host masking for drive selection */ void (*maskproc)(ide_drive_t *, int); - /* check host's drive quirk list */ void 
(*quirkproc)(ide_drive_t *); u8 (*mdma_filter)(ide_drive_t *); -- cgit v1.2.3 From c5bfc3757f1d843a8e1261840c1f53c5062f8e92 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 5 Aug 2008 18:17:01 +0200 Subject: ide: remove CONFIG_IDE_MAX_HWIFS The benefits of a user settable CONFIG_IDE_MAX_HWIFS have become pretty tiny and are no longer considered worth the trouble of an own option. Simply always #define MAX_HWIFS to 10. Signed-off-by: Adrian Bunk Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/Kconfig | 10 ---------- include/linux/ide.h | 13 +------------ 2 files changed, 1 insertion(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 130ef64b44f7..a34758d29516 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -54,16 +54,6 @@ menuconfig IDE if IDE -config IDE_MAX_HWIFS - int "Max IDE interfaces" - depends on ALPHA || SUPERH || IA64 || EMBEDDED - range 1 10 - default 4 - help - This is the maximum number of IDE hardware interfaces that will - be supported by the driver. Make sure it is at least as high as - the number of IDE interfaces in your system. - config BLK_DEV_IDE tristate "Enhanced IDE/MFM/RLL disk/cdrom/tape/floppy support" ---help--- diff --git a/include/linux/ide.h b/include/linux/ide.h index b1fb15f10a00..87c12ed96954 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -219,18 +219,7 @@ static inline int __ide_default_irq(unsigned long base) #include #endif -#ifndef MAX_HWIFS -#if defined(CONFIG_BLACKFIN) || defined(CONFIG_H8300) || defined(CONFIG_XTENSA) -# define MAX_HWIFS 1 -#else -# define MAX_HWIFS 10 -#endif -#endif - -#if !defined(MAX_HWIFS) || defined(CONFIG_EMBEDDED) -#undef MAX_HWIFS -#define MAX_HWIFS CONFIG_IDE_MAX_HWIFS -#endif +#define MAX_HWIFS 10 /* Currently only m68k, apus and m8xx need it */ #ifndef IDE_ARCH_ACK_INTR -- cgit v1.2.3 From c6e2bee26eee190b20cd87e71b288bca6a5357a4 Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Tue, 5 Aug 2008 13:01:05 -0700 Subject: kdump: report actual value of VMCOREINFO_OSRELEASE in VMCOREINFO The current implementation reports the structure name as VMCOREINFO_OSRELEASE in VMCOREINFO, e.g. VMCOREINFO_OSRELEASE=init_uts_ns.name.release That doesn't make sense because it's always the same. Instead, use the value, e.g. VMCOREINFO_OSRELEASE=2.6.26-rc3 That's also what the 'makedumpfile -g' does. Signed-off-by: Bernhard Walle Cc: "Ken'ichi Ohmichi" Acked-by: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 82f88a8a827b..32110cede64f 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -130,8 +130,8 @@ void vmcoreinfo_append_str(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); unsigned long paddr_vmcoreinfo_note(void); -#define VMCOREINFO_OSRELEASE(name) \ - vmcoreinfo_append_str("OSRELEASE=%s\n", #name) +#define VMCOREINFO_OSRELEASE(value) \ + vmcoreinfo_append_str("OSRELEASE=%s\n", value) #define VMCOREINFO_PAGESIZE(value) \ vmcoreinfo_append_str("PAGESIZE=%ld\n", value) #define VMCOREINFO_SYMBOL(name) \ -- cgit v1.2.3 From 60cadec9da7b6c91aca51f408c828f7e74a68379 Mon Sep 17 00:00:00 2001 From: Shadi Ammouri Date: Tue, 5 Aug 2008 13:01:09 -0700 Subject: spi: new orion_spi driver This adds an SPI driver for the SPI controller found in various Marvell Orion ARM SoCs. 
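(Not part of this patch, just a rough sketch of how a board file would
describe this controller to the driver through platform data; the register
base and clock rate below are made-up example values:

	static struct resource example_spi_resources[] = {
		{
			.start	= 0xf1010600,
			.end	= 0xf1010600 + SZ_1K - 1,
			.flags	= IORESOURCE_MEM,
		},
	};

	static struct orion_spi_info example_spi_data = {
		.tclk	= 166666667,	/* core clock, in Hz */
	};

	static struct platform_device example_spi_device = {
		.name		= "orion_spi",
		.id		= 0,
		.resource	= example_spi_resources,
		.num_resources	= ARRAY_SIZE(example_spi_resources),
		.dev		= {
			.platform_data	= &example_spi_data,
		},
	};

and registered with platform_device_register(&example_spi_device) from the
board setup code.)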
It currently supports only one slave, which must use SPI mode 0. [dbrownell@users.sourceforge.net: cleanups, meet specs, pass "sparse"] Signed-off-by: Shadi Ammouri Signed-off-by: Saeed Bishara Signed-off-by: Lennert Buytenhek Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/spi/Kconfig | 6 + drivers/spi/Makefile | 1 + drivers/spi/orion_spi.c | 574 ++++++++++++++++++++++++++++++++++++++++++ include/linux/spi/orion_spi.h | 17 ++ 4 files changed, 598 insertions(+) create mode 100644 drivers/spi/orion_spi.c create mode 100644 include/linux/spi/orion_spi.h (limited to 'include/linux') diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 2303521b4f09..b9d0efb6803f 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -149,6 +149,12 @@ config SPI_OMAP24XX SPI master controller for OMAP24xx/OMAP34xx Multichannel SPI (McSPI) modules. +config SPI_ORION + tristate "Orion SPI master (EXPERIMENTAL)" + depends on PLAT_ORION && EXPERIMENTAL + help + This enables using the SPI master controller on the Orion chips. + config SPI_PXA2XX tristate "PXA2xx SSP SPI master" depends on ARCH_PXA && EXPERIMENTAL diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index 7fca043ce723..ccf18de34e1e 100644 --- a/drivers/spi/Makefile +++ b/drivers/spi/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_SPI_LM70_LLP) += spi_lm70llp.o obj-$(CONFIG_SPI_PXA2XX) += pxa2xx_spi.o obj-$(CONFIG_SPI_OMAP_UWIRE) += omap_uwire.o obj-$(CONFIG_SPI_OMAP24XX) += omap2_mcspi.o +obj-$(CONFIG_SPI_ORION) += orion_spi.o obj-$(CONFIG_SPI_MPC52xx_PSC) += mpc52xx_psc_spi.o obj-$(CONFIG_SPI_MPC83xx) += spi_mpc83xx.o obj-$(CONFIG_SPI_S3C24XX_GPIO) += spi_s3c24xx_gpio.o diff --git a/drivers/spi/orion_spi.c b/drivers/spi/orion_spi.c new file mode 100644 index 000000000000..c4eaacd6e553 --- /dev/null +++ b/drivers/spi/orion_spi.c @@ -0,0 +1,574 @@ +/* + * orion_spi.c -- Marvell Orion SPI controller driver + * + * Author: Shadi Ammouri + * Copyright (C) 2007-2008 Marvell Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "orion_spi" + +#define ORION_NUM_CHIPSELECTS 1 /* only one slave is supported*/ +#define ORION_SPI_WAIT_RDY_MAX_LOOP 2000 /* in usec */ + +#define ORION_SPI_IF_CTRL_REG 0x00 +#define ORION_SPI_IF_CONFIG_REG 0x04 +#define ORION_SPI_DATA_OUT_REG 0x08 +#define ORION_SPI_DATA_IN_REG 0x0c +#define ORION_SPI_INT_CAUSE_REG 0x10 + +#define ORION_SPI_IF_8_16_BIT_MODE (1 << 5) +#define ORION_SPI_CLK_PRESCALE_MASK 0x1F + +struct orion_spi { + struct work_struct work; + + /* Lock access to transfer list. 
*/ + spinlock_t lock; + + struct list_head msg_queue; + struct spi_master *master; + void __iomem *base; + unsigned int max_speed; + unsigned int min_speed; + struct orion_spi_info *spi_info; +}; + +static struct workqueue_struct *orion_spi_wq; + +static inline void __iomem *spi_reg(struct orion_spi *orion_spi, u32 reg) +{ + return orion_spi->base + reg; +} + +static inline void +orion_spi_setbits(struct orion_spi *orion_spi, u32 reg, u32 mask) +{ + void __iomem *reg_addr = spi_reg(orion_spi, reg); + u32 val; + + val = readl(reg_addr); + val |= mask; + writel(val, reg_addr); +} + +static inline void +orion_spi_clrbits(struct orion_spi *orion_spi, u32 reg, u32 mask) +{ + void __iomem *reg_addr = spi_reg(orion_spi, reg); + u32 val; + + val = readl(reg_addr); + val &= ~mask; + writel(val, reg_addr); +} + +static int orion_spi_set_transfer_size(struct orion_spi *orion_spi, int size) +{ + if (size == 16) { + orion_spi_setbits(orion_spi, ORION_SPI_IF_CONFIG_REG, + ORION_SPI_IF_8_16_BIT_MODE); + } else if (size == 8) { + orion_spi_clrbits(orion_spi, ORION_SPI_IF_CONFIG_REG, + ORION_SPI_IF_8_16_BIT_MODE); + } else { + pr_debug("Bad bits per word value %d (only 8 or 16 are " + "allowed).\n", size); + return -EINVAL; + } + + return 0; +} + +static int orion_spi_baudrate_set(struct spi_device *spi, unsigned int speed) +{ + u32 tclk_hz; + u32 rate; + u32 prescale; + u32 reg; + struct orion_spi *orion_spi; + + orion_spi = spi_master_get_devdata(spi->master); + + tclk_hz = orion_spi->spi_info->tclk; + + /* + * the supported rates are: 4,6,8...30 + * round up as we look for equal or less speed + */ + rate = DIV_ROUND_UP(tclk_hz, speed); + rate = roundup(rate, 2); + + /* check if requested speed is too small */ + if (rate > 30) + return -EINVAL; + + if (rate < 4) + rate = 4; + + /* Convert the rate to SPI clock divisor value. 
*/ + prescale = 0x10 + rate/2; + + reg = readl(spi_reg(orion_spi, ORION_SPI_IF_CONFIG_REG)); + reg = ((reg & ~ORION_SPI_CLK_PRESCALE_MASK) | prescale); + writel(reg, spi_reg(orion_spi, ORION_SPI_IF_CONFIG_REG)); + + return 0; +} + +/* + * called only when no transfer is active on the bus + */ +static int +orion_spi_setup_transfer(struct spi_device *spi, struct spi_transfer *t) +{ + struct orion_spi *orion_spi; + unsigned int speed = spi->max_speed_hz; + unsigned int bits_per_word = spi->bits_per_word; + int rc; + + orion_spi = spi_master_get_devdata(spi->master); + + if ((t != NULL) && t->speed_hz) + speed = t->speed_hz; + + if ((t != NULL) && t->bits_per_word) + bits_per_word = t->bits_per_word; + + rc = orion_spi_baudrate_set(spi, speed); + if (rc) + return rc; + + return orion_spi_set_transfer_size(orion_spi, bits_per_word); +} + +static void orion_spi_set_cs(struct orion_spi *orion_spi, int enable) +{ + if (enable) + orion_spi_setbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); + else + orion_spi_clrbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); +} + +static inline int orion_spi_wait_till_ready(struct orion_spi *orion_spi) +{ + int i; + + for (i = 0; i < ORION_SPI_WAIT_RDY_MAX_LOOP; i++) { + if (readl(spi_reg(orion_spi, ORION_SPI_INT_CAUSE_REG))) + return 1; + else + udelay(1); + } + + return -1; +} + +static inline int +orion_spi_write_read_8bit(struct spi_device *spi, + const u8 **tx_buf, u8 **rx_buf) +{ + void __iomem *tx_reg, *rx_reg, *int_reg; + struct orion_spi *orion_spi; + + orion_spi = spi_master_get_devdata(spi->master); + tx_reg = spi_reg(orion_spi, ORION_SPI_DATA_OUT_REG); + rx_reg = spi_reg(orion_spi, ORION_SPI_DATA_IN_REG); + int_reg = spi_reg(orion_spi, ORION_SPI_INT_CAUSE_REG); + + /* clear the interrupt cause register */ + writel(0x0, int_reg); + + if (tx_buf && *tx_buf) + writel(*(*tx_buf)++, tx_reg); + else + writel(0, tx_reg); + + if (orion_spi_wait_till_ready(orion_spi) < 0) { + dev_err(&spi->dev, "TXS timed out\n"); + return -1; + } + + if (rx_buf && *rx_buf) + *(*rx_buf)++ = readl(rx_reg); + + return 1; +} + +static inline int +orion_spi_write_read_16bit(struct spi_device *spi, + const u16 **tx_buf, u16 **rx_buf) +{ + void __iomem *tx_reg, *rx_reg, *int_reg; + struct orion_spi *orion_spi; + + orion_spi = spi_master_get_devdata(spi->master); + tx_reg = spi_reg(orion_spi, ORION_SPI_DATA_OUT_REG); + rx_reg = spi_reg(orion_spi, ORION_SPI_DATA_IN_REG); + int_reg = spi_reg(orion_spi, ORION_SPI_INT_CAUSE_REG); + + /* clear the interrupt cause register */ + writel(0x0, int_reg); + + if (tx_buf && *tx_buf) + writel(__cpu_to_le16(get_unaligned((*tx_buf)++)), tx_reg); + else + writel(0, tx_reg); + + if (orion_spi_wait_till_ready(orion_spi) < 0) { + dev_err(&spi->dev, "TXS timed out\n"); + return -1; + } + + if (rx_buf && *rx_buf) + put_unaligned(__le16_to_cpu(readl(rx_reg)), (*rx_buf)++); + + return 1; +} + +static unsigned int +orion_spi_write_read(struct spi_device *spi, struct spi_transfer *xfer) +{ + struct orion_spi *orion_spi; + unsigned int count; + int word_len; + + orion_spi = spi_master_get_devdata(spi->master); + word_len = spi->bits_per_word; + count = xfer->len; + + if (word_len == 8) { + const u8 *tx = xfer->tx_buf; + u8 *rx = xfer->rx_buf; + + do { + if (orion_spi_write_read_8bit(spi, &tx, &rx) < 0) + goto out; + count--; + } while (count); + } else if (word_len == 16) { + const u16 *tx = xfer->tx_buf; + u16 *rx = xfer->rx_buf; + + do { + if (orion_spi_write_read_16bit(spi, &tx, &rx) < 0) + goto out; + count -= 2; + } while (count); + } + +out: + return xfer->len - 
count; +} + + +static void orion_spi_work(struct work_struct *work) +{ + struct orion_spi *orion_spi = + container_of(work, struct orion_spi, work); + + spin_lock_irq(&orion_spi->lock); + while (!list_empty(&orion_spi->msg_queue)) { + struct spi_message *m; + struct spi_device *spi; + struct spi_transfer *t = NULL; + int par_override = 0; + int status = 0; + int cs_active = 0; + + m = container_of(orion_spi->msg_queue.next, struct spi_message, + queue); + + list_del_init(&m->queue); + spin_unlock_irq(&orion_spi->lock); + + spi = m->spi; + + /* Load defaults */ + status = orion_spi_setup_transfer(spi, NULL); + + if (status < 0) + goto msg_done; + + list_for_each_entry(t, &m->transfers, transfer_list) { + if (par_override || t->speed_hz || t->bits_per_word) { + par_override = 1; + status = orion_spi_setup_transfer(spi, t); + if (status < 0) + break; + if (!t->speed_hz && !t->bits_per_word) + par_override = 0; + } + + if (!cs_active) { + orion_spi_set_cs(orion_spi, 1); + cs_active = 1; + } + + if (t->len) + m->actual_length += + orion_spi_write_read(spi, t); + + if (t->delay_usecs) + udelay(t->delay_usecs); + + if (t->cs_change) { + orion_spi_set_cs(orion_spi, 0); + cs_active = 0; + } + } + +msg_done: + if (cs_active) + orion_spi_set_cs(orion_spi, 0); + + m->status = status; + m->complete(m->context); + + spin_lock_irq(&orion_spi->lock); + } + + spin_unlock_irq(&orion_spi->lock); +} + +static int __init orion_spi_reset(struct orion_spi *orion_spi) +{ + /* Verify that the CS is deasserted */ + orion_spi_set_cs(orion_spi, 0); + + return 0; +} + +static int orion_spi_setup(struct spi_device *spi) +{ + struct orion_spi *orion_spi; + + orion_spi = spi_master_get_devdata(spi->master); + + if (spi->mode) { + dev_err(&spi->dev, "setup: unsupported mode bits %x\n", + spi->mode); + return -EINVAL; + } + + if (spi->bits_per_word == 0) + spi->bits_per_word = 8; + + if ((spi->max_speed_hz == 0) + || (spi->max_speed_hz > orion_spi->max_speed)) + spi->max_speed_hz = orion_spi->max_speed; + + if (spi->max_speed_hz < orion_spi->min_speed) { + dev_err(&spi->dev, "setup: requested speed too low %d Hz\n", + spi->max_speed_hz); + return -EINVAL; + } + + /* + * baudrate & width will be set orion_spi_setup_transfer + */ + return 0; +} + +static int orion_spi_transfer(struct spi_device *spi, struct spi_message *m) +{ + struct orion_spi *orion_spi; + struct spi_transfer *t = NULL; + unsigned long flags; + + m->actual_length = 0; + m->status = 0; + + /* reject invalid messages and transfers */ + if (list_empty(&m->transfers) || !m->complete) + return -EINVAL; + + orion_spi = spi_master_get_devdata(spi->master); + + list_for_each_entry(t, &m->transfers, transfer_list) { + unsigned int bits_per_word = spi->bits_per_word; + + if (t->tx_buf == NULL && t->rx_buf == NULL && t->len) { + dev_err(&spi->dev, + "message rejected : " + "invalid transfer data buffers\n"); + goto msg_rejected; + } + + if ((t != NULL) && t->bits_per_word) + bits_per_word = t->bits_per_word; + + if ((bits_per_word != 8) && (bits_per_word != 16)) { + dev_err(&spi->dev, + "message rejected : " + "invalid transfer bits_per_word (%d bits)\n", + bits_per_word); + goto msg_rejected; + } + /*make sure buffer length is even when working in 16 bit mode*/ + if ((t != NULL) && (t->bits_per_word == 16) && (t->len & 1)) { + dev_err(&spi->dev, + "message rejected : " + "odd data length (%d) while in 16 bit mode\n", + t->len); + goto msg_rejected; + } + + if (t->speed_hz < orion_spi->min_speed) { + dev_err(&spi->dev, + "message rejected : " + "device min speed (%d 
Hz) exceeds " + "required transfer speed (%d Hz)\n", + orion_spi->min_speed, t->speed_hz); + goto msg_rejected; + } + } + + + spin_lock_irqsave(&orion_spi->lock, flags); + list_add_tail(&m->queue, &orion_spi->msg_queue); + queue_work(orion_spi_wq, &orion_spi->work); + spin_unlock_irqrestore(&orion_spi->lock, flags); + + return 0; +msg_rejected: + /* Message rejected and not queued */ + m->status = -EINVAL; + if (m->complete) + m->complete(m->context); + return -EINVAL; +} + +static int __init orion_spi_probe(struct platform_device *pdev) +{ + struct spi_master *master; + struct orion_spi *spi; + struct resource *r; + struct orion_spi_info *spi_info; + int status = 0; + + spi_info = pdev->dev.platform_data; + + master = spi_alloc_master(&pdev->dev, sizeof *spi); + if (master == NULL) { + dev_dbg(&pdev->dev, "master allocation failed\n"); + return -ENOMEM; + } + + if (pdev->id != -1) + master->bus_num = pdev->id; + + master->setup = orion_spi_setup; + master->transfer = orion_spi_transfer; + master->num_chipselect = ORION_NUM_CHIPSELECTS; + + dev_set_drvdata(&pdev->dev, master); + + spi = spi_master_get_devdata(master); + spi->master = master; + spi->spi_info = spi_info; + + spi->max_speed = DIV_ROUND_UP(spi_info->tclk, 4); + spi->min_speed = DIV_ROUND_UP(spi_info->tclk, 30); + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (r == NULL) { + status = -ENODEV; + goto out; + } + + if (!request_mem_region(r->start, (r->end - r->start) + 1, + pdev->dev.bus_id)) { + status = -EBUSY; + goto out; + } + spi->base = ioremap(r->start, SZ_1K); + + INIT_WORK(&spi->work, orion_spi_work); + + spin_lock_init(&spi->lock); + INIT_LIST_HEAD(&spi->msg_queue); + + if (orion_spi_reset(spi) < 0) + goto out_rel_mem; + + status = spi_register_master(master); + if (status < 0) + goto out_rel_mem; + + return status; + +out_rel_mem: + release_mem_region(r->start, (r->end - r->start) + 1); + +out: + spi_master_put(master); + return status; +} + + +static int __exit orion_spi_remove(struct platform_device *pdev) +{ + struct spi_master *master; + struct orion_spi *spi; + struct resource *r; + + master = dev_get_drvdata(&pdev->dev); + spi = spi_master_get_devdata(master); + + cancel_work_sync(&spi->work); + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(r->start, (r->end - r->start) + 1); + + spi_unregister_master(master); + + return 0; +} + +MODULE_ALIAS("platform:" DRIVER_NAME); + +static struct platform_driver orion_spi_driver = { + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + }, + .remove = __exit_p(orion_spi_remove), +}; + +static int __init orion_spi_init(void) +{ + orion_spi_wq = create_singlethread_workqueue( + orion_spi_driver.driver.name); + if (orion_spi_wq == NULL) + return -ENOMEM; + + return platform_driver_probe(&orion_spi_driver, orion_spi_probe); +} +module_init(orion_spi_init); + +static void __exit orion_spi_exit(void) +{ + flush_workqueue(orion_spi_wq); + platform_driver_unregister(&orion_spi_driver); + + destroy_workqueue(orion_spi_wq); +} +module_exit(orion_spi_exit); + +MODULE_DESCRIPTION("Orion SPI driver"); +MODULE_AUTHOR("Shadi Ammouri "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/spi/orion_spi.h b/include/linux/spi/orion_spi.h new file mode 100644 index 000000000000..b4d9fa6f797c --- /dev/null +++ b/include/linux/spi/orion_spi.h @@ -0,0 +1,17 @@ +/* + * orion_spi.h + * + * This file is licensed under the terms of the GNU General Public + * License version 2. 
This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef __LINUX_SPI_ORION_SPI_H +#define __LINUX_SPI_ORION_SPI_H + +struct orion_spi_info { + u32 tclk; /* no support yet */ +}; + + +#endif -- cgit v1.2.3 From f6ac436dcc4c34709bcde355f3f2254ac0a183d4 Mon Sep 17 00:00:00 2001 From: Mark Asselstine Date: Tue, 5 Aug 2008 13:01:24 -0700 Subject: Remove the deprecated cli() sti() functions These functions have been deprecated for some time now but remained until all legacy callers could be removed. With a few commits in 2.6.26 this has happened so now we can remove these deprecated functions. Signed-off-by: Mark Asselstine Reviewed-by: Matthew Wilcox Cc: Alan Cox Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/00-INDEX | 2 - Documentation/cli-sti-removal.txt | 133 -------------------------------------- include/linux/interrupt.h | 29 --------- 3 files changed, 164 deletions(-) delete mode 100644 Documentation/cli-sti-removal.txt (limited to 'include/linux') diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 6de71308a906..5b5aba404aac 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -89,8 +89,6 @@ cciss.txt - info, major/minor #'s for Compaq's SMART Array Controllers. cdrom/ - directory with information on the CD-ROM drivers that Linux has. -cli-sti-removal.txt - - cli()/sti() removal guide. computone.txt - info on Computone Intelliport II/Plus Multiport Serial Driver. connector/ diff --git a/Documentation/cli-sti-removal.txt b/Documentation/cli-sti-removal.txt deleted file mode 100644 index 60932b02fcb3..000000000000 --- a/Documentation/cli-sti-removal.txt +++ /dev/null @@ -1,133 +0,0 @@ - -#### cli()/sti() removal guide, started by Ingo Molnar - - -as of 2.5.28, five popular macros have been removed on SMP, and -are being phased out on UP: - - cli(), sti(), save_flags(flags), save_flags_cli(flags), restore_flags(flags) - -until now it was possible to protect driver code against interrupt -handlers via a cli(), but from now on other, more lightweight methods -have to be used for synchronization, such as spinlocks or semaphores. - -for example, driver code that used to do something like: - - struct driver_data; - - irq_handler (...) - { - .... - driver_data.finish = 1; - driver_data.new_work = 0; - .... - } - - ... - - ioctl_func (...) - { - ... - cli(); - ... - driver_data.finish = 0; - driver_data.new_work = 2; - ... - sti(); - ... - } - -was SMP-correct because the cli() function ensured that no -interrupt handler (amongst them the above irq_handler()) function -would execute while the cli()-ed section is executing. - -but from now on a more direct method of locking has to be used: - - DEFINE_SPINLOCK(driver_lock); - struct driver_data; - - irq_handler (...) - { - unsigned long flags; - .... - spin_lock_irqsave(&driver_lock, flags); - .... - driver_data.finish = 1; - driver_data.new_work = 0; - .... - spin_unlock_irqrestore(&driver_lock, flags); - .... - } - - ... - - ioctl_func (...) - { - ... - spin_lock_irq(&driver_lock); - ... - driver_data.finish = 0; - driver_data.new_work = 2; - ... - spin_unlock_irq(&driver_lock); - ... - } - -the above code has a number of advantages: - -- the locking relation is easier to understand - actual lock usage - pinpoints the critical sections. cli() usage is too opaque. - Easier to understand means it's easier to debug. - -- it's faster, because spinlocks are faster to acquire than the - potentially heavily-used IRQ lock. 
Furthermore, your driver does - not have to wait eg. for a big heavy SCSI interrupt to finish, - because the driver_lock spinlock is only used by your driver. - cli() on the other hand was used by many drivers, and extended - the critical section to the whole IRQ handler function - creating - serious lock contention. - - -to make the transition easier, we've still kept the cli(), sti(), -save_flags(), save_flags_cli() and restore_flags() macros defined -on UP systems - but their usage will be phased out until 2.6 is -released. - -drivers that want to disable local interrupts (interrupts on the -current CPU), can use the following five macros: - - local_irq_disable(), local_irq_enable(), local_save_flags(flags), - local_irq_save(flags), local_irq_restore(flags) - -but beware, their meaning and semantics are much simpler, far from -that of the old cli(), sti(), save_flags(flags) and restore_flags(flags) -SMP meaning: - - local_irq_disable() => turn local IRQs off - - local_irq_enable() => turn local IRQs on - - local_save_flags(flags) => save the current IRQ state into flags. The - state can be on or off. (on some - architectures there's even more bits in it.) - - local_irq_save(flags) => save the current IRQ state into flags and - disable interrupts. - - local_irq_restore(flags) => restore the IRQ state from flags. - -(local_irq_save can save both irqs on and irqs off state, and -local_irq_restore can restore into both irqs on and irqs off state.) - -another related change is that synchronize_irq() now takes a parameter: -synchronize_irq(irq). This change too has the purpose of making SMP -synchronization more lightweight - this way you can wait for your own -interrupt handler to finish, no need to wait for other IRQ sources. - - -why were these changes done? The main reason was the architectural burden -of maintaining the cli()/sti() interface - it became a real problem. The -new interrupt system is much more streamlined, easier to understand, debug, -and it's also a bit faster - the same happened to it that will happen to -cli()/sti() using drivers once they convert to spinlocks :-) - diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 62aa4f895abe..58ff4e74b2f3 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -223,35 +223,6 @@ static inline int disable_irq_wake(unsigned int irq) #define or_softirq_pending(x) (local_softirq_pending() |= (x)) #endif -/* - * Temporary defines for UP kernels, until all code gets fixed. - */ -#ifndef CONFIG_SMP -static inline void __deprecated cli(void) -{ - local_irq_disable(); -} -static inline void __deprecated sti(void) -{ - local_irq_enable(); -} -static inline void __deprecated save_flags(unsigned long *x) -{ - local_save_flags(*x); -} -#define save_flags(x) save_flags(&x) -static inline void __deprecated restore_flags(unsigned long x) -{ - local_irq_restore(x); -} - -static inline void __deprecated save_and_cli(unsigned long *x) -{ - local_irq_save(*x); -} -#define save_and_cli(x) save_and_cli(&x) -#endif /* CONFIG_SMP */ - /* Some architectures might implement lazy enabling/disabling of * interrupts. In some cases, such as stop_machine, we might want * to ensure that after a local_irq_disable(), interrupts have -- cgit v1.2.3 From bf1db69fbf4ff511e88736ce2e6318846f34492b Mon Sep 17 00:00:00 2001 From: Richard Hughes Date: Tue, 5 Aug 2008 13:01:35 -0700 Subject: pm_qos: spelling fixes A documentation cleanup patch. With a minor tweak to clarify units for kbs. 
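For context, the requirement API these units feed into can be used along the lines of the minimal sketch below, a driver holding a CPU DMA latency bound; the PM_QOS_CPU_DMA_LATENCY class constant and the my_drv_* names are assumptions for illustration, not part of this patch:

    #include <linux/pm_qos_params.h>

    #define MY_DRV_QOS_ID  "my_driver"

    static int my_drv_start_streaming(void)
    {
            /* ask the cpu_dma_latency class for at most 50 usec */
            return pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY,
                                          MY_DRV_QOS_ID, 50);
    }

    static void my_drv_stop_streaming(void)
    {
            /* drop the request; the aggregate target is recomputed */
            pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, MY_DRV_QOS_ID);
    }

Latency values are given in usec and throughput in kbs, per the units documented above.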
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: mark gross Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/power/pm_qos_interface.txt | 7 ++++++- include/linux/pm_qos_params.h | 2 +- kernel/pm_qos_params.c | 16 ++++++++-------- 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt index 49adb1a33514..c40866e8b957 100644 --- a/Documentation/power/pm_qos_interface.txt +++ b/Documentation/power/pm_qos_interface.txt @@ -1,4 +1,4 @@ -PM quality of Service interface. +PM Quality Of Service Interface. This interface provides a kernel and user mode interface for registering performance expectations by drivers, subsystems and user space applications on @@ -7,6 +7,11 @@ one of the parameters. Currently we have {cpu_dma_latency, network_latency, network_throughput} as the initial set of pm_qos parameters. +Each parameters have defined units: + * latency: usec + * timeout: usec + * throughput: kbs (kilo bit / sec) + The infrastructure exposes multiple misc device nodes one per implemented parameter. The set of parameters implement is defined by pm_qos_power_init() and pm_qos_params.h. This is done because having the available parameters diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h index 2e4e97bd19f7..d74f75ed1e47 100644 --- a/include/linux/pm_qos_params.h +++ b/include/linux/pm_qos_params.h @@ -1,6 +1,6 @@ /* interface for the pm_qos_power infrastructure of the linux kernel. * - * Mark Gross + * Mark Gross */ #include #include diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index 8cb757026386..da9c2dda6a4e 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -24,7 +24,7 @@ * requirement that the application has is cleaned up when closes the file * pointer or exits the pm_qos_object will get an opportunity to clean up. * - * mark gross mgross@linux.intel.com + * Mark Gross */ #include @@ -211,8 +211,8 @@ EXPORT_SYMBOL_GPL(pm_qos_requirement); * @value: defines the qos request * * This function inserts a new entry in the pm_qos_class list of requested qos - * performance charactoistics. It recomputes the agregate QoS expectations for - * the pm_qos_class of parrameters. + * performance characteristics. It recomputes the aggregate QoS expectations + * for the pm_qos_class of parameters. */ int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value) { @@ -250,10 +250,10 @@ EXPORT_SYMBOL_GPL(pm_qos_add_requirement); * @name: identifies the request * @value: defines the qos request * - * Updates an existing qos requierement for the pm_qos_class of parameters along + * Updates an existing qos requirement for the pm_qos_class of parameters along * with updating the target pm_qos_class value. * - * If the named request isn't in the lest then no change is made. + * If the named request isn't in the list then no change is made. */ int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value) { @@ -287,7 +287,7 @@ EXPORT_SYMBOL_GPL(pm_qos_update_requirement); * @pm_qos_class: identifies which list of qos request to us * @name: identifies the request * - * Will remove named qos request from pm_qos_class list of parrameters and + * Will remove named qos request from pm_qos_class list of parameters and * recompute the current target value for the pm_qos_class. 
*/ void pm_qos_remove_requirement(int pm_qos_class, char *name) @@ -319,7 +319,7 @@ EXPORT_SYMBOL_GPL(pm_qos_remove_requirement); * @notifier: notifier block managed by caller. * * will register the notifier into a notification chain that gets called - * uppon changes to the pm_qos_class target value. + * upon changes to the pm_qos_class target value. */ int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) { @@ -338,7 +338,7 @@ EXPORT_SYMBOL_GPL(pm_qos_add_notifier); * @notifier: notifier block to be removed. * * will remove the notifier from the notification chain that gets called - * uppon changes to the pm_qos_class target value. + * upon changes to the pm_qos_class target value. */ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) { -- cgit v1.2.3 From f780a9f119caa48088b230836a7fa73d1096de7c Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Wed, 6 Aug 2008 20:14:06 -0700 Subject: mlx4_core: Add ethernet fields to CQE struct Add ethernet-related fields to struct mlx4_cqe so that the mlx4_en ethernet NIC driver can share the same definition. Signed-off-by: Yevgeny Petrilin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/cq.c | 33 ++++++++++++++++----------------- include/linux/mlx4/cq.h | 36 ++++++++++++++++++++++++------------ 2 files changed, 40 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index a1464574bfdd..d0866a3636e2 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -515,17 +515,17 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe, wc->vendor_err = cqe->vendor_err_syndrome; } -static int mlx4_ib_ipoib_csum_ok(__be32 status, __be16 checksum) +static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) { - return ((status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4 | - MLX4_CQE_IPOIB_STATUS_IPV4F | - MLX4_CQE_IPOIB_STATUS_IPV4OPT | - MLX4_CQE_IPOIB_STATUS_IPV6 | - MLX4_CQE_IPOIB_STATUS_IPOK)) == - cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4 | - MLX4_CQE_IPOIB_STATUS_IPOK)) && - (status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_UDP | - MLX4_CQE_IPOIB_STATUS_TCP)) && + return ((status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPV4F | + MLX4_CQE_STATUS_IPV4OPT | + MLX4_CQE_STATUS_IPV6 | + MLX4_CQE_STATUS_IPOK)) == + cpu_to_be16(MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPOK)) && + (status & cpu_to_be16(MLX4_CQE_STATUS_UDP | + MLX4_CQE_STATUS_TCP)) && checksum == cpu_to_be16(0xffff); } @@ -582,17 +582,17 @@ repoll: } if (!*cur_qp || - (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) { + (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) { /* * We do not have to take the QP table lock here, * because CQs will be locked while QPs are removed * from the table. */ mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev, - be32_to_cpu(cqe->my_qpn)); + be32_to_cpu(cqe->vlan_my_qpn)); if (unlikely(!mqp)) { printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n", - cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff); + cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK); return -EINVAL; } @@ -692,14 +692,13 @@ repoll: } wc->slid = be16_to_cpu(cqe->rlid); - wc->sl = cqe->sl >> 4; + wc->sl = be16_to_cpu(cqe->sl_vid >> 12); g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); wc->src_qp = g_mlpath_rqpn & 0xffffff; wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? 
IB_WC_GRH : 0; wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; - wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->ipoib_status, - cqe->checksum); + wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum); } return 0; @@ -767,7 +766,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) */ while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); - if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) { + if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) { if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index)); ++nfreed; diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h index 071cf96cf01f..6f65b2c8bb89 100644 --- a/include/linux/mlx4/cq.h +++ b/include/linux/mlx4/cq.h @@ -39,17 +39,18 @@ #include struct mlx4_cqe { - __be32 my_qpn; + __be32 vlan_my_qpn; __be32 immed_rss_invalid; __be32 g_mlpath_rqpn; - u8 sl; - u8 reserved1; + __be16 sl_vid; __be16 rlid; - __be32 ipoib_status; + __be16 status; + u8 ipv6_ext_mask; + u8 badfcs_enc; __be32 byte_cnt; __be16 wqe_index; __be16 checksum; - u8 reserved2[3]; + u8 reserved[3]; u8 owner_sr_opcode; }; @@ -63,6 +64,11 @@ struct mlx4_err_cqe { u8 owner_sr_opcode; }; +enum { + MLX4_CQE_VLAN_PRESENT_MASK = 1 << 29, + MLX4_CQE_QPN_MASK = 0xffffff, +}; + enum { MLX4_CQE_OWNER_MASK = 0x80, MLX4_CQE_IS_SEND_MASK = 0x40, @@ -86,13 +92,19 @@ enum { }; enum { - MLX4_CQE_IPOIB_STATUS_IPV4 = 1 << 22, - MLX4_CQE_IPOIB_STATUS_IPV4F = 1 << 23, - MLX4_CQE_IPOIB_STATUS_IPV6 = 1 << 24, - MLX4_CQE_IPOIB_STATUS_IPV4OPT = 1 << 25, - MLX4_CQE_IPOIB_STATUS_TCP = 1 << 26, - MLX4_CQE_IPOIB_STATUS_UDP = 1 << 27, - MLX4_CQE_IPOIB_STATUS_IPOK = 1 << 28, + MLX4_CQE_STATUS_IPV4 = 1 << 6, + MLX4_CQE_STATUS_IPV4F = 1 << 7, + MLX4_CQE_STATUS_IPV6 = 1 << 8, + MLX4_CQE_STATUS_IPV4OPT = 1 << 9, + MLX4_CQE_STATUS_TCP = 1 << 10, + MLX4_CQE_STATUS_UDP = 1 << 11, + MLX4_CQE_STATUS_IPOK = 1 << 12, +}; + +enum { + MLX4_CQE_LLC = 1, + MLX4_CQE_SNAP = 1 << 1, + MLX4_CQE_BAD_FCS = 1 << 4, }; static inline void mlx4_cq_arm(struct mlx4_cq *cq, u32 cmd, -- cgit v1.2.3 From b11f8d8cc3bb2fa6fa55286babc1a5ebb2e932c4 Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Tue, 15 Jul 2008 02:18:41 -0700 Subject: ethtool: Expand ethtool_cmd.speed to 32 bits Introduce the speed_hi field to ethtool_cmd, using the reserved space, to expand the speed field to 2^32 Megabits/second. Making this field expansion now gives us plenty of time to fix up the user-space pieces that use SIOCETHTOOL before hardware faster than 64 Gb/s is available. 
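As a rough illustration of how the new helpers are meant to be used (the my_nic_get_settings name and the 40000 Mb/s figure are made up for this sketch), a driver reporting a speed that no longer fits in 16 bits would store it through the accessor rather than writing ->speed directly:

    #include <linux/ethtool.h>
    #include <linux/netdevice.h>

    static int my_nic_get_settings(struct net_device *dev,
                                   struct ethtool_cmd *ecmd)
    {
            /* 40000 Mb/s overflows the old 16-bit speed field, so let
             * the helper split it into the speed and speed_hi halves */
            ethtool_cmd_speed_set(ecmd, 40000);
            ecmd->duplex = DUPLEX_FULL;
            ecmd->autoneg = AUTONEG_DISABLE;
            return 0;
    }

On the reading side, ethtool_cmd_speed(ecmd) reassembles the two halves into the full __u32 value, while old binaries that only look at ->speed keep working for rates up to 65535 Mb/s.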
Signed-off-by: Brandon Philips Signed-off-by: Jeff Garzik --- include/linux/ethtool.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 8bb5e87df365..b4b038b89ee6 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -27,9 +27,24 @@ struct ethtool_cmd { __u8 autoneg; /* Enable or disable autonegotiation */ __u32 maxtxpkt; /* Tx pkts before generating tx int */ __u32 maxrxpkt; /* Rx pkts before generating rx int */ - __u32 reserved[4]; + __u16 speed_hi; + __u16 reserved2; + __u32 reserved[3]; }; +static inline void ethtool_cmd_speed_set(struct ethtool_cmd *ep, + __u32 speed) +{ + + ep->speed = (__u16)speed; + ep->speed_hi = (__u16)(speed >> 16); +} + +static inline __u32 ethtool_cmd_speed(struct ethtool_cmd *ep) +{ + return (ep->speed_hi << 16) | ep->speed; +} + #define ETHTOOL_BUSINFO_LEN 32 /* these strings are set to whatever the driver author decides... */ struct ethtool_drvinfo { -- cgit v1.2.3 From fe414248551e2880fe8913577699003ff145ab9d Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 23 Jul 2008 17:41:52 +0200 Subject: dm9000: Support MAC address setting through platform data. The dm9000 driver reads the chip's MAC address from the attached EEPROM. When no EEPROM is present, or when the MAC address is invalid, it falls back to reading the address from the chip. This patch lets platform code set the desired MAC address through platform data. Signed-off-by: Laurent Pinchart Signed-off-by: Jeff Garzik --- drivers/net/dm9000.c | 5 +++++ include/linux/dm9000.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c index 0b0f1c407a7e..f42c23f42652 100644 --- a/drivers/net/dm9000.c +++ b/drivers/net/dm9000.c @@ -1374,6 +1374,11 @@ dm9000_probe(struct platform_device *pdev) for (i = 0; i < 6; i += 2) dm9000_read_eeprom(db, i / 2, ndev->dev_addr+i); + if (!is_valid_ether_addr(ndev->dev_addr) && pdata != NULL) { + mac_src = "platform data"; + memcpy(ndev->dev_addr, pdata->dev_addr, 6); + } + if (!is_valid_ether_addr(ndev->dev_addr)) { /* try reading from mac */ diff --git a/include/linux/dm9000.h b/include/linux/dm9000.h index fc82446b6425..c30879cf93bc 100644 --- a/include/linux/dm9000.h +++ b/include/linux/dm9000.h @@ -27,6 +27,7 @@ struct dm9000_plat_data { unsigned int flags; + unsigned char dev_addr[6]; /* allow replacement IO routines */ -- cgit v1.2.3 From 7d283aee50351ec19eaf654a8690d77c4e1dff50 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 6 Aug 2008 15:21:26 -0700 Subject: list.h: Add list_splice_tail() and list_splice_tail_init() If you are using linked lists for queues list_splice() will not do what you would expect even if you use the elements passed reversed. We need to handle these differently. We add list_splice_tail() and list_splice_tail_init(). Signed-off-by: Peter Zijlstra Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. 
Linville --- drivers/dma/ioat_dma.c | 2 +- drivers/usb/host/ehci-q.c | 2 +- include/linux/list.h | 47 ++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 40 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c index a52156e56886..bc8c6e3470ca 100644 --- a/drivers/dma/ioat_dma.c +++ b/drivers/dma/ioat_dma.c @@ -551,7 +551,7 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) /* write address into NextDescriptor field of last desc in chain */ to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = first->async_tx.phys; - __list_splice(&new_chain, ioat_chan->used_desc.prev); + list_splice_tail(&new_chain, &ioat_chan->used_desc); ioat_chan->dmacount += desc_count; ioat_chan->pending += desc_count; diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index 2622b6596d7c..3712b925b315 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -932,7 +932,7 @@ static struct ehci_qh *qh_append_tds ( list_del (&qtd->qtd_list); list_add (&dummy->qtd_list, qtd_list); - __list_splice (qtd_list, qh->qtd_list.prev); + list_splice_tail(qtd_list, &qh->qtd_list); ehci_qtd_init(ehci, qtd, qtd->qtd_dma); qh->dummy = qtd; diff --git a/include/linux/list.h b/include/linux/list.h index 453916bc0412..a886f27a1181 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -215,21 +215,21 @@ static inline int list_is_singular(const struct list_head *head) } static inline void __list_splice(const struct list_head *list, - struct list_head *head) + struct list_head *prev, + struct list_head *next) { struct list_head *first = list->next; struct list_head *last = list->prev; - struct list_head *at = head->next; - first->prev = head; - head->next = first; + first->prev = prev; + prev->next = first; - last->next = at; - at->prev = last; + last->next = next; + next->prev = last; } /** - * list_splice - join two lists + * list_splice - join two lists, this is designed for stacks * @list: the new list to add. * @head: the place to add it in the first list. */ @@ -237,7 +237,19 @@ static inline void list_splice(const struct list_head *list, struct list_head *head) { if (!list_empty(list)) - __list_splice(list, head); + __list_splice(list, head, head->next); +} + +/** + * list_splice_tail - join two lists, each list being a queue + * @list: the new list to add. + * @head: the place to add it in the first list. + */ +static inline void list_splice_tail(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) + __list_splice(list, head->prev, head); } /** @@ -251,7 +263,24 @@ static inline void list_splice_init(struct list_head *list, struct list_head *head) { if (!list_empty(list)) { - __list_splice(list, head); + __list_splice(list, head, head->next); + INIT_LIST_HEAD(list); + } +} + +/** + * list_splice_tail_init - join two lists, each list being a queue, and + * reinitialise the emptied list. + * @list: the new list to add. + * @head: the place to add it in the first list. + * + * The list at @list is reinitialised + */ +static inline void list_splice_tail_init(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) { + __list_splice(list, head->prev, head); INIT_LIST_HEAD(list); } } -- cgit v1.2.3 From 00e8a4da8cf0d7dba8cc4b0da28ea0f12dcf6b36 Mon Sep 17 00:00:00 2001 From: "Luis R. 
Rodriguez" Date: Wed, 6 Aug 2008 13:28:54 -0700 Subject: list.h: add list_cut_position() This adds list_cut_position() which lets you cut a list into two lists given a pivot in the list. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/linux/list.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index a886f27a1181..1d109e2ef0a9 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -214,6 +214,46 @@ static inline int list_is_singular(const struct list_head *head) return !list_empty(head) && (head->next == head->prev); } +static inline void __list_cut_position(struct list_head *list, + struct list_head *head, struct list_head *entry) +{ + struct list_head *new_first = entry->next; + list->next = head->next; + list->next->prev = list; + list->prev = entry; + entry->next = list; + head->next = new_first; + new_first->prev = head; +} + +/** + * list_cut_position - cut a list into two + * @list: a new list to add all removed entries + * @head: a list with entries + * @entry: an entry within head, could be the head itself + * and if so we won't cut the list + * + * This helper moves the initial part of @head, up to and + * including @entry, from @head to @list. You should + * pass on @entry an element you know is on @head. @list + * should be an empty list or a list you do not care about + * losing its data. + * + */ +static inline void list_cut_position(struct list_head *list, + struct list_head *head, struct list_head *entry) +{ + if (list_empty(head)) + return; + if (list_is_singular(head) && + (head->next != entry && head != entry)) + return; + if (entry == head) + INIT_LIST_HEAD(list); + else + __list_cut_position(list, head, entry); +} + static inline void __list_splice(const struct list_head *list, struct list_head *prev, struct list_head *next) -- cgit v1.2.3 From 5861bbfcc10fc0358abf52c7d22850c8d180f0b0 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 7 Aug 2008 16:55:03 -0700 Subject: tracehook: fix CLONE_PTRACE In the change in commit 09a05394fe2448a4139b014936330af23fa7ec83, I overlooked two nits in the logic and this broke using CLONE_PTRACE when PTRACE_O_TRACE* are not being used. A parent that is itself traced at all but not using PTRACE_O_TRACE*, using CLONE_PTRACE would have its new child fail to be traced. A parent that is not itself traced at all that uses CLONE_PTRACE (which should be a no-op in this case) would confuse the bookkeeping and lead to a crash at exit time. This restores the missing checks and fixes both failure modes. 
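The semantics being restored can be exercised from user space with an ordinary clone() call; a minimal sketch (the stack size and child function are arbitrary), most instructive when run under a tracer such as strace -f:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <signal.h>
    #include <stdlib.h>
    #include <sys/wait.h>
    #include <unistd.h>

    static int child_fn(void *arg)
    {
            /* with a traced parent, this child comes up attached to the
             * parent's tracer and stopped with SIGSTOP before running;
             * with an untraced parent, CLONE_PTRACE is simply a no-op */
            _exit(0);
    }

    int main(void)
    {
            char *stack = malloc(64 * 1024);
            pid_t pid;

            if (!stack)
                    return 1;
            pid = clone(child_fn, stack + 64 * 1024,
                        CLONE_PTRACE | SIGCHLD, NULL);
            if (pid < 0)
                    return 1;
            waitpid(pid, NULL, 0);
            return 0;
    }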
Reported-by: Eduardo Habkost Signed-off-by: Roland McGrath --- include/linux/ptrace.h | 2 +- include/linux/tracehook.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index fd31756e1a00..ea7416c901d1 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -172,7 +172,7 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) child->ptrace = 0; if (unlikely(ptrace)) { child->ptrace = current->ptrace; - __ptrace_link(child, current->parent); + ptrace_link(child, current->parent); } } diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index ab3ef7aefa95..b48d81969574 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -280,7 +280,7 @@ static inline void tracehook_report_clone(int trace, struct pt_regs *regs, unsigned long clone_flags, pid_t pid, struct task_struct *child) { - if (unlikely(trace)) { + if (unlikely(trace) || unlikely(clone_flags & CLONE_PTRACE)) { /* * The child starts up with an immediate SIGSTOP. */ -- cgit v1.2.3 From 2727f226a65e034f93846def7fab314dee430df3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 8 Aug 2008 15:13:27 +0100 Subject: [ARM] fix pnx4008 build errors include/linux/i2c-pnx.h was missed when moving the include files. Fix it now; it doesn't really need to include mach/i2c.h at all. Successfully build tested with pnx4008_defconfig, which had failed in linux-next. Signed-off-by: Russell King --- arch/arm/mach-pnx4008/include/mach/i2c.h | 3 --- include/linux/i2c-pnx.h | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-pnx4008/include/mach/i2c.h b/arch/arm/mach-pnx4008/include/mach/i2c.h index 92e8d65006f7..259ac53abf40 100644 --- a/arch/arm/mach-pnx4008/include/mach/i2c.h +++ b/arch/arm/mach-pnx4008/include/mach/i2c.h @@ -12,9 +12,6 @@ #ifndef __ASM_ARCH_I2C_H__ #define __ASM_ARCH_I2C_H__ -#include -#include - enum { mstatus_tdi = 0x00000001, mstatus_afi = 0x00000002, diff --git a/include/linux/i2c-pnx.h b/include/linux/i2c-pnx.h index e6e9c814da61..f13255e06406 100644 --- a/include/linux/i2c-pnx.h +++ b/include/linux/i2c-pnx.h @@ -12,7 +12,9 @@ #ifndef __I2C_PNX_H__ #define __I2C_PNX_H__ -#include +#include + +struct platform_device; struct i2c_pnx_mif { int ret; /* Return value */ -- cgit v1.2.3 From 6724cce8fb4b408ae1a2fab455050f3407c80144 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 8 Aug 2008 13:56:20 -0700 Subject: list.h: fix fatal kernel-doc error Fix fatal multi-line kernel-doc error in list.h: function short description must be on one line. Error(linux-2.6.27-rc2-git3//include/linux/list.h:318): duplicate section name 'Description' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- include/linux/list.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 1d109e2ef0a9..db35ef02e745 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -309,11 +309,11 @@ static inline void list_splice_init(struct list_head *list, } /** - * list_splice_tail_init - join two lists, each list being a queue, and - * reinitialise the emptied list. + * list_splice_tail_init - join two lists and reinitialise the emptied list * @list: the new list to add. * @head: the place to add it in the first list. * + * Each of the lists is a queue. 
* The list at @list is reinitialised */ static inline void list_splice_tail_init(struct list_head *list, -- cgit v1.2.3
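Taken together, the list helpers added above cover the common producer/consumer pattern; a short sketch (the pending list, its lock and the consume_all() name are invented for illustration):

    #include <linux/list.h>
    #include <linux/spinlock.h>

    static LIST_HEAD(pending);
    static DEFINE_SPINLOCK(pending_lock);

    static void consume_all(void)
    {
            LIST_HEAD(local);

            /* grab everything queued so far, keeping submission order */
            spin_lock(&pending_lock);
            list_splice_tail_init(&pending, &local);
            spin_unlock(&pending_lock);

            /* process the private list without holding the lock */
            while (!list_empty(&local)) {
                    struct list_head *pos = local.next;

                    list_del(pos);
                    /* container_of(pos, ...) gives the embedding object */
            }
    }

Using the _tail variant preserves queue order, which is exactly the case the list_splice_tail commit message describes; list_cut_position() serves the same pattern when only the entries up to a known pivot should be moved.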