From 47376ceba54600cec4dd9e7c4fe8b98e4269633a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 16 Dec 2009 23:25:50 +0100
Subject: reiserfs: Fix reiserfs lock <-> inode mutex dependency inversion

The reiserfs lock -> inode mutex dependency gets inverted when we
relax the lock while walking to the tree.

To fix this, use a specialized version of reiserfs_mutex_lock_safe
that takes care of mutex subclasses. Then we can grab the inode
mutex with I_MUTEX_XATTR subclass without any reiserfs lock
dependency.

This fixes the following report:

[ INFO: possible circular locking dependency detected ]
2.6.32-06793-gf405425-dirty #2
-------------------------------------------------------
mv/18566 is trying to acquire lock:
 (&REISERFS_SB(s)->lock){+.+.+.}, at: [<c1110708>] reiserfs_write_lock+0x28=
/0x40

but task is already holding lock:
 (&sb->s_type->i_mutex_key#5/3){+.+.+.}, at: [<c111033c>]
reiserfs_for_each_xattr+0x10c/0x380

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&sb->s_type->i_mutex_key#5/3){+.+.+.}:
       [<c104f723>] validate_chain+0xa23/0xf70
       [<c1050155>] __lock_acquire+0x4e5/0xa70
       [<c105075a>] lock_acquire+0x7a/0xa0
       [<c134c76f>] mutex_lock_nested+0x5f/0x2b0
       [<c11102b4>] reiserfs_for_each_xattr+0x84/0x380
       [<c1110615>] reiserfs_delete_xattrs+0x15/0x50
       [<c10ef57f>] reiserfs_delete_inode+0x8f/0x140
       [<c10a565c>] generic_delete_inode+0x9c/0x150
       [<c10a574d>] generic_drop_inode+0x3d/0x60
       [<c10a4667>] iput+0x47/0x50
       [<c109cc0b>] do_unlinkat+0xdb/0x160
       [<c109cca0>] sys_unlink+0x10/0x20
       [<c1002c50>] sysenter_do_call+0x12/0x36

-> #0 (&REISERFS_SB(s)->lock){+.+.+.}:
       [<c104fc68>] validate_chain+0xf68/0xf70
       [<c1050155>] __lock_acquire+0x4e5/0xa70
       [<c105075a>] lock_acquire+0x7a/0xa0
       [<c134c76f>] mutex_lock_nested+0x5f/0x2b0
       [<c1110708>] reiserfs_write_lock+0x28/0x40
       [<c1103d6b>] search_by_key+0x1f7b/0x21b0
       [<c10e73ef>] search_by_entry_key+0x1f/0x3b0
       [<c10e77f7>] reiserfs_find_entry+0x77/0x400
       [<c10e81e5>] reiserfs_lookup+0x85/0x130
       [<c109a144>] __lookup_hash+0xb4/0x110
       [<c109b763>] lookup_one_len+0xb3/0x100
       [<c1110350>] reiserfs_for_each_xattr+0x120/0x380
       [<c1110615>] reiserfs_delete_xattrs+0x15/0x50
       [<c10ef57f>] reiserfs_delete_inode+0x8f/0x140
       [<c10a565c>] generic_delete_inode+0x9c/0x150
       [<c10a574d>] generic_drop_inode+0x3d/0x60
       [<c10a4667>] iput+0x47/0x50
       [<c10a1c4f>] dentry_iput+0x6f/0xf0
       [<c10a1d74>] d_kill+0x24/0x50
       [<c10a396b>] dput+0x5b/0x120
       [<c109ca89>] sys_renameat+0x1b9/0x230
       [<c109cb28>] sys_rename+0x28/0x30
       [<c1002c50>] sysenter_do_call+0x12/0x36

other info that might help us debug this:

2 locks held by mv/18566:
 #0:  (&sb->s_type->i_mutex_key#5/1){+.+.+.}, at: [<c109b6ac>]
lock_rename+0xcc/0xd0
 #1:  (&sb->s_type->i_mutex_key#5/3){+.+.+.}, at: [<c111033c>]
reiserfs_for_each_xattr+0x10c/0x380

stack backtrace:
Pid: 18566, comm: mv Tainted: G         C 2.6.32-06793-gf405425-dirty #2
Call Trace:
 [<c134b252>] ? printk+0x18/0x1e
 [<c104e790>] print_circular_bug+0xc0/0xd0
 [<c104fc68>] validate_chain+0xf68/0xf70
 [<c104c8cb>] ? trace_hardirqs_off+0xb/0x10
 [<c1050155>] __lock_acquire+0x4e5/0xa70
 [<c105075a>] lock_acquire+0x7a/0xa0
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c134c76f>] mutex_lock_nested+0x5f/0x2b0
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c134b60a>] ? schedule+0x27a/0x440
 [<c1110708>] reiserfs_write_lock+0x28/0x40
 [<c1103d6b>] search_by_key+0x1f7b/0x21b0
 [<c1050176>] ? __lock_acquire+0x506/0xa70
 [<c1051267>] ? lock_release_non_nested+0x1e7/0x340
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c104e354>] ? trace_hardirqs_on_caller+0x124/0x170
 [<c104e3ab>] ? trace_hardirqs_on+0xb/0x10
 [<c1042a55>] ? T.316+0x15/0x1a0
 [<c1042d2d>] ? sched_clock_cpu+0x9d/0x100
 [<c10e73ef>] search_by_entry_key+0x1f/0x3b0
 [<c134bf2a>] ? __mutex_unlock_slowpath+0x9a/0x120
 [<c104e354>] ? trace_hardirqs_on_caller+0x124/0x170
 [<c10e77f7>] reiserfs_find_entry+0x77/0x400
 [<c10e81e5>] reiserfs_lookup+0x85/0x130
 [<c1042d2d>] ? sched_clock_cpu+0x9d/0x100
 [<c109a144>] __lookup_hash+0xb4/0x110
 [<c109b763>] lookup_one_len+0xb3/0x100
 [<c1110350>] reiserfs_for_each_xattr+0x120/0x380
 [<c110ffe0>] ? delete_one_xattr+0x0/0x1c0
 [<c1003342>] ? math_error+0x22/0x150
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c1110615>] reiserfs_delete_xattrs+0x15/0x50
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c10ef57f>] reiserfs_delete_inode+0x8f/0x140
 [<c10a561f>] ? generic_delete_inode+0x5f/0x150
 [<c10ef4f0>] ? reiserfs_delete_inode+0x0/0x140
 [<c10a565c>] generic_delete_inode+0x9c/0x150
 [<c10a574d>] generic_drop_inode+0x3d/0x60
 [<c10a4667>] iput+0x47/0x50
 [<c10a1c4f>] dentry_iput+0x6f/0xf0
 [<c10a1d74>] d_kill+0x24/0x50
 [<c10a396b>] dput+0x5b/0x120
 [<c109ca89>] sys_renameat+0x1b9/0x230
 [<c1042d2d>] ? sched_clock_cpu+0x9d/0x100
 [<c104c8cb>] ? trace_hardirqs_off+0xb/0x10
 [<c1042dde>] ? cpu_clock+0x4e/0x60
 [<c1350825>] ? do_page_fault+0x155/0x370
 [<c1041816>] ? up_read+0x16/0x30
 [<c1350825>] ? do_page_fault+0x155/0x370
 [<c109cb28>] sys_rename+0x28/0x30
 [<c1002c50>] sysenter_do_call+0x12/0x36

Reported-by: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/reiserfs_fs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index a05b4a20768d..4351b49e2b1e 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -97,6 +97,15 @@ static inline void reiserfs_mutex_lock_safe(struct mutex *m,
 	reiserfs_write_lock(s);
 }
 
+static inline void
+reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
+			       struct super_block *s)
+{
+	reiserfs_write_unlock(s);
+	mutex_lock_nested(m, subclass);
+	reiserfs_write_lock(s);
+}
+
 /*
  * When we schedule, we usually want to also release the write lock,
  * according to the previous bkl based locking scheme of reiserfs.
-- 
cgit v1.2.3


From 2d1c861871d767153538a77c498752b36d4bb4b8 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 9 Dec 2009 17:52:13 +1100
Subject: PCI/cardbus: Add a fixup hook and fix powerpc

The cardbus code creates PCI devices without ever going through the
necessary fixup bits and pieces that normal PCI devices go through.

There's in fact a commented out call to pcibios_fixup_bus() in there,
it's commented because ... it doesn't work.

I could make pcibios_fixup_bus() do the right thing on powerpc easily
but I felt it cleaner instead to provide a specific hook pci_fixup_cardbus
for which a weak empty implementation is provided by the PCI core.

This fixes cardbus on powerbooks and probably all other PowerPC
platforms which was broken completely for ever on some platforms and
since 2.6.31 on others such as PowerBooks when we made the DMA ops
mandatory (since those are setup by the fixups).

Acked-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/powerpc/kernel/pci-common.c | 13 +++++++++++++
 drivers/pci/pci.c                |  5 +++++
 drivers/pcmcia/cardbus.c         |  2 +-
 include/linux/pci.h              |  3 +++
 4 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index e8dfdbd9327a..cadbed679fbb 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1107,6 +1107,12 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus)
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		struct dev_archdata *sd = &dev->dev.archdata;
 
+		/* Cardbus can call us to add new devices to a bus, so ignore
+		 * those who are already fully discovered
+		 */
+		if (dev->is_added)
+			continue;
+
 		/* Setup OF node pointer in archdata */
 		sd->of_node = pci_device_to_OF_node(dev);
 
@@ -1147,6 +1153,13 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 }
 EXPORT_SYMBOL(pcibios_fixup_bus);
 
+void __devinit pci_fixup_cardbus(struct pci_bus *bus)
+{
+	/* Now fixup devices on that bus */
+	pcibios_setup_bus_devices(bus);
+}
+
+
 static int skip_isa_ioresource_align(struct pci_dev *dev)
 {
 	if ((ppc_pci_flags & PPC_PCI_CAN_SKIP_ISA_ALIGN) &&
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d50522bf16b1..864e703cf737 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2798,6 +2798,11 @@ int __attribute__ ((weak)) pci_ext_cfg_avail(struct pci_dev *dev)
 	return 1;
 }
 
+void __weak pci_fixup_cardbus(struct pci_bus *bus)
+{
+}
+EXPORT_SYMBOL(pci_fixup_cardbus);
+
 static int __init pci_setup(char *str)
 {
 	while (str) {
diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c
index cdf50f3bc2df..d99f846451a3 100644
--- a/drivers/pcmcia/cardbus.c
+++ b/drivers/pcmcia/cardbus.c
@@ -222,7 +222,7 @@ int __ref cb_alloc(struct pcmcia_socket *s)
 	unsigned int max, pass;
 
 	s->functions = pci_scan_slot(bus, PCI_DEVFN(0, 0));
-/*	pcibios_fixup_bus(bus); */
+	pci_fixup_cardbus(bus); 
 
 	max = bus->secondary;
 	for (pass = 0; pass < 2; pass++)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index bf1e67080849..5da0690d9cee 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -566,6 +566,9 @@ void pcibios_align_resource(void *, struct resource *, resource_size_t,
 				resource_size_t);
 void pcibios_update_irq(struct pci_dev *, int irq);
 
+/* Weak but can be overriden by arch */
+void pci_fixup_cardbus(struct pci_bus *);
+
 /* Generic PCI functions used internally */
 
 extern struct pci_bus *pci_find_bus(int domain, int busnr);
-- 
cgit v1.2.3


From 9a418af5df03ad133cd8c8f6742b75e542db6392 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 17 Dec 2009 13:55:48 +0100
Subject: mac80211: fix peer HT capabilities

I noticed yesterday, because Jeff had noticed
a speed regression, cf. bug
http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2138
that the SM PS settings for peers were wrong.
Instead of overwriting the SM PS settings with
the local bits, we need to keep the remote bits.

The bug was part of the original HT code from
over two years ago, but unfortunately nobody
noticed that it makes no sense -- we shouldn't
be overwriting the peer's setting with our own
but rather keep it intact when masking the peer
capabilities with our own.

While fixing that, I noticed that the masking of
capabilities is completely useless for most of
the bits, so also fix those other bits.

Finally, I also noticed that PSMP_SUPPORT no
longer exists in the final 802.11n version, so
also remove that.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/rt2x00/rt2800lib.c |  3 +--
 include/linux/ieee80211.h               |  2 +-
 net/mac80211/ht.c                       | 25 ++++++++++++++++++++++---
 3 files changed, 24 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c
index 6bf6c0f12f35..27bf887f1453 100644
--- a/drivers/net/wireless/rt2x00/rt2800lib.c
+++ b/drivers/net/wireless/rt2x00/rt2800lib.c
@@ -2080,8 +2080,7 @@ int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
 	    IEEE80211_HT_CAP_SGI_20 |
 	    IEEE80211_HT_CAP_SGI_40 |
 	    IEEE80211_HT_CAP_TX_STBC |
-	    IEEE80211_HT_CAP_RX_STBC |
-	    IEEE80211_HT_CAP_PSMP_SUPPORT;
+	    IEEE80211_HT_CAP_RX_STBC;
 	spec->ht.ampdu_factor = 3;
 	spec->ht.ampdu_density = 4;
 	spec->ht.mcs.tx_params =
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index d9724a28c0c2..163c840437d6 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -832,7 +832,7 @@ struct ieee80211_ht_cap {
 #define IEEE80211_HT_CAP_DELAY_BA		0x0400
 #define IEEE80211_HT_CAP_MAX_AMSDU		0x0800
 #define IEEE80211_HT_CAP_DSSSCCK40		0x1000
-#define IEEE80211_HT_CAP_PSMP_SUPPORT		0x2000
+#define IEEE80211_HT_CAP_RESERVED		0x2000
 #define IEEE80211_HT_CAP_40MHZ_INTOLERANT	0x4000
 #define IEEE80211_HT_CAP_LSIG_TXOP_PROT		0x8000
 
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 3787455fb696..d7dcee680728 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -34,9 +34,28 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
 
 	ht_cap->ht_supported = true;
 
-	ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & sband->ht_cap.cap;
-	ht_cap->cap &= ~IEEE80211_HT_CAP_SM_PS;
-	ht_cap->cap |= sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS;
+	/*
+	 * The bits listed in this expression should be
+	 * the same for the peer and us, if the station
+	 * advertises more then we can't use those thus
+	 * we mask them out.
+	 */
+	ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) &
+		(sband->ht_cap.cap |
+		 ~(IEEE80211_HT_CAP_LDPC_CODING |
+		   IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
+		   IEEE80211_HT_CAP_GRN_FLD |
+		   IEEE80211_HT_CAP_SGI_20 |
+		   IEEE80211_HT_CAP_SGI_40 |
+		   IEEE80211_HT_CAP_DSSSCCK40));
+	/*
+	 * The STBC bits are asymmetric -- if we don't have
+	 * TX then mask out the peer's RX and vice versa.
+	 */
+	if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC))
+		ht_cap->cap &= ~IEEE80211_HT_CAP_RX_STBC;
+	if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC))
+		ht_cap->cap &= ~IEEE80211_HT_CAP_TX_STBC;
 
 	ampdu_info = ht_cap_ie->ampdu_params_info;
 	ht_cap->ampdu_factor =
-- 
cgit v1.2.3


From cc3e1bea5d87635c519da657303690f5538bb4eb Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 23 Dec 2009 06:52:08 -0500
Subject: ext4, jbd2: Add barriers for file systems with exernal journals

This is a bit complicated because we are trying to optimize when we
send barriers to the fs data disk.  We could just throw in an extra
barrier to the data disk whenever we send a barrier to the journal
disk, but that's not always strictly necessary.

We only need to send a barrier during a commit when there are data
blocks which are must be written out due to an inode written in
ordered mode, or if fsync() depends on the commit to force data blocks
to disk.  Finally, before we drop transactions from the beginning of
the journal during a checkpoint operation, we need to guarantee that
any blocks that were flushed out to the data disk are firmly on the
rust platter before we drop the transaction from the journal.

Thanks to Oleg Drokin for pointing out this flaw in ext3/ext4.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/fsync.c      | 16 ++++++++++++++--
 fs/jbd2/checkpoint.c | 15 +++++++++++++++
 fs/jbd2/commit.c     | 19 +++++++++++--------
 include/linux/jbd2.h |  1 +
 4 files changed, 41 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0b22497d92e1..98bd140aad01 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
 		return ext4_force_commit(inode->i_sb);
 
 	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
-	if (jbd2_log_start_commit(journal, commit_tid))
+	if (jbd2_log_start_commit(journal, commit_tid)) {
+		/*
+		 * When the journal is on a different device than the
+		 * fs data disk, we need to issue the barrier in
+		 * writeback mode.  (In ordered mode, the jbd2 layer
+		 * will take care of issuing the barrier.  In
+		 * data=journal, all of the data blocks are written to
+		 * the journal device.)
+		 */
+		if (ext4_should_writeback_data(inode) &&
+		    (journal->j_fs_dev != journal->j_dev) &&
+		    (journal->j_flags & JBD2_BARRIER))
+			blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 		jbd2_log_wait_commit(journal, commit_tid);
-	else if (journal->j_flags & JBD2_BARRIER)
+	} else if (journal->j_flags & JBD2_BARRIER)
 		blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 	return ret;
 }
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index ca0f5eb62b20..886849370950 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -22,6 +22,7 @@
 #include <linux/jbd2.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
+#include <linux/blkdev.h>
 #include <trace/events/jbd2.h>
 
 /*
@@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
 	journal->j_tail_sequence = first_tid;
 	journal->j_tail = blocknr;
 	spin_unlock(&journal->j_state_lock);
+
+	/*
+	 * If there is an external journal, we need to make sure that
+	 * any data blocks that were recently written out --- perhaps
+	 * by jbd2_log_do_checkpoint() --- are flushed out before we
+	 * drop the transactions from the external journal.  It's
+	 * unlikely this will be necessary, especially with a
+	 * appropriately sized journal, but we need this to guarantee
+	 * correctness.  Fortunately jbd2_cleanup_journal_tail()
+	 * doesn't get called all that often.
+	 */
+	if ((journal->j_fs_dev != journal->j_dev) &&
+	    (journal->j_flags & JBD2_BARRIER))
+		blkdev_issue_flush(journal->j_fs_dev, NULL);
 	if (!(journal->j_flags & JBD2_ABORT))
 		jbd2_journal_update_superblock(journal, 1);
 	return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6a10238d2c63..1bc74b6f26d2 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal,
 			ret = err;
 		spin_lock(&journal->j_list_lock);
 		J_ASSERT(jinode->i_transaction == commit_transaction);
+		commit_transaction->t_flushed_data_blocks = 1;
 		jinode->i_flags &= ~JI_COMMIT_RUNNING;
 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
 	}
@@ -708,8 +709,17 @@ start_journal_io:
 		}
 	}
 
-	/* Done it all: now write the commit record asynchronously. */
+	/* 
+	 * If the journal is not located on the file system device,
+	 * then we must flush the file system device before we issue
+	 * the commit record
+	 */
+	if (commit_transaction->t_flushed_data_blocks &&
+	    (journal->j_fs_dev != journal->j_dev) &&
+	    (journal->j_flags & JBD2_BARRIER))
+		blkdev_issue_flush(journal->j_fs_dev, NULL);
 
+	/* Done it all: now write the commit record asynchronously. */
 	if (JBD2_HAS_INCOMPAT_FEATURE(journal,
 				      JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
 		err = journal_submit_commit_record(journal, commit_transaction,
@@ -720,13 +730,6 @@ start_journal_io:
 			blkdev_issue_flush(journal->j_dev, NULL);
 	}
 
-	/*
-	 * This is the right place to wait for data buffers both for ASYNC
-	 * and !ASYNC commit. If commit is ASYNC, we need to wait only after
-	 * the commit block went to disk (which happens above). If commit is
-	 * SYNC, we need to wait for data buffers before we start writing
-	 * commit block, which happens below in such setting.
-	 */
 	err = journal_finish_inode_data_buffers(journal, commit_transaction);
 	if (err) {
 		printk(KERN_WARNING
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index f1011f7f3d41..638ce4554c76 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -653,6 +653,7 @@ struct transaction_s
 	 * waiting for it to finish.
 	 */
 	unsigned int t_synchronous_commit:1;
+	unsigned int t_flushed_data_blocks:1;
 
 	/*
 	 * For use by the filesystem to store fs-specific data
-- 
cgit v1.2.3


From 17bd55d037a02b04d9119511cfd1a4b985d20f63 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Wed, 23 Dec 2009 07:57:07 -0500
Subject: fs-writeback: Add helper function to start writeback if idle

ext4, at least, would like to start pushing on writeback if it starts
to get close to ENOSPC when reserving worst-case blocks for delalloc
writes.  Writing out delalloc data will convert those worst-case
predictions into usually smaller actual usage, freeing up space
before we hit ENOSPC based on this speculation.

Thanks to Jens for the suggestion for the helper function,
& the naming help.

I've made the helper return status on whether writeback was
started even though I don't plan to use it in the ext4 patch;
it seems like it would be potentially useful to test this
in some cases.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Acked-by: Jan Kara <jack@suse.cz>
---
 fs/fs-writeback.c         | 17 +++++++++++++++++
 include/linux/writeback.h |  1 +
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 49bc1b8e8f19..f6c2155e0026 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1186,6 +1186,23 @@ void writeback_inodes_sb(struct super_block *sb)
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
+/**
+ * writeback_inodes_sb_if_idle	-	start writeback if none underway
+ * @sb: the superblock
+ *
+ * Invoke writeback_inodes_sb if no writeback is currently underway.
+ * Returns 1 if writeback was started, 0 if not.
+ */
+int writeback_inodes_sb_if_idle(struct super_block *sb)
+{
+	if (!writeback_in_progress(sb->s_bdi)) {
+		writeback_inodes_sb(sb);
+		return 1;
+	} else
+		return 0;
+}
+EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
+
 /**
  * sync_inodes_sb	-	sync sb inode pages
  * @sb: the superblock
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index c18c008f4bbf..76e8903cd204 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -70,6 +70,7 @@ struct writeback_control {
 struct bdi_writeback;
 int inode_wait(void *);
 void writeback_inodes_sb(struct super_block *);
+int writeback_inodes_sb_if_idle(struct super_block *);
 void sync_inodes_sb(struct super_block *);
 void writeback_inodes_wbc(struct writeback_control *wbc);
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
-- 
cgit v1.2.3


From 28f6aeea3f12d37bd258b2c0d5ba891bff4ec479 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Fri, 25 Dec 2009 17:30:22 -0800
Subject: net: restore ip source validation

when using policy routing and the skb mark:
there are cases where a back path validation requires us
to use a different routing table for src ip validation than
the one used for mapping ingress dst ip.
One such a case is transparent proxying where we pretend to be
the destination system and therefore the local table
is used for incoming packets but possibly a main table would
be used on outbound.
Make the default behavior to allow the above and if users
need to turn on the symmetry via sysctl src_valid_mark

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h | 1 +
 include/linux/sysctl.h     | 1 +
 net/ipv4/devinet.c         | 1 +
 net/ipv4/fib_frontend.c    | 2 ++
 4 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 699e85c01a4d..b2304929434e 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -81,6 +81,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 #define IN_DEV_FORWARD(in_dev)		IN_DEV_CONF_GET((in_dev), FORWARDING)
 #define IN_DEV_MFORWARD(in_dev)		IN_DEV_ANDCONF((in_dev), MC_FORWARDING)
 #define IN_DEV_RPFILTER(in_dev)		IN_DEV_MAXCONF((in_dev), RP_FILTER)
+#define IN_DEV_SRC_VMARK(in_dev)    	IN_DEV_ORCONF((in_dev), SRC_VMARK)
 #define IN_DEV_SOURCE_ROUTE(in_dev)	IN_DEV_ANDCONF((in_dev), \
 						       ACCEPT_SOURCE_ROUTE)
 #define IN_DEV_ACCEPT_LOCAL(in_dev)	IN_DEV_ORCONF((in_dev), ACCEPT_LOCAL)
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 877ba039e6a4..bd27fbc9db62 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -482,6 +482,7 @@ enum
 	NET_IPV4_CONF_ARP_ACCEPT=21,
 	NET_IPV4_CONF_ARP_NOTIFY=22,
 	NET_IPV4_CONF_ACCEPT_LOCAL=23,
+	NET_IPV4_CONF_SRC_VMARK=24,
 	__NET_IPV4_CONF_MAX
 };
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5cdbc102a418..040c4f05b653 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1397,6 +1397,7 @@ static struct devinet_sysctl_table {
 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
 					"accept_source_route"),
 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
+		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 3323168ee52d..82dbf711d6d0 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -252,6 +252,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 		no_addr = in_dev->ifa_list == NULL;
 		rpf = IN_DEV_RPFILTER(in_dev);
 		accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
+		if (mark && !IN_DEV_SRC_VMARK(in_dev))
+			fl.mark = 0;
 	}
 	rcu_read_unlock();
 
-- 
cgit v1.2.3


From 81744ee44ab2845c16ffd7d6f762f7b4a49a4750 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Tue, 29 Dec 2009 08:35:35 +0100
Subject: block: Fix incorrect alignment offset reporting and update
 documentation

queue_sector_alignment_offset returned the wrong value which caused
partitions to report an incorrect alignment_offset.  Since offset
alignment calculation is needed several places it has been split into a
separate helper function.  The topology stacking function has been
updated accordingly.

Furthermore, comments have been added to clarify how the stacking
function works.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Tested-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-settings.c   | 44 +++++++++++++++++++++++++++++++++-----------
 include/linux/blkdev.h | 11 +++++++++--
 2 files changed, 42 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index e14fcbcedbfa..d52d4adc440b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -505,20 +505,30 @@ static unsigned int lcm(unsigned int a, unsigned int b)
 
 /**
  * blk_stack_limits - adjust queue_limits for stacked devices
- * @t:	the stacking driver limits (top)
- * @b:  the underlying queue limits (bottom)
+ * @t:	the stacking driver limits (top device)
+ * @b:  the underlying queue limits (bottom, component device)
  * @offset:  offset to beginning of data within component device
  *
  * Description:
- *    Merges two queue_limit structs.  Returns 0 if alignment didn't
- *    change.  Returns -1 if adding the bottom device caused
- *    misalignment.
+ *    This function is used by stacking drivers like MD and DM to ensure
+ *    that all component devices have compatible block sizes and
+ *    alignments.  The stacking driver must provide a queue_limits
+ *    struct (top) and then iteratively call the stacking function for
+ *    all component (bottom) devices.  The stacking function will
+ *    attempt to combine the values and ensure proper alignment.
+ *
+ *    Returns 0 if the top and bottom queue_limits are compatible.  The
+ *    top device's block sizes and alignment offsets may be adjusted to
+ *    ensure alignment with the bottom device. If no compatible sizes
+ *    and alignments exist, -1 is returned and the resulting top
+ *    queue_limits will have the misaligned flag set to indicate that
+ *    the alignment_offset is undefined.
  */
 int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		     sector_t offset)
 {
 	sector_t alignment;
-	unsigned int top, bottom, granularity;
+	unsigned int top, bottom;
 
 	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
 	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
@@ -536,15 +546,18 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	t->max_segment_size = min_not_zero(t->max_segment_size,
 					   b->max_segment_size);
 
-	granularity = max(b->physical_block_size, b->io_min);
-	alignment = b->alignment_offset - (offset & (granularity - 1));
+	alignment = queue_limit_alignment_offset(b, offset);
 
+	/* Bottom device has different alignment.  Check that it is
+	 * compatible with the current top alignment.
+	 */
 	if (t->alignment_offset != alignment) {
 
 		top = max(t->physical_block_size, t->io_min)
 			+ t->alignment_offset;
-		bottom = granularity + alignment;
+		bottom = max(b->physical_block_size, b->io_min) + alignment;
 
+		/* Verify that top and bottom intervals line up */
 		if (max(top, bottom) & (min(top, bottom) - 1))
 			t->misaligned = 1;
 	}
@@ -561,32 +574,39 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	t->no_cluster |= b->no_cluster;
 	t->discard_zeroes_data &= b->discard_zeroes_data;
 
+	/* Physical block size a multiple of the logical block size? */
 	if (t->physical_block_size & (t->logical_block_size - 1)) {
 		t->physical_block_size = t->logical_block_size;
 		t->misaligned = 1;
 	}
 
+	/* Minimum I/O a multiple of the physical block size? */
 	if (t->io_min & (t->physical_block_size - 1)) {
 		t->io_min = t->physical_block_size;
 		t->misaligned = 1;
 	}
 
+	/* Optimal I/O a multiple of the physical block size? */
 	if (t->io_opt & (t->physical_block_size - 1)) {
 		t->io_opt = 0;
 		t->misaligned = 1;
 	}
 
+	/* Find lowest common alignment_offset */
 	t->alignment_offset = lcm(t->alignment_offset, alignment)
 		& (max(t->physical_block_size, t->io_min) - 1);
 
+	/* Verify that new alignment_offset is on a logical block boundary */
 	if (t->alignment_offset & (t->logical_block_size - 1))
 		t->misaligned = 1;
 
 	/* Discard alignment and granularity */
 	if (b->discard_granularity) {
+		unsigned int granularity = b->discard_granularity;
+		offset &= granularity - 1;
 
-		alignment = b->discard_alignment -
-			(offset & (b->discard_granularity - 1));
+		alignment = (granularity + b->discard_alignment - offset)
+			& (granularity - 1);
 
 		if (t->discard_granularity != 0 &&
 		    t->discard_alignment != alignment) {
@@ -598,6 +618,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 				t->discard_misaligned = 1;
 		}
 
+		t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
+						      b->max_discard_sectors);
 		t->discard_granularity = max(t->discard_granularity,
 					     b->discard_granularity);
 		t->discard_alignment = lcm(t->discard_alignment, alignment) &
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 784a919aa0d0..59b832be3044 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1116,11 +1116,18 @@ static inline int queue_alignment_offset(struct request_queue *q)
 	return q->limits.alignment_offset;
 }
 
+static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t offset)
+{
+	unsigned int granularity = max(lim->physical_block_size, lim->io_min);
+
+	offset &= granularity - 1;
+	return (granularity + lim->alignment_offset - offset) & (granularity - 1);
+}
+
 static inline int queue_sector_alignment_offset(struct request_queue *q,
 						sector_t sector)
 {
-	return ((sector << 9) - q->limits.alignment_offset)
-		& (q->limits.io_min - 1);
+	return queue_limit_alignment_offset(&q->limits, sector << 9);
 }
 
 static inline int bdev_alignment_offset(struct block_device *bdev)
-- 
cgit v1.2.3


From db5d247ae811f49185a71e703b65acad845e4b18 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <cladisch@fastmail.net>
Date: Thu, 24 Dec 2009 12:05:58 +0100
Subject: firewire: fix use of multiple AV/C devices, allow multiple FCP
 listeners

Control of more than one AV/C device at once --- e.g. camcorders, tape
decks, audio devices, TV tuners --- failed or worked only unreliably,
depending on driver implementation.  This affected kernelspace and
userspace drivers alike and was caused by firewire-core's inability to
accept multiple registrations of FCP listeners.

The fix allows multiple address handlers to be registered for the FCP
command and response registers.  When a request for these registers is
received, all handlers are invoked, and the Firewire response is
generated by the core and not by any handler.

The cdev API does not change, i.e., userspace is still expected to send
a response for FCP requests; this response is silently ignored.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de> (changelog, rebased, whitespace)
---
 drivers/firewire/core-cdev.c            |  26 ++++---
 drivers/firewire/core-transaction.c     | 118 ++++++++++++++++++++++++++------
 drivers/media/dvb/firewire/firedtv-fw.c |  12 +---
 include/linux/firewire-cdev.h           |   3 +
 include/linux/firewire.h                |   4 +-
 5 files changed, 119 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 231e6ee5ba43..2cb22d160f6e 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -601,8 +601,9 @@ static void release_request(struct client *client,
 	struct inbound_transaction_resource *r = container_of(resource,
 			struct inbound_transaction_resource, resource);
 
-	fw_send_response(client->device->card, r->request,
-			 RCODE_CONFLICT_ERROR);
+	if (r->request)
+		fw_send_response(client->device->card, r->request,
+				 RCODE_CONFLICT_ERROR);
 	kfree(r);
 }
 
@@ -645,7 +646,8 @@ static void handle_request(struct fw_card *card, struct fw_request *request,
  failed:
 	kfree(r);
 	kfree(e);
-	fw_send_response(card, request, RCODE_CONFLICT_ERROR);
+	if (request)
+		fw_send_response(card, request, RCODE_CONFLICT_ERROR);
 }
 
 static void release_address_handler(struct client *client,
@@ -715,15 +717,17 @@ static int ioctl_send_response(struct client *client, void *buffer)
 
 	r = container_of(resource, struct inbound_transaction_resource,
 			 resource);
-	if (request->length < r->length)
-		r->length = request->length;
-
-	if (copy_from_user(r->data, u64_to_uptr(request->data), r->length)) {
-		ret = -EFAULT;
-		goto out;
+	if (r->request) {
+		if (request->length < r->length)
+			r->length = request->length;
+		if (copy_from_user(r->data, u64_to_uptr(request->data),
+				   r->length)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		fw_send_response(client->device->card, r->request,
+				 request->rcode);
 	}
-
-	fw_send_response(client->device->card, r->request, request->rcode);
  out:
 	kfree(r);
 
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index 842739df23e2..495849eb13cc 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -432,14 +432,20 @@ static struct fw_address_handler *lookup_overlapping_address_handler(
 	return NULL;
 }
 
+static bool is_enclosing_handler(struct fw_address_handler *handler,
+				 unsigned long long offset, size_t length)
+{
+	return handler->offset <= offset &&
+		offset + length <= handler->offset + handler->length;
+}
+
 static struct fw_address_handler *lookup_enclosing_address_handler(
 	struct list_head *list, unsigned long long offset, size_t length)
 {
 	struct fw_address_handler *handler;
 
 	list_for_each_entry(handler, list, link) {
-		if (handler->offset <= offset &&
-		    offset + length <= handler->offset + handler->length)
+		if (is_enclosing_handler(handler, offset, length))
 			return handler;
 	}
 
@@ -465,6 +471,12 @@ const struct fw_address_region fw_unit_space_region =
 	{ .start = 0xfffff0000900ULL, .end = 0x1000000000000ULL, };
 #endif  /*  0  */
 
+static bool is_in_fcp_region(u64 offset, size_t length)
+{
+	return offset >= (CSR_REGISTER_BASE | CSR_FCP_COMMAND) &&
+		offset + length <= (CSR_REGISTER_BASE | CSR_FCP_END);
+}
+
 /**
  * fw_core_add_address_handler - register for incoming requests
  * @handler: callback
@@ -477,8 +489,11 @@ const struct fw_address_region fw_unit_space_region =
  * give the details of the particular request.
  *
  * Return value:  0 on success, non-zero otherwise.
+ *
  * The start offset of the handler's address region is determined by
  * fw_core_add_address_handler() and is returned in handler->offset.
+ *
+ * Address allocations are exclusive, except for the FCP registers.
  */
 int fw_core_add_address_handler(struct fw_address_handler *handler,
 				const struct fw_address_region *region)
@@ -498,10 +513,12 @@ int fw_core_add_address_handler(struct fw_address_handler *handler,
 
 	handler->offset = region->start;
 	while (handler->offset + handler->length <= region->end) {
-		other =
-		    lookup_overlapping_address_handler(&address_handler_list,
-						       handler->offset,
-						       handler->length);
+		if (is_in_fcp_region(handler->offset, handler->length))
+			other = NULL;
+		else
+			other = lookup_overlapping_address_handler
+					(&address_handler_list,
+					 handler->offset, handler->length);
 		if (other != NULL) {
 			handler->offset += other->length;
 		} else {
@@ -668,6 +685,9 @@ static struct fw_request *allocate_request(struct fw_packet *p)
 void fw_send_response(struct fw_card *card,
 		      struct fw_request *request, int rcode)
 {
+	if (WARN_ONCE(!request, "invalid for FCP address handlers"))
+		return;
+
 	/* unified transaction or broadcast transaction: don't respond */
 	if (request->ack != ACK_PENDING ||
 	    HEADER_DESTINATION_IS_BROADCAST(request->request_header[0])) {
@@ -686,26 +706,15 @@ void fw_send_response(struct fw_card *card,
 }
 EXPORT_SYMBOL(fw_send_response);
 
-void fw_core_handle_request(struct fw_card *card, struct fw_packet *p)
+static void handle_exclusive_region_request(struct fw_card *card,
+					    struct fw_packet *p,
+					    struct fw_request *request,
+					    unsigned long long offset)
 {
 	struct fw_address_handler *handler;
-	struct fw_request *request;
-	unsigned long long offset;
 	unsigned long flags;
 	int tcode, destination, source;
 
-	if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE)
-		return;
-
-	request = allocate_request(p);
-	if (request == NULL) {
-		/* FIXME: send statically allocated busy packet. */
-		return;
-	}
-
-	offset      =
-		((unsigned long long)
-		 HEADER_GET_OFFSET_HIGH(p->header[1]) << 32) | p->header[2];
 	tcode       = HEADER_GET_TCODE(p->header[0]);
 	destination = HEADER_GET_DESTINATION(p->header[0]);
 	source      = HEADER_GET_SOURCE(p->header[1]);
@@ -732,6 +741,73 @@ void fw_core_handle_request(struct fw_card *card, struct fw_packet *p)
 					  request->data, request->length,
 					  handler->callback_data);
 }
+
+static void handle_fcp_region_request(struct fw_card *card,
+				      struct fw_packet *p,
+				      struct fw_request *request,
+				      unsigned long long offset)
+{
+	struct fw_address_handler *handler;
+	unsigned long flags;
+	int tcode, destination, source;
+
+	if ((offset != (CSR_REGISTER_BASE | CSR_FCP_COMMAND) &&
+	     offset != (CSR_REGISTER_BASE | CSR_FCP_RESPONSE)) ||
+	    request->length > 0x200) {
+		fw_send_response(card, request, RCODE_ADDRESS_ERROR);
+
+		return;
+	}
+
+	tcode       = HEADER_GET_TCODE(p->header[0]);
+	destination = HEADER_GET_DESTINATION(p->header[0]);
+	source      = HEADER_GET_SOURCE(p->header[1]);
+
+	if (tcode != TCODE_WRITE_QUADLET_REQUEST &&
+	    tcode != TCODE_WRITE_BLOCK_REQUEST) {
+		fw_send_response(card, request, RCODE_TYPE_ERROR);
+
+		return;
+	}
+
+	spin_lock_irqsave(&address_handler_lock, flags);
+	list_for_each_entry(handler, &address_handler_list, link) {
+		if (is_enclosing_handler(handler, offset, request->length))
+			handler->address_callback(card, NULL, tcode,
+						  destination, source,
+						  p->generation, p->speed,
+						  offset, request->data,
+						  request->length,
+						  handler->callback_data);
+	}
+	spin_unlock_irqrestore(&address_handler_lock, flags);
+
+	fw_send_response(card, request, RCODE_COMPLETE);
+}
+
+void fw_core_handle_request(struct fw_card *card, struct fw_packet *p)
+{
+	struct fw_request *request;
+	unsigned long long offset;
+
+	if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE)
+		return;
+
+	request = allocate_request(p);
+	if (request == NULL) {
+		/* FIXME: send statically allocated busy packet. */
+		return;
+	}
+
+	offset = ((u64)HEADER_GET_OFFSET_HIGH(p->header[1]) << 32) |
+		p->header[2];
+
+	if (!is_in_fcp_region(offset, request->length))
+		handle_exclusive_region_request(card, p, request, offset);
+	else
+		handle_fcp_region_request(card, p, request, offset);
+
+}
 EXPORT_SYMBOL(fw_core_handle_request);
 
 void fw_core_handle_response(struct fw_card *card, struct fw_packet *p)
diff --git a/drivers/media/dvb/firewire/firedtv-fw.c b/drivers/media/dvb/firewire/firedtv-fw.c
index fe44789ab037..6223bf01efe9 100644
--- a/drivers/media/dvb/firewire/firedtv-fw.c
+++ b/drivers/media/dvb/firewire/firedtv-fw.c
@@ -202,14 +202,8 @@ static void handle_fcp(struct fw_card *card, struct fw_request *request,
 	unsigned long flags;
 	int su;
 
-	if ((tcode != TCODE_WRITE_QUADLET_REQUEST &&
-	     tcode != TCODE_WRITE_BLOCK_REQUEST) ||
-	    offset != CSR_REGISTER_BASE + CSR_FCP_RESPONSE ||
-	    length == 0 ||
-	    (((u8 *)payload)[0] & 0xf0) != 0) {
-		fw_send_response(card, request, RCODE_TYPE_ERROR);
+	if (length < 2 || (((u8 *)payload)[0] & 0xf0) != 0)
 		return;
-	}
 
 	su = ((u8 *)payload)[1] & 0x7;
 
@@ -230,10 +224,8 @@ static void handle_fcp(struct fw_card *card, struct fw_request *request,
 	}
 	spin_unlock_irqrestore(&node_list_lock, flags);
 
-	if (fdtv) {
+	if (fdtv)
 		avc_recv(fdtv, payload, length);
-		fw_send_response(card, request, RCODE_COMPLETE);
-	}
 }
 
 static struct fw_address_handler fcp_handler = {
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index c6b3ca3af6df..1f716d9f714b 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -340,6 +340,9 @@ struct fw_cdev_send_response {
  * The @closure field is passed back to userspace in the response event.
  * The @handle field is an out parameter, returning a handle to the allocated
  * range to be used for later deallocation of the range.
+ *
+ * The address range is allocated on all local nodes.  The address allocation
+ * is exclusive except for the FCP command and response registers.
  */
 struct fw_cdev_allocate {
 	__u64 offset;
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 9416a461b696..a0e67150a729 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -248,8 +248,8 @@ typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode,
 					  void *data, size_t length,
 					  void *callback_data);
 /*
- * Important note:  The callback must guarantee that either fw_send_response()
- * or kfree() is called on the @request.
+ * Important note:  Except for the FCP registers, the callback must guarantee
+ * that either fw_send_response() or kfree() is called on the @request.
  */
 typedef void (*fw_address_callback_t)(struct fw_card *card,
 				      struct fw_request *request,
-- 
cgit v1.2.3


From 9bd3f98821a83041e77ee25158b80b535d02d7b4 Mon Sep 17 00:00:00 2001
From: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
Date: Wed, 30 Dec 2009 08:41:07 +0100
Subject: block: blk_rq_err_sectors cleanup

blk_rq_err_sectors() seems useless, get rid of it.

Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/blkdev.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 59b832be3044..9b98173a8184 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -845,7 +845,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
  * blk_rq_err_bytes()		: bytes left till the next error boundary
  * blk_rq_sectors()		: sectors left in the entire request
  * blk_rq_cur_sectors()		: sectors left in the current segment
- * blk_rq_err_sectors()		: sectors left till the next error boundary
  */
 static inline sector_t blk_rq_pos(const struct request *rq)
 {
@@ -874,11 +873,6 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 	return blk_rq_cur_bytes(rq) >> 9;
 }
 
-static inline unsigned int blk_rq_err_sectors(const struct request *rq)
-{
-	return blk_rq_err_bytes(rq) >> 9;
-}
-
 /*
  * Request issue related functions.
  */
-- 
cgit v1.2.3


From e96dc9674cb597de4fee757ed005c8465072d13f Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 15 Dec 2009 15:39:26 +0800
Subject: tracing/syscalls: Fix typo in SYSCALL_DEFINE0

The struct syscall_metadata variable name in SYSCALL_DEFINE0
should be __syscall_meta__##sname instead of __syscall_meta_##sname
to match the name that is in SYSCALL_DEFINE1/2/3/4/5/6.

This error causes event_enter_##sname->data to point to the wrong
location, which causes syscalls which are defined by SYSCALL_DEFINE0()
not to be traced.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <4B273D2E.1010807@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/syscalls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 65793e90d6f6..207466a49f3d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -195,7 +195,7 @@ struct perf_event_attr;
 	static const struct syscall_metadata __used		\
 	  __attribute__((__aligned__(4)))			\
 	  __attribute__((section("__syscalls_metadata")))	\
-	  __syscall_meta_##sname = {				\
+	  __syscall_meta__##sname = {				\
 		.name 		= "sys_"#sname,			\
 		.nb_args 	= 0,				\
 		.enter_event	= &event_enter__##sname,	\
-- 
cgit v1.2.3


From ed656d8deccc5669afa33387568e7ec6f14e3e94 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eike-kernel@sf-tec.de>
Date: Sat, 26 Dec 2009 17:58:11 +0100
Subject: kfifo: Fix typo in comment

It's DECLARE_KFIFO, not DECLARED_KFIFO.

Signed-off-by: Rolf Eike Beer <eike-kernel@sf-tec.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 3d44e9c65a8e..7c6b32a1421c 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -81,7 +81,7 @@ union { \
 }
 
 /**
- * INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO
+ * INIT_KFIFO - Initialize a kfifo declared by DECLARE_KFIFO
  * @name: name of the declared kfifo datatype
  */
 #define INIT_KFIFO(name) \
-- 
cgit v1.2.3


From d7f0eea9e431e1b8b0742a74db1a9490730b2a25 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Wed, 30 Dec 2009 15:36:42 +0800
Subject: ACPI: introduce kernel parameter acpi_sleep=sci_force_enable

Introduce kernel parameter acpi_sleep=sci_force_enable

some laptop requires SCI_EN being set directly on resume,
or else they hung somewhere in the resume code path.

We already have a blacklist for these laptops but we still need
this option, especially when debugging some suspend/resume problems,
in case there are systems that need this workaround and are not yet
in the blacklist.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 Documentation/kernel-parameters.txt |  5 ++++-
 arch/x86/kernel/acpi/sleep.c        |  2 ++
 drivers/acpi/sleep.c                | 29 +++++++++++++++++------------
 include/linux/acpi.h                |  1 +
 4 files changed, 24 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5ba4d9dff113..736d45602886 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -240,7 +240,7 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	acpi_sleep=	[HW,ACPI] Sleep options
 			Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
-				  old_ordering, s4_nonvs }
+				  old_ordering, s4_nonvs, sci_force_enable }
 			See Documentation/power/video.txt for information on
 			s3_bios and s3_mode.
 			s3_beep is for debugging; it makes the PC's speaker beep
@@ -253,6 +253,9 @@ and is between 256 and 4096 characters. It is defined in the file
 			of _PTS is used by default).
 			s4_nonvs prevents the kernel from saving/restoring the
 			ACPI NVS memory during hibernation.
+			sci_force_enable causes the kernel to set SCI_EN directly
+			on resume from S1/S3 (which is against the ACPI spec,
+			but some broken systems don't work without it).
 
 	acpi_use_timer_override [HW,ACPI]
 			Use timer override. For some broken Nvidia NF5 boards
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 82e508677b91..f9961034e557 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -162,6 +162,8 @@ static int __init acpi_sleep_setup(char *str)
 #endif
 		if (strncmp(str, "old_ordering", 12) == 0)
 			acpi_old_suspend_ordering();
+		if (strncmp(str, "sci_force_enable", 16) == 0)
+			acpi_set_sci_en_on_resume();
 		str = strchr(str, ',');
 		if (str != NULL)
 			str += strspn(str, ", \t");
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 5f2c379ab7bf..79d33d908b5a 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -80,6 +80,23 @@ static int acpi_sleep_prepare(u32 acpi_state)
 
 #ifdef CONFIG_ACPI_SLEEP
 static u32 acpi_target_sleep_state = ACPI_STATE_S0;
+/*
+ * According to the ACPI specification the BIOS should make sure that ACPI is
+ * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states.  Still,
+ * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI
+ * on such systems during resume.  Unfortunately that doesn't help in
+ * particularly pathological cases in which SCI_EN has to be set directly on
+ * resume, although the specification states very clearly that this flag is
+ * owned by the hardware.  The set_sci_en_on_resume variable will be set in such
+ * cases.
+ */
+static bool set_sci_en_on_resume;
+
+void __init acpi_set_sci_en_on_resume(void)
+{
+	set_sci_en_on_resume = true;
+}
+
 /*
  * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the
  * user to request that behavior by using the 'acpi_old_suspend_ordering'
@@ -170,18 +187,6 @@ static void acpi_pm_end(void)
 #endif /* CONFIG_ACPI_SLEEP */
 
 #ifdef CONFIG_SUSPEND
-/*
- * According to the ACPI specification the BIOS should make sure that ACPI is
- * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states.  Still,
- * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI
- * on such systems during resume.  Unfortunately that doesn't help in
- * particularly pathological cases in which SCI_EN has to be set directly on
- * resume, although the specification states very clearly that this flag is
- * owned by the hardware.  The set_sci_en_on_resume variable will be set in such
- * cases.
- */
-static bool set_sci_en_on_resume;
-
 extern void do_suspend_lowlevel(void);
 
 static u32 acpi_suspend_states[] = {
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index ce945d4845fc..36924255c0d5 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -251,6 +251,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n,
 void __init acpi_no_s4_hw_signature(void);
 void __init acpi_old_suspend_ordering(void);
 void __init acpi_s4_no_nvs(void);
+void __init acpi_set_sci_en_on_resume(void);
 #endif /* CONFIG_PM_SLEEP */
 
 struct acpi_osc_context {
-- 
cgit v1.2.3


From 2f5cb43406d0b29b96248f5328a14a6f6abf8ae6 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Wed, 30 Dec 2009 08:23:30 +0000
Subject: phylib: Properly reinitialize PHYs after hibernation

Since hibernation assumes power loss, we should fully reinitialize
PHYs (including platform fixups), as if PHYs were just attached.

This patch factors phy_init_hw() out of phy_attach_direct(), then
converts mdio_bus to dev_pm_ops and adds an appropriate restore()
callback.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c   | 50 +++++++++++++++++++++++++++++++++++++-------
 drivers/net/phy/phy_device.c | 30 +++++++++++++-------------
 include/linux/phy.h          |  1 +
 3 files changed, 59 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 49252d390903..e17b70291bbc 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -264,6 +264,8 @@ static int mdio_bus_match(struct device *dev, struct device_driver *drv)
 		(phydev->phy_id & phydrv->phy_id_mask));
 }
 
+#ifdef CONFIG_PM
+
 static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
 {
 	struct device_driver *drv = phydev->dev.driver;
@@ -295,10 +297,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
 	return true;
 }
 
-/* Suspend and resume.  Copied from platform_suspend and
- * platform_resume
- */
-static int mdio_bus_suspend(struct device * dev, pm_message_t state)
+static int mdio_bus_suspend(struct device *dev)
 {
 	struct phy_driver *phydrv = to_phy_driver(dev->driver);
 	struct phy_device *phydev = to_phy_device(dev);
@@ -318,7 +317,7 @@ static int mdio_bus_suspend(struct device * dev, pm_message_t state)
 	return phydrv->suspend(phydev);
 }
 
-static int mdio_bus_resume(struct device * dev)
+static int mdio_bus_resume(struct device *dev)
 {
 	struct phy_driver *phydrv = to_phy_driver(dev->driver);
 	struct phy_device *phydev = to_phy_device(dev);
@@ -338,11 +337,48 @@ no_resume:
 	return 0;
 }
 
+static int mdio_bus_restore(struct device *dev)
+{
+	struct phy_device *phydev = to_phy_device(dev);
+	struct net_device *netdev = phydev->attached_dev;
+	int ret;
+
+	if (!netdev)
+		return 0;
+
+	ret = phy_init_hw(phydev);
+	if (ret < 0)
+		return ret;
+
+	/* The PHY needs to renegotiate. */
+	phydev->link = 0;
+	phydev->state = PHY_UP;
+
+	phy_start_machine(phydev, NULL);
+
+	return 0;
+}
+
+static struct dev_pm_ops mdio_bus_pm_ops = {
+	.suspend = mdio_bus_suspend,
+	.resume = mdio_bus_resume,
+	.freeze = mdio_bus_suspend,
+	.thaw = mdio_bus_resume,
+	.restore = mdio_bus_restore,
+};
+
+#define MDIO_BUS_PM_OPS (&mdio_bus_pm_ops)
+
+#else
+
+#define MDIO_BUS_PM_OPS NULL
+
+#endif /* CONFIG_PM */
+
 struct bus_type mdio_bus_type = {
 	.name		= "mdio_bus",
 	.match		= mdio_bus_match,
-	.suspend	= mdio_bus_suspend,
-	.resume		= mdio_bus_resume,
+	.pm		= MDIO_BUS_PM_OPS,
 };
 EXPORT_SYMBOL(mdio_bus_type);
 
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index b10fedd82143..8212b2b93422 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -378,6 +378,20 @@ void phy_disconnect(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_disconnect);
 
+int phy_init_hw(struct phy_device *phydev)
+{
+	int ret;
+
+	if (!phydev->drv || !phydev->drv->config_init)
+		return 0;
+
+	ret = phy_scan_fixups(phydev);
+	if (ret < 0)
+		return ret;
+
+	return phydev->drv->config_init(phydev);
+}
+
 /**
  * phy_attach_direct - attach a network device to a given PHY device pointer
  * @dev: network device to attach
@@ -425,21 +439,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 	/* Do initial configuration here, now that
 	 * we have certain key parameters
 	 * (dev_flags and interface) */
-	if (phydev->drv->config_init) {
-		int err;
-
-		err = phy_scan_fixups(phydev);
-
-		if (err < 0)
-			return err;
-
-		err = phydev->drv->config_init(phydev);
-
-		if (err < 0)
-			return err;
-	}
-
-	return 0;
+	return phy_init_hw(phydev);
 }
 EXPORT_SYMBOL(phy_attach_direct);
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index b1368b8f6572..7968defd2fa7 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -447,6 +447,7 @@ struct phy_device* get_phy_device(struct mii_bus *bus, int addr);
 int phy_device_register(struct phy_device *phy);
 int phy_clear_interrupt(struct phy_device *phydev);
 int phy_config_interrupt(struct phy_device *phydev, u32 interrupts);
+int phy_init_hw(struct phy_device *phydev);
 int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 		u32 flags, phy_interface_t interface);
 struct phy_device * phy_attach(struct net_device *dev,
-- 
cgit v1.2.3


From 0719d3434747889b314a1e8add776418c4148bcf Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 30 Dec 2009 00:39:22 +0100
Subject: reiserfs: Fix reiserfs lock <-> i_xattr_sem dependency inversion

i_xattr_sem depends on the reiserfs lock. But after we grab
i_xattr_sem, we may relax/relock the reiserfs lock while waiting
on a freezed filesystem, creating a dependency inversion between
the two locks.

In order to avoid the i_xattr_sem -> reiserfs lock dependency, let's
create a reiserfs_down_read_safe() that acts like
reiserfs_mutex_lock_safe(): relax the reiserfs lock while grabbing
another lock to avoid undesired dependencies induced by the
heivyweight reiserfs lock.

This fixes the following warning:

[  990.005931] =======================================================
[  990.012373] [ INFO: possible circular locking dependency detected ]
[  990.013233] 2.6.33-rc1 #1
[  990.013233] -------------------------------------------------------
[  990.013233] dbench/1891 is trying to acquire lock:
[  990.013233]  (&REISERFS_SB(s)->lock){+.+.+.}, at: [<ffffffff81159505>] reiserfs_write_lock+0x35/0x50
[  990.013233]
[  990.013233] but task is already holding lock:
[  990.013233]  (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}, at: [<ffffffff8115899a>] reiserfs_xattr_set_handle+0x8a/0x470
[  990.013233]
[  990.013233] which lock already depends on the new lock.
[  990.013233]
[  990.013233]
[  990.013233] the existing dependency chain (in reverse order) is:
[  990.013233]
[  990.013233] -> #1 (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}:
[  990.013233]        [<ffffffff81063afc>] __lock_acquire+0xf9c/0x1560
[  990.013233]        [<ffffffff8106414f>] lock_acquire+0x8f/0xb0
[  990.013233]        [<ffffffff814ac194>] down_write+0x44/0x80
[  990.013233]        [<ffffffff8115899a>] reiserfs_xattr_set_handle+0x8a/0x470
[  990.013233]        [<ffffffff81158e30>] reiserfs_xattr_set+0xb0/0x150
[  990.013233]        [<ffffffff8115a6aa>] user_set+0x8a/0x90
[  990.013233]        [<ffffffff8115901a>] reiserfs_setxattr+0xaa/0xb0
[  990.013233]        [<ffffffff810e2596>] __vfs_setxattr_noperm+0x36/0xa0
[  990.013233]        [<ffffffff810e26bc>] vfs_setxattr+0xbc/0xc0
[  990.013233]        [<ffffffff810e2780>] setxattr+0xc0/0x150
[  990.013233]        [<ffffffff810e289d>] sys_fsetxattr+0x8d/0xa0
[  990.013233]        [<ffffffff81002dab>] system_call_fastpath+0x16/0x1b
[  990.013233]
[  990.013233] -> #0 (&REISERFS_SB(s)->lock){+.+.+.}:
[  990.013233]        [<ffffffff81063e30>] __lock_acquire+0x12d0/0x1560
[  990.013233]        [<ffffffff8106414f>] lock_acquire+0x8f/0xb0
[  990.013233]        [<ffffffff814aba77>] __mutex_lock_common+0x47/0x3b0
[  990.013233]        [<ffffffff814abebe>] mutex_lock_nested+0x3e/0x50
[  990.013233]        [<ffffffff81159505>] reiserfs_write_lock+0x35/0x50
[  990.013233]        [<ffffffff811340e5>] reiserfs_prepare_write+0x45/0x180
[  990.013233]        [<ffffffff81158bb6>] reiserfs_xattr_set_handle+0x2a6/0x470
[  990.013233]        [<ffffffff81158e30>] reiserfs_xattr_set+0xb0/0x150
[  990.013233]        [<ffffffff8115a6aa>] user_set+0x8a/0x90
[  990.013233]        [<ffffffff8115901a>] reiserfs_setxattr+0xaa/0xb0
[  990.013233]        [<ffffffff810e2596>] __vfs_setxattr_noperm+0x36/0xa0
[  990.013233]        [<ffffffff810e26bc>] vfs_setxattr+0xbc/0xc0
[  990.013233]        [<ffffffff810e2780>] setxattr+0xc0/0x150
[  990.013233]        [<ffffffff810e289d>] sys_fsetxattr+0x8d/0xa0
[  990.013233]        [<ffffffff81002dab>] system_call_fastpath+0x16/0x1b
[  990.013233]
[  990.013233] other info that might help us debug this:
[  990.013233]
[  990.013233] 2 locks held by dbench/1891:
[  990.013233]  #0:  (&sb->s_type->i_mutex_key#12){+.+.+.}, at: [<ffffffff810e2678>] vfs_setxattr+0x78/0xc0
[  990.013233]  #1:  (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}, at: [<ffffffff8115899a>] reiserfs_xattr_set_handle+0x8a/0x470
[  990.013233]
[  990.013233] stack backtrace:
[  990.013233] Pid: 1891, comm: dbench Not tainted 2.6.33-rc1 #1
[  990.013233] Call Trace:
[  990.013233]  [<ffffffff81061639>] print_circular_bug+0xe9/0xf0
[  990.013233]  [<ffffffff81063e30>] __lock_acquire+0x12d0/0x1560
[  990.013233]  [<ffffffff8115899a>] ? reiserfs_xattr_set_handle+0x8a/0x470
[  990.013233]  [<ffffffff8106414f>] lock_acquire+0x8f/0xb0
[  990.013233]  [<ffffffff81159505>] ? reiserfs_write_lock+0x35/0x50
[  990.013233]  [<ffffffff8115899a>] ? reiserfs_xattr_set_handle+0x8a/0x470
[  990.013233]  [<ffffffff814aba77>] __mutex_lock_common+0x47/0x3b0
[  990.013233]  [<ffffffff81159505>] ? reiserfs_write_lock+0x35/0x50
[  990.013233]  [<ffffffff81159505>] ? reiserfs_write_lock+0x35/0x50
[  990.013233]  [<ffffffff81062592>] ? mark_held_locks+0x72/0xa0
[  990.013233]  [<ffffffff814ab81d>] ? __mutex_unlock_slowpath+0xbd/0x140
[  990.013233]  [<ffffffff810628ad>] ? trace_hardirqs_on_caller+0x14d/0x1a0
[  990.013233]  [<ffffffff814abebe>] mutex_lock_nested+0x3e/0x50
[  990.013233]  [<ffffffff81159505>] reiserfs_write_lock+0x35/0x50
[  990.013233]  [<ffffffff811340e5>] reiserfs_prepare_write+0x45/0x180
[  990.013233]  [<ffffffff81158bb6>] reiserfs_xattr_set_handle+0x2a6/0x470
[  990.013233]  [<ffffffff81158e30>] reiserfs_xattr_set+0xb0/0x150
[  990.013233]  [<ffffffff814abcb4>] ? __mutex_lock_common+0x284/0x3b0
[  990.013233]  [<ffffffff8115a6aa>] user_set+0x8a/0x90
[  990.013233]  [<ffffffff8115901a>] reiserfs_setxattr+0xaa/0xb0
[  990.013233]  [<ffffffff810e2596>] __vfs_setxattr_noperm+0x36/0xa0
[  990.013233]  [<ffffffff810e26bc>] vfs_setxattr+0xbc/0xc0
[  990.013233]  [<ffffffff810e2780>] setxattr+0xc0/0x150
[  990.013233]  [<ffffffff81056018>] ? sched_clock_cpu+0xb8/0x100
[  990.013233]  [<ffffffff8105eded>] ? trace_hardirqs_off+0xd/0x10
[  990.013233]  [<ffffffff810560a3>] ? cpu_clock+0x43/0x50
[  990.013233]  [<ffffffff810c6820>] ? fget+0xb0/0x110
[  990.013233]  [<ffffffff810c6770>] ? fget+0x0/0x110
[  990.013233]  [<ffffffff81002ddc>] ? sysret_check+0x27/0x62
[  990.013233]  [<ffffffff810e289d>] sys_fsetxattr+0x8d/0xa0
[  990.013233]  [<ffffffff81002dab>] system_call_fastpath+0x16/0x1b

Reported-and-tested-by: Christian Kujau <lists@nerdbynature.de>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
---
 fs/reiserfs/xattr.c         | 2 +-
 include/linux/reiserfs_fs.h | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8891cd88a3f4..a0e2e7acdc75 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -484,7 +484,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
-	down_write(&REISERFS_I(inode)->i_xattr_sem);
+	reiserfs_down_read_safe(&REISERFS_I(inode)->i_xattr_sem, inode->i_sb);
 
 	xahash = xattr_hash(buffer, buffer_size);
 	while (buffer_pos < buffer_size || buffer_pos == 0) {
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 4351b49e2b1e..35d3f459b0ac 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -106,6 +106,14 @@ reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
 	reiserfs_write_lock(s);
 }
 
+static inline void
+reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
+{
+	reiserfs_write_unlock(s);
+	down_read(sem);
+	reiserfs_write_lock(s);
+}
+
 /*
  * When we schedule, we usually want to also release the write lock,
  * according to the previous bkl based locking scheme of reiserfs.
-- 
cgit v1.2.3


From c4a62ca362258d98f42efb282cfbf9b61caffdbe Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 30 Dec 2009 03:20:19 +0100
Subject: reiserfs: Warn on lock relax if taken recursively

When we relax the reiserfs lock to avoid creating unwanted
dependencies against others locks while grabbing these,
we want to ensure it has not been taken recursively, otherwise
the lock won't be really relaxed. Only its depth will be decreased.
The unwanted dependency would then actually happen.

To prevent from that, add a reiserfs_lock_check_recursive() call
in the places that need it.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
---
 fs/reiserfs/lock.c          | 9 +++++++++
 include/linux/reiserfs_fs.h | 9 +++++++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index ee2cfc0fd8a7..b87aa2c1afc1 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -86,3 +86,12 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
 		reiserfs_panic(sb, "%s called without kernel lock held %d",
 			       caller);
 }
+
+#ifdef CONFIG_REISERFS_CHECK
+void reiserfs_lock_check_recursive(struct super_block *sb)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
+
+	WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n");
+}
+#endif
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 35d3f459b0ac..793bf8351ab8 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -62,6 +62,12 @@ void reiserfs_write_unlock(struct super_block *s);
 int reiserfs_write_lock_once(struct super_block *s);
 void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
 
+#ifdef CONFIG_REISERFS_CHECK
+void reiserfs_lock_check_recursive(struct super_block *s);
+#else
+static inline void reiserfs_lock_check_recursive(struct super_block *s) { }
+#endif
+
 /*
  * Several mutexes depend on the write lock.
  * However sometimes we want to relax the write lock while we hold
@@ -92,6 +98,7 @@ void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
 static inline void reiserfs_mutex_lock_safe(struct mutex *m,
 			       struct super_block *s)
 {
+	reiserfs_lock_check_recursive(s);
 	reiserfs_write_unlock(s);
 	mutex_lock(m);
 	reiserfs_write_lock(s);
@@ -101,6 +108,7 @@ static inline void
 reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
 			       struct super_block *s)
 {
+	reiserfs_lock_check_recursive(s);
 	reiserfs_write_unlock(s);
 	mutex_lock_nested(m, subclass);
 	reiserfs_write_lock(s);
@@ -109,6 +117,7 @@ reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
 static inline void
 reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
 {
+	reiserfs_lock_check_recursive(s);
 	reiserfs_write_unlock(s);
 	down_read(sem);
 	reiserfs_write_lock(s);
-- 
cgit v1.2.3


From 96d07d211739fd2450ac54e81d00fa40fcd4b1bd Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Fri, 20 Nov 2009 14:16:33 +0100
Subject: resource: move kernel function inside __KERNEL__

It is an internal function. Move it inside __KERNEL__ ifdef, along
with task_struct declaration.

Then we get:
--- /usr/include/linux/resource.h       2009-09-14 15:09:29.000000000 +0200
+++ usr/include/linux/resource.h       2010-01-04 11:30:54.000000000 +0100
@@ -3,8 +3,6 @@

 #include <linux/time.h>

-struct task_struct;
-
 /*
  * Resource control/accounting header file for linux
  */
@@ -70,6 +68,5 @@
  */
 #include <asm/resource.h>

-int getrusage(struct task_struct *p, int who, struct rusage *ru);

 #endif

***********

include/linux/Kbuild is untouched, since unifdef is run even on
headers-y nowadays.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
---
 include/linux/resource.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/resource.h b/include/linux/resource.h
index 40fc7e626082..f1e914eefeab 100644
--- a/include/linux/resource.h
+++ b/include/linux/resource.h
@@ -3,8 +3,6 @@
 
 #include <linux/time.h>
 
-struct task_struct;
-
 /*
  * Resource control/accounting header file for linux
  */
@@ -70,6 +68,12 @@ struct rlimit {
  */
 #include <asm/resource.h>
 
+#ifdef __KERNEL__
+
+struct task_struct;
+
 int getrusage(struct task_struct *p, int who, struct rusage __user *ru);
 
+#endif /* __KERNEL__ */
+
 #endif
-- 
cgit v1.2.3


From 3e10e716abf3c71bdb5d86b8f507f9e72236c9cd Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Thu, 19 Nov 2009 17:16:37 +0100
Subject: resource: add helpers for fetching rlimits

We want to be sure that compiler fetches the limit variable only
once, so add helpers for fetching current and maximal resource
limits which do that.

Add them to sched.h (instead of resource.h) due to circular dependency
 sched.h->resource.h->task_struct
Alternative would be to create a separate res_access.h or similar.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f2f842db03ce..8d4991be9d53 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2601,6 +2601,28 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 }
 #endif /* CONFIG_MM_OWNER */
 
+static inline unsigned long task_rlimit(const struct task_struct *tsk,
+		unsigned int limit)
+{
+	return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
+}
+
+static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
+		unsigned int limit)
+{
+	return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max);
+}
+
+static inline unsigned long rlimit(unsigned int limit)
+{
+	return task_rlimit(current, limit);
+}
+
+static inline unsigned long rlimit_max(unsigned int limit)
+{
+	return task_rlimit_max(current, limit);
+}
+
 #endif /* __KERNEL__ */
 
 #endif
-- 
cgit v1.2.3


From 1ae861e652b5457e7fa98ccbc55abea1e207916e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 31 Dec 2009 12:15:54 +0100
Subject: PCI/PM: Use per-device D3 delays

It turns out that some PCI devices require extra delays when changing
power state from D3 to D0 (and the other way around).  Although this
is against the PCI specification, we can handle it quite easily by
allowing drivers to define arbitrary D3 delays for devices known to
require extra time for switching power states.

Introduce additional field d3_delay in struct pci_dev and use it to
store the value of the device's D0->D3 delay, in miliseconds.  Make
the PCI PM core code use the per-device d3_delay unless
pci_pm_d3_delay is greater (in which case the latter is used).
[This also allows the driver to specify d3_delay shorter than the
 10 ms required by the PCI standard if the device is known to be able
 to handle that.]

Make the sky2 driver set d3_delay to 150 for devices handled by it.

Fixes http://bugzilla.kernel.org/show_bug.cgi?id=14730 which is a
listed regression from 2.6.30.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/net/sky2.c  |  1 +
 drivers/pci/pci.c   | 19 +++++++++++++++----
 include/linux/pci.h |  1 +
 3 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 1c01b96c9611..2d28d58200d0 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -4684,6 +4684,7 @@ static int __devinit sky2_probe(struct pci_dev *pdev,
 	INIT_WORK(&hw->restart_work, sky2_restart);
 
 	pci_set_drvdata(pdev, hw);
+	pdev->d3_delay = 150;
 
 	return 0;
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 0906599ebfde..315fea47e784 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -29,7 +29,17 @@ const char *pci_power_names[] = {
 };
 EXPORT_SYMBOL_GPL(pci_power_names);
 
-unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT;
+unsigned int pci_pm_d3_delay;
+
+static void pci_dev_d3_sleep(struct pci_dev *dev)
+{
+	unsigned int delay = dev->d3_delay;
+
+	if (delay < pci_pm_d3_delay)
+		delay = pci_pm_d3_delay;
+
+	msleep(delay);
+}
 
 #ifdef CONFIG_PCI_DOMAINS
 int pci_domains_supported = 1;
@@ -522,7 +532,7 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state)
 	/* Mandatory power management transition delays */
 	/* see PCI PM 1.1 5.6.1 table 18 */
 	if (state == PCI_D3hot || dev->current_state == PCI_D3hot)
-		msleep(pci_pm_d3_delay);
+		pci_dev_d3_sleep(dev);
 	else if (state == PCI_D2 || dev->current_state == PCI_D2)
 		udelay(PCI_PM_D2_DELAY);
 
@@ -1409,6 +1419,7 @@ void pci_pm_init(struct pci_dev *dev)
 	}
 
 	dev->pm_cap = pm;
+	dev->d3_delay = PCI_PM_D3_WAIT;
 
 	dev->d1_support = false;
 	dev->d2_support = false;
@@ -2247,12 +2258,12 @@ static int pci_pm_reset(struct pci_dev *dev, int probe)
 	csr &= ~PCI_PM_CTRL_STATE_MASK;
 	csr |= PCI_D3hot;
 	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
-	msleep(pci_pm_d3_delay);
+	pci_dev_d3_sleep(dev);
 
 	csr &= ~PCI_PM_CTRL_STATE_MASK;
 	csr |= PCI_D0;
 	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
-	msleep(pci_pm_d3_delay);
+	pci_dev_d3_sleep(dev);
 
 	return 0;
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 5da0690d9cee..174e5392e51e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -243,6 +243,7 @@ struct pci_dev {
 	unsigned int	d2_support:1;	/* Low power state D2 is supported */
 	unsigned int	no_d1d2:1;	/* Only allow D0 and D3 */
 	unsigned int	wakeup_prepared:1;
+	unsigned int	d3_delay;	/* D3->D0 transition time in ms */
 
 #ifdef CONFIG_PCIEASPM
 	struct pcie_link_state	*link_state;	/* ASPM link state. */
-- 
cgit v1.2.3


From cfe79c00a2f4f687eed8b7534d1d3d3d35540c29 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier.adi@gmail.com>
Date: Wed, 6 Jan 2010 17:23:23 +0000
Subject: NOMMU: Avoiding duplicate icache flushes of shared maps

When working with FDPIC, there are many shared mappings of read-only
code regions between applications (the C library, applet packages like
busybox, etc.), but the current do_mmap_pgoff() function will issue an
icache flush whenever a VMA is added to an MM instead of only doing it
when the map is initially created.

The flush can instead be done when a region is first mmapped PROT_EXEC.
Note that we may not rely on the first mapping of a region being
executable - it's possible for it to be PROT_READ only, so we have to
remember whether we've flushed the region or not, and then flush the
entire region when a bit of it is made executable.

However, this also affects the brk area.  That will no longer be
executable.  We can mprotect() it to PROT_EXEC on MPU-mode kernels, but
for NOMMU mode kernels, when it increases the brk allocation, making
sys_brk() flush the extra from the icache should suffice.  The brk area
probably isn't used by NOMMU programs since the brk area can only use up
the leavings from the stack allocation, where the stack allocation is
larger than requested.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h |  2 ++
 mm/nommu.c               | 11 ++++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 84a524afb3dc..84d020bed083 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -123,6 +123,8 @@ struct vm_region {
 	struct file	*vm_file;	/* the backing file or NULL */
 
 	atomic_t	vm_usage;	/* region usage count */
+	bool		vm_icache_flushed : 1; /* true if the icache has been flushed for
+						* this region */
 };
 
 /*
diff --git a/mm/nommu.c b/mm/nommu.c
index 6f9248f89bde..a8d17521624a 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -432,6 +432,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	/*
 	 * Ok, looks good - let it rip.
 	 */
+	flush_icache_range(mm->brk, brk);
 	return mm->brk = brk;
 }
 
@@ -1353,10 +1354,14 @@ unsigned long do_mmap_pgoff(struct file *file,
 share:
 	add_vma_to_mm(current->mm, vma);
 
-	up_write(&nommu_region_sem);
+	/* we flush the region from the icache only when the first executable
+	 * mapping of it is made  */
+	if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) {
+		flush_icache_range(region->vm_start, region->vm_end);
+		region->vm_icache_flushed = true;
+	}
 
-	if (prot & PROT_EXEC)
-		flush_icache_range(result, result + len);
+	up_write(&nommu_region_sem);
 
 	kleave(" = %lx", result);
 	return result;
-- 
cgit v1.2.3


From 6144a85a0e018c19bc4b24f7eb6c1f3f7431813d Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 7 Jan 2010 11:58:36 -0600
Subject: maccess,probe_kernel: Allow arch specific override
 probe_kernel_(read|write)

Some archs such as blackfin, would like to have an arch specific
probe_kernel_read() and probe_kernel_write() implementation which can
fall back to the generic implementation if no special operations are
needed.

CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
---
 include/linux/uaccess.h |  4 +++-
 mm/maccess.c            | 11 +++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 6b58367d145e..d512d98dfb7d 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -94,6 +94,7 @@ static inline unsigned long __copy_from_user_nocache(void *to,
  * happens, handle that and return -EFAULT.
  */
 extern long probe_kernel_read(void *dst, void *src, size_t size);
+extern long __probe_kernel_read(void *dst, void *src, size_t size);
 
 /*
  * probe_kernel_write(): safely attempt to write to a location
@@ -104,6 +105,7 @@ extern long probe_kernel_read(void *dst, void *src, size_t size);
  * Safely write to address @dst from the buffer at @src.  If a kernel fault
  * happens, handle that and return -EFAULT.
  */
-extern long probe_kernel_write(void *dst, void *src, size_t size);
+extern long notrace probe_kernel_write(void *dst, void *src, size_t size);
+extern long notrace __probe_kernel_write(void *dst, void *src, size_t size);
 
 #endif		/* __LINUX_UACCESS_H__ */
diff --git a/mm/maccess.c b/mm/maccess.c
index 9073695ff25f..4e348dbaecd7 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -14,7 +14,11 @@
  * Safely read from address @src to the buffer at @dst.  If a kernel fault
  * happens, handle that and return -EFAULT.
  */
-long probe_kernel_read(void *dst, void *src, size_t size)
+
+long __weak probe_kernel_read(void *dst, void *src, size_t size)
+    __attribute__((alias("__probe_kernel_read")));
+
+long __probe_kernel_read(void *dst, void *src, size_t size)
 {
 	long ret;
 	mm_segment_t old_fs = get_fs();
@@ -39,7 +43,10 @@ EXPORT_SYMBOL_GPL(probe_kernel_read);
  * Safely write to address @dst from the buffer at @src.  If a kernel fault
  * happens, handle that and return -EFAULT.
  */
-long notrace __weak probe_kernel_write(void *dst, void *src, size_t size)
+long __weak probe_kernel_write(void *dst, void *src, size_t size)
+    __attribute__((alias("__probe_kernel_write")));
+
+long __probe_kernel_write(void *dst, void *src, size_t size)
 {
 	long ret;
 	mm_segment_t old_fs = get_fs();
-- 
cgit v1.2.3


From b11e1eca7ed9c0b5dab21a62c11acc711d9bdda0 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 7 Jan 2010 11:58:37 -0600
Subject: kgdb: Fix kernel-doc format error in kgdb.h

linux-next-20081022//include/linux/kgdb.h:308): duplicate section name 'Description'

and fix typos in that file's kernel-doc comments.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 include/linux/kgdb.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 6adcc297e354..19ec41a183f5 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -29,8 +29,7 @@ struct pt_regs;
  *
  *	On some architectures it is required to skip a breakpoint
  *	exception when it occurs after a breakpoint has been removed.
- *	This can be implemented in the architecture specific portion of
- *	for kgdb.
+ *	This can be implemented in the architecture specific portion of kgdb.
  */
 extern int kgdb_skipexception(int exception, struct pt_regs *regs);
 
@@ -65,7 +64,7 @@ struct uart_port;
 /**
  *	kgdb_breakpoint - compiled in breakpoint
  *
- *	This will be impelmented a static inline per architecture.  This
+ *	This will be implemented as a static inline per architecture.  This
  *	function is called by the kgdb core to execute an architecture
  *	specific trap to cause kgdb to enter the exception processing.
  *
@@ -190,7 +189,7 @@ kgdb_arch_handle_exception(int vector, int signo, int err_code,
  *	@flags: Current IRQ state
  *
  *	On SMP systems, we need to get the attention of the other CPUs
- *	and get them be in a known state.  This should do what is needed
+ *	and get them into a known state.  This should do what is needed
  *	to get the other CPUs to call kgdb_wait(). Note that on some arches,
  *	the NMI approach is not used for rounding up all the CPUs. For example,
  *	in case of MIPS, smp_call_function() is used to roundup CPUs. In
-- 
cgit v1.2.3


From 4b529401c5089cf33f7165607cbc2fde43357bfb Mon Sep 17 00:00:00 2001
From: Andreas Fenkart <andreas.fenkart@streamunlimited.com>
Date: Fri, 8 Jan 2010 14:42:31 -0800
Subject: mm: make totalhigh_pages unsigned long

Makes it consistent with the extern declaration, used when CONFIG_HIGHMEM
is set Removes redundant casts in printout messages

Signed-off-by: Andreas Fenkart <andreas.fenkart@streamunlimited.com>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Chen Liqin <liqin.chen@sunplusct.com>
Cc: Lennox Wu <lennox.wu@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/init.c               | 2 +-
 arch/mips/mm/init.c              | 2 +-
 arch/mips/sgi-ip27/ip27-memory.c | 2 +-
 arch/mn10300/mm/init.c           | 3 +--
 arch/score/mm/init.c             | 2 +-
 arch/x86/mm/init_32.c            | 3 +--
 include/linux/highmem.h          | 2 +-
 7 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 52c40d155672..a04ffbbbe253 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -616,7 +616,7 @@ void __init mem_init(void)
 		"%dK data, %dK init, %luK highmem)\n",
 		nr_free_pages() << (PAGE_SHIFT-10), codesize >> 10,
 		datasize >> 10, initsize >> 10,
-		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
+		totalhigh_pages << (PAGE_SHIFT-10));
 
 	if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
 		extern int sysctl_overcommit_memory;
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 9e8d00389eef..1651942f7feb 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -424,7 +424,7 @@ void __init mem_init(void)
 	       reservedpages << (PAGE_SHIFT-10),
 	       datasize >> 10,
 	       initsize >> 10,
-	       (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
+	       totalhigh_pages << (PAGE_SHIFT-10));
 }
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index f61c164d1e67..bc1297109cc5 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -505,5 +505,5 @@ void __init mem_init(void)
 	       (num_physpages - tmp) << (PAGE_SHIFT-10),
 	       datasize >> 10,
 	       initsize >> 10,
-	       (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
+	       totalhigh_pages << (PAGE_SHIFT-10));
 }
diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c
index ec1420562dc7..dd27a9a35152 100644
--- a/arch/mn10300/mm/init.c
+++ b/arch/mn10300/mm/init.c
@@ -118,8 +118,7 @@ void __init mem_init(void)
 	       reservedpages << (PAGE_SHIFT - 10),
 	       datasize >> 10,
 	       initsize >> 10,
-	       (unsigned long) (totalhigh_pages << (PAGE_SHIFT - 10))
-	       );
+	       totalhigh_pages << (PAGE_SHIFT - 10));
 }
 
 /*
diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c
index 8c15b2c85d5a..dfaf458d6702 100644
--- a/arch/score/mm/init.c
+++ b/arch/score/mm/init.c
@@ -106,7 +106,7 @@ void __init mem_init(void)
 			ram << (PAGE_SHIFT-10), codesize >> 10,
 			reservedpages << (PAGE_SHIFT-10), datasize >> 10,
 			initsize >> 10,
-			(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
+			totalhigh_pages << (PAGE_SHIFT-10));
 }
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c973f8e2a6cf..9a0c258a86be 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -892,8 +892,7 @@ void __init mem_init(void)
 		reservedpages << (PAGE_SHIFT-10),
 		datasize >> 10,
 		initsize >> 10,
-		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
-	       );
+		totalhigh_pages << (PAGE_SHIFT-10));
 
 	printk(KERN_INFO "virtual kernel memory layout:\n"
 		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 211ff4497269..ab2cc20e21a5 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -46,7 +46,7 @@ void kmap_flush_unused(void);
 
 static inline unsigned int nr_free_highpages(void) { return 0; }
 
-#define totalhigh_pages 0
+#define totalhigh_pages 0UL
 
 #ifndef ARCH_HAS_KMAP
 static inline void *kmap(struct page *page)
-- 
cgit v1.2.3


From e992cd9b72a18122bd5c958715623057f110793f Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Fri, 8 Jan 2010 14:42:35 -0800
Subject: kmemcheck: make bitfield annotations truly no-ops when disabled

It turns out that even zero-sized struct members (int foo[0];) will affect
the struct layout, causing us in particular to lose 4 bytes in struct
sock.

This patch fixes the regression in CONFIG_KMEMCHECK=n case.

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kmemcheck.h | 110 ++++++++++++++++++++++++----------------------
 1 file changed, 58 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index e880d4cf9e22..08d7dc4ddf40 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -36,6 +36,56 @@ int kmemcheck_hide_addr(unsigned long address);
 
 bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size);
 
+/*
+ * Bitfield annotations
+ *
+ * How to use: If you have a struct using bitfields, for example
+ *
+ *     struct a {
+ *             int x:8, y:8;
+ *     };
+ *
+ * then this should be rewritten as
+ *
+ *     struct a {
+ *             kmemcheck_bitfield_begin(flags);
+ *             int x:8, y:8;
+ *             kmemcheck_bitfield_end(flags);
+ *     };
+ *
+ * Now the "flags_begin" and "flags_end" members may be used to refer to the
+ * beginning and end, respectively, of the bitfield (and things like
+ * &x.flags_begin is allowed). As soon as the struct is allocated, the bit-
+ * fields should be annotated:
+ *
+ *     struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL);
+ *     kmemcheck_annotate_bitfield(a, flags);
+ */
+#define kmemcheck_bitfield_begin(name)	\
+	int name##_begin[0];
+
+#define kmemcheck_bitfield_end(name)	\
+	int name##_end[0];
+
+#define kmemcheck_annotate_bitfield(ptr, name)				\
+	do {								\
+		int _n;							\
+									\
+		if (!ptr)						\
+			break;						\
+									\
+		_n = (long) &((ptr)->name##_end)			\
+			- (long) &((ptr)->name##_begin);		\
+		MAYBE_BUILD_BUG_ON(_n < 0);				\
+									\
+		kmemcheck_mark_initialized(&((ptr)->name##_begin), _n);	\
+	} while (0)
+
+#define kmemcheck_annotate_variable(var)				\
+	do {								\
+		kmemcheck_mark_initialized(&(var), sizeof(var));	\
+	} while (0)							\
+
 #else
 #define kmemcheck_enabled 0
 
@@ -106,60 +156,16 @@ static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
 	return true;
 }
 
-#endif /* CONFIG_KMEMCHECK */
-
-/*
- * Bitfield annotations
- *
- * How to use: If you have a struct using bitfields, for example
- *
- *     struct a {
- *             int x:8, y:8;
- *     };
- *
- * then this should be rewritten as
- *
- *     struct a {
- *             kmemcheck_bitfield_begin(flags);
- *             int x:8, y:8;
- *             kmemcheck_bitfield_end(flags);
- *     };
- *
- * Now the "flags_begin" and "flags_end" members may be used to refer to the
- * beginning and end, respectively, of the bitfield (and things like
- * &x.flags_begin is allowed). As soon as the struct is allocated, the bit-
- * fields should be annotated:
- *
- *     struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL);
- *     kmemcheck_annotate_bitfield(a, flags);
- *
- * Note: We provide the same definitions for both kmemcheck and non-
- * kmemcheck kernels. This makes it harder to introduce accidental errors. It
- * is also allowed to pass NULL pointers to kmemcheck_annotate_bitfield().
- */
-#define kmemcheck_bitfield_begin(name)	\
-	int name##_begin[0];
-
-#define kmemcheck_bitfield_end(name)	\
-	int name##_end[0];
+#define kmemcheck_bitfield_begin(name)
+#define kmemcheck_bitfield_end(name)
+#define kmemcheck_annotate_bitfield(ptr, name)	\
+	do {					\
+	} while (0)
 
-#define kmemcheck_annotate_bitfield(ptr, name)				\
-	do {								\
-		int _n;							\
-									\
-		if (!ptr)						\
-			break;						\
-									\
-		_n = (long) &((ptr)->name##_end)			\
-			- (long) &((ptr)->name##_begin);		\
-		MAYBE_BUILD_BUG_ON(_n < 0);				\
-									\
-		kmemcheck_mark_initialized(&((ptr)->name##_begin), _n);	\
+#define kmemcheck_annotate_variable(var)	\
+	do {					\
 	} while (0)
 
-#define kmemcheck_annotate_variable(var)				\
-	do {								\
-		kmemcheck_mark_initialized(&(var), sizeof(var));	\
-	} while (0)							\
+#endif /* CONFIG_KMEMCHECK */
 
 #endif /* LINUX_KMEMCHECK_H */
-- 
cgit v1.2.3


From 7dd65feb6c603e13eba501c34c662259ab38e70e Mon Sep 17 00:00:00 2001
From: Albin Tonnerre <albin.tonnerre@free-electrons.com>
Date: Fri, 8 Jan 2010 14:42:42 -0800
Subject: lib: add support for LZO-compressed kernels

This patch series adds generic support for creating and extracting
LZO-compressed kernel images, as well as support for using such images on
the x86 and ARM architectures, and support for creating and using
LZO-compressed initrd and initramfs images.

Russell King said:

: Testing on a Cortex A9 model:
: - lzo decompressor is 65% of the time gzip takes to decompress a kernel
: - lzo kernel is 9% larger than a gzip kernel
:
: which I'm happy to say confirms your figures when comparing the two.
:
: However, when comparing your new gzip code to the old gzip code:
: - new is 99% of the size of the old code
: - new takes 42% of the time to decompress than the old code
:
: What this means is that for a proper comparison, the results get even better:
: - lzo is 7.5% larger than the old gzip'd kernel image
: - lzo takes 28% of the time that the old gzip code took
:
: So the expense seems definitely worth the effort.  The only reason I
: can think of ever using gzip would be if you needed the additional
: compression (eg, because you have limited flash to store the image.)
:
: I would argue that the default for ARM should therefore be LZO.

This patch:

The lzo compressor is worse than gzip at compression, but faster at
extraction.  Here are some figures for an ARM board I'm working on:

Uncompressed size: 3.24Mo
gzip  1.61Mo 0.72s
lzo   1.75Mo 0.48s

So for a compression ratio that is still relatively close to gzip, it's
much faster to extract, at least in that case.

This part contains:
 - Makefile routine to support lzo compression
 - Fixes to the existing lzo compressor so that it can be used in
   compressed kernels
 - wrapper around the existing lzo1x_decompress, as it only extracts one
   block at a time, while we need to extract a whole file here
 - config dialog for kernel compression

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: cleanup]
Signed-off-by: Albin Tonnerre <albin.tonnerre@free-electrons.com>
Tested-by: Wu Zhangjin <wuzhangjin@gmail.com>
Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Russell King <rmk@arm.linux.org.uk>
Acked-by: Russell King <rmk@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/decompress/unlzo.h |  10 ++
 init/Kconfig                     |  18 +++-
 lib/decompress_unlzo.c           | 209 +++++++++++++++++++++++++++++++++++++++
 lib/lzo/lzo1x_decompress.c       |   9 +-
 scripts/Makefile.lib             |   5 +
 5 files changed, 244 insertions(+), 7 deletions(-)
 create mode 100644 include/linux/decompress/unlzo.h
 create mode 100644 lib/decompress_unlzo.c

(limited to 'include/linux')

diff --git a/include/linux/decompress/unlzo.h b/include/linux/decompress/unlzo.h
new file mode 100644
index 000000000000..987229752519
--- /dev/null
+++ b/include/linux/decompress/unlzo.h
@@ -0,0 +1,10 @@
+#ifndef DECOMPRESS_UNLZO_H
+#define DECOMPRESS_UNLZO_H
+
+int unlzo(unsigned char *inbuf, int len,
+	int(*fill)(void*, unsigned int),
+	int(*flush)(void*, unsigned int),
+	unsigned char *output,
+	int *pos,
+	void(*error)(char *x));
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index a23da9f01803..d95ca7cd5d45 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -115,10 +115,13 @@ config HAVE_KERNEL_BZIP2
 config HAVE_KERNEL_LZMA
 	bool
 
+config HAVE_KERNEL_LZO
+	bool
+
 choice
 	prompt "Kernel compression mode"
 	default KERNEL_GZIP
-	depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA
+	depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_LZO
 	help
 	  The linux kernel is a kind of self-extracting executable.
 	  Several compression algorithms are available, which differ
@@ -141,9 +144,8 @@ config KERNEL_GZIP
 	bool "Gzip"
 	depends on HAVE_KERNEL_GZIP
 	help
-	  The old and tried gzip compression. Its compression ratio is
-	  the poorest among the 3 choices; however its speed (both
-	  compression and decompression) is the fastest.
+	  The old and tried gzip compression. It provides a good balance
+	  between compression ratio and decompression speed.
 
 config KERNEL_BZIP2
 	bool "Bzip2"
@@ -164,6 +166,14 @@ config KERNEL_LZMA
 	  two. Compression is slowest.	The kernel size is about 33%
 	  smaller with LZMA in comparison to gzip.
 
+config KERNEL_LZO
+	bool "LZO"
+	depends on HAVE_KERNEL_LZO
+	help
+	  Its compression ratio is the poorest among the 4. The kernel
+	  size is about about 10% bigger than gzip; however its speed
+	  (both compression and decompression) is the fastest.
+
 endchoice
 
 config SWAP
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
new file mode 100644
index 000000000000..db521f45626e
--- /dev/null
+++ b/lib/decompress_unlzo.c
@@ -0,0 +1,209 @@
+/*
+ * LZO decompressor for the Linux kernel. Code borrowed from the lzo
+ * implementation by Markus Franz Xaver Johannes Oberhumer.
+ *
+ * Linux kernel adaptation:
+ * Copyright (C) 2009
+ * Albin Tonnerre, Free Electrons <albin.tonnerre@free-electrons.com>
+ *
+ * Original code:
+ * Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer
+ * All Rights Reserved.
+ *
+ * lzop and the LZO library are free software; you can redistribute them
+ * and/or modify them under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.
+ * If not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Markus F.X.J. Oberhumer
+ * <markus@oberhumer.com>
+ * http://www.oberhumer.com/opensource/lzop/
+ */
+
+#ifdef STATIC
+#include "lzo/lzo1x_decompress.c"
+#else
+#include <linux/slab.h>
+#include <linux/decompress/unlzo.h>
+#endif
+
+#include <linux/types.h>
+#include <linux/lzo.h>
+#include <linux/decompress/mm.h>
+
+#include <linux/compiler.h>
+#include <asm/unaligned.h>
+
+static const unsigned char lzop_magic[] = {
+	0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a };
+
+#define LZO_BLOCK_SIZE        (256*1024l)
+#define HEADER_HAS_FILTER      0x00000800L
+
+STATIC inline int INIT parse_header(u8 *input, u8 *skip)
+{
+	int l;
+	u8 *parse = input;
+	u8 level = 0;
+	u16 version;
+
+	/* read magic: 9 first bits */
+	for (l = 0; l < 9; l++) {
+		if (*parse++ != lzop_magic[l])
+			return 0;
+	}
+	/* get version (2bytes), skip library version (2),
+	 * 'need to be extracted' version (2) and
+	 * method (1) */
+	version = get_unaligned_be16(parse);
+	parse += 7;
+	if (version >= 0x0940)
+		level = *parse++;
+	if (get_unaligned_be32(parse) & HEADER_HAS_FILTER)
+		parse += 8; /* flags + filter info */
+	else
+		parse += 4; /* flags */
+
+	/* skip mode and mtime_low */
+	parse += 8;
+	if (version >= 0x0940)
+		parse += 4;	/* skip mtime_high */
+
+	l = *parse++;
+	/* don't care about the file name, and skip checksum */
+	parse += l + 4;
+
+	*skip = parse - input;
+	return 1;
+}
+
+STATIC inline int INIT unlzo(u8 *input, int in_len,
+				int (*fill) (void *, unsigned int),
+				int (*flush) (void *, unsigned int),
+				u8 *output, int *posp,
+				void (*error_fn) (char *x))
+{
+	u8 skip = 0, r = 0;
+	u32 src_len, dst_len;
+	size_t tmp;
+	u8 *in_buf, *in_buf_save, *out_buf;
+	int obytes_processed = 0;
+
+	set_error_fn(error_fn);
+
+	if (output) {
+		out_buf = output;
+	} else if (!flush) {
+		error("NULL output pointer and no flush function provided");
+		goto exit;
+	} else {
+		out_buf = malloc(LZO_BLOCK_SIZE);
+		if (!out_buf) {
+			error("Could not allocate output buffer");
+			goto exit;
+		}
+	}
+
+	if (input && fill) {
+		error("Both input pointer and fill function provided, don't know what to do");
+		goto exit_1;
+	} else if (input) {
+		in_buf = input;
+	} else if (!fill || !posp) {
+		error("NULL input pointer and missing position pointer or fill function");
+		goto exit_1;
+	} else {
+		in_buf = malloc(lzo1x_worst_compress(LZO_BLOCK_SIZE));
+		if (!in_buf) {
+			error("Could not allocate input buffer");
+			goto exit_1;
+		}
+	}
+	in_buf_save = in_buf;
+
+	if (posp)
+		*posp = 0;
+
+	if (fill)
+		fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE));
+
+	if (!parse_header(input, &skip)) {
+		error("invalid header");
+		goto exit_2;
+	}
+	in_buf += skip;
+
+	if (posp)
+		*posp = skip;
+
+	for (;;) {
+		/* read uncompressed block size */
+		dst_len = get_unaligned_be32(in_buf);
+		in_buf += 4;
+
+		/* exit if last block */
+		if (dst_len == 0) {
+			if (posp)
+				*posp += 4;
+			break;
+		}
+
+		if (dst_len > LZO_BLOCK_SIZE) {
+			error("dest len longer than block size");
+			goto exit_2;
+		}
+
+		/* read compressed block size, and skip block checksum info */
+		src_len = get_unaligned_be32(in_buf);
+		in_buf += 8;
+
+		if (src_len <= 0 || src_len > dst_len) {
+			error("file corrupted");
+			goto exit_2;
+		}
+
+		/* decompress */
+		tmp = dst_len;
+		r = lzo1x_decompress_safe((u8 *) in_buf, src_len,
+						out_buf, &tmp);
+
+		if (r != LZO_E_OK || dst_len != tmp) {
+			error("Compressed data violation");
+			goto exit_2;
+		}
+
+		obytes_processed += dst_len;
+		if (flush)
+			flush(out_buf, dst_len);
+		if (output)
+			out_buf += dst_len;
+		if (posp)
+			*posp += src_len + 12;
+		if (fill) {
+			in_buf = in_buf_save;
+			fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE));
+		} else
+			in_buf += src_len;
+	}
+
+exit_2:
+	if (!input)
+		free(in_buf);
+exit_1:
+	if (!output)
+		free(out_buf);
+exit:
+	return obytes_processed;
+}
+
+#define decompress unlzo
diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c
index 5dc6b29c1575..f2fd09850223 100644
--- a/lib/lzo/lzo1x_decompress.c
+++ b/lib/lzo/lzo1x_decompress.c
@@ -11,11 +11,13 @@
  *  Richard Purdie <rpurdie@openedhand.com>
  */
 
+#ifndef STATIC
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lzo.h>
-#include <asm/byteorder.h>
+#endif
+
 #include <asm/unaligned.h>
+#include <linux/lzo.h>
 #include "lzodefs.h"
 
 #define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x))
@@ -244,9 +246,10 @@ lookbehind_overrun:
 	*out_len = op - out;
 	return LZO_E_LOOKBEHIND_OVERRUN;
 }
-
+#ifndef STATIC
 EXPORT_SYMBOL_GPL(lzo1x_decompress_safe);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("LZO1X Decompressor");
 
+#endif
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index cd815ac2a50b..0fe48cd91ffa 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -235,3 +235,8 @@ quiet_cmd_lzma = LZMA    $@
 cmd_lzma = (cat $(filter-out FORCE,$^) | \
 	lzma -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
 	(rm -f $@ ; false)
+
+quiet_cmd_lzo = LZO    $@
+cmd_lzo = (cat $(filter-out FORCE,$^) | \
+	lzop -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
+	(rm -f $@ ; false)
-- 
cgit v1.2.3


From 80884094e34456887ecdbd107d40e72c4a40f9c9 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Fri, 8 Jan 2010 14:43:08 -0800
Subject: gpio: adp5588-gpio: new driver for ADP5588 GPIO expanders

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/Kconfig        |   9 ++
 drivers/gpio/Makefile       |   1 +
 drivers/gpio/adp5588-gpio.c | 266 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/i2c/adp5588.h |  12 ++
 4 files changed, 288 insertions(+)
 create mode 100644 drivers/gpio/adp5588-gpio.c

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index a019b49ecc9b..1f1d88ae68d6 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -172,6 +172,15 @@ config GPIO_ADP5520
 	  To compile this driver as a module, choose M here: the module will
 	  be called adp5520-gpio.
 
+config GPIO_ADP5588
+	tristate "ADP5588 I2C GPIO expander"
+	depends on I2C
+	help
+	  This option enables support for 18 GPIOs found
+	  on Analog Devices ADP5588 GPIO Expanders.
+	  To compile this driver as a module, choose M here: the module will be
+	  called adp5588-gpio.
+
 comment "PCI GPIO expanders:"
 
 config GPIO_CS5535
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 52fe4cf734c7..48687238edb1 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -5,6 +5,7 @@ ccflags-$(CONFIG_DEBUG_GPIO)	+= -DDEBUG
 obj-$(CONFIG_GPIOLIB)		+= gpiolib.o
 
 obj-$(CONFIG_GPIO_ADP5520)	+= adp5520-gpio.o
+obj-$(CONFIG_GPIO_ADP5588)	+= adp5588-gpio.o
 obj-$(CONFIG_GPIO_LANGWELL)	+= langwell_gpio.o
 obj-$(CONFIG_GPIO_MAX7301)	+= max7301.o
 obj-$(CONFIG_GPIO_MAX732X)	+= max732x.o
diff --git a/drivers/gpio/adp5588-gpio.c b/drivers/gpio/adp5588-gpio.c
new file mode 100644
index 000000000000..afc097a16b33
--- /dev/null
+++ b/drivers/gpio/adp5588-gpio.c
@@ -0,0 +1,266 @@
+/*
+ * GPIO Chip driver for Analog Devices
+ * ADP5588 I/O Expander and QWERTY Keypad Controller
+ *
+ * Copyright 2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+
+#include <linux/i2c/adp5588.h>
+
+#define DRV_NAME		"adp5588-gpio"
+#define MAXGPIO			18
+#define ADP_BANK(offs)		((offs) >> 3)
+#define ADP_BIT(offs)		(1u << ((offs) & 0x7))
+
+struct adp5588_gpio {
+	struct i2c_client *client;
+	struct gpio_chip gpio_chip;
+	struct mutex lock;	/* protect cached dir, dat_out */
+	unsigned gpio_start;
+	uint8_t dat_out[3];
+	uint8_t dir[3];
+};
+
+static int adp5588_gpio_read(struct i2c_client *client, u8 reg)
+{
+	int ret = i2c_smbus_read_byte_data(client, reg);
+
+	if (ret < 0)
+		dev_err(&client->dev, "Read Error\n");
+
+	return ret;
+}
+
+static int adp5588_gpio_write(struct i2c_client *client, u8 reg, u8 val)
+{
+	int ret = i2c_smbus_write_byte_data(client, reg, val);
+
+	if (ret < 0)
+		dev_err(&client->dev, "Write Error\n");
+
+	return ret;
+}
+
+static int adp5588_gpio_get_value(struct gpio_chip *chip, unsigned off)
+{
+	struct adp5588_gpio *dev =
+	    container_of(chip, struct adp5588_gpio, gpio_chip);
+
+	return !!(adp5588_gpio_read(dev->client, GPIO_DAT_STAT1 + ADP_BANK(off))
+		  & ADP_BIT(off));
+}
+
+static void adp5588_gpio_set_value(struct gpio_chip *chip,
+				   unsigned off, int val)
+{
+	unsigned bank, bit;
+	struct adp5588_gpio *dev =
+	    container_of(chip, struct adp5588_gpio, gpio_chip);
+
+	bank = ADP_BANK(off);
+	bit = ADP_BIT(off);
+
+	mutex_lock(&dev->lock);
+	if (val)
+		dev->dat_out[bank] |= bit;
+	else
+		dev->dat_out[bank] &= ~bit;
+
+	adp5588_gpio_write(dev->client, GPIO_DAT_OUT1 + bank,
+			   dev->dat_out[bank]);
+	mutex_unlock(&dev->lock);
+}
+
+static int adp5588_gpio_direction_input(struct gpio_chip *chip, unsigned off)
+{
+	int ret;
+	unsigned bank;
+	struct adp5588_gpio *dev =
+	    container_of(chip, struct adp5588_gpio, gpio_chip);
+
+	bank = ADP_BANK(off);
+
+	mutex_lock(&dev->lock);
+	dev->dir[bank] &= ~ADP_BIT(off);
+	ret = adp5588_gpio_write(dev->client, GPIO_DIR1 + bank, dev->dir[bank]);
+	mutex_unlock(&dev->lock);
+
+	return ret;
+}
+
+static int adp5588_gpio_direction_output(struct gpio_chip *chip,
+					 unsigned off, int val)
+{
+	int ret;
+	unsigned bank, bit;
+	struct adp5588_gpio *dev =
+	    container_of(chip, struct adp5588_gpio, gpio_chip);
+
+	bank = ADP_BANK(off);
+	bit = ADP_BIT(off);
+
+	mutex_lock(&dev->lock);
+	dev->dir[bank] |= bit;
+
+	if (val)
+		dev->dat_out[bank] |= bit;
+	else
+		dev->dat_out[bank] &= ~bit;
+
+	ret = adp5588_gpio_write(dev->client, GPIO_DAT_OUT1 + bank,
+				 dev->dat_out[bank]);
+	ret |= adp5588_gpio_write(dev->client, GPIO_DIR1 + bank,
+				 dev->dir[bank]);
+	mutex_unlock(&dev->lock);
+
+	return ret;
+}
+
+static int __devinit adp5588_gpio_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	struct adp5588_gpio_platform_data *pdata = client->dev.platform_data;
+	struct adp5588_gpio *dev;
+	struct gpio_chip *gc;
+	int ret, i, revid;
+
+	if (pdata == NULL) {
+		dev_err(&client->dev, "missing platform data\n");
+		return -ENODEV;
+	}
+
+	if (!i2c_check_functionality(client->adapter,
+					I2C_FUNC_SMBUS_BYTE_DATA)) {
+		dev_err(&client->dev, "SMBUS Byte Data not Supported\n");
+		return -EIO;
+	}
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (dev == NULL) {
+		dev_err(&client->dev, "failed to alloc memory\n");
+		return -ENOMEM;
+	}
+
+	dev->client = client;
+
+	gc = &dev->gpio_chip;
+	gc->direction_input = adp5588_gpio_direction_input;
+	gc->direction_output = adp5588_gpio_direction_output;
+	gc->get = adp5588_gpio_get_value;
+	gc->set = adp5588_gpio_set_value;
+	gc->can_sleep = 1;
+
+	gc->base = pdata->gpio_start;
+	gc->ngpio = MAXGPIO;
+	gc->label = client->name;
+	gc->owner = THIS_MODULE;
+
+	mutex_init(&dev->lock);
+
+
+	ret = adp5588_gpio_read(dev->client, DEV_ID);
+	if (ret < 0)
+		goto err;
+
+	revid = ret & ADP5588_DEVICE_ID_MASK;
+
+	for (i = 0, ret = 0; i <= ADP_BANK(MAXGPIO); i++) {
+		dev->dat_out[i] = adp5588_gpio_read(client, GPIO_DAT_OUT1 + i);
+		dev->dir[i] = adp5588_gpio_read(client, GPIO_DIR1 + i);
+		ret |= adp5588_gpio_write(client, KP_GPIO1 + i, 0);
+		ret |= adp5588_gpio_write(client, GPIO_PULL1 + i,
+				(pdata->pullup_dis_mask >> (8 * i)) & 0xFF);
+
+		if (ret)
+			goto err;
+	}
+
+	ret = gpiochip_add(&dev->gpio_chip);
+	if (ret)
+		goto err;
+
+	dev_info(&client->dev, "gpios %d..%d on a %s Rev. %d\n",
+			gc->base, gc->base + gc->ngpio - 1,
+			client->name, revid);
+
+	if (pdata->setup) {
+		ret = pdata->setup(client, gc->base, gc->ngpio, pdata->context);
+		if (ret < 0)
+			dev_warn(&client->dev, "setup failed, %d\n", ret);
+	}
+
+	i2c_set_clientdata(client, dev);
+	return 0;
+
+err:
+	kfree(dev);
+	return ret;
+}
+
+static int __devexit adp5588_gpio_remove(struct i2c_client *client)
+{
+	struct adp5588_gpio_platform_data *pdata = client->dev.platform_data;
+	struct adp5588_gpio *dev = i2c_get_clientdata(client);
+	int ret;
+
+	if (pdata->teardown) {
+		ret = pdata->teardown(client,
+				      dev->gpio_chip.base, dev->gpio_chip.ngpio,
+				      pdata->context);
+		if (ret < 0) {
+			dev_err(&client->dev, "teardown failed %d\n", ret);
+			return ret;
+		}
+	}
+
+	ret = gpiochip_remove(&dev->gpio_chip);
+	if (ret) {
+		dev_err(&client->dev, "gpiochip_remove failed %d\n", ret);
+		return ret;
+	}
+
+	kfree(dev);
+	return 0;
+}
+
+static const struct i2c_device_id adp5588_gpio_id[] = {
+	{DRV_NAME, 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, adp5588_gpio_id);
+
+static struct i2c_driver adp5588_gpio_driver = {
+	.driver = {
+		   .name = DRV_NAME,
+		   },
+	.probe = adp5588_gpio_probe,
+	.remove = __devexit_p(adp5588_gpio_remove),
+	.id_table = adp5588_gpio_id,
+};
+
+static int __init adp5588_gpio_init(void)
+{
+	return i2c_add_driver(&adp5588_gpio_driver);
+}
+
+module_init(adp5588_gpio_init);
+
+static void __exit adp5588_gpio_exit(void)
+{
+	i2c_del_driver(&adp5588_gpio_driver);
+}
+
+module_exit(adp5588_gpio_exit);
+
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("GPIO ADP5588 Driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h
index fc5db826b48e..02c9af374741 100644
--- a/include/linux/i2c/adp5588.h
+++ b/include/linux/i2c/adp5588.h
@@ -89,4 +89,16 @@ struct adp5588_kpad_platform_data {
 	unsigned short unlock_key2;	/* Unlock Key 2 */
 };
 
+struct adp5588_gpio_platform_data {
+	unsigned gpio_start;		/* GPIO Chip base # */
+	unsigned pullup_dis_mask;	/* Pull-Up Disable Mask */
+	int	(*setup)(struct i2c_client *client,
+				int gpio, unsigned ngpio,
+				void *context);
+	int	(*teardown)(struct i2c_client *client,
+				int gpio, unsigned ngpio,
+				void *context);
+	void	*context;
+};
+
 #endif
-- 
cgit v1.2.3


From a29815a333c6c6e677294bbe5958e771d0aad3fd Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 10 Jan 2010 16:28:09 +0200
Subject: core, x86: make LIST_POISON less deadly

The list macros use LIST_POISON1 and LIST_POISON2 as undereferencable
pointers in order to trap erronous use of freed list_heads.  Unfortunately
userspace can arrange for those pointers to actually be dereferencable,
potentially turning an oops to an expolit.

To avoid this allow architectures (currently x86_64 only) to override
the default values for these pointers with truly-undereferencable values.
This is easy on x86_64 as the virtual address space is large and contains
areas that cannot be mapped.

Other 64-bit architectures will likely find similar unmapped ranges.

[ingo: switch to 0xdead000000000000 as the unmapped area]
[ingo: add comments, cleanup]
[jaswinder: eliminate sparse warnings]

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/Kconfig       |  5 +++++
 include/linux/poison.h | 16 ++++++++++++++--
 2 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6bf1f1ac478c..cbcbfdee3ee0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1247,6 +1247,11 @@ config ARCH_MEMORY_PROBE
 	def_bool X86_64
 	depends on MEMORY_HOTPLUG
 
+config ILLEGAL_POINTER_VALUE
+       hex
+       default 0 if X86_32
+       default 0xdead000000000000 if X86_64
+
 source "mm/Kconfig"
 
 config HIGHPTE
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 7fc194aef8c2..2110a81c5e2a 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -2,13 +2,25 @@
 #define _LINUX_POISON_H
 
 /********** include/linux/list.h **********/
+
+/*
+ * Architectures might want to move the poison pointer offset
+ * into some well-recognized area such as 0xdead000000000000,
+ * that is also not mappable by user-space exploits:
+ */
+#ifdef CONFIG_ILLEGAL_POINTER_VALUE
+# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL)
+#else
+# define POISON_POINTER_DELTA 0
+#endif
+
 /*
  * These are non-NULL pointers that will result in page faults
  * under normal circumstances, used to verify that nobody uses
  * non-initialized list entries.
  */
-#define LIST_POISON1  ((void *) 0x00100100)
-#define LIST_POISON2  ((void *) 0x00200200)
+#define LIST_POISON1  ((void *) 0x00100100 + POISON_POINTER_DELTA)
+#define LIST_POISON2  ((void *) 0x00200200 + POISON_POINTER_DELTA)
 
 /********** include/linux/timer.h **********/
 /*
-- 
cgit v1.2.3


From 5040ab67a2c6d5710ba497dc52a8f7035729d7b0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 11 Jan 2010 11:14:44 +0900
Subject: libata: retry link resume if necessary

Interestingly, when SIDPR is used in ata_piix, writes to DET in
SControl sometimes get ignored leading to detection failure.  Update
sata_link_resume() such that it reads back SControl after clearing DET
and retry if it's not clear.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: fengxiangjun <fengxiangjun@neusoft.com>
Reported-by: Jim Faulkner <jfaulkne@ccs.neu.edu>
Cc: stable@kernel.org
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 38 +++++++++++++++++++++++++++++++-------
 include/linux/libata.h    |  3 +++
 2 files changed, 34 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 22ff51bdbc8a..6728328f3bea 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -3790,21 +3790,45 @@ int sata_link_debounce(struct ata_link *link, const unsigned long *params,
 int sata_link_resume(struct ata_link *link, const unsigned long *params,
 		     unsigned long deadline)
 {
+	int tries = ATA_LINK_RESUME_TRIES;
 	u32 scontrol, serror;
 	int rc;
 
 	if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
 		return rc;
 
-	scontrol = (scontrol & 0x0f0) | 0x300;
+	/*
+	 * Writes to SControl sometimes get ignored under certain
+	 * controllers (ata_piix SIDPR).  Make sure DET actually is
+	 * cleared.
+	 */
+	do {
+		scontrol = (scontrol & 0x0f0) | 0x300;
+		if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
+			return rc;
+		/*
+		 * Some PHYs react badly if SStatus is pounded
+		 * immediately after resuming.  Delay 200ms before
+		 * debouncing.
+		 */
+		msleep(200);
 
-	if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
-		return rc;
+		/* is SControl restored correctly? */
+		if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
+			return rc;
+	} while ((scontrol & 0xf0f) != 0x300 && --tries);
 
-	/* Some PHYs react badly if SStatus is pounded immediately
-	 * after resuming.  Delay 200ms before debouncing.
-	 */
-	msleep(200);
+	if ((scontrol & 0xf0f) != 0x300) {
+		ata_link_printk(link, KERN_ERR,
+				"failed to resume link (SControl %X)\n",
+				scontrol);
+		return 0;
+	}
+
+	if (tries < ATA_LINK_RESUME_TRIES)
+		ata_link_printk(link, KERN_WARNING,
+				"link resume succeeded after %d retries\n",
+				ATA_LINK_RESUME_TRIES - tries);
 
 	if ((rc = sata_link_debounce(link, params, deadline)))
 		return rc;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 6a9c4ddd3d95..73112250862c 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -354,6 +354,9 @@ enum {
 	/* max tries if error condition is still set after ->error_handler */
 	ATA_EH_MAX_TRIES	= 5,
 
+	/* sometimes resuming a link requires several retries */
+	ATA_LINK_RESUME_TRIES	= 5,
+
 	/* how hard are we gonna try to probe/recover devices */
 	ATA_PROBE_MAX_TRIES	= 3,
 	ATA_EH_DEV_TRIES	= 3,
-- 
cgit v1.2.3


From 2c761270d5520dd84ab0b4e47c24d99ff8503c38 Mon Sep 17 00:00:00 2001
From: Dave Chinner <david@fromorbit.com>
Date: Tue, 12 Jan 2010 17:39:16 +1100
Subject: lib: Introduce generic list_sort function

There are two copies of list_sort() in the tree already, one in the DRM
code, another in ubifs.  Now XFS needs this as well.  Create a generic
list_sort() function from the ubifs version and convert existing users
to it so we don't end up with yet another copy in the tree.

Signed-off-by: Dave Chinner <david@fromorbit.com>
Acked-by: Dave Airlie <airlied@redhat.com>
Acked-by: Artem Bityutskiy <dedekind@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/drm_modes.c |  90 ++------------------------------------
 fs/ubifs/gc.c               |  96 +----------------------------------------
 include/linux/list_sort.h   |  11 +++++
 lib/Makefile                |   2 +-
 lib/list_sort.c             | 102 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 119 insertions(+), 182 deletions(-)
 create mode 100644 include/linux/list_sort.h
 create mode 100644 lib/list_sort.c

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 6d81a02463a3..76d63394c776 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -1,9 +1,4 @@
 /*
- * The list_sort function is (presumably) licensed under the GPL (see the
- * top level "COPYING" file for details).
- *
- * The remainder of this file is:
- *
  * Copyright © 1997-2003 by The XFree86 Project, Inc.
  * Copyright © 2007 Dave Airlie
  * Copyright © 2007-2008 Intel Corporation
@@ -36,6 +31,7 @@
  */
 
 #include <linux/list.h>
+#include <linux/list_sort.h>
 #include "drmP.h"
 #include "drm.h"
 #include "drm_crtc.h"
@@ -855,6 +851,7 @@ EXPORT_SYMBOL(drm_mode_prune_invalid);
 
 /**
  * drm_mode_compare - compare modes for favorability
+ * @priv: unused
  * @lh_a: list_head for first mode
  * @lh_b: list_head for second mode
  *
@@ -868,7 +865,7 @@ EXPORT_SYMBOL(drm_mode_prune_invalid);
  * Negative if @lh_a is better than @lh_b, zero if they're equivalent, or
  * positive if @lh_b is better than @lh_a.
  */
-static int drm_mode_compare(struct list_head *lh_a, struct list_head *lh_b)
+static int drm_mode_compare(void *priv, struct list_head *lh_a, struct list_head *lh_b)
 {
 	struct drm_display_mode *a = list_entry(lh_a, struct drm_display_mode, head);
 	struct drm_display_mode *b = list_entry(lh_b, struct drm_display_mode, head);
@@ -885,85 +882,6 @@ static int drm_mode_compare(struct list_head *lh_a, struct list_head *lh_b)
 	return diff;
 }
 
-/* FIXME: what we don't have a list sort function? */
-/* list sort from Mark J Roberts (mjr@znex.org) */
-void list_sort(struct list_head *head,
-	       int (*cmp)(struct list_head *a, struct list_head *b))
-{
-	struct list_head *p, *q, *e, *list, *tail, *oldhead;
-	int insize, nmerges, psize, qsize, i;
-
-	list = head->next;
-	list_del(head);
-	insize = 1;
-	for (;;) {
-		p = oldhead = list;
-		list = tail = NULL;
-		nmerges = 0;
-
-		while (p) {
-			nmerges++;
-			q = p;
-			psize = 0;
-			for (i = 0; i < insize; i++) {
-				psize++;
-				q = q->next == oldhead ? NULL : q->next;
-				if (!q)
-					break;
-			}
-
-			qsize = insize;
-			while (psize > 0 || (qsize > 0 && q)) {
-				if (!psize) {
-					e = q;
-					q = q->next;
-					qsize--;
-					if (q == oldhead)
-						q = NULL;
-				} else if (!qsize || !q) {
-					e = p;
-					p = p->next;
-					psize--;
-					if (p == oldhead)
-						p = NULL;
-				} else if (cmp(p, q) <= 0) {
-					e = p;
-					p = p->next;
-					psize--;
-					if (p == oldhead)
-						p = NULL;
-				} else {
-					e = q;
-					q = q->next;
-					qsize--;
-					if (q == oldhead)
-						q = NULL;
-				}
-				if (tail)
-					tail->next = e;
-				else
-					list = e;
-				e->prev = tail;
-				tail = e;
-			}
-			p = q;
-		}
-
-		tail->next = list;
-		list->prev = tail;
-
-		if (nmerges <= 1)
-			break;
-
-		insize *= 2;
-	}
-
-	head->next = list;
-	head->prev = list->prev;
-	list->prev->next = head;
-	list->prev = head;
-}
-
 /**
  * drm_mode_sort - sort mode list
  * @mode_list: list to sort
@@ -975,7 +893,7 @@ void list_sort(struct list_head *head,
  */
 void drm_mode_sort(struct list_head *mode_list)
 {
-	list_sort(mode_list, drm_mode_compare);
+	list_sort(NULL, mode_list, drm_mode_compare);
 }
 EXPORT_SYMBOL(drm_mode_sort);
 
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 618c2701d3a7..e5a3d8e96bb7 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -54,6 +54,7 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/list_sort.h>
 #include "ubifs.h"
 
 /*
@@ -107,101 +108,6 @@ static int switch_gc_head(struct ubifs_info *c)
 	return err;
 }
 
-/**
- * list_sort - sort a list.
- * @priv: private data, passed to @cmp
- * @head: the list to sort
- * @cmp: the elements comparison function
- *
- * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
- * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
- * in ascending order.
- *
- * The comparison function @cmp is supposed to return a negative value if @a is
- * than @b, and a positive value if @a is greater than @b. If @a and @b are
- * equivalent, then it does not matter what this function returns.
- */
-static void list_sort(void *priv, struct list_head *head,
-		      int (*cmp)(void *priv, struct list_head *a,
-				 struct list_head *b))
-{
-	struct list_head *p, *q, *e, *list, *tail, *oldhead;
-	int insize, nmerges, psize, qsize, i;
-
-	if (list_empty(head))
-		return;
-
-	list = head->next;
-	list_del(head);
-	insize = 1;
-	for (;;) {
-		p = oldhead = list;
-		list = tail = NULL;
-		nmerges = 0;
-
-		while (p) {
-			nmerges++;
-			q = p;
-			psize = 0;
-			for (i = 0; i < insize; i++) {
-				psize++;
-				q = q->next == oldhead ? NULL : q->next;
-				if (!q)
-					break;
-			}
-
-			qsize = insize;
-			while (psize > 0 || (qsize > 0 && q)) {
-				if (!psize) {
-					e = q;
-					q = q->next;
-					qsize--;
-					if (q == oldhead)
-						q = NULL;
-				} else if (!qsize || !q) {
-					e = p;
-					p = p->next;
-					psize--;
-					if (p == oldhead)
-						p = NULL;
-				} else if (cmp(priv, p, q) <= 0) {
-					e = p;
-					p = p->next;
-					psize--;
-					if (p == oldhead)
-						p = NULL;
-				} else {
-					e = q;
-					q = q->next;
-					qsize--;
-					if (q == oldhead)
-						q = NULL;
-				}
-				if (tail)
-					tail->next = e;
-				else
-					list = e;
-				e->prev = tail;
-				tail = e;
-			}
-			p = q;
-		}
-
-		tail->next = list;
-		list->prev = tail;
-
-		if (nmerges <= 1)
-			break;
-
-		insize *= 2;
-	}
-
-	head->next = list;
-	head->prev = list->prev;
-	list->prev->next = head;
-	list->prev = head;
-}
-
 /**
  * data_nodes_cmp - compare 2 data nodes.
  * @priv: UBIFS file-system description object
diff --git a/include/linux/list_sort.h b/include/linux/list_sort.h
new file mode 100644
index 000000000000..1a2df2efb771
--- /dev/null
+++ b/include/linux/list_sort.h
@@ -0,0 +1,11 @@
+#ifndef _LINUX_LIST_SORT_H
+#define _LINUX_LIST_SORT_H
+
+#include <linux/types.h>
+
+struct list_head;
+
+void list_sort(void *priv, struct list_head *head,
+	       int (*cmp)(void *priv, struct list_head *a,
+			  struct list_head *b));
+#endif
diff --git a/lib/Makefile b/lib/Makefile
index 911b25aed1e7..3b0b4a696db9 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -21,7 +21,7 @@ lib-y	+= kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-	 string_helpers.o gcd.o
+	 string_helpers.o gcd.o list_sort.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/list_sort.c b/lib/list_sort.c
new file mode 100644
index 000000000000..19d11e0bb958
--- /dev/null
+++ b/lib/list_sort.c
@@ -0,0 +1,102 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list_sort.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+/**
+ * list_sort - sort a list.
+ * @priv: private data, passed to @cmp
+ * @head: the list to sort
+ * @cmp: the elements comparison function
+ *
+ * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
+ * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
+ * in ascending order.
+ *
+ * The comparison function @cmp is supposed to return a negative value if @a is
+ * less than @b, and a positive value if @a is greater than @b. If @a and @b
+ * are equivalent, then it does not matter what this function returns.
+ */
+void list_sort(void *priv, struct list_head *head,
+	       int (*cmp)(void *priv, struct list_head *a,
+			  struct list_head *b))
+{
+	struct list_head *p, *q, *e, *list, *tail, *oldhead;
+	int insize, nmerges, psize, qsize, i;
+
+	if (list_empty(head))
+		return;
+
+	list = head->next;
+	list_del(head);
+	insize = 1;
+	for (;;) {
+		p = oldhead = list;
+		list = tail = NULL;
+		nmerges = 0;
+
+		while (p) {
+			nmerges++;
+			q = p;
+			psize = 0;
+			for (i = 0; i < insize; i++) {
+				psize++;
+				q = q->next == oldhead ? NULL : q->next;
+				if (!q)
+					break;
+			}
+
+			qsize = insize;
+			while (psize > 0 || (qsize > 0 && q)) {
+				if (!psize) {
+					e = q;
+					q = q->next;
+					qsize--;
+					if (q == oldhead)
+						q = NULL;
+				} else if (!qsize || !q) {
+					e = p;
+					p = p->next;
+					psize--;
+					if (p == oldhead)
+						p = NULL;
+				} else if (cmp(priv, p, q) <= 0) {
+					e = p;
+					p = p->next;
+					psize--;
+					if (p == oldhead)
+						p = NULL;
+				} else {
+					e = q;
+					q = q->next;
+					qsize--;
+					if (q == oldhead)
+						q = NULL;
+				}
+				if (tail)
+					tail->next = e;
+				else
+					list = e;
+				e->prev = tail;
+				tail = e;
+			}
+			p = q;
+		}
+
+		tail->next = list;
+		list->prev = tail;
+
+		if (nmerges <= 1)
+			break;
+
+		insize *= 2;
+	}
+
+	head->next = list;
+	head->prev = list->prev;
+	list->prev->next = head;
+	list->prev = head;
+}
+
+EXPORT_SYMBOL(list_sort);
-- 
cgit v1.2.3