From 47376ceba54600cec4dd9e7c4fe8b98e4269633a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Dec 2009 23:25:50 +0100 Subject: reiserfs: Fix reiserfs lock <-> inode mutex dependency inversion The reiserfs lock -> inode mutex dependency gets inverted when we relax the lock while walking to the tree. To fix this, use a specialized version of reiserfs_mutex_lock_safe that takes care of mutex subclasses. Then we can grab the inode mutex with I_MUTEX_XATTR subclass without any reiserfs lock dependency. This fixes the following report: [ INFO: possible circular locking dependency detected ] 2.6.32-06793-gf405425-dirty #2 ------------------------------------------------------- mv/18566 is trying to acquire lock: (&REISERFS_SB(s)->lock){+.+.+.}, at: [] reiserfs_write_lock+0x28= /0x40 but task is already holding lock: (&sb->s_type->i_mutex_key#5/3){+.+.+.}, at: [] reiserfs_for_each_xattr+0x10c/0x380 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&sb->s_type->i_mutex_key#5/3){+.+.+.}: [] validate_chain+0xa23/0xf70 [] __lock_acquire+0x4e5/0xa70 [] lock_acquire+0x7a/0xa0 [] mutex_lock_nested+0x5f/0x2b0 [] reiserfs_for_each_xattr+0x84/0x380 [] reiserfs_delete_xattrs+0x15/0x50 [] reiserfs_delete_inode+0x8f/0x140 [] generic_delete_inode+0x9c/0x150 [] generic_drop_inode+0x3d/0x60 [] iput+0x47/0x50 [] do_unlinkat+0xdb/0x160 [] sys_unlink+0x10/0x20 [] sysenter_do_call+0x12/0x36 -> #0 (&REISERFS_SB(s)->lock){+.+.+.}: [] validate_chain+0xf68/0xf70 [] __lock_acquire+0x4e5/0xa70 [] lock_acquire+0x7a/0xa0 [] mutex_lock_nested+0x5f/0x2b0 [] reiserfs_write_lock+0x28/0x40 [] search_by_key+0x1f7b/0x21b0 [] search_by_entry_key+0x1f/0x3b0 [] reiserfs_find_entry+0x77/0x400 [] reiserfs_lookup+0x85/0x130 [] __lookup_hash+0xb4/0x110 [] lookup_one_len+0xb3/0x100 [] reiserfs_for_each_xattr+0x120/0x380 [] reiserfs_delete_xattrs+0x15/0x50 [] reiserfs_delete_inode+0x8f/0x140 [] generic_delete_inode+0x9c/0x150 [] generic_drop_inode+0x3d/0x60 [] iput+0x47/0x50 [] dentry_iput+0x6f/0xf0 [] d_kill+0x24/0x50 [] dput+0x5b/0x120 [] sys_renameat+0x1b9/0x230 [] sys_rename+0x28/0x30 [] sysenter_do_call+0x12/0x36 other info that might help us debug this: 2 locks held by mv/18566: #0: (&sb->s_type->i_mutex_key#5/1){+.+.+.}, at: [] lock_rename+0xcc/0xd0 #1: (&sb->s_type->i_mutex_key#5/3){+.+.+.}, at: [] reiserfs_for_each_xattr+0x10c/0x380 stack backtrace: Pid: 18566, comm: mv Tainted: G C 2.6.32-06793-gf405425-dirty #2 Call Trace: [] ? printk+0x18/0x1e [] print_circular_bug+0xc0/0xd0 [] validate_chain+0xf68/0xf70 [] ? trace_hardirqs_off+0xb/0x10 [] __lock_acquire+0x4e5/0xa70 [] lock_acquire+0x7a/0xa0 [] ? reiserfs_write_lock+0x28/0x40 [] mutex_lock_nested+0x5f/0x2b0 [] ? reiserfs_write_lock+0x28/0x40 [] ? reiserfs_write_lock+0x28/0x40 [] ? schedule+0x27a/0x440 [] reiserfs_write_lock+0x28/0x40 [] search_by_key+0x1f7b/0x21b0 [] ? __lock_acquire+0x506/0xa70 [] ? lock_release_non_nested+0x1e7/0x340 [] ? reiserfs_write_lock+0x28/0x40 [] ? trace_hardirqs_on_caller+0x124/0x170 [] ? trace_hardirqs_on+0xb/0x10 [] ? T.316+0x15/0x1a0 [] ? sched_clock_cpu+0x9d/0x100 [] search_by_entry_key+0x1f/0x3b0 [] ? __mutex_unlock_slowpath+0x9a/0x120 [] ? trace_hardirqs_on_caller+0x124/0x170 [] reiserfs_find_entry+0x77/0x400 [] reiserfs_lookup+0x85/0x130 [] ? sched_clock_cpu+0x9d/0x100 [] __lookup_hash+0xb4/0x110 [] lookup_one_len+0xb3/0x100 [] reiserfs_for_each_xattr+0x120/0x380 [] ? delete_one_xattr+0x0/0x1c0 [] ? math_error+0x22/0x150 [] ? reiserfs_write_lock+0x28/0x40 [] reiserfs_delete_xattrs+0x15/0x50 [] ? reiserfs_write_lock+0x28/0x40 [] reiserfs_delete_inode+0x8f/0x140 [] ? generic_delete_inode+0x5f/0x150 [] ? reiserfs_delete_inode+0x0/0x140 [] generic_delete_inode+0x9c/0x150 [] generic_drop_inode+0x3d/0x60 [] iput+0x47/0x50 [] dentry_iput+0x6f/0xf0 [] d_kill+0x24/0x50 [] dput+0x5b/0x120 [] sys_renameat+0x1b9/0x230 [] ? sched_clock_cpu+0x9d/0x100 [] ? trace_hardirqs_off+0xb/0x10 [] ? cpu_clock+0x4e/0x60 [] ? do_page_fault+0x155/0x370 [] ? up_read+0x16/0x30 [] ? do_page_fault+0x155/0x370 [] sys_rename+0x28/0x30 [] sysenter_do_call+0x12/0x36 Reported-by: Alexander Beregalov Signed-off-by: Frederic Weisbecker Cc: Chris Mason Cc: Ingo Molnar Cc: Thomas Gleixner --- include/linux/reiserfs_fs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index a05b4a20768d..4351b49e2b1e 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -97,6 +97,15 @@ static inline void reiserfs_mutex_lock_safe(struct mutex *m, reiserfs_write_lock(s); } +static inline void +reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, + struct super_block *s) +{ + reiserfs_write_unlock(s); + mutex_lock_nested(m, subclass); + reiserfs_write_lock(s); +} + /* * When we schedule, we usually want to also release the write lock, * according to the previous bkl based locking scheme of reiserfs. -- cgit v1.2.3 From 2d1c861871d767153538a77c498752b36d4bb4b8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 9 Dec 2009 17:52:13 +1100 Subject: PCI/cardbus: Add a fixup hook and fix powerpc The cardbus code creates PCI devices without ever going through the necessary fixup bits and pieces that normal PCI devices go through. There's in fact a commented out call to pcibios_fixup_bus() in there, it's commented because ... it doesn't work. I could make pcibios_fixup_bus() do the right thing on powerpc easily but I felt it cleaner instead to provide a specific hook pci_fixup_cardbus for which a weak empty implementation is provided by the PCI core. This fixes cardbus on powerbooks and probably all other PowerPC platforms which was broken completely for ever on some platforms and since 2.6.31 on others such as PowerBooks when we made the DMA ops mandatory (since those are setup by the fixups). Acked-by: Dominik Brodowski Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Jesse Barnes --- arch/powerpc/kernel/pci-common.c | 13 +++++++++++++ drivers/pci/pci.c | 5 +++++ drivers/pcmcia/cardbus.c | 2 +- include/linux/pci.h | 3 +++ 4 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index e8dfdbd9327a..cadbed679fbb 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1107,6 +1107,12 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { struct dev_archdata *sd = &dev->dev.archdata; + /* Cardbus can call us to add new devices to a bus, so ignore + * those who are already fully discovered + */ + if (dev->is_added) + continue; + /* Setup OF node pointer in archdata */ sd->of_node = pci_device_to_OF_node(dev); @@ -1147,6 +1153,13 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus) } EXPORT_SYMBOL(pcibios_fixup_bus); +void __devinit pci_fixup_cardbus(struct pci_bus *bus) +{ + /* Now fixup devices on that bus */ + pcibios_setup_bus_devices(bus); +} + + static int skip_isa_ioresource_align(struct pci_dev *dev) { if ((ppc_pci_flags & PPC_PCI_CAN_SKIP_ISA_ALIGN) && diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d50522bf16b1..864e703cf737 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2798,6 +2798,11 @@ int __attribute__ ((weak)) pci_ext_cfg_avail(struct pci_dev *dev) return 1; } +void __weak pci_fixup_cardbus(struct pci_bus *bus) +{ +} +EXPORT_SYMBOL(pci_fixup_cardbus); + static int __init pci_setup(char *str) { while (str) { diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c index cdf50f3bc2df..d99f846451a3 100644 --- a/drivers/pcmcia/cardbus.c +++ b/drivers/pcmcia/cardbus.c @@ -222,7 +222,7 @@ int __ref cb_alloc(struct pcmcia_socket *s) unsigned int max, pass; s->functions = pci_scan_slot(bus, PCI_DEVFN(0, 0)); -/* pcibios_fixup_bus(bus); */ + pci_fixup_cardbus(bus); max = bus->secondary; for (pass = 0; pass < 2; pass++) diff --git a/include/linux/pci.h b/include/linux/pci.h index bf1e67080849..5da0690d9cee 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -566,6 +566,9 @@ void pcibios_align_resource(void *, struct resource *, resource_size_t, resource_size_t); void pcibios_update_irq(struct pci_dev *, int irq); +/* Weak but can be overriden by arch */ +void pci_fixup_cardbus(struct pci_bus *); + /* Generic PCI functions used internally */ extern struct pci_bus *pci_find_bus(int domain, int busnr); -- cgit v1.2.3 From 9a418af5df03ad133cd8c8f6742b75e542db6392 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 17 Dec 2009 13:55:48 +0100 Subject: mac80211: fix peer HT capabilities I noticed yesterday, because Jeff had noticed a speed regression, cf. bug http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2138 that the SM PS settings for peers were wrong. Instead of overwriting the SM PS settings with the local bits, we need to keep the remote bits. The bug was part of the original HT code from over two years ago, but unfortunately nobody noticed that it makes no sense -- we shouldn't be overwriting the peer's setting with our own but rather keep it intact when masking the peer capabilities with our own. While fixing that, I noticed that the masking of capabilities is completely useless for most of the bits, so also fix those other bits. Finally, I also noticed that PSMP_SUPPORT no longer exists in the final 802.11n version, so also remove that. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2800lib.c | 3 +-- include/linux/ieee80211.h | 2 +- net/mac80211/ht.c | 25 ++++++++++++++++++++++--- 3 files changed, 24 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index 6bf6c0f12f35..27bf887f1453 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -2080,8 +2080,7 @@ int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev) IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_TX_STBC | - IEEE80211_HT_CAP_RX_STBC | - IEEE80211_HT_CAP_PSMP_SUPPORT; + IEEE80211_HT_CAP_RX_STBC; spec->ht.ampdu_factor = 3; spec->ht.ampdu_density = 4; spec->ht.mcs.tx_params = diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d9724a28c0c2..163c840437d6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -832,7 +832,7 @@ struct ieee80211_ht_cap { #define IEEE80211_HT_CAP_DELAY_BA 0x0400 #define IEEE80211_HT_CAP_MAX_AMSDU 0x0800 #define IEEE80211_HT_CAP_DSSSCCK40 0x1000 -#define IEEE80211_HT_CAP_PSMP_SUPPORT 0x2000 +#define IEEE80211_HT_CAP_RESERVED 0x2000 #define IEEE80211_HT_CAP_40MHZ_INTOLERANT 0x4000 #define IEEE80211_HT_CAP_LSIG_TXOP_PROT 0x8000 diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 3787455fb696..d7dcee680728 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -34,9 +34,28 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, ht_cap->ht_supported = true; - ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & sband->ht_cap.cap; - ht_cap->cap &= ~IEEE80211_HT_CAP_SM_PS; - ht_cap->cap |= sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS; + /* + * The bits listed in this expression should be + * the same for the peer and us, if the station + * advertises more then we can't use those thus + * we mask them out. + */ + ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & + (sband->ht_cap.cap | + ~(IEEE80211_HT_CAP_LDPC_CODING | + IEEE80211_HT_CAP_SUP_WIDTH_20_40 | + IEEE80211_HT_CAP_GRN_FLD | + IEEE80211_HT_CAP_SGI_20 | + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_DSSSCCK40)); + /* + * The STBC bits are asymmetric -- if we don't have + * TX then mask out the peer's RX and vice versa. + */ + if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC)) + ht_cap->cap &= ~IEEE80211_HT_CAP_RX_STBC; + if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC)) + ht_cap->cap &= ~IEEE80211_HT_CAP_TX_STBC; ampdu_info = ht_cap_ie->ampdu_params_info; ht_cap->ampdu_factor = -- cgit v1.2.3 From cc3e1bea5d87635c519da657303690f5538bb4eb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 23 Dec 2009 06:52:08 -0500 Subject: ext4, jbd2: Add barriers for file systems with exernal journals This is a bit complicated because we are trying to optimize when we send barriers to the fs data disk. We could just throw in an extra barrier to the data disk whenever we send a barrier to the journal disk, but that's not always strictly necessary. We only need to send a barrier during a commit when there are data blocks which are must be written out due to an inode written in ordered mode, or if fsync() depends on the commit to force data blocks to disk. Finally, before we drop transactions from the beginning of the journal during a checkpoint operation, we need to guarantee that any blocks that were flushed out to the data disk are firmly on the rust platter before we drop the transaction from the journal. Thanks to Oleg Drokin for pointing out this flaw in ext3/ext4. Signed-off-by: "Theodore Ts'o" --- fs/ext4/fsync.c | 16 ++++++++++++++-- fs/jbd2/checkpoint.c | 15 +++++++++++++++ fs/jbd2/commit.c | 19 +++++++++++-------- include/linux/jbd2.h | 1 + 4 files changed, 41 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 0b22497d92e1..98bd140aad01 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) return ext4_force_commit(inode->i_sb); commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; - if (jbd2_log_start_commit(journal, commit_tid)) + if (jbd2_log_start_commit(journal, commit_tid)) { + /* + * When the journal is on a different device than the + * fs data disk, we need to issue the barrier in + * writeback mode. (In ordered mode, the jbd2 layer + * will take care of issuing the barrier. In + * data=journal, all of the data blocks are written to + * the journal device.) + */ + if (ext4_should_writeback_data(inode) && + (journal->j_fs_dev != journal->j_dev) && + (journal->j_flags & JBD2_BARRIER)) + blkdev_issue_flush(inode->i_sb->s_bdev, NULL); jbd2_log_wait_commit(journal, commit_tid); - else if (journal->j_flags & JBD2_BARRIER) + } else if (journal->j_flags & JBD2_BARRIER) blkdev_issue_flush(inode->i_sb->s_bdev, NULL); return ret; } diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index ca0f5eb62b20..886849370950 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -22,6 +22,7 @@ #include #include #include +#include #include /* @@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal) journal->j_tail_sequence = first_tid; journal->j_tail = blocknr; spin_unlock(&journal->j_state_lock); + + /* + * If there is an external journal, we need to make sure that + * any data blocks that were recently written out --- perhaps + * by jbd2_log_do_checkpoint() --- are flushed out before we + * drop the transactions from the external journal. It's + * unlikely this will be necessary, especially with a + * appropriately sized journal, but we need this to guarantee + * correctness. Fortunately jbd2_cleanup_journal_tail() + * doesn't get called all that often. + */ + if ((journal->j_fs_dev != journal->j_dev) && + (journal->j_flags & JBD2_BARRIER)) + blkdev_issue_flush(journal->j_fs_dev, NULL); if (!(journal->j_flags & JBD2_ABORT)) jbd2_journal_update_superblock(journal, 1); return 0; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6a10238d2c63..1bc74b6f26d2 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal, ret = err; spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); + commit_transaction->t_flushed_data_blocks = 1; jinode->i_flags &= ~JI_COMMIT_RUNNING; wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); } @@ -708,8 +709,17 @@ start_journal_io: } } - /* Done it all: now write the commit record asynchronously. */ + /* + * If the journal is not located on the file system device, + * then we must flush the file system device before we issue + * the commit record + */ + if (commit_transaction->t_flushed_data_blocks && + (journal->j_fs_dev != journal->j_dev) && + (journal->j_flags & JBD2_BARRIER)) + blkdev_issue_flush(journal->j_fs_dev, NULL); + /* Done it all: now write the commit record asynchronously. */ if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { err = journal_submit_commit_record(journal, commit_transaction, @@ -720,13 +730,6 @@ start_journal_io: blkdev_issue_flush(journal->j_dev, NULL); } - /* - * This is the right place to wait for data buffers both for ASYNC - * and !ASYNC commit. If commit is ASYNC, we need to wait only after - * the commit block went to disk (which happens above). If commit is - * SYNC, we need to wait for data buffers before we start writing - * commit block, which happens below in such setting. - */ err = journal_finish_inode_data_buffers(journal, commit_transaction); if (err) { printk(KERN_WARNING diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f1011f7f3d41..638ce4554c76 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -653,6 +653,7 @@ struct transaction_s * waiting for it to finish. */ unsigned int t_synchronous_commit:1; + unsigned int t_flushed_data_blocks:1; /* * For use by the filesystem to store fs-specific data -- cgit v1.2.3 From 17bd55d037a02b04d9119511cfd1a4b985d20f63 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 23 Dec 2009 07:57:07 -0500 Subject: fs-writeback: Add helper function to start writeback if idle ext4, at least, would like to start pushing on writeback if it starts to get close to ENOSPC when reserving worst-case blocks for delalloc writes. Writing out delalloc data will convert those worst-case predictions into usually smaller actual usage, freeing up space before we hit ENOSPC based on this speculation. Thanks to Jens for the suggestion for the helper function, & the naming help. I've made the helper return status on whether writeback was started even though I don't plan to use it in the ext4 patch; it seems like it would be potentially useful to test this in some cases. Signed-off-by: Eric Sandeen Acked-by: Jan Kara --- fs/fs-writeback.c | 17 +++++++++++++++++ include/linux/writeback.h | 1 + 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 49bc1b8e8f19..f6c2155e0026 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1186,6 +1186,23 @@ void writeback_inodes_sb(struct super_block *sb) } EXPORT_SYMBOL(writeback_inodes_sb); +/** + * writeback_inodes_sb_if_idle - start writeback if none underway + * @sb: the superblock + * + * Invoke writeback_inodes_sb if no writeback is currently underway. + * Returns 1 if writeback was started, 0 if not. + */ +int writeback_inodes_sb_if_idle(struct super_block *sb) +{ + if (!writeback_in_progress(sb->s_bdi)) { + writeback_inodes_sb(sb); + return 1; + } else + return 0; +} +EXPORT_SYMBOL(writeback_inodes_sb_if_idle); + /** * sync_inodes_sb - sync sb inode pages * @sb: the superblock diff --git a/include/linux/writeback.h b/include/linux/writeback.h index c18c008f4bbf..76e8903cd204 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -70,6 +70,7 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *); +int writeback_inodes_sb_if_idle(struct super_block *); void sync_inodes_sb(struct super_block *); void writeback_inodes_wbc(struct writeback_control *wbc); long wb_do_writeback(struct bdi_writeback *wb, int force_wait); -- cgit v1.2.3 From 28f6aeea3f12d37bd258b2c0d5ba891bff4ec479 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Fri, 25 Dec 2009 17:30:22 -0800 Subject: net: restore ip source validation when using policy routing and the skb mark: there are cases where a back path validation requires us to use a different routing table for src ip validation than the one used for mapping ingress dst ip. One such a case is transparent proxying where we pretend to be the destination system and therefore the local table is used for incoming packets but possibly a main table would be used on outbound. Make the default behavior to allow the above and if users need to turn on the symmetry via sysctl src_valid_mark Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 1 + include/linux/sysctl.h | 1 + net/ipv4/devinet.c | 1 + net/ipv4/fib_frontend.c | 2 ++ 4 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 699e85c01a4d..b2304929434e 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -81,6 +81,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) #define IN_DEV_MFORWARD(in_dev) IN_DEV_ANDCONF((in_dev), MC_FORWARDING) #define IN_DEV_RPFILTER(in_dev) IN_DEV_MAXCONF((in_dev), RP_FILTER) +#define IN_DEV_SRC_VMARK(in_dev) IN_DEV_ORCONF((in_dev), SRC_VMARK) #define IN_DEV_SOURCE_ROUTE(in_dev) IN_DEV_ANDCONF((in_dev), \ ACCEPT_SOURCE_ROUTE) #define IN_DEV_ACCEPT_LOCAL(in_dev) IN_DEV_ORCONF((in_dev), ACCEPT_LOCAL) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 877ba039e6a4..bd27fbc9db62 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -482,6 +482,7 @@ enum NET_IPV4_CONF_ARP_ACCEPT=21, NET_IPV4_CONF_ARP_NOTIFY=22, NET_IPV4_CONF_ACCEPT_LOCAL=23, + NET_IPV4_CONF_SRC_VMARK=24, __NET_IPV4_CONF_MAX }; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5cdbc102a418..040c4f05b653 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1397,6 +1397,7 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, "accept_source_route"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), + DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3323168ee52d..82dbf711d6d0 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -252,6 +252,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, no_addr = in_dev->ifa_list == NULL; rpf = IN_DEV_RPFILTER(in_dev); accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); + if (mark && !IN_DEV_SRC_VMARK(in_dev)) + fl.mark = 0; } rcu_read_unlock(); -- cgit v1.2.3 From 81744ee44ab2845c16ffd7d6f762f7b4a49a4750 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 29 Dec 2009 08:35:35 +0100 Subject: block: Fix incorrect alignment offset reporting and update documentation queue_sector_alignment_offset returned the wrong value which caused partitions to report an incorrect alignment_offset. Since offset alignment calculation is needed several places it has been split into a separate helper function. The topology stacking function has been updated accordingly. Furthermore, comments have been added to clarify how the stacking function works. Signed-off-by: Martin K. Petersen Tested-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-settings.c | 44 +++++++++++++++++++++++++++++++++----------- include/linux/blkdev.h | 11 +++++++++-- 2 files changed, 42 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/block/blk-settings.c b/block/blk-settings.c index e14fcbcedbfa..d52d4adc440b 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -505,20 +505,30 @@ static unsigned int lcm(unsigned int a, unsigned int b) /** * blk_stack_limits - adjust queue_limits for stacked devices - * @t: the stacking driver limits (top) - * @b: the underlying queue limits (bottom) + * @t: the stacking driver limits (top device) + * @b: the underlying queue limits (bottom, component device) * @offset: offset to beginning of data within component device * * Description: - * Merges two queue_limit structs. Returns 0 if alignment didn't - * change. Returns -1 if adding the bottom device caused - * misalignment. + * This function is used by stacking drivers like MD and DM to ensure + * that all component devices have compatible block sizes and + * alignments. The stacking driver must provide a queue_limits + * struct (top) and then iteratively call the stacking function for + * all component (bottom) devices. The stacking function will + * attempt to combine the values and ensure proper alignment. + * + * Returns 0 if the top and bottom queue_limits are compatible. The + * top device's block sizes and alignment offsets may be adjusted to + * ensure alignment with the bottom device. If no compatible sizes + * and alignments exist, -1 is returned and the resulting top + * queue_limits will have the misaligned flag set to indicate that + * the alignment_offset is undefined. */ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset) { sector_t alignment; - unsigned int top, bottom, granularity; + unsigned int top, bottom; t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); @@ -536,15 +546,18 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); - granularity = max(b->physical_block_size, b->io_min); - alignment = b->alignment_offset - (offset & (granularity - 1)); + alignment = queue_limit_alignment_offset(b, offset); + /* Bottom device has different alignment. Check that it is + * compatible with the current top alignment. + */ if (t->alignment_offset != alignment) { top = max(t->physical_block_size, t->io_min) + t->alignment_offset; - bottom = granularity + alignment; + bottom = max(b->physical_block_size, b->io_min) + alignment; + /* Verify that top and bottom intervals line up */ if (max(top, bottom) & (min(top, bottom) - 1)) t->misaligned = 1; } @@ -561,32 +574,39 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->no_cluster |= b->no_cluster; t->discard_zeroes_data &= b->discard_zeroes_data; + /* Physical block size a multiple of the logical block size? */ if (t->physical_block_size & (t->logical_block_size - 1)) { t->physical_block_size = t->logical_block_size; t->misaligned = 1; } + /* Minimum I/O a multiple of the physical block size? */ if (t->io_min & (t->physical_block_size - 1)) { t->io_min = t->physical_block_size; t->misaligned = 1; } + /* Optimal I/O a multiple of the physical block size? */ if (t->io_opt & (t->physical_block_size - 1)) { t->io_opt = 0; t->misaligned = 1; } + /* Find lowest common alignment_offset */ t->alignment_offset = lcm(t->alignment_offset, alignment) & (max(t->physical_block_size, t->io_min) - 1); + /* Verify that new alignment_offset is on a logical block boundary */ if (t->alignment_offset & (t->logical_block_size - 1)) t->misaligned = 1; /* Discard alignment and granularity */ if (b->discard_granularity) { + unsigned int granularity = b->discard_granularity; + offset &= granularity - 1; - alignment = b->discard_alignment - - (offset & (b->discard_granularity - 1)); + alignment = (granularity + b->discard_alignment - offset) + & (granularity - 1); if (t->discard_granularity != 0 && t->discard_alignment != alignment) { @@ -598,6 +618,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->discard_misaligned = 1; } + t->max_discard_sectors = min_not_zero(t->max_discard_sectors, + b->max_discard_sectors); t->discard_granularity = max(t->discard_granularity, b->discard_granularity); t->discard_alignment = lcm(t->discard_alignment, alignment) & diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 784a919aa0d0..59b832be3044 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1116,11 +1116,18 @@ static inline int queue_alignment_offset(struct request_queue *q) return q->limits.alignment_offset; } +static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t offset) +{ + unsigned int granularity = max(lim->physical_block_size, lim->io_min); + + offset &= granularity - 1; + return (granularity + lim->alignment_offset - offset) & (granularity - 1); +} + static inline int queue_sector_alignment_offset(struct request_queue *q, sector_t sector) { - return ((sector << 9) - q->limits.alignment_offset) - & (q->limits.io_min - 1); + return queue_limit_alignment_offset(&q->limits, sector << 9); } static inline int bdev_alignment_offset(struct block_device *bdev) -- cgit v1.2.3 From db5d247ae811f49185a71e703b65acad845e4b18 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Thu, 24 Dec 2009 12:05:58 +0100 Subject: firewire: fix use of multiple AV/C devices, allow multiple FCP listeners Control of more than one AV/C device at once --- e.g. camcorders, tape decks, audio devices, TV tuners --- failed or worked only unreliably, depending on driver implementation. This affected kernelspace and userspace drivers alike and was caused by firewire-core's inability to accept multiple registrations of FCP listeners. The fix allows multiple address handlers to be registered for the FCP command and response registers. When a request for these registers is received, all handlers are invoked, and the Firewire response is generated by the core and not by any handler. The cdev API does not change, i.e., userspace is still expected to send a response for FCP requests; this response is silently ignored. Signed-off-by: Clemens Ladisch Signed-off-by: Stefan Richter (changelog, rebased, whitespace) --- drivers/firewire/core-cdev.c | 26 ++++--- drivers/firewire/core-transaction.c | 118 ++++++++++++++++++++++++++------ drivers/media/dvb/firewire/firedtv-fw.c | 12 +--- include/linux/firewire-cdev.h | 3 + include/linux/firewire.h | 4 +- 5 files changed, 119 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 231e6ee5ba43..2cb22d160f6e 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -601,8 +601,9 @@ static void release_request(struct client *client, struct inbound_transaction_resource *r = container_of(resource, struct inbound_transaction_resource, resource); - fw_send_response(client->device->card, r->request, - RCODE_CONFLICT_ERROR); + if (r->request) + fw_send_response(client->device->card, r->request, + RCODE_CONFLICT_ERROR); kfree(r); } @@ -645,7 +646,8 @@ static void handle_request(struct fw_card *card, struct fw_request *request, failed: kfree(r); kfree(e); - fw_send_response(card, request, RCODE_CONFLICT_ERROR); + if (request) + fw_send_response(card, request, RCODE_CONFLICT_ERROR); } static void release_address_handler(struct client *client, @@ -715,15 +717,17 @@ static int ioctl_send_response(struct client *client, void *buffer) r = container_of(resource, struct inbound_transaction_resource, resource); - if (request->length < r->length) - r->length = request->length; - - if (copy_from_user(r->data, u64_to_uptr(request->data), r->length)) { - ret = -EFAULT; - goto out; + if (r->request) { + if (request->length < r->length) + r->length = request->length; + if (copy_from_user(r->data, u64_to_uptr(request->data), + r->length)) { + ret = -EFAULT; + goto out; + } + fw_send_response(client->device->card, r->request, + request->rcode); } - - fw_send_response(client->device->card, r->request, request->rcode); out: kfree(r); diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 842739df23e2..495849eb13cc 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -432,14 +432,20 @@ static struct fw_address_handler *lookup_overlapping_address_handler( return NULL; } +static bool is_enclosing_handler(struct fw_address_handler *handler, + unsigned long long offset, size_t length) +{ + return handler->offset <= offset && + offset + length <= handler->offset + handler->length; +} + static struct fw_address_handler *lookup_enclosing_address_handler( struct list_head *list, unsigned long long offset, size_t length) { struct fw_address_handler *handler; list_for_each_entry(handler, list, link) { - if (handler->offset <= offset && - offset + length <= handler->offset + handler->length) + if (is_enclosing_handler(handler, offset, length)) return handler; } @@ -465,6 +471,12 @@ const struct fw_address_region fw_unit_space_region = { .start = 0xfffff0000900ULL, .end = 0x1000000000000ULL, }; #endif /* 0 */ +static bool is_in_fcp_region(u64 offset, size_t length) +{ + return offset >= (CSR_REGISTER_BASE | CSR_FCP_COMMAND) && + offset + length <= (CSR_REGISTER_BASE | CSR_FCP_END); +} + /** * fw_core_add_address_handler - register for incoming requests * @handler: callback @@ -477,8 +489,11 @@ const struct fw_address_region fw_unit_space_region = * give the details of the particular request. * * Return value: 0 on success, non-zero otherwise. + * * The start offset of the handler's address region is determined by * fw_core_add_address_handler() and is returned in handler->offset. + * + * Address allocations are exclusive, except for the FCP registers. */ int fw_core_add_address_handler(struct fw_address_handler *handler, const struct fw_address_region *region) @@ -498,10 +513,12 @@ int fw_core_add_address_handler(struct fw_address_handler *handler, handler->offset = region->start; while (handler->offset + handler->length <= region->end) { - other = - lookup_overlapping_address_handler(&address_handler_list, - handler->offset, - handler->length); + if (is_in_fcp_region(handler->offset, handler->length)) + other = NULL; + else + other = lookup_overlapping_address_handler + (&address_handler_list, + handler->offset, handler->length); if (other != NULL) { handler->offset += other->length; } else { @@ -668,6 +685,9 @@ static struct fw_request *allocate_request(struct fw_packet *p) void fw_send_response(struct fw_card *card, struct fw_request *request, int rcode) { + if (WARN_ONCE(!request, "invalid for FCP address handlers")) + return; + /* unified transaction or broadcast transaction: don't respond */ if (request->ack != ACK_PENDING || HEADER_DESTINATION_IS_BROADCAST(request->request_header[0])) { @@ -686,26 +706,15 @@ void fw_send_response(struct fw_card *card, } EXPORT_SYMBOL(fw_send_response); -void fw_core_handle_request(struct fw_card *card, struct fw_packet *p) +static void handle_exclusive_region_request(struct fw_card *card, + struct fw_packet *p, + struct fw_request *request, + unsigned long long offset) { struct fw_address_handler *handler; - struct fw_request *request; - unsigned long long offset; unsigned long flags; int tcode, destination, source; - if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE) - return; - - request = allocate_request(p); - if (request == NULL) { - /* FIXME: send statically allocated busy packet. */ - return; - } - - offset = - ((unsigned long long) - HEADER_GET_OFFSET_HIGH(p->header[1]) << 32) | p->header[2]; tcode = HEADER_GET_TCODE(p->header[0]); destination = HEADER_GET_DESTINATION(p->header[0]); source = HEADER_GET_SOURCE(p->header[1]); @@ -732,6 +741,73 @@ void fw_core_handle_request(struct fw_card *card, struct fw_packet *p) request->data, request->length, handler->callback_data); } + +static void handle_fcp_region_request(struct fw_card *card, + struct fw_packet *p, + struct fw_request *request, + unsigned long long offset) +{ + struct fw_address_handler *handler; + unsigned long flags; + int tcode, destination, source; + + if ((offset != (CSR_REGISTER_BASE | CSR_FCP_COMMAND) && + offset != (CSR_REGISTER_BASE | CSR_FCP_RESPONSE)) || + request->length > 0x200) { + fw_send_response(card, request, RCODE_ADDRESS_ERROR); + + return; + } + + tcode = HEADER_GET_TCODE(p->header[0]); + destination = HEADER_GET_DESTINATION(p->header[0]); + source = HEADER_GET_SOURCE(p->header[1]); + + if (tcode != TCODE_WRITE_QUADLET_REQUEST && + tcode != TCODE_WRITE_BLOCK_REQUEST) { + fw_send_response(card, request, RCODE_TYPE_ERROR); + + return; + } + + spin_lock_irqsave(&address_handler_lock, flags); + list_for_each_entry(handler, &address_handler_list, link) { + if (is_enclosing_handler(handler, offset, request->length)) + handler->address_callback(card, NULL, tcode, + destination, source, + p->generation, p->speed, + offset, request->data, + request->length, + handler->callback_data); + } + spin_unlock_irqrestore(&address_handler_lock, flags); + + fw_send_response(card, request, RCODE_COMPLETE); +} + +void fw_core_handle_request(struct fw_card *card, struct fw_packet *p) +{ + struct fw_request *request; + unsigned long long offset; + + if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE) + return; + + request = allocate_request(p); + if (request == NULL) { + /* FIXME: send statically allocated busy packet. */ + return; + } + + offset = ((u64)HEADER_GET_OFFSET_HIGH(p->header[1]) << 32) | + p->header[2]; + + if (!is_in_fcp_region(offset, request->length)) + handle_exclusive_region_request(card, p, request, offset); + else + handle_fcp_region_request(card, p, request, offset); + +} EXPORT_SYMBOL(fw_core_handle_request); void fw_core_handle_response(struct fw_card *card, struct fw_packet *p) diff --git a/drivers/media/dvb/firewire/firedtv-fw.c b/drivers/media/dvb/firewire/firedtv-fw.c index fe44789ab037..6223bf01efe9 100644 --- a/drivers/media/dvb/firewire/firedtv-fw.c +++ b/drivers/media/dvb/firewire/firedtv-fw.c @@ -202,14 +202,8 @@ static void handle_fcp(struct fw_card *card, struct fw_request *request, unsigned long flags; int su; - if ((tcode != TCODE_WRITE_QUADLET_REQUEST && - tcode != TCODE_WRITE_BLOCK_REQUEST) || - offset != CSR_REGISTER_BASE + CSR_FCP_RESPONSE || - length == 0 || - (((u8 *)payload)[0] & 0xf0) != 0) { - fw_send_response(card, request, RCODE_TYPE_ERROR); + if (length < 2 || (((u8 *)payload)[0] & 0xf0) != 0) return; - } su = ((u8 *)payload)[1] & 0x7; @@ -230,10 +224,8 @@ static void handle_fcp(struct fw_card *card, struct fw_request *request, } spin_unlock_irqrestore(&node_list_lock, flags); - if (fdtv) { + if (fdtv) avc_recv(fdtv, payload, length); - fw_send_response(card, request, RCODE_COMPLETE); - } } static struct fw_address_handler fcp_handler = { diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index c6b3ca3af6df..1f716d9f714b 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -340,6 +340,9 @@ struct fw_cdev_send_response { * The @closure field is passed back to userspace in the response event. * The @handle field is an out parameter, returning a handle to the allocated * range to be used for later deallocation of the range. + * + * The address range is allocated on all local nodes. The address allocation + * is exclusive except for the FCP command and response registers. */ struct fw_cdev_allocate { __u64 offset; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 9416a461b696..a0e67150a729 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -248,8 +248,8 @@ typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode, void *data, size_t length, void *callback_data); /* - * Important note: The callback must guarantee that either fw_send_response() - * or kfree() is called on the @request. + * Important note: Except for the FCP registers, the callback must guarantee + * that either fw_send_response() or kfree() is called on the @request. */ typedef void (*fw_address_callback_t)(struct fw_card *card, struct fw_request *request, -- cgit v1.2.3 From 9bd3f98821a83041e77ee25158b80b535d02d7b4 Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Wed, 30 Dec 2009 08:41:07 +0100 Subject: block: blk_rq_err_sectors cleanup blk_rq_err_sectors() seems useless, get rid of it. Signed-off-by: Gui Jianfeng Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 59b832be3044..9b98173a8184 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -845,7 +845,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) * blk_rq_err_bytes() : bytes left till the next error boundary * blk_rq_sectors() : sectors left in the entire request * blk_rq_cur_sectors() : sectors left in the current segment - * blk_rq_err_sectors() : sectors left till the next error boundary */ static inline sector_t blk_rq_pos(const struct request *rq) { @@ -874,11 +873,6 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> 9; } -static inline unsigned int blk_rq_err_sectors(const struct request *rq) -{ - return blk_rq_err_bytes(rq) >> 9; -} - /* * Request issue related functions. */ -- cgit v1.2.3 From e96dc9674cb597de4fee757ed005c8465072d13f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:26 +0800 Subject: tracing/syscalls: Fix typo in SYSCALL_DEFINE0 The struct syscall_metadata variable name in SYSCALL_DEFINE0 should be __syscall_meta__##sname instead of __syscall_meta_##sname to match the name that is in SYSCALL_DEFINE1/2/3/4/5/6. This error causes event_enter_##sname->data to point to the wrong location, which causes syscalls which are defined by SYSCALL_DEFINE0() not to be traced. Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D2E.1010807@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 65793e90d6f6..207466a49f3d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -195,7 +195,7 @@ struct perf_event_attr; static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ __attribute__((section("__syscalls_metadata"))) \ - __syscall_meta_##sname = { \ + __syscall_meta__##sname = { \ .name = "sys_"#sname, \ .nb_args = 0, \ .enter_event = &event_enter__##sname, \ -- cgit v1.2.3 From ed656d8deccc5669afa33387568e7ec6f14e3e94 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Sat, 26 Dec 2009 17:58:11 +0100 Subject: kfifo: Fix typo in comment It's DECLARE_KFIFO, not DECLARED_KFIFO. Signed-off-by: Rolf Eike Beer Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 3d44e9c65a8e..7c6b32a1421c 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -81,7 +81,7 @@ union { \ } /** - * INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO + * INIT_KFIFO - Initialize a kfifo declared by DECLARE_KFIFO * @name: name of the declared kfifo datatype */ #define INIT_KFIFO(name) \ -- cgit v1.2.3 From d7f0eea9e431e1b8b0742a74db1a9490730b2a25 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 30 Dec 2009 15:36:42 +0800 Subject: ACPI: introduce kernel parameter acpi_sleep=sci_force_enable Introduce kernel parameter acpi_sleep=sci_force_enable some laptop requires SCI_EN being set directly on resume, or else they hung somewhere in the resume code path. We already have a blacklist for these laptops but we still need this option, especially when debugging some suspend/resume problems, in case there are systems that need this workaround and are not yet in the blacklist. Signed-off-by: Zhang Rui Acked-by: Rafael J. Wysocki Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 5 ++++- arch/x86/kernel/acpi/sleep.c | 2 ++ drivers/acpi/sleep.c | 29 +++++++++++++++++------------ include/linux/acpi.h | 1 + 4 files changed, 24 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 5ba4d9dff113..736d45602886 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -240,7 +240,7 @@ and is between 256 and 4096 characters. It is defined in the file acpi_sleep= [HW,ACPI] Sleep options Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, - old_ordering, s4_nonvs } + old_ordering, s4_nonvs, sci_force_enable } See Documentation/power/video.txt for information on s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep @@ -253,6 +253,9 @@ and is between 256 and 4096 characters. It is defined in the file of _PTS is used by default). s4_nonvs prevents the kernel from saving/restoring the ACPI NVS memory during hibernation. + sci_force_enable causes the kernel to set SCI_EN directly + on resume from S1/S3 (which is against the ACPI spec, + but some broken systems don't work without it). acpi_use_timer_override [HW,ACPI] Use timer override. For some broken Nvidia NF5 boards diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 82e508677b91..f9961034e557 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -162,6 +162,8 @@ static int __init acpi_sleep_setup(char *str) #endif if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); + if (strncmp(str, "sci_force_enable", 16) == 0) + acpi_set_sci_en_on_resume(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 5f2c379ab7bf..79d33d908b5a 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -80,6 +80,23 @@ static int acpi_sleep_prepare(u32 acpi_state) #ifdef CONFIG_ACPI_SLEEP static u32 acpi_target_sleep_state = ACPI_STATE_S0; +/* + * According to the ACPI specification the BIOS should make sure that ACPI is + * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, + * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI + * on such systems during resume. Unfortunately that doesn't help in + * particularly pathological cases in which SCI_EN has to be set directly on + * resume, although the specification states very clearly that this flag is + * owned by the hardware. The set_sci_en_on_resume variable will be set in such + * cases. + */ +static bool set_sci_en_on_resume; + +void __init acpi_set_sci_en_on_resume(void) +{ + set_sci_en_on_resume = true; +} + /* * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the * user to request that behavior by using the 'acpi_old_suspend_ordering' @@ -170,18 +187,6 @@ static void acpi_pm_end(void) #endif /* CONFIG_ACPI_SLEEP */ #ifdef CONFIG_SUSPEND -/* - * According to the ACPI specification the BIOS should make sure that ACPI is - * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, - * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI - * on such systems during resume. Unfortunately that doesn't help in - * particularly pathological cases in which SCI_EN has to be set directly on - * resume, although the specification states very clearly that this flag is - * owned by the hardware. The set_sci_en_on_resume variable will be set in such - * cases. - */ -static bool set_sci_en_on_resume; - extern void do_suspend_lowlevel(void); static u32 acpi_suspend_states[] = { diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ce945d4845fc..36924255c0d5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -251,6 +251,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n, void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); void __init acpi_s4_no_nvs(void); +void __init acpi_set_sci_en_on_resume(void); #endif /* CONFIG_PM_SLEEP */ struct acpi_osc_context { -- cgit v1.2.3 From 2f5cb43406d0b29b96248f5328a14a6f6abf8ae6 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 30 Dec 2009 08:23:30 +0000 Subject: phylib: Properly reinitialize PHYs after hibernation Since hibernation assumes power loss, we should fully reinitialize PHYs (including platform fixups), as if PHYs were just attached. This patch factors phy_init_hw() out of phy_attach_direct(), then converts mdio_bus to dev_pm_ops and adds an appropriate restore() callback. Signed-off-by: Anton Vorontsov Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 50 +++++++++++++++++++++++++++++++++++++------- drivers/net/phy/phy_device.c | 30 +++++++++++++------------- include/linux/phy.h | 1 + 3 files changed, 59 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 49252d390903..e17b70291bbc 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -264,6 +264,8 @@ static int mdio_bus_match(struct device *dev, struct device_driver *drv) (phydev->phy_id & phydrv->phy_id_mask)); } +#ifdef CONFIG_PM + static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { struct device_driver *drv = phydev->dev.driver; @@ -295,10 +297,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) return true; } -/* Suspend and resume. Copied from platform_suspend and - * platform_resume - */ -static int mdio_bus_suspend(struct device * dev, pm_message_t state) +static int mdio_bus_suspend(struct device *dev) { struct phy_driver *phydrv = to_phy_driver(dev->driver); struct phy_device *phydev = to_phy_device(dev); @@ -318,7 +317,7 @@ static int mdio_bus_suspend(struct device * dev, pm_message_t state) return phydrv->suspend(phydev); } -static int mdio_bus_resume(struct device * dev) +static int mdio_bus_resume(struct device *dev) { struct phy_driver *phydrv = to_phy_driver(dev->driver); struct phy_device *phydev = to_phy_device(dev); @@ -338,11 +337,48 @@ no_resume: return 0; } +static int mdio_bus_restore(struct device *dev) +{ + struct phy_device *phydev = to_phy_device(dev); + struct net_device *netdev = phydev->attached_dev; + int ret; + + if (!netdev) + return 0; + + ret = phy_init_hw(phydev); + if (ret < 0) + return ret; + + /* The PHY needs to renegotiate. */ + phydev->link = 0; + phydev->state = PHY_UP; + + phy_start_machine(phydev, NULL); + + return 0; +} + +static struct dev_pm_ops mdio_bus_pm_ops = { + .suspend = mdio_bus_suspend, + .resume = mdio_bus_resume, + .freeze = mdio_bus_suspend, + .thaw = mdio_bus_resume, + .restore = mdio_bus_restore, +}; + +#define MDIO_BUS_PM_OPS (&mdio_bus_pm_ops) + +#else + +#define MDIO_BUS_PM_OPS NULL + +#endif /* CONFIG_PM */ + struct bus_type mdio_bus_type = { .name = "mdio_bus", .match = mdio_bus_match, - .suspend = mdio_bus_suspend, - .resume = mdio_bus_resume, + .pm = MDIO_BUS_PM_OPS, }; EXPORT_SYMBOL(mdio_bus_type); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index b10fedd82143..8212b2b93422 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -378,6 +378,20 @@ void phy_disconnect(struct phy_device *phydev) } EXPORT_SYMBOL(phy_disconnect); +int phy_init_hw(struct phy_device *phydev) +{ + int ret; + + if (!phydev->drv || !phydev->drv->config_init) + return 0; + + ret = phy_scan_fixups(phydev); + if (ret < 0) + return ret; + + return phydev->drv->config_init(phydev); +} + /** * phy_attach_direct - attach a network device to a given PHY device pointer * @dev: network device to attach @@ -425,21 +439,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, /* Do initial configuration here, now that * we have certain key parameters * (dev_flags and interface) */ - if (phydev->drv->config_init) { - int err; - - err = phy_scan_fixups(phydev); - - if (err < 0) - return err; - - err = phydev->drv->config_init(phydev); - - if (err < 0) - return err; - } - - return 0; + return phy_init_hw(phydev); } EXPORT_SYMBOL(phy_attach_direct); diff --git a/include/linux/phy.h b/include/linux/phy.h index b1368b8f6572..7968defd2fa7 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -447,6 +447,7 @@ struct phy_device* get_phy_device(struct mii_bus *bus, int addr); int phy_device_register(struct phy_device *phy); int phy_clear_interrupt(struct phy_device *phydev); int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); +int phy_init_hw(struct phy_device *phydev); int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface); struct phy_device * phy_attach(struct net_device *dev, -- cgit v1.2.3 From 0719d3434747889b314a1e8add776418c4148bcf Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Dec 2009 00:39:22 +0100 Subject: reiserfs: Fix reiserfs lock <-> i_xattr_sem dependency inversion i_xattr_sem depends on the reiserfs lock. But after we grab i_xattr_sem, we may relax/relock the reiserfs lock while waiting on a freezed filesystem, creating a dependency inversion between the two locks. In order to avoid the i_xattr_sem -> reiserfs lock dependency, let's create a reiserfs_down_read_safe() that acts like reiserfs_mutex_lock_safe(): relax the reiserfs lock while grabbing another lock to avoid undesired dependencies induced by the heivyweight reiserfs lock. This fixes the following warning: [ 990.005931] ======================================================= [ 990.012373] [ INFO: possible circular locking dependency detected ] [ 990.013233] 2.6.33-rc1 #1 [ 990.013233] ------------------------------------------------------- [ 990.013233] dbench/1891 is trying to acquire lock: [ 990.013233] (&REISERFS_SB(s)->lock){+.+.+.}, at: [] reiserfs_write_lock+0x35/0x50 [ 990.013233] [ 990.013233] but task is already holding lock: [ 990.013233] (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}, at: [] reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [ 990.013233] which lock already depends on the new lock. [ 990.013233] [ 990.013233] [ 990.013233] the existing dependency chain (in reverse order) is: [ 990.013233] [ 990.013233] -> #1 (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}: [ 990.013233] [] __lock_acquire+0xf9c/0x1560 [ 990.013233] [] lock_acquire+0x8f/0xb0 [ 990.013233] [] down_write+0x44/0x80 [ 990.013233] [] reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [] reiserfs_xattr_set+0xb0/0x150 [ 990.013233] [] user_set+0x8a/0x90 [ 990.013233] [] reiserfs_setxattr+0xaa/0xb0 [ 990.013233] [] __vfs_setxattr_noperm+0x36/0xa0 [ 990.013233] [] vfs_setxattr+0xbc/0xc0 [ 990.013233] [] setxattr+0xc0/0x150 [ 990.013233] [] sys_fsetxattr+0x8d/0xa0 [ 990.013233] [] system_call_fastpath+0x16/0x1b [ 990.013233] [ 990.013233] -> #0 (&REISERFS_SB(s)->lock){+.+.+.}: [ 990.013233] [] __lock_acquire+0x12d0/0x1560 [ 990.013233] [] lock_acquire+0x8f/0xb0 [ 990.013233] [] __mutex_lock_common+0x47/0x3b0 [ 990.013233] [] mutex_lock_nested+0x3e/0x50 [ 990.013233] [] reiserfs_write_lock+0x35/0x50 [ 990.013233] [] reiserfs_prepare_write+0x45/0x180 [ 990.013233] [] reiserfs_xattr_set_handle+0x2a6/0x470 [ 990.013233] [] reiserfs_xattr_set+0xb0/0x150 [ 990.013233] [] user_set+0x8a/0x90 [ 990.013233] [] reiserfs_setxattr+0xaa/0xb0 [ 990.013233] [] __vfs_setxattr_noperm+0x36/0xa0 [ 990.013233] [] vfs_setxattr+0xbc/0xc0 [ 990.013233] [] setxattr+0xc0/0x150 [ 990.013233] [] sys_fsetxattr+0x8d/0xa0 [ 990.013233] [] system_call_fastpath+0x16/0x1b [ 990.013233] [ 990.013233] other info that might help us debug this: [ 990.013233] [ 990.013233] 2 locks held by dbench/1891: [ 990.013233] #0: (&sb->s_type->i_mutex_key#12){+.+.+.}, at: [] vfs_setxattr+0x78/0xc0 [ 990.013233] #1: (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}, at: [] reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [ 990.013233] stack backtrace: [ 990.013233] Pid: 1891, comm: dbench Not tainted 2.6.33-rc1 #1 [ 990.013233] Call Trace: [ 990.013233] [] print_circular_bug+0xe9/0xf0 [ 990.013233] [] __lock_acquire+0x12d0/0x1560 [ 990.013233] [] ? reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [] lock_acquire+0x8f/0xb0 [ 990.013233] [] ? reiserfs_write_lock+0x35/0x50 [ 990.013233] [] ? reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [] __mutex_lock_common+0x47/0x3b0 [ 990.013233] [] ? reiserfs_write_lock+0x35/0x50 [ 990.013233] [] ? reiserfs_write_lock+0x35/0x50 [ 990.013233] [] ? mark_held_locks+0x72/0xa0 [ 990.013233] [] ? __mutex_unlock_slowpath+0xbd/0x140 [ 990.013233] [] ? trace_hardirqs_on_caller+0x14d/0x1a0 [ 990.013233] [] mutex_lock_nested+0x3e/0x50 [ 990.013233] [] reiserfs_write_lock+0x35/0x50 [ 990.013233] [] reiserfs_prepare_write+0x45/0x180 [ 990.013233] [] reiserfs_xattr_set_handle+0x2a6/0x470 [ 990.013233] [] reiserfs_xattr_set+0xb0/0x150 [ 990.013233] [] ? __mutex_lock_common+0x284/0x3b0 [ 990.013233] [] user_set+0x8a/0x90 [ 990.013233] [] reiserfs_setxattr+0xaa/0xb0 [ 990.013233] [] __vfs_setxattr_noperm+0x36/0xa0 [ 990.013233] [] vfs_setxattr+0xbc/0xc0 [ 990.013233] [] setxattr+0xc0/0x150 [ 990.013233] [] ? sched_clock_cpu+0xb8/0x100 [ 990.013233] [] ? trace_hardirqs_off+0xd/0x10 [ 990.013233] [] ? cpu_clock+0x43/0x50 [ 990.013233] [] ? fget+0xb0/0x110 [ 990.013233] [] ? fget+0x0/0x110 [ 990.013233] [] ? sysret_check+0x27/0x62 [ 990.013233] [] sys_fsetxattr+0x8d/0xa0 [ 990.013233] [] system_call_fastpath+0x16/0x1b Reported-and-tested-by: Christian Kujau Signed-off-by: Frederic Weisbecker Cc: Alexander Beregalov Cc: Chris Mason Cc: Ingo Molnar --- fs/reiserfs/xattr.c | 2 +- include/linux/reiserfs_fs.h | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 8891cd88a3f4..a0e2e7acdc75 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -484,7 +484,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, if (IS_ERR(dentry)) return PTR_ERR(dentry); - down_write(&REISERFS_I(inode)->i_xattr_sem); + reiserfs_down_read_safe(&REISERFS_I(inode)->i_xattr_sem, inode->i_sb); xahash = xattr_hash(buffer, buffer_size); while (buffer_pos < buffer_size || buffer_pos == 0) { diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 4351b49e2b1e..35d3f459b0ac 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -106,6 +106,14 @@ reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, reiserfs_write_lock(s); } +static inline void +reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) +{ + reiserfs_write_unlock(s); + down_read(sem); + reiserfs_write_lock(s); +} + /* * When we schedule, we usually want to also release the write lock, * according to the previous bkl based locking scheme of reiserfs. -- cgit v1.2.3 From c4a62ca362258d98f42efb282cfbf9b61caffdbe Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Dec 2009 03:20:19 +0100 Subject: reiserfs: Warn on lock relax if taken recursively When we relax the reiserfs lock to avoid creating unwanted dependencies against others locks while grabbing these, we want to ensure it has not been taken recursively, otherwise the lock won't be really relaxed. Only its depth will be decreased. The unwanted dependency would then actually happen. To prevent from that, add a reiserfs_lock_check_recursive() call in the places that need it. Signed-off-by: Frederic Weisbecker Cc: Alexander Beregalov Cc: Chris Mason Cc: Ingo Molnar --- fs/reiserfs/lock.c | 9 +++++++++ include/linux/reiserfs_fs.h | 9 +++++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c index ee2cfc0fd8a7..b87aa2c1afc1 100644 --- a/fs/reiserfs/lock.c +++ b/fs/reiserfs/lock.c @@ -86,3 +86,12 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller) reiserfs_panic(sb, "%s called without kernel lock held %d", caller); } + +#ifdef CONFIG_REISERFS_CHECK +void reiserfs_lock_check_recursive(struct super_block *sb) +{ + struct reiserfs_sb_info *sb_i = REISERFS_SB(sb); + + WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n"); +} +#endif diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 35d3f459b0ac..793bf8351ab8 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -62,6 +62,12 @@ void reiserfs_write_unlock(struct super_block *s); int reiserfs_write_lock_once(struct super_block *s); void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); +#ifdef CONFIG_REISERFS_CHECK +void reiserfs_lock_check_recursive(struct super_block *s); +#else +static inline void reiserfs_lock_check_recursive(struct super_block *s) { } +#endif + /* * Several mutexes depend on the write lock. * However sometimes we want to relax the write lock while we hold @@ -92,6 +98,7 @@ void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); static inline void reiserfs_mutex_lock_safe(struct mutex *m, struct super_block *s) { + reiserfs_lock_check_recursive(s); reiserfs_write_unlock(s); mutex_lock(m); reiserfs_write_lock(s); @@ -101,6 +108,7 @@ static inline void reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, struct super_block *s) { + reiserfs_lock_check_recursive(s); reiserfs_write_unlock(s); mutex_lock_nested(m, subclass); reiserfs_write_lock(s); @@ -109,6 +117,7 @@ reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, static inline void reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) { + reiserfs_lock_check_recursive(s); reiserfs_write_unlock(s); down_read(sem); reiserfs_write_lock(s); -- cgit v1.2.3 From 96d07d211739fd2450ac54e81d00fa40fcd4b1bd Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 20 Nov 2009 14:16:33 +0100 Subject: resource: move kernel function inside __KERNEL__ It is an internal function. Move it inside __KERNEL__ ifdef, along with task_struct declaration. Then we get: --- /usr/include/linux/resource.h 2009-09-14 15:09:29.000000000 +0200 +++ usr/include/linux/resource.h 2010-01-04 11:30:54.000000000 +0100 @@ -3,8 +3,6 @@ #include -struct task_struct; - /* * Resource control/accounting header file for linux */ @@ -70,6 +68,5 @@ */ #include -int getrusage(struct task_struct *p, int who, struct rusage *ru); #endif *********** include/linux/Kbuild is untouched, since unifdef is run even on headers-y nowadays. Signed-off-by: Jiri Slaby --- include/linux/resource.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/resource.h b/include/linux/resource.h index 40fc7e626082..f1e914eefeab 100644 --- a/include/linux/resource.h +++ b/include/linux/resource.h @@ -3,8 +3,6 @@ #include -struct task_struct; - /* * Resource control/accounting header file for linux */ @@ -70,6 +68,12 @@ struct rlimit { */ #include +#ifdef __KERNEL__ + +struct task_struct; + int getrusage(struct task_struct *p, int who, struct rusage __user *ru); +#endif /* __KERNEL__ */ + #endif -- cgit v1.2.3 From 3e10e716abf3c71bdb5d86b8f507f9e72236c9cd Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 19 Nov 2009 17:16:37 +0100 Subject: resource: add helpers for fetching rlimits We want to be sure that compiler fetches the limit variable only once, so add helpers for fetching current and maximal resource limits which do that. Add them to sched.h (instead of resource.h) due to circular dependency sched.h->resource.h->task_struct Alternative would be to create a separate res_access.h or similar. Signed-off-by: Jiri Slaby Cc: James Morris Cc: Heiko Carstens Cc: Andrew Morton Cc: Ingo Molnar --- include/linux/sched.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f2f842db03ce..8d4991be9d53 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2601,6 +2601,28 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) } #endif /* CONFIG_MM_OWNER */ +static inline unsigned long task_rlimit(const struct task_struct *tsk, + unsigned int limit) +{ + return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur); +} + +static inline unsigned long task_rlimit_max(const struct task_struct *tsk, + unsigned int limit) +{ + return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max); +} + +static inline unsigned long rlimit(unsigned int limit) +{ + return task_rlimit(current, limit); +} + +static inline unsigned long rlimit_max(unsigned int limit) +{ + return task_rlimit_max(current, limit); +} + #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From 1ae861e652b5457e7fa98ccbc55abea1e207916e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 31 Dec 2009 12:15:54 +0100 Subject: PCI/PM: Use per-device D3 delays It turns out that some PCI devices require extra delays when changing power state from D3 to D0 (and the other way around). Although this is against the PCI specification, we can handle it quite easily by allowing drivers to define arbitrary D3 delays for devices known to require extra time for switching power states. Introduce additional field d3_delay in struct pci_dev and use it to store the value of the device's D0->D3 delay, in miliseconds. Make the PCI PM core code use the per-device d3_delay unless pci_pm_d3_delay is greater (in which case the latter is used). [This also allows the driver to specify d3_delay shorter than the 10 ms required by the PCI standard if the device is known to be able to handle that.] Make the sky2 driver set d3_delay to 150 for devices handled by it. Fixes http://bugzilla.kernel.org/show_bug.cgi?id=14730 which is a listed regression from 2.6.30. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/net/sky2.c | 1 + drivers/pci/pci.c | 19 +++++++++++++++---- include/linux/pci.h | 1 + 3 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 1c01b96c9611..2d28d58200d0 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -4684,6 +4684,7 @@ static int __devinit sky2_probe(struct pci_dev *pdev, INIT_WORK(&hw->restart_work, sky2_restart); pci_set_drvdata(pdev, hw); + pdev->d3_delay = 150; return 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 0906599ebfde..315fea47e784 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -29,7 +29,17 @@ const char *pci_power_names[] = { }; EXPORT_SYMBOL_GPL(pci_power_names); -unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT; +unsigned int pci_pm_d3_delay; + +static void pci_dev_d3_sleep(struct pci_dev *dev) +{ + unsigned int delay = dev->d3_delay; + + if (delay < pci_pm_d3_delay) + delay = pci_pm_d3_delay; + + msleep(delay); +} #ifdef CONFIG_PCI_DOMAINS int pci_domains_supported = 1; @@ -522,7 +532,7 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) /* Mandatory power management transition delays */ /* see PCI PM 1.1 5.6.1 table 18 */ if (state == PCI_D3hot || dev->current_state == PCI_D3hot) - msleep(pci_pm_d3_delay); + pci_dev_d3_sleep(dev); else if (state == PCI_D2 || dev->current_state == PCI_D2) udelay(PCI_PM_D2_DELAY); @@ -1409,6 +1419,7 @@ void pci_pm_init(struct pci_dev *dev) } dev->pm_cap = pm; + dev->d3_delay = PCI_PM_D3_WAIT; dev->d1_support = false; dev->d2_support = false; @@ -2247,12 +2258,12 @@ static int pci_pm_reset(struct pci_dev *dev, int probe) csr &= ~PCI_PM_CTRL_STATE_MASK; csr |= PCI_D3hot; pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr); - msleep(pci_pm_d3_delay); + pci_dev_d3_sleep(dev); csr &= ~PCI_PM_CTRL_STATE_MASK; csr |= PCI_D0; pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr); - msleep(pci_pm_d3_delay); + pci_dev_d3_sleep(dev); return 0; } diff --git a/include/linux/pci.h b/include/linux/pci.h index 5da0690d9cee..174e5392e51e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -243,6 +243,7 @@ struct pci_dev { unsigned int d2_support:1; /* Low power state D2 is supported */ unsigned int no_d1d2:1; /* Only allow D0 and D3 */ unsigned int wakeup_prepared:1; + unsigned int d3_delay; /* D3->D0 transition time in ms */ #ifdef CONFIG_PCIEASPM struct pcie_link_state *link_state; /* ASPM link state. */ -- cgit v1.2.3 From cfe79c00a2f4f687eed8b7534d1d3d3d35540c29 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 6 Jan 2010 17:23:23 +0000 Subject: NOMMU: Avoiding duplicate icache flushes of shared maps When working with FDPIC, there are many shared mappings of read-only code regions between applications (the C library, applet packages like busybox, etc.), but the current do_mmap_pgoff() function will issue an icache flush whenever a VMA is added to an MM instead of only doing it when the map is initially created. The flush can instead be done when a region is first mmapped PROT_EXEC. Note that we may not rely on the first mapping of a region being executable - it's possible for it to be PROT_READ only, so we have to remember whether we've flushed the region or not, and then flush the entire region when a bit of it is made executable. However, this also affects the brk area. That will no longer be executable. We can mprotect() it to PROT_EXEC on MPU-mode kernels, but for NOMMU mode kernels, when it increases the brk allocation, making sys_brk() flush the extra from the icache should suffice. The brk area probably isn't used by NOMMU programs since the brk area can only use up the leavings from the stack allocation, where the stack allocation is larger than requested. Signed-off-by: David Howells Signed-off-by: Mike Frysinger Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 ++ mm/nommu.c | 11 ++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 84a524afb3dc..84d020bed083 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -123,6 +123,8 @@ struct vm_region { struct file *vm_file; /* the backing file or NULL */ atomic_t vm_usage; /* region usage count */ + bool vm_icache_flushed : 1; /* true if the icache has been flushed for + * this region */ }; /* diff --git a/mm/nommu.c b/mm/nommu.c index 6f9248f89bde..a8d17521624a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -432,6 +432,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) /* * Ok, looks good - let it rip. */ + flush_icache_range(mm->brk, brk); return mm->brk = brk; } @@ -1353,10 +1354,14 @@ unsigned long do_mmap_pgoff(struct file *file, share: add_vma_to_mm(current->mm, vma); - up_write(&nommu_region_sem); + /* we flush the region from the icache only when the first executable + * mapping of it is made */ + if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) { + flush_icache_range(region->vm_start, region->vm_end); + region->vm_icache_flushed = true; + } - if (prot & PROT_EXEC) - flush_icache_range(result, result + len); + up_write(&nommu_region_sem); kleave(" = %lx", result); return result; -- cgit v1.2.3 From 6144a85a0e018c19bc4b24f7eb6c1f3f7431813d Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 7 Jan 2010 11:58:36 -0600 Subject: maccess,probe_kernel: Allow arch specific override probe_kernel_(read|write) Some archs such as blackfin, would like to have an arch specific probe_kernel_read() and probe_kernel_write() implementation which can fall back to the generic implementation if no special operations are needed. CC: Thomas Gleixner CC: Ingo Molnar Signed-off-by: Jason Wessel Signed-off-by: Mike Frysinger --- include/linux/uaccess.h | 4 +++- mm/maccess.c | 11 +++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 6b58367d145e..d512d98dfb7d 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -94,6 +94,7 @@ static inline unsigned long __copy_from_user_nocache(void *to, * happens, handle that and return -EFAULT. */ extern long probe_kernel_read(void *dst, void *src, size_t size); +extern long __probe_kernel_read(void *dst, void *src, size_t size); /* * probe_kernel_write(): safely attempt to write to a location @@ -104,6 +105,7 @@ extern long probe_kernel_read(void *dst, void *src, size_t size); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -extern long probe_kernel_write(void *dst, void *src, size_t size); +extern long notrace probe_kernel_write(void *dst, void *src, size_t size); +extern long notrace __probe_kernel_write(void *dst, void *src, size_t size); #endif /* __LINUX_UACCESS_H__ */ diff --git a/mm/maccess.c b/mm/maccess.c index 9073695ff25f..4e348dbaecd7 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -14,7 +14,11 @@ * Safely read from address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. */ -long probe_kernel_read(void *dst, void *src, size_t size) + +long __weak probe_kernel_read(void *dst, void *src, size_t size) + __attribute__((alias("__probe_kernel_read"))); + +long __probe_kernel_read(void *dst, void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); @@ -39,7 +43,10 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -long notrace __weak probe_kernel_write(void *dst, void *src, size_t size) +long __weak probe_kernel_write(void *dst, void *src, size_t size) + __attribute__((alias("__probe_kernel_write"))); + +long __probe_kernel_write(void *dst, void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); -- cgit v1.2.3 From b11e1eca7ed9c0b5dab21a62c11acc711d9bdda0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 7 Jan 2010 11:58:37 -0600 Subject: kgdb: Fix kernel-doc format error in kgdb.h linux-next-20081022//include/linux/kgdb.h:308): duplicate section name 'Description' and fix typos in that file's kernel-doc comments. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Jason Wessel --- include/linux/kgdb.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 6adcc297e354..19ec41a183f5 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -29,8 +29,7 @@ struct pt_regs; * * On some architectures it is required to skip a breakpoint * exception when it occurs after a breakpoint has been removed. - * This can be implemented in the architecture specific portion of - * for kgdb. + * This can be implemented in the architecture specific portion of kgdb. */ extern int kgdb_skipexception(int exception, struct pt_regs *regs); @@ -65,7 +64,7 @@ struct uart_port; /** * kgdb_breakpoint - compiled in breakpoint * - * This will be impelmented a static inline per architecture. This + * This will be implemented as a static inline per architecture. This * function is called by the kgdb core to execute an architecture * specific trap to cause kgdb to enter the exception processing. * @@ -190,7 +189,7 @@ kgdb_arch_handle_exception(int vector, int signo, int err_code, * @flags: Current IRQ state * * On SMP systems, we need to get the attention of the other CPUs - * and get them be in a known state. This should do what is needed + * and get them into a known state. This should do what is needed * to get the other CPUs to call kgdb_wait(). Note that on some arches, * the NMI approach is not used for rounding up all the CPUs. For example, * in case of MIPS, smp_call_function() is used to roundup CPUs. In -- cgit v1.2.3 From 4b529401c5089cf33f7165607cbc2fde43357bfb Mon Sep 17 00:00:00 2001 From: Andreas Fenkart Date: Fri, 8 Jan 2010 14:42:31 -0800 Subject: mm: make totalhigh_pages unsigned long Makes it consistent with the extern declaration, used when CONFIG_HIGHMEM is set Removes redundant casts in printout messages Signed-off-by: Andreas Fenkart Acked-by: Russell King Cc: Ralf Baechle Cc: David Howells Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Chen Liqin Cc: Lennox Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/init.c | 2 +- arch/mips/mm/init.c | 2 +- arch/mips/sgi-ip27/ip27-memory.c | 2 +- arch/mn10300/mm/init.c | 3 +-- arch/score/mm/init.c | 2 +- arch/x86/mm/init_32.c | 3 +-- include/linux/highmem.h | 2 +- 7 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 52c40d155672..a04ffbbbe253 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -616,7 +616,7 @@ void __init mem_init(void) "%dK data, %dK init, %luK highmem)\n", nr_free_pages() << (PAGE_SHIFT-10), codesize >> 10, datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); if (PAGE_SIZE >= 16384 && num_physpages <= 128) { extern int sysctl_overcommit_memory; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 9e8d00389eef..1651942f7feb 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -424,7 +424,7 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index f61c164d1e67..bc1297109cc5 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -505,5 +505,5 @@ void __init mem_init(void) (num_physpages - tmp) << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); } diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c index ec1420562dc7..dd27a9a35152 100644 --- a/arch/mn10300/mm/init.c +++ b/arch/mn10300/mm/init.c @@ -118,8 +118,7 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT - 10)) - ); + totalhigh_pages << (PAGE_SHIFT - 10)); } /* diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c index 8c15b2c85d5a..dfaf458d6702 100644 --- a/arch/score/mm/init.c +++ b/arch/score/mm/init.c @@ -106,7 +106,7 @@ void __init mem_init(void) ram << (PAGE_SHIFT-10), codesize >> 10, reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c973f8e2a6cf..9a0c258a86be 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -892,8 +892,7 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) - ); + totalhigh_pages << (PAGE_SHIFT-10)); printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 211ff4497269..ab2cc20e21a5 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -46,7 +46,7 @@ void kmap_flush_unused(void); static inline unsigned int nr_free_highpages(void) { return 0; } -#define totalhigh_pages 0 +#define totalhigh_pages 0UL #ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) -- cgit v1.2.3 From e992cd9b72a18122bd5c958715623057f110793f Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 8 Jan 2010 14:42:35 -0800 Subject: kmemcheck: make bitfield annotations truly no-ops when disabled It turns out that even zero-sized struct members (int foo[0];) will affect the struct layout, causing us in particular to lose 4 bytes in struct sock. This patch fixes the regression in CONFIG_KMEMCHECK=n case. Reported-by: Eric Dumazet Signed-off-by: Vegard Nossum Acked-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmemcheck.h | 110 ++++++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index e880d4cf9e22..08d7dc4ddf40 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h @@ -36,6 +36,56 @@ int kmemcheck_hide_addr(unsigned long address); bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size); +/* + * Bitfield annotations + * + * How to use: If you have a struct using bitfields, for example + * + * struct a { + * int x:8, y:8; + * }; + * + * then this should be rewritten as + * + * struct a { + * kmemcheck_bitfield_begin(flags); + * int x:8, y:8; + * kmemcheck_bitfield_end(flags); + * }; + * + * Now the "flags_begin" and "flags_end" members may be used to refer to the + * beginning and end, respectively, of the bitfield (and things like + * &x.flags_begin is allowed). As soon as the struct is allocated, the bit- + * fields should be annotated: + * + * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); + * kmemcheck_annotate_bitfield(a, flags); + */ +#define kmemcheck_bitfield_begin(name) \ + int name##_begin[0]; + +#define kmemcheck_bitfield_end(name) \ + int name##_end[0]; + +#define kmemcheck_annotate_bitfield(ptr, name) \ + do { \ + int _n; \ + \ + if (!ptr) \ + break; \ + \ + _n = (long) &((ptr)->name##_end) \ + - (long) &((ptr)->name##_begin); \ + MAYBE_BUILD_BUG_ON(_n < 0); \ + \ + kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ + } while (0) + +#define kmemcheck_annotate_variable(var) \ + do { \ + kmemcheck_mark_initialized(&(var), sizeof(var)); \ + } while (0) \ + #else #define kmemcheck_enabled 0 @@ -106,60 +156,16 @@ static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) return true; } -#endif /* CONFIG_KMEMCHECK */ - -/* - * Bitfield annotations - * - * How to use: If you have a struct using bitfields, for example - * - * struct a { - * int x:8, y:8; - * }; - * - * then this should be rewritten as - * - * struct a { - * kmemcheck_bitfield_begin(flags); - * int x:8, y:8; - * kmemcheck_bitfield_end(flags); - * }; - * - * Now the "flags_begin" and "flags_end" members may be used to refer to the - * beginning and end, respectively, of the bitfield (and things like - * &x.flags_begin is allowed). As soon as the struct is allocated, the bit- - * fields should be annotated: - * - * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); - * kmemcheck_annotate_bitfield(a, flags); - * - * Note: We provide the same definitions for both kmemcheck and non- - * kmemcheck kernels. This makes it harder to introduce accidental errors. It - * is also allowed to pass NULL pointers to kmemcheck_annotate_bitfield(). - */ -#define kmemcheck_bitfield_begin(name) \ - int name##_begin[0]; - -#define kmemcheck_bitfield_end(name) \ - int name##_end[0]; +#define kmemcheck_bitfield_begin(name) +#define kmemcheck_bitfield_end(name) +#define kmemcheck_annotate_bitfield(ptr, name) \ + do { \ + } while (0) -#define kmemcheck_annotate_bitfield(ptr, name) \ - do { \ - int _n; \ - \ - if (!ptr) \ - break; \ - \ - _n = (long) &((ptr)->name##_end) \ - - (long) &((ptr)->name##_begin); \ - MAYBE_BUILD_BUG_ON(_n < 0); \ - \ - kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ +#define kmemcheck_annotate_variable(var) \ + do { \ } while (0) -#define kmemcheck_annotate_variable(var) \ - do { \ - kmemcheck_mark_initialized(&(var), sizeof(var)); \ - } while (0) \ +#endif /* CONFIG_KMEMCHECK */ #endif /* LINUX_KMEMCHECK_H */ -- cgit v1.2.3 From 7dd65feb6c603e13eba501c34c662259ab38e70e Mon Sep 17 00:00:00 2001 From: Albin Tonnerre Date: Fri, 8 Jan 2010 14:42:42 -0800 Subject: lib: add support for LZO-compressed kernels This patch series adds generic support for creating and extracting LZO-compressed kernel images, as well as support for using such images on the x86 and ARM architectures, and support for creating and using LZO-compressed initrd and initramfs images. Russell King said: : Testing on a Cortex A9 model: : - lzo decompressor is 65% of the time gzip takes to decompress a kernel : - lzo kernel is 9% larger than a gzip kernel : : which I'm happy to say confirms your figures when comparing the two. : : However, when comparing your new gzip code to the old gzip code: : - new is 99% of the size of the old code : - new takes 42% of the time to decompress than the old code : : What this means is that for a proper comparison, the results get even better: : - lzo is 7.5% larger than the old gzip'd kernel image : - lzo takes 28% of the time that the old gzip code took : : So the expense seems definitely worth the effort. The only reason I : can think of ever using gzip would be if you needed the additional : compression (eg, because you have limited flash to store the image.) : : I would argue that the default for ARM should therefore be LZO. This patch: The lzo compressor is worse than gzip at compression, but faster at extraction. Here are some figures for an ARM board I'm working on: Uncompressed size: 3.24Mo gzip 1.61Mo 0.72s lzo 1.75Mo 0.48s So for a compression ratio that is still relatively close to gzip, it's much faster to extract, at least in that case. This part contains: - Makefile routine to support lzo compression - Fixes to the existing lzo compressor so that it can be used in compressed kernels - wrapper around the existing lzo1x_decompress, as it only extracts one block at a time, while we need to extract a whole file here - config dialog for kernel compression [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: cleanup] Signed-off-by: Albin Tonnerre Tested-by: Wu Zhangjin Acked-by: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Tested-by: Russell King Acked-by: Russell King Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/decompress/unlzo.h | 10 ++ init/Kconfig | 18 +++- lib/decompress_unlzo.c | 209 +++++++++++++++++++++++++++++++++++++++ lib/lzo/lzo1x_decompress.c | 9 +- scripts/Makefile.lib | 5 + 5 files changed, 244 insertions(+), 7 deletions(-) create mode 100644 include/linux/decompress/unlzo.h create mode 100644 lib/decompress_unlzo.c (limited to 'include/linux') diff --git a/include/linux/decompress/unlzo.h b/include/linux/decompress/unlzo.h new file mode 100644 index 000000000000..987229752519 --- /dev/null +++ b/include/linux/decompress/unlzo.h @@ -0,0 +1,10 @@ +#ifndef DECOMPRESS_UNLZO_H +#define DECOMPRESS_UNLZO_H + +int unlzo(unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *pos, + void(*error)(char *x)); +#endif diff --git a/init/Kconfig b/init/Kconfig index a23da9f01803..d95ca7cd5d45 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -115,10 +115,13 @@ config HAVE_KERNEL_BZIP2 config HAVE_KERNEL_LZMA bool +config HAVE_KERNEL_LZO + bool + choice prompt "Kernel compression mode" default KERNEL_GZIP - depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA + depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_LZO help The linux kernel is a kind of self-extracting executable. Several compression algorithms are available, which differ @@ -141,9 +144,8 @@ config KERNEL_GZIP bool "Gzip" depends on HAVE_KERNEL_GZIP help - The old and tried gzip compression. Its compression ratio is - the poorest among the 3 choices; however its speed (both - compression and decompression) is the fastest. + The old and tried gzip compression. It provides a good balance + between compression ratio and decompression speed. config KERNEL_BZIP2 bool "Bzip2" @@ -164,6 +166,14 @@ config KERNEL_LZMA two. Compression is slowest. The kernel size is about 33% smaller with LZMA in comparison to gzip. +config KERNEL_LZO + bool "LZO" + depends on HAVE_KERNEL_LZO + help + Its compression ratio is the poorest among the 4. The kernel + size is about about 10% bigger than gzip; however its speed + (both compression and decompression) is the fastest. + endchoice config SWAP diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c new file mode 100644 index 000000000000..db521f45626e --- /dev/null +++ b/lib/decompress_unlzo.c @@ -0,0 +1,209 @@ +/* + * LZO decompressor for the Linux kernel. Code borrowed from the lzo + * implementation by Markus Franz Xaver Johannes Oberhumer. + * + * Linux kernel adaptation: + * Copyright (C) 2009 + * Albin Tonnerre, Free Electrons + * + * Original code: + * Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer + * All Rights Reserved. + * + * lzop and the LZO library are free software; you can redistribute them + * and/or modify them under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Markus F.X.J. Oberhumer + * + * http://www.oberhumer.com/opensource/lzop/ + */ + +#ifdef STATIC +#include "lzo/lzo1x_decompress.c" +#else +#include +#include +#endif + +#include +#include +#include + +#include +#include + +static const unsigned char lzop_magic[] = { + 0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a }; + +#define LZO_BLOCK_SIZE (256*1024l) +#define HEADER_HAS_FILTER 0x00000800L + +STATIC inline int INIT parse_header(u8 *input, u8 *skip) +{ + int l; + u8 *parse = input; + u8 level = 0; + u16 version; + + /* read magic: 9 first bits */ + for (l = 0; l < 9; l++) { + if (*parse++ != lzop_magic[l]) + return 0; + } + /* get version (2bytes), skip library version (2), + * 'need to be extracted' version (2) and + * method (1) */ + version = get_unaligned_be16(parse); + parse += 7; + if (version >= 0x0940) + level = *parse++; + if (get_unaligned_be32(parse) & HEADER_HAS_FILTER) + parse += 8; /* flags + filter info */ + else + parse += 4; /* flags */ + + /* skip mode and mtime_low */ + parse += 8; + if (version >= 0x0940) + parse += 4; /* skip mtime_high */ + + l = *parse++; + /* don't care about the file name, and skip checksum */ + parse += l + 4; + + *skip = parse - input; + return 1; +} + +STATIC inline int INIT unlzo(u8 *input, int in_len, + int (*fill) (void *, unsigned int), + int (*flush) (void *, unsigned int), + u8 *output, int *posp, + void (*error_fn) (char *x)) +{ + u8 skip = 0, r = 0; + u32 src_len, dst_len; + size_t tmp; + u8 *in_buf, *in_buf_save, *out_buf; + int obytes_processed = 0; + + set_error_fn(error_fn); + + if (output) { + out_buf = output; + } else if (!flush) { + error("NULL output pointer and no flush function provided"); + goto exit; + } else { + out_buf = malloc(LZO_BLOCK_SIZE); + if (!out_buf) { + error("Could not allocate output buffer"); + goto exit; + } + } + + if (input && fill) { + error("Both input pointer and fill function provided, don't know what to do"); + goto exit_1; + } else if (input) { + in_buf = input; + } else if (!fill || !posp) { + error("NULL input pointer and missing position pointer or fill function"); + goto exit_1; + } else { + in_buf = malloc(lzo1x_worst_compress(LZO_BLOCK_SIZE)); + if (!in_buf) { + error("Could not allocate input buffer"); + goto exit_1; + } + } + in_buf_save = in_buf; + + if (posp) + *posp = 0; + + if (fill) + fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE)); + + if (!parse_header(input, &skip)) { + error("invalid header"); + goto exit_2; + } + in_buf += skip; + + if (posp) + *posp = skip; + + for (;;) { + /* read uncompressed block size */ + dst_len = get_unaligned_be32(in_buf); + in_buf += 4; + + /* exit if last block */ + if (dst_len == 0) { + if (posp) + *posp += 4; + break; + } + + if (dst_len > LZO_BLOCK_SIZE) { + error("dest len longer than block size"); + goto exit_2; + } + + /* read compressed block size, and skip block checksum info */ + src_len = get_unaligned_be32(in_buf); + in_buf += 8; + + if (src_len <= 0 || src_len > dst_len) { + error("file corrupted"); + goto exit_2; + } + + /* decompress */ + tmp = dst_len; + r = lzo1x_decompress_safe((u8 *) in_buf, src_len, + out_buf, &tmp); + + if (r != LZO_E_OK || dst_len != tmp) { + error("Compressed data violation"); + goto exit_2; + } + + obytes_processed += dst_len; + if (flush) + flush(out_buf, dst_len); + if (output) + out_buf += dst_len; + if (posp) + *posp += src_len + 12; + if (fill) { + in_buf = in_buf_save; + fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE)); + } else + in_buf += src_len; + } + +exit_2: + if (!input) + free(in_buf); +exit_1: + if (!output) + free(out_buf); +exit: + return obytes_processed; +} + +#define decompress unlzo diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c index 5dc6b29c1575..f2fd09850223 100644 --- a/lib/lzo/lzo1x_decompress.c +++ b/lib/lzo/lzo1x_decompress.c @@ -11,11 +11,13 @@ * Richard Purdie */ +#ifndef STATIC #include #include -#include -#include +#endif + #include +#include #include "lzodefs.h" #define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) @@ -244,9 +246,10 @@ lookbehind_overrun: *out_len = op - out; return LZO_E_LOOKBEHIND_OVERRUN; } - +#ifndef STATIC EXPORT_SYMBOL_GPL(lzo1x_decompress_safe); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZO1X Decompressor"); +#endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index cd815ac2a50b..0fe48cd91ffa 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -235,3 +235,8 @@ quiet_cmd_lzma = LZMA $@ cmd_lzma = (cat $(filter-out FORCE,$^) | \ lzma -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ (rm -f $@ ; false) + +quiet_cmd_lzo = LZO $@ +cmd_lzo = (cat $(filter-out FORCE,$^) | \ + lzop -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ + (rm -f $@ ; false) -- cgit v1.2.3 From 80884094e34456887ecdbd107d40e72c4a40f9c9 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Fri, 8 Jan 2010 14:43:08 -0800 Subject: gpio: adp5588-gpio: new driver for ADP5588 GPIO expanders Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Cc: Jean Delvare Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/Kconfig | 9 ++ drivers/gpio/Makefile | 1 + drivers/gpio/adp5588-gpio.c | 266 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/i2c/adp5588.h | 12 ++ 4 files changed, 288 insertions(+) create mode 100644 drivers/gpio/adp5588-gpio.c (limited to 'include/linux') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index a019b49ecc9b..1f1d88ae68d6 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -172,6 +172,15 @@ config GPIO_ADP5520 To compile this driver as a module, choose M here: the module will be called adp5520-gpio. +config GPIO_ADP5588 + tristate "ADP5588 I2C GPIO expander" + depends on I2C + help + This option enables support for 18 GPIOs found + on Analog Devices ADP5588 GPIO Expanders. + To compile this driver as a module, choose M here: the module will be + called adp5588-gpio. + comment "PCI GPIO expanders:" config GPIO_CS5535 diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 52fe4cf734c7..48687238edb1 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -5,6 +5,7 @@ ccflags-$(CONFIG_DEBUG_GPIO) += -DDEBUG obj-$(CONFIG_GPIOLIB) += gpiolib.o obj-$(CONFIG_GPIO_ADP5520) += adp5520-gpio.o +obj-$(CONFIG_GPIO_ADP5588) += adp5588-gpio.o obj-$(CONFIG_GPIO_LANGWELL) += langwell_gpio.o obj-$(CONFIG_GPIO_MAX7301) += max7301.o obj-$(CONFIG_GPIO_MAX732X) += max732x.o diff --git a/drivers/gpio/adp5588-gpio.c b/drivers/gpio/adp5588-gpio.c new file mode 100644 index 000000000000..afc097a16b33 --- /dev/null +++ b/drivers/gpio/adp5588-gpio.c @@ -0,0 +1,266 @@ +/* + * GPIO Chip driver for Analog Devices + * ADP5588 I/O Expander and QWERTY Keypad Controller + * + * Copyright 2009 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#include +#include +#include +#include +#include + +#include + +#define DRV_NAME "adp5588-gpio" +#define MAXGPIO 18 +#define ADP_BANK(offs) ((offs) >> 3) +#define ADP_BIT(offs) (1u << ((offs) & 0x7)) + +struct adp5588_gpio { + struct i2c_client *client; + struct gpio_chip gpio_chip; + struct mutex lock; /* protect cached dir, dat_out */ + unsigned gpio_start; + uint8_t dat_out[3]; + uint8_t dir[3]; +}; + +static int adp5588_gpio_read(struct i2c_client *client, u8 reg) +{ + int ret = i2c_smbus_read_byte_data(client, reg); + + if (ret < 0) + dev_err(&client->dev, "Read Error\n"); + + return ret; +} + +static int adp5588_gpio_write(struct i2c_client *client, u8 reg, u8 val) +{ + int ret = i2c_smbus_write_byte_data(client, reg, val); + + if (ret < 0) + dev_err(&client->dev, "Write Error\n"); + + return ret; +} + +static int adp5588_gpio_get_value(struct gpio_chip *chip, unsigned off) +{ + struct adp5588_gpio *dev = + container_of(chip, struct adp5588_gpio, gpio_chip); + + return !!(adp5588_gpio_read(dev->client, GPIO_DAT_STAT1 + ADP_BANK(off)) + & ADP_BIT(off)); +} + +static void adp5588_gpio_set_value(struct gpio_chip *chip, + unsigned off, int val) +{ + unsigned bank, bit; + struct adp5588_gpio *dev = + container_of(chip, struct adp5588_gpio, gpio_chip); + + bank = ADP_BANK(off); + bit = ADP_BIT(off); + + mutex_lock(&dev->lock); + if (val) + dev->dat_out[bank] |= bit; + else + dev->dat_out[bank] &= ~bit; + + adp5588_gpio_write(dev->client, GPIO_DAT_OUT1 + bank, + dev->dat_out[bank]); + mutex_unlock(&dev->lock); +} + +static int adp5588_gpio_direction_input(struct gpio_chip *chip, unsigned off) +{ + int ret; + unsigned bank; + struct adp5588_gpio *dev = + container_of(chip, struct adp5588_gpio, gpio_chip); + + bank = ADP_BANK(off); + + mutex_lock(&dev->lock); + dev->dir[bank] &= ~ADP_BIT(off); + ret = adp5588_gpio_write(dev->client, GPIO_DIR1 + bank, dev->dir[bank]); + mutex_unlock(&dev->lock); + + return ret; +} + +static int adp5588_gpio_direction_output(struct gpio_chip *chip, + unsigned off, int val) +{ + int ret; + unsigned bank, bit; + struct adp5588_gpio *dev = + container_of(chip, struct adp5588_gpio, gpio_chip); + + bank = ADP_BANK(off); + bit = ADP_BIT(off); + + mutex_lock(&dev->lock); + dev->dir[bank] |= bit; + + if (val) + dev->dat_out[bank] |= bit; + else + dev->dat_out[bank] &= ~bit; + + ret = adp5588_gpio_write(dev->client, GPIO_DAT_OUT1 + bank, + dev->dat_out[bank]); + ret |= adp5588_gpio_write(dev->client, GPIO_DIR1 + bank, + dev->dir[bank]); + mutex_unlock(&dev->lock); + + return ret; +} + +static int __devinit adp5588_gpio_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct adp5588_gpio_platform_data *pdata = client->dev.platform_data; + struct adp5588_gpio *dev; + struct gpio_chip *gc; + int ret, i, revid; + + if (pdata == NULL) { + dev_err(&client->dev, "missing platform data\n"); + return -ENODEV; + } + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE_DATA)) { + dev_err(&client->dev, "SMBUS Byte Data not Supported\n"); + return -EIO; + } + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (dev == NULL) { + dev_err(&client->dev, "failed to alloc memory\n"); + return -ENOMEM; + } + + dev->client = client; + + gc = &dev->gpio_chip; + gc->direction_input = adp5588_gpio_direction_input; + gc->direction_output = adp5588_gpio_direction_output; + gc->get = adp5588_gpio_get_value; + gc->set = adp5588_gpio_set_value; + gc->can_sleep = 1; + + gc->base = pdata->gpio_start; + gc->ngpio = MAXGPIO; + gc->label = client->name; + gc->owner = THIS_MODULE; + + mutex_init(&dev->lock); + + + ret = adp5588_gpio_read(dev->client, DEV_ID); + if (ret < 0) + goto err; + + revid = ret & ADP5588_DEVICE_ID_MASK; + + for (i = 0, ret = 0; i <= ADP_BANK(MAXGPIO); i++) { + dev->dat_out[i] = adp5588_gpio_read(client, GPIO_DAT_OUT1 + i); + dev->dir[i] = adp5588_gpio_read(client, GPIO_DIR1 + i); + ret |= adp5588_gpio_write(client, KP_GPIO1 + i, 0); + ret |= adp5588_gpio_write(client, GPIO_PULL1 + i, + (pdata->pullup_dis_mask >> (8 * i)) & 0xFF); + + if (ret) + goto err; + } + + ret = gpiochip_add(&dev->gpio_chip); + if (ret) + goto err; + + dev_info(&client->dev, "gpios %d..%d on a %s Rev. %d\n", + gc->base, gc->base + gc->ngpio - 1, + client->name, revid); + + if (pdata->setup) { + ret = pdata->setup(client, gc->base, gc->ngpio, pdata->context); + if (ret < 0) + dev_warn(&client->dev, "setup failed, %d\n", ret); + } + + i2c_set_clientdata(client, dev); + return 0; + +err: + kfree(dev); + return ret; +} + +static int __devexit adp5588_gpio_remove(struct i2c_client *client) +{ + struct adp5588_gpio_platform_data *pdata = client->dev.platform_data; + struct adp5588_gpio *dev = i2c_get_clientdata(client); + int ret; + + if (pdata->teardown) { + ret = pdata->teardown(client, + dev->gpio_chip.base, dev->gpio_chip.ngpio, + pdata->context); + if (ret < 0) { + dev_err(&client->dev, "teardown failed %d\n", ret); + return ret; + } + } + + ret = gpiochip_remove(&dev->gpio_chip); + if (ret) { + dev_err(&client->dev, "gpiochip_remove failed %d\n", ret); + return ret; + } + + kfree(dev); + return 0; +} + +static const struct i2c_device_id adp5588_gpio_id[] = { + {DRV_NAME, 0}, + {} +}; + +MODULE_DEVICE_TABLE(i2c, adp5588_gpio_id); + +static struct i2c_driver adp5588_gpio_driver = { + .driver = { + .name = DRV_NAME, + }, + .probe = adp5588_gpio_probe, + .remove = __devexit_p(adp5588_gpio_remove), + .id_table = adp5588_gpio_id, +}; + +static int __init adp5588_gpio_init(void) +{ + return i2c_add_driver(&adp5588_gpio_driver); +} + +module_init(adp5588_gpio_init); + +static void __exit adp5588_gpio_exit(void) +{ + i2c_del_driver(&adp5588_gpio_driver); +} + +module_exit(adp5588_gpio_exit); + +MODULE_AUTHOR("Michael Hennerich "); +MODULE_DESCRIPTION("GPIO ADP5588 Driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h index fc5db826b48e..02c9af374741 100644 --- a/include/linux/i2c/adp5588.h +++ b/include/linux/i2c/adp5588.h @@ -89,4 +89,16 @@ struct adp5588_kpad_platform_data { unsigned short unlock_key2; /* Unlock Key 2 */ }; +struct adp5588_gpio_platform_data { + unsigned gpio_start; /* GPIO Chip base # */ + unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ + int (*setup)(struct i2c_client *client, + int gpio, unsigned ngpio, + void *context); + int (*teardown)(struct i2c_client *client, + int gpio, unsigned ngpio, + void *context); + void *context; +}; + #endif -- cgit v1.2.3 From a29815a333c6c6e677294bbe5958e771d0aad3fd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 10 Jan 2010 16:28:09 +0200 Subject: core, x86: make LIST_POISON less deadly The list macros use LIST_POISON1 and LIST_POISON2 as undereferencable pointers in order to trap erronous use of freed list_heads. Unfortunately userspace can arrange for those pointers to actually be dereferencable, potentially turning an oops to an expolit. To avoid this allow architectures (currently x86_64 only) to override the default values for these pointers with truly-undereferencable values. This is easy on x86_64 as the virtual address space is large and contains areas that cannot be mapped. Other 64-bit architectures will likely find similar unmapped ranges. [ingo: switch to 0xdead000000000000 as the unmapped area] [ingo: add comments, cleanup] [jaswinder: eliminate sparse warnings] Acked-by: Linus Torvalds Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar Signed-off-by: Avi Kivity Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 5 +++++ include/linux/poison.h | 16 ++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6bf1f1ac478c..cbcbfdee3ee0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1247,6 +1247,11 @@ config ARCH_MEMORY_PROBE def_bool X86_64 depends on MEMORY_HOTPLUG +config ILLEGAL_POINTER_VALUE + hex + default 0 if X86_32 + default 0xdead000000000000 if X86_64 + source "mm/Kconfig" config HIGHPTE diff --git a/include/linux/poison.h b/include/linux/poison.h index 7fc194aef8c2..2110a81c5e2a 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -2,13 +2,25 @@ #define _LINUX_POISON_H /********** include/linux/list.h **********/ + +/* + * Architectures might want to move the poison pointer offset + * into some well-recognized area such as 0xdead000000000000, + * that is also not mappable by user-space exploits: + */ +#ifdef CONFIG_ILLEGAL_POINTER_VALUE +# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL) +#else +# define POISON_POINTER_DELTA 0 +#endif + /* * These are non-NULL pointers that will result in page faults * under normal circumstances, used to verify that nobody uses * non-initialized list entries. */ -#define LIST_POISON1 ((void *) 0x00100100) -#define LIST_POISON2 ((void *) 0x00200200) +#define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA) +#define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA) /********** include/linux/timer.h **********/ /* -- cgit v1.2.3 From 5040ab67a2c6d5710ba497dc52a8f7035729d7b0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 11 Jan 2010 11:14:44 +0900 Subject: libata: retry link resume if necessary Interestingly, when SIDPR is used in ata_piix, writes to DET in SControl sometimes get ignored leading to detection failure. Update sata_link_resume() such that it reads back SControl after clearing DET and retry if it's not clear. Signed-off-by: Tejun Heo Reported-by: fengxiangjun Reported-by: Jim Faulkner Cc: stable@kernel.org Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 38 +++++++++++++++++++++++++++++++------- include/linux/libata.h | 3 +++ 2 files changed, 34 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 22ff51bdbc8a..6728328f3bea 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3790,21 +3790,45 @@ int sata_link_debounce(struct ata_link *link, const unsigned long *params, int sata_link_resume(struct ata_link *link, const unsigned long *params, unsigned long deadline) { + int tries = ATA_LINK_RESUME_TRIES; u32 scontrol, serror; int rc; if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol))) return rc; - scontrol = (scontrol & 0x0f0) | 0x300; + /* + * Writes to SControl sometimes get ignored under certain + * controllers (ata_piix SIDPR). Make sure DET actually is + * cleared. + */ + do { + scontrol = (scontrol & 0x0f0) | 0x300; + if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol))) + return rc; + /* + * Some PHYs react badly if SStatus is pounded + * immediately after resuming. Delay 200ms before + * debouncing. + */ + msleep(200); - if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol))) - return rc; + /* is SControl restored correctly? */ + if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol))) + return rc; + } while ((scontrol & 0xf0f) != 0x300 && --tries); - /* Some PHYs react badly if SStatus is pounded immediately - * after resuming. Delay 200ms before debouncing. - */ - msleep(200); + if ((scontrol & 0xf0f) != 0x300) { + ata_link_printk(link, KERN_ERR, + "failed to resume link (SControl %X)\n", + scontrol); + return 0; + } + + if (tries < ATA_LINK_RESUME_TRIES) + ata_link_printk(link, KERN_WARNING, + "link resume succeeded after %d retries\n", + ATA_LINK_RESUME_TRIES - tries); if ((rc = sata_link_debounce(link, params, deadline))) return rc; diff --git a/include/linux/libata.h b/include/linux/libata.h index 6a9c4ddd3d95..73112250862c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -354,6 +354,9 @@ enum { /* max tries if error condition is still set after ->error_handler */ ATA_EH_MAX_TRIES = 5, + /* sometimes resuming a link requires several retries */ + ATA_LINK_RESUME_TRIES = 5, + /* how hard are we gonna try to probe/recover devices */ ATA_PROBE_MAX_TRIES = 3, ATA_EH_DEV_TRIES = 3, -- cgit v1.2.3 From 2c761270d5520dd84ab0b4e47c24d99ff8503c38 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 12 Jan 2010 17:39:16 +1100 Subject: lib: Introduce generic list_sort function There are two copies of list_sort() in the tree already, one in the DRM code, another in ubifs. Now XFS needs this as well. Create a generic list_sort() function from the ubifs version and convert existing users to it so we don't end up with yet another copy in the tree. Signed-off-by: Dave Chinner Acked-by: Dave Airlie Acked-by: Artem Bityutskiy Signed-off-by: Linus Torvalds --- drivers/gpu/drm/drm_modes.c | 90 ++------------------------------------ fs/ubifs/gc.c | 96 +---------------------------------------- include/linux/list_sort.h | 11 +++++ lib/Makefile | 2 +- lib/list_sort.c | 102 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 119 insertions(+), 182 deletions(-) create mode 100644 include/linux/list_sort.h create mode 100644 lib/list_sort.c (limited to 'include/linux') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 6d81a02463a3..76d63394c776 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1,9 +1,4 @@ /* - * The list_sort function is (presumably) licensed under the GPL (see the - * top level "COPYING" file for details). - * - * The remainder of this file is: - * * Copyright © 1997-2003 by The XFree86 Project, Inc. * Copyright © 2007 Dave Airlie * Copyright © 2007-2008 Intel Corporation @@ -36,6 +31,7 @@ */ #include +#include #include "drmP.h" #include "drm.h" #include "drm_crtc.h" @@ -855,6 +851,7 @@ EXPORT_SYMBOL(drm_mode_prune_invalid); /** * drm_mode_compare - compare modes for favorability + * @priv: unused * @lh_a: list_head for first mode * @lh_b: list_head for second mode * @@ -868,7 +865,7 @@ EXPORT_SYMBOL(drm_mode_prune_invalid); * Negative if @lh_a is better than @lh_b, zero if they're equivalent, or * positive if @lh_b is better than @lh_a. */ -static int drm_mode_compare(struct list_head *lh_a, struct list_head *lh_b) +static int drm_mode_compare(void *priv, struct list_head *lh_a, struct list_head *lh_b) { struct drm_display_mode *a = list_entry(lh_a, struct drm_display_mode, head); struct drm_display_mode *b = list_entry(lh_b, struct drm_display_mode, head); @@ -885,85 +882,6 @@ static int drm_mode_compare(struct list_head *lh_a, struct list_head *lh_b) return diff; } -/* FIXME: what we don't have a list sort function? */ -/* list sort from Mark J Roberts (mjr@znex.org) */ -void list_sort(struct list_head *head, - int (*cmp)(struct list_head *a, struct list_head *b)) -{ - struct list_head *p, *q, *e, *list, *tail, *oldhead; - int insize, nmerges, psize, qsize, i; - - list = head->next; - list_del(head); - insize = 1; - for (;;) { - p = oldhead = list; - list = tail = NULL; - nmerges = 0; - - while (p) { - nmerges++; - q = p; - psize = 0; - for (i = 0; i < insize; i++) { - psize++; - q = q->next == oldhead ? NULL : q->next; - if (!q) - break; - } - - qsize = insize; - while (psize > 0 || (qsize > 0 && q)) { - if (!psize) { - e = q; - q = q->next; - qsize--; - if (q == oldhead) - q = NULL; - } else if (!qsize || !q) { - e = p; - p = p->next; - psize--; - if (p == oldhead) - p = NULL; - } else if (cmp(p, q) <= 0) { - e = p; - p = p->next; - psize--; - if (p == oldhead) - p = NULL; - } else { - e = q; - q = q->next; - qsize--; - if (q == oldhead) - q = NULL; - } - if (tail) - tail->next = e; - else - list = e; - e->prev = tail; - tail = e; - } - p = q; - } - - tail->next = list; - list->prev = tail; - - if (nmerges <= 1) - break; - - insize *= 2; - } - - head->next = list; - head->prev = list->prev; - list->prev->next = head; - list->prev = head; -} - /** * drm_mode_sort - sort mode list * @mode_list: list to sort @@ -975,7 +893,7 @@ void list_sort(struct list_head *head, */ void drm_mode_sort(struct list_head *mode_list) { - list_sort(mode_list, drm_mode_compare); + list_sort(NULL, mode_list, drm_mode_compare); } EXPORT_SYMBOL(drm_mode_sort); diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 618c2701d3a7..e5a3d8e96bb7 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -54,6 +54,7 @@ */ #include +#include #include "ubifs.h" /* @@ -107,101 +108,6 @@ static int switch_gc_head(struct ubifs_info *c) return err; } -/** - * list_sort - sort a list. - * @priv: private data, passed to @cmp - * @head: the list to sort - * @cmp: the elements comparison function - * - * This function has been implemented by Mark J Roberts . It - * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted - * in ascending order. - * - * The comparison function @cmp is supposed to return a negative value if @a is - * than @b, and a positive value if @a is greater than @b. If @a and @b are - * equivalent, then it does not matter what this function returns. - */ -static void list_sort(void *priv, struct list_head *head, - int (*cmp)(void *priv, struct list_head *a, - struct list_head *b)) -{ - struct list_head *p, *q, *e, *list, *tail, *oldhead; - int insize, nmerges, psize, qsize, i; - - if (list_empty(head)) - return; - - list = head->next; - list_del(head); - insize = 1; - for (;;) { - p = oldhead = list; - list = tail = NULL; - nmerges = 0; - - while (p) { - nmerges++; - q = p; - psize = 0; - for (i = 0; i < insize; i++) { - psize++; - q = q->next == oldhead ? NULL : q->next; - if (!q) - break; - } - - qsize = insize; - while (psize > 0 || (qsize > 0 && q)) { - if (!psize) { - e = q; - q = q->next; - qsize--; - if (q == oldhead) - q = NULL; - } else if (!qsize || !q) { - e = p; - p = p->next; - psize--; - if (p == oldhead) - p = NULL; - } else if (cmp(priv, p, q) <= 0) { - e = p; - p = p->next; - psize--; - if (p == oldhead) - p = NULL; - } else { - e = q; - q = q->next; - qsize--; - if (q == oldhead) - q = NULL; - } - if (tail) - tail->next = e; - else - list = e; - e->prev = tail; - tail = e; - } - p = q; - } - - tail->next = list; - list->prev = tail; - - if (nmerges <= 1) - break; - - insize *= 2; - } - - head->next = list; - head->prev = list->prev; - list->prev->next = head; - list->prev = head; -} - /** * data_nodes_cmp - compare 2 data nodes. * @priv: UBIFS file-system description object diff --git a/include/linux/list_sort.h b/include/linux/list_sort.h new file mode 100644 index 000000000000..1a2df2efb771 --- /dev/null +++ b/include/linux/list_sort.h @@ -0,0 +1,11 @@ +#ifndef _LINUX_LIST_SORT_H +#define _LINUX_LIST_SORT_H + +#include + +struct list_head; + +void list_sort(void *priv, struct list_head *head, + int (*cmp)(void *priv, struct list_head *a, + struct list_head *b)); +#endif diff --git a/lib/Makefile b/lib/Makefile index 911b25aed1e7..3b0b4a696db9 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - string_helpers.o gcd.o + string_helpers.o gcd.o list_sort.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/list_sort.c b/lib/list_sort.c new file mode 100644 index 000000000000..19d11e0bb958 --- /dev/null +++ b/lib/list_sort.c @@ -0,0 +1,102 @@ +#include +#include +#include +#include +#include + +/** + * list_sort - sort a list. + * @priv: private data, passed to @cmp + * @head: the list to sort + * @cmp: the elements comparison function + * + * This function has been implemented by Mark J Roberts . It + * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted + * in ascending order. + * + * The comparison function @cmp is supposed to return a negative value if @a is + * less than @b, and a positive value if @a is greater than @b. If @a and @b + * are equivalent, then it does not matter what this function returns. + */ +void list_sort(void *priv, struct list_head *head, + int (*cmp)(void *priv, struct list_head *a, + struct list_head *b)) +{ + struct list_head *p, *q, *e, *list, *tail, *oldhead; + int insize, nmerges, psize, qsize, i; + + if (list_empty(head)) + return; + + list = head->next; + list_del(head); + insize = 1; + for (;;) { + p = oldhead = list; + list = tail = NULL; + nmerges = 0; + + while (p) { + nmerges++; + q = p; + psize = 0; + for (i = 0; i < insize; i++) { + psize++; + q = q->next == oldhead ? NULL : q->next; + if (!q) + break; + } + + qsize = insize; + while (psize > 0 || (qsize > 0 && q)) { + if (!psize) { + e = q; + q = q->next; + qsize--; + if (q == oldhead) + q = NULL; + } else if (!qsize || !q) { + e = p; + p = p->next; + psize--; + if (p == oldhead) + p = NULL; + } else if (cmp(priv, p, q) <= 0) { + e = p; + p = p->next; + psize--; + if (p == oldhead) + p = NULL; + } else { + e = q; + q = q->next; + qsize--; + if (q == oldhead) + q = NULL; + } + if (tail) + tail->next = e; + else + list = e; + e->prev = tail; + tail = e; + } + p = q; + } + + tail->next = list; + list->prev = tail; + + if (nmerges <= 1) + break; + + insize *= 2; + } + + head->next = list; + head->prev = list->prev; + list->prev->next = head; + list->prev = head; +} + +EXPORT_SYMBOL(list_sort); -- cgit v1.2.3