From 8e552c36d90c03d2cabf5373788998966751b609 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Mon, 12 May 2008 21:46:29 -0400 Subject: power_supply: add CHARGE_COUNTER property and olpc_battery support for it This adds PROP_CHARGE_COUNTER to the power supply class (documenting it as well). The OLPC battery driver uses this for spitting out its ACR values (in uAh). We have some rounding errors (the data sheet claims 416.7, the math actually works out to 416.666667, so we're forced to choose between overflows or precision loss. I chose precision loss, and stuck w/ data sheet values), but I don't think anyone will care that much. Signed-off-by: Andres Salomon Signed-off-by: Anton Vorontsov --- include/linux/power_supply.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 68ed19ccf1f7..ea96ead1d39d 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -78,6 +78,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_CHARGE_EMPTY, POWER_SUPPLY_PROP_CHARGE_NOW, POWER_SUPPLY_PROP_CHARGE_AVG, + POWER_SUPPLY_PROP_CHARGE_COUNTER, POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN, POWER_SUPPLY_PROP_ENERGY_EMPTY_DESIGN, POWER_SUPPLY_PROP_ENERGY_FULL, -- cgit v1.2.3 From d8e64406a037a64444175730294e449c9e21f5ec Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 23 Jul 2008 13:09:48 -0700 Subject: md: delay notification of 'active_idle' to the recovery thread sysfs_notify might sleep, so do not call it from md_safemode_timeout. Signed-off-by: Dan Williams --- drivers/md/md.c | 5 ++++- include/linux/raid/md_k.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/md/md.c b/drivers/md/md.c index c2ff77ccec50..0f1b83096425 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3483,7 +3483,7 @@ static void md_safemode_timeout(unsigned long data) if (!atomic_read(&mddev->writes_pending)) { mddev->safemode = 1; if (mddev->external) - sysfs_notify(&mddev->kobj, NULL, "array_state"); + set_bit(MD_NOTIFY_ARRAY_STATE, &mddev->flags); } md_wakeup_thread(mddev->thread); } @@ -6051,6 +6051,9 @@ void md_check_recovery(mddev_t *mddev) if (mddev->bitmap) bitmap_daemon_work(mddev->bitmap); + if (test_and_clear_bit(MD_NOTIFY_ARRAY_STATE, &mddev->flags)) + sysfs_notify(&mddev->kobj, NULL, "array_state"); + if (mddev->ro) return; diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 9f2549ac0e2d..c200b9a34aff 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -128,6 +128,7 @@ struct mddev_s #define MD_CHANGE_DEVS 0 /* Some device status has changed */ #define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */ #define MD_CHANGE_PENDING 2 /* superblock update in progress */ +#define MD_NOTIFY_ARRAY_STATE 3 /* atomic context wants to notify userspace */ int ro; -- cgit v1.2.3 From 9403540c0653122ca34884a180439ddbfcbcb524 Mon Sep 17 00:00:00 2001 From: Barry Naujok Date: Wed, 21 May 2008 16:50:46 +1000 Subject: dcache: Add case-insensitive support d_ci_add() routine This add a dcache entry to the dcache for lookup, but changing the name that is associated with the entry rather than the one passed in to the lookup routine. First, it sees if the case-exact match already exists in the dcache and uses it if one exists. Otherwise, it allocates a new node with the new name and splices it into the dcache. Original code from ntfs_lookup in fs/ntfs/namei.c by Anton Altaparmakov. Signed-off-by: Barry Naujok Signed-off-by: Anton Altaparmakov Acked-by: Christoph Hellwig --- fs/dcache.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/dcache.h | 1 + 2 files changed, 103 insertions(+) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index f2584d22cb45..101663d15e9f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1220,6 +1220,107 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) return new; } +/** + * d_add_ci - lookup or allocate new dentry with case-exact name + * @inode: the inode case-insensitive lookup has found + * @dentry: the negative dentry that was passed to the parent's lookup func + * @name: the case-exact name to be associated with the returned dentry + * + * This is to avoid filling the dcache with case-insensitive names to the + * same inode, only the actual correct case is stored in the dcache for + * case-insensitive filesystems. + * + * For a case-insensitive lookup match and if the the case-exact dentry + * already exists in in the dcache, use it and return it. + * + * If no entry exists with the exact case name, allocate new dentry with + * the exact case, and return the spliced entry. + */ +struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry, + struct qstr *name) +{ + int error; + struct dentry *found; + struct dentry *new; + + /* Does a dentry matching the name exist already? */ + found = d_hash_and_lookup(dentry->d_parent, name); + /* If not, create it now and return */ + if (!found) { + new = d_alloc(dentry->d_parent, name); + if (!new) { + error = -ENOMEM; + goto err_out; + } + found = d_splice_alias(inode, new); + if (found) { + dput(new); + return found; + } + return new; + } + /* Matching dentry exists, check if it is negative. */ + if (found->d_inode) { + if (unlikely(found->d_inode != inode)) { + /* This can't happen because bad inodes are unhashed. */ + BUG_ON(!is_bad_inode(inode)); + BUG_ON(!is_bad_inode(found->d_inode)); + } + /* + * Already have the inode and the dentry attached, decrement + * the reference count to balance the iget() done + * earlier on. We found the dentry using d_lookup() so it + * cannot be disconnected and thus we do not need to worry + * about any NFS/disconnectedness issues here. + */ + iput(inode); + return found; + } + /* + * Negative dentry: instantiate it unless the inode is a directory and + * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED), + * in which case d_move() that in place of the found dentry. + */ + if (!S_ISDIR(inode->i_mode)) { + /* Not a directory; everything is easy. */ + d_instantiate(found, inode); + return found; + } + spin_lock(&dcache_lock); + if (list_empty(&inode->i_dentry)) { + /* + * Directory without a 'disconnected' dentry; we need to do + * d_instantiate() by hand because it takes dcache_lock which + * we already hold. + */ + list_add(&found->d_alias, &inode->i_dentry); + found->d_inode = inode; + spin_unlock(&dcache_lock); + security_d_instantiate(found, inode); + return found; + } + /* + * Directory with a 'disconnected' dentry; get a reference to the + * 'disconnected' dentry. + */ + new = list_entry(inode->i_dentry.next, struct dentry, d_alias); + dget_locked(new); + spin_unlock(&dcache_lock); + /* Do security vodoo. */ + security_d_instantiate(found, inode); + /* Move new in place of found. */ + d_move(new, found); + /* Balance the iget() we did above. */ + iput(inode); + /* Throw away found. */ + dput(found); + /* Use new as the actual dentry. */ + return new; + +err_out: + iput(inode); + return ERR_PTR(error); +} /** * d_lookup - search for a dentry @@ -2254,6 +2355,7 @@ EXPORT_SYMBOL(d_path); EXPORT_SYMBOL(d_prune_aliases); EXPORT_SYMBOL(d_rehash); EXPORT_SYMBOL(d_splice_alias); +EXPORT_SYMBOL(d_add_ci); EXPORT_SYMBOL(d_validate); EXPORT_SYMBOL(dget_locked); EXPORT_SYMBOL(dput); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 98202c672fde..07aa198f19ed 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -230,6 +230,7 @@ extern void d_delete(struct dentry *); extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_anon(struct inode *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); +extern struct dentry * d_add_ci(struct inode *, struct dentry *, struct qstr *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); extern void shrink_dcache_for_umount(struct super_block *); -- cgit v1.2.3 From e930bffe95e1e886a1ede80726ea38df5838d067 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 25 Jul 2008 16:24:52 +0200 Subject: KVM: Synchronize guest physical memory map to host virtual memory map Synchronize changes to host virtual addresses which are part of a KVM memory slot to the KVM shadow mmu. This allows pte operations like swapping, page migration, and madvise() to transparently work with KVM. Signed-off-by: Andrea Arcangeli Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 100 +++++++++++++++++++++++++++++++++ arch/x86/kvm/paging_tmpl.h | 12 ++++ include/asm-x86/kvm_host.h | 6 ++ include/linux/kvm_host.h | 24 ++++++++ virt/kvm/kvm_main.c | 135 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 277 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2fa231923cf7..0bfe2bd305eb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -653,6 +653,84 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) account_shadowed(kvm, gfn); } +static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) +{ + u64 *spte; + int need_tlb_flush = 0; + + while ((spte = rmap_next(kvm, rmapp, NULL))) { + BUG_ON(!(*spte & PT_PRESENT_MASK)); + rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); + rmap_remove(kvm, spte); + set_shadow_pte(spte, shadow_trap_nonpresent_pte); + need_tlb_flush = 1; + } + return need_tlb_flush; +} + +static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, + int (*handler)(struct kvm *kvm, unsigned long *rmapp)) +{ + int i; + int retval = 0; + + /* + * If mmap_sem isn't taken, we can look the memslots with only + * the mmu_lock by skipping over the slots with userspace_addr == 0. + */ + for (i = 0; i < kvm->nmemslots; i++) { + struct kvm_memory_slot *memslot = &kvm->memslots[i]; + unsigned long start = memslot->userspace_addr; + unsigned long end; + + /* mmu_lock protects userspace_addr */ + if (!start) + continue; + + end = start + (memslot->npages << PAGE_SHIFT); + if (hva >= start && hva < end) { + gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; + retval |= handler(kvm, &memslot->rmap[gfn_offset]); + retval |= handler(kvm, + &memslot->lpage_info[ + gfn_offset / + KVM_PAGES_PER_HPAGE].rmap_pde); + } + } + + return retval; +} + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); +} + +static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) +{ + u64 *spte; + int young = 0; + + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + int _young; + u64 _spte = *spte; + BUG_ON(!(_spte & PT_PRESENT_MASK)); + _young = _spte & PT_ACCESSED_MASK; + if (_young) { + young = 1; + clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); + } + spte = rmap_next(kvm, rmapp, spte); + } + return young; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + return kvm_handle_hva(kvm, hva, kvm_age_rmapp); +} + #ifdef MMU_DEBUG static int is_empty_shadow_page(u64 *spt) { @@ -1203,6 +1281,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) int r; int largepage = 0; pfn_t pfn; + unsigned long mmu_seq; down_read(¤t->mm->mmap_sem); if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { @@ -1210,6 +1289,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) largepage = 1; } + mmu_seq = vcpu->kvm->mmu_notifier_seq; + /* implicit mb(), we'll read before PT lock is unlocked */ pfn = gfn_to_pfn(vcpu->kvm, gfn); up_read(¤t->mm->mmap_sem); @@ -1220,6 +1301,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) } spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu, mmu_seq)) + goto out_unlock; kvm_mmu_free_some_pages(vcpu); r = __direct_map(vcpu, v, write, largepage, gfn, pfn, PT32E_ROOT_LEVEL); @@ -1227,6 +1310,11 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) return r; + +out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + return 0; } @@ -1345,6 +1433,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, int r; int largepage = 0; gfn_t gfn = gpa >> PAGE_SHIFT; + unsigned long mmu_seq; ASSERT(vcpu); ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); @@ -1358,6 +1447,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, gfn &= ~(KVM_PAGES_PER_HPAGE-1); largepage = 1; } + mmu_seq = vcpu->kvm->mmu_notifier_seq; + /* implicit mb(), we'll read before PT lock is unlocked */ pfn = gfn_to_pfn(vcpu->kvm, gfn); up_read(¤t->mm->mmap_sem); if (is_error_pfn(pfn)) { @@ -1365,12 +1456,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, return 1; } spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu, mmu_seq)) + goto out_unlock; kvm_mmu_free_some_pages(vcpu); r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); spin_unlock(&vcpu->kvm->mmu_lock); return r; + +out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + return 0; } static void nonpaging_free(struct kvm_vcpu *vcpu) @@ -1670,6 +1768,8 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gfn &= ~(KVM_PAGES_PER_HPAGE-1); vcpu->arch.update_pte.largepage = 1; } + vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; + /* implicit mb(), we'll read before PT lock is unlocked */ pfn = gfn_to_pfn(vcpu->kvm, gfn); up_read(¤t->mm->mmap_sem); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 4d918220baeb..f72ac1fa35f0 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -263,6 +263,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, pfn = vcpu->arch.update_pte.pfn; if (is_error_pfn(pfn)) return; + if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) + return; kvm_get_pfn(pfn); mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), @@ -380,6 +382,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, int r; pfn_t pfn; int largepage = 0; + unsigned long mmu_seq; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); kvm_mmu_audit(vcpu, "pre page fault"); @@ -413,6 +416,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, largepage = 1; } } + mmu_seq = vcpu->kvm->mmu_notifier_seq; + /* implicit mb(), we'll read before PT lock is unlocked */ pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); up_read(¤t->mm->mmap_sem); @@ -424,6 +429,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, } spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu, mmu_seq)) + goto out_unlock; kvm_mmu_free_some_pages(vcpu); shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, largepage, &write_pt, pfn); @@ -439,6 +446,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, spin_unlock(&vcpu->kvm->mmu_lock); return write_pt; + +out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + return 0; } static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index bc34dc21f178..0f3c53114614 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -251,6 +252,7 @@ struct kvm_vcpu_arch { gfn_t gfn; /* presumed gfn during guest pte update */ pfn_t pfn; /* pfn corresponding to that gfn */ int largepage; + unsigned long mmu_seq; } update_pte; struct i387_fxsave_struct host_fx_image; @@ -729,4 +731,8 @@ asmlinkage void kvm_handle_fault_on_reboot(void); KVM_EX_ENTRY " 666b, 667b \n\t" \ ".popsection" +#define KVM_ARCH_WANT_MMU_NOTIFIER +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +int kvm_age_hva(struct kvm *kvm, unsigned long hva); + #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 07d68a8ae8e9..8525afc53107 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -121,6 +121,12 @@ struct kvm { struct kvm_coalesced_mmio_dev *coalesced_mmio_dev; struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; #endif + +#ifdef KVM_ARCH_WANT_MMU_NOTIFIER + struct mmu_notifier mmu_notifier; + unsigned long mmu_notifier_seq; + long mmu_notifier_count; +#endif }; /* The guest did something we don't support. */ @@ -332,4 +338,22 @@ int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) #define kvm_trace_cleanup() ((void)0) #endif +#ifdef KVM_ARCH_WANT_MMU_NOTIFIER +static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq) +{ + if (unlikely(vcpu->kvm->mmu_notifier_count)) + return 1; + /* + * Both reads happen under the mmu_lock and both values are + * modified under mmu_lock, so there's no need of smb_rmb() + * here in between, otherwise mmu_notifier_count should be + * read before mmu_notifier_seq, see + * mmu_notifier_invalidate_range_end write side. + */ + if (vcpu->kvm->mmu_notifier_seq != mmu_seq) + return 1; + return 0; +} +#endif + #endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3735212cd3f8..7dd9b0b85e4e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -192,6 +192,123 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); +#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) +static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) +{ + return container_of(mn, struct kvm, mmu_notifier); +} + +static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + int need_tlb_flush; + + /* + * When ->invalidate_page runs, the linux pte has been zapped + * already but the page is still allocated until + * ->invalidate_page returns. So if we increase the sequence + * here the kvm page fault will notice if the spte can't be + * established because the page is going to be freed. If + * instead the kvm page fault establishes the spte before + * ->invalidate_page runs, kvm_unmap_hva will release it + * before returning. + * + * The sequence increase only need to be seen at spin_unlock + * time, and not at spin_lock time. + * + * Increasing the sequence after the spin_unlock would be + * unsafe because the kvm page fault could then establish the + * pte after kvm_unmap_hva returned, without noticing the page + * is going to be freed. + */ + spin_lock(&kvm->mmu_lock); + kvm->mmu_notifier_seq++; + need_tlb_flush = kvm_unmap_hva(kvm, address); + spin_unlock(&kvm->mmu_lock); + + /* we've to flush the tlb before the pages can be freed */ + if (need_tlb_flush) + kvm_flush_remote_tlbs(kvm); + +} + +static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + int need_tlb_flush = 0; + + spin_lock(&kvm->mmu_lock); + /* + * The count increase must become visible at unlock time as no + * spte can be established without taking the mmu_lock and + * count is also read inside the mmu_lock critical section. + */ + kvm->mmu_notifier_count++; + for (; start < end; start += PAGE_SIZE) + need_tlb_flush |= kvm_unmap_hva(kvm, start); + spin_unlock(&kvm->mmu_lock); + + /* we've to flush the tlb before the pages can be freed */ + if (need_tlb_flush) + kvm_flush_remote_tlbs(kvm); +} + +static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + + spin_lock(&kvm->mmu_lock); + /* + * This sequence increase will notify the kvm page fault that + * the page that is going to be mapped in the spte could have + * been freed. + */ + kvm->mmu_notifier_seq++; + /* + * The above sequence increase must be visible before the + * below count decrease but both values are read by the kvm + * page fault under mmu_lock spinlock so we don't need to add + * a smb_wmb() here in between the two. + */ + kvm->mmu_notifier_count--; + spin_unlock(&kvm->mmu_lock); + + BUG_ON(kvm->mmu_notifier_count < 0); +} + +static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + int young; + + spin_lock(&kvm->mmu_lock); + young = kvm_age_hva(kvm, address); + spin_unlock(&kvm->mmu_lock); + + if (young) + kvm_flush_remote_tlbs(kvm); + + return young; +} + +static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { + .invalidate_page = kvm_mmu_notifier_invalidate_page, + .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, + .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, + .clear_flush_young = kvm_mmu_notifier_clear_flush_young, +}; +#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ + static struct kvm *kvm_create_vm(void) { struct kvm *kvm = kvm_arch_create_vm(); @@ -212,6 +329,21 @@ static struct kvm *kvm_create_vm(void) (struct kvm_coalesced_mmio_ring *)page_address(page); #endif +#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) + { + int err; + kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; + err = mmu_notifier_register(&kvm->mmu_notifier, current->mm); + if (err) { +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + put_page(page); +#endif + kfree(kvm); + return ERR_PTR(err); + } + } +#endif + kvm->mm = current->mm; atomic_inc(&kvm->mm->mm_count); spin_lock_init(&kvm->mmu_lock); @@ -271,6 +403,9 @@ static void kvm_destroy_vm(struct kvm *kvm) #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET if (kvm->coalesced_mmio_ring != NULL) free_page((unsigned long)kvm->coalesced_mmio_ring); +#endif +#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) + mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); #endif kvm_arch_destroy_vm(kvm); mmdrop(mm); -- cgit v1.2.3 From ed8486243379ef3e6c61363df915882945c0eaec Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 29 Jul 2008 11:30:57 +0300 Subject: KVM: Advertise synchronized mmu support to userspace Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 1 + include/linux/kvm.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c7b01efe0646..0d682fc6aeb3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -883,6 +883,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: + case KVM_CAP_SYNC_MMU: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 0ea064cbfbc8..69511f74f912 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -371,6 +371,7 @@ struct kvm_trace_rec { #define KVM_CAP_PV_MMU 13 #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 +#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ /* * ioctls for VM fds -- cgit v1.2.3 From 8978b74253280d59e97cf49a3ec2c0cbccd5b801 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 29 Jul 2008 13:38:53 +0900 Subject: generic, x86: fix add iommu_num_pages helper function This IOMMU helper function doesn't work for some architectures: http://marc.info/?l=linux-kernel&m=121699304403202&w=2 It also breaks POWER and SPARC builds: http://marc.info/?l=linux-kernel&m=121730388001890&w=2 Currently, only x86 IOMMUs use this so let's move it to x86 for now. Reported-by: Stephen Rothwell Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 8 ++++++++ include/asm-x86/iommu.h | 2 ++ include/linux/iommu-helper.h | 1 - lib/iommu-helper.c | 8 -------- 4 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 8dbffb846de9..87d4d6964ec2 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void) pci_swiotlb_init(); } + +unsigned long iommu_num_pages(unsigned long addr, unsigned long len) +{ + unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); + + return size >> PAGE_SHIFT; +} +EXPORT_SYMBOL(iommu_num_pages); #endif /* diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index ecc8061904a9..5f888cc5be49 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h @@ -7,6 +7,8 @@ extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; +extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); + #ifdef CONFIG_GART_IOMMU extern int gart_iommu_aperture; extern int gart_iommu_aperture_allowed; diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index f8598f583944..c975caf75385 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -8,4 +8,3 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long align_mask); extern void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr); -extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c index 889ddce2021e..a3b8d4c3f77a 100644 --- a/lib/iommu-helper.c +++ b/lib/iommu-helper.c @@ -80,11 +80,3 @@ void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr) } } EXPORT_SYMBOL(iommu_area_free); - -unsigned long iommu_num_pages(unsigned long addr, unsigned long len) -{ - unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); - - return size >> PAGE_SHIFT; -} -EXPORT_SYMBOL(iommu_num_pages); -- cgit v1.2.3 From 1795cf48b322b4d19230a40dbe7181acedd34a94 Mon Sep 17 00:00:00 2001 From: Adrian McMenamin Date: Tue, 29 Jul 2008 22:10:56 +0900 Subject: sh/maple: clean maple bus code This patch cleans up the handling of the maple bus queue to remove the risk of races when adding packets. It also removes references to the redundant connect and disconnect functions. Signed-off-by: Adrian McMenamin Signed-off-by: Paul Mundt --- drivers/sh/maple/maple.c | 265 ++++++++++++++++++++++++++++++++--------------- include/linux/maple.h | 6 +- 2 files changed, 189 insertions(+), 82 deletions(-) (limited to 'include/linux') diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c index 617efb1640b1..be97789fa5fd 100644 --- a/drivers/sh/maple/maple.c +++ b/drivers/sh/maple/maple.c @@ -24,13 +24,12 @@ #include #include #include +#include #include #include #include -#include -#include -#include -#include +#include +#include MODULE_AUTHOR("Yaegshi Takeshi, Paul Mundt, M.R. Brown, Adrian McMenamin"); MODULE_DESCRIPTION("Maple bus driver for Dreamcast"); @@ -46,14 +45,15 @@ static DECLARE_WORK(maple_vblank_process, maple_vblank_handler); static LIST_HEAD(maple_waitq); static LIST_HEAD(maple_sentq); -static DEFINE_MUTEX(maple_list_lock); +/* mutex to protect queue of waiting packets */ +static DEFINE_MUTEX(maple_wlist_lock); static struct maple_driver maple_dummy_driver; static struct device maple_bus; static int subdevice_map[MAPLE_PORTS]; static unsigned long *maple_sendbuf, *maple_sendptr, *maple_lastptr; static unsigned long maple_pnp_time; -static int started, scanning, liststatus, fullscan; +static int started, scanning, fullscan; static struct kmem_cache *maple_queue_cache; struct maple_device_specify { @@ -129,35 +129,124 @@ static void maple_release_device(struct device *dev) kfree(mdev); } -/** +/* * maple_add_packet - add a single instruction to the queue - * @mq: instruction to add to waiting queue + * @mdev - maple device + * @function - function on device being queried + * @command - maple command to add + * @length - length of command string (in 32 bit words) + * @data - remainder of command string */ -void maple_add_packet(struct mapleq *mq) +int maple_add_packet(struct maple_device *mdev, u32 function, u32 command, + size_t length, void *data) { - mutex_lock(&maple_list_lock); - list_add(&mq->list, &maple_waitq); - mutex_unlock(&maple_list_lock); + int locking, ret = 0; + void *sendbuf = NULL; + + mutex_lock(&maple_wlist_lock); + /* bounce if device already locked */ + locking = mutex_is_locked(&mdev->mq->mutex); + if (locking) { + ret = -EBUSY; + goto out; + } + + mutex_lock(&mdev->mq->mutex); + + if (length) { + sendbuf = kmalloc(length * 4, GFP_KERNEL); + if (!sendbuf) { + mutex_unlock(&mdev->mq->mutex); + ret = -ENOMEM; + goto out; + } + ((__be32 *)sendbuf)[0] = cpu_to_be32(function); + } + + mdev->mq->command = command; + mdev->mq->length = length; + if (length > 1) + memcpy(sendbuf + 4, data, (length - 1) * 4); + mdev->mq->sendbuf = sendbuf; + + list_add(&mdev->mq->list, &maple_waitq); +out: + mutex_unlock(&maple_wlist_lock); + return ret; } EXPORT_SYMBOL_GPL(maple_add_packet); +/* + * maple_add_packet_sleeps - add a single instruction to the queue + * - waits for lock to be free + * @mdev - maple device + * @function - function on device being queried + * @command - maple command to add + * @length - length of command string (in 32 bit words) + * @data - remainder of command string + */ +int maple_add_packet_sleeps(struct maple_device *mdev, u32 function, + u32 command, size_t length, void *data) +{ + int locking, ret = 0; + void *sendbuf = NULL; + + locking = mutex_lock_interruptible(&mdev->mq->mutex); + if (locking) { + ret = -EIO; + goto out; + } + + if (length) { + sendbuf = kmalloc(length * 4, GFP_KERNEL); + if (!sendbuf) { + mutex_unlock(&mdev->mq->mutex); + ret = -ENOMEM; + goto out; + } + ((__be32 *)sendbuf)[0] = cpu_to_be32(function); + } + + mdev->mq->command = command; + mdev->mq->length = length; + if (length > 1) + memcpy(sendbuf + 4, data, (length - 1) * 4); + mdev->mq->sendbuf = sendbuf; + + mutex_lock(&maple_wlist_lock); + list_add(&mdev->mq->list, &maple_waitq); + mutex_unlock(&maple_wlist_lock); +out: + return ret; +} +EXPORT_SYMBOL_GPL(maple_add_packet_sleeps); + static struct mapleq *maple_allocq(struct maple_device *mdev) { struct mapleq *mq; mq = kmalloc(sizeof(*mq), GFP_KERNEL); if (!mq) - return NULL; + goto failed_nomem; mq->dev = mdev; mq->recvbufdcsp = kmem_cache_zalloc(maple_queue_cache, GFP_KERNEL); mq->recvbuf = (void *) P2SEGADDR(mq->recvbufdcsp); - if (!mq->recvbuf) { - kfree(mq); - return NULL; - } + if (!mq->recvbuf) + goto failed_p2; + /* + * most devices do not need the mutex - but + * anything that injects block reads or writes + * will rely on it + */ + mutex_init(&mq->mutex); return mq; + +failed_p2: + kfree(mq); +failed_nomem: + return NULL; } static struct maple_device *maple_alloc_dev(int port, int unit) @@ -178,7 +267,6 @@ static struct maple_device *maple_alloc_dev(int port, int unit) } mdev->dev.bus = &maple_bus_type; mdev->dev.parent = &maple_bus; - mdev->function = 0; return mdev; } @@ -216,7 +304,6 @@ static void maple_build_block(struct mapleq *mq) *maple_sendptr++ = PHYSADDR(mq->recvbuf); *maple_sendptr++ = mq->command | (to << 8) | (from << 16) | (len << 24); - while (len-- > 0) *maple_sendptr++ = *lsendbuf++; } @@ -224,22 +311,27 @@ static void maple_build_block(struct mapleq *mq) /* build up command queue */ static void maple_send(void) { - int i; - int maple_packets; + int i, maple_packets = 0; struct mapleq *mq, *nmq; if (!list_empty(&maple_sentq)) return; - if (list_empty(&maple_waitq) || !maple_dma_done()) + mutex_lock(&maple_wlist_lock); + if (list_empty(&maple_waitq) || !maple_dma_done()) { + mutex_unlock(&maple_wlist_lock); return; - maple_packets = 0; - maple_sendptr = maple_lastptr = maple_sendbuf; + } + mutex_unlock(&maple_wlist_lock); + maple_lastptr = maple_sendbuf; + maple_sendptr = maple_sendbuf; + mutex_lock(&maple_wlist_lock); list_for_each_entry_safe(mq, nmq, &maple_waitq, list) { maple_build_block(mq); list_move(&mq->list, &maple_sentq); if (maple_packets++ > MAPLE_MAXPACKETS) break; } + mutex_unlock(&maple_wlist_lock); if (maple_packets > 0) { for (i = 0; i < (1 << MAPLE_DMA_PAGES); i++) dma_cache_sync(0, maple_sendbuf + i * PAGE_SIZE, @@ -247,7 +339,8 @@ static void maple_send(void) } } -static int attach_matching_maple_driver(struct device_driver *driver, +/* check if there is a driver registered likely to match this device */ +static int check_matching_maple_driver(struct device_driver *driver, void *devptr) { struct maple_driver *maple_drv; @@ -255,12 +348,8 @@ static int attach_matching_maple_driver(struct device_driver *driver, mdev = devptr; maple_drv = to_maple_driver(driver); - if (mdev->devinfo.function & be32_to_cpu(maple_drv->function)) { - if (maple_drv->connect(mdev) == 0) { - mdev->driver = maple_drv; - return 1; - } - } + if (mdev->devinfo.function & cpu_to_be32(maple_drv->function)) + return 1; return 0; } @@ -268,11 +357,6 @@ static void maple_detach_driver(struct maple_device *mdev) { if (!mdev) return; - if (mdev->driver) { - if (mdev->driver->disconnect) - mdev->driver->disconnect(mdev); - } - mdev->driver = NULL; device_unregister(&mdev->dev); mdev = NULL; } @@ -328,8 +412,8 @@ static void maple_attach_driver(struct maple_device *mdev) mdev->port, mdev->unit, function); matched = - bus_for_each_drv(&maple_bus_type, NULL, mdev, - attach_matching_maple_driver); + bus_for_each_drv(&maple_bus_type, NULL, mdev, + check_matching_maple_driver); if (matched == 0) { /* Driver does not exist yet */ @@ -373,45 +457,48 @@ static int detach_maple_device(struct device *device, void *portptr) static int setup_maple_commands(struct device *device, void *ignored) { + int add; struct maple_device *maple_dev = to_maple_dev(device); if ((maple_dev->interval > 0) && time_after(jiffies, maple_dev->when)) { - maple_dev->when = jiffies + maple_dev->interval; - maple_dev->mq->command = MAPLE_COMMAND_GETCOND; - maple_dev->mq->sendbuf = &maple_dev->function; - maple_dev->mq->length = 1; - maple_add_packet(maple_dev->mq); - liststatus++; + /* bounce if we cannot lock */ + add = maple_add_packet(maple_dev, + be32_to_cpu(maple_dev->devinfo.function), + MAPLE_COMMAND_GETCOND, 1, NULL); + if (!add) + maple_dev->when = jiffies + maple_dev->interval; } else { - if (time_after(jiffies, maple_pnp_time)) { - maple_dev->mq->command = MAPLE_COMMAND_DEVINFO; - maple_dev->mq->length = 0; - maple_add_packet(maple_dev->mq); - liststatus++; - } + if (time_after(jiffies, maple_pnp_time)) + /* This will also bounce */ + maple_add_packet(maple_dev, 0, + MAPLE_COMMAND_DEVINFO, 0, NULL); } - return 0; } /* VBLANK bottom half - implemented via workqueue */ static void maple_vblank_handler(struct work_struct *work) { - if (!maple_dma_done()) - return; - if (!list_empty(&maple_sentq)) + if (!list_empty(&maple_sentq) || !maple_dma_done()) return; + ctrl_outl(0, MAPLE_ENABLE); - liststatus = 0; + bus_for_each_dev(&maple_bus_type, NULL, NULL, setup_maple_commands); + if (time_after(jiffies, maple_pnp_time)) maple_pnp_time = jiffies + MAPLE_PNP_INTERVAL; - if (liststatus && list_empty(&maple_sentq)) { - INIT_LIST_HEAD(&maple_sentq); + + mutex_lock(&maple_wlist_lock); + if (!list_empty(&maple_waitq) && list_empty(&maple_sentq)) { + mutex_unlock(&maple_wlist_lock); maple_send(); + } else { + mutex_unlock(&maple_wlist_lock); } + maplebus_dma_reset(); } @@ -422,8 +509,8 @@ static void maple_map_subunits(struct maple_device *mdev, int submask) struct maple_device *mdev_add; struct maple_device_specify ds; + ds.port = mdev->port; for (k = 0; k < 5; k++) { - ds.port = mdev->port; ds.unit = k + 1; retval = bus_for_each_dev(&maple_bus_type, NULL, &ds, @@ -437,9 +524,9 @@ static void maple_map_subunits(struct maple_device *mdev, int submask) mdev_add = maple_alloc_dev(mdev->port, k + 1); if (!mdev_add) return; - mdev_add->mq->command = MAPLE_COMMAND_DEVINFO; - mdev_add->mq->length = 0; - maple_add_packet(mdev_add->mq); + maple_add_packet(mdev_add, 0, MAPLE_COMMAND_DEVINFO, + 0, NULL); + /* mark that we are checking sub devices */ scanning = 1; } submask = submask >> 1; @@ -505,6 +592,28 @@ static void maple_response_devinfo(struct maple_device *mdev, } } +static void maple_port_rescan(void) +{ + int i; + struct maple_device *mdev; + + fullscan = 1; + for (i = 0; i < MAPLE_PORTS; i++) { + if (checked[i] == false) { + fullscan = 0; + mdev = baseunits[i]; + /* + * test lock in case scan has failed + * but device is still locked + */ + if (mutex_is_locked(&mdev->mq->mutex)) + mutex_unlock(&mdev->mq->mutex); + maple_add_packet(mdev, 0, MAPLE_COMMAND_DEVINFO, + 0, NULL); + } + } +} + /* maple dma end bottom half - implemented via workqueue */ static void maple_dma_handler(struct work_struct *work) { @@ -512,7 +621,6 @@ static void maple_dma_handler(struct work_struct *work) struct maple_device *dev; char *recvbuf; enum maple_code code; - int i; if (!maple_dma_done()) return; @@ -522,6 +630,10 @@ static void maple_dma_handler(struct work_struct *work) recvbuf = mq->recvbuf; code = recvbuf[0]; dev = mq->dev; + kfree(mq->sendbuf); + mutex_unlock(&mq->mutex); + list_del_init(&mq->list); + switch (code) { case MAPLE_RESPONSE_NONE: maple_response_none(dev, mq); @@ -558,26 +670,16 @@ static void maple_dma_handler(struct work_struct *work) break; } } - INIT_LIST_HEAD(&maple_sentq); + /* if scanning is 1 then we have subdevices to check */ if (scanning == 1) { maple_send(); scanning = 2; } else scanning = 0; - - if (!fullscan) { - fullscan = 1; - for (i = 0; i < MAPLE_PORTS; i++) { - if (checked[i] == false) { - fullscan = 0; - dev = baseunits[i]; - dev->mq->command = - MAPLE_COMMAND_DEVINFO; - dev->mq->length = 0; - maple_add_packet(dev->mq); - } - } - } + /*check if we have actually tested all ports yet */ + if (!fullscan) + maple_port_rescan(); + /* mark that we have been through the first scan */ if (started == 0) started = 1; } @@ -631,7 +733,7 @@ static int match_maple_bus_driver(struct device *devptr, if (maple_dev->devinfo.function == 0xFFFFFFFF) return 0; else if (maple_dev->devinfo.function & - be32_to_cpu(maple_drv->function)) + cpu_to_be32(maple_drv->function)) return 1; return 0; } @@ -713,6 +815,9 @@ static int __init maple_bus_init(void) if (!maple_queue_cache) goto cleanup_bothirqs; + INIT_LIST_HEAD(&maple_waitq); + INIT_LIST_HEAD(&maple_sentq); + /* setup maple ports */ for (i = 0; i < MAPLE_PORTS; i++) { checked[i] = false; @@ -723,9 +828,7 @@ static int __init maple_bus_init(void) maple_free_dev(mdev[i]); goto cleanup_cache; } - mdev[i]->mq->command = MAPLE_COMMAND_DEVINFO; - mdev[i]->mq->length = 0; - maple_add_packet(mdev[i]->mq); + maple_add_packet(mdev[i], 0, MAPLE_COMMAND_DEVINFO, 0, NULL); subdevice_map[i] = 0; } diff --git a/include/linux/maple.h b/include/linux/maple.h index 523a286bb477..c853b1066018 100644 --- a/include/linux/maple.h +++ b/include/linux/maple.h @@ -2,6 +2,7 @@ #define __LINUX_MAPLE_H #include +#include extern struct bus_type maple_bus_type; @@ -33,6 +34,7 @@ struct mapleq { void *sendbuf, *recvbuf, *recvbufdcsp; unsigned char length; enum maple_code command; + struct mutex mutex; }; struct maple_devinfo { @@ -69,7 +71,9 @@ void maple_getcond_callback(struct maple_device *dev, unsigned long interval, unsigned long function); int maple_driver_register(struct device_driver *drv); -void maple_add_packet(struct mapleq *mq); +int maple_add_packet_sleeps(struct maple_device *mdev, u32 function, + u32 command, u32 length, void *data); +void maple_clear_dev(struct maple_device *mdev); #define to_maple_dev(n) container_of(n, struct maple_device, dev) #define to_maple_driver(n) container_of(n, struct maple_driver, drv) -- cgit v1.2.3 From f1b23361a0f15497d4c6795a2935b2e98064ddfb Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Mon, 21 Jul 2008 21:18:19 -0300 Subject: rfkill: document the rfkill struct locking (v2) Reorder fields in struct rfkill and add comments to make it clear which fields are protected by rfkill->mutex. Signed-off-by: Henrique de Moraes Holschuh Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- include/linux/rfkill.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index c5f6e54ec6ae..741d1a62cc3f 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -68,7 +68,8 @@ enum rfkill_state { * @user_claim_unsupported: Whether the hardware supports exclusive * RF-kill control by userspace. Set this before registering. * @user_claim: Set when the switch is controlled exlusively by userspace. - * @mutex: Guards switch state transitions + * @mutex: Guards switch state transitions. It serializes callbacks + * and also protects the state. * @data: Pointer to the RF button drivers private data which will be * passed along when toggling radio state. * @toggle_radio(): Mandatory handler to control state of the radio. @@ -89,12 +90,13 @@ struct rfkill { const char *name; enum rfkill_type type; - enum rfkill_state state; bool user_claim_unsupported; bool user_claim; + /* the mutex serializes callbacks and also protects + * the state */ struct mutex mutex; - + enum rfkill_state state; void *data; int (*toggle_radio)(void *data, enum rfkill_state state); int (*get_state)(void *data, enum rfkill_state *state); -- cgit v1.2.3 From d0f09804144fd9471a13cf4d80e66842c7fa114f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 29 Jul 2008 11:32:07 +0200 Subject: mac80211: partially fix skb->cb use This patch fixes mac80211 to not use the skb->cb over the queue step from virtual interfaces to the master. The patch also, for now, disables aggregation because that would still require requeuing, will fix that in a separate patch. There are two other places (software requeue and powersaving stations) where requeue can happen, but that is not currently used by any drivers/not possible to use respectively. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ath5k/base.c | 2 +- drivers/net/wireless/b43/xmit.c | 2 +- drivers/net/wireless/b43legacy/xmit.c | 2 +- drivers/net/wireless/iwlwifi/iwl-tx.c | 2 +- drivers/net/wireless/iwlwifi/iwl3945-base.c | 2 +- drivers/net/wireless/rt2x00/rt2x00mac.c | 2 +- include/linux/skbuff.h | 5 ++- include/net/mac80211.h | 6 ---- net/core/skbuff.c | 3 ++ net/mac80211/main.c | 8 +---- net/mac80211/mlme.c | 8 ++--- net/mac80211/tx.c | 47 +++++++++++++---------------- net/mac80211/wme.c | 3 ++ 13 files changed, 40 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c index 1106d1c06298..ff3fad794b61 100644 --- a/drivers/net/wireless/ath5k/base.c +++ b/drivers/net/wireless/ath5k/base.c @@ -1237,7 +1237,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf) pktlen = skb->len; - if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) { + if (info->control.hw_key) { keyidx = info->control.hw_key->hw_key_idx; pktlen += info->control.icv_len; } diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c index 8d54502222a6..9dda8169f7cc 100644 --- a/drivers/net/wireless/b43/xmit.c +++ b/drivers/net/wireless/b43/xmit.c @@ -192,7 +192,7 @@ int b43_generate_txhdr(struct b43_wldev *dev, const struct b43_phy *phy = &dev->phy; const struct ieee80211_hdr *wlhdr = (const struct ieee80211_hdr *)fragment_data; - int use_encryption = (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)); + int use_encryption = !!info->control.hw_key; __le16 fctl = wlhdr->frame_control; struct ieee80211_rate *fbrate; u8 rate, rate_fb; diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c index e969ed8d412d..68e1f8c78727 100644 --- a/drivers/net/wireless/b43legacy/xmit.c +++ b/drivers/net/wireless/b43legacy/xmit.c @@ -192,7 +192,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev, u16 cookie) { const struct ieee80211_hdr *wlhdr; - int use_encryption = (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)); + int use_encryption = !!info->control.hw_key; u16 fctl; u8 rate; struct ieee80211_rate *rate_fb; diff --git a/drivers/net/wireless/iwlwifi/iwl-tx.c b/drivers/net/wireless/iwlwifi/iwl-tx.c index 9b50b1052b09..f72cd0bf6aa3 100644 --- a/drivers/net/wireless/iwlwifi/iwl-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-tx.c @@ -906,7 +906,7 @@ int iwl_tx_skb(struct iwl_priv *priv, struct sk_buff *skb) * first entry */ iwl_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len); - if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) + if (info->control.hw_key) iwl_tx_cmd_build_hwcrypto(priv, info, tx_cmd, skb, sta_id); /* Set up TFD's 2nd entry to point directly to remainder of skb, diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 05121f395c4e..7c82ecfa30a4 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -2667,7 +2667,7 @@ static int iwl3945_tx_skb(struct iwl3945_priv *priv, struct sk_buff *skb) * first entry */ iwl3945_hw_txq_attach_buf_to_tfd(priv, tfd, txcmd_phys, len); - if (!(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) + if (info->control.hw_key) iwl3945_build_tx_cmd_hwcrypto(priv, info, out_cmd, skb, 0); /* Set up TFD's 2nd entry to point directly to remainder of skb, diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index 042ab00d8bd2..c3ee4ecba792 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -63,7 +63,7 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev, */ memcpy(skb->cb, frag_skb->cb, sizeof(skb->cb)); rts_info = IEEE80211_SKB_CB(skb); - rts_info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + rts_info->control.hw_key = NULL; rts_info->flags &= ~IEEE80211_TX_CTL_USE_RTS_CTS; rts_info->flags &= ~IEEE80211_TX_CTL_USE_CTS_PROTECT; rts_info->flags &= ~IEEE80211_TX_CTL_REQ_TX_STATUS; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7ea44f6621f2..a640385e0598 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -316,7 +316,10 @@ struct sk_buff { #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif - /* 14 bit hole */ +#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) + __u8 do_not_encrypt:1; +#endif + /* 0/13/14 bit hole */ #ifdef CONFIG_NET_DMA dma_cookie_t dma_cookie; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 74487f268237..b52721008be8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -206,8 +206,6 @@ struct ieee80211_bss_conf { * These flags are used with the @flags member of &ieee80211_tx_info. * * @IEEE80211_TX_CTL_REQ_TX_STATUS: request TX status callback for this frame. - * @IEEE80211_TX_CTL_DO_NOT_ENCRYPT: send this frame without encryption; - * e.g., for EAPOL frame * @IEEE80211_TX_CTL_USE_RTS_CTS: use RTS-CTS before sending frame * @IEEE80211_TX_CTL_USE_CTS_PROTECT: use CTS protection for the frame (e.g., * for combined 802.11g / 802.11b networks) @@ -220,7 +218,6 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_SHORT_PREAMBLE: TBD * @IEEE80211_TX_CTL_LONG_RETRY_LIMIT: this frame should be send using the * through set_retry_limit configured long retry value - * @IEEE80211_TX_CTL_EAPOL_FRAME: internal to mac80211 * @IEEE80211_TX_CTL_SEND_AFTER_DTIM: send this frame after DTIM beacon * @IEEE80211_TX_CTL_AMPDU: this frame should be sent as part of an A-MPDU * @IEEE80211_TX_CTL_OFDM_HT: this frame can be sent in HT OFDM rates. number @@ -253,7 +250,6 @@ struct ieee80211_bss_conf { */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), - IEEE80211_TX_CTL_DO_NOT_ENCRYPT = BIT(1), IEEE80211_TX_CTL_USE_RTS_CTS = BIT(2), IEEE80211_TX_CTL_USE_CTS_PROTECT = BIT(3), IEEE80211_TX_CTL_NO_ACK = BIT(4), @@ -263,7 +259,6 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_FIRST_FRAGMENT = BIT(8), IEEE80211_TX_CTL_SHORT_PREAMBLE = BIT(9), IEEE80211_TX_CTL_LONG_RETRY_LIMIT = BIT(10), - IEEE80211_TX_CTL_EAPOL_FRAME = BIT(11), IEEE80211_TX_CTL_SEND_AFTER_DTIM = BIT(12), IEEE80211_TX_CTL_AMPDU = BIT(13), IEEE80211_TX_CTL_OFDM_HT = BIT(14), @@ -323,7 +318,6 @@ struct ieee80211_tx_info { struct ieee80211_vif *vif; struct ieee80211_key_conf *hw_key; unsigned long jiffies; - int ifindex; u16 aid; s8 rts_cts_rate_idx, alt_retry_rate_idx; u8 retry_limit; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e0c92274189..84640172d65d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -485,6 +485,9 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(head); C(data); C(truesize); +#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) + C(do_not_encrypt); +#endif atomic_set(&n->users, 1); atomic_inc(&(skb_shinfo(skb)->dataref)); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index b5830f7055cf..a4c5b90de769 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1233,18 +1233,12 @@ static void ieee80211_tasklet_handler(unsigned long data) /* Remove added headers (e.g., QoS control), encryption header/MIC, etc. to * make a prepared TX frame (one that has been given to hw) to look like brand * new IEEE 802.11 frame that is ready to go through TX processing again. - * Also, tx_packet_data in cb is restored from tx_control. */ + */ static void ieee80211_remove_tx_extra(struct ieee80211_local *local, struct ieee80211_key *key, struct sk_buff *skb) { int hdrlen, iv_len, mic_len; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - - info->flags &= IEEE80211_TX_CTL_REQ_TX_STATUS | - IEEE80211_TX_CTL_DO_NOT_ENCRYPT | - IEEE80211_TX_CTL_REQUEUE | - IEEE80211_TX_CTL_EAPOL_FRAME; hdrlen = ieee80211_get_hdrlen_from_skb(skb); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d7c371e36bf0..35eb767cbcbe 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -606,7 +606,6 @@ void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb, int encrypt) { struct ieee80211_sub_if_data *sdata; - struct ieee80211_tx_info *info; sdata = IEEE80211_DEV_TO_SUB_IF(dev); skb->dev = sdata->local->mdev; @@ -614,11 +613,8 @@ void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb, skb_set_network_header(skb, 0); skb_set_transport_header(skb, 0); - info = IEEE80211_SKB_CB(skb); - memset(info, 0, sizeof(struct ieee80211_tx_info)); - info->control.ifindex = sdata->dev->ifindex; - if (!encrypt) - info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + skb->iif = sdata->dev->ifindex; + skb->do_not_encrypt = !encrypt; dev_queue_xmit(skb); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index c5f78059c6ca..69019e943873 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -439,14 +439,14 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); u16 fc = tx->fc; - if (unlikely(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT)) + if (unlikely(tx->skb->do_not_encrypt)) tx->key = NULL; else if (tx->sta && (key = rcu_dereference(tx->sta->key))) tx->key = key; else if ((key = rcu_dereference(tx->sdata->default_key))) tx->key = key; else if (tx->sdata->drop_unencrypted && - !(info->flags & IEEE80211_TX_CTL_EAPOL_FRAME) && + (tx->skb->protocol != cpu_to_be16(ETH_P_PAE)) && !(info->flags & IEEE80211_TX_CTL_INJECTED)) { I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); return TX_DROP; @@ -476,7 +476,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) } if (!tx->key || !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) - info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + tx->skb->do_not_encrypt = 1; return TX_CONTINUE; } @@ -732,6 +732,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) memcpy(skb_put(frag, copylen), pos, copylen); memcpy(frag->cb, first->cb, sizeof(frag->cb)); skb_copy_queue_mapping(frag, first); + frag->do_not_encrypt = first->do_not_encrypt; pos += copylen; left -= copylen; @@ -852,7 +853,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, sband = tx->local->hw.wiphy->bands[tx->channel->band]; - info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + skb->do_not_encrypt = 1; info->flags |= IEEE80211_TX_CTL_INJECTED; tx->flags &= ~IEEE80211_TX_FRAGMENTED; @@ -925,8 +926,7 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, skb_trim(skb, skb->len - FCS_LEN); } if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP) - info->flags &= - ~IEEE80211_TX_CTL_DO_NOT_ENCRYPT; + tx->skb->do_not_encrypt = 0; if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) tx->flags |= IEEE80211_TX_FRAGMENTED; break; @@ -1042,10 +1042,9 @@ static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx, struct sk_buff *skb, struct net_device *mdev) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct net_device *dev; - dev = dev_get_by_index(&init_net, info->control.ifindex); + dev = dev_get_by_index(&init_net, skb->iif); if (unlikely(dev && !is_ieee80211_device(dev, mdev))) { dev_put(dev); dev = NULL; @@ -1306,8 +1305,8 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, bool may_encrypt; int ret; - if (info->control.ifindex) - odev = dev_get_by_index(&init_net, info->control.ifindex); + if (skb->iif) + odev = dev_get_by_index(&init_net, skb->iif); if (unlikely(odev && !is_ieee80211_device(odev, dev))) { dev_put(odev); odev = NULL; @@ -1321,9 +1320,13 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, return 0; } + memset(info, 0, sizeof(*info)); + + info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + osdata = IEEE80211_DEV_TO_SUB_IF(odev); - may_encrypt = !(info->flags & IEEE80211_TX_CTL_DO_NOT_ENCRYPT); + may_encrypt = !skb->do_not_encrypt; headroom = osdata->local->tx_headroom; if (may_encrypt) @@ -1348,7 +1351,6 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_radiotap_header *prthdr = (struct ieee80211_radiotap_header *)skb->data; u16 len_rthdr; @@ -1371,11 +1373,11 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, skb->dev = local->mdev; /* needed because we set skb device to master */ - info->control.ifindex = dev->ifindex; + skb->iif = dev->ifindex; - info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; - /* Interfaces should always request a status report */ - info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + /* sometimes we do encrypt injected frames, will be fixed + * up in radiotap parser if not wanted */ + skb->do_not_encrypt = 0; /* * fix up the pointers accounting for the radiotap @@ -1419,7 +1421,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_info *info; struct ieee80211_sub_if_data *sdata; int ret = 1, head_need; u16 ethertype, hdrlen, meshhdrlen = 0; @@ -1645,14 +1646,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, nh_pos += hdrlen; h_pos += hdrlen; - info = IEEE80211_SKB_CB(skb); - memset(info, 0, sizeof(*info)); - info->control.ifindex = dev->ifindex; - if (ethertype == ETH_P_PAE) - info->flags |= IEEE80211_TX_CTL_EAPOL_FRAME; - - /* Interfaces should always request a status report */ - info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + skb->iif = dev->ifindex; skb->dev = local->mdev; dev->stats.tx_packets++; @@ -1922,6 +1916,8 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, info = IEEE80211_SKB_CB(skb); + skb->do_not_encrypt = 1; + info->band = band; rate_control_get_rate(local->mdev, sband, skb, &rsel); @@ -1940,7 +1936,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, info->tx_rate_idx = rsel.rate_idx; info->flags |= IEEE80211_TX_CTL_NO_ACK; - info->flags |= IEEE80211_TX_CTL_DO_NOT_ENCRYPT; info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; if (sdata->bss_conf.use_short_preamble && diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 07edda0b8a5c..28437f0001db 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -188,6 +188,9 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, { int i; + /* XXX: currently broken due to cb/requeue use */ + return -EPERM; + /* prepare the filter and save it for the SW queue * matching the received HW queue */ -- cgit v1.2.3 From ce0ad7f0952581ba75ab6aee55bb1ed9bb22cf4f Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 30 Jul 2008 15:23:13 +1000 Subject: powerpc/mm: Lockless get_user_pages_fast() for 64-bit (v3) Implement lockless get_user_pages_fast for 64-bit powerpc. Page table existence is guaranteed with RCU, and speculative page references are used to take a reference to the pages without having a prior existence guarantee on them. Signed-off-by: Nick Piggin Signed-off-by: Dave Kleikamp Signed-off-by: Andrew Morton Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 3 + arch/powerpc/mm/Makefile | 3 +- arch/powerpc/mm/gup.c | 280 ++++++++++++++++++++++++++++++++++++ include/asm-powerpc/pgtable-ppc64.h | 2 + include/linux/pagemap.h | 23 +++ 5 files changed, 310 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/mm/gup.c (limited to 'include/linux') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 587da5e0990f..63c9cafda9c4 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -42,6 +42,9 @@ config GENERIC_HARDIRQS bool default y +config HAVE_GET_USER_PAGES_FAST + def_bool PPC64 + config HAVE_SETUP_PER_CPU_AREA def_bool PPC64 diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 1c00e0196f6c..e7392b45a5ef 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -12,7 +12,8 @@ obj-y := fault.o mem.o \ mmu_context_$(CONFIG_WORD_SIZE).o hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC64) += hash_utils_64.o \ - slb_low.o slb.o stab.o mmap.o $(hash-y) + slb_low.o slb.o stab.o \ + gup.o mmap.o $(hash-y) obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ tlb_$(CONFIG_WORD_SIZE).o diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c new file mode 100644 index 000000000000..9fdf4d6335e4 --- /dev/null +++ b/arch/powerpc/mm/gup.c @@ -0,0 +1,280 @@ +/* + * Lockless get_user_pages_fast for powerpc + * + * Copyright (C) 2008 Nick Piggin + * Copyright (C) 2008 Novell Inc. + */ +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include + +/* + * The performance critical leaf functions are made noinline otherwise gcc + * inlines everything into a single function which results in too much + * register pressure. + */ +static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask, result; + pte_t *ptep; + + result = _PAGE_PRESENT|_PAGE_USER; + if (write) + result |= _PAGE_RW; + mask = result | _PAGE_SPECIAL; + + ptep = pte_offset_kernel(&pmd, addr); + do { + pte_t pte = *ptep; + struct page *page; + + if ((pte_val(pte) & mask) != result) + return 0; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); + if (!page_cache_get_speculative(page)) + return 0; + if (unlikely(pte != *ptep)) { + put_page(page); + return 0; + } + pages[*nr] = page; + (*nr)++; + + } while (ptep++, addr += PAGE_SIZE, addr != end); + + return 1; +} + +#ifdef CONFIG_HUGETLB_PAGE +static noinline int gup_huge_pte(pte_t *ptep, struct hstate *hstate, + unsigned long *addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long mask; + unsigned long pte_end; + struct page *head, *page; + pte_t pte; + int refs; + + pte_end = (*addr + huge_page_size(hstate)) & huge_page_mask(hstate); + if (pte_end < end) + end = pte_end; + + pte = *ptep; + mask = _PAGE_PRESENT|_PAGE_USER; + if (write) + mask |= _PAGE_RW; + if ((pte_val(pte) & mask) != mask) + return 0; + /* hugepages are never "special" */ + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + + refs = 0; + head = pte_page(pte); + page = head + ((*addr & ~huge_page_mask(hstate)) >> PAGE_SHIFT); + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (*addr += PAGE_SIZE, *addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + if (unlikely(pte != *ptep)) { + /* Could be optimized better */ + while (*nr) { + put_page(page); + (*nr)--; + } + } + + return 1; +} +#endif /* CONFIG_HUGETLB_PAGE */ + +static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pmd_t *pmdp; + + pmdp = pmd_offset(&pud, addr); + do { + pmd_t pmd = *pmdp; + + next = pmd_addr_end(addr, end); + if (pmd_none(pmd)) + return 0; + if (!gup_pte_range(pmd, addr, next, write, pages, nr)) + return 0; + } while (pmdp++, addr = next, addr != end); + + return 1; +} + +static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pud_t *pudp; + + pudp = pud_offset(&pgd, addr); + do { + pud_t pud = *pudp; + + next = pud_addr_end(addr, end); + if (pud_none(pud)) + return 0; + if (!gup_pmd_range(pud, addr, next, write, pages, nr)) + return 0; + } while (pudp++, addr = next, addr != end); + + return 1; +} + +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + pgd_t *pgdp; + int psize, nr = 0; + unsigned int shift; + + pr_debug("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + + if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, + start, len))) + goto slow_irqon; + + pr_debug(" aligned: %lx .. %lx\n", start, end); + +#ifdef CONFIG_HUGETLB_PAGE + /* We bail out on slice boundary crossing when hugetlb is + * enabled in order to not have to deal with two different + * page table formats + */ + if (addr < SLICE_LOW_TOP) { + if (end > SLICE_LOW_TOP) + goto slow_irqon; + + if (unlikely(GET_LOW_SLICE_INDEX(addr) != + GET_LOW_SLICE_INDEX(end - 1))) + goto slow_irqon; + } else { + if (unlikely(GET_HIGH_SLICE_INDEX(addr) != + GET_HIGH_SLICE_INDEX(end - 1))) + goto slow_irqon; + } +#endif /* CONFIG_HUGETLB_PAGE */ + + /* + * XXX: batch / limit 'nr', to avoid large irq off latency + * needs some instrumenting to determine the common sizes used by + * important workloads (eg. DB2), and whether limiting the batch size + * will decrease performance. + * + * It seems like we're in the clear for the moment. Direct-IO is + * the main guy that batches up lots of get_user_pages, and even + * they are limited to 64-at-a-time which is not so many. + */ + /* + * This doesn't prevent pagetable teardown, but does prevent + * the pagetables from being freed on powerpc. + * + * So long as we atomically load page table pointers versus teardown, + * we can follow the address down to the the page and take a ref on it. + */ + local_irq_disable(); + + psize = get_slice_psize(mm, addr); + shift = mmu_psize_defs[psize].shift; + +#ifdef CONFIG_HUGETLB_PAGE + if (unlikely(mmu_huge_psizes[psize])) { + pte_t *ptep; + unsigned long a = addr; + unsigned long sz = ((1UL) << shift); + struct hstate *hstate = size_to_hstate(sz); + + BUG_ON(!hstate); + /* + * XXX: could be optimized to avoid hstate + * lookup entirely (just use shift) + */ + + do { + VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, a)].shift); + ptep = huge_pte_offset(mm, a); + pr_debug(" %016lx: huge ptep %p\n", a, ptep); + if (!ptep || !gup_huge_pte(ptep, hstate, &a, end, write, pages, + &nr)) + goto slow; + } while (a != end); + } else +#endif /* CONFIG_HUGETLB_PAGE */ + { + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, addr)].shift); + pr_debug(" %016lx: normal pgd %p\n", addr, (void *)pgd); + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + goto slow; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + goto slow; + } while (pgdp++, addr = next, addr != end); + } + local_irq_enable(); + + VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); + return nr; + + { + int ret; + +slow: + local_irq_enable(); +slow_irqon: + pr_debug(" slow path ! nr = %d\n", nr); + + /* Try to get the remaining pages with get_user_pages */ + start += nr << PAGE_SHIFT; + pages += nr; + + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, start, + (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); + up_read(&mm->mmap_sem); + + /* Have to be a bit careful with return values */ + if (nr > 0) { + if (ret < 0) + ret = nr; + else + ret += nr; + } + + return ret; + } +} diff --git a/include/asm-powerpc/pgtable-ppc64.h b/include/asm-powerpc/pgtable-ppc64.h index 5fc78c0be302..74c6f380b805 100644 --- a/include/asm-powerpc/pgtable-ppc64.h +++ b/include/asm-powerpc/pgtable-ppc64.h @@ -461,6 +461,8 @@ void pgtable_cache_init(void); return pt; } +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long address); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a39b38ccdc97..69ed3cb1197a 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -143,6 +143,29 @@ static inline int page_cache_get_speculative(struct page *page) return 1; } +/* + * Same as above, but add instead of inc (could just be merged) + */ +static inline int page_cache_add_speculative(struct page *page, int count) +{ + VM_BUG_ON(in_interrupt()); + +#if !defined(CONFIG_SMP) && defined(CONFIG_CLASSIC_RCU) +# ifdef CONFIG_PREEMPT + VM_BUG_ON(!in_atomic()); +# endif + VM_BUG_ON(page_count(page) == 0); + atomic_add(count, &page->_count); + +#else + if (unlikely(!atomic_add_unless(&page->_count, count, 0))) + return 0; +#endif + VM_BUG_ON(PageCompound(page) && page != compound_head(page)); + + return 1; +} + static inline int page_freeze_refs(struct page *page, int count) { return likely(atomic_cmpxchg(&page->_count, count, 0) == count); -- cgit v1.2.3 From e2ce4eaa76214f65a3f328ec5b45c30248115768 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Wed, 30 Apr 2008 15:10:07 +0100 Subject: regulator: consumer device interface Add support to allow consumer device drivers to control their regulator power supply. This uses a similar API to the kernel clock interface in that consumer drivers can get and put a regulator (like they can with clocks atm) and get/set voltage, current limit, mode, enable and disable. This should allow consumers complete control over their supply voltage and current limit. This also compiles out if not in use so drivers can be reused in systems with no regulator based power control. Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 284 +++++++++++++++++++++++++++++++++++++ 1 file changed, 284 insertions(+) create mode 100644 include/linux/regulator/consumer.h (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h new file mode 100644 index 000000000000..afdc4558bb94 --- /dev/null +++ b/include/linux/regulator/consumer.h @@ -0,0 +1,284 @@ +/* + * consumer.h -- SoC Regulator consumer support. + * + * Copyright (C) 2007, 2008 Wolfson Microelectronics PLC. + * + * Author: Liam Girdwood + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Regulator Consumer Interface. + * + * A Power Management Regulator framework for SoC based devices. + * Features:- + * o Voltage and current level control. + * o Operating mode control. + * o Regulator status. + * o sysfs entries for showing client devices and status + * + * EXPERIMENTAL FEATURES: + * Dynamic Regulator operating Mode Switching (DRMS) - allows regulators + * to use most efficient operating mode depending upon voltage and load and + * is transparent to client drivers. + * + * e.g. Devices x,y,z share regulator r. Device x and y draw 20mA each during + * IO and 1mA at idle. Device z draws 100mA when under load and 5mA when + * idling. Regulator r has > 90% efficiency in NORMAL mode at loads > 100mA + * but this drops rapidly to 60% when below 100mA. Regulator r has > 90% + * efficiency in IDLE mode at loads < 10mA. Thus regulator r will operate + * in normal mode for loads > 10mA and in IDLE mode for load <= 10mA. + * + */ + +#ifndef __LINUX_REGULATOR_CONSUMER_H_ +#define __LINUX_REGULATOR_CONSUMER_H_ + +/* + * Regulator operating modes. + * + * Regulators can run in a variety of different operating modes depending on + * output load. This allows further system power savings by selecting the + * best (and most efficient) regulator mode for a desired load. + * + * Most drivers will only care about NORMAL. The modes below are generic and + * will probably not match the naming convention of your regulator data sheet + * but should match the use cases in the datasheet. + * + * In order of power efficiency (least efficient at top). + * + * Mode Description + * FAST Regulator can handle fast changes in it's load. + * e.g. useful in CPU voltage & frequency scaling where + * load can quickly increase with CPU frequency increases. + * + * NORMAL Normal regulator power supply mode. Most drivers will + * use this mode. + * + * IDLE Regulator runs in a more efficient mode for light + * loads. Can be used for devices that have a low power + * requirement during periods of inactivity. This mode + * may be more noisy than NORMAL and may not be able + * to handle fast load switching. + * + * STANDBY Regulator runs in the most efficient mode for very + * light loads. Can be used by devices when they are + * in a sleep/standby state. This mode is likely to be + * the most noisy and may not be able to handle fast load + * switching. + * + * NOTE: Most regulators will only support a subset of these modes. Some + * will only just support NORMAL. + * + * These modes can be OR'ed together to make up a mask of valid register modes. + */ + +#define REGULATOR_MODE_FAST 0x1 +#define REGULATOR_MODE_NORMAL 0x2 +#define REGULATOR_MODE_IDLE 0x4 +#define REGULATOR_MODE_STANDBY 0x8 + +/* + * Regulator notifier events. + * + * UNDER_VOLTAGE Regulator output is under voltage. + * OVER_CURRENT Regulator output current is too high. + * REGULATION_OUT Regulator output is out of regulation. + * FAIL Regulator output has failed. + * OVER_TEMP Regulator over temp. + * FORCE_DISABLE Regulator shut down by software. + * + * NOTE: These events can be OR'ed together when passed into handler. + */ + +#define REGULATOR_EVENT_UNDER_VOLTAGE 0x01 +#define REGULATOR_EVENT_OVER_CURRENT 0x02 +#define REGULATOR_EVENT_REGULATION_OUT 0x04 +#define REGULATOR_EVENT_FAIL 0x08 +#define REGULATOR_EVENT_OVER_TEMP 0x10 +#define REGULATOR_EVENT_FORCE_DISABLE 0x20 + +struct regulator; + +/** + * struct regulator_bulk_data - Data used for bulk regulator operations. + * + * @supply The name of the supply. Initialised by the user before + * using the bulk regulator APIs. + * @consumer The regulator consumer for the supply. This will be managed + * by the bulk API. + * + * The regulator APIs provide a series of regulator_bulk_() API calls as + * a convenience to consumers which require multiple supplies. This + * structure is used to manage data for these calls. + */ +struct regulator_bulk_data { + const char *supply; + struct regulator *consumer; +}; + +#if defined(CONFIG_REGULATOR) + +/* regulator get and put */ +struct regulator *__must_check regulator_get(struct device *dev, + const char *id); +void regulator_put(struct regulator *regulator); + +/* regulator output control and status */ +int regulator_enable(struct regulator *regulator); +int regulator_disable(struct regulator *regulator); +int regulator_force_disable(struct regulator *regulator); +int regulator_is_enabled(struct regulator *regulator); + +int regulator_bulk_get(struct device *dev, int num_consumers, + struct regulator_bulk_data *consumers); +int regulator_bulk_enable(int num_consumers, + struct regulator_bulk_data *consumers); +int regulator_bulk_disable(int num_consumers, + struct regulator_bulk_data *consumers); +void regulator_bulk_free(int num_consumers, + struct regulator_bulk_data *consumers); + +int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV); +int regulator_get_voltage(struct regulator *regulator); +int regulator_set_current_limit(struct regulator *regulator, + int min_uA, int max_uA); +int regulator_get_current_limit(struct regulator *regulator); + +int regulator_set_mode(struct regulator *regulator, unsigned int mode); +unsigned int regulator_get_mode(struct regulator *regulator); +int regulator_set_optimum_mode(struct regulator *regulator, int load_uA); + +/* regulator notifier block */ +int regulator_register_notifier(struct regulator *regulator, + struct notifier_block *nb); +int regulator_unregister_notifier(struct regulator *regulator, + struct notifier_block *nb); + +/* driver data - core doesn't touch */ +void *regulator_get_drvdata(struct regulator *regulator); +void regulator_set_drvdata(struct regulator *regulator, void *data); + +#else + +/* + * Make sure client drivers will still build on systems with no software + * controllable voltage or current regulators. + */ +static inline struct regulator *__must_check regulator_get(struct device *dev, + const char *id) +{ + /* Nothing except the stubbed out regulator API should be + * looking at the value except to check if it is an error + * value so the actual return value doesn't matter. + */ + return (struct regulator *)id; +} +static inline void regulator_put(struct regulator *regulator) +{ +} + +static inline int regulator_enable(struct regulator *regulator) +{ + return 0; +} + +static inline int regulator_disable(struct regulator *regulator) +{ + return 0; +} + +static inline int regulator_is_enabled(struct regulator *regulator) +{ + return 1; +} + +static inline int regulator_bulk_get(struct device *dev, + int num_consumers, + struct regulator_bulk_data *consumers) +{ + return 0; +} + +static inline int regulator_bulk_enable(int num_consumers, + struct regulator_bulk_data *consumers) +{ + return 0; +} + +static inline int regulator_bulk_disable(int num_consumers, + struct regulator_bulk_data *consumers) +{ + return 0; +} + +static inline void regulator_bulk_free(int num_consumers, + struct regulator_bulk_data *consumers) +{ +} + +static inline int regulator_set_voltage(struct regulator *regulator, + int min_uV, int max_uV) +{ + return 0; +} + +static inline int regulator_get_voltage(struct regulator *regulator) +{ + return 0; +} + +static inline int regulator_set_current_limit(struct regulator *regulator, + int min_uA, int max_uA) +{ + return 0; +} + +static inline int regulator_get_current_limit(struct regulator *regulator) +{ + return 0; +} + +static inline int regulator_set_mode(struct regulator *regulator, + unsigned int mode) +{ + return 0; +} + +static inline unsigned int regulator_get_mode(struct regulator *regulator) +{ + return REGULATOR_MODE_NORMAL; +} + +static inline int regulator_set_optimum_mode(struct regulator *regulator, + int load_uA) +{ + return REGULATOR_MODE_NORMAL; +} + +static inline int regulator_register_notifier(struct regulator *regulator, + struct notifier_block *nb) +{ + return 0; +} + +static inline int regulator_unregister_notifier(struct regulator *regulator, + struct notifier_block *nb) +{ + return 0; +} + +static inline void *regulator_get_drvdata(struct regulator *regulator) +{ + return NULL; +} + +static inline void regulator_set_drvdata(struct regulator *regulator, + void *data) +{ +} + +#endif + +#endif -- cgit v1.2.3 From 571a354b1542a274d88617e1f6703f3fe7a517f1 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Wed, 30 Apr 2008 15:42:28 +0100 Subject: regulator: regulator driver interface This allows regulator drivers to register their regulators and provide operations to the core. It also has a notifier call chain for propagating regulator events to clients. Signed-off-by: Liam Girdwood Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 99 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 include/linux/regulator/driver.h (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h new file mode 100644 index 000000000000..1d712c7172a2 --- /dev/null +++ b/include/linux/regulator/driver.h @@ -0,0 +1,99 @@ +/* + * driver.h -- SoC Regulator driver support. + * + * Copyright (C) 2007, 2008 Wolfson Microelectronics PLC. + * + * Author: Liam Girdwood + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Regulator Driver Interface. + */ + +#ifndef __LINUX_REGULATOR_DRIVER_H_ +#define __LINUX_REGULATOR_DRIVER_H_ + +#include +#include + +struct regulator_constraints; +struct regulator_dev; + +/** + * struct regulator_ops - regulator operations. + * + * This struct describes regulator operations. + */ +struct regulator_ops { + + /* get/set regulator voltage */ + int (*set_voltage) (struct regulator_dev *, int min_uV, int max_uV); + int (*get_voltage) (struct regulator_dev *); + + /* get/set regulator current */ + int (*set_current_limit) (struct regulator_dev *, + int min_uA, int max_uA); + int (*get_current_limit) (struct regulator_dev *); + + /* enable/disable regulator */ + int (*enable) (struct regulator_dev *); + int (*disable) (struct regulator_dev *); + int (*is_enabled) (struct regulator_dev *); + + /* get/set regulator operating mode (defined in regulator.h) */ + int (*set_mode) (struct regulator_dev *, unsigned int mode); + unsigned int (*get_mode) (struct regulator_dev *); + + /* get most efficient regulator operating mode for load */ + unsigned int (*get_optimum_mode) (struct regulator_dev *, int input_uV, + int output_uV, int load_uA); + + /* the operations below are for configuration of regulator state when + * it's parent PMIC enters a global STANBY/HIBERNATE state */ + + /* set regulator suspend voltage */ + int (*set_suspend_voltage) (struct regulator_dev *, int uV); + + /* enable/disable regulator in suspend state */ + int (*set_suspend_enable) (struct regulator_dev *); + int (*set_suspend_disable) (struct regulator_dev *); + + /* set regulator suspend operating mode (defined in regulator.h) */ + int (*set_suspend_mode) (struct regulator_dev *, unsigned int mode); +}; + +/* + * Regulators can either control voltage or current. + */ +enum regulator_type { + REGULATOR_VOLTAGE, + REGULATOR_CURRENT, +}; + +/** + * struct regulator_desc - Regulator descriptor + * + */ +struct regulator_desc { + const char *name; + int id; + struct regulator_ops *ops; + int irq; + enum regulator_type type; + struct module *owner; +}; + + +struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, + void *reg_data); +void regulator_unregister(struct regulator_dev *rdev); + +int regulator_notifier_call_chain(struct regulator_dev *rdev, + unsigned long event, void *data); + +void *rdev_get_drvdata(struct regulator_dev *rdev); +int rdev_get_id(struct regulator_dev *rdev); + +#endif -- cgit v1.2.3 From 4c1184e85cb381121a5273ea20ad31ca3faa0a4f Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Wed, 30 Apr 2008 15:46:09 +0100 Subject: regulator: machine driver interface This interface is for machine specific code and allows the creation of voltage/current domains (with constraints) for each regulator. It can provide regulator constraints that will prevent device damage through overvoltage or over current caused by buggy client drivers. It also allows the creation of a regulator tree whereby some regulators are supplied by others (similar to a clock tree). Signed-off-by: Liam Girdwood Signed-off-by: Philipp Zabel Signed-off-by: Mark Brown --- include/linux/regulator/machine.h | 104 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 include/linux/regulator/machine.h (limited to 'include/linux') diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h new file mode 100644 index 000000000000..11e737dbfcf2 --- /dev/null +++ b/include/linux/regulator/machine.h @@ -0,0 +1,104 @@ +/* + * machine.h -- SoC Regulator support, machine/board driver API. + * + * Copyright (C) 2007, 2008 Wolfson Microelectronics PLC. + * + * Author: Liam Girdwood + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Regulator Machine/Board Interface. + */ + +#ifndef __LINUX_REGULATOR_MACHINE_H_ +#define __LINUX_REGULATOR_MACHINE_H_ + +#include +#include + +struct regulator; + +/* + * Regulator operation constraint flags. These flags are used to enable + * certain regulator operations and can be OR'ed together. + * + * VOLTAGE: Regulator output voltage can be changed by software on this + * board/machine. + * CURRENT: Regulator output current can be changed by software on this + * board/machine. + * MODE: Regulator operating mode can be changed by software on this + * board/machine. + * STATUS: Regulator can be enabled and disabled. + * DRMS: Dynamic Regulator Mode Switching is enabled for this regulator. + */ + +#define REGULATOR_CHANGE_VOLTAGE 0x1 +#define REGULATOR_CHANGE_CURRENT 0x2 +#define REGULATOR_CHANGE_MODE 0x4 +#define REGULATOR_CHANGE_STATUS 0x8 +#define REGULATOR_CHANGE_DRMS 0x10 + +/** + * struct regulator_state - regulator state during low power syatem states + * + * This describes a regulators state during a system wide low power state. + */ +struct regulator_state { + int uV; /* suspend voltage */ + unsigned int mode; /* suspend regulator operating mode */ + int enabled; /* is regulator enabled in this suspend state */ +}; + +/** + * struct regulation_constraints - regulator operating constraints. + * + * This struct describes regulator and board/machine specific constraints. + */ +struct regulation_constraints { + + char *name; + + /* voltage output range (inclusive) - for voltage control */ + int min_uV; + int max_uV; + + /* current output range (inclusive) - for current control */ + int min_uA; + int max_uA; + + /* valid regulator operating modes for this machine */ + unsigned int valid_modes_mask; + + /* valid operations for regulator on this machine */ + unsigned int valid_ops_mask; + + /* regulator input voltage - only if supply is another regulator */ + int input_uV; + + /* regulator suspend states for global PMIC STANDBY/HIBERNATE */ + struct regulator_state state_disk; + struct regulator_state state_mem; + struct regulator_state state_standby; + suspend_state_t initial_state; /* suspend state to set at init */ + + /* constriant flags */ + unsigned always_on:1; /* regulator never off when system is on */ + unsigned boot_on:1; /* bootloader/firmware enabled regulator */ + unsigned apply_uV:1; /* apply uV constraint iff min == max */ +}; + +int regulator_set_supply(const char *regulator, const char *regulator_supply); + +const char *regulator_get_supply(const char *regulator); + +int regulator_set_machine_constraints(const char *regulator, + struct regulation_constraints *constraints); + +int regulator_set_device_supply(const char *regulator, struct device *dev, + const char *supply); + +int regulator_suspend_prepare(suspend_state_t state); + +#endif -- cgit v1.2.3 From 48d335ba3164ce99cb8847513d0e3b6ee604eb20 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 30 Apr 2008 15:50:21 +0100 Subject: regulator: fixed regulator interface This patch adds support for fixed regulators. This class of regulator is not software controllable but can coexist on machines with software controlable regulators. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- include/linux/regulator/fixed.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/linux/regulator/fixed.h (limited to 'include/linux') diff --git a/include/linux/regulator/fixed.h b/include/linux/regulator/fixed.h new file mode 100644 index 000000000000..1387a5d2190e --- /dev/null +++ b/include/linux/regulator/fixed.h @@ -0,0 +1,22 @@ +/* + * fixed.h + * + * Copyright 2008 Wolfson Microelectronics PLC. + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + */ + +#ifndef __REGULATOR_FIXED_H +#define __REGULATOR_FIXED_H + +struct fixed_voltage_config { + const char *supply_name; + int microvolts; +}; + +#endif -- cgit v1.2.3 From 0eb5d5ab3ec99bfd22ff16797d95835369ffb25b Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 11 Jul 2008 17:28:06 +0200 Subject: regulator: TI bq24022 Li-Ion Charger driver This adds a regulator driver for the TI bq24022 Single-Chip Li-Ion Charger with its nCE and ISET2 pins connected to GPIOs. Signed-off-by: Philipp Zabel Signed-off-by: Liam Girdwood --- drivers/regulator/Kconfig | 10 +++ drivers/regulator/Makefile | 2 + drivers/regulator/bq24022.c | 167 ++++++++++++++++++++++++++++++++++++++ include/linux/regulator/bq24022.h | 21 +++++ 4 files changed, 200 insertions(+) create mode 100644 drivers/regulator/bq24022.c create mode 100644 include/linux/regulator/bq24022.h (limited to 'include/linux') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 84f89ecce69e..a656128f1fdd 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -46,4 +46,14 @@ config REGULATOR_VIRTUAL_CONSUMER If unsure, say no. +config REGULATOR_BQ24022 + tristate "TI bq24022 Dual Input 1-Cell Li-Ion Charger IC" + default n + select REGULATOR + help + This driver controls a TI bq24022 Charger attached via + GPIOs. The provided current regulator can enable/disable + charging select between 100 mA and 500 mA charging current + limit. + endmenu diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 29528b78c8de..ac2c64efe65c 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -7,4 +7,6 @@ obj-$(CONFIG_REGULATOR) += core.o obj-$(CONFIG_REGULATOR_FIXED_VOLTAGE) += fixed.o obj-$(CONFIG_REGULATOR_VIRTUAL_CONSUMER) += virtual.o +obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o + ccflags-$(CONFIG_REGULATOR_DEBUG) += -DDEBUG diff --git a/drivers/regulator/bq24022.c b/drivers/regulator/bq24022.c new file mode 100644 index 000000000000..263699d6152d --- /dev/null +++ b/drivers/regulator/bq24022.c @@ -0,0 +1,167 @@ +/* + * Support for TI bq24022 (bqTINY-II) Dual Input (USB/AC Adpater) + * 1-Cell Li-Ion Charger connected via GPIOs. + * + * Copyright (c) 2008 Philipp Zabel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +static int bq24022_set_current_limit(struct regulator_dev *rdev, + int min_uA, int max_uA) +{ + struct platform_device *pdev = rdev_get_drvdata(rdev); + struct bq24022_mach_info *pdata = pdev->dev.platform_data; + + dev_dbg(&pdev->dev, "setting current limit to %s mA\n", + max_uA >= 500000 ? "500" : "100"); + + /* REVISIT: maybe return error if min_uA != 0 ? */ + gpio_set_value(pdata->gpio_iset2, max_uA >= 500000); + return 0; +} + +static int bq24022_get_current_limit(struct regulator_dev *rdev) +{ + struct platform_device *pdev = rdev_get_drvdata(rdev); + struct bq24022_mach_info *pdata = pdev->dev.platform_data; + + return gpio_get_value(pdata->gpio_iset2) ? 500000 : 100000; +} + +static int bq24022_enable(struct regulator_dev *rdev) +{ + struct platform_device *pdev = rdev_get_drvdata(rdev); + struct bq24022_mach_info *pdata = pdev->dev.platform_data; + + dev_dbg(&pdev->dev, "enabling charger\n"); + + gpio_set_value(pdata->gpio_nce, 0); + return 0; +} + +static int bq24022_disable(struct regulator_dev *rdev) +{ + struct platform_device *pdev = rdev_get_drvdata(rdev); + struct bq24022_mach_info *pdata = pdev->dev.platform_data; + + dev_dbg(&pdev->dev, "disabling charger\n"); + + gpio_set_value(pdata->gpio_nce, 1); + return 0; +} + +static int bq24022_is_enabled(struct regulator_dev *rdev) +{ + struct platform_device *pdev = rdev_get_drvdata(rdev); + struct bq24022_mach_info *pdata = pdev->dev.platform_data; + + return !gpio_get_value(pdata->gpio_nce); +} + +static struct regulator_ops bq24022_ops = { + .set_current_limit = bq24022_set_current_limit, + .get_current_limit = bq24022_get_current_limit, + .enable = bq24022_enable, + .disable = bq24022_disable, + .is_enabled = bq24022_is_enabled, +}; + +static struct regulator_desc bq24022_desc = { + .name = "bq24022", + .ops = &bq24022_ops, + .type = REGULATOR_CURRENT, +}; + +static int __init bq24022_probe(struct platform_device *pdev) +{ + struct bq24022_mach_info *pdata = pdev->dev.platform_data; + struct regulator_dev *bq24022; + int ret; + + if (!pdata || !pdata->gpio_nce || !pdata->gpio_iset2) + return -EINVAL; + + ret = gpio_request(pdata->gpio_nce, "ncharge_en"); + if (ret) { + dev_dbg(&pdev->dev, "couldn't request nCE GPIO: %d\n", + pdata->gpio_nce); + goto err_ce; + } + ret = gpio_request(pdata->gpio_iset2, "charge_mode"); + if (ret) { + dev_dbg(&pdev->dev, "couldn't request ISET2 GPIO: %d\n", + pdata->gpio_iset2); + goto err_iset2; + } + ret = gpio_direction_output(pdata->gpio_iset2, 0); + ret = gpio_direction_output(pdata->gpio_nce, 1); + + bq24022 = regulator_register(&bq24022_desc, pdev); + if (IS_ERR(bq24022)) { + dev_dbg(&pdev->dev, "couldn't register regulator\n"); + ret = PTR_ERR(bq24022); + goto err_reg; + } + platform_set_drvdata(pdev, bq24022); + dev_dbg(&pdev->dev, "registered regulator\n"); + + return 0; +err_reg: + gpio_free(pdata->gpio_iset2); +err_iset2: + gpio_free(pdata->gpio_nce); +err_ce: + return ret; +} + +static int __devexit bq24022_remove(struct platform_device *pdev) +{ + struct bq24022_mach_info *pdata = pdev->dev.platform_data; + struct regulator_dev *bq24022 = platform_get_drvdata(pdev); + + regulator_unregister(bq24022); + gpio_free(pdata->gpio_iset2); + gpio_free(pdata->gpio_nce); + + return 0; +} + +static struct platform_driver bq24022_driver = { + .driver = { + .name = "bq24022", + }, + .remove = __devexit_p(bq24022_remove), +}; + +static int __init bq24022_init(void) +{ + return platform_driver_probe(&bq24022_driver, bq24022_probe); +} + +static void __exit bq24022_exit(void) +{ + platform_driver_unregister(&bq24022_driver); +} + +/* + * make sure this is probed before gpio_vbus and pda_power, + * but after asic3 or other GPIO expander drivers. + */ +subsys_initcall(bq24022_init); +module_exit(bq24022_exit); + +MODULE_AUTHOR("Philipp Zabel"); +MODULE_DESCRIPTION("TI bq24022 Li-Ion Charger driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/regulator/bq24022.h b/include/linux/regulator/bq24022.h new file mode 100644 index 000000000000..e84b0a9feda5 --- /dev/null +++ b/include/linux/regulator/bq24022.h @@ -0,0 +1,21 @@ +/* + * Support for TI bq24022 (bqTINY-II) Dual Input (USB/AC Adpater) + * 1-Cell Li-Ion Charger connected via GPIOs. + * + * Copyright (c) 2008 Philipp Zabel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +/** + * bq24022_mach_info - platform data for bq24022 + * @gpio_nce: GPIO line connected to the nCE pin, used to enable / disable charging + * @gpio_iset2: GPIO line connected to the ISET2 pin, used to limit charging current to 100 mA / 500 mA + */ +struct bq24022_mach_info { + int gpio_nce; + int gpio_iset2; +}; -- cgit v1.2.3 From 785957d3e8c6fb37b18bf671923a76dbd8240025 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 30 Jul 2008 03:03:15 -0700 Subject: tcp: MD5: Use MIB counter instead of warning for MD5 mismatch. From a report by Matti Aarnio, and preliminary patch by Adam Langley. Signed-off-by: David S. Miller --- include/linux/snmp.h | 2 ++ net/ipv4/proc.c | 2 ++ net/ipv4/tcp_ipv4.c | 10 ++-------- net/ipv6/tcp_ipv6.c | 27 ++++++++------------------- 4 files changed, 14 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 5df62ef1280c..7a6e6bba4a71 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -214,6 +214,8 @@ enum LINUX_MIB_TCPDSACKIGNOREDOLD, /* TCPSACKIgnoredOld */ LINUX_MIB_TCPDSACKIGNOREDNOUNDO, /* TCPSACKIgnoredNoUndo */ LINUX_MIB_TCPSPURIOUSRTOS, /* TCPSpuriousRTOs */ + LINUX_MIB_TCPMD5NOTFOUND, /* TCPMD5NotFound */ + LINUX_MIB_TCPMD5UNEXPECTED, /* TCPMD5Unexpected */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 834356ea99df..8f5a403f6f6b 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -232,6 +232,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD), SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO), SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), + SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND), + SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a2b06d0cc26b..b3875c0d83c7 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1116,18 +1116,12 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) return 0; if (hash_expected && !hash_location) { - LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found " - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); return 1; } if (!hash_expected && hash_location) { - LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found " - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); return 1; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cff778b23a7f..1db45216b232 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -849,28 +849,17 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr); hash_location = tcp_parse_md5sig_option(th); - /* do we have a hash as expected? */ - if (!hash_expected) { - if (!hash_location) - return 0; - if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash NOT expected but found " - "(" NIP6_FMT ", %u)->" - "(" NIP6_FMT ", %u)\n", - NIP6(ip6h->saddr), ntohs(th->source), - NIP6(ip6h->daddr), ntohs(th->dest)); - } + /* We've parsed the options - do we have a hash? */ + if (!hash_expected && !hash_location) + return 0; + + if (hash_expected && !hash_location) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); return 1; } - if (!hash_location) { - if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash expected but NOT found " - "(" NIP6_FMT ", %u)->" - "(" NIP6_FMT ", %u)\n", - NIP6(ip6h->saddr), ntohs(th->source), - NIP6(ip6h->daddr), ntohs(th->dest)); - } + if (!hash_expected && hash_location) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); return 1; } -- cgit v1.2.3 From 96d8b647cfff90c8ff07863866aacdcd9d13cead Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 29 Jul 2008 13:54:11 +0100 Subject: [MTD] [NAND] fix subpage read for small page NAND Current implementation of subpage read feature for NAND has issues with small page devices. Small page NAND do not support RNDOUT command. So subpage feature is not applicable for them. This patch disables support of subpage for small page NAND. The code is verified on nandsim(SP NAND simulation) and on LP NAND devices. Thanks a lot to Artem for finding this issue. Signed-off-by: Alexey Korolev Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/nand.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 83f678702dff..81774e5facf4 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -177,7 +177,9 @@ typedef enum { #define NAND_MUST_PAD(chip) (!(chip->options & NAND_NO_PADDING)) #define NAND_HAS_CACHEPROG(chip) ((chip->options & NAND_CACHEPRG)) #define NAND_HAS_COPYBACK(chip) ((chip->options & NAND_COPYBACK)) -#define NAND_SUBPAGE_READ(chip) ((chip->ecc.mode == NAND_ECC_SOFT)) +/* Large page NAND with SOFT_ECC should support subpage reads */ +#define NAND_SUBPAGE_READ(chip) ((chip->ecc.mode == NAND_ECC_SOFT) \ + && (chip->page_shift > 9)) /* Mask to zero out the chip options, which come from the id table */ #define NAND_CHIPOPTIONS_MSK (0x0000ffff & ~NAND_NO_AUTOINCR) -- cgit v1.2.3 From 95b1bc20532c18e3f19cd460c8350350c84ffbb2 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 29 Jul 2008 22:28:12 -0700 Subject: [MTD] MTD_DEBUG always does compile-time typechecks The current style for debug messages is to ensure they're always parsed by the compiler and then subjected to dead code removal. That way builds won't break only when debug options get enabled, which is common when they are stripped out early by CPP. This patch makes CONFIG_MTD_DEBUG adopt that convention. Signed-off-by: David Brownell Signed-off-by: David Woodhouse --- include/linux/mtd/mtd.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 4ed40caff4e5..922636548558 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -272,7 +272,11 @@ static inline void mtd_erase_callback(struct erase_info *instr) printk(KERN_INFO args); \ } while(0) #else /* CONFIG_MTD_DEBUG */ -#define DEBUG(n, args...) do { } while(0) +#define DEBUG(n, args...) \ + do { \ + if (0) \ + printk(KERN_INFO args); \ + } while(0) #endif /* CONFIG_MTD_DEBUG */ -- cgit v1.2.3 From 1a4e564b7db999fbe5d88318c96ac8747699d417 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 29 Jul 2008 22:32:57 -0700 Subject: resource: add resource_size() Avoid one-off errors by introducing a resource_size() function. Signed-off-by: Magnus Damm Cc: Ben Dooks Cc: Jean Delvare Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ioport.h | 4 ++++ kernel/resource.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 2cd07cc29687..22d2115458c6 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -118,6 +118,10 @@ extern int allocate_resource(struct resource *root, struct resource *new, int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size); resource_size_t resource_alignment(struct resource *res); +static inline resource_size_t resource_size(struct resource *res) +{ + return res->end - res->start + 1; +} /* Convenience shorthand with allocation */ #define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name)) diff --git a/kernel/resource.c b/kernel/resource.c index 74af2d7cb5a1..f5b518eabefe 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -490,7 +490,7 @@ resource_size_t resource_alignment(struct resource *res) { switch (res->flags & (IORESOURCE_SIZEALIGN | IORESOURCE_STARTALIGN)) { case IORESOURCE_SIZEALIGN: - return res->end - res->start + 1; + return resource_size(res); case IORESOURCE_STARTALIGN: return res->start; default: -- cgit v1.2.3 From a1531acd43310a7e4571d52e8846640667f4c74b Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Tue, 29 Jul 2008 22:32:58 -0700 Subject: cpufreq acpi: only call _PPC after cpufreq ACPI init funcs got called already Ingo Molnar provided a fix to not call _PPC at processor driver initialization time in "[PATCH] ACPI: fix cpufreq regression" (git commit e4233dec749a3519069d9390561b5636a75c7579) But it can still happen that _PPC is called at processor driver initialization time. This patch should make sure that this is not possible anymore. Signed-off-by: Thomas Renninger Cc: Andi Kleen Cc: Len Brown Cc: Dave Jones Cc: Ingo Molnar Cc: Venkatesh Pallipadi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c | 6 ++++++ drivers/acpi/processor_perflib.c | 15 +++++++++++++-- drivers/cpufreq/cpufreq.c | 3 +++ include/linux/cpufreq.h | 1 + 4 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c index 69288f653144..3233fe84d158 100644 --- a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c +++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c @@ -96,6 +96,12 @@ static int pmi_notifier(struct notifier_block *nb, struct cpufreq_frequency_table *cbe_freqs; u8 node; + /* Should this really be called for CPUFREQ_ADJUST, CPUFREQ_INCOMPATIBLE + * and CPUFREQ_NOTIFY policy events?) + */ + if (event == CPUFREQ_START) + return 0; + cbe_freqs = cpufreq_frequency_get_table(policy->cpu); node = cbe_cpu_to_node(policy->cpu); diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index b4749969c6b4..e98071a64810 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -64,7 +64,13 @@ static DEFINE_MUTEX(performance_mutex); * policy is adjusted accordingly. */ -static unsigned int ignore_ppc = 0; +/* ignore_ppc: + * -1 -> cpufreq low level drivers not initialized -> _PSS, etc. not called yet + * ignore _PPC + * 0 -> cpufreq low level drivers initialized -> consider _PPC values + * 1 -> ignore _PPC totally -> forced by user through boot param + */ +static unsigned int ignore_ppc = -1; module_param(ignore_ppc, uint, 0644); MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly" \ "limited by BIOS, this should help"); @@ -72,7 +78,7 @@ MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly" \ #define PPC_REGISTERED 1 #define PPC_IN_USE 2 -static int acpi_processor_ppc_status = 0; +static int acpi_processor_ppc_status; static int acpi_processor_ppc_notifier(struct notifier_block *nb, unsigned long event, void *data) @@ -81,6 +87,11 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb, struct acpi_processor *pr; unsigned int ppc = 0; + if (event == CPUFREQ_START && ignore_ppc <= 0) { + ignore_ppc = 0; + return 0; + } + if (ignore_ppc) return 0; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8d6a3ff02672..8a67f16987db 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -825,6 +825,9 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) policy->user_policy.min = policy->cpuinfo.min_freq; policy->user_policy.max = policy->cpuinfo.max_freq; + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_START, policy); + #ifdef CONFIG_SMP #ifdef CONFIG_HOTPLUG_CPU diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 2270ca5ec631..6fd5668aa572 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -106,6 +106,7 @@ struct cpufreq_policy { #define CPUFREQ_ADJUST (0) #define CPUFREQ_INCOMPATIBLE (1) #define CPUFREQ_NOTIFY (2) +#define CPUFREQ_START (3) #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */ #define CPUFREQ_SHARED_TYPE_HW (1) /* HW does needed coordination */ -- cgit v1.2.3 From 1d1958f05095a7e9ecbba86235122784a3d1b561 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 29 Jul 2008 22:33:16 -0700 Subject: mm: remove find_max_pfn_with_active_regions It has no user now Also print out info about adding/removing active regions. Signed-off-by: Yinghai Lu Acked-by: Mel Gorman Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - mm/page_alloc.c | 17 ----------------- 2 files changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 866a3dbe5c75..5e2c8af49998 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1041,7 +1041,6 @@ extern unsigned long absent_pages_in_range(unsigned long start_pfn, extern void get_pfn_range_for_nid(unsigned int nid, unsigned long *start_pfn, unsigned long *end_pfn); extern unsigned long find_min_pfn_with_active_regions(void); -extern unsigned long find_max_pfn_with_active_regions(void); extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); typedef int (*work_fn_t)(unsigned long, unsigned long, void *); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3cf3d05b6bd4..401d104d2bb6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3753,23 +3753,6 @@ unsigned long __init find_min_pfn_with_active_regions(void) return find_min_pfn_for_node(MAX_NUMNODES); } -/** - * find_max_pfn_with_active_regions - Find the maximum PFN registered - * - * It returns the maximum PFN based on information provided via - * add_active_range(). - */ -unsigned long __init find_max_pfn_with_active_regions(void) -{ - int i; - unsigned long max_pfn = 0; - - for (i = 0; i < nr_nodemap_entries; i++) - max_pfn = max(max_pfn, early_node_map[i].end_pfn); - - return max_pfn; -} - /* * early_calculate_totalpages() * Sum pages in active regions for movable zone. -- cgit v1.2.3 From 3f1712bac586069d6c891a8201457283b27e8abe Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 29 Jul 2008 22:33:32 -0700 Subject: print_ip_sym(): use %pS Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kallsyms.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 57aefa160a92..b96144887444 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -108,8 +108,7 @@ static inline void print_fn_descriptor_symbol(const char *fmt, void *addr) static inline void print_ip_sym(unsigned long ip) { - printk("[<%p>]", (void *) ip); - print_symbol(" %s\n", ip); + printk("[<%p>] %pS\n", (void *) ip, (void *) ip); } #endif /*_LINUX_KALLSYMS_H*/ -- cgit v1.2.3 From 2c203003f64de5fe55ae35712942100d270667fa Mon Sep 17 00:00:00 2001 From: Jerome Arbez-Gindre Date: Tue, 29 Jul 2008 22:33:33 -0700 Subject: connector: add a BlackBoard user to connector Add a BlackBoard user to connector. BlackBoard is part of the TSP GPL sampling framework (http://savannah.nongnu.org/p/tsp) [akpm@linux-foundation.org: add comment] Signed-off-by: Jerome Arbez-Gindre Acked-by: Evgeniy Polyakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/connector.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/connector.h b/include/linux/connector.h index 96a89d3d6727..5c7f9468f753 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -38,8 +38,9 @@ #define CN_W1_VAL 0x1 #define CN_IDX_V86D 0x4 #define CN_VAL_V86D_UVESAFB 0x1 +#define CN_IDX_BB 0x5 /* BlackBoard, from the TSP GPL sampling framework */ -#define CN_NETLINK_USERS 5 +#define CN_NETLINK_USERS 6 /* * Maximum connector's message size. -- cgit v1.2.3 From 204b885e7322656284626949e51f292fe61313fa Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 29 Jul 2008 22:33:42 -0700 Subject: introduce lower_32_bits() macro The file kernel.h contains the upper_32_bits macro. This patch adds the other part, the lower_32_bits macro. Its first use will be in the driver for AMD IOMMU. Cc: H. Peter Anvin Signed-off-by: Joerg Roedel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index fdbbf72ca2eb..aaa998f65c7a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -75,6 +75,12 @@ extern const char linux_proc_banner[]; */ #define upper_32_bits(n) ((u32)(((n) >> 16) >> 16)) +/** + * lower_32_bits - return bits 0-31 of a number + * @n: the number we're accessing + */ +#define lower_32_bits(n) ((u32)(n)) + #define KERN_EMERG "<0>" /* system is unusable */ #define KERN_ALERT "<1>" /* action must be taken immediately */ #define KERN_CRIT "<2>" /* critical conditions */ -- cgit v1.2.3 From c627f9cc046c7cd93b4525d89377fb409e170a18 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Tue, 29 Jul 2008 22:33:53 -0700 Subject: mm: add zap_vma_ptes(): a library function to unmap driver ptes zap_vma_ptes() is intended to be used by drivers to unmap ptes assigned to the driver private vmas. This interface is similar to zap_page_range() but is less general & less likely to be abused. Needed by the GRU driver. Signed-off-by: Jack Steiner Cc: Nick Piggin Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ mm/memory.c | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5e2c8af49998..335288bff1b7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -744,6 +744,8 @@ struct zap_details { struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); +int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, + unsigned long size); unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *); unsigned long unmap_vmas(struct mmu_gather **tlb, diff --git a/mm/memory.c b/mm/memory.c index 67f0ab9077d9..6793b9c68107 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -994,6 +994,29 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, return end; } +/** + * zap_vma_ptes - remove ptes mapping the vma + * @vma: vm_area_struct holding ptes to be zapped + * @address: starting address of pages to zap + * @size: number of bytes to zap + * + * This function only unmaps ptes assigned to VM_PFNMAP vmas. + * + * The entire address range must be fully contained within the vma. + * + * Returns 0 if successful. + */ +int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, + unsigned long size) +{ + if (address < vma->vm_start || address + size > vma->vm_end || + !(vma->vm_flags & VM_PFNMAP)) + return -1; + zap_page_range(vma, address, size, NULL); + return 0; +} +EXPORT_SYMBOL_GPL(zap_vma_ptes); + /* * Do a quick page-table lookup for a single page. */ -- cgit v1.2.3 From 3dd730f2b49f101b90d283c3efc4e6cd826dd8f6 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 29 Jul 2008 16:07:37 +1000 Subject: cpumask: statement expressions confuse some versions of gcc when you take the address of the result. Noticed on a sparc64 compile using a version 3.4.5 cross compiler. kernel/time/tick-common.c: In function `tick_check_new_device': kernel/time/tick-common.c:210: error: invalid lvalue in unary `&' ... Just make it a regular expression. Signed-off-by: Stephen Rothwell Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 96d0509fb8d8..d3219d73f8e6 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -287,7 +287,7 @@ static inline const cpumask_t *get_cpu_mask(unsigned int cpu) * gcc optimizes it out (it's a constant) and there's no huge stack * variable created: */ -#define cpumask_of_cpu(cpu) ({ *get_cpu_mask(cpu); }) +#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu)) #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) -- cgit v1.2.3 From 1f938d060a7bc01b5f82d46db3e38cd501b445a6 Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Mon, 21 Jul 2008 00:06:19 +0400 Subject: libata.h: replace __FUNCTION__ with __func__ Signed-off-by: Alexander Beregalov Signed-off-by: Jeff Garzik --- include/linux/libata.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 5b247b8a6b3b..d4b8e5fa3e8b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -60,9 +60,9 @@ /* note: prints function name for you */ #ifdef ATA_DEBUG -#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args) +#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args) #ifdef ATA_VERBOSE_DEBUG -#define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args) +#define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args) #else #define VPRINTK(fmt, args...) #endif /* ATA_VERBOSE_DEBUG */ @@ -71,7 +71,7 @@ #define VPRINTK(fmt, args...) #endif /* ATA_DEBUG */ -#define BPRINTK(fmt, args...) if (ap->flags & ATA_FLAG_DEBUGMSG) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args) +#define BPRINTK(fmt, args...) if (ap->flags & ATA_FLAG_DEBUGMSG) printk(KERN_ERR "%s: " fmt, __func__, ## args) /* NEW: debug levels */ #define HAVE_LIBATA_MSG 1 -- cgit v1.2.3 From 963e4975c6f93c148ca809d986d412201df9af89 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 24 Jul 2008 17:16:06 +0100 Subject: pata_it821x: Driver updates and reworking - Add support for the RDC 1010 variant - Rework the core library to have a read_id method. This allows the hacky bits of it821x to go and prepares us for pata_hd - Switch from WARN to BUG in ata_id_string as it will reboot if you get it wrong so WARN won't be seen - Allow the issue of command 0xFC on the 821x. This is needed to query rebuild status. - Tidy up printk formatting - Do more ident rewriting on RAID volumes to handle firmware provided ident data which is rather wonky - Report the firmware revision and device layout in RAID mode - Don't try and disable raid on the 8211 or RDC - they don't have the relevant bits Signed-off-by: Alan Cox Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 31 +++++- drivers/ata/pata_it821x.c | 270 ++++++++++++++++++++++++++++++++++++++++------ include/linux/libata.h | 3 + 3 files changed, 265 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index f69d1548b562..5ba96c5052c8 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1132,6 +1132,8 @@ void ata_id_string(const u16 *id, unsigned char *s, { unsigned int c; + BUG_ON(len & 1); + while (len > 0) { c = id[ofs] >> 8; *s = c; @@ -1165,8 +1167,6 @@ void ata_id_c_string(const u16 *id, unsigned char *s, { unsigned char *p; - WARN_ON(!(len & 1)); - ata_id_string(id, s, ofs, len - 1); p = s + strnlen(s, len - 1); @@ -1885,6 +1885,23 @@ static u32 ata_pio_mask_no_iordy(const struct ata_device *adev) return 3 << ATA_SHIFT_PIO; } +/** + * ata_do_dev_read_id - default ID read method + * @dev: device + * @tf: proposed taskfile + * @id: data buffer + * + * Issue the identify taskfile and hand back the buffer containing + * identify data. For some RAID controllers and for pre ATA devices + * this function is wrapped or replaced by the driver + */ +unsigned int ata_do_dev_read_id(struct ata_device *dev, + struct ata_taskfile *tf, u16 *id) +{ + return ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, + id, sizeof(id[0]) * ATA_ID_WORDS, 0); +} + /** * ata_dev_read_id - Read ID data from the specified device * @dev: target device @@ -1920,7 +1937,7 @@ int ata_dev_read_id(struct ata_device *dev, unsigned int *p_class, if (ata_msg_ctl(ap)) ata_dev_printk(dev, KERN_DEBUG, "%s: ENTER\n", __func__); - retry: +retry: ata_tf_init(dev, &tf); switch (class) { @@ -1948,8 +1965,11 @@ int ata_dev_read_id(struct ata_device *dev, unsigned int *p_class, */ tf.flags |= ATA_TFLAG_POLLING; - err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE, - id, sizeof(id[0]) * ATA_ID_WORDS, 0); + if (ap->ops->read_id) + err_mask = ap->ops->read_id(dev, &tf, id); + else + err_mask = ata_do_dev_read_id(dev, &tf, id); + if (err_mask) { if (err_mask & AC_ERR_NODEV_HINT) { ata_dev_printk(dev, KERN_DEBUG, @@ -6283,6 +6303,7 @@ EXPORT_SYMBOL_GPL(ata_host_resume); #endif /* CONFIG_PM */ EXPORT_SYMBOL_GPL(ata_id_string); EXPORT_SYMBOL_GPL(ata_id_c_string); +EXPORT_SYMBOL_GPL(ata_do_dev_read_id); EXPORT_SYMBOL_GPL(ata_scsi_simulate); EXPORT_SYMBOL_GPL(ata_pio_need_iordy); diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c index e10816931b2f..27843c70eb9d 100644 --- a/drivers/ata/pata_it821x.c +++ b/drivers/ata/pata_it821x.c @@ -80,7 +80,7 @@ #define DRV_NAME "pata_it821x" -#define DRV_VERSION "0.3.8" +#define DRV_VERSION "0.4.0" struct it821x_dev { @@ -425,6 +425,8 @@ static unsigned int it821x_smart_qc_issue(struct ata_queued_cmd *qc) case ATA_CMD_WRITE_MULTI: case ATA_CMD_WRITE_MULTI_EXT: case ATA_CMD_ID_ATA: + case ATA_CMD_INIT_DEV_PARAMS: + case 0xFC: /* Internal 'report rebuild state' */ /* Arguably should just no-op this one */ case ATA_CMD_SET_FEATURES: return ata_sff_qc_issue(qc); @@ -509,7 +511,7 @@ static void it821x_dev_config(struct ata_device *adev) if (strstr(model_num, "Integrated Technology Express")) { /* RAID mode */ - printk(KERN_INFO "IT821x %sRAID%d volume", + ata_dev_printk(adev, KERN_INFO, "%sRAID%d volume", adev->id[147]?"Bootable ":"", adev->id[129]); if (adev->id[129] != 1) @@ -519,37 +521,51 @@ static void it821x_dev_config(struct ata_device *adev) /* This is a controller firmware triggered funny, don't report the drive faulty! */ adev->horkage &= ~ATA_HORKAGE_DIAGNOSTIC; + /* No HPA in 'smart' mode */ + adev->horkage |= ATA_HORKAGE_BROKEN_HPA; } /** - * it821x_ident_hack - Hack identify data up - * @ap: Port + * it821x_read_id - Hack identify data up + * @adev: device to read + * @tf: proposed taskfile + * @id: buffer for returned ident data * - * Walk the devices on this firmware driven port and slightly + * Query the devices on this firmware driven port and slightly * mash the identify data to stop us and common tools trying to * use features not firmware supported. The firmware itself does * some masking (eg SMART) but not enough. - * - * This is a bit of an abuse of the cable method, but it is the - * only method called at the right time. We could modify the libata - * core specifically for ident hacking but while we have one offender - * it seems better to keep the fallout localised. */ -static int it821x_ident_hack(struct ata_port *ap) +static unsigned int it821x_read_id(struct ata_device *adev, + struct ata_taskfile *tf, u16 *id) { - struct ata_device *adev; - ata_link_for_each_dev(adev, &ap->link) { - if (ata_dev_enabled(adev)) { - adev->id[84] &= ~(1 << 6); /* No FUA */ - adev->id[85] &= ~(1 << 10); /* No HPA */ - adev->id[76] = 0; /* No NCQ/AN etc */ - } + unsigned int err_mask; + unsigned char model_num[ATA_ID_PROD_LEN + 1]; + + err_mask = ata_do_dev_read_id(adev, tf, id); + if (err_mask) + return err_mask; + ata_id_c_string(id, model_num, ATA_ID_PROD, sizeof(model_num)); + + id[83] &= ~(1 << 12); /* Cache flush is firmware handled */ + id[83] &= ~(1 << 13); /* Ditto for LBA48 flushes */ + id[84] &= ~(1 << 6); /* No FUA */ + id[85] &= ~(1 << 10); /* No HPA */ + id[76] = 0; /* No NCQ/AN etc */ + + if (strstr(model_num, "Integrated Technology Express")) { + /* Set feature bits the firmware neglects */ + id[49] |= 0x0300; /* LBA, DMA */ + id[82] |= 0x0400; /* LBA48 */ + id[83] &= 0x7FFF; + id[83] |= 0x4000; /* Word 83 is valid */ + id[86] |= 0x0400; /* LBA48 on */ + id[ATA_ID_MAJOR_VER] |= 0x1F; } - return ata_cable_unknown(ap); + return err_mask; } - /** * it821x_check_atapi_dma - ATAPI DMA handler * @qc: Command we are about to issue @@ -577,6 +593,136 @@ static int it821x_check_atapi_dma(struct ata_queued_cmd *qc) return 0; } +/** + * it821x_display_disk - display disk setup + * @n: Device number + * @buf: Buffer block from firmware + * + * Produce a nice informative display of the device setup as provided + * by the firmware. + */ + +static void it821x_display_disk(int n, u8 *buf) +{ + unsigned char id[41]; + int mode = 0; + char *mtype; + char mbuf[8]; + char *cbl = "(40 wire cable)"; + + static const char *types[5] = { + "RAID0", "RAID1" "RAID 0+1", "JBOD", "DISK" + }; + + if (buf[52] > 4) /* No Disk */ + return; + + ata_id_c_string((u16 *)buf, id, 0, 41); + + if (buf[51]) { + mode = ffs(buf[51]); + mtype = "UDMA"; + } else if (buf[49]) { + mode = ffs(buf[49]); + mtype = "MWDMA"; + } + + if (buf[76]) + cbl = ""; + + if (mode) + snprintf(mbuf, 8, "%5s%d", mtype, mode - 1); + else + strcpy(mbuf, "PIO"); + if (buf[52] == 4) + printk(KERN_INFO "%d: %-6s %-8s %s %s\n", + n, mbuf, types[buf[52]], id, cbl); + else + printk(KERN_INFO "%d: %-6s %-8s Volume: %1d %s %s\n", + n, mbuf, types[buf[52]], buf[53], id, cbl); + if (buf[125] < 100) + printk(KERN_INFO "%d: Rebuilding: %d%%\n", n, buf[125]); +} + +/** + * it821x_firmware_command - issue firmware command + * @ap: IT821x port to interrogate + * @cmd: command + * @len: length + * + * Issue firmware commands expecting data back from the controller. We + * use this to issue commands that do not go via the normal paths. Other + * commands such as 0xFC can be issued normally. + */ + +static u8 *it821x_firmware_command(struct ata_port *ap, u8 cmd, int len) +{ + u8 status; + int n = 0; + u16 *buf = kmalloc(len, GFP_KERNEL); + if (buf == NULL) { + printk(KERN_ERR "it821x_firmware_command: Out of memory\n"); + return NULL; + } + /* This isn't quite a normal ATA command as we are talking to the + firmware not the drives */ + ap->ctl |= ATA_NIEN; + iowrite8(ap->ctl, ap->ioaddr.ctl_addr); + ata_wait_idle(ap); + iowrite8(ATA_DEVICE_OBS, ap->ioaddr.device_addr); + iowrite8(cmd, ap->ioaddr.command_addr); + udelay(1); + /* This should be almost immediate but a little paranoia goes a long + way. */ + while(n++ < 10) { + status = ioread8(ap->ioaddr.status_addr); + if (status & ATA_ERR) { + kfree(buf); + printk(KERN_ERR "it821x_firmware_command: rejected\n"); + return NULL; + } + if (status & ATA_DRQ) { + ioread16_rep(ap->ioaddr.data_addr, buf, len/2); + return (u8 *)buf; + } + mdelay(1); + } + kfree(buf); + printk(KERN_ERR "it821x_firmware_command: timeout\n"); + return NULL; +} + +/** + * it821x_probe_firmware - firmware reporting/setup + * @ap: IT821x port being probed + * + * Probe the firmware of the controller by issuing firmware command + * 0xFA and analysing the returned data. + */ + +static void it821x_probe_firmware(struct ata_port *ap) +{ + u8 *buf; + int i; + + /* This is a bit ugly as we can't just issue a task file to a device + as this is controller magic */ + + buf = it821x_firmware_command(ap, 0xFA, 512); + + if (buf != NULL) { + printk(KERN_INFO "pata_it821x: Firmware %02X/%02X/%02X%02X\n", + buf[505], + buf[506], + buf[507], + buf[508]); + for (i = 0; i < 4; i++) + it821x_display_disk(i, buf + 128 * i); + kfree(buf); + } +} + + /** * it821x_port_start - port setup @@ -610,6 +756,8 @@ static int it821x_port_start(struct ata_port *ap) /* Long I/O's although allowed in LBA48 space cause the onboard firmware to enter the twighlight zone */ /* No ATAPI DMA in this mode either */ + if (ap->port_no == 0) + it821x_probe_firmware(ap); } /* Pull the current clocks from 0x50 */ if (conf & (1 << (1 + ap->port_no))) @@ -631,6 +779,25 @@ static int it821x_port_start(struct ata_port *ap) return 0; } +/** + * it821x_rdc_cable - Cable detect for RDC1010 + * @ap: port we are checking + * + * Return the RDC1010 cable type. Unlike the IT821x we know how to do + * this and can do host side cable detect + */ + +static int it821x_rdc_cable(struct ata_port *ap) +{ + u16 r40; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + pci_read_config_word(pdev, 0x40, &r40); + if (r40 & (1 << (2 + ap->port_no))) + return ATA_CBL_PATA40; + return ATA_CBL_PATA80; +} + static struct scsi_host_template it821x_sht = { ATA_BMDMA_SHT(DRV_NAME), }; @@ -641,9 +808,10 @@ static struct ata_port_operations it821x_smart_port_ops = { .check_atapi_dma= it821x_check_atapi_dma, .qc_issue = it821x_smart_qc_issue, - .cable_detect = it821x_ident_hack, + .cable_detect = ata_cable_80wire, .set_mode = it821x_smart_set_mode, .dev_config = it821x_dev_config, + .read_id = it821x_read_id, .port_start = it821x_port_start, }; @@ -664,8 +832,29 @@ static struct ata_port_operations it821x_passthru_port_ops = { .port_start = it821x_port_start, }; +static struct ata_port_operations it821x_rdc_port_ops = { + .inherits = &ata_bmdma_port_ops, + + .check_atapi_dma= it821x_check_atapi_dma, + .sff_dev_select = it821x_passthru_dev_select, + .bmdma_start = it821x_passthru_bmdma_start, + .bmdma_stop = it821x_passthru_bmdma_stop, + .qc_issue = it821x_passthru_qc_issue, + + .cable_detect = it821x_rdc_cable, + .set_piomode = it821x_passthru_set_piomode, + .set_dmamode = it821x_passthru_set_dmamode, + + .port_start = it821x_port_start, +}; + static void it821x_disable_raid(struct pci_dev *pdev) { + /* Neither the RDC nor the IT8211 */ + if (pdev->vendor != PCI_VENDOR_ID_ITE || + pdev->device != PCI_DEVICE_ID_ITE_8212) + return; + /* Reset local CPU, and set BIOS not ready */ pci_write_config_byte(pdev, 0x5E, 0x01); @@ -690,6 +879,7 @@ static int it821x_init_one(struct pci_dev *pdev, const struct pci_device_id *id) .flags = ATA_FLAG_SLAVE_POSS, .pio_mask = 0x1f, .mwdma_mask = 0x07, + .udma_mask = ATA_UDMA6, .port_ops = &it821x_smart_port_ops }; static const struct ata_port_info info_passthru = { @@ -699,6 +889,13 @@ static int it821x_init_one(struct pci_dev *pdev, const struct pci_device_id *id) .udma_mask = ATA_UDMA6, .port_ops = &it821x_passthru_port_ops }; + static const struct ata_port_info info_rdc = { + .flags = ATA_FLAG_SLAVE_POSS, + .pio_mask = 0x1f, + .mwdma_mask = 0x07, + /* No UDMA */ + .port_ops = &it821x_rdc_port_ops + }; const struct ata_port_info *ppi[] = { NULL, NULL }; static char *mode[2] = { "pass through", "smart" }; @@ -707,21 +904,25 @@ static int it821x_init_one(struct pci_dev *pdev, const struct pci_device_id *id) rc = pcim_enable_device(pdev); if (rc) return rc; + + if (pdev->vendor == PCI_VENDOR_ID_RDC) { + ppi[0] = &info_rdc; + } else { + /* Force the card into bypass mode if so requested */ + if (it8212_noraid) { + printk(KERN_INFO DRV_NAME ": forcing bypass mode.\n"); + it821x_disable_raid(pdev); + } + pci_read_config_byte(pdev, 0x50, &conf); + conf &= 1; - /* Force the card into bypass mode if so requested */ - if (it8212_noraid) { - printk(KERN_INFO DRV_NAME ": forcing bypass mode.\n"); - it821x_disable_raid(pdev); + printk(KERN_INFO DRV_NAME": controller in %s mode.\n", + mode[conf]); + if (conf == 0) + ppi[0] = &info_passthru; + else + ppi[0] = &info_smart; } - pci_read_config_byte(pdev, 0x50, &conf); - conf &= 1; - - printk(KERN_INFO DRV_NAME ": controller in %s mode.\n", mode[conf]); - if (conf == 0) - ppi[0] = &info_passthru; - else - ppi[0] = &info_smart; - return ata_pci_sff_init_one(pdev, ppi, &it821x_sht, NULL); } @@ -745,6 +946,7 @@ static int it821x_reinit_one(struct pci_dev *pdev) static const struct pci_device_id it821x[] = { { PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8211), }, { PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8212), }, + { PCI_VDEVICE(RDC, 0x1010), }, { }, }; diff --git a/include/linux/libata.h b/include/linux/libata.h index d4b8e5fa3e8b..06b80337303b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -750,6 +750,7 @@ struct ata_port_operations { void (*set_piomode)(struct ata_port *ap, struct ata_device *dev); void (*set_dmamode)(struct ata_port *ap, struct ata_device *dev); int (*set_mode)(struct ata_link *link, struct ata_device **r_failed_dev); + unsigned int (*read_id)(struct ata_device *dev, struct ata_taskfile *tf, u16 *id); void (*dev_config)(struct ata_device *dev); @@ -951,6 +952,8 @@ extern void ata_id_string(const u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); extern void ata_id_c_string(const u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); +extern unsigned int ata_do_dev_read_id(struct ata_device *dev, + struct ata_taskfile *tf, u16 *id); extern void ata_qc_complete(struct ata_queued_cmd *qc); extern int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active); extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd, -- cgit v1.2.3 From ae375044d31075a31de5a839e07ded7f67b660aa Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 31 Jul 2008 00:38:01 -0700 Subject: netfilter: nf_conntrack_tcp: decrease timeouts while data in unacknowledged In order to time out dead connections quicker, keep track of outstanding data and cap the timeout. Suggested by Herbert Xu. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_tcp.h | 3 +++ net/netfilter/nf_conntrack_proto_tcp.c | 29 ++++++++++++++++++++++++----- 2 files changed, 27 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h index 22ce29995f13..a049df4f2236 100644 --- a/include/linux/netfilter/nf_conntrack_tcp.h +++ b/include/linux/netfilter/nf_conntrack_tcp.h @@ -30,6 +30,9 @@ enum tcp_conntrack { /* Be liberal in window checking */ #define IP_CT_TCP_FLAG_BE_LIBERAL 0x08 +/* Has unacknowledged data */ +#define IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED 0x10 + struct nf_ct_tcp_flags { u_int8_t flags; u_int8_t mask; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 420a10d8eb1e..6f61261888ef 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -67,7 +67,8 @@ static const char *const tcp_conntrack_names[] = { /* RFC1122 says the R2 limit should be at least 100 seconds. Linux uses 15 packets as limit, which corresponds to ~13-30min depending on RTO. */ -static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; +static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; +static unsigned int nf_ct_tcp_timeout_unacknowledged __read_mostly = 5 MINS; static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = { [TCP_CONNTRACK_SYN_SENT] = 2 MINS, @@ -625,8 +626,10 @@ static bool tcp_in_window(const struct nf_conn *ct, swin = win + (sack - ack); if (sender->td_maxwin < swin) sender->td_maxwin = swin; - if (after(end, sender->td_end)) + if (after(end, sender->td_end)) { sender->td_end = end; + sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; + } /* * Update receiver data. */ @@ -637,6 +640,8 @@ static bool tcp_in_window(const struct nf_conn *ct, if (win == 0) receiver->td_maxend++; } + if (ack == receiver->td_end) + receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; /* * Check retransmissions. @@ -951,9 +956,16 @@ static int tcp_packet(struct nf_conn *ct, if (old_state != new_state && new_state == TCP_CONNTRACK_FIN_WAIT) ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; - timeout = ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans - && tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans - ? nf_ct_tcp_timeout_max_retrans : tcp_timeouts[new_state]; + + if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && + tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans) + timeout = nf_ct_tcp_timeout_max_retrans; + else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & + IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && + tcp_timeouts[new_state] > nf_ct_tcp_timeout_unacknowledged) + timeout = nf_ct_tcp_timeout_unacknowledged; + else + timeout = tcp_timeouts[new_state]; write_unlock_bh(&tcp_lock); nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); @@ -1235,6 +1247,13 @@ static struct ctl_table tcp_sysctl_table[] = { .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, + { + .procname = "nf_conntrack_tcp_timeout_unacknowledged", + .data = &nf_ct_tcp_timeout_unacknowledged, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, { .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE, .procname = "nf_conntrack_tcp_loose", -- cgit v1.2.3 From dacdd0e04768da1fd2b24a6ee274c582b40d0c5b Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 17 Jul 2008 16:54:19 -0700 Subject: [PATCH] configfs: Include linux/err.h in linux/configfs.h We now use PTR_ERR() in the ->make_item() and ->make_group() operations. Folks including configfs.h need err.h. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/configfs/dir.c | 2 +- include/linux/configfs.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 179589be063a..2495f23e33f4 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1094,7 +1094,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) kfree(name); if (ret) { /* - * If item == NULL, then link_obj() was never called. + * If ret != 0, then link_obj() was never called. * There are no extra references to clean up. */ goto out_put; diff --git a/include/linux/configfs.h b/include/linux/configfs.h index d62c19ff041c..0a5491baf0bc 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -40,6 +40,7 @@ #include #include #include +#include #include -- cgit v1.2.3 From ecb3d28c7edd58b54f16838c434b342ba9195bec Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 18 Jun 2008 19:29:05 -0700 Subject: [PATCH] configfs: Convenience macros for attribute definition. Sysfs has the _ATTR() and _ATTR_RO() macros to make defining extended form attributes easier. configfs should have something similiar. - _CONFIGFS_ATTR() and _CONFIGFS_ATTR_RO() are the counterparts to the sysfs macros. - CONFIGFS_ATTR_STRUCT() creates the extended form attribute structure. - CONFIGFS_ATTR_OPS() defines the show_attribute()/store_attribute() operations that call the show()/store() operations of the extended form configfs_attributes. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- Documentation/filesystems/configfs/configfs.txt | 17 +- .../filesystems/configfs/configfs_example.c | 485 --------------------- .../configfs/configfs_example_explicit.c | 485 +++++++++++++++++++++ .../filesystems/configfs/configfs_example_macros.c | 448 +++++++++++++++++++ include/linux/configfs.h | 67 ++- 5 files changed, 1012 insertions(+), 490 deletions(-) delete mode 100644 Documentation/filesystems/configfs/configfs_example.c create mode 100644 Documentation/filesystems/configfs/configfs_example_explicit.c create mode 100644 Documentation/filesystems/configfs/configfs_example_macros.c (limited to 'include/linux') diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt index 44c97e6accb2..fabcb0e00f25 100644 --- a/Documentation/filesystems/configfs/configfs.txt +++ b/Documentation/filesystems/configfs/configfs.txt @@ -311,9 +311,20 @@ the subsystem must be ready for it. [An Example] The best example of these basic concepts is the simple_children -subsystem/group and the simple_child item in configfs_example.c It -shows a trivial object displaying and storing an attribute, and a simple -group creating and destroying these children. +subsystem/group and the simple_child item in configfs_example_explicit.c +and configfs_example_macros.c. It shows a trivial object displaying and +storing an attribute, and a simple group creating and destroying these +children. + +The only difference between configfs_example_explicit.c and +configfs_example_macros.c is how the attributes of the childless item +are defined. The childless item has extended attributes, each with +their own show()/store() operation. This follows a convention commonly +used in sysfs. configfs_example_explicit.c creates these attributes +by explicitly defining the structures involved. Conversely +configfs_example_macros.c uses some convenience macros from configfs.h +to define the attributes. These macros are similar to their sysfs +counterparts. [Hierarchy Navigation and the Subsystem Mutex] diff --git a/Documentation/filesystems/configfs/configfs_example.c b/Documentation/filesystems/configfs/configfs_example.c deleted file mode 100644 index 039648791701..000000000000 --- a/Documentation/filesystems/configfs/configfs_example.c +++ /dev/null @@ -1,485 +0,0 @@ -/* - * vim: noexpandtab ts=8 sts=0 sw=8: - * - * configfs_example.c - This file is a demonstration module containing - * a number of configfs subsystems. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * - * Based on sysfs: - * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel - * - * configfs Copyright (C) 2005 Oracle. All rights reserved. - */ - -#include -#include -#include - -#include - - - -/* - * 01-childless - * - * This first example is a childless subsystem. It cannot create - * any config_items. It just has attributes. - * - * Note that we are enclosing the configfs_subsystem inside a container. - * This is not necessary if a subsystem has no attributes directly - * on the subsystem. See the next example, 02-simple-children, for - * such a subsystem. - */ - -struct childless { - struct configfs_subsystem subsys; - int showme; - int storeme; -}; - -struct childless_attribute { - struct configfs_attribute attr; - ssize_t (*show)(struct childless *, char *); - ssize_t (*store)(struct childless *, const char *, size_t); -}; - -static inline struct childless *to_childless(struct config_item *item) -{ - return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL; -} - -static ssize_t childless_showme_read(struct childless *childless, - char *page) -{ - ssize_t pos; - - pos = sprintf(page, "%d\n", childless->showme); - childless->showme++; - - return pos; -} - -static ssize_t childless_storeme_read(struct childless *childless, - char *page) -{ - return sprintf(page, "%d\n", childless->storeme); -} - -static ssize_t childless_storeme_write(struct childless *childless, - const char *page, - size_t count) -{ - unsigned long tmp; - char *p = (char *) page; - - tmp = simple_strtoul(p, &p, 10); - if (!p || (*p && (*p != '\n'))) - return -EINVAL; - - if (tmp > INT_MAX) - return -ERANGE; - - childless->storeme = tmp; - - return count; -} - -static ssize_t childless_description_read(struct childless *childless, - char *page) -{ - return sprintf(page, -"[01-childless]\n" -"\n" -"The childless subsystem is the simplest possible subsystem in\n" -"configfs. It does not support the creation of child config_items.\n" -"It only has a few attributes. In fact, it isn't much different\n" -"than a directory in /proc.\n"); -} - -static struct childless_attribute childless_attr_showme = { - .attr = { .ca_owner = THIS_MODULE, .ca_name = "showme", .ca_mode = S_IRUGO }, - .show = childless_showme_read, -}; -static struct childless_attribute childless_attr_storeme = { - .attr = { .ca_owner = THIS_MODULE, .ca_name = "storeme", .ca_mode = S_IRUGO | S_IWUSR }, - .show = childless_storeme_read, - .store = childless_storeme_write, -}; -static struct childless_attribute childless_attr_description = { - .attr = { .ca_owner = THIS_MODULE, .ca_name = "description", .ca_mode = S_IRUGO }, - .show = childless_description_read, -}; - -static struct configfs_attribute *childless_attrs[] = { - &childless_attr_showme.attr, - &childless_attr_storeme.attr, - &childless_attr_description.attr, - NULL, -}; - -static ssize_t childless_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - struct childless *childless = to_childless(item); - struct childless_attribute *childless_attr = - container_of(attr, struct childless_attribute, attr); - ssize_t ret = 0; - - if (childless_attr->show) - ret = childless_attr->show(childless, page); - return ret; -} - -static ssize_t childless_attr_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct childless *childless = to_childless(item); - struct childless_attribute *childless_attr = - container_of(attr, struct childless_attribute, attr); - ssize_t ret = -EINVAL; - - if (childless_attr->store) - ret = childless_attr->store(childless, page, count); - return ret; -} - -static struct configfs_item_operations childless_item_ops = { - .show_attribute = childless_attr_show, - .store_attribute = childless_attr_store, -}; - -static struct config_item_type childless_type = { - .ct_item_ops = &childless_item_ops, - .ct_attrs = childless_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct childless childless_subsys = { - .subsys = { - .su_group = { - .cg_item = { - .ci_namebuf = "01-childless", - .ci_type = &childless_type, - }, - }, - }, -}; - - -/* ----------------------------------------------------------------- */ - -/* - * 02-simple-children - * - * This example merely has a simple one-attribute child. Note that - * there is no extra attribute structure, as the child's attribute is - * known from the get-go. Also, there is no container for the - * subsystem, as it has no attributes of its own. - */ - -struct simple_child { - struct config_item item; - int storeme; -}; - -static inline struct simple_child *to_simple_child(struct config_item *item) -{ - return item ? container_of(item, struct simple_child, item) : NULL; -} - -static struct configfs_attribute simple_child_attr_storeme = { - .ca_owner = THIS_MODULE, - .ca_name = "storeme", - .ca_mode = S_IRUGO | S_IWUSR, -}; - -static struct configfs_attribute *simple_child_attrs[] = { - &simple_child_attr_storeme, - NULL, -}; - -static ssize_t simple_child_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - ssize_t count; - struct simple_child *simple_child = to_simple_child(item); - - count = sprintf(page, "%d\n", simple_child->storeme); - - return count; -} - -static ssize_t simple_child_attr_store(struct config_item *item, - struct configfs_attribute *attr, - const char *page, size_t count) -{ - struct simple_child *simple_child = to_simple_child(item); - unsigned long tmp; - char *p = (char *) page; - - tmp = simple_strtoul(p, &p, 10); - if (!p || (*p && (*p != '\n'))) - return -EINVAL; - - if (tmp > INT_MAX) - return -ERANGE; - - simple_child->storeme = tmp; - - return count; -} - -static void simple_child_release(struct config_item *item) -{ - kfree(to_simple_child(item)); -} - -static struct configfs_item_operations simple_child_item_ops = { - .release = simple_child_release, - .show_attribute = simple_child_attr_show, - .store_attribute = simple_child_attr_store, -}; - -static struct config_item_type simple_child_type = { - .ct_item_ops = &simple_child_item_ops, - .ct_attrs = simple_child_attrs, - .ct_owner = THIS_MODULE, -}; - - -struct simple_children { - struct config_group group; -}; - -static inline struct simple_children *to_simple_children(struct config_item *item) -{ - return item ? container_of(to_config_group(item), struct simple_children, group) : NULL; -} - -static struct config_item *simple_children_make_item(struct config_group *group, const char *name) -{ - struct simple_child *simple_child; - - simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); - if (!simple_child) - return ERR_PTR(-ENOMEM); - - - config_item_init_type_name(&simple_child->item, name, - &simple_child_type); - - simple_child->storeme = 0; - - return &simple_child->item; -} - -static struct configfs_attribute simple_children_attr_description = { - .ca_owner = THIS_MODULE, - .ca_name = "description", - .ca_mode = S_IRUGO, -}; - -static struct configfs_attribute *simple_children_attrs[] = { - &simple_children_attr_description, - NULL, -}; - -static ssize_t simple_children_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - return sprintf(page, -"[02-simple-children]\n" -"\n" -"This subsystem allows the creation of child config_items. These\n" -"items have only one attribute that is readable and writeable.\n"); -} - -static void simple_children_release(struct config_item *item) -{ - kfree(to_simple_children(item)); -} - -static struct configfs_item_operations simple_children_item_ops = { - .release = simple_children_release, - .show_attribute = simple_children_attr_show, -}; - -/* - * Note that, since no extra work is required on ->drop_item(), - * no ->drop_item() is provided. - */ -static struct configfs_group_operations simple_children_group_ops = { - .make_item = simple_children_make_item, -}; - -static struct config_item_type simple_children_type = { - .ct_item_ops = &simple_children_item_ops, - .ct_group_ops = &simple_children_group_ops, - .ct_attrs = simple_children_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct configfs_subsystem simple_children_subsys = { - .su_group = { - .cg_item = { - .ci_namebuf = "02-simple-children", - .ci_type = &simple_children_type, - }, - }, -}; - - -/* ----------------------------------------------------------------- */ - -/* - * 03-group-children - * - * This example reuses the simple_children group from above. However, - * the simple_children group is not the subsystem itself, it is a - * child of the subsystem. Creation of a group in the subsystem creates - * a new simple_children group. That group can then have simple_child - * children of its own. - */ - -static struct config_group *group_children_make_group(struct config_group *group, const char *name) -{ - struct simple_children *simple_children; - - simple_children = kzalloc(sizeof(struct simple_children), - GFP_KERNEL); - if (!simple_children) - return ERR_PTR(-ENOMEM); - - - config_group_init_type_name(&simple_children->group, name, - &simple_children_type); - - return &simple_children->group; -} - -static struct configfs_attribute group_children_attr_description = { - .ca_owner = THIS_MODULE, - .ca_name = "description", - .ca_mode = S_IRUGO, -}; - -static struct configfs_attribute *group_children_attrs[] = { - &group_children_attr_description, - NULL, -}; - -static ssize_t group_children_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *page) -{ - return sprintf(page, -"[03-group-children]\n" -"\n" -"This subsystem allows the creation of child config_groups. These\n" -"groups are like the subsystem simple-children.\n"); -} - -static struct configfs_item_operations group_children_item_ops = { - .show_attribute = group_children_attr_show, -}; - -/* - * Note that, since no extra work is required on ->drop_item(), - * no ->drop_item() is provided. - */ -static struct configfs_group_operations group_children_group_ops = { - .make_group = group_children_make_group, -}; - -static struct config_item_type group_children_type = { - .ct_item_ops = &group_children_item_ops, - .ct_group_ops = &group_children_group_ops, - .ct_attrs = group_children_attrs, - .ct_owner = THIS_MODULE, -}; - -static struct configfs_subsystem group_children_subsys = { - .su_group = { - .cg_item = { - .ci_namebuf = "03-group-children", - .ci_type = &group_children_type, - }, - }, -}; - -/* ----------------------------------------------------------------- */ - -/* - * We're now done with our subsystem definitions. - * For convenience in this module, here's a list of them all. It - * allows the init function to easily register them. Most modules - * will only have one subsystem, and will only call register_subsystem - * on it directly. - */ -static struct configfs_subsystem *example_subsys[] = { - &childless_subsys.subsys, - &simple_children_subsys, - &group_children_subsys, - NULL, -}; - -static int __init configfs_example_init(void) -{ - int ret; - int i; - struct configfs_subsystem *subsys; - - for (i = 0; example_subsys[i]; i++) { - subsys = example_subsys[i]; - - config_group_init(&subsys->su_group); - mutex_init(&subsys->su_mutex); - ret = configfs_register_subsystem(subsys); - if (ret) { - printk(KERN_ERR "Error %d while registering subsystem %s\n", - ret, - subsys->su_group.cg_item.ci_namebuf); - goto out_unregister; - } - } - - return 0; - -out_unregister: - for (; i >= 0; i--) { - configfs_unregister_subsystem(example_subsys[i]); - } - - return ret; -} - -static void __exit configfs_example_exit(void) -{ - int i; - - for (i = 0; example_subsys[i]; i++) { - configfs_unregister_subsystem(example_subsys[i]); - } -} - -module_init(configfs_example_init); -module_exit(configfs_example_exit); -MODULE_LICENSE("GPL"); diff --git a/Documentation/filesystems/configfs/configfs_example_explicit.c b/Documentation/filesystems/configfs/configfs_example_explicit.c new file mode 100644 index 000000000000..d428cc9f07f3 --- /dev/null +++ b/Documentation/filesystems/configfs/configfs_example_explicit.c @@ -0,0 +1,485 @@ +/* + * vim: noexpandtab ts=8 sts=0 sw=8: + * + * configfs_example_explicit.c - This file is a demonstration module + * containing a number of configfs subsystems. It explicitly defines + * each structure without using the helper macros defined in + * configfs.h. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on sysfs: + * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel + * + * configfs Copyright (C) 2005 Oracle. All rights reserved. + */ + +#include +#include +#include + +#include + + + +/* + * 01-childless + * + * This first example is a childless subsystem. It cannot create + * any config_items. It just has attributes. + * + * Note that we are enclosing the configfs_subsystem inside a container. + * This is not necessary if a subsystem has no attributes directly + * on the subsystem. See the next example, 02-simple-children, for + * such a subsystem. + */ + +struct childless { + struct configfs_subsystem subsys; + int showme; + int storeme; +}; + +struct childless_attribute { + struct configfs_attribute attr; + ssize_t (*show)(struct childless *, char *); + ssize_t (*store)(struct childless *, const char *, size_t); +}; + +static inline struct childless *to_childless(struct config_item *item) +{ + return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL; +} + +static ssize_t childless_showme_read(struct childless *childless, + char *page) +{ + ssize_t pos; + + pos = sprintf(page, "%d\n", childless->showme); + childless->showme++; + + return pos; +} + +static ssize_t childless_storeme_read(struct childless *childless, + char *page) +{ + return sprintf(page, "%d\n", childless->storeme); +} + +static ssize_t childless_storeme_write(struct childless *childless, + const char *page, + size_t count) +{ + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + + childless->storeme = tmp; + + return count; +} + +static ssize_t childless_description_read(struct childless *childless, + char *page) +{ + return sprintf(page, +"[01-childless]\n" +"\n" +"The childless subsystem is the simplest possible subsystem in\n" +"configfs. It does not support the creation of child config_items.\n" +"It only has a few attributes. In fact, it isn't much different\n" +"than a directory in /proc.\n"); +} + +static struct childless_attribute childless_attr_showme = { + .attr = { .ca_owner = THIS_MODULE, .ca_name = "showme", .ca_mode = S_IRUGO }, + .show = childless_showme_read, +}; +static struct childless_attribute childless_attr_storeme = { + .attr = { .ca_owner = THIS_MODULE, .ca_name = "storeme", .ca_mode = S_IRUGO | S_IWUSR }, + .show = childless_storeme_read, + .store = childless_storeme_write, +}; +static struct childless_attribute childless_attr_description = { + .attr = { .ca_owner = THIS_MODULE, .ca_name = "description", .ca_mode = S_IRUGO }, + .show = childless_description_read, +}; + +static struct configfs_attribute *childless_attrs[] = { + &childless_attr_showme.attr, + &childless_attr_storeme.attr, + &childless_attr_description.attr, + NULL, +}; + +static ssize_t childless_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + struct childless *childless = to_childless(item); + struct childless_attribute *childless_attr = + container_of(attr, struct childless_attribute, attr); + ssize_t ret = 0; + + if (childless_attr->show) + ret = childless_attr->show(childless, page); + return ret; +} + +static ssize_t childless_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *page, size_t count) +{ + struct childless *childless = to_childless(item); + struct childless_attribute *childless_attr = + container_of(attr, struct childless_attribute, attr); + ssize_t ret = -EINVAL; + + if (childless_attr->store) + ret = childless_attr->store(childless, page, count); + return ret; +} + +static struct configfs_item_operations childless_item_ops = { + .show_attribute = childless_attr_show, + .store_attribute = childless_attr_store, +}; + +static struct config_item_type childless_type = { + .ct_item_ops = &childless_item_ops, + .ct_attrs = childless_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct childless childless_subsys = { + .subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "01-childless", + .ci_type = &childless_type, + }, + }, + }, +}; + + +/* ----------------------------------------------------------------- */ + +/* + * 02-simple-children + * + * This example merely has a simple one-attribute child. Note that + * there is no extra attribute structure, as the child's attribute is + * known from the get-go. Also, there is no container for the + * subsystem, as it has no attributes of its own. + */ + +struct simple_child { + struct config_item item; + int storeme; +}; + +static inline struct simple_child *to_simple_child(struct config_item *item) +{ + return item ? container_of(item, struct simple_child, item) : NULL; +} + +static struct configfs_attribute simple_child_attr_storeme = { + .ca_owner = THIS_MODULE, + .ca_name = "storeme", + .ca_mode = S_IRUGO | S_IWUSR, +}; + +static struct configfs_attribute *simple_child_attrs[] = { + &simple_child_attr_storeme, + NULL, +}; + +static ssize_t simple_child_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + ssize_t count; + struct simple_child *simple_child = to_simple_child(item); + + count = sprintf(page, "%d\n", simple_child->storeme); + + return count; +} + +static ssize_t simple_child_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *page, size_t count) +{ + struct simple_child *simple_child = to_simple_child(item); + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + + simple_child->storeme = tmp; + + return count; +} + +static void simple_child_release(struct config_item *item) +{ + kfree(to_simple_child(item)); +} + +static struct configfs_item_operations simple_child_item_ops = { + .release = simple_child_release, + .show_attribute = simple_child_attr_show, + .store_attribute = simple_child_attr_store, +}; + +static struct config_item_type simple_child_type = { + .ct_item_ops = &simple_child_item_ops, + .ct_attrs = simple_child_attrs, + .ct_owner = THIS_MODULE, +}; + + +struct simple_children { + struct config_group group; +}; + +static inline struct simple_children *to_simple_children(struct config_item *item) +{ + return item ? container_of(to_config_group(item), struct simple_children, group) : NULL; +} + +static struct config_item *simple_children_make_item(struct config_group *group, const char *name) +{ + struct simple_child *simple_child; + + simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); + if (!simple_child) + return ERR_PTR(-ENOMEM); + + config_item_init_type_name(&simple_child->item, name, + &simple_child_type); + + simple_child->storeme = 0; + + return &simple_child->item; +} + +static struct configfs_attribute simple_children_attr_description = { + .ca_owner = THIS_MODULE, + .ca_name = "description", + .ca_mode = S_IRUGO, +}; + +static struct configfs_attribute *simple_children_attrs[] = { + &simple_children_attr_description, + NULL, +}; + +static ssize_t simple_children_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + return sprintf(page, +"[02-simple-children]\n" +"\n" +"This subsystem allows the creation of child config_items. These\n" +"items have only one attribute that is readable and writeable.\n"); +} + +static void simple_children_release(struct config_item *item) +{ + kfree(to_simple_children(item)); +} + +static struct configfs_item_operations simple_children_item_ops = { + .release = simple_children_release, + .show_attribute = simple_children_attr_show, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations simple_children_group_ops = { + .make_item = simple_children_make_item, +}; + +static struct config_item_type simple_children_type = { + .ct_item_ops = &simple_children_item_ops, + .ct_group_ops = &simple_children_group_ops, + .ct_attrs = simple_children_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem simple_children_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "02-simple-children", + .ci_type = &simple_children_type, + }, + }, +}; + + +/* ----------------------------------------------------------------- */ + +/* + * 03-group-children + * + * This example reuses the simple_children group from above. However, + * the simple_children group is not the subsystem itself, it is a + * child of the subsystem. Creation of a group in the subsystem creates + * a new simple_children group. That group can then have simple_child + * children of its own. + */ + +static struct config_group *group_children_make_group(struct config_group *group, const char *name) +{ + struct simple_children *simple_children; + + simple_children = kzalloc(sizeof(struct simple_children), + GFP_KERNEL); + if (!simple_children) + return ERR_PTR(-ENOMEM); + + config_group_init_type_name(&simple_children->group, name, + &simple_children_type); + + return &simple_children->group; +} + +static struct configfs_attribute group_children_attr_description = { + .ca_owner = THIS_MODULE, + .ca_name = "description", + .ca_mode = S_IRUGO, +}; + +static struct configfs_attribute *group_children_attrs[] = { + &group_children_attr_description, + NULL, +}; + +static ssize_t group_children_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + return sprintf(page, +"[03-group-children]\n" +"\n" +"This subsystem allows the creation of child config_groups. These\n" +"groups are like the subsystem simple-children.\n"); +} + +static struct configfs_item_operations group_children_item_ops = { + .show_attribute = group_children_attr_show, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations group_children_group_ops = { + .make_group = group_children_make_group, +}; + +static struct config_item_type group_children_type = { + .ct_item_ops = &group_children_item_ops, + .ct_group_ops = &group_children_group_ops, + .ct_attrs = group_children_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem group_children_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "03-group-children", + .ci_type = &group_children_type, + }, + }, +}; + +/* ----------------------------------------------------------------- */ + +/* + * We're now done with our subsystem definitions. + * For convenience in this module, here's a list of them all. It + * allows the init function to easily register them. Most modules + * will only have one subsystem, and will only call register_subsystem + * on it directly. + */ +static struct configfs_subsystem *example_subsys[] = { + &childless_subsys.subsys, + &simple_children_subsys, + &group_children_subsys, + NULL, +}; + +static int __init configfs_example_init(void) +{ + int ret; + int i; + struct configfs_subsystem *subsys; + + for (i = 0; example_subsys[i]; i++) { + subsys = example_subsys[i]; + + config_group_init(&subsys->su_group); + mutex_init(&subsys->su_mutex); + ret = configfs_register_subsystem(subsys); + if (ret) { + printk(KERN_ERR "Error %d while registering subsystem %s\n", + ret, + subsys->su_group.cg_item.ci_namebuf); + goto out_unregister; + } + } + + return 0; + +out_unregister: + for (; i >= 0; i--) { + configfs_unregister_subsystem(example_subsys[i]); + } + + return ret; +} + +static void __exit configfs_example_exit(void) +{ + int i; + + for (i = 0; example_subsys[i]; i++) { + configfs_unregister_subsystem(example_subsys[i]); + } +} + +module_init(configfs_example_init); +module_exit(configfs_example_exit); +MODULE_LICENSE("GPL"); diff --git a/Documentation/filesystems/configfs/configfs_example_macros.c b/Documentation/filesystems/configfs/configfs_example_macros.c new file mode 100644 index 000000000000..d8e30a0378aa --- /dev/null +++ b/Documentation/filesystems/configfs/configfs_example_macros.c @@ -0,0 +1,448 @@ +/* + * vim: noexpandtab ts=8 sts=0 sw=8: + * + * configfs_example_macros.c - This file is a demonstration module + * containing a number of configfs subsystems. It uses the helper + * macros defined by configfs.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on sysfs: + * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel + * + * configfs Copyright (C) 2005 Oracle. All rights reserved. + */ + +#include +#include +#include + +#include + + + +/* + * 01-childless + * + * This first example is a childless subsystem. It cannot create + * any config_items. It just has attributes. + * + * Note that we are enclosing the configfs_subsystem inside a container. + * This is not necessary if a subsystem has no attributes directly + * on the subsystem. See the next example, 02-simple-children, for + * such a subsystem. + */ + +struct childless { + struct configfs_subsystem subsys; + int showme; + int storeme; +}; + +static inline struct childless *to_childless(struct config_item *item) +{ + return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL; +} + +CONFIGFS_ATTR_STRUCT(childless); +#define CHILDLESS_ATTR(_name, _mode, _show, _store) \ +struct childless_attribute childless_attr_##_name = __CONFIGFS_ATTR(_name, _mode, _show, _store) +#define CHILDLESS_ATTR_RO(_name, _show) \ +struct childless_attribute childless_attr_##_name = __CONFIGFS_ATTR_RO(_name, _show); + +static ssize_t childless_showme_read(struct childless *childless, + char *page) +{ + ssize_t pos; + + pos = sprintf(page, "%d\n", childless->showme); + childless->showme++; + + return pos; +} + +static ssize_t childless_storeme_read(struct childless *childless, + char *page) +{ + return sprintf(page, "%d\n", childless->storeme); +} + +static ssize_t childless_storeme_write(struct childless *childless, + const char *page, + size_t count) +{ + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + + childless->storeme = tmp; + + return count; +} + +static ssize_t childless_description_read(struct childless *childless, + char *page) +{ + return sprintf(page, +"[01-childless]\n" +"\n" +"The childless subsystem is the simplest possible subsystem in\n" +"configfs. It does not support the creation of child config_items.\n" +"It only has a few attributes. In fact, it isn't much different\n" +"than a directory in /proc.\n"); +} + +CHILDLESS_ATTR_RO(showme, childless_showme_read); +CHILDLESS_ATTR(storeme, S_IRUGO | S_IWUSR, childless_storeme_read, + childless_storeme_write); +CHILDLESS_ATTR_RO(description, childless_description_read); + +static struct configfs_attribute *childless_attrs[] = { + &childless_attr_showme.attr, + &childless_attr_storeme.attr, + &childless_attr_description.attr, + NULL, +}; + +CONFIGFS_ATTR_OPS(childless); +static struct configfs_item_operations childless_item_ops = { + .show_attribute = childless_attr_show, + .store_attribute = childless_attr_store, +}; + +static struct config_item_type childless_type = { + .ct_item_ops = &childless_item_ops, + .ct_attrs = childless_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct childless childless_subsys = { + .subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "01-childless", + .ci_type = &childless_type, + }, + }, + }, +}; + + +/* ----------------------------------------------------------------- */ + +/* + * 02-simple-children + * + * This example merely has a simple one-attribute child. Note that + * there is no extra attribute structure, as the child's attribute is + * known from the get-go. Also, there is no container for the + * subsystem, as it has no attributes of its own. + */ + +struct simple_child { + struct config_item item; + int storeme; +}; + +static inline struct simple_child *to_simple_child(struct config_item *item) +{ + return item ? container_of(item, struct simple_child, item) : NULL; +} + +static struct configfs_attribute simple_child_attr_storeme = { + .ca_owner = THIS_MODULE, + .ca_name = "storeme", + .ca_mode = S_IRUGO | S_IWUSR, +}; + +static struct configfs_attribute *simple_child_attrs[] = { + &simple_child_attr_storeme, + NULL, +}; + +static ssize_t simple_child_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + ssize_t count; + struct simple_child *simple_child = to_simple_child(item); + + count = sprintf(page, "%d\n", simple_child->storeme); + + return count; +} + +static ssize_t simple_child_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *page, size_t count) +{ + struct simple_child *simple_child = to_simple_child(item); + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + + simple_child->storeme = tmp; + + return count; +} + +static void simple_child_release(struct config_item *item) +{ + kfree(to_simple_child(item)); +} + +static struct configfs_item_operations simple_child_item_ops = { + .release = simple_child_release, + .show_attribute = simple_child_attr_show, + .store_attribute = simple_child_attr_store, +}; + +static struct config_item_type simple_child_type = { + .ct_item_ops = &simple_child_item_ops, + .ct_attrs = simple_child_attrs, + .ct_owner = THIS_MODULE, +}; + + +struct simple_children { + struct config_group group; +}; + +static inline struct simple_children *to_simple_children(struct config_item *item) +{ + return item ? container_of(to_config_group(item), struct simple_children, group) : NULL; +} + +static struct config_item *simple_children_make_item(struct config_group *group, const char *name) +{ + struct simple_child *simple_child; + + simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL); + if (!simple_child) + return ERR_PTR(-ENOMEM); + + config_item_init_type_name(&simple_child->item, name, + &simple_child_type); + + simple_child->storeme = 0; + + return &simple_child->item; +} + +static struct configfs_attribute simple_children_attr_description = { + .ca_owner = THIS_MODULE, + .ca_name = "description", + .ca_mode = S_IRUGO, +}; + +static struct configfs_attribute *simple_children_attrs[] = { + &simple_children_attr_description, + NULL, +}; + +static ssize_t simple_children_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + return sprintf(page, +"[02-simple-children]\n" +"\n" +"This subsystem allows the creation of child config_items. These\n" +"items have only one attribute that is readable and writeable.\n"); +} + +static void simple_children_release(struct config_item *item) +{ + kfree(to_simple_children(item)); +} + +static struct configfs_item_operations simple_children_item_ops = { + .release = simple_children_release, + .show_attribute = simple_children_attr_show, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations simple_children_group_ops = { + .make_item = simple_children_make_item, +}; + +static struct config_item_type simple_children_type = { + .ct_item_ops = &simple_children_item_ops, + .ct_group_ops = &simple_children_group_ops, + .ct_attrs = simple_children_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem simple_children_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "02-simple-children", + .ci_type = &simple_children_type, + }, + }, +}; + + +/* ----------------------------------------------------------------- */ + +/* + * 03-group-children + * + * This example reuses the simple_children group from above. However, + * the simple_children group is not the subsystem itself, it is a + * child of the subsystem. Creation of a group in the subsystem creates + * a new simple_children group. That group can then have simple_child + * children of its own. + */ + +static struct config_group *group_children_make_group(struct config_group *group, const char *name) +{ + struct simple_children *simple_children; + + simple_children = kzalloc(sizeof(struct simple_children), + GFP_KERNEL); + if (!simple_children) + return ERR_PTR(-ENOMEM); + + config_group_init_type_name(&simple_children->group, name, + &simple_children_type); + + return &simple_children->group; +} + +static struct configfs_attribute group_children_attr_description = { + .ca_owner = THIS_MODULE, + .ca_name = "description", + .ca_mode = S_IRUGO, +}; + +static struct configfs_attribute *group_children_attrs[] = { + &group_children_attr_description, + NULL, +}; + +static ssize_t group_children_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + return sprintf(page, +"[03-group-children]\n" +"\n" +"This subsystem allows the creation of child config_groups. These\n" +"groups are like the subsystem simple-children.\n"); +} + +static struct configfs_item_operations group_children_item_ops = { + .show_attribute = group_children_attr_show, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations group_children_group_ops = { + .make_group = group_children_make_group, +}; + +static struct config_item_type group_children_type = { + .ct_item_ops = &group_children_item_ops, + .ct_group_ops = &group_children_group_ops, + .ct_attrs = group_children_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem group_children_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "03-group-children", + .ci_type = &group_children_type, + }, + }, +}; + +/* ----------------------------------------------------------------- */ + +/* + * We're now done with our subsystem definitions. + * For convenience in this module, here's a list of them all. It + * allows the init function to easily register them. Most modules + * will only have one subsystem, and will only call register_subsystem + * on it directly. + */ +static struct configfs_subsystem *example_subsys[] = { + &childless_subsys.subsys, + &simple_children_subsys, + &group_children_subsys, + NULL, +}; + +static int __init configfs_example_init(void) +{ + int ret; + int i; + struct configfs_subsystem *subsys; + + for (i = 0; example_subsys[i]; i++) { + subsys = example_subsys[i]; + + config_group_init(&subsys->su_group); + mutex_init(&subsys->su_mutex); + ret = configfs_register_subsystem(subsys); + if (ret) { + printk(KERN_ERR "Error %d while registering subsystem %s\n", + ret, + subsys->su_group.cg_item.ci_namebuf); + goto out_unregister; + } + } + + return 0; + +out_unregister: + for (; i >= 0; i--) { + configfs_unregister_subsystem(example_subsys[i]); + } + + return ret; +} + +static void __exit configfs_example_exit(void) +{ + int i; + + for (i = 0; example_subsys[i]; i++) { + configfs_unregister_subsystem(example_subsys[i]); + } +} + +module_init(configfs_example_init); +module_exit(configfs_example_exit); +MODULE_LICENSE("GPL"); diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 0a5491baf0bc..7f627775c947 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -130,8 +130,25 @@ struct configfs_attribute { /* * Users often need to create attribute structures for their configurable * attributes, containing a configfs_attribute member and function pointers - * for the show() and store() operations on that attribute. They can use - * this macro (similar to sysfs' __ATTR) to make defining attributes easier. + * for the show() and store() operations on that attribute. If they don't + * need anything else on the extended attribute structure, they can use + * this macro to define it The argument _item is the name of the + * config_item structure. + */ +#define CONFIGFS_ATTR_STRUCT(_item) \ +struct _item##_attribute { \ + struct configfs_attribute attr; \ + ssize_t (*show)(struct _item *, char *); \ + ssize_t (*store)(struct _item *, const char *, size_t); \ +} + +/* + * With the extended attribute structure, users can use this macro + * (similar to sysfs' __ATTR) to make defining attributes easier. + * An example: + * #define MYITEM_ATTR(_name, _mode, _show, _store) \ + * struct myitem_attribute childless_attr_##_name = \ + * __CONFIGFS_ATTR(_name, _mode, _show, _store) */ #define __CONFIGFS_ATTR(_name, _mode, _show, _store) \ { \ @@ -143,6 +160,52 @@ struct configfs_attribute { .show = _show, \ .store = _store, \ } +/* Here is a readonly version, only requiring a show() operation */ +#define __CONFIGFS_ATTR_RO(_name, _show) \ +{ \ + .attr = { \ + .ca_name = __stringify(_name), \ + .ca_mode = 0444, \ + .ca_owner = THIS_MODULE, \ + }, \ + .show = _show, \ +} + +/* + * With these extended attributes, the simple show_attribute() and + * store_attribute() operations need to call the show() and store() of the + * attributes. This is a common pattern, so we provide a macro to define + * them. The argument _item is the name of the config_item structure. + * This macro expects the attributes to be named "struct _attribute" + * and the function to_() to exist; + */ +#define CONFIGFS_ATTR_OPS(_item) \ +static ssize_t _item##_attr_show(struct config_item *item, \ + struct configfs_attribute *attr, \ + char *page) \ +{ \ + struct _item *_item = to_##_item(item); \ + struct _item##_attribute *_item##_attr = \ + container_of(attr, struct _item##_attribute, attr); \ + ssize_t ret = 0; \ + \ + if (_item##_attr->show) \ + ret = _item##_attr->show(_item, page); \ + return ret; \ +} \ +static ssize_t _item##_attr_store(struct config_item *item, \ + struct configfs_attribute *attr, \ + const char *page, size_t count) \ +{ \ + struct _item *_item = to_##_item(item); \ + struct _item##_attribute *_item##_attr = \ + container_of(attr, struct _item##_attribute, attr); \ + ssize_t ret = -EINVAL; \ + \ + if (_item##_attr->store) \ + ret = _item##_attr->store(_item, page, count); \ + return ret; \ +} /* * If allow_link() exists, the item can symlink(2) out to other -- cgit v1.2.3 From c3f26a269c2421f97f10cf8ed05d5099b573af4d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 31 Jul 2008 16:58:50 -0700 Subject: netdev: Fix lockdep warnings in multiqueue configurations. When support for multiple TX queues were added, the netif_tx_lock() routines we converted to iterate over all TX queues and grab each queue's spinlock. This causes heartburn for lockdep and it's not a healthy thing to do with lots of TX queues anyways. So modify this to use a top-level lock and a "frozen" state for the individual TX queues. Signed-off-by: David S. Miller --- drivers/net/ifb.c | 12 ++++--- include/linux/netdevice.h | 86 ++++++++++++++++++++++++++++++----------------- net/core/dev.c | 1 + net/core/netpoll.c | 1 + net/core/pktgen.c | 7 ++-- net/sched/sch_generic.c | 6 ++-- net/sched/sch_teql.c | 9 ++--- 7 files changed, 78 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 0960e69b2da4..e4fbefc8c82f 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -69,18 +69,20 @@ static void ri_tasklet(unsigned long dev) struct net_device *_dev = (struct net_device *)dev; struct ifb_private *dp = netdev_priv(_dev); struct net_device_stats *stats = &_dev->stats; + struct netdev_queue *txq; struct sk_buff *skb; + txq = netdev_get_tx_queue(_dev, 0); dp->st_task_enter++; if ((skb = skb_peek(&dp->tq)) == NULL) { dp->st_txq_refl_try++; - if (netif_tx_trylock(_dev)) { + if (__netif_tx_trylock(txq)) { dp->st_rxq_enter++; while ((skb = skb_dequeue(&dp->rq)) != NULL) { skb_queue_tail(&dp->tq, skb); dp->st_rx2tx_tran++; } - netif_tx_unlock(_dev); + __netif_tx_unlock(txq); } else { /* reschedule */ dp->st_rxq_notenter++; @@ -115,7 +117,7 @@ static void ri_tasklet(unsigned long dev) BUG(); } - if (netif_tx_trylock(_dev)) { + if (__netif_tx_trylock(txq)) { dp->st_rxq_check++; if ((skb = skb_peek(&dp->rq)) == NULL) { dp->tasklet_pending = 0; @@ -123,10 +125,10 @@ static void ri_tasklet(unsigned long dev) netif_wake_queue(_dev); } else { dp->st_rxq_rsch++; - netif_tx_unlock(_dev); + __netif_tx_unlock(txq); goto resched; } - netif_tx_unlock(_dev); + __netif_tx_unlock(txq); } else { resched: dp->tasklet_pending = 1; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b4d056ceab96..ee583f642a9f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -440,6 +440,7 @@ static inline void napi_synchronize(const struct napi_struct *n) enum netdev_queue_state_t { __QUEUE_STATE_XOFF, + __QUEUE_STATE_FROZEN, }; struct netdev_queue { @@ -636,7 +637,7 @@ struct net_device unsigned int real_num_tx_queues; unsigned long tx_queue_len; /* Max frames per queue allowed */ - + spinlock_t tx_global_lock; /* * One part is mostly used on xmit path (device) */ @@ -1099,6 +1100,11 @@ static inline int netif_queue_stopped(const struct net_device *dev) return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0)); } +static inline int netif_tx_queue_frozen(const struct netdev_queue *dev_queue) +{ + return test_bit(__QUEUE_STATE_FROZEN, &dev_queue->state); +} + /** * netif_running - test if up * @dev: network device @@ -1475,6 +1481,26 @@ static inline void __netif_tx_lock_bh(struct netdev_queue *txq) txq->xmit_lock_owner = smp_processor_id(); } +static inline int __netif_tx_trylock(struct netdev_queue *txq) +{ + int ok = spin_trylock(&txq->_xmit_lock); + if (likely(ok)) + txq->xmit_lock_owner = smp_processor_id(); + return ok; +} + +static inline void __netif_tx_unlock(struct netdev_queue *txq) +{ + txq->xmit_lock_owner = -1; + spin_unlock(&txq->_xmit_lock); +} + +static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) +{ + txq->xmit_lock_owner = -1; + spin_unlock_bh(&txq->_xmit_lock); +} + /** * netif_tx_lock - grab network device transmit lock * @dev: network device @@ -1484,12 +1510,23 @@ static inline void __netif_tx_lock_bh(struct netdev_queue *txq) */ static inline void netif_tx_lock(struct net_device *dev) { - int cpu = smp_processor_id(); unsigned int i; + int cpu; + spin_lock(&dev->tx_global_lock); + cpu = smp_processor_id(); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + + /* We are the only thread of execution doing a + * freeze, but we have to grab the _xmit_lock in + * order to synchronize with threads which are in + * the ->hard_start_xmit() handler and already + * checked the frozen bit. + */ __netif_tx_lock(txq, cpu); + set_bit(__QUEUE_STATE_FROZEN, &txq->state); + __netif_tx_unlock(txq); } } @@ -1499,40 +1536,22 @@ static inline void netif_tx_lock_bh(struct net_device *dev) netif_tx_lock(dev); } -static inline int __netif_tx_trylock(struct netdev_queue *txq) -{ - int ok = spin_trylock(&txq->_xmit_lock); - if (likely(ok)) - txq->xmit_lock_owner = smp_processor_id(); - return ok; -} - -static inline int netif_tx_trylock(struct net_device *dev) -{ - return __netif_tx_trylock(netdev_get_tx_queue(dev, 0)); -} - -static inline void __netif_tx_unlock(struct netdev_queue *txq) -{ - txq->xmit_lock_owner = -1; - spin_unlock(&txq->_xmit_lock); -} - -static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) -{ - txq->xmit_lock_owner = -1; - spin_unlock_bh(&txq->_xmit_lock); -} - static inline void netif_tx_unlock(struct net_device *dev) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - __netif_tx_unlock(txq); - } + /* No need to grab the _xmit_lock here. If the + * queue is not stopped for another reason, we + * force a schedule. + */ + clear_bit(__QUEUE_STATE_FROZEN, &txq->state); + if (!test_bit(__QUEUE_STATE_XOFF, &txq->state)) + __netif_schedule(txq->qdisc); + } + spin_unlock(&dev->tx_global_lock); } static inline void netif_tx_unlock_bh(struct net_device *dev) @@ -1556,13 +1575,18 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) static inline void netif_tx_disable(struct net_device *dev) { unsigned int i; + int cpu; - netif_tx_lock_bh(dev); + local_bh_disable(); + cpu = smp_processor_id(); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + + __netif_tx_lock(txq, cpu); netif_tx_stop_queue(txq); + __netif_tx_unlock(txq); } - netif_tx_unlock_bh(dev); + local_bh_enable(); } static inline void netif_addr_lock(struct net_device *dev) diff --git a/net/core/dev.c b/net/core/dev.c index 63d6bcddbf46..69320a56a084 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4200,6 +4200,7 @@ static void netdev_init_queues(struct net_device *dev) { netdev_init_one_queue(dev, &dev->rx_queue, NULL); netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); + spin_lock_init(&dev->tx_global_lock); } /** diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c12720895ecf..6c7af390be0a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -70,6 +70,7 @@ static void queue_process(struct work_struct *work) local_irq_save(flags); __netif_tx_lock(txq, smp_processor_id()); if (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq) || dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); __netif_tx_unlock(txq); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index c7d484f7e1c4..3284605f2ec7 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3305,6 +3305,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) txq = netdev_get_tx_queue(odev, queue_map); if (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq) || need_resched()) { idle_start = getCurUs(); @@ -3320,7 +3321,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->idle_acc += getCurUs() - idle_start; - if (netif_tx_queue_stopped(txq)) { + if (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq)) { pkt_dev->next_tx_us = getCurUs(); /* TODO */ pkt_dev->next_tx_ns = 0; goto out; /* Try the next interface */ @@ -3352,7 +3354,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) txq = netdev_get_tx_queue(odev, queue_map); __netif_tx_lock_bh(txq); - if (!netif_tx_queue_stopped(txq)) { + if (!netif_tx_queue_stopped(txq) && + !netif_tx_queue_frozen(txq)) { atomic_inc(&(pkt_dev->skb->users)); retry_now: diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 345838a2e369..9c9cd4d94890 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -135,7 +135,8 @@ static inline int qdisc_restart(struct Qdisc *q) txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_subqueue_stopped(dev, skb)) + if (!netif_tx_queue_stopped(txq) && + !netif_tx_queue_frozen(txq)) ret = dev_hard_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); @@ -162,7 +163,8 @@ static inline int qdisc_restart(struct Qdisc *q) break; } - if (ret && netif_tx_queue_stopped(txq)) + if (ret && (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq))) ret = 0; return ret; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 537223642b6e..2c35c678563b 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -305,10 +305,11 @@ restart: switch (teql_resolve(skb, skb_res, slave)) { case 0: - if (netif_tx_trylock(slave)) { - if (!__netif_subqueue_stopped(slave, subq) && + if (__netif_tx_trylock(slave_txq)) { + if (!netif_tx_queue_stopped(slave_txq) && + !netif_tx_queue_frozen(slave_txq) && slave->hard_start_xmit(skb, slave) == 0) { - netif_tx_unlock(slave); + __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); master->stats.tx_packets++; @@ -316,7 +317,7 @@ restart: qdisc_pkt_len(skb); return 0; } - netif_tx_unlock(slave); + __netif_tx_unlock(slave_txq); } if (netif_queue_stopped(dev)) busy = 1; -- cgit v1.2.3 From bc4768eb081a67642c0c44c34ea597c273bdedcb Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Thu, 31 Jul 2008 20:45:24 -0700 Subject: ipvs: Move userspace definitions to include/linux/ip_vs.h Current versions of ipvsadm include "/usr/src/linux/include/net/ip_vs.h" directly. This file also contains kernel-only definitions. Normally, public definitions should live in include/linux, so this patch moves the definitions shared with userspace to a new file, "include/linux/ip_vs.h". This also removes the unused NFC_IPVS_PROPERTY bitmask, which was once used to point into skb->nfcache. To make old ipvsadms still compile with this, the old header file includes the new one. Thanks to Dave Miller and Horms for noting/adding the missing Kbuild entry for the new header file. Signed-off-by: Julius Volz Acked-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/Kbuild | 1 + include/linux/ip_vs.h | 245 ++++++++++++++++++++++++++++++++++++++++++++++++ include/net/ip_vs.h | 253 ++------------------------------------------------ 3 files changed, 254 insertions(+), 245 deletions(-) create mode 100644 include/linux/ip_vs.h (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 4c4142c5aa6e..a26f565e8189 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -97,6 +97,7 @@ header-y += ioctl.h header-y += ip6_tunnel.h header-y += ipmi_msgdefs.h header-y += ipsec.h +header-y += ip_vs.h header-y += ipx.h header-y += irda.h header-y += iso_fs.h diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h new file mode 100644 index 000000000000..ec6eb49af2d8 --- /dev/null +++ b/include/linux/ip_vs.h @@ -0,0 +1,245 @@ +/* + * IP Virtual Server + * data structure and functionality definitions + */ + +#ifndef _IP_VS_H +#define _IP_VS_H + +#include /* For __beXX types in userland */ + +#define IP_VS_VERSION_CODE 0x010201 +#define NVERSION(version) \ + (version >> 16) & 0xFF, \ + (version >> 8) & 0xFF, \ + version & 0xFF + +/* + * Virtual Service Flags + */ +#define IP_VS_SVC_F_PERSISTENT 0x0001 /* persistent port */ +#define IP_VS_SVC_F_HASHED 0x0002 /* hashed entry */ + +/* + * Destination Server Flags + */ +#define IP_VS_DEST_F_AVAILABLE 0x0001 /* server is available */ +#define IP_VS_DEST_F_OVERLOAD 0x0002 /* server is overloaded */ + +/* + * IPVS sync daemon states + */ +#define IP_VS_STATE_NONE 0x0000 /* daemon is stopped */ +#define IP_VS_STATE_MASTER 0x0001 /* started as master */ +#define IP_VS_STATE_BACKUP 0x0002 /* started as backup */ + +/* + * IPVS socket options + */ +#define IP_VS_BASE_CTL (64+1024+64) /* base */ + +#define IP_VS_SO_SET_NONE IP_VS_BASE_CTL /* just peek */ +#define IP_VS_SO_SET_INSERT (IP_VS_BASE_CTL+1) +#define IP_VS_SO_SET_ADD (IP_VS_BASE_CTL+2) +#define IP_VS_SO_SET_EDIT (IP_VS_BASE_CTL+3) +#define IP_VS_SO_SET_DEL (IP_VS_BASE_CTL+4) +#define IP_VS_SO_SET_FLUSH (IP_VS_BASE_CTL+5) +#define IP_VS_SO_SET_LIST (IP_VS_BASE_CTL+6) +#define IP_VS_SO_SET_ADDDEST (IP_VS_BASE_CTL+7) +#define IP_VS_SO_SET_DELDEST (IP_VS_BASE_CTL+8) +#define IP_VS_SO_SET_EDITDEST (IP_VS_BASE_CTL+9) +#define IP_VS_SO_SET_TIMEOUT (IP_VS_BASE_CTL+10) +#define IP_VS_SO_SET_STARTDAEMON (IP_VS_BASE_CTL+11) +#define IP_VS_SO_SET_STOPDAEMON (IP_VS_BASE_CTL+12) +#define IP_VS_SO_SET_RESTORE (IP_VS_BASE_CTL+13) +#define IP_VS_SO_SET_SAVE (IP_VS_BASE_CTL+14) +#define IP_VS_SO_SET_ZERO (IP_VS_BASE_CTL+15) +#define IP_VS_SO_SET_MAX IP_VS_SO_SET_ZERO + +#define IP_VS_SO_GET_VERSION IP_VS_BASE_CTL +#define IP_VS_SO_GET_INFO (IP_VS_BASE_CTL+1) +#define IP_VS_SO_GET_SERVICES (IP_VS_BASE_CTL+2) +#define IP_VS_SO_GET_SERVICE (IP_VS_BASE_CTL+3) +#define IP_VS_SO_GET_DESTS (IP_VS_BASE_CTL+4) +#define IP_VS_SO_GET_DEST (IP_VS_BASE_CTL+5) /* not used now */ +#define IP_VS_SO_GET_TIMEOUT (IP_VS_BASE_CTL+6) +#define IP_VS_SO_GET_DAEMON (IP_VS_BASE_CTL+7) +#define IP_VS_SO_GET_MAX IP_VS_SO_GET_DAEMON + + +/* + * IPVS Connection Flags + */ +#define IP_VS_CONN_F_FWD_MASK 0x0007 /* mask for the fwd methods */ +#define IP_VS_CONN_F_MASQ 0x0000 /* masquerading/NAT */ +#define IP_VS_CONN_F_LOCALNODE 0x0001 /* local node */ +#define IP_VS_CONN_F_TUNNEL 0x0002 /* tunneling */ +#define IP_VS_CONN_F_DROUTE 0x0003 /* direct routing */ +#define IP_VS_CONN_F_BYPASS 0x0004 /* cache bypass */ +#define IP_VS_CONN_F_SYNC 0x0020 /* entry created by sync */ +#define IP_VS_CONN_F_HASHED 0x0040 /* hashed entry */ +#define IP_VS_CONN_F_NOOUTPUT 0x0080 /* no output packets */ +#define IP_VS_CONN_F_INACTIVE 0x0100 /* not established */ +#define IP_VS_CONN_F_OUT_SEQ 0x0200 /* must do output seq adjust */ +#define IP_VS_CONN_F_IN_SEQ 0x0400 /* must do input seq adjust */ +#define IP_VS_CONN_F_SEQ_MASK 0x0600 /* in/out sequence mask */ +#define IP_VS_CONN_F_NO_CPORT 0x0800 /* no client port set yet */ +#define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ + +#define IP_VS_SCHEDNAME_MAXLEN 16 +#define IP_VS_IFNAME_MAXLEN 16 + + +/* + * The struct ip_vs_service_user and struct ip_vs_dest_user are + * used to set IPVS rules through setsockopt. + */ +struct ip_vs_service_user { + /* virtual service addresses */ + u_int16_t protocol; + __be32 addr; /* virtual ip address */ + __be16 port; + u_int32_t fwmark; /* firwall mark of service */ + + /* virtual service options */ + char sched_name[IP_VS_SCHEDNAME_MAXLEN]; + unsigned flags; /* virtual service flags */ + unsigned timeout; /* persistent timeout in sec */ + __be32 netmask; /* persistent netmask */ +}; + + +struct ip_vs_dest_user { + /* destination server address */ + __be32 addr; + __be16 port; + + /* real server options */ + unsigned conn_flags; /* connection flags */ + int weight; /* destination weight */ + + /* thresholds for active connections */ + u_int32_t u_threshold; /* upper threshold */ + u_int32_t l_threshold; /* lower threshold */ +}; + + +/* + * IPVS statistics object (for user space) + */ +struct ip_vs_stats_user +{ + __u32 conns; /* connections scheduled */ + __u32 inpkts; /* incoming packets */ + __u32 outpkts; /* outgoing packets */ + __u64 inbytes; /* incoming bytes */ + __u64 outbytes; /* outgoing bytes */ + + __u32 cps; /* current connection rate */ + __u32 inpps; /* current in packet rate */ + __u32 outpps; /* current out packet rate */ + __u32 inbps; /* current in byte rate */ + __u32 outbps; /* current out byte rate */ +}; + + +/* The argument to IP_VS_SO_GET_INFO */ +struct ip_vs_getinfo { + /* version number */ + unsigned int version; + + /* size of connection hash table */ + unsigned int size; + + /* number of virtual services */ + unsigned int num_services; +}; + + +/* The argument to IP_VS_SO_GET_SERVICE */ +struct ip_vs_service_entry { + /* which service: user fills in these */ + u_int16_t protocol; + __be32 addr; /* virtual address */ + __be16 port; + u_int32_t fwmark; /* firwall mark of service */ + + /* service options */ + char sched_name[IP_VS_SCHEDNAME_MAXLEN]; + unsigned flags; /* virtual service flags */ + unsigned timeout; /* persistent timeout */ + __be32 netmask; /* persistent netmask */ + + /* number of real servers */ + unsigned int num_dests; + + /* statistics */ + struct ip_vs_stats_user stats; +}; + + +struct ip_vs_dest_entry { + __be32 addr; /* destination address */ + __be16 port; + unsigned conn_flags; /* connection flags */ + int weight; /* destination weight */ + + u_int32_t u_threshold; /* upper threshold */ + u_int32_t l_threshold; /* lower threshold */ + + u_int32_t activeconns; /* active connections */ + u_int32_t inactconns; /* inactive connections */ + u_int32_t persistconns; /* persistent connections */ + + /* statistics */ + struct ip_vs_stats_user stats; +}; + + +/* The argument to IP_VS_SO_GET_DESTS */ +struct ip_vs_get_dests { + /* which service: user fills in these */ + u_int16_t protocol; + __be32 addr; /* virtual address */ + __be16 port; + u_int32_t fwmark; /* firwall mark of service */ + + /* number of real servers */ + unsigned int num_dests; + + /* the real servers */ + struct ip_vs_dest_entry entrytable[0]; +}; + + +/* The argument to IP_VS_SO_GET_SERVICES */ +struct ip_vs_get_services { + /* number of virtual services */ + unsigned int num_services; + + /* service table */ + struct ip_vs_service_entry entrytable[0]; +}; + + +/* The argument to IP_VS_SO_GET_TIMEOUT */ +struct ip_vs_timeout_user { + int tcp_timeout; + int tcp_fin_timeout; + int udp_timeout; +}; + + +/* The argument to IP_VS_SO_GET_DAEMON */ +struct ip_vs_daemon_user { + /* sync daemon state (master/backup) */ + int state; + + /* multicast interface name */ + char mcast_ifn[IP_VS_IFNAME_MAXLEN]; + + /* SyncID we belong to */ + int syncid; +}; + +#endif /* _IP_VS_H */ diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 9a51ebad3f1f..cbb59ebed4ae 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -3,254 +3,17 @@ * data structure and functionality definitions */ -#ifndef _IP_VS_H -#define _IP_VS_H - -#include /* For __uXX types */ -#include /* For __beXX types in userland */ - -#include /* For ctl_path */ - -#define IP_VS_VERSION_CODE 0x010201 -#define NVERSION(version) \ - (version >> 16) & 0xFF, \ - (version >> 8) & 0xFF, \ - version & 0xFF - -/* - * Virtual Service Flags - */ -#define IP_VS_SVC_F_PERSISTENT 0x0001 /* persistent port */ -#define IP_VS_SVC_F_HASHED 0x0002 /* hashed entry */ - -/* - * Destination Server Flags - */ -#define IP_VS_DEST_F_AVAILABLE 0x0001 /* server is available */ -#define IP_VS_DEST_F_OVERLOAD 0x0002 /* server is overloaded */ - -/* - * IPVS sync daemon states - */ -#define IP_VS_STATE_NONE 0x0000 /* daemon is stopped */ -#define IP_VS_STATE_MASTER 0x0001 /* started as master */ -#define IP_VS_STATE_BACKUP 0x0002 /* started as backup */ - -/* - * IPVS socket options - */ -#define IP_VS_BASE_CTL (64+1024+64) /* base */ - -#define IP_VS_SO_SET_NONE IP_VS_BASE_CTL /* just peek */ -#define IP_VS_SO_SET_INSERT (IP_VS_BASE_CTL+1) -#define IP_VS_SO_SET_ADD (IP_VS_BASE_CTL+2) -#define IP_VS_SO_SET_EDIT (IP_VS_BASE_CTL+3) -#define IP_VS_SO_SET_DEL (IP_VS_BASE_CTL+4) -#define IP_VS_SO_SET_FLUSH (IP_VS_BASE_CTL+5) -#define IP_VS_SO_SET_LIST (IP_VS_BASE_CTL+6) -#define IP_VS_SO_SET_ADDDEST (IP_VS_BASE_CTL+7) -#define IP_VS_SO_SET_DELDEST (IP_VS_BASE_CTL+8) -#define IP_VS_SO_SET_EDITDEST (IP_VS_BASE_CTL+9) -#define IP_VS_SO_SET_TIMEOUT (IP_VS_BASE_CTL+10) -#define IP_VS_SO_SET_STARTDAEMON (IP_VS_BASE_CTL+11) -#define IP_VS_SO_SET_STOPDAEMON (IP_VS_BASE_CTL+12) -#define IP_VS_SO_SET_RESTORE (IP_VS_BASE_CTL+13) -#define IP_VS_SO_SET_SAVE (IP_VS_BASE_CTL+14) -#define IP_VS_SO_SET_ZERO (IP_VS_BASE_CTL+15) -#define IP_VS_SO_SET_MAX IP_VS_SO_SET_ZERO - -#define IP_VS_SO_GET_VERSION IP_VS_BASE_CTL -#define IP_VS_SO_GET_INFO (IP_VS_BASE_CTL+1) -#define IP_VS_SO_GET_SERVICES (IP_VS_BASE_CTL+2) -#define IP_VS_SO_GET_SERVICE (IP_VS_BASE_CTL+3) -#define IP_VS_SO_GET_DESTS (IP_VS_BASE_CTL+4) -#define IP_VS_SO_GET_DEST (IP_VS_BASE_CTL+5) /* not used now */ -#define IP_VS_SO_GET_TIMEOUT (IP_VS_BASE_CTL+6) -#define IP_VS_SO_GET_DAEMON (IP_VS_BASE_CTL+7) -#define IP_VS_SO_GET_MAX IP_VS_SO_GET_DAEMON - - -/* - * IPVS Connection Flags - */ -#define IP_VS_CONN_F_FWD_MASK 0x0007 /* mask for the fwd methods */ -#define IP_VS_CONN_F_MASQ 0x0000 /* masquerading/NAT */ -#define IP_VS_CONN_F_LOCALNODE 0x0001 /* local node */ -#define IP_VS_CONN_F_TUNNEL 0x0002 /* tunneling */ -#define IP_VS_CONN_F_DROUTE 0x0003 /* direct routing */ -#define IP_VS_CONN_F_BYPASS 0x0004 /* cache bypass */ -#define IP_VS_CONN_F_SYNC 0x0020 /* entry created by sync */ -#define IP_VS_CONN_F_HASHED 0x0040 /* hashed entry */ -#define IP_VS_CONN_F_NOOUTPUT 0x0080 /* no output packets */ -#define IP_VS_CONN_F_INACTIVE 0x0100 /* not established */ -#define IP_VS_CONN_F_OUT_SEQ 0x0200 /* must do output seq adjust */ -#define IP_VS_CONN_F_IN_SEQ 0x0400 /* must do input seq adjust */ -#define IP_VS_CONN_F_SEQ_MASK 0x0600 /* in/out sequence mask */ -#define IP_VS_CONN_F_NO_CPORT 0x0800 /* no client port set yet */ -#define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ - -/* Move it to better place one day, for now keep it unique */ -#define NFC_IPVS_PROPERTY 0x10000 - -#define IP_VS_SCHEDNAME_MAXLEN 16 -#define IP_VS_IFNAME_MAXLEN 16 - - -/* - * The struct ip_vs_service_user and struct ip_vs_dest_user are - * used to set IPVS rules through setsockopt. - */ -struct ip_vs_service_user { - /* virtual service addresses */ - u_int16_t protocol; - __be32 addr; /* virtual ip address */ - __be16 port; - u_int32_t fwmark; /* firwall mark of service */ - - /* virtual service options */ - char sched_name[IP_VS_SCHEDNAME_MAXLEN]; - unsigned flags; /* virtual service flags */ - unsigned timeout; /* persistent timeout in sec */ - __be32 netmask; /* persistent netmask */ -}; - - -struct ip_vs_dest_user { - /* destination server address */ - __be32 addr; - __be16 port; - - /* real server options */ - unsigned conn_flags; /* connection flags */ - int weight; /* destination weight */ - - /* thresholds for active connections */ - u_int32_t u_threshold; /* upper threshold */ - u_int32_t l_threshold; /* lower threshold */ -}; - - -/* - * IPVS statistics object (for user space) - */ -struct ip_vs_stats_user -{ - __u32 conns; /* connections scheduled */ - __u32 inpkts; /* incoming packets */ - __u32 outpkts; /* outgoing packets */ - __u64 inbytes; /* incoming bytes */ - __u64 outbytes; /* outgoing bytes */ - - __u32 cps; /* current connection rate */ - __u32 inpps; /* current in packet rate */ - __u32 outpps; /* current out packet rate */ - __u32 inbps; /* current in byte rate */ - __u32 outbps; /* current out byte rate */ -}; - - -/* The argument to IP_VS_SO_GET_INFO */ -struct ip_vs_getinfo { - /* version number */ - unsigned int version; - - /* size of connection hash table */ - unsigned int size; - - /* number of virtual services */ - unsigned int num_services; -}; - - -/* The argument to IP_VS_SO_GET_SERVICE */ -struct ip_vs_service_entry { - /* which service: user fills in these */ - u_int16_t protocol; - __be32 addr; /* virtual address */ - __be16 port; - u_int32_t fwmark; /* firwall mark of service */ - - /* service options */ - char sched_name[IP_VS_SCHEDNAME_MAXLEN]; - unsigned flags; /* virtual service flags */ - unsigned timeout; /* persistent timeout */ - __be32 netmask; /* persistent netmask */ - - /* number of real servers */ - unsigned int num_dests; - - /* statistics */ - struct ip_vs_stats_user stats; -}; - - -struct ip_vs_dest_entry { - __be32 addr; /* destination address */ - __be16 port; - unsigned conn_flags; /* connection flags */ - int weight; /* destination weight */ - - u_int32_t u_threshold; /* upper threshold */ - u_int32_t l_threshold; /* lower threshold */ - - u_int32_t activeconns; /* active connections */ - u_int32_t inactconns; /* inactive connections */ - u_int32_t persistconns; /* persistent connections */ - - /* statistics */ - struct ip_vs_stats_user stats; -}; - - -/* The argument to IP_VS_SO_GET_DESTS */ -struct ip_vs_get_dests { - /* which service: user fills in these */ - u_int16_t protocol; - __be32 addr; /* virtual address */ - __be16 port; - u_int32_t fwmark; /* firwall mark of service */ - - /* number of real servers */ - unsigned int num_dests; - - /* the real servers */ - struct ip_vs_dest_entry entrytable[0]; -}; - - -/* The argument to IP_VS_SO_GET_SERVICES */ -struct ip_vs_get_services { - /* number of virtual services */ - unsigned int num_services; - - /* service table */ - struct ip_vs_service_entry entrytable[0]; -}; - - -/* The argument to IP_VS_SO_GET_TIMEOUT */ -struct ip_vs_timeout_user { - int tcp_timeout; - int tcp_fin_timeout; - int udp_timeout; -}; - - -/* The argument to IP_VS_SO_GET_DAEMON */ -struct ip_vs_daemon_user { - /* sync daemon state (master/backup) */ - int state; - - /* multicast interface name */ - char mcast_ifn[IP_VS_IFNAME_MAXLEN]; - - /* SyncID we belong to */ - int syncid; -}; +#ifndef _NET_IP_VS_H +#define _NET_IP_VS_H +#include /* definitions shared with userland */ +/* old ipvsadm versions still include this file directly */ #ifdef __KERNEL__ +#include /* for __uXX types */ + +#include /* for ctl_path */ #include /* for struct list_head */ #include /* for struct rwlock_t */ #include /* for struct atomic_t */ @@ -981,4 +744,4 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) #endif /* __KERNEL__ */ -#endif /* _IP_VS_H */ +#endif /* _NET_IP_VS_H */ -- cgit v1.2.3 From 4a7b61d23505854dff7d04cc11944566cffdd0ee Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 31 Jul 2008 20:52:08 -0700 Subject: skbuff: add missing kernel-doc for do_not_encrypt Add missing kernel-doc notation to sk_buff: Warning(linux-2.6.27-rc1-git2//include/linux/skbuff.h:345): No description found for parameter 'do_not_encrypt' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a640385e0598..cfcc45b3bef0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -243,6 +243,7 @@ typedef unsigned char *sk_buff_data_t; * @tc_index: Traffic control index * @tc_verd: traffic control verdict * @ndisc_nodetype: router type (from link layer) + * @do_not_encrypt: set to prevent encryption of this frame * @dma_cookie: a cookie to one of several possible DMA operations * done by skb DMA functions * @secmark: security marking -- cgit v1.2.3 From a4b526b3ba6353cd89a38e41da48ed83b0ead16f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 1 Aug 2008 16:39:12 +0200 Subject: [S390] Optimize storage key operations for anon pages For anonymous pages without a swap cache backing the check in page_remove_rmap for the physical dirty bit in page_remove_rmap is unnecessary. The instructions that are used to check and reset the dirty bit are expensive. Removing the check noticably speeds up process exit. In addition the clearing of the dirty bit in __SetPageUptodate is pointless as well. With these two changes there is no storage key operation for an anonymous page anymore if it does not hit the swap space. The micro benchmark which repeatedly executes an empty shell script gets about 5% faster. Signed-off-by: Martin Schwidefsky --- include/linux/page-flags.h | 3 --- mm/rmap.c | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 54590a9a103e..25aaccdb2f26 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -239,9 +239,6 @@ static inline void __SetPageUptodate(struct page *page) { smp_wmb(); __set_bit(PG_uptodate, &(page)->flags); -#ifdef CONFIG_S390 - page_clear_dirty(page); -#endif } static inline void SetPageUptodate(struct page *page) diff --git a/mm/rmap.c b/mm/rmap.c index 99bc3f9cd796..94a5246a3f98 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -667,7 +667,8 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma) * Leaving it set also helps swapoff to reinstate ptes * faster for those pages still in swapcache. */ - if (page_test_dirty(page)) { + if ((!PageAnon(page) || PageSwapCache(page)) && + page_test_dirty(page)) { page_clear_dirty(page); set_page_dirty(page); } -- cgit v1.2.3 From 1027abe8827b47f7e9c4ed6514fde3d44f79963c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 Jul 2008 04:13:04 -0400 Subject: [PATCH] merge locate_fd() and get_unused_fd() New primitive: alloc_fd(start, flags). get_unused_fd() and get_unused_fd_flags() become wrappers on top of it. Signed-off-by: Al Viro --- fs/fcntl.c | 87 +++++++++------------------------------------------- fs/file.c | 61 ++++++++++++++++++++++++++++++++++++ fs/open.c | 56 --------------------------------- include/linux/file.h | 3 +- 4 files changed, 77 insertions(+), 130 deletions(-) (limited to 'include/linux') diff --git a/fs/fcntl.c b/fs/fcntl.c index 61d625136813..2e40799daad6 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -49,73 +49,6 @@ static int get_close_on_exec(unsigned int fd) return res; } -/* - * locate_fd finds a free file descriptor in the open_fds fdset, - * expanding the fd arrays if necessary. Must be called with the - * file_lock held for write. - */ - -static int locate_fd(unsigned int orig_start, int cloexec) -{ - struct files_struct *files = current->files; - unsigned int newfd; - unsigned int start; - int error; - struct fdtable *fdt; - - spin_lock(&files->file_lock); -repeat: - fdt = files_fdtable(files); - /* - * Someone might have closed fd's in the range - * orig_start..fdt->next_fd - */ - start = orig_start; - if (start < files->next_fd) - start = files->next_fd; - - newfd = start; - if (start < fdt->max_fds) - newfd = find_next_zero_bit(fdt->open_fds->fds_bits, - fdt->max_fds, start); - - error = expand_files(files, newfd); - if (error < 0) - goto out; - - /* - * If we needed to expand the fs array we - * might have blocked - try again. - */ - if (error) - goto repeat; - - if (start <= files->next_fd) - files->next_fd = newfd + 1; - - FD_SET(newfd, fdt->open_fds); - if (cloexec) - FD_SET(newfd, fdt->close_on_exec); - else - FD_CLR(newfd, fdt->close_on_exec); - error = newfd; - -out: - spin_unlock(&files->file_lock); - return error; -} - -static int dupfd(struct file *file, unsigned int start, int cloexec) -{ - int fd = locate_fd(start, cloexec); - if (fd >= 0) - fd_install(fd, file); - else - fput(file); - - return fd; -} - asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) { int err = -EBADF; @@ -194,10 +127,15 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) asmlinkage long sys_dup(unsigned int fildes) { int ret = -EBADF; - struct file * file = fget(fildes); - - if (file) - ret = dupfd(file, 0, 0); + struct file *file = fget(fildes); + + if (file) { + ret = get_unused_fd(); + if (ret >= 0) + fd_install(ret, file); + else + fput(file); + } return ret; } @@ -322,8 +260,11 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_DUPFD_CLOEXEC: if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) break; - get_file(filp); - err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC); + err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0); + if (err >= 0) { + get_file(filp); + fd_install(err, filp); + } break; case F_GETFD: err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; diff --git a/fs/file.c b/fs/file.c index d8773b19fe47..f313314f996f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -6,6 +6,7 @@ * Manage the dynamic fd arrays in the process files_struct. */ +#include #include #include #include @@ -432,3 +433,63 @@ struct files_struct init_files = { }, .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), }; + +/* + * allocate a file descriptor, mark it busy. + */ +int alloc_fd(unsigned start, unsigned flags) +{ + struct files_struct *files = current->files; + unsigned int fd; + int error; + struct fdtable *fdt; + + spin_lock(&files->file_lock); +repeat: + fdt = files_fdtable(files); + fd = start; + if (fd < files->next_fd) + fd = files->next_fd; + + if (fd < fdt->max_fds) + fd = find_next_zero_bit(fdt->open_fds->fds_bits, + fdt->max_fds, fd); + + error = expand_files(files, fd); + if (error < 0) + goto out; + + /* + * If we needed to expand the fs array we + * might have blocked - try again. + */ + if (error) + goto repeat; + + if (start <= files->next_fd) + files->next_fd = fd + 1; + + FD_SET(fd, fdt->open_fds); + if (flags & O_CLOEXEC) + FD_SET(fd, fdt->close_on_exec); + else + FD_CLR(fd, fdt->close_on_exec); + error = fd; +#if 1 + /* Sanity check */ + if (rcu_dereference(fdt->fd[fd]) != NULL) { + printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); + rcu_assign_pointer(fdt->fd[fd], NULL); + } +#endif + +out: + spin_unlock(&files->file_lock); + return error; +} + +int get_unused_fd(void) +{ + return alloc_fd(0, 0); +} +EXPORT_SYMBOL(get_unused_fd); diff --git a/fs/open.c b/fs/open.c index 52647be277a2..07da9359481c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -963,62 +963,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) } EXPORT_SYMBOL(dentry_open); -/* - * Find an empty file descriptor entry, and mark it busy. - */ -int get_unused_fd_flags(int flags) -{ - struct files_struct * files = current->files; - int fd, error; - struct fdtable *fdt; - - spin_lock(&files->file_lock); - -repeat: - fdt = files_fdtable(files); - fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, - files->next_fd); - - /* Do we need to expand the fd array or fd set? */ - error = expand_files(files, fd); - if (error < 0) - goto out; - - if (error) { - /* - * If we needed to expand the fs array we - * might have blocked - try again. - */ - goto repeat; - } - - FD_SET(fd, fdt->open_fds); - if (flags & O_CLOEXEC) - FD_SET(fd, fdt->close_on_exec); - else - FD_CLR(fd, fdt->close_on_exec); - files->next_fd = fd + 1; -#if 1 - /* Sanity check */ - if (fdt->fd[fd] != NULL) { - printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); - fdt->fd[fd] = NULL; - } -#endif - error = fd; - -out: - spin_unlock(&files->file_lock); - return error; -} - -int get_unused_fd(void) -{ - return get_unused_fd_flags(0); -} - -EXPORT_SYMBOL(get_unused_fd); - static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files); diff --git a/include/linux/file.h b/include/linux/file.h index 27c64bdc68c9..a20259e248a5 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -34,8 +34,9 @@ extern struct file *fget(unsigned int fd); extern struct file *fget_light(unsigned int fd, int *fput_needed); extern void set_close_on_exec(unsigned int fd, int flag); extern void put_filp(struct file *); +extern int alloc_fd(unsigned start, unsigned flags); extern int get_unused_fd(void); -extern int get_unused_fd_flags(int flags); +#define get_unused_fd_flags(flags) alloc_fd(0, (flags)) extern void put_unused_fd(unsigned int fd); extern void fd_install(unsigned int fd, struct file *file); -- cgit v1.2.3 From 77e69dac3cefacee939cb107ae9cd520a62338e0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Aug 2008 04:29:18 -0400 Subject: [PATCH] fix races and leaks in vfs_quota_on() users * new helper: vfs_quota_on_path(); equivalent of vfs_quota_on() sans the pathname resolution. * callers of vfs_quota_on() that do their own pathname resolution and checks based on it are switched to vfs_quota_on_path(); that way we avoid the races. * reiserfs leaked dentry/vfsmount references on several failure exits. Signed-off-by: Al Viro --- fs/dquot.c | 33 ++++++++++++++++++++------------- fs/ext3/super.c | 3 ++- fs/ext4/super.c | 3 ++- fs/reiserfs/super.c | 16 +++++++++------- include/linux/quotaops.h | 2 ++ 5 files changed, 35 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/dquot.c b/fs/dquot.c index 1346eebe74ce..8ec4d6cc7633 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1793,6 +1793,21 @@ static int vfs_quota_on_remount(struct super_block *sb, int type) return ret; } +int vfs_quota_on_path(struct super_block *sb, int type, int format_id, + struct path *path) +{ + int error = security_quota_on(path->dentry); + if (error) + return error; + /* Quota file not on the same filesystem? */ + if (path->mnt->mnt_sb != sb) + error = -EXDEV; + else + error = vfs_quota_on_inode(path->dentry->d_inode, type, + format_id); + return error; +} + /* Actual function called from quotactl() */ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, int remount) @@ -1804,19 +1819,10 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, return vfs_quota_on_remount(sb, type); error = path_lookup(path, LOOKUP_FOLLOW, &nd); - if (error < 0) - return error; - error = security_quota_on(nd.path.dentry); - if (error) - goto out_path; - /* Quota file not on the same filesystem? */ - if (nd.path.mnt->mnt_sb != sb) - error = -EXDEV; - else - error = vfs_quota_on_inode(nd.path.dentry->d_inode, type, - format_id); -out_path: - path_put(&nd.path); + if (!error) { + error = vfs_quota_on_path(sb, type, format_id, &nd.path); + path_put(&nd.path); + } return error; } @@ -2185,6 +2191,7 @@ EXPORT_SYMBOL(unregister_quota_format); EXPORT_SYMBOL(dqstats); EXPORT_SYMBOL(dq_data_lock); EXPORT_SYMBOL(vfs_quota_on); +EXPORT_SYMBOL(vfs_quota_on_path); EXPORT_SYMBOL(vfs_quota_on_mount); EXPORT_SYMBOL(vfs_quota_off); EXPORT_SYMBOL(vfs_quota_sync); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 8ddced384674..f38a5afc39a1 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2810,8 +2810,9 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, journal_unlock_updates(EXT3_SB(sb)->s_journal); } + err = vfs_quota_on_path(sb, type, format_id, &nd.path); path_put(&nd.path); - return vfs_quota_on(sb, type, format_id, path, remount); + return err; } /* Read data from quotafile - avoid pagecache and such because we cannot afford diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b5479b1dff14..1e69f29a8c55 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3352,8 +3352,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); } + err = vfs_quota_on_path(sb, type, format_id, &nd.path); path_put(&nd.path); - return vfs_quota_on(sb, type, format_id, path, remount); + return err; } /* Read data from quotafile - avoid pagecache and such because we cannot afford diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 879e54d35c2d..282a13596c70 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2076,8 +2076,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, return err; /* Quotafile not on the same filesystem? */ if (nd.path.mnt->mnt_sb != sb) { - path_put(&nd.path); - return -EXDEV; + err = -EXDEV; + goto out; } inode = nd.path.dentry->d_inode; /* We must not pack tails for quota files on reiserfs for quota IO to work */ @@ -2087,8 +2087,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, reiserfs_warning(sb, "reiserfs: Unpacking tail of quota file failed" " (%d). Cannot turn on quotas.", err); - path_put(&nd.path); - return -EINVAL; + err = -EINVAL; + goto out; } mark_inode_dirty(inode); } @@ -2109,13 +2109,15 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, /* Just start temporary transaction and finish it */ err = journal_begin(&th, sb, 1); if (err) - return err; + goto out; err = journal_end_sync(&th, sb, 1); if (err) - return err; + goto out; } + err = vfs_quota_on_path(sb, type, format_id, &nd.path); +out: path_put(&nd.path); - return vfs_quota_on(sb, type, format_id, path, 0); + return err; } /* Read data from quotafile - avoid pagecache and such because we cannot afford diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 742187f7a05c..ca6b9b5c8d52 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -43,6 +43,8 @@ int dquot_mark_dquot_dirty(struct dquot *dquot); int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, int remount); +int vfs_quota_on_path(struct super_block *sb, int type, int format_id, + struct path *path); int vfs_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type); int vfs_quota_off(struct super_block *sb, int type, int remount); -- cgit v1.2.3 From 8d66bf5481002b0960aa49aed0987c73f5d7816c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 1 Aug 2008 09:05:54 -0400 Subject: [PATCH] pass struct path * to do_add_mount() Signed-off-by: Al Viro --- fs/afs/mntpt.c | 2 +- fs/cifs/cifs_dfs_ref.c | 2 +- fs/namespace.c | 16 ++++++++-------- fs/nfs/namespace.c | 2 +- include/linux/mount.h | 3 ++- 5 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 2f5503902c37..78db4953a800 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -232,7 +232,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) } mntget(newmnt); - err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts); + err = do_add_mount(newmnt, &nd->path, MNT_SHRINKABLE, &afs_vfsmounts); switch (err) { case 0: path_put(&nd->path); diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index d82374c9e329..d2c8eef84f3c 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -226,7 +226,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd, int err; mntget(newmnt); - err = do_add_mount(newmnt, nd, nd->path.mnt->mnt_flags, mntlist); + err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist); switch (err) { case 0: path_put(&nd->path); diff --git a/fs/namespace.c b/fs/namespace.c index 411728c0c8bb..6e283c93b50d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1667,31 +1667,31 @@ static noinline int do_new_mount(struct nameidata *nd, char *type, int flags, if (IS_ERR(mnt)) return PTR_ERR(mnt); - return do_add_mount(mnt, nd, mnt_flags, NULL); + return do_add_mount(mnt, &nd->path, mnt_flags, NULL); } /* * add a mount into a namespace's mount tree * - provide the option of adding the new mount to an expiration list */ -int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, +int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags, struct list_head *fslist) { int err; down_write(&namespace_sem); /* Something was mounted here while we slept */ - while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path.mnt, &nd->path.dentry)) + while (d_mountpoint(path->dentry) && + follow_down(&path->mnt, &path->dentry)) ; err = -EINVAL; - if (!check_mnt(nd->path.mnt)) + if (!check_mnt(path->mnt)) goto unlock; /* Refuse the same filesystem on the same mount point */ err = -EBUSY; - if (nd->path.mnt->mnt_sb == newmnt->mnt_sb && - nd->path.mnt->mnt_root == nd->path.dentry) + if (path->mnt->mnt_sb == newmnt->mnt_sb && + path->mnt->mnt_root == path->dentry) goto unlock; err = -EINVAL; @@ -1699,7 +1699,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, goto unlock; newmnt->mnt_flags = mnt_flags; - if ((err = graft_tree(newmnt, &nd->path))) + if ((err = graft_tree(newmnt, path))) goto unlock; if (fslist) /* add to the specified expiration list */ diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 2f285ef76399..66df08dd1caf 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -129,7 +129,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) goto out_err; mntget(mnt); - err = do_add_mount(mnt, nd, nd->path.mnt->mnt_flags|MNT_SHRINKABLE, + err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE, &nfs_automount_list); if (err < 0) { mntput(mnt); diff --git a/include/linux/mount.h b/include/linux/mount.h index b5efaa2132ab..30a1d63b6fb5 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -105,7 +105,8 @@ extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, struct nameidata; -extern int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, +struct path; +extern int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags, struct list_head *fslist); extern void mark_mounts_for_expiry(struct list_head *mounts); -- cgit v1.2.3 From 6c5e0c4d518a37e1d5d794c14433e80284415079 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 1 Aug 2008 20:31:32 +0200 Subject: block: add a blk_plug_device_unlocked() that grabs the queue lock blk_plug_device() must be called with the queue lock held, so callers often just grab and release the lock for that purpose. Add a helper that does just that. Signed-off-by: Jens Axboe --- block/blk-core.c | 18 ++++++++++++++++++ include/linux/blkdev.h | 1 + 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index fef79ccb2a11..4889eb86a39e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -212,6 +212,24 @@ void blk_plug_device(struct request_queue *q) } EXPORT_SYMBOL(blk_plug_device); +/** + * blk_plug_device_unlocked - plug a device without queue lock held + * @q: The &struct request_queue to plug + * + * Description: + * Like @blk_plug_device(), but grabs the queue lock and disables + * interrupts. + **/ +void blk_plug_device_unlocked(struct request_queue *q) +{ + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + blk_plug_device(q); + spin_unlock_irqrestore(q->queue_lock, flags); +} +EXPORT_SYMBOL(blk_plug_device_unlocked); + /* * remove the queue from the plugged list, if present. called with * queue lock held and interrupts disabled. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 88d68081a0f1..e61f22be4d0e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -655,6 +655,7 @@ extern struct request *blk_get_request(struct request_queue *, int, gfp_t); extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); extern void blk_plug_device(struct request_queue *); +extern void blk_plug_device_unlocked(struct request_queue *); extern int blk_remove_plug(struct request_queue *); extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_cmd_ioctl(struct file *, struct request_queue *, -- cgit v1.2.3 From 5c7edcd7ee6b77b88252fe4096dce1a46a60c829 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 31 Jul 2008 02:04:09 -0700 Subject: tracehook: fix exit_signal=0 case My commit 2b2a1ff64afbadac842bbc58c5166962cf4f7664 introduced a regression (sorry about that) for the odd case of exit_signal=0 (e.g. clone_flags=0). This is not a normal use, but it's used by a case in the glibc test suite. Dying with exit_signal=0 sends no signal, but it's supposed to wake up a parent's blocked wait*() calls (unlike the delayed_group_leader case). This fixes tracehook_notify_death() and its caller to distinguish a "signal 0" wakeup from the delayed_group_leader case (with no wakeup). Signed-off-by: Roland McGrath Tested-by: Serge Hallyn Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 21 +++++++++++++-------- kernel/exit.c | 6 +++--- 2 files changed, 16 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index b1875582c1a1..12532839f508 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -493,16 +493,21 @@ static inline int tracehook_notify_jctl(int notify, int why) * @death_cookie: value to pass to tracehook_report_death() * @group_dead: nonzero if this was the last thread in the group to die * - * Return the signal number to send our parent with do_notify_parent(), or - * zero to send no signal and leave a zombie, or -1 to self-reap right now. + * A return value >= 0 means call do_notify_parent() with that signal + * number. Negative return value can be %DEATH_REAP to self-reap right + * now, or %DEATH_DELAYED_GROUP_LEADER to a zombie without notifying our + * parent. Note that a return value of 0 means a do_notify_parent() call + * that sends no signal, but still wakes up a parent blocked in wait*(). * * Called with write_lock_irq(&tasklist_lock) held. */ +#define DEATH_REAP -1 +#define DEATH_DELAYED_GROUP_LEADER -2 static inline int tracehook_notify_death(struct task_struct *task, void **death_cookie, int group_dead) { if (task->exit_signal == -1) - return task->ptrace ? SIGCHLD : -1; + return task->ptrace ? SIGCHLD : DEATH_REAP; /* * If something other than our normal parent is ptracing us, then @@ -512,21 +517,21 @@ static inline int tracehook_notify_death(struct task_struct *task, if (thread_group_empty(task) && !ptrace_reparented(task)) return task->exit_signal; - return task->ptrace ? SIGCHLD : 0; + return task->ptrace ? SIGCHLD : DEATH_DELAYED_GROUP_LEADER; } /** * tracehook_report_death - task is dead and ready to be reaped * @task: @current task now exiting - * @signal: signal number sent to parent, or 0 or -1 + * @signal: return value from tracheook_notify_death() * @death_cookie: value passed back from tracehook_notify_death() * @group_dead: nonzero if this was the last thread in the group to die * * Thread has just become a zombie or is about to self-reap. If positive, * @signal is the signal number just sent to the parent (usually %SIGCHLD). - * If @signal is -1, this thread will self-reap. If @signal is 0, this is - * a delayed_group_leader() zombie. The @death_cookie was passed back by - * tracehook_notify_death(). + * If @signal is %DEATH_REAP, this thread will self-reap. If @signal is + * %DEATH_DELAYED_GROUP_LEADER, this is a delayed_group_leader() zombie. + * The @death_cookie was passed back by tracehook_notify_death(). * * If normal reaping is not inhibited, @task->exit_state might be changing * in parallel. diff --git a/kernel/exit.c b/kernel/exit.c index eb4d6470d1d0..38ec40630149 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -911,10 +911,10 @@ static void exit_notify(struct task_struct *tsk, int group_dead) tsk->exit_signal = SIGCHLD; signal = tracehook_notify_death(tsk, &cookie, group_dead); - if (signal > 0) + if (signal >= 0) signal = do_notify_parent(tsk, signal); - tsk->exit_state = signal < 0 ? EXIT_DEAD : EXIT_ZOMBIE; + tsk->exit_state = signal == DEATH_REAP ? EXIT_DEAD : EXIT_ZOMBIE; /* mt-exec, de_thread() is waiting for us */ if (thread_group_leader(tsk) && @@ -927,7 +927,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead) tracehook_report_death(tsk, signal, cookie, group_dead); /* If the process is dead, release it - nobody will wait for it */ - if (signal < 0) + if (signal == DEATH_REAP) release_task(tsk); } -- cgit v1.2.3 From 4744b43431e8613f920c5cba88346756f53c5165 Mon Sep 17 00:00:00 2001 From: Tim Bird Date: Fri, 1 Aug 2008 14:05:50 -0700 Subject: embedded: fix vc_translate operator precedence This fixes a bug in operator precedence in the newly introduced vc_translate macro. Without this fix, the translation of some characters on the kernel console is garbled. This patch was copied to the e-mail list previously for testing. Now, all reports confirm that it works, so this is an official post for application. Signed-off-by: Tim Bird Signed-off-by: David Woodhouse --- include/linux/vt_kern.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index 14c0e91be9b5..8c8119ffee12 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -74,7 +74,7 @@ void con_protect_unimap(struct vc_data *vc, int rdonly); int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc); #define vc_translate(vc, c) ((vc)->vc_translate[(c) | \ - (vc)->vc_toggle_meta ? 0x80 : 0]) + ((vc)->vc_toggle_meta ? 0x80 : 0)]) #else #define con_set_trans_old(arg) (0) #define con_get_trans_old(arg) (-EINVAL) -- cgit v1.2.3 From ff4cc1de2401ad44ae084c3f5a9e898af0879520 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Wed, 30 Jul 2008 18:26:58 +0200 Subject: mISDN cleanup user interface The channelmap should have the same size on 32 and 64 bit systems and should not depend on endianess. Thanks to David Woodhouse for spotting this. Signed-off-by: Karsten Keil --- drivers/isdn/hardware/mISDN/hfcmulti.c | 6 +++--- drivers/isdn/hardware/mISDN/hfcpci.c | 2 +- drivers/isdn/mISDN/l1oip_core.c | 6 ++---- drivers/isdn/mISDN/socket.c | 4 ++-- include/linux/mISDNif.h | 32 +++++++++++++++++++++++++++----- 5 files changed, 35 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index 2649ea55a9e8..10144e871c06 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -3971,7 +3971,7 @@ open_bchannel(struct hfc_multi *hc, struct dchannel *dch, struct bchannel *bch; int ch; - if (!test_bit(rq->adr.channel, &dch->dev.channelmap[0])) + if (!test_channelmap(rq->adr.channel, dch->dev.channelmap)) return -EINVAL; if (rq->protocol == ISDN_P_NONE) return -EINVAL; @@ -4587,7 +4587,7 @@ init_e1_port(struct hfc_multi *hc, struct hm_map *m) list_add(&bch->ch.list, &dch->dev.bchannels); hc->chan[ch].bch = bch; hc->chan[ch].port = 0; - test_and_set_bit(bch->nr, &dch->dev.channelmap[0]); + set_channelmap(bch->nr, dch->dev.channelmap); } /* set optical line type */ if (port[Port_cnt] & 0x001) { @@ -4755,7 +4755,7 @@ init_multi_port(struct hfc_multi *hc, int pt) list_add(&bch->ch.list, &dch->dev.bchannels); hc->chan[i + ch].bch = bch; hc->chan[i + ch].port = pt; - test_and_set_bit(bch->nr, &dch->dev.channelmap[0]); + set_channelmap(bch->nr, dch->dev.channelmap); } /* set master clock */ if (port[Port_cnt] & 0x001) { diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index 3231814e7efa..9cf5edbb1a9b 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -2056,7 +2056,7 @@ setup_card(struct hfc_pci *card) card->dch.dev.nrbchan = 2; for (i = 0; i < 2; i++) { card->bch[i].nr = i + 1; - test_and_set_bit(i + 1, &card->dch.dev.channelmap[0]); + set_channelmap(i + 1, card->dch.dev.channelmap); card->bch[i].debug = debug; mISDN_initbchannel(&card->bch[i], MAX_DATA_MEM); card->bch[i].hw = card; diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index 155b99780c4f..e42150a57780 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -1006,8 +1006,7 @@ open_bchannel(struct l1oip *hc, struct dchannel *dch, struct channel_req *rq) struct bchannel *bch; int ch; - if (!test_bit(rq->adr.channel & 0x1f, - &dch->dev.channelmap[rq->adr.channel >> 5])) + if (!test_channelmap(rq->adr.channel, dch->dev.channelmap)) return -EINVAL; if (rq->protocol == ISDN_P_NONE) return -EINVAL; @@ -1412,8 +1411,7 @@ init_card(struct l1oip *hc, int pri, int bundle) bch->ch.nr = i + ch; list_add(&bch->ch.list, &dch->dev.bchannels); hc->chan[i + ch].bch = bch; - test_and_set_bit(bch->nr & 0x1f, - &dch->dev.channelmap[bch->nr >> 5]); + set_channelmap(bch->nr, dch->dev.channelmap); } ret = mISDN_register_device(&dch->dev, hc->name); if (ret) diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 4ba4cc364c9e..e5a20f9542d1 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -379,7 +379,7 @@ data_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) di.Bprotocols = dev->Bprotocols | get_all_Bprotocols(); di.protocol = dev->D.protocol; memcpy(di.channelmap, dev->channelmap, - MISDN_CHMAP_SIZE * 4); + sizeof(di.channelmap)); di.nrbchan = dev->nrbchan; strcpy(di.name, dev->name); if (copy_to_user((void __user *)arg, &di, sizeof(di))) @@ -637,7 +637,7 @@ base_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) di.Bprotocols = dev->Bprotocols | get_all_Bprotocols(); di.protocol = dev->D.protocol; memcpy(di.channelmap, dev->channelmap, - MISDN_CHMAP_SIZE * 4); + sizeof(di.channelmap)); di.nrbchan = dev->nrbchan; strcpy(di.name, dev->name); if (copy_to_user((void __user *)arg, &di, sizeof(di))) diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 5c948f337817..8f2d60da04e7 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -37,7 +37,7 @@ */ #define MISDN_MAJOR_VERSION 1 #define MISDN_MINOR_VERSION 0 -#define MISDN_RELEASE 18 +#define MISDN_RELEASE 19 /* primitives for information exchange * generell format @@ -242,7 +242,8 @@ struct mISDNhead { #define TEI_SAPI 63 #define CTRL_SAPI 0 -#define MISDN_CHMAP_SIZE 4 +#define MISDN_MAX_CHANNEL 127 +#define MISDN_CHMAP_SIZE ((MISDN_MAX_CHANNEL + 1) >> 3) #define SOL_MISDN 0 @@ -275,11 +276,32 @@ struct mISDN_devinfo { u_int Dprotocols; u_int Bprotocols; u_int protocol; - u_long channelmap[MISDN_CHMAP_SIZE]; + u_char channelmap[MISDN_CHMAP_SIZE]; u_int nrbchan; char name[MISDN_MAX_IDLEN]; }; +static inline int +test_channelmap(u_int nr, u_char *map) +{ + if (nr <= MISDN_MAX_CHANNEL) + return map[nr >> 3] & (1 << (nr & 7)); + else + return 0; +} + +static inline void +set_channelmap(u_int nr, u_char *map) +{ + map[nr >> 3] |= (1 << (nr & 7)); +} + +static inline void +clear_channelmap(u_int nr, u_char *map) +{ + map[nr >> 3] &= ~(1 << (nr & 7)); +} + /* CONTROL_CHANNEL parameters */ #define MISDN_CTRL_GETOP 0x0000 #define MISDN_CTRL_LOOP 0x0001 @@ -405,7 +427,7 @@ struct mISDNdevice { u_int Dprotocols; u_int Bprotocols; u_int nrbchan; - u_long channelmap[MISDN_CHMAP_SIZE]; + u_char channelmap[MISDN_CHMAP_SIZE]; struct list_head bchannels; struct mISDNchannel *teimgr; struct device dev; @@ -430,7 +452,7 @@ struct mISDNstack { #endif }; -/* global alloc/queue dunctions */ +/* global alloc/queue functions */ static inline struct sk_buff * mI_alloc_skb(unsigned int len, gfp_t gfp_mask) -- cgit v1.2.3 From 85ebd00334099fd5d296bcae74a66c943d46686d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 2 Aug 2008 19:12:23 +0200 Subject: Fix IHEX firmware generation/loading Fix both the IHEX firmware generation (len field always null, and EOF marker a byte too short) and loading (struct ihex_binrec needs to be packed to reflect the on-disk structure). Signed-off-by: Marc Zyngier Signed-off-by: David Woodhouse --- firmware/ihex2fw.c | 6 +++--- include/linux/ihex.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/firmware/ihex2fw.c b/firmware/ihex2fw.c index 660b191ed75e..8f7fdaa9e010 100644 --- a/firmware/ihex2fw.c +++ b/firmware/ihex2fw.c @@ -250,19 +250,19 @@ static void file_record(struct ihex_binrec *record) static int output_records(int outfd) { - unsigned char zeroes[5] = {0, 0, 0, 0, 0}; + unsigned char zeroes[6] = {0, 0, 0, 0, 0, 0}; struct ihex_binrec *p = records; while (p) { uint16_t writelen = (p->len + 9) & ~3; p->addr = htonl(p->addr); - p->len = htonl(p->len); + p->len = htons(p->len); write(outfd, &p->addr, writelen); p = p->next; } /* EOF record is zero length, since we don't bother to represent the type field in the binary version */ - write(outfd, zeroes, 5); + write(outfd, zeroes, 6); return 0; } diff --git a/include/linux/ihex.h b/include/linux/ihex.h index 2baace2788a7..31d8629e75a1 100644 --- a/include/linux/ihex.h +++ b/include/linux/ihex.h @@ -18,7 +18,7 @@ struct ihex_binrec { __be32 addr; __be16 len; uint8_t data[0]; -} __attribute__((aligned(4))); +} __attribute__((packed)); /* Find the next record, taking into account the 4-byte alignment */ static inline const struct ihex_binrec * -- cgit v1.2.3 From cf368d2f9aced8adc8bd6b1f04294a71551d5fce Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Sun, 3 Aug 2008 03:03:57 +0400 Subject: drivers/video/console/promcon.c: fix build error drivers/video/console/promcon.c:158: error: implicit declaration of function 'con_protect_unimap' Introduced by commit a29ccf6f823a84d89e1c7aaaf221cf7282022024 ("embedded: fix vc_translate operator precedence"). Signed-off-by: Alexander Beregalov Cc: Tim Bird Signed-off-by: David Woodhouse --- include/linux/vt_kern.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index 8c8119ffee12..1c78d56c57e5 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -86,6 +86,7 @@ int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc); #define con_copy_unimap(d, s) (0) #define con_get_unimap(vc, ct, uct, list) (-EINVAL) #define con_free_unimap(vc) do { ; } while (0) +#define con_protect_unimap(vc, rdonly) do { ; } while (0) #define vc_translate(vc, c) (c) #endif -- cgit v1.2.3 From 63870295de9adb365cd121dab94379b8cfdf986a Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 4 Aug 2008 10:39:46 +0900 Subject: maple: Clean up maple_driver_register/unregister routines. These were completely inconsistent. Clean these up to take a maple_driver pointer directly for consistency. Signed-off-by: Paul Mundt --- drivers/input/keyboard/maple_keyb.c | 6 +++--- drivers/sh/maple/maple.c | 37 ++++++++++++++++++++++++++----------- include/linux/maple.h | 4 +++- 3 files changed, 32 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/keyboard/maple_keyb.c b/drivers/input/keyboard/maple_keyb.c index 42f5d4ec39ab..3f5151a0fd15 100644 --- a/drivers/input/keyboard/maple_keyb.c +++ b/drivers/input/keyboard/maple_keyb.c @@ -235,17 +235,17 @@ static struct maple_driver dc_kbd_driver = { .name = "Dreamcast_keyboard", .probe = probe_maple_kbd, .remove = remove_maple_kbd, - }, + }, }; static int __init dc_kbd_init(void) { - return maple_driver_register(&dc_kbd_driver.drv); + return maple_driver_register(&dc_kbd_driver); } static void __exit dc_kbd_exit(void) { - driver_unregister(&dc_kbd_driver.drv); + maple_driver_unregister(&dc_kbd_driver); } module_init(dc_kbd_init); diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c index be97789fa5fd..a6b4dc3cfcba 100644 --- a/drivers/sh/maple/maple.c +++ b/drivers/sh/maple/maple.c @@ -2,6 +2,7 @@ * Core maple bus functionality * * Copyright (C) 2007, 2008 Adrian McMenamin + * Copyright (C) 2001 - 2008 Paul Mundt * * Based on 2.4 code by: * @@ -31,7 +32,7 @@ #include #include -MODULE_AUTHOR("Yaegshi Takeshi, Paul Mundt, M.R. Brown, Adrian McMenamin"); +MODULE_AUTHOR("Yaegashi Takeshi, Paul Mundt, M. R. Brown, Adrian McMenamin"); MODULE_DESCRIPTION("Maple bus driver for Dreamcast"); MODULE_LICENSE("GPL v2"); MODULE_SUPPORTED_DEVICE("{{SEGA, Dreamcast/Maple}}"); @@ -65,19 +66,35 @@ static bool checked[4]; static struct maple_device *baseunits[4]; /** - * maple_driver_register - register a device driver - * automatically makes the driver bus a maple bus - * @drv: the driver to be registered + * maple_driver_register - register a maple driver + * @drv: maple driver to be registered. + * + * Registers the passed in @drv, while updating the bus type. + * Devices with matching function IDs will be automatically probed. */ -int maple_driver_register(struct device_driver *drv) +int maple_driver_register(struct maple_driver *drv) { if (!drv) return -EINVAL; - drv->bus = &maple_bus_type; - return driver_register(drv); + + drv->drv.bus = &maple_bus_type; + + return driver_register(&drv->drv); } EXPORT_SYMBOL_GPL(maple_driver_register); +/** + * maple_driver_unregister - unregister a maple driver. + * @drv: maple driver to unregister. + * + * Cleans up after maple_driver_register(). To be invoked in the exit + * path of any module drivers. + */ +void maple_driver_unregister(struct maple_driver *drv) +{ + driver_unregister(&drv->drv); +} + /* set hardware registers to enable next round of dma */ static void maplebus_dma_reset(void) { @@ -724,11 +741,9 @@ static int maple_get_dma_buffer(void) static int match_maple_bus_driver(struct device *devptr, struct device_driver *drvptr) { - struct maple_driver *maple_drv; - struct maple_device *maple_dev; + struct maple_driver *maple_drv = to_maple_driver(drvptr); + struct maple_device *maple_dev = to_maple_dev(devptr); - maple_drv = container_of(drvptr, struct maple_driver, drv); - maple_dev = container_of(devptr, struct maple_device, dev); /* Trap empty port case */ if (maple_dev->devinfo.function == 0xFFFFFFFF) return 0; diff --git a/include/linux/maple.h b/include/linux/maple.h index c853b1066018..b2b7ce0fb1f7 100644 --- a/include/linux/maple.h +++ b/include/linux/maple.h @@ -70,7 +70,9 @@ void maple_getcond_callback(struct maple_device *dev, void (*callback) (struct mapleq * mq), unsigned long interval, unsigned long function); -int maple_driver_register(struct device_driver *drv); +int maple_driver_register(struct maple_driver *); +void maple_driver_unregister(struct maple_driver *); + int maple_add_packet_sleeps(struct maple_device *mdev, u32 function, u32 command, u32 length, void *data); void maple_clear_dev(struct maple_device *mdev); -- cgit v1.2.3 From 617870632de6739fca0893f3e6648e9ae1bd0ddb Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 4 Aug 2008 10:58:24 +0900 Subject: maple: Kill useless private_data pointer. We can simply wrap in to the dev_set/get_drvdata(), there's no reason to track an extra level of private data on top of the struct device. Signed-off-by: Paul Mundt --- drivers/input/keyboard/maple_keyb.c | 15 ++++++++------- drivers/sh/maple/maple.c | 1 + include/linux/maple.h | 4 +++- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/keyboard/maple_keyb.c b/drivers/input/keyboard/maple_keyb.c index 3f5151a0fd15..22f17a593be7 100644 --- a/drivers/input/keyboard/maple_keyb.c +++ b/drivers/input/keyboard/maple_keyb.c @@ -139,7 +139,7 @@ static void dc_scan_kbd(struct dc_kbd *kbd) static void dc_kbd_callback(struct mapleq *mq) { struct maple_device *mapledev = mq->dev; - struct dc_kbd *kbd = mapledev->private_data; + struct dc_kbd *kbd = maple_get_drvdata(mapledev); unsigned long *buf = mq->recvbuf; /* @@ -175,8 +175,6 @@ static int probe_maple_kbd(struct device *dev) goto fail; } - mdev->private_data = kbd; - kbd->dev = idev; memcpy(kbd->keycode, dc_kbd_keycode, sizeof(kbd->keycode)); @@ -204,27 +202,30 @@ static int probe_maple_kbd(struct device *dev) MAPLE_FUNC_KEYBOARD); mdev->driver = mdrv; + + maple_set_drvdata(mdev, kbd); + return error; fail: input_free_device(idev); kfree(kbd); - mdev->private_data = NULL; + maple_set_drvdata(mdev, NULL); return error; } static int remove_maple_kbd(struct device *dev) { struct maple_device *mdev = to_maple_dev(dev); - struct dc_kbd *kbd; + struct dc_kbd *kbd = maple_get_drvdata(mdev); mutex_lock(&maple_keyb_mutex); - kbd = mdev->private_data; - mdev->private_data = NULL; input_unregister_device(kbd->dev); kfree(kbd); + maple_set_drvdata(mdev, NULL); + mutex_unlock(&maple_keyb_mutex); return 0; } diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c index a6b4dc3cfcba..be77a39f224c 100644 --- a/drivers/sh/maple/maple.c +++ b/drivers/sh/maple/maple.c @@ -94,6 +94,7 @@ void maple_driver_unregister(struct maple_driver *drv) { driver_unregister(&drv->drv); } +EXPORT_SYMBOL_GPL(maple_driver_unregister); /* set hardware registers to enable next round of dma */ static void maplebus_dma_reset(void) diff --git a/include/linux/maple.h b/include/linux/maple.h index b2b7ce0fb1f7..c23d3f51ba40 100644 --- a/include/linux/maple.h +++ b/include/linux/maple.h @@ -51,7 +51,6 @@ struct maple_devinfo { struct maple_device { struct maple_driver *driver; struct mapleq *mq; - void *private_data; void (*callback) (struct mapleq * mq); unsigned long when, interval, function; struct maple_devinfo devinfo; @@ -80,4 +79,7 @@ void maple_clear_dev(struct maple_device *mdev); #define to_maple_dev(n) container_of(n, struct maple_device, dev) #define to_maple_driver(n) container_of(n, struct maple_driver, drv) +#define maple_get_drvdata(d) dev_get_drvdata(&(d)->dev) +#define maple_set_drvdata(d,p) dev_set_drvdata(&(d)->dev, (p)) + #endif /* __LINUX_MAPLE_H */ -- cgit v1.2.3 From 98f7dfd86cbbd377e2cbc293529681b914296f68 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 18 Jul 2008 13:52:59 +0800 Subject: mac80211: pass dtim_period to low level driver This patch adds the dtim_period in ieee80211_bss_conf, this allows the low level driver to know the dtim_period, and to plan power save accordingly. Signed-off-by: Emmanuel Grumbach Signed-off-by: Tomas Winkler Signed-off-by: Zhu Yi Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 13 +++++++++++++ include/net/mac80211.h | 4 +++- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 11 +++++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a1630ba0b87c..7f4df7c7659d 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -506,6 +506,19 @@ struct ieee80211_channel_sw_ie { u8 count; } __attribute__ ((packed)); +/** + * struct ieee80211_tim + * + * This structure refers to "Traffic Indication Map information element" + */ +struct ieee80211_tim_ie { + u8 dtim_count; + u8 dtim_period; + u8 bitmap_ctrl; + /* variable size: 1 - 251 bytes */ + u8 virtual_map[0]; +} __attribute__ ((packed)); + struct ieee80211_mgmt { __le16 frame_control; __le16 duration; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b52721008be8..9d99f2e0a204 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -177,9 +177,10 @@ enum ieee80211_bss_change { * @aid: association ID number, valid only when @assoc is true * @use_cts_prot: use CTS protection * @use_short_preamble: use 802.11b short preamble + * @dtim_period: num of beacons before the next DTIM, for PSM * @timestamp: beacon timestamp * @beacon_int: beacon interval - * @assoc_capability: capabbilities taken from assoc resp + * @assoc_capability: capabilities taken from assoc resp * @assoc_ht: association in HT mode * @ht_conf: ht capabilities * @ht_bss_conf: ht extended capabilities @@ -191,6 +192,7 @@ struct ieee80211_bss_conf { /* erp related data */ bool use_cts_prot; bool use_short_preamble; + u8 dtim_period; u16 beacon_int; u16 assoc_capability; u64 timestamp; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a2e200f9811e..ec59345af65b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -82,6 +82,7 @@ struct ieee80211_sta_bss { u8 bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 dtim_period; u16 capability; /* host byte order */ enum ieee80211_band band; int freq; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index acb04133a95d..591e6331c427 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -551,6 +551,7 @@ static void ieee80211_set_associated(struct net_device *dev, /* set timing information */ sdata->bss_conf.beacon_int = bss->beacon_int; sdata->bss_conf.timestamp = bss->timestamp; + sdata->bss_conf.dtim_period = bss->dtim_period; changed |= ieee80211_handle_bss_capability(sdata, bss); @@ -2688,6 +2689,16 @@ static void ieee80211_rx_bss_info(struct net_device *dev, bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int); bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info); + if (elems->tim) { + struct ieee80211_tim_ie *tim_ie = + (struct ieee80211_tim_ie *)elems->tim; + bss->dtim_period = tim_ie->dtim_period; + } + + /* set default value for buggy APs */ + if (!elems->tim || bss->dtim_period == 0) + bss->dtim_period = 1; + bss->supp_rates_len = 0; if (elems->supp_rates) { clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; -- cgit v1.2.3 From 1a3f7d98e5f50f21ce6fb1406a35531d9596c5c6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 4 Aug 2008 16:50:38 -0700 Subject: Revert "UFS: add const to parser token table" This reverts commit f9247273cb69ba101877e946d2d83044409cc8c5 (and fb2e405fc1fc8b20d9c78eaa1c7fd5a297efde43 - "fix fs/nfs/nfsroot.c compilation" - that fixed a missed conversion). The changes cause problems for at least the sparc build. Let's re-do them when the exact issues are resolved. Requested-by: Andrew Morton Requested-by: Steven Whitehouse Cc: David Miller Signed-off-by: Linus Torvalds --- fs/nfs/nfsroot.c | 2 +- fs/ufs/super.c | 2 +- include/linux/parser.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 8478fc25daee..46763d1cd397 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -127,7 +127,7 @@ enum { Opt_err }; -static match_table_t __initconst tokens = { +static match_table_t __initdata tokens = { {Opt_port, "port=%u"}, {Opt_rsize, "rsize=%u"}, {Opt_wsize, "wsize=%u"}, diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 3e30e40aa24d..3141969b456d 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs) { struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; - const struct match_token *tp = tokens; + struct match_token *tp = tokens; while (tp->token != Opt_onerror_panic && tp->token != mval) ++tp; diff --git a/include/linux/parser.h b/include/linux/parser.h index cc554ca8bc78..7dcd05075756 100644 --- a/include/linux/parser.h +++ b/include/linux/parser.h @@ -14,7 +14,7 @@ struct match_token { const char *pattern; }; -typedef const struct match_token match_table_t[]; +typedef struct match_token match_table_t[]; /* Maximum number of arguments that match_token will find in a pattern */ enum {MAX_OPT_ARGS = 3}; -- cgit v1.2.3 From 115a326c1e5cab457924356123bbfd7d783ecf9d Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Mon, 4 Aug 2008 13:56:01 -0700 Subject: tracehook: kerneldoc fix My last change to tracehook.h made it confuse the kerneldoc parser. Move the #define's before the comment so it's happy again. Signed-off-by: Roland McGrath Acked-by: Randy Dunlap Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 12532839f508..ab3ef7aefa95 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -487,6 +487,9 @@ static inline int tracehook_notify_jctl(int notify, int why) return notify || (current->ptrace & PT_PTRACED); } +#define DEATH_REAP -1 +#define DEATH_DELAYED_GROUP_LEADER -2 + /** * tracehook_notify_death - task is dead, ready to notify parent * @task: @current task now exiting @@ -501,8 +504,6 @@ static inline int tracehook_notify_jctl(int notify, int why) * * Called with write_lock_irq(&tasklist_lock) held. */ -#define DEATH_REAP -1 -#define DEATH_DELAYED_GROUP_LEADER -2 static inline int tracehook_notify_death(struct task_struct *task, void **death_cookie, int group_dead) { -- cgit v1.2.3 From 529ae9aaa08378cfe2a4350bded76f32cc8ff0ce Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 2 Aug 2008 12:01:03 +0200 Subject: mm: rename page trylock Converting page lock to new locking bitops requires a change of page flag operation naming, so we might as well convert it to something nicer (!TestSetPageLocked_Lock => trylock_page, SetPageLocked => set_page_locked). This also facilitates lockdeping of page lock. Signed-off-by: Nick Piggin Acked-by: KOSAKI Motohiro Acked-by: Peter Zijlstra Acked-by: Andrew Morton Acked-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- drivers/scsi/sg.c | 2 +- fs/afs/write.c | 2 +- fs/cifs/file.c | 2 +- fs/jbd/commit.c | 4 +-- fs/jbd2/commit.c | 2 +- fs/reiserfs/journal.c | 2 +- fs/splice.c | 2 +- fs/xfs/linux-2.6/xfs_aops.c | 4 +-- include/linux/page-flags.h | 2 +- include/linux/pagemap.h | 67 +++++++++++++++++++++++++++------------------ mm/filemap.c | 12 ++++---- mm/memory.c | 2 +- mm/migrate.c | 4 +-- mm/rmap.c | 2 +- mm/shmem.c | 4 +-- mm/swap.c | 2 +- mm/swap_state.c | 8 +++--- mm/swapfile.c | 2 +- mm/truncate.c | 4 +-- mm/vmscan.c | 4 +-- 20 files changed, 74 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index d3b8ebb83776..3d36270a8b4d 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1747,7 +1747,7 @@ st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages, */ flush_dcache_page(pages[i]); /* ?? Is locking needed? I don't think so */ - /* if (TestSetPageLocked(pages[i])) + /* if (!trylock_page(pages[i])) goto out_unlock; */ } diff --git a/fs/afs/write.c b/fs/afs/write.c index 9a849ad3c489..065b4e10681a 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -404,7 +404,7 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, page = pages[loop]; if (page->index > wb->last) break; - if (TestSetPageLocked(page)) + if (!trylock_page(page)) break; if (!PageDirty(page) || page_private(page) != (unsigned long) wb) { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0aac824371a5..e692c42f24b5 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1280,7 +1280,7 @@ retry: if (first < 0) lock_page(page); - else if (TestSetPageLocked(page)) + else if (!trylock_page(page)) break; if (unlikely(page->mapping != mapping)) { diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 2eccbfaa1d48..81a9ad7177ca 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -63,7 +63,7 @@ static void release_buffer_page(struct buffer_head *bh) goto nope; /* OK, it's a truncated page */ - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto nope; page_cache_get(page); @@ -446,7 +446,7 @@ void journal_commit_transaction(journal_t *journal) spin_lock(&journal->j_list_lock); } if (unlikely(!buffer_uptodate(bh))) { - if (TestSetPageLocked(bh->b_page)) { + if (!trylock_page(bh->b_page)) { spin_unlock(&journal->j_list_lock); lock_page(bh->b_page); spin_lock(&journal->j_list_lock); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index adf0395f318e..f2ad061e95ec 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -67,7 +67,7 @@ static void release_buffer_page(struct buffer_head *bh) goto nope; /* OK, it's a truncated page */ - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto nope; page_cache_get(page); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index c8f60ee183b5..ce2208b27118 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -627,7 +627,7 @@ static int journal_list_still_alive(struct super_block *s, static void release_buffer_page(struct buffer_head *bh) { struct page *page = bh->b_page; - if (!page->mapping && !TestSetPageLocked(page)) { + if (!page->mapping && trylock_page(page)) { page_cache_get(page); put_bh(bh); if (!page->mapping) diff --git a/fs/splice.c b/fs/splice.c index b30311ba8af6..1bbc6f4bb09c 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -371,7 +371,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, * for an in-flight io page */ if (flags & SPLICE_F_NONBLOCK) { - if (TestSetPageLocked(page)) { + if (!trylock_page(page)) { error = -EAGAIN; break; } diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 0b211cba1909..fa73179233ad 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -675,7 +675,7 @@ xfs_probe_cluster( } else pg_offset = PAGE_CACHE_SIZE; - if (page->index == tindex && !TestSetPageLocked(page)) { + if (page->index == tindex && trylock_page(page)) { pg_len = xfs_probe_page(page, pg_offset, mapped); unlock_page(page); } @@ -759,7 +759,7 @@ xfs_convert_page( if (page->index != tindex) goto fail; - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto fail; if (PageWriteback(page)) goto fail_unlock_page; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 25aaccdb2f26..c74d3e875314 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -163,7 +163,7 @@ static inline int Page##uname(struct page *page) \ struct page; /* forward declaration */ -PAGEFLAG(Locked, locked) TESTSCFLAG(Locked, locked) +TESTPAGEFLAG(Locked, locked) PAGEFLAG(Error, error) PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced) PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 69ed3cb1197a..5da31c12101c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -250,29 +250,6 @@ static inline struct page *read_mapping_page(struct address_space *mapping, return read_cache_page(mapping, index, filler, data); } -int add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); -int add_to_page_cache_lru(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); -extern void remove_from_page_cache(struct page *page); -extern void __remove_from_page_cache(struct page *page); - -/* - * Like add_to_page_cache_locked, but used to add newly allocated pages: - * the page is new, so we can just run SetPageLocked() against it. - */ -static inline int add_to_page_cache(struct page *page, - struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) -{ - int error; - - SetPageLocked(page); - error = add_to_page_cache_locked(page, mapping, offset, gfp_mask); - if (unlikely(error)) - ClearPageLocked(page); - return error; -} - /* * Return byte-offset into filesystem object for page. */ @@ -294,13 +271,28 @@ extern int __lock_page_killable(struct page *page); extern void __lock_page_nosync(struct page *page); extern void unlock_page(struct page *page); +static inline void set_page_locked(struct page *page) +{ + set_bit(PG_locked, &page->flags); +} + +static inline void clear_page_locked(struct page *page) +{ + clear_bit(PG_locked, &page->flags); +} + +static inline int trylock_page(struct page *page) +{ + return !test_and_set_bit(PG_locked, &page->flags); +} + /* * lock_page may only be called if we have the page's inode pinned. */ static inline void lock_page(struct page *page) { might_sleep(); - if (TestSetPageLocked(page)) + if (!trylock_page(page)) __lock_page(page); } @@ -312,7 +304,7 @@ static inline void lock_page(struct page *page) static inline int lock_page_killable(struct page *page) { might_sleep(); - if (TestSetPageLocked(page)) + if (!trylock_page(page)) return __lock_page_killable(page); return 0; } @@ -324,7 +316,7 @@ static inline int lock_page_killable(struct page *page) static inline void lock_page_nosync(struct page *page) { might_sleep(); - if (TestSetPageLocked(page)) + if (!trylock_page(page)) __lock_page_nosync(page); } @@ -409,4 +401,27 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size) return ret; } +int add_to_page_cache_locked(struct page *page, struct address_space *mapping, + pgoff_t index, gfp_t gfp_mask); +int add_to_page_cache_lru(struct page *page, struct address_space *mapping, + pgoff_t index, gfp_t gfp_mask); +extern void remove_from_page_cache(struct page *page); +extern void __remove_from_page_cache(struct page *page); + +/* + * Like add_to_page_cache_locked, but used to add newly allocated pages: + * the page is new, so we can just run set_page_locked() against it. + */ +static inline int add_to_page_cache(struct page *page, + struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) +{ + int error; + + set_page_locked(page); + error = add_to_page_cache_locked(page, mapping, offset, gfp_mask); + if (unlikely(error)) + clear_page_locked(page); + return error; +} + #endif /* _LINUX_PAGEMAP_H */ diff --git a/mm/filemap.c b/mm/filemap.c index d97d1ad55473..54e968650855 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -558,14 +558,14 @@ EXPORT_SYMBOL(wait_on_page_bit); * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. * * The first mb is necessary to safely close the critical section opened by the - * TestSetPageLocked(), the second mb is necessary to enforce ordering between - * the clear_bit and the read of the waitqueue (to avoid SMP races with a - * parallel wait_on_page_locked()). + * test_and_set_bit() to lock the page; the second mb is necessary to enforce + * ordering between the clear_bit and the read of the waitqueue (to avoid SMP + * races with a parallel wait_on_page_locked()). */ void unlock_page(struct page *page) { smp_mb__before_clear_bit(); - if (!TestClearPageLocked(page)) + if (!test_and_clear_bit(PG_locked, &page->flags)) BUG(); smp_mb__after_clear_bit(); wake_up_page(page, PG_locked); @@ -931,7 +931,7 @@ grab_cache_page_nowait(struct address_space *mapping, pgoff_t index) struct page *page = find_get_page(mapping, index); if (page) { - if (!TestSetPageLocked(page)) + if (trylock_page(page)) return page; page_cache_release(page); return NULL; @@ -1027,7 +1027,7 @@ find_page: if (inode->i_blkbits == PAGE_CACHE_SHIFT || !mapping->a_ops->is_partially_uptodate) goto page_not_up_to_date; - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto page_not_up_to_date; if (!mapping->a_ops->is_partially_uptodate(page, desc, offset)) diff --git a/mm/memory.c b/mm/memory.c index a472bcd4b061..1002f473f497 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1789,7 +1789,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, * not dirty accountable. */ if (PageAnon(old_page)) { - if (!TestSetPageLocked(old_page)) { + if (trylock_page(old_page)) { reuse = can_share_swap_page(old_page); unlock_page(old_page); } diff --git a/mm/migrate.c b/mm/migrate.c index 153572fb60b8..2a80136b23bb 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -605,7 +605,7 @@ static int move_to_new_page(struct page *newpage, struct page *page) * establishing additional references. We are the only one * holding a reference to the new page at this point. */ - if (TestSetPageLocked(newpage)) + if (!trylock_page(newpage)) BUG(); /* Prepare mapping for the new page.*/ @@ -667,7 +667,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, BUG_ON(charge); rc = -EAGAIN; - if (TestSetPageLocked(page)) { + if (!trylock_page(page)) { if (!force) goto move_newpage; lock_page(page); diff --git a/mm/rmap.c b/mm/rmap.c index 94a5246a3f98..1ea4e6fcee77 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -422,7 +422,7 @@ int page_referenced(struct page *page, int is_locked, referenced += page_referenced_anon(page, mem_cont); else if (is_locked) referenced += page_referenced_file(page, mem_cont); - else if (TestSetPageLocked(page)) + else if (!trylock_page(page)) referenced++; else { if (page->mapping) diff --git a/mm/shmem.c b/mm/shmem.c index c1e5a3b4f758..04fb4f1ab88e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1265,7 +1265,7 @@ repeat: } /* We have to do this with page locked to prevent races */ - if (TestSetPageLocked(swappage)) { + if (!trylock_page(swappage)) { shmem_swp_unmap(entry); spin_unlock(&info->lock); wait_on_page_locked(swappage); @@ -1329,7 +1329,7 @@ repeat: shmem_swp_unmap(entry); filepage = find_get_page(mapping, idx); if (filepage && - (!PageUptodate(filepage) || TestSetPageLocked(filepage))) { + (!PageUptodate(filepage) || !trylock_page(filepage))) { spin_unlock(&info->lock); wait_on_page_locked(filepage); page_cache_release(filepage); diff --git a/mm/swap.c b/mm/swap.c index 7417a2adbe50..9e0cb3118079 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -444,7 +444,7 @@ void pagevec_strip(struct pagevec *pvec) for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; - if (PagePrivate(page) && !TestSetPageLocked(page)) { + if (PagePrivate(page) && trylock_page(page)) { if (PagePrivate(page)) try_to_release_page(page, 0); unlock_page(page); diff --git a/mm/swap_state.c b/mm/swap_state.c index b8035b055129..167cf2dc8a03 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -201,7 +201,7 @@ void delete_from_swap_cache(struct page *page) */ static inline void free_swap_cache(struct page *page) { - if (PageSwapCache(page) && !TestSetPageLocked(page)) { + if (PageSwapCache(page) && trylock_page(page)) { remove_exclusive_swap_page(page); unlock_page(page); } @@ -302,9 +302,9 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, * re-using the just freed swap entry for an existing page. * May fail (-ENOMEM) if radix-tree node allocation failed. */ - SetPageLocked(new_page); + set_page_locked(new_page); err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL); - if (!err) { + if (likely(!err)) { /* * Initiate read into locked page and return. */ @@ -312,7 +312,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, swap_readpage(NULL, new_page); return new_page; } - ClearPageLocked(new_page); + clear_page_locked(new_page); swap_free(entry); } while (err != -ENOMEM); diff --git a/mm/swapfile.c b/mm/swapfile.c index bb7f79641f9e..1e330f2998fa 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -403,7 +403,7 @@ void free_swap_and_cache(swp_entry_t entry) if (p) { if (swap_entry_free(p, swp_offset(entry)) == 1) { page = find_get_page(&swapper_space, entry.val); - if (page && unlikely(TestSetPageLocked(page))) { + if (page && unlikely(!trylock_page(page))) { page_cache_release(page); page = NULL; } diff --git a/mm/truncate.c b/mm/truncate.c index 894e9a70699f..250505091d37 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -187,7 +187,7 @@ void truncate_inode_pages_range(struct address_space *mapping, if (page_index > next) next = page_index; next++; - if (TestSetPageLocked(page)) + if (!trylock_page(page)) continue; if (PageWriteback(page)) { unlock_page(page); @@ -280,7 +280,7 @@ unsigned long __invalidate_mapping_pages(struct address_space *mapping, pgoff_t index; int lock_failed; - lock_failed = TestSetPageLocked(page); + lock_failed = !trylock_page(page); /* * We really shouldn't be looking at the ->index of an diff --git a/mm/vmscan.c b/mm/vmscan.c index 75be453628bf..1ff1a58e7c10 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -496,7 +496,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, page = lru_to_page(page_list); list_del(&page->lru); - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto keep; VM_BUG_ON(PageActive(page)); @@ -582,7 +582,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, * A synchronous write - probably a ramdisk. Go * ahead and try to reclaim the page. */ - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto keep; if (PageDirty(page) || PageWriteback(page)) goto keep_locked; -- cgit v1.2.3 From ca5de404ff036a29b25e9a83f6919c9f606c5841 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 2 Aug 2008 12:02:13 +0200 Subject: fs: rename buffer trylock Like the page lock change, this also requires name change, so convert the raw test_and_set bitop to a trylock. Signed-off-by: Nick Piggin Signed-off-by: Linus Torvalds --- fs/buffer.c | 4 ++-- fs/jbd/commit.c | 2 +- fs/ntfs/aops.c | 2 +- fs/ntfs/compress.c | 2 +- fs/ntfs/mft.c | 4 ++-- fs/reiserfs/inode.c | 2 +- fs/reiserfs/journal.c | 4 ++-- fs/xfs/linux-2.6/xfs_aops.c | 2 +- include/linux/buffer_head.h | 8 ++++++-- 9 files changed, 17 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 4dbe52948e8f..38653e36e225 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1720,7 +1720,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, */ if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { lock_buffer(bh); - } else if (test_set_buffer_locked(bh)) { + } else if (!trylock_buffer(bh)) { redirty_page_for_writepage(wbc, page); continue; } @@ -3000,7 +3000,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) if (rw == SWRITE || rw == SWRITE_SYNC) lock_buffer(bh); - else if (test_set_buffer_locked(bh)) + else if (!trylock_buffer(bh)) continue; if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) { diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 81a9ad7177ca..ae08c057e751 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -221,7 +221,7 @@ write_out_data: * blocking lock_buffer(). */ if (buffer_dirty(bh)) { - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { BUFFER_TRACE(bh, "needs blocking lock"); spin_unlock(&journal->j_list_lock); /* Write out all data to prevent deadlocks */ diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 00e9ccde8e42..b38f944f0667 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1194,7 +1194,7 @@ lock_retry_remap: tbh = bhs[i]; if (!tbh) continue; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); /* The buffer dirty state is now irrelevant, just clean it. */ clear_buffer_dirty(tbh); diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 33ff314cc507..9669541d0119 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -665,7 +665,7 @@ lock_retry_remap: for (i = 0; i < nr_bhs; i++) { struct buffer_head *tbh = bhs[i]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) continue; if (unlikely(buffer_uptodate(tbh))) { unlock_buffer(tbh); diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 790defb847e7..17d32ca6bc35 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -586,7 +586,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { struct buffer_head *tbh = bhs[i_bhs]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); BUG_ON(!buffer_uptodate(tbh)); clear_buffer_dirty(tbh); @@ -779,7 +779,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { struct buffer_head *tbh = bhs[i_bhs]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); BUG_ON(!buffer_uptodate(tbh)); clear_buffer_dirty(tbh); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 192269698a8a..5699171212ae 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2435,7 +2435,7 @@ static int reiserfs_write_full_page(struct page *page, if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { lock_buffer(bh); } else { - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { redirty_page_for_writepage(wbc, page); continue; } diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index ce2208b27118..c21df71943a6 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -855,7 +855,7 @@ static int write_ordered_buffers(spinlock_t * lock, jh = JH_ENTRY(list->next); bh = jh->bh; get_bh(bh); - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { if (!buffer_dirty(bh)) { list_move(&jh->list, &tmp); goto loop_next; @@ -3871,7 +3871,7 @@ int reiserfs_prepare_for_journal(struct super_block *p_s_sb, { PROC_INFO_INC(p_s_sb, journal.prepare); - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { if (!wait) return 0; lock_buffer(bh); diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index fa73179233ad..fa47e43b8b41 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1104,7 +1104,7 @@ xfs_page_state_convert( * that we are writing into for the first time. */ type = IOMAP_NEW; - if (!test_and_set_bit(BH_Lock, &bh->b_state)) { + if (trylock_buffer(bh)) { ASSERT(buffer_mapped(bh)); if (iomap_valid) all_bh = 1; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 50cfe8ceb478..eadaab44015f 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -115,7 +115,6 @@ BUFFER_FNS(Uptodate, uptodate) BUFFER_FNS(Dirty, dirty) TAS_BUFFER_FNS(Dirty, dirty) BUFFER_FNS(Lock, locked) -TAS_BUFFER_FNS(Lock, locked) BUFFER_FNS(Req, req) TAS_BUFFER_FNS(Req, req) BUFFER_FNS(Mapped, mapped) @@ -321,10 +320,15 @@ static inline void wait_on_buffer(struct buffer_head *bh) __wait_on_buffer(bh); } +static inline int trylock_buffer(struct buffer_head *bh) +{ + return likely(!test_and_set_bit(BH_Lock, &bh->b_state)); +} + static inline void lock_buffer(struct buffer_head *bh) { might_sleep(); - if (test_set_buffer_locked(bh)) + if (!trylock_buffer(bh)) __lock_buffer(bh); } -- cgit v1.2.3 From 378a2f090f7a478704a372a4869b8a9ac206234e Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 4 Aug 2008 22:31:03 -0700 Subject: net_sched: Add qdisc __NET_XMIT_STOLEN flag Patrick McHardy noticed: "The other problem that affects all qdiscs supporting actions is TC_ACT_QUEUED/TC_ACT_STOLEN getting mapped to NET_XMIT_SUCCESS even though the packet is not queued, corrupting upper qdiscs' qlen counters." and later explained: "The reason why it translates it at all seems to be to not increase the drops counter. Within a single qdisc this could be avoided by other means easily, upper qdiscs would still increase the counter when we return anything besides NET_XMIT_SUCCESS though. This means we need a new NET_XMIT return value to indicate this to the upper qdiscs. So I'd suggest to introduce NET_XMIT_STOLEN, return that to upper qdiscs and translate it to NET_XMIT_SUCCESS in dev_queue_xmit, similar to NET_XMIT_BYPASS." David Miller noticed: "Maybe these NET_XMIT_* values being passed around should be a set of bits. They could be composed of base meanings, combined with specific attributes. So you could say "NET_XMIT_DROP | __NET_XMIT_NO_DROP_COUNT" The attributes get masked out by the top-level ->enqueue() caller, such that the base meanings are the only thing that make their way up into the stack. If it's only about communication within the qdisc tree, let's simply code it that way." This patch is trying to realize these ideas. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + include/net/sch_generic.h | 14 +++++++++++++- net/sched/sch_atm.c | 12 +++++++----- net/sched/sch_cbq.c | 23 +++++++++++++++-------- net/sched/sch_dsmark.c | 8 +++++--- net/sched/sch_hfsc.c | 8 +++++--- net/sched/sch_htb.c | 18 +++++++++++------- net/sched/sch_netem.c | 3 ++- net/sched/sch_prio.c | 8 +++++--- net/sched/sch_red.c | 2 +- net/sched/sch_sfq.c | 2 +- net/sched/sch_tbf.c | 3 ++- 12 files changed, 68 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ee583f642a9f..abbf5d52ec86 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -64,6 +64,7 @@ struct wireless_dev; #define NET_XMIT_BYPASS 4 /* packet does not leave via dequeue; (TC use only - dev_queue_xmit returns this as NET_XMIT_SUCCESS) */ +#define NET_XMIT_MASK 0xFFFF /* qdisc flags in net/sch_generic.h */ /* Backlog congestion levels */ #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c5bb13065051..f15b045a85e9 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -343,6 +343,18 @@ static inline unsigned int qdisc_pkt_len(struct sk_buff *skb) return qdisc_skb_cb(skb)->pkt_len; } +#ifdef CONFIG_NET_CLS_ACT +/* additional qdisc xmit flags */ +enum net_xmit_qdisc_t { + __NET_XMIT_STOLEN = 0x00010000, +}; + +#define net_xmit_drop_count(e) ((e) & __NET_XMIT_STOLEN ? 0 : 1) + +#else +#define net_xmit_drop_count(e) (1) +#endif + static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) { #ifdef CONFIG_NET_SCHED @@ -355,7 +367,7 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch) { qdisc_skb_cb(skb)->pkt_len = skb->len; - return qdisc_enqueue(skb, sch); + return qdisc_enqueue(skb, sch) & NET_XMIT_MASK; } static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 6b517b9dac5b..27dd773481bc 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -415,7 +415,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) case TC_ACT_QUEUED: case TC_ACT_STOLEN: kfree_skb(skb); - return NET_XMIT_SUCCESS; + return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: kfree_skb(skb); goto drop; @@ -432,9 +432,11 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, flow->q); if (ret != 0) { drop: __maybe_unused - sch->qstats.drops++; - if (flow) - flow->qstats.drops++; + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + if (flow) + flow->qstats.drops++; + } return ret; } sch->bstats.bytes += qdisc_pkt_len(skb); @@ -530,7 +532,7 @@ static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch) if (!ret) { sch->q.qlen++; sch->qstats.requeues++; - } else { + } else if (net_xmit_drop_count(ret)) { sch->qstats.drops++; p->link.qstats.drops++; } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 14954bf4a683..765ae5659000 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -256,7 +256,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: - *qerr = NET_XMIT_SUCCESS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; case TC_ACT_RECLASSIFY: @@ -397,9 +397,11 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } - sch->qstats.drops++; - cbq_mark_toplevel(q, cl); - cl->qstats.drops++; + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + cbq_mark_toplevel(q, cl); + cl->qstats.drops++; + } return ret; } @@ -430,8 +432,10 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) cbq_activate_class(cl); return 0; } - sch->qstats.drops++; - cl->qstats.drops++; + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + cl->qstats.drops++; + } return ret; } @@ -664,13 +668,15 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) q->rx_class = NULL; if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) { + int ret; cbq_mark_toplevel(q, cl); q->rx_class = cl; cl->q->__parent = sch; - if (qdisc_enqueue(skb, cl->q) == 0) { + ret = qdisc_enqueue(skb, cl->q); + if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; sch->bstats.packets++; sch->bstats.bytes += qdisc_pkt_len(skb); @@ -678,7 +684,8 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) cbq_activate_class(cl); return 0; } - sch->qstats.drops++; + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; return 0; } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index a935676987e2..7170275d9f99 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -236,7 +236,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) case TC_ACT_QUEUED: case TC_ACT_STOLEN: kfree_skb(skb); - return NET_XMIT_SUCCESS; + return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: goto drop; @@ -254,7 +254,8 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) err = qdisc_enqueue(skb, p->q); if (err != NET_XMIT_SUCCESS) { - sch->qstats.drops++; + if (net_xmit_drop_count(err)) + sch->qstats.drops++; return err; } @@ -321,7 +322,8 @@ static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch) err = p->q->ops->requeue(skb, p->q); if (err != NET_XMIT_SUCCESS) { - sch->qstats.drops++; + if (net_xmit_drop_count(err)) + sch->qstats.drops++; return err; } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 0ae7d19dcba8..5cf9ae716118 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1166,7 +1166,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: - *qerr = NET_XMIT_SUCCESS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; } @@ -1586,8 +1586,10 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) err = qdisc_enqueue(skb, cl->qdisc); if (unlikely(err != NET_XMIT_SUCCESS)) { - cl->qstats.drops++; - sch->qstats.drops++; + if (net_xmit_drop_count(err)) { + cl->qstats.drops++; + sch->qstats.drops++; + } return err; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 75a40951c4f2..538d79b489ae 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -221,7 +221,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: - *qerr = NET_XMIT_SUCCESS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; } @@ -572,9 +572,11 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) kfree_skb(skb); return ret; #endif - } else if (qdisc_enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { - sch->qstats.drops++; - cl->qstats.drops++; + } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + cl->qstats.drops++; + } return NET_XMIT_DROP; } else { cl->bstats.packets += @@ -615,10 +617,12 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) kfree_skb(skb); return ret; #endif - } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != + } else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) { - sch->qstats.drops++; - cl->qstats.drops++; + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + cl->qstats.drops++; + } return NET_XMIT_DROP; } else htb_activate(q, cl); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a59085700678..6cd6f2bc749e 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -240,8 +240,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) sch->q.qlen++; sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; - } else + } else if (net_xmit_drop_count(ret)) { sch->qstats.drops++; + } pr_debug("netem: enqueue ret %d\n", ret); return ret; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index f849243eb095..adb1a52b77d3 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -45,7 +45,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) switch (err) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: - *qerr = NET_XMIT_SUCCESS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; } @@ -88,7 +88,8 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) sch->q.qlen++; return NET_XMIT_SUCCESS; } - sch->qstats.drops++; + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; return ret; } @@ -114,7 +115,8 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch) sch->qstats.requeues++; return 0; } - sch->qstats.drops++; + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; return NET_XMIT_DROP; } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 3f2d1d7f3bbd..5da05839e225 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -97,7 +97,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) sch->bstats.bytes += qdisc_pkt_len(skb); sch->bstats.packets++; sch->q.qlen++; - } else { + } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; sch->qstats.drops++; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 8589da666568..3a456e1b829a 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -178,7 +178,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, switch (result) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: - *qerr = NET_XMIT_SUCCESS; + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return 0; } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index b296672f7632..7d3b7ff3bf07 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -135,7 +135,8 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) ret = qdisc_enqueue(skb, q->qdisc); if (ret != 0) { - sch->qstats.drops++; + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; return ret; } -- cgit v1.2.3 From cc6533e98a7f3cb7fce9d740da49195c7aa523a4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 4 Aug 2008 23:04:08 -0700 Subject: net: Kill plain NET_XMIT_BYPASS. dst_input() was doing something completely absurd, looping on skb->dst->input() if NET_XMIT_BYPASS was seen, but these functions never return such an error. And as a result plain ole' NET_XMIT_BYPASS has no more references and can be completely killed off. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 --- include/net/dst.h | 12 +----------- 2 files changed, 1 insertion(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index abbf5d52ec86..488c56e649b5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -61,9 +61,6 @@ struct wireless_dev; #define NET_XMIT_DROP 1 /* skb dropped */ #define NET_XMIT_CN 2 /* congestion notification */ #define NET_XMIT_POLICED 3 /* skb is shot by police */ -#define NET_XMIT_BYPASS 4 /* packet does not leave via dequeue; - (TC use only - dev_queue_xmit - returns this as NET_XMIT_SUCCESS) */ #define NET_XMIT_MASK 0xFFFF /* qdisc flags in net/sch_generic.h */ /* Backlog congestion levels */ diff --git a/include/net/dst.h b/include/net/dst.h index c5c318a628f8..8a8b71e5f3f1 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -252,17 +252,7 @@ static inline int dst_output(struct sk_buff *skb) /* Input packet from network to transport. */ static inline int dst_input(struct sk_buff *skb) { - int err; - - for (;;) { - err = skb->dst->input(skb); - - if (likely(err == 0)) - return err; - /* Oh, Jamal... Seems, I will not forgive you this mess. :-) */ - if (unlikely(err != NET_XMIT_BYPASS)) - return err; - } + return skb->dst->input(skb); } static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) -- cgit v1.2.3 From 39b986a6c73434d122967dc86efb295ab9a28437 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 5 Aug 2008 18:16:57 +0200 Subject: ide: sanitize struct ide_port_ops documentation (take 2) v2: Add missing '@'-s. (Noticed by Randy Dunlap) Cc: Randy Dunlap Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index b846bc44a27e..b1fb15f10a00 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -509,24 +509,33 @@ struct ide_tp_ops { extern const struct ide_tp_ops default_tp_ops; +/** + * struct ide_port_ops - IDE port operations + * + * @init_dev: host specific initialization of a device + * @set_pio_mode: routine to program host for PIO mode + * @set_dma_mode: routine to program host for DMA mode + * @selectproc: tweaks hardware to select drive + * @reset_poll: chipset polling based on hba specifics + * @pre_reset: chipset specific changes to default for device-hba resets + * @resetproc: routine to reset controller after a disk reset + * @maskproc: special host masking for drive selection + * @quirkproc: check host's drive quirk list + * + * @mdma_filter: filter MDMA modes + * @udma_filter: filter UDMA modes + * + * @cable_detect: detect cable type + */ struct ide_port_ops { - /* host specific initialization of a device */ void (*init_dev)(ide_drive_t *); - /* routine to program host for PIO mode */ void (*set_pio_mode)(ide_drive_t *, const u8); - /* routine to program host for DMA mode */ void (*set_dma_mode)(ide_drive_t *, const u8); - /* tweaks hardware to select drive */ void (*selectproc)(ide_drive_t *); - /* chipset polling based on hba specifics */ int (*reset_poll)(ide_drive_t *); - /* chipset specific changes to default for device-hba resets */ void (*pre_reset)(ide_drive_t *); - /* routine to reset controller after a disk reset */ void (*resetproc)(ide_drive_t *); - /* special host masking for drive selection */ void (*maskproc)(ide_drive_t *, int); - /* check host's drive quirk list */ void (*quirkproc)(ide_drive_t *); u8 (*mdma_filter)(ide_drive_t *); -- cgit v1.2.3 From c5bfc3757f1d843a8e1261840c1f53c5062f8e92 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 5 Aug 2008 18:17:01 +0200 Subject: ide: remove CONFIG_IDE_MAX_HWIFS The benefits of a user settable CONFIG_IDE_MAX_HWIFS have become pretty tiny and are no longer considered worth the trouble of an own option. Simply always #define MAX_HWIFS to 10. Signed-off-by: Adrian Bunk Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/Kconfig | 10 ---------- include/linux/ide.h | 13 +------------ 2 files changed, 1 insertion(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 130ef64b44f7..a34758d29516 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -54,16 +54,6 @@ menuconfig IDE if IDE -config IDE_MAX_HWIFS - int "Max IDE interfaces" - depends on ALPHA || SUPERH || IA64 || EMBEDDED - range 1 10 - default 4 - help - This is the maximum number of IDE hardware interfaces that will - be supported by the driver. Make sure it is at least as high as - the number of IDE interfaces in your system. - config BLK_DEV_IDE tristate "Enhanced IDE/MFM/RLL disk/cdrom/tape/floppy support" ---help--- diff --git a/include/linux/ide.h b/include/linux/ide.h index b1fb15f10a00..87c12ed96954 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -219,18 +219,7 @@ static inline int __ide_default_irq(unsigned long base) #include #endif -#ifndef MAX_HWIFS -#if defined(CONFIG_BLACKFIN) || defined(CONFIG_H8300) || defined(CONFIG_XTENSA) -# define MAX_HWIFS 1 -#else -# define MAX_HWIFS 10 -#endif -#endif - -#if !defined(MAX_HWIFS) || defined(CONFIG_EMBEDDED) -#undef MAX_HWIFS -#define MAX_HWIFS CONFIG_IDE_MAX_HWIFS -#endif +#define MAX_HWIFS 10 /* Currently only m68k, apus and m8xx need it */ #ifndef IDE_ARCH_ACK_INTR -- cgit v1.2.3 From c6e2bee26eee190b20cd87e71b288bca6a5357a4 Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Tue, 5 Aug 2008 13:01:05 -0700 Subject: kdump: report actual value of VMCOREINFO_OSRELEASE in VMCOREINFO The current implementation reports the structure name as VMCOREINFO_OSRELEASE in VMCOREINFO, e.g. VMCOREINFO_OSRELEASE=init_uts_ns.name.release That doesn't make sense because it's always the same. Instead, use the value, e.g. VMCOREINFO_OSRELEASE=2.6.26-rc3 That's also what the 'makedumpfile -g' does. Signed-off-by: Bernhard Walle Cc: "Ken'ichi Ohmichi" Acked-by: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 82f88a8a827b..32110cede64f 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -130,8 +130,8 @@ void vmcoreinfo_append_str(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); unsigned long paddr_vmcoreinfo_note(void); -#define VMCOREINFO_OSRELEASE(name) \ - vmcoreinfo_append_str("OSRELEASE=%s\n", #name) +#define VMCOREINFO_OSRELEASE(value) \ + vmcoreinfo_append_str("OSRELEASE=%s\n", value) #define VMCOREINFO_PAGESIZE(value) \ vmcoreinfo_append_str("PAGESIZE=%ld\n", value) #define VMCOREINFO_SYMBOL(name) \ -- cgit v1.2.3 From 60cadec9da7b6c91aca51f408c828f7e74a68379 Mon Sep 17 00:00:00 2001 From: Shadi Ammouri Date: Tue, 5 Aug 2008 13:01:09 -0700 Subject: spi: new orion_spi driver This adds an SPI driver for the SPI controller found in various Marvell Orion ARM SoCs. It currently supports only one slave, which must use SPI mode 0. [dbrownell@users.sourceforge.net: cleanups, meet specs, pass "sparse"] Signed-off-by: Shadi Ammouri Signed-off-by: Saeed Bishara Signed-off-by: Lennert Buytenhek Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/spi/Kconfig | 6 + drivers/spi/Makefile | 1 + drivers/spi/orion_spi.c | 574 ++++++++++++++++++++++++++++++++++++++++++ include/linux/spi/orion_spi.h | 17 ++ 4 files changed, 598 insertions(+) create mode 100644 drivers/spi/orion_spi.c create mode 100644 include/linux/spi/orion_spi.h (limited to 'include/linux') diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 2303521b4f09..b9d0efb6803f 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -149,6 +149,12 @@ config SPI_OMAP24XX SPI master controller for OMAP24xx/OMAP34xx Multichannel SPI (McSPI) modules. +config SPI_ORION + tristate "Orion SPI master (EXPERIMENTAL)" + depends on PLAT_ORION && EXPERIMENTAL + help + This enables using the SPI master controller on the Orion chips. + config SPI_PXA2XX tristate "PXA2xx SSP SPI master" depends on ARCH_PXA && EXPERIMENTAL diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index 7fca043ce723..ccf18de34e1e 100644 --- a/drivers/spi/Makefile +++ b/drivers/spi/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_SPI_LM70_LLP) += spi_lm70llp.o obj-$(CONFIG_SPI_PXA2XX) += pxa2xx_spi.o obj-$(CONFIG_SPI_OMAP_UWIRE) += omap_uwire.o obj-$(CONFIG_SPI_OMAP24XX) += omap2_mcspi.o +obj-$(CONFIG_SPI_ORION) += orion_spi.o obj-$(CONFIG_SPI_MPC52xx_PSC) += mpc52xx_psc_spi.o obj-$(CONFIG_SPI_MPC83xx) += spi_mpc83xx.o obj-$(CONFIG_SPI_S3C24XX_GPIO) += spi_s3c24xx_gpio.o diff --git a/drivers/spi/orion_spi.c b/drivers/spi/orion_spi.c new file mode 100644 index 000000000000..c4eaacd6e553 --- /dev/null +++ b/drivers/spi/orion_spi.c @@ -0,0 +1,574 @@ +/* + * orion_spi.c -- Marvell Orion SPI controller driver + * + * Author: Shadi Ammouri + * Copyright (C) 2007-2008 Marvell Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "orion_spi" + +#define ORION_NUM_CHIPSELECTS 1 /* only one slave is supported*/ +#define ORION_SPI_WAIT_RDY_MAX_LOOP 2000 /* in usec */ + +#define ORION_SPI_IF_CTRL_REG 0x00 +#define ORION_SPI_IF_CONFIG_REG 0x04 +#define ORION_SPI_DATA_OUT_REG 0x08 +#define ORION_SPI_DATA_IN_REG 0x0c +#define ORION_SPI_INT_CAUSE_REG 0x10 + +#define ORION_SPI_IF_8_16_BIT_MODE (1 << 5) +#define ORION_SPI_CLK_PRESCALE_MASK 0x1F + +struct orion_spi { + struct work_struct work; + + /* Lock access to transfer list. */ + spinlock_t lock; + + struct list_head msg_queue; + struct spi_master *master; + void __iomem *base; + unsigned int max_speed; + unsigned int min_speed; + struct orion_spi_info *spi_info; +}; + +static struct workqueue_struct *orion_spi_wq; + +static inline void __iomem *spi_reg(struct orion_spi *orion_spi, u32 reg) +{ + return orion_spi->base + reg; +} + +static inline void +orion_spi_setbits(struct orion_spi *orion_spi, u32 reg, u32 mask) +{ + void __iomem *reg_addr = spi_reg(orion_spi, reg); + u32 val; + + val = readl(reg_addr); + val |= mask; + writel(val, reg_addr); +} + +static inline void +orion_spi_clrbits(struct orion_spi *orion_spi, u32 reg, u32 mask) +{ + void __iomem *reg_addr = spi_reg(orion_spi, reg); + u32 val; + + val = readl(reg_addr); + val &= ~mask; + writel(val, reg_addr); +} + +static int orion_spi_set_transfer_size(struct orion_spi *orion_spi, int size) +{ + if (size == 16) { + orion_spi_setbits(orion_spi, ORION_SPI_IF_CONFIG_REG, + ORION_SPI_IF_8_16_BIT_MODE); + } else if (size == 8) { + orion_spi_clrbits(orion_spi, ORION_SPI_IF_CONFIG_REG, + ORION_SPI_IF_8_16_BIT_MODE); + } else { + pr_debug("Bad bits per word value %d (only 8 or 16 are " + "allowed).\n", size); + return -EINVAL; + } + + return 0; +} + +static int orion_spi_baudrate_set(struct spi_device *spi, unsigned int speed) +{ + u32 tclk_hz; + u32 rate; + u32 prescale; + u32 reg; + struct orion_spi *orion_spi; + + orion_spi = spi_master_get_devdata(spi->master); + + tclk_hz = orion_spi->spi_info->tclk; + + /* + * the supported rates are: 4,6,8...30 + * round up as we look for equal or less speed + */ + rate = DIV_ROUND_UP(tclk_hz, speed); + rate = roundup(rate, 2); + + /* check if requested speed is too small */ + if (rate > 30) + return -EINVAL; + + if (rate < 4) + rate = 4; + + /* Convert the rate to SPI clock divisor value. */ + prescale = 0x10 + rate/2; + + reg = readl(spi_reg(orion_spi, ORION_SPI_IF_CONFIG_REG)); + reg = ((reg & ~ORION_SPI_CLK_PRESCALE_MASK) | prescale); + writel(reg, spi_reg(orion_spi, ORION_SPI_IF_CONFIG_REG)); + + return 0; +} + +/* + * called only when no transfer is active on the bus + */ +static int +orion_spi_setup_transfer(struct spi_device *spi, struct spi_transfer *t) +{ + struct orion_spi *orion_spi; + unsigned int speed = spi->max_speed_hz; + unsigned int bits_per_word = spi->bits_per_word; + int rc; + + orion_spi = spi_master_get_devdata(spi->master); + + if ((t != NULL) && t->speed_hz) + speed = t->speed_hz; + + if ((t != NULL) && t->bits_per_word) + bits_per_word = t->bits_per_word; + + rc = orion_spi_baudrate_set(spi, speed); + if (rc) + return rc; + + return orion_spi_set_transfer_size(orion_spi, bits_per_word); +} + +static void orion_spi_set_cs(struct orion_spi *orion_spi, int enable) +{ + if (enable) + orion_spi_setbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); + else + orion_spi_clrbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1); +} + +static inline int orion_spi_wait_till_ready(struct orion_spi *orion_spi) +{ + int i; + + for (i = 0; i < ORION_SPI_WAIT_RDY_MAX_LOOP; i++) { + if (readl(spi_reg(orion_spi, ORION_SPI_INT_CAUSE_REG))) + return 1; + else + udelay(1); + } + + return -1; +} + +static inline int +orion_spi_write_read_8bit(struct spi_device *spi, + const u8 **tx_buf, u8 **rx_buf) +{ + void __iomem *tx_reg, *rx_reg, *int_reg; + struct orion_spi *orion_spi; + + orion_spi = spi_master_get_devdata(spi->master); + tx_reg = spi_reg(orion_spi, ORION_SPI_DATA_OUT_REG); + rx_reg = spi_reg(orion_spi, ORION_SPI_DATA_IN_REG); + int_reg = spi_reg(orion_spi, ORION_SPI_INT_CAUSE_REG); + + /* clear the interrupt cause register */ + writel(0x0, int_reg); + + if (tx_buf && *tx_buf) + writel(*(*tx_buf)++, tx_reg); + else + writel(0, tx_reg); + + if (orion_spi_wait_till_ready(orion_spi) < 0) { + dev_err(&spi->dev, "TXS timed out\n"); + return -1; + } + + if (rx_buf && *rx_buf) + *(*rx_buf)++ = readl(rx_reg); + + return 1; +} + +static inline int +orion_spi_write_read_16bit(struct spi_device *spi, + const u16 **tx_buf, u16 **rx_buf) +{ + void __iomem *tx_reg, *rx_reg, *int_reg; + struct orion_spi *orion_spi; + + orion_spi = spi_master_get_devdata(spi->master); + tx_reg = spi_reg(orion_spi, ORION_SPI_DATA_OUT_REG); + rx_reg = spi_reg(orion_spi, ORION_SPI_DATA_IN_REG); + int_reg = spi_reg(orion_spi, ORION_SPI_INT_CAUSE_REG); + + /* clear the interrupt cause register */ + writel(0x0, int_reg); + + if (tx_buf && *tx_buf) + writel(__cpu_to_le16(get_unaligned((*tx_buf)++)), tx_reg); + else + writel(0, tx_reg); + + if (orion_spi_wait_till_ready(orion_spi) < 0) { + dev_err(&spi->dev, "TXS timed out\n"); + return -1; + } + + if (rx_buf && *rx_buf) + put_unaligned(__le16_to_cpu(readl(rx_reg)), (*rx_buf)++); + + return 1; +} + +static unsigned int +orion_spi_write_read(struct spi_device *spi, struct spi_transfer *xfer) +{ + struct orion_spi *orion_spi; + unsigned int count; + int word_len; + + orion_spi = spi_master_get_devdata(spi->master); + word_len = spi->bits_per_word; + count = xfer->len; + + if (word_len == 8) { + const u8 *tx = xfer->tx_buf; + u8 *rx = xfer->rx_buf; + + do { + if (orion_spi_write_read_8bit(spi, &tx, &rx) < 0) + goto out; + count--; + } while (count); + } else if (word_len == 16) { + const u16 *tx = xfer->tx_buf; + u16 *rx = xfer->rx_buf; + + do { + if (orion_spi_write_read_16bit(spi, &tx, &rx) < 0) + goto out; + count -= 2; + } while (count); + } + +out: + return xfer->len - count; +} + + +static void orion_spi_work(struct work_struct *work) +{ + struct orion_spi *orion_spi = + container_of(work, struct orion_spi, work); + + spin_lock_irq(&orion_spi->lock); + while (!list_empty(&orion_spi->msg_queue)) { + struct spi_message *m; + struct spi_device *spi; + struct spi_transfer *t = NULL; + int par_override = 0; + int status = 0; + int cs_active = 0; + + m = container_of(orion_spi->msg_queue.next, struct spi_message, + queue); + + list_del_init(&m->queue); + spin_unlock_irq(&orion_spi->lock); + + spi = m->spi; + + /* Load defaults */ + status = orion_spi_setup_transfer(spi, NULL); + + if (status < 0) + goto msg_done; + + list_for_each_entry(t, &m->transfers, transfer_list) { + if (par_override || t->speed_hz || t->bits_per_word) { + par_override = 1; + status = orion_spi_setup_transfer(spi, t); + if (status < 0) + break; + if (!t->speed_hz && !t->bits_per_word) + par_override = 0; + } + + if (!cs_active) { + orion_spi_set_cs(orion_spi, 1); + cs_active = 1; + } + + if (t->len) + m->actual_length += + orion_spi_write_read(spi, t); + + if (t->delay_usecs) + udelay(t->delay_usecs); + + if (t->cs_change) { + orion_spi_set_cs(orion_spi, 0); + cs_active = 0; + } + } + +msg_done: + if (cs_active) + orion_spi_set_cs(orion_spi, 0); + + m->status = status; + m->complete(m->context); + + spin_lock_irq(&orion_spi->lock); + } + + spin_unlock_irq(&orion_spi->lock); +} + +static int __init orion_spi_reset(struct orion_spi *orion_spi) +{ + /* Verify that the CS is deasserted */ + orion_spi_set_cs(orion_spi, 0); + + return 0; +} + +static int orion_spi_setup(struct spi_device *spi) +{ + struct orion_spi *orion_spi; + + orion_spi = spi_master_get_devdata(spi->master); + + if (spi->mode) { + dev_err(&spi->dev, "setup: unsupported mode bits %x\n", + spi->mode); + return -EINVAL; + } + + if (spi->bits_per_word == 0) + spi->bits_per_word = 8; + + if ((spi->max_speed_hz == 0) + || (spi->max_speed_hz > orion_spi->max_speed)) + spi->max_speed_hz = orion_spi->max_speed; + + if (spi->max_speed_hz < orion_spi->min_speed) { + dev_err(&spi->dev, "setup: requested speed too low %d Hz\n", + spi->max_speed_hz); + return -EINVAL; + } + + /* + * baudrate & width will be set orion_spi_setup_transfer + */ + return 0; +} + +static int orion_spi_transfer(struct spi_device *spi, struct spi_message *m) +{ + struct orion_spi *orion_spi; + struct spi_transfer *t = NULL; + unsigned long flags; + + m->actual_length = 0; + m->status = 0; + + /* reject invalid messages and transfers */ + if (list_empty(&m->transfers) || !m->complete) + return -EINVAL; + + orion_spi = spi_master_get_devdata(spi->master); + + list_for_each_entry(t, &m->transfers, transfer_list) { + unsigned int bits_per_word = spi->bits_per_word; + + if (t->tx_buf == NULL && t->rx_buf == NULL && t->len) { + dev_err(&spi->dev, + "message rejected : " + "invalid transfer data buffers\n"); + goto msg_rejected; + } + + if ((t != NULL) && t->bits_per_word) + bits_per_word = t->bits_per_word; + + if ((bits_per_word != 8) && (bits_per_word != 16)) { + dev_err(&spi->dev, + "message rejected : " + "invalid transfer bits_per_word (%d bits)\n", + bits_per_word); + goto msg_rejected; + } + /*make sure buffer length is even when working in 16 bit mode*/ + if ((t != NULL) && (t->bits_per_word == 16) && (t->len & 1)) { + dev_err(&spi->dev, + "message rejected : " + "odd data length (%d) while in 16 bit mode\n", + t->len); + goto msg_rejected; + } + + if (t->speed_hz < orion_spi->min_speed) { + dev_err(&spi->dev, + "message rejected : " + "device min speed (%d Hz) exceeds " + "required transfer speed (%d Hz)\n", + orion_spi->min_speed, t->speed_hz); + goto msg_rejected; + } + } + + + spin_lock_irqsave(&orion_spi->lock, flags); + list_add_tail(&m->queue, &orion_spi->msg_queue); + queue_work(orion_spi_wq, &orion_spi->work); + spin_unlock_irqrestore(&orion_spi->lock, flags); + + return 0; +msg_rejected: + /* Message rejected and not queued */ + m->status = -EINVAL; + if (m->complete) + m->complete(m->context); + return -EINVAL; +} + +static int __init orion_spi_probe(struct platform_device *pdev) +{ + struct spi_master *master; + struct orion_spi *spi; + struct resource *r; + struct orion_spi_info *spi_info; + int status = 0; + + spi_info = pdev->dev.platform_data; + + master = spi_alloc_master(&pdev->dev, sizeof *spi); + if (master == NULL) { + dev_dbg(&pdev->dev, "master allocation failed\n"); + return -ENOMEM; + } + + if (pdev->id != -1) + master->bus_num = pdev->id; + + master->setup = orion_spi_setup; + master->transfer = orion_spi_transfer; + master->num_chipselect = ORION_NUM_CHIPSELECTS; + + dev_set_drvdata(&pdev->dev, master); + + spi = spi_master_get_devdata(master); + spi->master = master; + spi->spi_info = spi_info; + + spi->max_speed = DIV_ROUND_UP(spi_info->tclk, 4); + spi->min_speed = DIV_ROUND_UP(spi_info->tclk, 30); + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (r == NULL) { + status = -ENODEV; + goto out; + } + + if (!request_mem_region(r->start, (r->end - r->start) + 1, + pdev->dev.bus_id)) { + status = -EBUSY; + goto out; + } + spi->base = ioremap(r->start, SZ_1K); + + INIT_WORK(&spi->work, orion_spi_work); + + spin_lock_init(&spi->lock); + INIT_LIST_HEAD(&spi->msg_queue); + + if (orion_spi_reset(spi) < 0) + goto out_rel_mem; + + status = spi_register_master(master); + if (status < 0) + goto out_rel_mem; + + return status; + +out_rel_mem: + release_mem_region(r->start, (r->end - r->start) + 1); + +out: + spi_master_put(master); + return status; +} + + +static int __exit orion_spi_remove(struct platform_device *pdev) +{ + struct spi_master *master; + struct orion_spi *spi; + struct resource *r; + + master = dev_get_drvdata(&pdev->dev); + spi = spi_master_get_devdata(master); + + cancel_work_sync(&spi->work); + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(r->start, (r->end - r->start) + 1); + + spi_unregister_master(master); + + return 0; +} + +MODULE_ALIAS("platform:" DRIVER_NAME); + +static struct platform_driver orion_spi_driver = { + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + }, + .remove = __exit_p(orion_spi_remove), +}; + +static int __init orion_spi_init(void) +{ + orion_spi_wq = create_singlethread_workqueue( + orion_spi_driver.driver.name); + if (orion_spi_wq == NULL) + return -ENOMEM; + + return platform_driver_probe(&orion_spi_driver, orion_spi_probe); +} +module_init(orion_spi_init); + +static void __exit orion_spi_exit(void) +{ + flush_workqueue(orion_spi_wq); + platform_driver_unregister(&orion_spi_driver); + + destroy_workqueue(orion_spi_wq); +} +module_exit(orion_spi_exit); + +MODULE_DESCRIPTION("Orion SPI driver"); +MODULE_AUTHOR("Shadi Ammouri "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/spi/orion_spi.h b/include/linux/spi/orion_spi.h new file mode 100644 index 000000000000..b4d9fa6f797c --- /dev/null +++ b/include/linux/spi/orion_spi.h @@ -0,0 +1,17 @@ +/* + * orion_spi.h + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef __LINUX_SPI_ORION_SPI_H +#define __LINUX_SPI_ORION_SPI_H + +struct orion_spi_info { + u32 tclk; /* no support yet */ +}; + + +#endif -- cgit v1.2.3 From f6ac436dcc4c34709bcde355f3f2254ac0a183d4 Mon Sep 17 00:00:00 2001 From: Mark Asselstine Date: Tue, 5 Aug 2008 13:01:24 -0700 Subject: Remove the deprecated cli() sti() functions These functions have been deprecated for some time now but remained until all legacy callers could be removed. With a few commits in 2.6.26 this has happened so now we can remove these deprecated functions. Signed-off-by: Mark Asselstine Reviewed-by: Matthew Wilcox Cc: Alan Cox Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/00-INDEX | 2 - Documentation/cli-sti-removal.txt | 133 -------------------------------------- include/linux/interrupt.h | 29 --------- 3 files changed, 164 deletions(-) delete mode 100644 Documentation/cli-sti-removal.txt (limited to 'include/linux') diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 6de71308a906..5b5aba404aac 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -89,8 +89,6 @@ cciss.txt - info, major/minor #'s for Compaq's SMART Array Controllers. cdrom/ - directory with information on the CD-ROM drivers that Linux has. -cli-sti-removal.txt - - cli()/sti() removal guide. computone.txt - info on Computone Intelliport II/Plus Multiport Serial Driver. connector/ diff --git a/Documentation/cli-sti-removal.txt b/Documentation/cli-sti-removal.txt deleted file mode 100644 index 60932b02fcb3..000000000000 --- a/Documentation/cli-sti-removal.txt +++ /dev/null @@ -1,133 +0,0 @@ - -#### cli()/sti() removal guide, started by Ingo Molnar - - -as of 2.5.28, five popular macros have been removed on SMP, and -are being phased out on UP: - - cli(), sti(), save_flags(flags), save_flags_cli(flags), restore_flags(flags) - -until now it was possible to protect driver code against interrupt -handlers via a cli(), but from now on other, more lightweight methods -have to be used for synchronization, such as spinlocks or semaphores. - -for example, driver code that used to do something like: - - struct driver_data; - - irq_handler (...) - { - .... - driver_data.finish = 1; - driver_data.new_work = 0; - .... - } - - ... - - ioctl_func (...) - { - ... - cli(); - ... - driver_data.finish = 0; - driver_data.new_work = 2; - ... - sti(); - ... - } - -was SMP-correct because the cli() function ensured that no -interrupt handler (amongst them the above irq_handler()) function -would execute while the cli()-ed section is executing. - -but from now on a more direct method of locking has to be used: - - DEFINE_SPINLOCK(driver_lock); - struct driver_data; - - irq_handler (...) - { - unsigned long flags; - .... - spin_lock_irqsave(&driver_lock, flags); - .... - driver_data.finish = 1; - driver_data.new_work = 0; - .... - spin_unlock_irqrestore(&driver_lock, flags); - .... - } - - ... - - ioctl_func (...) - { - ... - spin_lock_irq(&driver_lock); - ... - driver_data.finish = 0; - driver_data.new_work = 2; - ... - spin_unlock_irq(&driver_lock); - ... - } - -the above code has a number of advantages: - -- the locking relation is easier to understand - actual lock usage - pinpoints the critical sections. cli() usage is too opaque. - Easier to understand means it's easier to debug. - -- it's faster, because spinlocks are faster to acquire than the - potentially heavily-used IRQ lock. Furthermore, your driver does - not have to wait eg. for a big heavy SCSI interrupt to finish, - because the driver_lock spinlock is only used by your driver. - cli() on the other hand was used by many drivers, and extended - the critical section to the whole IRQ handler function - creating - serious lock contention. - - -to make the transition easier, we've still kept the cli(), sti(), -save_flags(), save_flags_cli() and restore_flags() macros defined -on UP systems - but their usage will be phased out until 2.6 is -released. - -drivers that want to disable local interrupts (interrupts on the -current CPU), can use the following five macros: - - local_irq_disable(), local_irq_enable(), local_save_flags(flags), - local_irq_save(flags), local_irq_restore(flags) - -but beware, their meaning and semantics are much simpler, far from -that of the old cli(), sti(), save_flags(flags) and restore_flags(flags) -SMP meaning: - - local_irq_disable() => turn local IRQs off - - local_irq_enable() => turn local IRQs on - - local_save_flags(flags) => save the current IRQ state into flags. The - state can be on or off. (on some - architectures there's even more bits in it.) - - local_irq_save(flags) => save the current IRQ state into flags and - disable interrupts. - - local_irq_restore(flags) => restore the IRQ state from flags. - -(local_irq_save can save both irqs on and irqs off state, and -local_irq_restore can restore into both irqs on and irqs off state.) - -another related change is that synchronize_irq() now takes a parameter: -synchronize_irq(irq). This change too has the purpose of making SMP -synchronization more lightweight - this way you can wait for your own -interrupt handler to finish, no need to wait for other IRQ sources. - - -why were these changes done? The main reason was the architectural burden -of maintaining the cli()/sti() interface - it became a real problem. The -new interrupt system is much more streamlined, easier to understand, debug, -and it's also a bit faster - the same happened to it that will happen to -cli()/sti() using drivers once they convert to spinlocks :-) - diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 62aa4f895abe..58ff4e74b2f3 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -223,35 +223,6 @@ static inline int disable_irq_wake(unsigned int irq) #define or_softirq_pending(x) (local_softirq_pending() |= (x)) #endif -/* - * Temporary defines for UP kernels, until all code gets fixed. - */ -#ifndef CONFIG_SMP -static inline void __deprecated cli(void) -{ - local_irq_disable(); -} -static inline void __deprecated sti(void) -{ - local_irq_enable(); -} -static inline void __deprecated save_flags(unsigned long *x) -{ - local_save_flags(*x); -} -#define save_flags(x) save_flags(&x) -static inline void __deprecated restore_flags(unsigned long x) -{ - local_irq_restore(x); -} - -static inline void __deprecated save_and_cli(unsigned long *x) -{ - local_irq_save(*x); -} -#define save_and_cli(x) save_and_cli(&x) -#endif /* CONFIG_SMP */ - /* Some architectures might implement lazy enabling/disabling of * interrupts. In some cases, such as stop_machine, we might want * to ensure that after a local_irq_disable(), interrupts have -- cgit v1.2.3 From bf1db69fbf4ff511e88736ce2e6318846f34492b Mon Sep 17 00:00:00 2001 From: Richard Hughes Date: Tue, 5 Aug 2008 13:01:35 -0700 Subject: pm_qos: spelling fixes A documentation cleanup patch. With a minor tweak to clarify units for kbs. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: mark gross Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/power/pm_qos_interface.txt | 7 ++++++- include/linux/pm_qos_params.h | 2 +- kernel/pm_qos_params.c | 16 ++++++++-------- 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt index 49adb1a33514..c40866e8b957 100644 --- a/Documentation/power/pm_qos_interface.txt +++ b/Documentation/power/pm_qos_interface.txt @@ -1,4 +1,4 @@ -PM quality of Service interface. +PM Quality Of Service Interface. This interface provides a kernel and user mode interface for registering performance expectations by drivers, subsystems and user space applications on @@ -7,6 +7,11 @@ one of the parameters. Currently we have {cpu_dma_latency, network_latency, network_throughput} as the initial set of pm_qos parameters. +Each parameters have defined units: + * latency: usec + * timeout: usec + * throughput: kbs (kilo bit / sec) + The infrastructure exposes multiple misc device nodes one per implemented parameter. The set of parameters implement is defined by pm_qos_power_init() and pm_qos_params.h. This is done because having the available parameters diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h index 2e4e97bd19f7..d74f75ed1e47 100644 --- a/include/linux/pm_qos_params.h +++ b/include/linux/pm_qos_params.h @@ -1,6 +1,6 @@ /* interface for the pm_qos_power infrastructure of the linux kernel. * - * Mark Gross + * Mark Gross */ #include #include diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index 8cb757026386..da9c2dda6a4e 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -24,7 +24,7 @@ * requirement that the application has is cleaned up when closes the file * pointer or exits the pm_qos_object will get an opportunity to clean up. * - * mark gross mgross@linux.intel.com + * Mark Gross */ #include @@ -211,8 +211,8 @@ EXPORT_SYMBOL_GPL(pm_qos_requirement); * @value: defines the qos request * * This function inserts a new entry in the pm_qos_class list of requested qos - * performance charactoistics. It recomputes the agregate QoS expectations for - * the pm_qos_class of parrameters. + * performance characteristics. It recomputes the aggregate QoS expectations + * for the pm_qos_class of parameters. */ int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value) { @@ -250,10 +250,10 @@ EXPORT_SYMBOL_GPL(pm_qos_add_requirement); * @name: identifies the request * @value: defines the qos request * - * Updates an existing qos requierement for the pm_qos_class of parameters along + * Updates an existing qos requirement for the pm_qos_class of parameters along * with updating the target pm_qos_class value. * - * If the named request isn't in the lest then no change is made. + * If the named request isn't in the list then no change is made. */ int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value) { @@ -287,7 +287,7 @@ EXPORT_SYMBOL_GPL(pm_qos_update_requirement); * @pm_qos_class: identifies which list of qos request to us * @name: identifies the request * - * Will remove named qos request from pm_qos_class list of parrameters and + * Will remove named qos request from pm_qos_class list of parameters and * recompute the current target value for the pm_qos_class. */ void pm_qos_remove_requirement(int pm_qos_class, char *name) @@ -319,7 +319,7 @@ EXPORT_SYMBOL_GPL(pm_qos_remove_requirement); * @notifier: notifier block managed by caller. * * will register the notifier into a notification chain that gets called - * uppon changes to the pm_qos_class target value. + * upon changes to the pm_qos_class target value. */ int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) { @@ -338,7 +338,7 @@ EXPORT_SYMBOL_GPL(pm_qos_add_notifier); * @notifier: notifier block to be removed. * * will remove the notifier from the notification chain that gets called - * uppon changes to the pm_qos_class target value. + * upon changes to the pm_qos_class target value. */ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) { -- cgit v1.2.3 From f780a9f119caa48088b230836a7fa73d1096de7c Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Wed, 6 Aug 2008 20:14:06 -0700 Subject: mlx4_core: Add ethernet fields to CQE struct Add ethernet-related fields to struct mlx4_cqe so that the mlx4_en ethernet NIC driver can share the same definition. Signed-off-by: Yevgeny Petrilin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/cq.c | 33 ++++++++++++++++----------------- include/linux/mlx4/cq.h | 36 ++++++++++++++++++++++++------------ 2 files changed, 40 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index a1464574bfdd..d0866a3636e2 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -515,17 +515,17 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe, wc->vendor_err = cqe->vendor_err_syndrome; } -static int mlx4_ib_ipoib_csum_ok(__be32 status, __be16 checksum) +static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) { - return ((status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4 | - MLX4_CQE_IPOIB_STATUS_IPV4F | - MLX4_CQE_IPOIB_STATUS_IPV4OPT | - MLX4_CQE_IPOIB_STATUS_IPV6 | - MLX4_CQE_IPOIB_STATUS_IPOK)) == - cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4 | - MLX4_CQE_IPOIB_STATUS_IPOK)) && - (status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_UDP | - MLX4_CQE_IPOIB_STATUS_TCP)) && + return ((status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPV4F | + MLX4_CQE_STATUS_IPV4OPT | + MLX4_CQE_STATUS_IPV6 | + MLX4_CQE_STATUS_IPOK)) == + cpu_to_be16(MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPOK)) && + (status & cpu_to_be16(MLX4_CQE_STATUS_UDP | + MLX4_CQE_STATUS_TCP)) && checksum == cpu_to_be16(0xffff); } @@ -582,17 +582,17 @@ repoll: } if (!*cur_qp || - (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) { + (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) { /* * We do not have to take the QP table lock here, * because CQs will be locked while QPs are removed * from the table. */ mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev, - be32_to_cpu(cqe->my_qpn)); + be32_to_cpu(cqe->vlan_my_qpn)); if (unlikely(!mqp)) { printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n", - cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff); + cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK); return -EINVAL; } @@ -692,14 +692,13 @@ repoll: } wc->slid = be16_to_cpu(cqe->rlid); - wc->sl = cqe->sl >> 4; + wc->sl = be16_to_cpu(cqe->sl_vid >> 12); g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); wc->src_qp = g_mlpath_rqpn & 0xffffff; wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0; wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; - wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->ipoib_status, - cqe->checksum); + wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum); } return 0; @@ -767,7 +766,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) */ while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); - if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) { + if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) { if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index)); ++nfreed; diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h index 071cf96cf01f..6f65b2c8bb89 100644 --- a/include/linux/mlx4/cq.h +++ b/include/linux/mlx4/cq.h @@ -39,17 +39,18 @@ #include struct mlx4_cqe { - __be32 my_qpn; + __be32 vlan_my_qpn; __be32 immed_rss_invalid; __be32 g_mlpath_rqpn; - u8 sl; - u8 reserved1; + __be16 sl_vid; __be16 rlid; - __be32 ipoib_status; + __be16 status; + u8 ipv6_ext_mask; + u8 badfcs_enc; __be32 byte_cnt; __be16 wqe_index; __be16 checksum; - u8 reserved2[3]; + u8 reserved[3]; u8 owner_sr_opcode; }; @@ -63,6 +64,11 @@ struct mlx4_err_cqe { u8 owner_sr_opcode; }; +enum { + MLX4_CQE_VLAN_PRESENT_MASK = 1 << 29, + MLX4_CQE_QPN_MASK = 0xffffff, +}; + enum { MLX4_CQE_OWNER_MASK = 0x80, MLX4_CQE_IS_SEND_MASK = 0x40, @@ -86,13 +92,19 @@ enum { }; enum { - MLX4_CQE_IPOIB_STATUS_IPV4 = 1 << 22, - MLX4_CQE_IPOIB_STATUS_IPV4F = 1 << 23, - MLX4_CQE_IPOIB_STATUS_IPV6 = 1 << 24, - MLX4_CQE_IPOIB_STATUS_IPV4OPT = 1 << 25, - MLX4_CQE_IPOIB_STATUS_TCP = 1 << 26, - MLX4_CQE_IPOIB_STATUS_UDP = 1 << 27, - MLX4_CQE_IPOIB_STATUS_IPOK = 1 << 28, + MLX4_CQE_STATUS_IPV4 = 1 << 6, + MLX4_CQE_STATUS_IPV4F = 1 << 7, + MLX4_CQE_STATUS_IPV6 = 1 << 8, + MLX4_CQE_STATUS_IPV4OPT = 1 << 9, + MLX4_CQE_STATUS_TCP = 1 << 10, + MLX4_CQE_STATUS_UDP = 1 << 11, + MLX4_CQE_STATUS_IPOK = 1 << 12, +}; + +enum { + MLX4_CQE_LLC = 1, + MLX4_CQE_SNAP = 1 << 1, + MLX4_CQE_BAD_FCS = 1 << 4, }; static inline void mlx4_cq_arm(struct mlx4_cq *cq, u32 cmd, -- cgit v1.2.3 From b11f8d8cc3bb2fa6fa55286babc1a5ebb2e932c4 Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Tue, 15 Jul 2008 02:18:41 -0700 Subject: ethtool: Expand ethtool_cmd.speed to 32 bits Introduce the speed_hi field to ethtool_cmd, using the reserved space, to expand the speed field to 2^32 Megabits/second. Making this field expansion now gives us plenty of time to fix up the user-space pieces that use SIOCETHTOOL before hardware faster than 64 Gb/s is available. Signed-off-by: Brandon Philips Signed-off-by: Jeff Garzik --- include/linux/ethtool.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 8bb5e87df365..b4b038b89ee6 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -27,9 +27,24 @@ struct ethtool_cmd { __u8 autoneg; /* Enable or disable autonegotiation */ __u32 maxtxpkt; /* Tx pkts before generating tx int */ __u32 maxrxpkt; /* Rx pkts before generating rx int */ - __u32 reserved[4]; + __u16 speed_hi; + __u16 reserved2; + __u32 reserved[3]; }; +static inline void ethtool_cmd_speed_set(struct ethtool_cmd *ep, + __u32 speed) +{ + + ep->speed = (__u16)speed; + ep->speed_hi = (__u16)(speed >> 16); +} + +static inline __u32 ethtool_cmd_speed(struct ethtool_cmd *ep) +{ + return (ep->speed_hi << 16) | ep->speed; +} + #define ETHTOOL_BUSINFO_LEN 32 /* these strings are set to whatever the driver author decides... */ struct ethtool_drvinfo { -- cgit v1.2.3 From fe414248551e2880fe8913577699003ff145ab9d Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 23 Jul 2008 17:41:52 +0200 Subject: dm9000: Support MAC address setting through platform data. The dm9000 driver reads the chip's MAC address from the attached EEPROM. When no EEPROM is present, or when the MAC address is invalid, it falls back to reading the address from the chip. This patch lets platform code set the desired MAC address through platform data. Signed-off-by: Laurent Pinchart Signed-off-by: Jeff Garzik --- drivers/net/dm9000.c | 5 +++++ include/linux/dm9000.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c index 0b0f1c407a7e..f42c23f42652 100644 --- a/drivers/net/dm9000.c +++ b/drivers/net/dm9000.c @@ -1374,6 +1374,11 @@ dm9000_probe(struct platform_device *pdev) for (i = 0; i < 6; i += 2) dm9000_read_eeprom(db, i / 2, ndev->dev_addr+i); + if (!is_valid_ether_addr(ndev->dev_addr) && pdata != NULL) { + mac_src = "platform data"; + memcpy(ndev->dev_addr, pdata->dev_addr, 6); + } + if (!is_valid_ether_addr(ndev->dev_addr)) { /* try reading from mac */ diff --git a/include/linux/dm9000.h b/include/linux/dm9000.h index fc82446b6425..c30879cf93bc 100644 --- a/include/linux/dm9000.h +++ b/include/linux/dm9000.h @@ -27,6 +27,7 @@ struct dm9000_plat_data { unsigned int flags; + unsigned char dev_addr[6]; /* allow replacement IO routines */ -- cgit v1.2.3 From 7d283aee50351ec19eaf654a8690d77c4e1dff50 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 6 Aug 2008 15:21:26 -0700 Subject: list.h: Add list_splice_tail() and list_splice_tail_init() If you are using linked lists for queues list_splice() will not do what you would expect even if you use the elements passed reversed. We need to handle these differently. We add list_splice_tail() and list_splice_tail_init(). Signed-off-by: Peter Zijlstra Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- drivers/dma/ioat_dma.c | 2 +- drivers/usb/host/ehci-q.c | 2 +- include/linux/list.h | 47 ++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 40 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c index a52156e56886..bc8c6e3470ca 100644 --- a/drivers/dma/ioat_dma.c +++ b/drivers/dma/ioat_dma.c @@ -551,7 +551,7 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) /* write address into NextDescriptor field of last desc in chain */ to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = first->async_tx.phys; - __list_splice(&new_chain, ioat_chan->used_desc.prev); + list_splice_tail(&new_chain, &ioat_chan->used_desc); ioat_chan->dmacount += desc_count; ioat_chan->pending += desc_count; diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index 2622b6596d7c..3712b925b315 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -932,7 +932,7 @@ static struct ehci_qh *qh_append_tds ( list_del (&qtd->qtd_list); list_add (&dummy->qtd_list, qtd_list); - __list_splice (qtd_list, qh->qtd_list.prev); + list_splice_tail(qtd_list, &qh->qtd_list); ehci_qtd_init(ehci, qtd, qtd->qtd_dma); qh->dummy = qtd; diff --git a/include/linux/list.h b/include/linux/list.h index 453916bc0412..a886f27a1181 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -215,21 +215,21 @@ static inline int list_is_singular(const struct list_head *head) } static inline void __list_splice(const struct list_head *list, - struct list_head *head) + struct list_head *prev, + struct list_head *next) { struct list_head *first = list->next; struct list_head *last = list->prev; - struct list_head *at = head->next; - first->prev = head; - head->next = first; + first->prev = prev; + prev->next = first; - last->next = at; - at->prev = last; + last->next = next; + next->prev = last; } /** - * list_splice - join two lists + * list_splice - join two lists, this is designed for stacks * @list: the new list to add. * @head: the place to add it in the first list. */ @@ -237,7 +237,19 @@ static inline void list_splice(const struct list_head *list, struct list_head *head) { if (!list_empty(list)) - __list_splice(list, head); + __list_splice(list, head, head->next); +} + +/** + * list_splice_tail - join two lists, each list being a queue + * @list: the new list to add. + * @head: the place to add it in the first list. + */ +static inline void list_splice_tail(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) + __list_splice(list, head->prev, head); } /** @@ -251,7 +263,24 @@ static inline void list_splice_init(struct list_head *list, struct list_head *head) { if (!list_empty(list)) { - __list_splice(list, head); + __list_splice(list, head, head->next); + INIT_LIST_HEAD(list); + } +} + +/** + * list_splice_tail_init - join two lists, each list being a queue, and + * reinitialise the emptied list. + * @list: the new list to add. + * @head: the place to add it in the first list. + * + * The list at @list is reinitialised + */ +static inline void list_splice_tail_init(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) { + __list_splice(list, head->prev, head); INIT_LIST_HEAD(list); } } -- cgit v1.2.3 From 00e8a4da8cf0d7dba8cc4b0da28ea0f12dcf6b36 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 6 Aug 2008 13:28:54 -0700 Subject: list.h: add list_cut_position() This adds list_cut_position() which lets you cut a list into two lists given a pivot in the list. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/linux/list.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index a886f27a1181..1d109e2ef0a9 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -214,6 +214,46 @@ static inline int list_is_singular(const struct list_head *head) return !list_empty(head) && (head->next == head->prev); } +static inline void __list_cut_position(struct list_head *list, + struct list_head *head, struct list_head *entry) +{ + struct list_head *new_first = entry->next; + list->next = head->next; + list->next->prev = list; + list->prev = entry; + entry->next = list; + head->next = new_first; + new_first->prev = head; +} + +/** + * list_cut_position - cut a list into two + * @list: a new list to add all removed entries + * @head: a list with entries + * @entry: an entry within head, could be the head itself + * and if so we won't cut the list + * + * This helper moves the initial part of @head, up to and + * including @entry, from @head to @list. You should + * pass on @entry an element you know is on @head. @list + * should be an empty list or a list you do not care about + * losing its data. + * + */ +static inline void list_cut_position(struct list_head *list, + struct list_head *head, struct list_head *entry) +{ + if (list_empty(head)) + return; + if (list_is_singular(head) && + (head->next != entry && head != entry)) + return; + if (entry == head) + INIT_LIST_HEAD(list); + else + __list_cut_position(list, head, entry); +} + static inline void __list_splice(const struct list_head *list, struct list_head *prev, struct list_head *next) -- cgit v1.2.3 From 5861bbfcc10fc0358abf52c7d22850c8d180f0b0 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 7 Aug 2008 16:55:03 -0700 Subject: tracehook: fix CLONE_PTRACE In the change in commit 09a05394fe2448a4139b014936330af23fa7ec83, I overlooked two nits in the logic and this broke using CLONE_PTRACE when PTRACE_O_TRACE* are not being used. A parent that is itself traced at all but not using PTRACE_O_TRACE*, using CLONE_PTRACE would have its new child fail to be traced. A parent that is not itself traced at all that uses CLONE_PTRACE (which should be a no-op in this case) would confuse the bookkeeping and lead to a crash at exit time. This restores the missing checks and fixes both failure modes. Reported-by: Eduardo Habkost Signed-off-by: Roland McGrath --- include/linux/ptrace.h | 2 +- include/linux/tracehook.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index fd31756e1a00..ea7416c901d1 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -172,7 +172,7 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) child->ptrace = 0; if (unlikely(ptrace)) { child->ptrace = current->ptrace; - __ptrace_link(child, current->parent); + ptrace_link(child, current->parent); } } diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index ab3ef7aefa95..b48d81969574 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -280,7 +280,7 @@ static inline void tracehook_report_clone(int trace, struct pt_regs *regs, unsigned long clone_flags, pid_t pid, struct task_struct *child) { - if (unlikely(trace)) { + if (unlikely(trace) || unlikely(clone_flags & CLONE_PTRACE)) { /* * The child starts up with an immediate SIGSTOP. */ -- cgit v1.2.3 From 2727f226a65e034f93846def7fab314dee430df3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 8 Aug 2008 15:13:27 +0100 Subject: [ARM] fix pnx4008 build errors include/linux/i2c-pnx.h was missed when moving the include files. Fix it now; it doesn't really need to include mach/i2c.h at all. Successfully build tested with pnx4008_defconfig, which had failed in linux-next. Signed-off-by: Russell King --- arch/arm/mach-pnx4008/include/mach/i2c.h | 3 --- include/linux/i2c-pnx.h | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-pnx4008/include/mach/i2c.h b/arch/arm/mach-pnx4008/include/mach/i2c.h index 92e8d65006f7..259ac53abf40 100644 --- a/arch/arm/mach-pnx4008/include/mach/i2c.h +++ b/arch/arm/mach-pnx4008/include/mach/i2c.h @@ -12,9 +12,6 @@ #ifndef __ASM_ARCH_I2C_H__ #define __ASM_ARCH_I2C_H__ -#include -#include - enum { mstatus_tdi = 0x00000001, mstatus_afi = 0x00000002, diff --git a/include/linux/i2c-pnx.h b/include/linux/i2c-pnx.h index e6e9c814da61..f13255e06406 100644 --- a/include/linux/i2c-pnx.h +++ b/include/linux/i2c-pnx.h @@ -12,7 +12,9 @@ #ifndef __I2C_PNX_H__ #define __I2C_PNX_H__ -#include +#include + +struct platform_device; struct i2c_pnx_mif { int ret; /* Return value */ -- cgit v1.2.3 From 6724cce8fb4b408ae1a2fab455050f3407c80144 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 8 Aug 2008 13:56:20 -0700 Subject: list.h: fix fatal kernel-doc error Fix fatal multi-line kernel-doc error in list.h: function short description must be on one line. Error(linux-2.6.27-rc2-git3//include/linux/list.h:318): duplicate section name 'Description' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- include/linux/list.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 1d109e2ef0a9..db35ef02e745 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -309,11 +309,11 @@ static inline void list_splice_init(struct list_head *list, } /** - * list_splice_tail_init - join two lists, each list being a queue, and - * reinitialise the emptied list. + * list_splice_tail_init - join two lists and reinitialise the emptied list * @list: the new list to add. * @head: the place to add it in the first list. * + * Each of the lists is a queue. * The list at @list is reinitialised */ static inline void list_splice_tail_init(struct list_head *list, -- cgit v1.2.3