From 1813a68457bb45b378d5bbec615b167deff3bcfc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 20 Jul 2010 15:22:54 -0700 Subject: x86: Move alloc_desc_masks variables inside ifdef They are only useful with CONFIG_CPUMASK_OFFSTACK. Avoids hundreds of warnings with a gcc 4.6 -Wall build. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index c03243ad84b4..fff1d77c3753 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -439,12 +439,12 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); static inline bool alloc_desc_masks(struct irq_desc *desc, int node, bool boot) { +#ifdef CONFIG_CPUMASK_OFFSTACK gfp_t gfp = GFP_ATOMIC; if (boot) gfp = GFP_NOWAIT; -#ifdef CONFIG_CPUMASK_OFFSTACK if (!alloc_cpumask_var_node(&desc->affinity, gfp, node)) return false; -- cgit v1.2.3 From e3239ff92a17976ac5d26fa0fe40ef3a9daf2523 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 14:06:41 +1000 Subject: memblock: Rename memblock_region to memblock_type and memblock_property to memblock_region Signed-off-by: Benjamin Herrenschmidt --- arch/arm/mm/init.c | 2 +- arch/arm/plat-omap/fb.c | 2 +- arch/microblaze/mm/init.c | 4 +- arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/mem.c | 26 ++--- arch/powerpc/platforms/embedded6xx/wii.c | 2 +- arch/sparc/mm/init_64.c | 6 +- drivers/video/omap2/vram.c | 2 +- include/linux/memblock.h | 24 ++--- mm/memblock.c | 168 +++++++++++++++---------------- 10 files changed, 118 insertions(+), 120 deletions(-) (limited to 'include') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 7185b00650fe..d1496e65dc2d 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -237,7 +237,7 @@ static void __init arm_bootmem_free(struct meminfo *mi, unsigned long min, #ifndef CONFIG_SPARSEMEM int pfn_valid(unsigned long pfn) { - struct memblock_region *mem = &memblock.memory; + struct memblock_type *mem = &memblock.memory; unsigned int left = 0, right = mem->cnt; do { diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c index 0054b9501a53..05bf22827404 100644 --- a/arch/arm/plat-omap/fb.c +++ b/arch/arm/plat-omap/fb.c @@ -173,7 +173,7 @@ static int check_fbmem_region(int region_idx, struct omapfb_mem_region *rg, static int valid_sdram(unsigned long addr, unsigned long size) { - struct memblock_property res; + struct memblock_region res; res.base = addr; res.size = size; diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index db5934989926..afd6494fbbc6 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -77,8 +77,8 @@ void __init setup_memory(void) /* Find main memory where is the kernel */ for (i = 0; i < memblock.memory.cnt; i++) { - memory_start = (u32) memblock.memory.region[i].base; - memory_end = (u32) memblock.memory.region[i].base + memory_start = (u32) memblock.memory.regions[i].base; + memory_end = (u32) memblock.memory.regions[i].base + (u32) memblock.memory.region[i].size; if ((memory_start <= (u32)_text) && ((u32)_text <= memory_end)) { diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 09dffe6efa46..b1a3784744db 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -660,7 +660,7 @@ static void __init htab_initialize(void) /* create bolted the linear mapping in the hash table */ for (i=0; i < memblock.memory.cnt;
i++) { - base = (unsigned long)__va(memblock.memory.region[i].base); + base = (unsigned long)__va(memblock.memory.regions[i].base); size = memblock.memory.region[i].size; DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1a84a8d00005..a33f5c186fb7 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -86,10 +86,10 @@ int page_is_ram(unsigned long pfn) for (i=0; i < memblock.memory.cnt; i++) { unsigned long base; - base = memblock.memory.region[i].base; + base = memblock.memory.regions[i].base; if ((paddr >= base) && - (paddr < (base + memblock.memory.region[i].size))) { + (paddr < (base + memblock.memory.regions[i].size))) { return 1; } } @@ -149,7 +149,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)) { - struct memblock_property res; + struct memblock_region res; unsigned long pfn, len; u64 end; int ret = -1; @@ -206,7 +206,7 @@ void __init do_init_bootmem(void) /* Add active regions with valid PFNs */ for (i = 0; i < memblock.memory.cnt; i++) { unsigned long start_pfn, end_pfn; - start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; + start_pfn = memblock.memory.regions[i].base >> PAGE_SHIFT; end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); add_active_range(0, start_pfn, end_pfn); } @@ -219,16 +219,16 @@ void __init do_init_bootmem(void) /* reserve the sections we're already using */ for (i = 0; i < memblock.reserved.cnt; i++) { - unsigned long addr = memblock.reserved.region[i].base + + unsigned long addr = memblock.reserved.regions[i].base + memblock_size_bytes(&memblock.reserved, i) - 1; if (addr < lowmem_end_addr) - reserve_bootmem(memblock.reserved.region[i].base, + reserve_bootmem(memblock.reserved.regions[i].base, memblock_size_bytes(&memblock.reserved, i), BOOTMEM_DEFAULT); - else if (memblock.reserved.region[i].base < lowmem_end_addr) { + else if (memblock.reserved.regions[i].base < lowmem_end_addr) { unsigned long adjusted_size = lowmem_end_addr - - memblock.reserved.region[i].base; - reserve_bootmem(memblock.reserved.region[i].base, + memblock.reserved.regions[i].base; + reserve_bootmem(memblock.reserved.regions[i].base, adjusted_size, BOOTMEM_DEFAULT); } } @@ -237,7 +237,7 @@ void __init do_init_bootmem(void) /* reserve the sections we're already using */ for (i = 0; i < memblock.reserved.cnt; i++) - reserve_bootmem(memblock.reserved.region[i].base, + reserve_bootmem(memblock.reserved.regions[i].base, memblock_size_bytes(&memblock.reserved, i), BOOTMEM_DEFAULT); @@ -257,10 +257,10 @@ static int __init mark_nonram_nosave(void) for (i = 0; i < memblock.memory.cnt - 1; i++) { memblock_region_max_pfn = - (memblock.memory.region[i].base >> PAGE_SHIFT) + - (memblock.memory.region[i].size >> PAGE_SHIFT); + (memblock.memory.regions[i].base >> PAGE_SHIFT) + + (memblock.memory.regions[i].size >> PAGE_SHIFT); memblock_next_region_start_pfn = - memblock.memory.region[i+1].base >> PAGE_SHIFT; + memblock.memory.regions[i+1].base >> PAGE_SHIFT; if (memblock_region_max_pfn < memblock_next_region_start_pfn) register_nosave_region(memblock_region_max_pfn, diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 5cdcc7c8d973..8450c29e9b2f 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -65,7 +65,7 @@ static int __init page_aligned(unsigned long x) void __init wii_memory_fixups(void) { - struct 
memblock_property *p = memblock.memory.region; + struct memblock_region *p = memblock.memory.region; /* * This is part of a workaround to allow the use of two diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index f0434513df15..16d8bee889ba 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -978,7 +978,7 @@ static void __init add_node_ranges(void) unsigned long size = memblock_size_bytes(&memblock.memory, i); unsigned long start, end; - start = memblock.memory.region[i].base; + start = memblock.memory.regions[i].base; end = start + size; while (start < end) { unsigned long this_end; @@ -1299,7 +1299,7 @@ static void __init bootmem_init_nonnuma(void) if (!size) continue; - start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; + start_pfn = memblock.memory.regions[i].base >> PAGE_SHIFT; end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); add_active_range(0, start_pfn, end_pfn); } @@ -1339,7 +1339,7 @@ static void __init trim_reserved_in_node(int nid) numadbg(" trim_reserved_in_node(%d)\n", nid); for (i = 0; i < memblock.reserved.cnt; i++) { - unsigned long start = memblock.reserved.region[i].base; + unsigned long start = memblock.reserved.regions[i].base; unsigned long size = memblock_size_bytes(&memblock.reserved, i); unsigned long end = start + size; diff --git a/drivers/video/omap2/vram.c b/drivers/video/omap2/vram.c index f6fdc2085f3e..0f2532bf0f04 100644 --- a/drivers/video/omap2/vram.c +++ b/drivers/video/omap2/vram.c @@ -554,7 +554,7 @@ void __init omap_vram_reserve_sdram_memblock(void) size = PAGE_ALIGN(size); if (paddr) { - struct memblock_property res; + struct memblock_region res; res.base = paddr; res.size = size; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index a59faf2b5edd..86e7daf742f2 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -18,22 +18,22 @@ #define MAX_MEMBLOCK_REGIONS 128 -struct memblock_property { +struct memblock_region { u64 base; u64 size; }; -struct memblock_region { +struct memblock_type { unsigned long cnt; u64 size; - struct memblock_property region[MAX_MEMBLOCK_REGIONS+1]; + struct memblock_region regions[MAX_MEMBLOCK_REGIONS+1]; }; struct memblock { unsigned long debug; u64 rmo_size; - struct memblock_region memory; - struct memblock_region reserved; + struct memblock_type memory; + struct memblock_type reserved; }; extern struct memblock memblock; @@ -56,27 +56,27 @@ extern u64 memblock_end_of_DRAM(void); extern void __init memblock_enforce_memory_limit(u64 memory_limit); extern int __init memblock_is_reserved(u64 addr); extern int memblock_is_region_reserved(u64 base, u64 size); -extern int memblock_find(struct memblock_property *res); +extern int memblock_find(struct memblock_region *res); extern void memblock_dump_all(void); static inline u64 -memblock_size_bytes(struct memblock_region *type, unsigned long region_nr) +memblock_size_bytes(struct memblock_type *type, unsigned long region_nr) { - return type->region[region_nr].size; + return type->regions[region_nr].size; } static inline u64 -memblock_size_pages(struct memblock_region *type, unsigned long region_nr) +memblock_size_pages(struct memblock_type *type, unsigned long region_nr) { return memblock_size_bytes(type, region_nr) >> PAGE_SHIFT; } static inline u64 -memblock_start_pfn(struct memblock_region *type, unsigned long region_nr) +memblock_start_pfn(struct memblock_type *type, unsigned long region_nr) { - return type->region[region_nr].base >> PAGE_SHIFT; + return type->regions[region_nr].base >> 
PAGE_SHIFT; } static inline u64 -memblock_end_pfn(struct memblock_region *type, unsigned long region_nr) +memblock_end_pfn(struct memblock_type *type, unsigned long region_nr) { return memblock_start_pfn(type, region_nr) + memblock_size_pages(type, region_nr); diff --git a/mm/memblock.c b/mm/memblock.c index 43840b305ecb..6f407ccf604e 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -29,7 +29,7 @@ static int __init early_memblock(char *p) } early_param("memblock", early_memblock); -static void memblock_dump(struct memblock_region *region, char *name) +static void memblock_dump(struct memblock_type *region, char *name) { unsigned long long base, size; int i; @@ -37,8 +37,8 @@ static void memblock_dump(struct memblock_region *region, char *name) pr_info(" %s.cnt = 0x%lx\n", name, region->cnt); for (i = 0; i < region->cnt; i++) { - base = region->region[i].base; - size = region->region[i].size; + base = region->regions[i].base; + size = region->regions[i].size; pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n", name, i, base, base + size - 1, size); @@ -74,34 +74,34 @@ static long memblock_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2) return 0; } -static long memblock_regions_adjacent(struct memblock_region *rgn, +static long memblock_regions_adjacent(struct memblock_type *type, unsigned long r1, unsigned long r2) { - u64 base1 = rgn->region[r1].base; - u64 size1 = rgn->region[r1].size; - u64 base2 = rgn->region[r2].base; - u64 size2 = rgn->region[r2].size; + u64 base1 = type->regions[r1].base; + u64 size1 = type->regions[r1].size; + u64 base2 = type->regions[r2].base; + u64 size2 = type->regions[r2].size; return memblock_addrs_adjacent(base1, size1, base2, size2); } -static void memblock_remove_region(struct memblock_region *rgn, unsigned long r) +static void memblock_remove_region(struct memblock_type *type, unsigned long r) { unsigned long i; - for (i = r; i < rgn->cnt - 1; i++) { - rgn->region[i].base = rgn->region[i + 1].base; - rgn->region[i].size = rgn->region[i + 1].size; + for (i = r; i < type->cnt - 1; i++) { + type->regions[i].base = type->regions[i + 1].base; + type->regions[i].size = type->regions[i + 1].size; } - rgn->cnt--; + type->cnt--; } /* Assumption: base addr of region 1 < base addr of region 2 */ -static void memblock_coalesce_regions(struct memblock_region *rgn, +static void memblock_coalesce_regions(struct memblock_type *type, unsigned long r1, unsigned long r2) { - rgn->region[r1].size += rgn->region[r2].size; - memblock_remove_region(rgn, r2); + type->regions[r1].size += type->regions[r2].size; + memblock_remove_region(type, r2); } void __init memblock_init(void) @@ -109,13 +109,13 @@ void __init memblock_init(void) /* Create a dummy zero size MEMBLOCK which will get coalesced away later. * This simplifies the memblock_add() code below... */ - memblock.memory.region[0].base = 0; - memblock.memory.region[0].size = 0; + memblock.memory.regions[0].base = 0; + memblock.memory.regions[0].size = 0; memblock.memory.cnt = 1; /* Ditto. 
*/ - memblock.reserved.region[0].base = 0; - memblock.reserved.region[0].size = 0; + memblock.reserved.regions[0].base = 0; + memblock.reserved.regions[0].size = 0; memblock.reserved.cnt = 1; } @@ -126,24 +126,24 @@ void __init memblock_analyze(void) memblock.memory.size = 0; for (i = 0; i < memblock.memory.cnt; i++) - memblock.memory.size += memblock.memory.region[i].size; + memblock.memory.size += memblock.memory.regions[i].size; } -static long memblock_add_region(struct memblock_region *rgn, u64 base, u64 size) +static long memblock_add_region(struct memblock_type *type, u64 base, u64 size) { unsigned long coalesced = 0; long adjacent, i; - if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) { - rgn->region[0].base = base; - rgn->region[0].size = size; + if ((type->cnt == 1) && (type->regions[0].size == 0)) { + type->regions[0].base = base; + type->regions[0].size = size; return 0; } /* First try and coalesce this MEMBLOCK with another. */ - for (i = 0; i < rgn->cnt; i++) { - u64 rgnbase = rgn->region[i].base; - u64 rgnsize = rgn->region[i].size; + for (i = 0; i < type->cnt; i++) { + u64 rgnbase = type->regions[i].base; + u64 rgnsize = type->regions[i].size; if ((rgnbase == base) && (rgnsize == size)) /* Already have this region, so we're done */ @@ -151,61 +151,59 @@ static long memblock_add_region(struct memblock_region *rgn, u64 base, u64 size) adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize); if (adjacent > 0) { - rgn->region[i].base -= size; - rgn->region[i].size += size; + type->regions[i].base -= size; + type->regions[i].size += size; coalesced++; break; } else if (adjacent < 0) { - rgn->region[i].size += size; + type->regions[i].size += size; coalesced++; break; } } - if ((i < rgn->cnt - 1) && memblock_regions_adjacent(rgn, i, i+1)) { - memblock_coalesce_regions(rgn, i, i+1); + if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i+1)) { + memblock_coalesce_regions(type, i, i+1); coalesced++; } if (coalesced) return coalesced; - if (rgn->cnt >= MAX_MEMBLOCK_REGIONS) + if (type->cnt >= MAX_MEMBLOCK_REGIONS) return -1; /* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */ - for (i = rgn->cnt - 1; i >= 0; i--) { - if (base < rgn->region[i].base) { - rgn->region[i+1].base = rgn->region[i].base; - rgn->region[i+1].size = rgn->region[i].size; + for (i = type->cnt - 1; i >= 0; i--) { + if (base < type->regions[i].base) { + type->regions[i+1].base = type->regions[i].base; + type->regions[i+1].size = type->regions[i].size; } else { - rgn->region[i+1].base = base; - rgn->region[i+1].size = size; + type->regions[i+1].base = base; + type->regions[i+1].size = size; break; } } - if (base < rgn->region[0].base) { - rgn->region[0].base = base; - rgn->region[0].size = size; + if (base < type->regions[0].base) { + type->regions[0].base = base; + type->regions[0].size = size; } - rgn->cnt++; + type->cnt++; return 0; } long memblock_add(u64 base, u64 size) { - struct memblock_region *_rgn = &memblock.memory; - /* On pSeries LPAR systems, the first MEMBLOCK is our RMO region. 
*/ if (base == 0) memblock.rmo_size = size; - return memblock_add_region(_rgn, base, size); + return memblock_add_region(&memblock.memory, base, size); } -static long __memblock_remove(struct memblock_region *rgn, u64 base, u64 size) +static long __memblock_remove(struct memblock_type *type, u64 base, u64 size) { u64 rgnbegin, rgnend; u64 end = base + size; @@ -214,34 +212,34 @@ static long __memblock_remove(struct memblock_region *rgn, u64 base, u64 size) rgnbegin = rgnend = 0; /* supress gcc warnings */ /* Find the region where (base, size) belongs to */ - for (i=0; i < rgn->cnt; i++) { - rgnbegin = rgn->region[i].base; - rgnend = rgnbegin + rgn->region[i].size; + for (i=0; i < type->cnt; i++) { + rgnbegin = type->regions[i].base; + rgnend = rgnbegin + type->regions[i].size; if ((rgnbegin <= base) && (end <= rgnend)) break; } /* Didn't find the region */ - if (i == rgn->cnt) + if (i == type->cnt) return -1; /* Check to see if we are removing entire region */ if ((rgnbegin == base) && (rgnend == end)) { - memblock_remove_region(rgn, i); + memblock_remove_region(type, i); return 0; } /* Check to see if region is matching at the front */ if (rgnbegin == base) { - rgn->region[i].base = end; - rgn->region[i].size -= size; + type->regions[i].base = end; + type->regions[i].size -= size; return 0; } /* Check to see if the region is matching at the end */ if (rgnend == end) { - rgn->region[i].size -= size; + type->regions[i].size -= size; return 0; } @@ -249,8 +247,8 @@ static long __memblock_remove(struct memblock_region *rgn, u64 base, u64 size) * We need to split the entry - adjust the current one to the * beginging of the hole and add the region after hole. */ - rgn->region[i].size = base - rgn->region[i].base; - return memblock_add_region(rgn, end, rgnend - end); + type->regions[i].size = base - type->regions[i].base; + return memblock_add_region(type, end, rgnend - end); } long memblock_remove(u64 base, u64 size) @@ -265,25 +263,25 @@ long __init memblock_free(u64 base, u64 size) long __init memblock_reserve(u64 base, u64 size) { - struct memblock_region *_rgn = &memblock.reserved; + struct memblock_type *_rgn = &memblock.reserved; BUG_ON(0 == size); return memblock_add_region(_rgn, base, size); } -long memblock_overlaps_region(struct memblock_region *rgn, u64 base, u64 size) +long memblock_overlaps_region(struct memblock_type *type, u64 base, u64 size) { unsigned long i; - for (i = 0; i < rgn->cnt; i++) { - u64 rgnbase = rgn->region[i].base; - u64 rgnsize = rgn->region[i].size; + for (i = 0; i < type->cnt; i++) { + u64 rgnbase = type->regions[i].base; + u64 rgnsize = type->regions[i].size; if (memblock_addrs_overlap(base, size, rgnbase, rgnsize)) break; } - return (i < rgn->cnt) ? i : -1; + return (i < type->cnt) ? 
i : -1; } static u64 memblock_align_down(u64 addr, u64 size) @@ -311,7 +309,7 @@ static u64 __init memblock_alloc_nid_unreserved(u64 start, u64 end, base = ~(u64)0; return base; } - res_base = memblock.reserved.region[j].base; + res_base = memblock.reserved.regions[j].base; if (res_base < size) break; base = memblock_align_down(res_base - size, align); @@ -320,7 +318,7 @@ static u64 __init memblock_alloc_nid_unreserved(u64 start, u64 end, return ~(u64)0; } -static u64 __init memblock_alloc_nid_region(struct memblock_property *mp, +static u64 __init memblock_alloc_nid_region(struct memblock_region *mp, u64 (*nid_range)(u64, u64, int *), u64 size, u64 align, int nid) { @@ -350,7 +348,7 @@ static u64 __init memblock_alloc_nid_region(struct memblock_property *mp, u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, u64 (*nid_range)(u64 start, u64 end, int *nid)) { - struct memblock_region *mem = &memblock.memory; + struct memblock_type *mem = &memblock.memory; int i; BUG_ON(0 == size); @@ -358,7 +356,7 @@ u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, size = memblock_align_up(size, align); for (i = 0; i < mem->cnt; i++) { - u64 ret = memblock_alloc_nid_region(&mem->region[i], + u64 ret = memblock_alloc_nid_region(&mem->regions[i], nid_range, size, align, nid); if (ret != ~(u64)0) @@ -402,8 +400,8 @@ u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr) max_addr = MEMBLOCK_REAL_LIMIT; for (i = memblock.memory.cnt - 1; i >= 0; i--) { - u64 memblockbase = memblock.memory.region[i].base; - u64 memblocksize = memblock.memory.region[i].size; + u64 memblockbase = memblock.memory.regions[i].base; + u64 memblocksize = memblock.memory.regions[i].size; if (memblocksize < size) continue; @@ -423,7 +421,7 @@ u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr) return 0; return base; } - res_base = memblock.reserved.region[j].base; + res_base = memblock.reserved.regions[j].base; if (res_base < size) break; base = memblock_align_down(res_base - size, align); @@ -442,7 +440,7 @@ u64 memblock_end_of_DRAM(void) { int idx = memblock.memory.cnt - 1; - return (memblock.memory.region[idx].base + memblock.memory.region[idx].size); + return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); } /* You must call memblock_analyze() after this. */ @@ -450,7 +448,7 @@ void __init memblock_enforce_memory_limit(u64 memory_limit) { unsigned long i; u64 limit; - struct memblock_property *p; + struct memblock_region *p; if (!memory_limit) return; @@ -458,24 +456,24 @@ void __init memblock_enforce_memory_limit(u64 memory_limit) /* Truncate the memblock regions to satisfy the memory limit. */ limit = memory_limit; for (i = 0; i < memblock.memory.cnt; i++) { - if (limit > memblock.memory.region[i].size) { - limit -= memblock.memory.region[i].size; + if (limit > memblock.memory.regions[i].size) { + limit -= memblock.memory.regions[i].size; continue; } - memblock.memory.region[i].size = limit; + memblock.memory.regions[i].size = limit; memblock.memory.cnt = i + 1; break; } - if (memblock.memory.region[0].size < memblock.rmo_size) - memblock.rmo_size = memblock.memory.region[0].size; + if (memblock.memory.regions[0].size < memblock.rmo_size) + memblock.rmo_size = memblock.memory.regions[0].size; memory_limit = memblock_end_of_DRAM(); /* And truncate any reserves above the limit also. 
*/ for (i = 0; i < memblock.reserved.cnt; i++) { - p = &memblock.reserved.region[i]; + p = &memblock.reserved.regions[i]; if (p->base > memory_limit) p->size = 0; @@ -494,9 +492,9 @@ int __init memblock_is_reserved(u64 addr) int i; for (i = 0; i < memblock.reserved.cnt; i++) { - u64 upper = memblock.reserved.region[i].base + - memblock.reserved.region[i].size - 1; - if ((addr >= memblock.reserved.region[i].base) && (addr <= upper)) + u64 upper = memblock.reserved.regions[i].base + + memblock.reserved.regions[i].size - 1; + if ((addr >= memblock.reserved.regions[i].base) && (addr <= upper)) return 1; } return 0; @@ -511,7 +509,7 @@ int memblock_is_region_reserved(u64 base, u64 size) * Given a <base, len>, find which memory regions belong to this range. * Adjust the request and return a contiguous chunk. */ -int memblock_find(struct memblock_property *res) +int memblock_find(struct memblock_region *res) { int i; u64 rstart, rend; @@ -520,8 +518,8 @@ int memblock_find(struct memblock_property *res) rend = rstart + res->size - 1; for (i = 0; i < memblock.memory.cnt; i++) { - u64 start = memblock.memory.region[i].base; - u64 end = start + memblock.memory.region[i].size - 1; + u64 start = memblock.memory.regions[i].base; + u64 end = start + memblock.memory.regions[i].size - 1; if (start > rend) return -1; -- cgit v1.2.3 From 411a25a80da328f5ae6b6c037872ffe867fcc130 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:38:56 -0700 Subject: memblock: No reason to include asm/memblock.h late Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 86e7daf742f2..4b6931327b22 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -16,6 +16,8 @@ #include <linux/init.h> #include <linux/mm.h> +#include <asm/memblock.h> + #define MAX_MEMBLOCK_REGIONS 128 struct memblock_region { @@ -82,8 +84,6 @@ memblock_end_pfn(struct memblock_type *type, unsigned long region_nr) memblock_size_pages(type, region_nr); } -#include <asm/memblock.h> - #endif /* __KERNEL__ */ #endif /* _LINUX_MEMBLOCK_H */ -- cgit v1.2.3 From 72d4b0b4e0e7fa858767e03972771a9f7c02b689 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 14:38:47 +1000 Subject: memblock: Implement memblock_is_memory and memblock_is_region_memory To make it fast, we steal ARM's binary search for memblock_is_memory() and we use that to also replace the existing implementation of memblock_is_reserved().
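The bisection works because memblock_add_region() keeps each region array sorted by base address and coalesces adjacent entries, so both arrays are sorted and non-overlapping by construction. A minimal userspace sketch of the same lookup, with simplified stand-in types rather than the kernel structures:

    #include <stdint.h>
    #include <stdio.h>

    struct region { uint64_t base, size; };

    /* Same invariant as memblock: sorted by base, non-overlapping */
    static int search(const struct region *r, unsigned int cnt, uint64_t addr)
    {
            unsigned int left = 0, right = cnt;

            do {
                    unsigned int mid = (left + right) / 2;

                    if (addr < r[mid].base)
                            right = mid;
                    else if (addr >= r[mid].base + r[mid].size)
                            left = mid + 1;
                    else
                            return mid;
            } while (left < right);
            return -1;
    }

    int main(void)
    {
            const struct region memory[] = { { 0x0, 0x1000 }, { 0x4000, 0x2000 } };

            printf("%d %d %d\n",
                   search(memory, 2, 0x800),    /* 0: inside the first region */
                   search(memory, 2, 0x2000),   /* -1: falls in the hole */
                   search(memory, 2, 0x5fff));  /* 1: last byte of the second region */
            return 0;
    }

memblock_is_memory() and memblock_is_reserved() then reduce to "did the search find an index", while the region-wide variant additionally checks that the whole [base, base+size) range is contained in the region found.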
Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 ++ mm/memblock.c | 42 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 36 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 4b6931327b22..47bceb187058 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -56,6 +56,8 @@ extern u64 __init __memblock_alloc_base(u64 size, extern u64 __init memblock_phys_mem_size(void); extern u64 memblock_end_of_DRAM(void); extern void __init memblock_enforce_memory_limit(u64 memory_limit); +extern int memblock_is_memory(u64 addr); +extern int memblock_is_region_memory(u64 base, u64 size); extern int __init memblock_is_reserved(u64 addr); extern int memblock_is_region_reserved(u64 base, u64 size); extern int memblock_find(struct memblock_region *res); diff --git a/mm/memblock.c b/mm/memblock.c index 6f407ccf604e..aa88c62bce7f 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -487,17 +487,43 @@ void __init memblock_enforce_memory_limit(u64 memory_limit) } } +static int memblock_search(struct memblock_type *type, u64 addr) +{ + unsigned int left = 0, right = type->cnt; + + do { + unsigned int mid = (right + left) / 2; + + if (addr < type->regions[mid].base) + right = mid; + else if (addr >= (type->regions[mid].base + + type->regions[mid].size)) + left = mid + 1; + else + return mid; + } while (left < right); + return -1; } + int __init memblock_is_reserved(u64 addr) { - int i; + return memblock_search(&memblock.reserved, addr) != -1; +} - for (i = 0; i < memblock.reserved.cnt; i++) { - u64 upper = memblock.reserved.regions[i].base + - memblock.reserved.regions[i].size - 1; - if ((addr >= memblock.reserved.regions[i].base) && (addr <= upper)) - return 1; - } - return 0; +int memblock_is_memory(u64 addr) +{ + return memblock_search(&memblock.memory, addr) != -1; +} + +int memblock_is_region_memory(u64 base, u64 size) +{ + int idx = memblock_search(&memblock.memory, base); + + if (idx == -1) + return 0; + return memblock.memory.regions[idx].base <= base && + (memblock.memory.regions[idx].base + + memblock.memory.regions[idx].size) >= (base + size); } int memblock_is_region_reserved(u64 base, u64 size) -- cgit v1.2.3 From 5b385f259fa4d356452e3b4729cbaf5213f4f55b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:40:38 +1000 Subject: memblock: Introduce for_each_memblock() and new accessors Walk memblocks using for_each_memblock() and use memblock_region_base/end_pfn() for getting to PFNs. Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 47bceb187058..c914112cd24f 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -64,6 +64,7 @@ extern int memblock_find(struct memblock_region *res); extern void memblock_dump_all(void); +/* Obsolete accessors */ static inline u64 memblock_size_bytes(struct memblock_type *type, unsigned long region_nr) { @@ -86,6 +87,57 @@ memblock_end_pfn(struct memblock_type *type, unsigned long region_nr) memblock_size_pages(type, region_nr); } +/* + * pfn conversion functions + * + * While the memory MEMBLOCKs should always be page aligned, the reserved + * MEMBLOCKs may not be. These accessors attempt to provide a very clear + * idea of what they return for such non-aligned MEMBLOCKs.
+ */ + +/** + * memblock_region_base_pfn - Return the lowest pfn intersecting with the region + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_base_pfn(const struct memblock_region *reg) +{ + return reg->base >> PAGE_SHIFT; +} + +/** + * memblock_region_last_pfn - Return the highest pfn intersecting with the region + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_last_pfn(const struct memblock_region *reg) +{ + return (reg->base + reg->size - 1) >> PAGE_SHIFT; +} + +/** + * memblock_region_end_pfn - Return the pfn of the first page following the region + * but not intersecting it + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_end_pfn(const struct memblock_region *reg) +{ + return memblock_region_last_pfn(reg) + 1; +} + +/** + * memblock_region_pages - Return the number of pages covering a region + * @reg: memblock_region structure + */ +static inline unsigned long memblock_region_pages(const struct memblock_region *reg) +{ + return memblock_region_end_pfn(reg) - memblock_region_base_pfn(reg); +} + +#define for_each_memblock(memblock_type, region) \ + for (region = memblock.memblock_type.regions; \ + region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ + region++) + + #endif /* __KERNEL__ */ #endif /* _LINUX_MEMBLOCK_H */ -- cgit v1.2.3 From 1e2b904026e9debf95f500b8980a00c43ac0f31c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:52:25 +1000 Subject: memblock: Remove obsolete accessors Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c914112cd24f..7d70fdd43db4 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -64,29 +64,6 @@ extern int memblock_find(struct memblock_region *res); extern void memblock_dump_all(void); -/* Obsolete accessors */ -static inline u64 -memblock_size_bytes(struct memblock_type *type, unsigned long region_nr) -{ - return type->regions[region_nr].size; -} -static inline u64 -memblock_size_pages(struct memblock_type *type, unsigned long region_nr) -{ - return memblock_size_bytes(type, region_nr) >> PAGE_SHIFT; -} -static inline u64 -memblock_start_pfn(struct memblock_type *type, unsigned long region_nr) -{ - return type->regions[region_nr].base >> PAGE_SHIFT; -} -static inline u64 -memblock_end_pfn(struct memblock_type *type, unsigned long region_nr) -{ - return memblock_start_pfn(type, region_nr) + - memblock_size_pages(type, region_nr); -} - /* * pfn conversion functions * -- cgit v1.2.3 From b693fffb189fbfe7e1e8317ce5838808be8666a0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:52:55 +1000 Subject: memblock: Remove memblock_find() Nobody uses it anymore. Its semantics were ...
weird Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 1 - mm/memblock.c | 32 -------------------------------- 2 files changed, 33 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 7d70fdd43db4..776c7d945dcc 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -60,7 +60,6 @@ extern int memblock_is_memory(u64 addr); extern int memblock_is_region_memory(u64 base, u64 size); extern int __init memblock_is_reserved(u64 addr); extern int memblock_is_region_reserved(u64 base, u64 size); -extern int memblock_find(struct memblock_region *res); extern void memblock_dump_all(void); diff --git a/mm/memblock.c b/mm/memblock.c index aa88c62bce7f..8a118b71cbec 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -531,35 +531,3 @@ int memblock_is_region_reserved(u64 base, u64 size) return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } -/* - * Given a <base, len>, find which memory regions belong to this range. - * Adjust the request and return a contiguous chunk. - */ -int memblock_find(struct memblock_region *res) -{ - int i; - u64 rstart, rend; - - rstart = res->base; - rend = rstart + res->size - 1; - - for (i = 0; i < memblock.memory.cnt; i++) { - u64 start = memblock.memory.regions[i].base; - u64 end = start + memblock.memory.regions[i].size - 1; - - if (start > rend) - return -1; - - if ((end >= rstart) && (start < rend)) { - /* adjust the request */ - if (rstart < start) - rstart = start; - if (rend > end) - rend = end; - res->base = rstart; - res->size = rend - rstart + 1; - return 0; - } - } - return -1; -} -- cgit v1.2.3 From 35a1f0bd07015dde66501b47cfb6ddc72ebe7346 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:38:58 -0700 Subject: memblock: Remove nid_range argument, arch provides memblock_nid_range() instead Signed-off-by: Benjamin Herrenschmidt --- arch/sparc/mm/init_64.c | 16 ++++++----------- include/linux/memblock.h | 7 +++++-- mm/memblock.c | 13 ++++++++----- 3 files changed, 19 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index dd68025ecdbe..0883113624b9 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -785,8 +785,7 @@ static int find_node(unsigned long addr) return -1; } -static unsigned long long nid_range(unsigned long long start, - unsigned long long end, int *nid) +u64 memblock_nid_range(u64 start, u64 end, int *nid) { *nid = find_node(start); start += PAGE_SIZE; @@ -804,8 +803,7 @@ static unsigned long long nid_range(unsigned long long start, return start; } #else -static unsigned long long nid_range(unsigned long long start, - unsigned long long end, int *nid) +u64 memblock_nid_range(u64 start, u64 end, int *nid) { *nid = 0; return end; @@ -822,8 +820,7 @@ static void __init allocate_node_data(int nid) struct pglist_data *p; #ifdef CONFIG_NEED_MULTIPLE_NODES - paddr = memblock_alloc_nid(sizeof(struct pglist_data), - SMP_CACHE_BYTES, nid, nid_range); + paddr = memblock_alloc_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid); if (!paddr) { prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid); prom_halt(); @@ -843,8 +840,7 @@ static void __init allocate_node_data(int nid) if (p->node_spanned_pages) { num_pages = bootmem_bootmap_pages(p->node_spanned_pages); - paddr = memblock_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid, - nid_range); + paddr = memblock_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid); if (!paddr) { prom_printf("Cannot
allocate bootmap for nid[%d]\n", nid); @@ -984,7 +980,7 @@ static void __init add_node_ranges(void) unsigned long this_end; int nid; - this_end = nid_range(start, end, &nid); + this_end = memblock_nid_range(start, end, &nid); numadbg("Adding active range nid[%d] " "start[%lx] end[%lx]\n", @@ -1317,7 +1313,7 @@ static void __init reserve_range_in_node(int nid, unsigned long start, unsigned long this_end; int n; - this_end = nid_range(start, end, &n); + this_end = memblock_nid_range(start, end, &n); if (n == nid) { numadbg(" MATCH reserving range [%lx:%lx]\n", start, this_end); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 776c7d945dcc..367dea6e95a0 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -46,8 +46,7 @@ extern long memblock_add(u64 base, u64 size); extern long memblock_remove(u64 base, u64 size); extern long __init memblock_free(u64 base, u64 size); extern long __init memblock_reserve(u64 base, u64 size); -extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, - u64 (*nid_range)(u64, u64, int *)); +extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid); extern u64 __init memblock_alloc(u64 size, u64 align); extern u64 __init memblock_alloc_base(u64 size, u64, u64 max_addr); @@ -63,6 +62,10 @@ extern int memblock_is_region_reserved(u64 base, u64 size); extern void memblock_dump_all(void); +/* Provided by the architecture */ +extern u64 memblock_nid_range(u64 start, u64 end, int *nid); + + /* * pfn conversion functions * diff --git a/mm/memblock.c b/mm/memblock.c index 8a118b71cbec..13807f280ada 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -319,7 +319,6 @@ static u64 __init memblock_alloc_nid_unreserved(u64 start, u64 end, } static u64 __init memblock_alloc_nid_region(struct memblock_region *mp, - u64 (*nid_range)(u64, u64, int *), u64 size, u64 align, int nid) { u64 start, end; @@ -332,7 +331,7 @@ static u64 __init memblock_alloc_nid_region(struct memblock_region *mp, u64 this_end; int this_nid; - this_end = nid_range(start, end, &this_nid); + this_end = memblock_nid_range(start, end, &this_nid); if (this_nid == nid) { u64 ret = memblock_alloc_nid_unreserved(start, this_end, size, align); @@ -345,8 +344,7 @@ static u64 __init memblock_alloc_nid_region(struct memblock_region *mp, return ~(u64)0; } -u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, - u64 (*nid_range)(u64 start, u64 end, int *nid)) +u64 __init memblock_alloc_nid(u64 size, u64 align, int nid) { struct memblock_type *mem = &memblock.memory; int i; @@ -357,7 +355,6 @@ u64 __init memblock_alloc_nid(u64 size, u64 align, int nid, for (i = 0; i < mem->cnt; i++) { u64 ret = memblock_alloc_nid_region(&mem->regions[i], - nid_range, size, align, nid); if (ret != ~(u64)0) return ret; @@ -531,3 +528,9 @@ int memblock_is_region_reserved(u64 base, u64 size) return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } +u64 __weak memblock_nid_range(u64 start, u64 end, int *nid) +{ + *nid = 0; + + return end; +} -- cgit v1.2.3 From 27f574c223d2c09610058b3ec7a29582d63a3e06 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:00 -0700 Subject: memblock: Expose MEMBLOCK_ALLOC_ANYWHERE Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hash_utils_64.c | 2 +- include/linux/memblock.h | 1 + mm/memblock.c | 2 -- 3 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 4072b871497d..a542ff5ec8a9 100644 --- 
a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -625,7 +625,7 @@ static void __init htab_initialize(void) if (machine_is(cell)) limit = 0x80000000; else - limit = 0; + limit = MEMBLOCK_ALLOC_ANYWHERE; table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 367dea6e95a0..3cf3304e901d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -50,6 +50,7 @@ extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid); extern u64 __init memblock_alloc(u64 size, u64 align); extern u64 __init memblock_alloc_base(u64 size, u64, u64 max_addr); +#define MEMBLOCK_ALLOC_ANYWHERE 0 extern u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr); extern u64 __init memblock_phys_mem_size(void); diff --git a/mm/memblock.c b/mm/memblock.c index e264e8c70892..0131684c42f8 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -15,8 +15,6 @@ #include #include -#define MEMBLOCK_ALLOC_ANYWHERE 0 - struct memblock memblock; static int memblock_debug; -- cgit v1.2.3 From e63075a3c9377536d085bc013cd3fe6323162449 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:01 -0700 Subject: memblock: Introduce default allocation limit and use it to replace explicit ones This introduces memblock.current_limit which is used to limit allocations from memblock_alloc() or memblock_alloc_base(..., MEMBLOCK_ALLOC_ACCESSIBLE). The old MEMBLOCK_ALLOC_ANYWHERE changes value from 0 to ~(u64)0 and can still be used with memblock_alloc_base() to allocate really anywhere. It is -no-longer- cropped to MEMBLOCK_REAL_LIMIT which disappears. Note to archs: I'm leaving the default limit to MEMBLOCK_ALLOC_ANYWHERE. I strongly recommend that you ensure that you set an appropriate limit during boot in order to guarantee that a memblock_alloc() at any time results in something that is accessible with a simple __va(). The reason is that a subsequent patch will introduce the ability for the array to resize itself by reallocating itself. The MEMBLOCK core will honor the current limit when performing those allocations.
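Concretely, the resolution this patch adds boils down to a couple of lines; here is a compilable sketch with a stand-in for the one memblock field involved (illustration only, the real logic sits in __memblock_alloc_base() below):

    #include <stdint.h>
    #include <stdio.h>

    #define MEMBLOCK_ALLOC_ANYWHERE   (~(uint64_t)0)
    #define MEMBLOCK_ALLOC_ACCESSIBLE 0

    /* Stand-in for the one field of struct memblock used here */
    static struct { uint64_t current_limit; } memblock = {
            .current_limit = 0x10000000,    /* e.g. 256MB mapped early */
    };

    /* Mirrors the max_addr handling added to __memblock_alloc_base() */
    static uint64_t resolve_max_addr(uint64_t max_addr)
    {
            if (max_addr == MEMBLOCK_ALLOC_ACCESSIBLE)
                    return memblock.current_limit;  /* honor the boot-time limit */
            return max_addr;        /* explicit cap, or ~0 to allocate anywhere */
    }

    int main(void)
    {
            printf("%#llx\n", (unsigned long long)resolve_max_addr(MEMBLOCK_ALLOC_ACCESSIBLE));
            printf("%#llx\n", (unsigned long long)resolve_max_addr(MEMBLOCK_ALLOC_ANYWHERE));
            printf("%#llx\n", (unsigned long long)resolve_max_addr(0x01000000));
            return 0;
    }

Since memblock_alloc() now passes MEMBLOCK_ALLOC_ACCESSIBLE, plain allocations respect current_limit, while memblock_alloc_base(..., MEMBLOCK_ALLOC_ANYWHERE) still escapes it.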
Signed-off-by: Benjamin Herrenschmidt --- arch/microblaze/include/asm/memblock.h | 3 --- arch/powerpc/include/asm/memblock.h | 7 ------- arch/powerpc/kernel/prom.c | 20 +++++++++++++++++++- arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/mm/40x_mmu.c | 5 +++-- arch/powerpc/mm/fsl_booke_mmu.c | 3 ++- arch/powerpc/mm/hash_utils_64.c | 3 ++- arch/powerpc/mm/init_32.c | 29 +++++++---------------------- arch/powerpc/mm/ppc_mmu_32.c | 3 +-- arch/powerpc/mm/tlb_nohash.c | 2 ++ arch/sh/include/asm/memblock.h | 2 -- arch/sparc/include/asm/memblock.h | 2 -- include/linux/memblock.h | 16 +++++++++++++++- mm/memblock.c | 19 +++++++++++-------- 14 files changed, 63 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/arch/microblaze/include/asm/memblock.h b/arch/microblaze/include/asm/memblock.h index f9c2fa331d2a..20a8e257c77f 100644 --- a/arch/microblaze/include/asm/memblock.h +++ b/arch/microblaze/include/asm/memblock.h @@ -9,9 +9,6 @@ #ifndef _ASM_MICROBLAZE_MEMBLOCK_H #define _ASM_MICROBLAZE_MEMBLOCK_H -/* MEMBLOCK limit is OFF */ -#define MEMBLOCK_REAL_LIMIT 0xFFFFFFFF - #endif /* _ASM_MICROBLAZE_MEMBLOCK_H */ diff --git a/arch/powerpc/include/asm/memblock.h b/arch/powerpc/include/asm/memblock.h index 3c29728b56b1..43efc345065e 100644 --- a/arch/powerpc/include/asm/memblock.h +++ b/arch/powerpc/include/asm/memblock.h @@ -5,11 +5,4 @@ #define MEMBLOCK_DBG(fmt...) udbg_printf(fmt) -#ifdef CONFIG_PPC32 -extern phys_addr_t lowmem_end_addr; -#define MEMBLOCK_REAL_LIMIT lowmem_end_addr -#else -#define MEMBLOCK_REAL_LIMIT 0 -#endif - #endif /* _ASM_POWERPC_MEMBLOCK_H */ diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fed9bf6187d1..3aec0b980f6a 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -98,7 +98,7 @@ static void __init move_device_tree(void) if ((memory_limit && (start + size) > memory_limit) || overlaps_crashkernel(start, size)) { - p = __va(memblock_alloc_base(size, PAGE_SIZE, memblock.rmo_size)); + p = __va(memblock_alloc(size, PAGE_SIZE)); memcpy(p, initial_boot_params, size); initial_boot_params = (struct boot_param_header *)p; DBG("Moved device tree to 0x%p\n", p); @@ -655,6 +655,21 @@ static void __init phyp_dump_reserve_mem(void) static inline void __init phyp_dump_reserve_mem(void) {} #endif /* CONFIG_PHYP_DUMP && CONFIG_PPC_RTAS */ +static void set_boot_memory_limit(void) +{ +#ifdef CONFIG_PPC32 + /* 601 can only access 16MB at the moment */ + if (PVR_VER(mfspr(SPRN_PVR)) == 1) + memblock_set_current_limit(0x01000000); + /* 8xx can only access 8MB at the moment */ + else if (PVR_VER(mfspr(SPRN_PVR)) == 0x50) + memblock_set_current_limit(0x00800000); + else + memblock_set_current_limit(0x10000000); +#else + memblock_set_current_limit(memblock.rmo_size); +#endif +} void __init early_init_devtree(void *params) { @@ -683,6 +698,7 @@ void __init early_init_devtree(void *params) /* Scan memory nodes and rebuild MEMBLOCKs */ memblock_init(); + of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); @@ -718,6 +734,8 @@ void __init early_init_devtree(void *params) DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); + set_boot_memory_limit(); + /* We may need to relocate the flat tree, do it now. * FIXME .. and the initrd too? 
*/ move_device_tree(); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index a10ffc85ada7..b7eb1ded3b5f 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -246,7 +246,7 @@ static void __init irqstack_early_init(void) unsigned int i; /* interrupt stacks must be in lowmem, we get that for free on ppc32 - * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */ + * as the memblock is limited to lowmem by default */ for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c index 1dc2fa5ce1bd..58969b51f454 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/40x_mmu.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -47,6 +48,7 @@ #include #include #include + #include "mmu_decl.h" extern int __map_without_ltlbs; @@ -139,8 +141,7 @@ unsigned long __init mmu_mapin_ram(unsigned long top) * coverage with normal-sized pages (or other reasons) do not * attempt to allocate outside the allowed range. */ - - __initial_memory_limit_addr = memstart_addr + mapped; + memblock_set_current_limit(memstart_addr + mapped); return mapped; } diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index cdc7526e9c93..e525f862d759 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -212,5 +213,5 @@ void __init adjust_total_lowmem(void) pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20, (unsigned int)((total_lowmem - __max_low_memory) >> 20)); - __initial_memory_limit_addr = memstart_addr + __max_low_memory; + memblock_set_current_limit(memstart_addr + __max_low_memory); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index a542ff5ec8a9..b05890e23813 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -696,7 +696,8 @@ static void __init htab_initialize(void) #endif /* CONFIG_U3_DART */ BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), prot, mmu_linear_psize, mmu_kernel_ssize)); - } + } + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); /* * If we have a memory_limit and we've allocated TCEs then we need to diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 6a6975dc2654..59b208b7ec6f 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -91,12 +91,6 @@ int __allow_ioremap_reserved; /* max amount of low RAM to map in */ unsigned long __max_low_memory = MAX_LOW_MEM; -/* - * address of the limit of what is accessible with initial MMU setup - - * 256MB usually, but only 16MB on 601. - */ -phys_addr_t __initial_memory_limit_addr = (phys_addr_t)0x10000000; - /* * Check for command-line options that affect what MMU_init will do. 
*/ @@ -126,13 +120,6 @@ void __init MMU_init(void) if (ppc_md.progress) ppc_md.progress("MMU:enter", 0x111); - /* 601 can only access 16MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 1) - __initial_memory_limit_addr = 0x01000000; - /* 8xx can only access 8MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 0x50) - __initial_memory_limit_addr = 0x00800000; - /* parse args from command line */ MMU_setup(); @@ -190,20 +177,18 @@ void __init MMU_init(void) #ifdef CONFIG_BOOTX_TEXT btext_unmap(); #endif + + /* Shortly after that, the entire linear mapping will be available */ + memblock_set_current_limit(lowmem_end_addr); } /* This is only called until mem_init is done. */ void __init *early_get_page(void) { - void *p; - - if (init_bootmem_done) { - p = alloc_bootmem_pages(PAGE_SIZE); - } else { - p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, - __initial_memory_limit_addr)); - } - return p; + if (init_bootmem_done) + return alloc_bootmem_pages(PAGE_SIZE); + else + return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); } /* Free up now-unused memory */ diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index f8a01829d64f..7d34e170e80f 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -223,8 +223,7 @@ void __init MMU_init_hw(void) * Find some memory for the hash table. */ if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); - Hash = __va(memblock_alloc_base(Hash_size, Hash_size, - __initial_memory_limit_addr)); + Hash = __va(memblock_alloc(Hash_size, Hash_size)); cacheable_memzero(Hash, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index d8695b02a968..7ba32e762990 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -432,6 +432,8 @@ static void __early_init_mmu(int boot_cpu) * the MMU configuration */ mb(); + + memblock_set_current_limit(linear_map_top); } void __init early_init_mmu(void) diff --git a/arch/sh/include/asm/memblock.h b/arch/sh/include/asm/memblock.h index dfe683b88075..e87063fad2ea 100644 --- a/arch/sh/include/asm/memblock.h +++ b/arch/sh/include/asm/memblock.h @@ -1,6 +1,4 @@ #ifndef __ASM_SH_MEMBLOCK_H #define __ASM_SH_MEMBLOCK_H -#define MEMBLOCK_REAL_LIMIT 0 - #endif /* __ASM_SH_MEMBLOCK_H */ diff --git a/arch/sparc/include/asm/memblock.h b/arch/sparc/include/asm/memblock.h index f12af880649b..c67b047ef85e 100644 --- a/arch/sparc/include/asm/memblock.h +++ b/arch/sparc/include/asm/memblock.h @@ -5,6 +5,4 @@ #define MEMBLOCK_DBG(fmt...) 
prom_printf(fmt) -#define MEMBLOCK_REAL_LIMIT 0 - #endif /* !(_SPARC64_MEMBLOCK_H) */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 3cf3304e901d..c4f6e53264ed 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -34,6 +34,7 @@ struct memblock_type { struct memblock { unsigned long debug; u64 rmo_size; + u64 current_limit; struct memblock_type memory; struct memblock_type reserved; }; @@ -46,11 +47,16 @@ extern long memblock_add(u64 base, u64 size); extern long memblock_remove(u64 base, u64 size); extern long __init memblock_free(u64 base, u64 size); extern long __init memblock_reserve(u64 base, u64 size); + extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid); extern u64 __init memblock_alloc(u64 size, u64 align); + +/* Flags for memblock_alloc_base() and __memblock_alloc_base() */ +#define MEMBLOCK_ALLOC_ANYWHERE (~(u64)0) +#define MEMBLOCK_ALLOC_ACCESSIBLE 0 + extern u64 __init memblock_alloc_base(u64 size, u64, u64 max_addr); -#define MEMBLOCK_ALLOC_ANYWHERE 0 extern u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr); extern u64 __init memblock_phys_mem_size(void); @@ -66,6 +72,14 @@ extern void memblock_dump_all(void); /* Provided by the architecture */ extern u64 memblock_nid_range(u64 start, u64 end, int *nid); +/** + * memblock_set_current_limit - Set the current allocation limit to allow + * limiting allocations to what is currently + * accessible during boot + * @limit: New limit value (physical address) + */ +extern void memblock_set_current_limit(u64 limit); + /* * pfn conversion functions diff --git a/mm/memblock.c b/mm/memblock.c index 0131684c42f8..770c5bfac2cd 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -115,6 +115,8 @@ void __init memblock_init(void) memblock.reserved.regions[0].base = 0; memblock.reserved.regions[0].size = 0; memblock.reserved.cnt = 1; + + memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE; } void __init memblock_analyze(void) @@ -373,7 +375,7 @@ u64 __init memblock_alloc_nid(u64 size, u64 align, int nid) u64 __init memblock_alloc(u64 size, u64 align) { - return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE); + return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); } u64 __init memblock_alloc_base(u64 size, u64 align, u64 max_addr) @@ -399,14 +401,9 @@ u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr) size = memblock_align_up(size, align); - /* On some platforms, make sure we allocate lowmem */ - /* Note that MEMBLOCK_REAL_LIMIT may be MEMBLOCK_ALLOC_ANYWHERE */ - if (max_addr == MEMBLOCK_ALLOC_ANYWHERE) - max_addr = MEMBLOCK_REAL_LIMIT; - /* Pump up max_addr */ - if (max_addr == MEMBLOCK_ALLOC_ANYWHERE) - max_addr = ~(u64)0; + if (max_addr == MEMBLOCK_ALLOC_ACCESSIBLE) + max_addr = memblock.current_limit; /* We do a top-down search, this tends to limit memory * fragmentation by keeping early boot allocs near the @@ -527,3 +524,9 @@ int memblock_is_region_reserved(u64 base, u64 size) return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } + +void __init memblock_set_current_limit(u64 limit) +{ + memblock.current_limit = limit; +} + -- cgit v1.2.3 From cd3db0c4ca3d237e7ad20f7107216e575705d2b0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:02 -0700 Subject: memblock: Remove rmo_size, bury it in arch/powerpc where it belongs The RMA (RMO is a misnomer) is a concept specific to ppc64 (in fact server ppc64 though I hijack it on embedded ppc64 for similar purposes) and represents the area of
memory that can be accessed in real mode (aka with MMU off), or on embedded, from the exception vectors (which is bolted in the TLB) which pretty much boils down to the same thing. We take that out of the generic MEMBLOCK data structure and move it into arch/powerpc where it belongs, renaming it to "RMA" while at it. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mmu.h | 12 ++++++++++++ arch/powerpc/kernel/head_40x.S | 6 +----- arch/powerpc/kernel/paca.c | 2 +- arch/powerpc/kernel/prom.c | 29 ++++++++--------------------- arch/powerpc/kernel/rtas.c | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/mm/40x_mmu.c | 14 +++++++++++++- arch/powerpc/mm/44x_mmu.c | 14 ++++++++++++++ arch/powerpc/mm/fsl_booke_mmu.c | 9 +++++++++ arch/powerpc/mm/hash_utils_64.c | 22 +++++++++++++++++++++- arch/powerpc/mm/init_32.c | 14 ++++++++++++++ arch/powerpc/mm/init_64.c | 1 + arch/powerpc/mm/ppc_mmu_32.c | 15 +++++++++++++++ arch/powerpc/mm/tlb_nohash.c | 14 ++++++++++++++ include/linux/memblock.h | 1 - mm/memblock.c | 8 -------- 16 files changed, 125 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 7ebf42ed84a2..bb40a06d3b77 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -2,6 +2,8 @@ #define _ASM_POWERPC_MMU_H_ #ifdef __KERNEL__ +#include + #include #include @@ -82,6 +84,16 @@ extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup; extern void early_init_mmu(void); extern void early_init_mmu_secondary(void); +extern void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size); + +#ifdef CONFIG_PPC64 +/* This is our real memory area size on ppc64 server, on embedded, we + * make it match the size our of bolted TLB area + */ +extern u64 ppc64_rma_size; +#endif /* CONFIG_PPC64 */ + #endif /* !__ASSEMBLY__ */ /* The kernel use the constants below to index in the page sizes array. diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a90625f9b485..8278e8bad5a0 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -923,11 +923,7 @@ initial_mmu: mtspr SPRN_PID,r0 sync - /* Configure and load two entries into TLB slots 62 and 63. - * In case we are pinning TLBs, these are reserved in by the - * other TLB functions. If not reserving, then it doesn't - * matter where they are loaded. - */ + /* Configure and load one entry into TLB slots 63 */ clrrwi r4,r4,10 /* Mask off the real page number */ ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */ diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 139a773853f4..b9ffd7deeed7 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -117,7 +117,7 @@ void __init allocate_pacas(void) * the first segment. On iSeries they must be within the area mapped * by the HV, which is HvPagesToMap * HVPAGESIZE bytes. 
*/ - limit = min(0x10000000ULL, memblock.rmo_size); + limit = min(0x10000000ULL, ppc64_rma_size); if (firmware_has_feature(FW_FEATURE_ISERIES)) limit = min(limit, HvPagesToMap * HVPAGESIZE); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 3aec0b980f6a..c3c6a8857544 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -66,6 +66,7 @@ int __initdata iommu_is_off; int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; +u64 ppc64_rma_size; #endif static int __init early_parse_mem(char *p) @@ -492,7 +493,7 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node, void __init early_init_dt_add_memory_arch(u64 base, u64 size) { -#if defined(CONFIG_PPC64) +#ifdef CONFIG_PPC64 if (iommu_is_off) { if (base >= 0x80000000ul) return; @@ -501,9 +502,13 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) } #endif - memblock_add(base, size); - + /* First MEMBLOCK added, do some special initializations */ + if (memstart_addr == ~(phys_addr_t)0) + setup_initial_memory_limit(base, size); memstart_addr = min((u64)memstart_addr, base); + + /* Add the chunk to the MEMBLOCK list */ + memblock_add(base, size); } u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align) @@ -655,22 +660,6 @@ static void __init phyp_dump_reserve_mem(void) static inline void __init phyp_dump_reserve_mem(void) {} #endif /* CONFIG_PHYP_DUMP && CONFIG_PPC_RTAS */ -static void set_boot_memory_limit(void) -{ -#ifdef CONFIG_PPC32 - /* 601 can only access 16MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 1) - memblock_set_current_limit(0x01000000); - /* 8xx can only access 8MB at the moment */ - else if (PVR_VER(mfspr(SPRN_PVR)) == 0x50) - memblock_set_current_limit(0x00800000); - else - memblock_set_current_limit(0x10000000); -#else - memblock_set_current_limit(memblock.rmo_size); -#endif -} - void __init early_init_devtree(void *params) { phys_addr_t limit; @@ -734,8 +723,6 @@ void __init early_init_devtree(void *params) DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); - set_boot_memory_limit(); - /* We may need to relocate the flat tree, do it now. * FIXME .. and the initrd too? */ move_device_tree(); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index d0516dbee762..1662777be5dd 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -934,7 +934,7 @@ void __init rtas_initialize(void) */ #ifdef CONFIG_PPC64 if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) { - rtas_region = min(memblock.rmo_size, RTAS_INSTANTIATE_MAX); + rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX); ibm_suspend_me_token = rtas_token("ibm,suspend-me"); } #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index d135f93cb0f6..4360944b60f0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -487,7 +487,7 @@ static void __init emergency_stack_init(void) * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. 
*/ - limit = min(slb0_limit(), memblock.rmo_size); + limit = min(slb0_limit(), ppc64_rma_size); for_each_possible_cpu(i) { unsigned long sp; diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c index 58969b51f454..5810967511d4 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/40x_mmu.c @@ -141,7 +141,19 @@ unsigned long __init mmu_mapin_ram(unsigned long top) * coverage with normal-sized pages (or other reasons) do not * attempt to allocate outside the allowed range. */ - memblock_set_current_limit(memstart_addr + mapped); + memblock_set_current_limit(mapped); return mapped; } + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 40x can only access 16MB at the moment (see head_40x.S) */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000)); +} diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index d8c6efb32bc6..024acab588fd 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -24,6 +24,8 @@ */ #include +#include + #include #include #include @@ -213,6 +215,18 @@ unsigned long __init mmu_mapin_ram(unsigned long top) return total_lowmem; } +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 44x has a 256M TLB entry pinned at boot */ + memblock_set_current_limit(min_t(u64, first_memblock_size, PPC_PIN_SIZE)); +} + #ifdef CONFIG_SMP void __cpuinit mmu_init_secondary(int cpu) { diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index e525f862d759..0be8fe24c54e 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -215,3 +215,12 @@ void __init adjust_total_lowmem(void) memblock_set_current_limit(memstart_addr + __max_low_memory); } + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + phys_addr_t limit = first_memblock_base + first_memblock_size; + + /* 64M mapped initially according to head_fsl_booke.S */ + memblock_set_current_limit(min_t(u64, limit, 0x04000000)); +} diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b05890e23813..83f534d862db 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -649,7 +649,7 @@ static void __init htab_initialize(void) #ifdef CONFIG_DEBUG_PAGEALLOC linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count, - 1, memblock.rmo_size)); + 1, ppc64_rma_size)); memset(linear_map_hash_slots, 0, linear_map_hash_count); #endif /* CONFIG_DEBUG_PAGEALLOC */ @@ -1248,3 +1248,23 @@ void kernel_map_pages(struct page *page, int numpages, int enable) local_irq_restore(flags); } #endif /* CONFIG_DEBUG_PAGEALLOC */ + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* On LPAR systems, the first entry is our RMA region, + * non-LPAR 64-bit hash MMU systems don't have a limitation + * on real mode access, but using the first entry works well + * enough. 
We also clamp it to 1G to avoid some funky things + * such as RTAS bugs etc... + */ + ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + + /* Finally limit subsequent allocations */ + memblock_set_current_limit(ppc64_rma_size); +} diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 59b208b7ec6f..742da43b4ab6 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -237,3 +237,17 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif + +#ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */ +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 8xx can only access 8MB at the moment */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000)); +} +#endif /* CONFIG_8xx */ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 71f1415e2472..9e081ffbf0f2 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -328,3 +328,4 @@ int __meminit vmemmap_populate(struct page *start_page, return 0; } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ + diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 7d34e170e80f..11571e118831 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -271,3 +271,18 @@ void __init MMU_init_hw(void) if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); } + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 601 can only access 16MB at the moment */ + if (PVR_VER(mfspr(SPRN_PVR)) == 1) + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000)); + else /* Anything else has 256M mapped */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000)); +} diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 7ba32e762990..a086ed562606 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -446,4 +446,18 @@ void __cpuinit early_init_mmu_secondary(void) __early_init_mmu(0); } +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* On Embedded 64-bit, we adjust the RMA size to match + * the bolted TLB entry. We know for now that only 1G + * entries are supported though that may eventually + * change. We crop it to the size of the first MEMBLOCK to + * avoid going over total available memory just in case... 
+ */ + ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + + /* Finally limit subsequent allocations */ + memblock_set_current_limit(ppc64_memblock_base + ppc64_rma_size); +} #endif /* CONFIG_PPC64 */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c4f6e53264ed..71b8edc6ede8 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -33,7 +33,6 @@ struct memblock_type { struct memblock { unsigned long debug; - u64 rmo_size; u64 current_limit; struct memblock_type memory; struct memblock_type reserved; diff --git a/mm/memblock.c b/mm/memblock.c index 770c5bfac2cd..73d903ebf3d4 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -49,7 +49,6 @@ void memblock_dump_all(void) return; pr_info("MEMBLOCK configuration:\n"); - pr_info(" rmo_size = 0x%llx\n", (unsigned long long)memblock.rmo_size); pr_info(" memory.size = 0x%llx\n", (unsigned long long)memblock.memory.size); memblock_dump(&memblock.memory, "memory"); @@ -195,10 +194,6 @@ static long memblock_add_region(struct memblock_type *type, u64 base, u64 size) long memblock_add(u64 base, u64 size) { - /* On pSeries LPAR systems, the first MEMBLOCK is our RMO region. */ - if (base == 0) - memblock.rmo_size = size; - return memblock_add_region(&memblock.memory, base, size); } @@ -459,9 +454,6 @@ void __init memblock_enforce_memory_limit(u64 memory_limit) break; } - if (memblock.memory.regions[0].size < memblock.rmo_size) - memblock.rmo_size = memblock.memory.regions[0].size; - memory_limit = memblock_end_of_DRAM(); /* And truncate any reserves above the limit also. */ -- cgit v1.2.3 From 2898cc4cdf208f15246b7a1c6951d2b126a70fd6 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:34:42 +1000 Subject: memblock: Change u64 to phys_addr_t Let's not waste space and cycles on archs that don't support >32-bit physical address space. 
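For reference, the typedef this conversion leans on (from include/linux/types.h) is:

	#ifdef CONFIG_PHYS_ADDR_T_64BIT
	typedef u64 phys_addr_t;
	#else
	typedef u32 phys_addr_t;
	#endif

so on a 32-bit configuration without CONFIG_PHYS_ADDR_T_64BIT the region arrays and all of the address arithmetic drop to 32-bit. Callers should likewise keep memblock addresses in phys_addr_t; a minimal (hypothetical) example:

	static phys_addr_t __init early_table_alloc(void)
	{
		/* returns a physical address sized for this platform */
		return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
	}
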
Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 48 +++++++++---------- mm/memblock.c | 118 ++++++++++++++++++++++++----------------------- 2 files changed, 84 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 71b8edc6ede8..b65045a4ed08 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -21,19 +21,19 @@ #define MAX_MEMBLOCK_REGIONS 128 struct memblock_region { - u64 base; - u64 size; + phys_addr_t base; + phys_addr_t size; }; struct memblock_type { unsigned long cnt; - u64 size; + phys_addr_t size; struct memblock_region regions[MAX_MEMBLOCK_REGIONS+1]; }; struct memblock { unsigned long debug; - u64 current_limit; + phys_addr_t current_limit; struct memblock_type memory; struct memblock_type reserved; }; @@ -42,34 +42,34 @@ extern struct memblock memblock; extern void __init memblock_init(void); extern void __init memblock_analyze(void); -extern long memblock_add(u64 base, u64 size); -extern long memblock_remove(u64 base, u64 size); -extern long __init memblock_free(u64 base, u64 size); -extern long __init memblock_reserve(u64 base, u64 size); +extern long memblock_add(phys_addr_t base, phys_addr_t size); +extern long memblock_remove(phys_addr_t base, phys_addr_t size); +extern long __init memblock_free(phys_addr_t base, phys_addr_t size); +extern long __init memblock_reserve(phys_addr_t base, phys_addr_t size); -extern u64 __init memblock_alloc_nid(u64 size, u64 align, int nid); -extern u64 __init memblock_alloc(u64 size, u64 align); +extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); +extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ -#define MEMBLOCK_ALLOC_ANYWHERE (~(u64)0) +#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) #define MEMBLOCK_ALLOC_ACCESSIBLE 0 -extern u64 __init memblock_alloc_base(u64 size, - u64, u64 max_addr); -extern u64 __init __memblock_alloc_base(u64 size, - u64 align, u64 max_addr); -extern u64 __init memblock_phys_mem_size(void); -extern u64 memblock_end_of_DRAM(void); -extern void __init memblock_enforce_memory_limit(u64 memory_limit); -extern int memblock_is_memory(u64 addr); -extern int memblock_is_region_memory(u64 base, u64 size); -extern int __init memblock_is_reserved(u64 addr); -extern int memblock_is_region_reserved(u64 base, u64 size); +extern phys_addr_t __init memblock_alloc_base(phys_addr_t size, + phys_addr_t, phys_addr_t max_addr); +extern phys_addr_t __init __memblock_alloc_base(phys_addr_t size, + phys_addr_t align, phys_addr_t max_addr); +extern phys_addr_t __init memblock_phys_mem_size(void); +extern phys_addr_t memblock_end_of_DRAM(void); +extern void __init memblock_enforce_memory_limit(phys_addr_t memory_limit); +extern int memblock_is_memory(phys_addr_t addr); +extern int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); +extern int __init memblock_is_reserved(phys_addr_t addr); +extern int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); extern void memblock_dump_all(void); /* Provided by the architecture */ -extern u64 memblock_nid_range(u64 start, u64 end, int *nid); +extern phys_addr_t memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid); /** * memblock_set_current_limit - Set the current allocation limit to allow @@ -77,7 +77,7 @@ extern u64 memblock_nid_range(u64 start, u64 end, int *nid); * accessible during boot * @limit: New limit value 
(physical address) */ -extern void memblock_set_current_limit(u64 limit); +extern void memblock_set_current_limit(phys_addr_t limit); /* diff --git a/mm/memblock.c b/mm/memblock.c index 73d903ebf3d4..81da63592a68 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -55,13 +55,14 @@ void memblock_dump_all(void) memblock_dump(&memblock.reserved, "reserved"); } -static unsigned long memblock_addrs_overlap(u64 base1, u64 size1, u64 base2, - u64 size2) +static unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } -static long memblock_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2) +static long memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2) { if (base2 == base1 + size1) return 1; @@ -72,12 +73,12 @@ static long memblock_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2) } static long memblock_regions_adjacent(struct memblock_type *type, - unsigned long r1, unsigned long r2) + unsigned long r1, unsigned long r2) { - u64 base1 = type->regions[r1].base; - u64 size1 = type->regions[r1].size; - u64 base2 = type->regions[r2].base; - u64 size2 = type->regions[r2].size; + phys_addr_t base1 = type->regions[r1].base; + phys_addr_t size1 = type->regions[r1].size; + phys_addr_t base2 = type->regions[r2].base; + phys_addr_t size2 = type->regions[r2].size; return memblock_addrs_adjacent(base1, size1, base2, size2); } @@ -128,7 +129,7 @@ void __init memblock_analyze(void) memblock.memory.size += memblock.memory.regions[i].size; } -static long memblock_add_region(struct memblock_type *type, u64 base, u64 size) +static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long coalesced = 0; long adjacent, i; @@ -141,8 +142,8 @@ static long memblock_add_region(struct memblock_type *type, u64 base, u64 size) /* First try and coalesce this MEMBLOCK with another. 
*/ for (i = 0; i < type->cnt; i++) { - u64 rgnbase = type->regions[i].base; - u64 rgnsize = type->regions[i].size; + phys_addr_t rgnbase = type->regions[i].base; + phys_addr_t rgnsize = type->regions[i].size; if ((rgnbase == base) && (rgnsize == size)) /* Already have this region, so we're done */ @@ -192,16 +193,16 @@ static long memblock_add_region(struct memblock_type *type, u64 base, u64 size) return 0; } -long memblock_add(u64 base, u64 size) +long memblock_add(phys_addr_t base, phys_addr_t size) { return memblock_add_region(&memblock.memory, base, size); } -static long __memblock_remove(struct memblock_type *type, u64 base, u64 size) +static long __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { - u64 rgnbegin, rgnend; - u64 end = base + size; + phys_addr_t rgnbegin, rgnend; + phys_addr_t end = base + size; int i; rgnbegin = rgnend = 0; /* supress gcc warnings */ @@ -246,17 +247,17 @@ static long __memblock_remove(struct memblock_type *type, u64 base, u64 size) return memblock_add_region(type, end, rgnend - end); } -long memblock_remove(u64 base, u64 size) +long memblock_remove(phys_addr_t base, phys_addr_t size) { return __memblock_remove(&memblock.memory, base, size); } -long __init memblock_free(u64 base, u64 size) +long __init memblock_free(phys_addr_t base, phys_addr_t size) { return __memblock_remove(&memblock.reserved, base, size); } -long __init memblock_reserve(u64 base, u64 size) +long __init memblock_reserve(phys_addr_t base, phys_addr_t size) { struct memblock_type *_rgn = &memblock.reserved; @@ -265,13 +266,13 @@ long __init memblock_reserve(u64 base, u64 size) return memblock_add_region(_rgn, base, size); } -long memblock_overlaps_region(struct memblock_type *type, u64 base, u64 size) +long memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long i; for (i = 0; i < type->cnt; i++) { - u64 rgnbase = type->regions[i].base; - u64 rgnsize = type->regions[i].size; + phys_addr_t rgnbase = type->regions[i].base; + phys_addr_t rgnsize = type->regions[i].size; if (memblock_addrs_overlap(base, size, rgnbase, rgnsize)) break; } @@ -279,20 +280,20 @@ long memblock_overlaps_region(struct memblock_type *type, u64 base, u64 size) return (i < type->cnt) ? 
i : -1; } -static u64 memblock_align_down(u64 addr, u64 size) +static phys_addr_t memblock_align_down(phys_addr_t addr, phys_addr_t size) { return addr & ~(size - 1); } -static u64 memblock_align_up(u64 addr, u64 size) +static phys_addr_t memblock_align_up(phys_addr_t addr, phys_addr_t size) { return (addr + (size - 1)) & ~(size - 1); } -static u64 __init memblock_alloc_region(u64 start, u64 end, - u64 size, u64 align) +static phys_addr_t __init memblock_alloc_region(phys_addr_t start, phys_addr_t end, + phys_addr_t size, phys_addr_t align) { - u64 base, res_base; + phys_addr_t base, res_base; long j; base = memblock_align_down((end - size), align); @@ -301,7 +302,7 @@ static u64 __init memblock_alloc_region(u64 start, u64 end, if (j < 0) { /* this area isn't reserved, take it */ if (memblock_add_region(&memblock.reserved, base, size) < 0) - base = ~(u64)0; + base = ~(phys_addr_t)0; return base; } res_base = memblock.reserved.regions[j].base; @@ -310,42 +311,43 @@ static u64 __init memblock_alloc_region(u64 start, u64 end, base = memblock_align_down(res_base - size, align); } - return ~(u64)0; + return ~(phys_addr_t)0; } -u64 __weak __init memblock_nid_range(u64 start, u64 end, int *nid) +phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid) { *nid = 0; return end; } -static u64 __init memblock_alloc_nid_region(struct memblock_region *mp, - u64 size, u64 align, int nid) +static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp, + phys_addr_t size, + phys_addr_t align, int nid) { - u64 start, end; + phys_addr_t start, end; start = mp->base; end = start + mp->size; start = memblock_align_up(start, align); while (start < end) { - u64 this_end; + phys_addr_t this_end; int this_nid; this_end = memblock_nid_range(start, end, &this_nid); if (this_nid == nid) { - u64 ret = memblock_alloc_region(start, this_end, size, align); - if (ret != ~(u64)0) + phys_addr_t ret = memblock_alloc_region(start, this_end, size, align); + if (ret != ~(phys_addr_t)0) return ret; } start = this_end; } - return ~(u64)0; + return ~(phys_addr_t)0; } -u64 __init memblock_alloc_nid(u64 size, u64 align, int nid) +phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) { struct memblock_type *mem = &memblock.memory; int i; @@ -359,23 +361,23 @@ u64 __init memblock_alloc_nid(u64 size, u64 align, int nid) size = memblock_align_up(size, align); for (i = 0; i < mem->cnt; i++) { - u64 ret = memblock_alloc_nid_region(&mem->regions[i], + phys_addr_t ret = memblock_alloc_nid_region(&mem->regions[i], size, align, nid); - if (ret != ~(u64)0) + if (ret != ~(phys_addr_t)0) return ret; } return memblock_alloc(size, align); } -u64 __init memblock_alloc(u64 size, u64 align) +phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) { return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); } -u64 __init memblock_alloc_base(u64 size, u64 align, u64 max_addr) +phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) { - u64 alloc; + phys_addr_t alloc; alloc = __memblock_alloc_base(size, align, max_addr); @@ -386,11 +388,11 @@ u64 __init memblock_alloc_base(u64 size, u64 align, u64 max_addr) return alloc; } -u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr) +phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) { long i; - u64 base = 0; - u64 res_base; + phys_addr_t base = 0; + phys_addr_t res_base; BUG_ON(0 == size); @@ -405,26 
+407,26 @@ u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr) * top of memory */ for (i = memblock.memory.cnt - 1; i >= 0; i--) { - u64 memblockbase = memblock.memory.regions[i].base; - u64 memblocksize = memblock.memory.regions[i].size; + phys_addr_t memblockbase = memblock.memory.regions[i].base; + phys_addr_t memblocksize = memblock.memory.regions[i].size; if (memblocksize < size) continue; base = min(memblockbase + memblocksize, max_addr); res_base = memblock_alloc_region(memblockbase, base, size, align); - if (res_base != ~(u64)0) + if (res_base != ~(phys_addr_t)0) return res_base; } return 0; } /* You must call memblock_analyze() before this. */ -u64 __init memblock_phys_mem_size(void) +phys_addr_t __init memblock_phys_mem_size(void) { return memblock.memory.size; } -u64 memblock_end_of_DRAM(void) +phys_addr_t memblock_end_of_DRAM(void) { int idx = memblock.memory.cnt - 1; @@ -432,10 +434,10 @@ u64 memblock_end_of_DRAM(void) } /* You must call memblock_analyze() after this. */ -void __init memblock_enforce_memory_limit(u64 memory_limit) +void __init memblock_enforce_memory_limit(phys_addr_t memory_limit) { unsigned long i; - u64 limit; + phys_addr_t limit; struct memblock_region *p; if (!memory_limit) @@ -472,7 +474,7 @@ void __init memblock_enforce_memory_limit(u64 memory_limit) } } -static int memblock_search(struct memblock_type *type, u64 addr) +static int memblock_search(struct memblock_type *type, phys_addr_t addr) { unsigned int left = 0, right = type->cnt; @@ -490,17 +492,17 @@ static int memblock_search(struct memblock_type *type, u64 addr) return -1; } -int __init memblock_is_reserved(u64 addr) +int __init memblock_is_reserved(phys_addr_t addr) { return memblock_search(&memblock.reserved, addr) != -1; } -int memblock_is_memory(u64 addr) +int memblock_is_memory(phys_addr_t addr) { return memblock_search(&memblock.memory, addr) != -1; } -int memblock_is_region_memory(u64 base, u64 size) +int memblock_is_region_memory(phys_addr_t base, phys_addr_t size) { int idx = memblock_search(&memblock.reserved, base); @@ -511,13 +513,13 @@ int memblock_is_region_memory(u64 base, u64 size) memblock.reserved.regions[idx].size) >= (base + size); } -int memblock_is_region_reserved(u64 base, u64 size) +int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) { return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } -void __init memblock_set_current_limit(u64 limit) +void __init memblock_set_current_limit(phys_addr_t limit) { memblock.current_limit = limit; } -- cgit v1.2.3 From 9d3c30f5a17ec35894eadb7171f724643dce19c3 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:04 -0700 Subject: memblock: Remove unused memblock.debug struct member Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b65045a4ed08..0fe6dd56a4b4 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -32,7 +32,6 @@ struct memblock_type { }; struct memblock { - unsigned long debug; phys_addr_t current_limit; struct memblock_type memory; struct memblock_type reserved; @@ -55,9 +54,11 @@ extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); #define MEMBLOCK_ALLOC_ACCESSIBLE 0 extern phys_addr_t __init memblock_alloc_base(phys_addr_t size, - phys_addr_t, phys_addr_t max_addr); + phys_addr_t align, + phys_addr_t max_addr); extern phys_addr_t __init 
__memblock_alloc_base(phys_addr_t size, - phys_addr_t align, phys_addr_t max_addr); + phys_addr_t align, + phys_addr_t max_addr); extern phys_addr_t __init memblock_phys_mem_size(void); extern phys_addr_t memblock_end_of_DRAM(void); extern void __init memblock_enforce_memory_limit(phys_addr_t memory_limit); -- cgit v1.2.3 From 4734b594c6ca1be796d30c82d93fdf5160f45124 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 28 Jul 2010 14:31:29 +1000 Subject: memblock: Remove memblock_type.size and add memblock.memory_size instead Right now, both the "memory" and "reserved" memblock_type structures have a "size" member. It represents the calculated memory size in the former case and is unused in the latter. This moves it out to the main memblock structure instead Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/mem.c | 2 +- include/linux/memblock.h | 2 +- mm/memblock.c | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 52df5428ece4..f661f6c527da 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -301,7 +301,7 @@ void __init mem_init(void) swiotlb_init(1); #endif - num_physpages = memblock.memory.size >> PAGE_SHIFT; + num_physpages = memblock_phys_mem_size() >> PAGE_SHIFT; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); #ifdef CONFIG_NEED_MULTIPLE_NODES diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 0fe6dd56a4b4..c9c7b0f344a5 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -27,12 +27,12 @@ struct memblock_region { struct memblock_type { unsigned long cnt; - phys_addr_t size; struct memblock_region regions[MAX_MEMBLOCK_REGIONS+1]; }; struct memblock { phys_addr_t current_limit; + phys_addr_t memory_size; /* Updated by memblock_analyze() */ struct memblock_type memory; struct memblock_type reserved; }; diff --git a/mm/memblock.c b/mm/memblock.c index 81da63592a68..5ae413e9afd8 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -49,7 +49,7 @@ void memblock_dump_all(void) return; pr_info("MEMBLOCK configuration:\n"); - pr_info(" memory.size = 0x%llx\n", (unsigned long long)memblock.memory.size); + pr_info(" memory size = 0x%llx\n", (unsigned long long)memblock.memory_size); memblock_dump(&memblock.memory, "memory"); memblock_dump(&memblock.reserved, "reserved"); @@ -123,10 +123,10 @@ void __init memblock_analyze(void) { int i; - memblock.memory.size = 0; + memblock.memory_size = 0; for (i = 0; i < memblock.memory.cnt; i++) - memblock.memory.size += memblock.memory.regions[i].size; + memblock.memory_size += memblock.memory.regions[i].size; } static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) @@ -423,7 +423,7 @@ phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, ph /* You must call memblock_analyze() before this. */ phys_addr_t __init memblock_phys_mem_size(void) { - return memblock.memory.size; + return memblock.memory_size; } phys_addr_t memblock_end_of_DRAM(void) -- cgit v1.2.3 From bf23c51f1f49d3960f3cd8e3d2e7f943d9c41042 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:06 -0700 Subject: memblock: Move memblock arrays to static storage in memblock.c and make their size a variable This is in preparation for having resizable arrays. Note that we still allocate one more than needed, this is unchanged from the previous implementation. 
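The resulting layout reaches the storage through a pointer and bounds-checks against a runtime value, so a later patch can reassign both:

	struct memblock_type {
		unsigned long cnt;	/* number of regions */
		unsigned long max;	/* size of the allocated array */
		struct memblock_region *regions;
	};

and the fixed-size overflow test in memblock_add_region() becomes:

	if (type->cnt >= type->max)
		return -1;	/* a later patch will grow the array here */
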
Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 7 ++++--- mm/memblock.c | 10 +++++++++- 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c9c7b0f344a5..150be938b910 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -18,7 +18,7 @@ #include -#define MAX_MEMBLOCK_REGIONS 128 +#define INIT_MEMBLOCK_REGIONS 128 struct memblock_region { phys_addr_t base; @@ -26,8 +26,9 @@ struct memblock_region { }; struct memblock_type { - unsigned long cnt; - struct memblock_region regions[MAX_MEMBLOCK_REGIONS+1]; + unsigned long cnt; /* number of regions */ + unsigned long max; /* size of the allocated array */ + struct memblock_region *regions; }; struct memblock { diff --git a/mm/memblock.c b/mm/memblock.c index 5ae413e9afd8..3c474502d92b 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -18,6 +18,8 @@ struct memblock memblock; static int memblock_debug; +static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1]; +static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1]; static int __init early_memblock(char *p) { @@ -104,6 +106,12 @@ static void memblock_coalesce_regions(struct memblock_type *type, void __init memblock_init(void) { + /* Hookup the initial arrays */ + memblock.memory.regions = memblock_memory_init_regions; + memblock.memory.max = INIT_MEMBLOCK_REGIONS; + memblock.reserved.regions = memblock_reserved_init_regions; + memblock.reserved.max = INIT_MEMBLOCK_REGIONS; + /* Create a dummy zero size MEMBLOCK which will get coalesced away later. * This simplifies the memblock_add() code below... */ @@ -169,7 +177,7 @@ static long memblock_add_region(struct memblock_type *type, phys_addr_t base, ph if (coalesced) return coalesced; - if (type->cnt >= MAX_MEMBLOCK_REGIONS) + if (type->cnt >= type->max) return -1; /* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */ -- cgit v1.2.3 From d2cd563ba82c424083b78e0ce97d68bfb04d1242 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:14 -0700 Subject: memblock: Add arch function to control coalescing of memblock memory regions Some archs such as ARM want to avoid coalescing accross things such as the lowmem/highmem boundary or similar. This provides the option to control it via an arch callback for which a weak default is provided which always allows coalescing. 
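As an illustration (hypothetical arch code, not part of this patch), an override refusing to merge across a fixed boundary could look like the following, where ARCH_LOWMEM_LIMIT is a made-up constant:

	#define ARCH_LOWMEM_LIMIT	0x30000000

	int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1,
					 phys_addr_t addr2, phys_addr_t size2)
	{
		phys_addr_t start = min(addr1, addr2);
		phys_addr_t end = max(addr1 + size1, addr2 + size2);

		/* refuse any merge that would straddle the boundary */
		return !(start < ARCH_LOWMEM_LIMIT && end > ARCH_LOWMEM_LIMIT);
	}

Being a non-weak definition, it simply takes precedence over the default in mm/memblock.c.
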
Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 ++ mm/memblock.c | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 150be938b910..e5e8f9db3a84 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -72,6 +72,8 @@ extern void memblock_dump_all(void); /* Provided by the architecture */ extern phys_addr_t memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid); +extern int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, + phys_addr_t addr2, phys_addr_t size2); /** * memblock_set_current_limit - Set the current allocation limit to allow diff --git a/mm/memblock.c b/mm/memblock.c index 0787790b1ce0..8715f09434df 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -241,6 +241,12 @@ static int memblock_double_array(struct memblock_type *type) return 0; } +extern int __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, + phys_addr_t addr2, phys_addr_t size2) +{ + return 1; +} + static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long coalesced = 0; @@ -262,6 +268,10 @@ static long memblock_add_region(struct memblock_type *type, phys_addr_t base, ph return 0; adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize); + /* Check if arch allows coalescing */ + if (adjacent != 0 && type == &memblock.memory && + !memblock_memory_can_coalesce(base, size, rgnbase, rgnsize)) + break; if (adjacent > 0) { type->regions[i].base -= size; type->regions[i].size += size; @@ -274,7 +284,14 @@ static long memblock_add_region(struct memblock_type *type, phys_addr_t base, ph } } - if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i+1)) { + /* If we plugged a hole, we may want to also coalesce with the + * next region + */ + if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i+1) && + ((type != &memblock.memory || memblock_memory_can_coalesce(type->regions[i].base, + type->regions[i].size, + type->regions[i+1].base, + type->regions[i+1].size)))) { memblock_coalesce_regions(type, i, i+1); coalesced++; } -- cgit v1.2.3 From c196f76fd5ece716ee3b7fa5dda3576961c0cecc Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:16 -0700 Subject: memblock: NUMA allocate can now use early_pfn_map We now provide a default (weak) implementation of memblock_nid_range() which uses the early_pfn_map[] if CONFIG_ARCH_POPULATES_NODE_MAP is set. Sparc still needs to use its own method due to the way the pages can be scattered between nodes. This implementation is inefficient due to our main algorithm and callback construct wanting to work on an ascending addresses bases while early_pfn_map[] would rather work with nid's (it's unsorted at that stage). But it should work and we can look into improving it subsequently, possibly using arch compile options to chose a different algorithm alltogether. 
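To recap how the hook is consumed (a condensed restatement of memblock_alloc_nid_region() from earlier in this series; memblock_alloc_region() is internal to mm/memblock.c): the allocator walks a candidate range in chunks the arch reports as single-node, and only attempts chunks on the requested node:

	while (start < end) {
		int this_nid;
		phys_addr_t this_end = memblock_nid_range(start, end, &this_nid);

		if (this_nid == nid) {
			/* top-down allocation inside this chunk */
			phys_addr_t ret = memblock_alloc_region(start, this_end,
								size, align);
			if (ret != ~(phys_addr_t)0)
				return ret;
		}
		start = this_end;	/* jump to the next node chunk */
	}
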
Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 3 +++ mm/memblock.c | 28 +++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e5e8f9db3a84..82b030244aa7 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -47,6 +47,9 @@ extern long memblock_remove(phys_addr_t base, phys_addr_t size); extern long __init memblock_free(phys_addr_t base, phys_addr_t size); extern long __init memblock_reserve(phys_addr_t base, phys_addr_t size); +/* The numa aware allocator is only available if + * CONFIG_ARCH_POPULATES_NODE_MAP is set + */ extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); diff --git a/mm/memblock.c b/mm/memblock.c index 468ff43a72b4..af7e4d9cf400 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -15,6 +15,7 @@ #include #include #include +#include #include struct memblock memblock; @@ -451,11 +452,36 @@ phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) /* * Additional node-local allocators. Search for node memory is bottom up * and walks memblock regions within that node bottom-up as well, but allocation - * within an memblock region is top-down. + * within an memblock region is top-down. XXX I plan to fix that at some stage + * + * WARNING: Only available after early_node_map[] has been populated, + * on some architectures, that is after all the calls to add_active_range() + * have been done to populate it. */ phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid) { +#ifdef CONFIG_ARCH_POPULATES_NODE_MAP + /* + * This code originates from sparc which really wants use to walk by addresses + * and returns the nid. This is not very convenient for early_pfn_map[] users + * as the map isn't sorted yet, and it really wants to be walked by nid. + * + * For now, I implement the inefficient method below which walks the early + * map multiple times. Eventually we may want to use an ARCH config option + * to implement a completely different method for both case. + */ + unsigned long start_pfn, end_pfn; + int i; + + for (i = 0; i < MAX_NUMNODES; i++) { + get_pfn_range_for_nid(i, &start_pfn, &end_pfn); + if (start < PFN_PHYS(start_pfn) || start >= PFN_PHYS(end_pfn)) + continue; + *nid = i; + return min(end, PFN_PHYS(end_pfn)); + } +#endif *nid = 0; return end; -- cgit v1.2.3 From 9d1e24928e6a0728d1c7c76818ccbd11b93e7ac9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:17 -0700 Subject: memblock: Separate memblock_alloc_nid() and memblock_alloc_try_nid() The former is now strict, it will fail if it cannot honor the allocation within the node, while the later implements the previous semantic which falls back to allocating anywhere. 
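A (hypothetical) caller now picks the variant according to how hard its node requirement is:

	phys_addr_t pa;

	/* strict: returns 0 if nothing suitable exists on this node */
	pa = memblock_alloc_nid(size, SMP_CACHE_BYTES, nid);
	if (!pa)
		panic("node %d has no memory for this table\n", nid);

	/* relaxed: transparently falls back to memblock_alloc() */
	pa = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, nid);

The sparc conversion below to memblock_alloc_try_nid() simply preserves the old fall-back semantics.
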
Signed-off-by: Benjamin Herrenschmidt --- arch/sparc/mm/init_64.c | 4 ++-- include/linux/memblock.h | 6 +++++- mm/memblock.c | 14 ++++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 0883113624b9..dc584d26d597 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -820,7 +820,7 @@ static void __init allocate_node_data(int nid) struct pglist_data *p; #ifdef CONFIG_NEED_MULTIPLE_NODES - paddr = memblock_alloc_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid); + paddr = memblock_alloc_try_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid); if (!paddr) { prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid); prom_halt(); @@ -840,7 +840,7 @@ static void __init allocate_node_data(int nid) if (p->node_spanned_pages) { num_pages = bootmem_bootmap_pages(p->node_spanned_pages); - paddr = memblock_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid); + paddr = memblock_alloc_try_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid); if (!paddr) { prom_printf("Cannot allocate bootmap for nid[%d]\n", nid); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 82b030244aa7..c8da03eb7ba3 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -50,7 +50,11 @@ extern long __init memblock_reserve(phys_addr_t base, phys_addr_t size); /* The numa aware allocator is only available if * CONFIG_ARCH_POPULATES_NODE_MAP is set */ -extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); +extern phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, + int nid); +extern phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, + int nid); + extern phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align); /* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ diff --git a/mm/memblock.c b/mm/memblock.c index af7e4d9cf400..1802d97c7284 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -537,9 +537,23 @@ phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int n return ret; } + return 0; +} + +phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) +{ + phys_addr_t res = memblock_alloc_nid(size, align, nid); + + if (res) + return res; return memblock_alloc(size, align); } + +/* + * Remaining API functions + */ + /* You must call memblock_analyze() before this. */ phys_addr_t __init memblock_phys_mem_size(void) { -- cgit v1.2.3 From 5e63cf43af844ed30acc278b38b8c9bc51eba493 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:07:21 +1000 Subject: memblock: Expose some memblock bits for use by x86 This exposes memblock_debug and associated memblock_dbg() macro, along with memblock_can_resize so that x86 can use these when ported to use memblock Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 5 +++++ mm/memblock.c | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c8da03eb7ba3..eed0f9b8e526 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -39,6 +39,11 @@ struct memblock { }; extern struct memblock memblock; +extern int memblock_debug; +extern int memblock_can_resize; + +#define memblock_dbg(fmt, ...) 
\ + if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) extern void __init memblock_init(void); extern void __init memblock_analyze(void); diff --git a/mm/memblock.c b/mm/memblock.c index cc15be29fd0a..5499ab162b9d 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -22,7 +22,8 @@ struct memblock memblock; -static int memblock_debug, memblock_can_resize; +int memblock_debug; +int memblock_can_resize; static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1]; static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1]; -- cgit v1.2.3 From 37d8d4bf489e39eedc9537f8616fe87879b13cb0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:20:58 +1000 Subject: memblock: Export MEMBLOCK_ERROR will used by x86 memblock_x86_find_in_range_node and nobootmem replacement Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 3 ++- mm/memblock.c | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index eed0f9b8e526..1a9c29cc92fa 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -18,7 +18,8 @@ #include -#define INIT_MEMBLOCK_REGIONS 128 +#define INIT_MEMBLOCK_REGIONS 128 +#define MEMBLOCK_ERROR (~(phys_addr_t)0) struct memblock_region { phys_addr_t base; diff --git a/mm/memblock.c b/mm/memblock.c index c3703abf057e..85cfa1d3ab28 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -27,8 +27,6 @@ int memblock_can_resize; static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1]; static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1]; -#define MEMBLOCK_ERROR (~(phys_addr_t)0) - /* inline so we don't get a warning when pr_debug is compiled out */ static inline const char *memblock_type_name(struct memblock_type *type) { -- cgit v1.2.3 From 25818f0f288cd5333ba5a90ad6dde3def4c4ff58 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 28 Jul 2010 15:25:10 +1000 Subject: memblock: Make MEMBLOCK_ERROR be 0 And ensure we don't hand out 0 as a valid allocation. We put the low limit at PAGE_SIZE arbitrarily. 
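With 0 as the error cookie, failure tests reduce to plain truth tests; e.g. for a (hypothetical) caller of the non-panicking allocator:

	phys_addr_t base = __memblock_alloc_base(size, align,
						 MEMBLOCK_ALLOC_ACCESSIBLE);

	if (base == MEMBLOCK_ERROR)	/* equivalently: if (!base) */
		printk(KERN_WARNING "early allocation of %llu bytes failed\n",
		       (unsigned long long)size);

The PAGE_SIZE floor added to memblock_find_region() is what guarantees a successful allocation can never legitimately return 0.
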
Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 +- mm/memblock.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 1a9c29cc92fa..dfa64494ced1 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -19,7 +19,7 @@ #include #define INIT_MEMBLOCK_REGIONS 128 -#define MEMBLOCK_ERROR (~(phys_addr_t)0) +#define MEMBLOCK_ERROR 0 struct memblock_region { phys_addr_t base; diff --git a/mm/memblock.c b/mm/memblock.c index 85cfa1d3ab28..cb520df2a414 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -105,6 +105,12 @@ static phys_addr_t __init memblock_find_region(phys_addr_t start, phys_addr_t en phys_addr_t base, res_base; long j; + /* Prevent allocations returning 0 as it's also used to + * indicate an allocation failure + */ + if (start == 0) + start = PAGE_SIZE; + base = memblock_align_down((end - size), align); while (start <= base) { j = memblock_overlaps_region(&memblock.reserved, base, size); -- cgit v1.2.3 From f0b37fad9a63217c39997b2d2b31f44e3d8be727 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:28:21 +1000 Subject: memblock: Protect memblock.h with CONFIG_HAVE_MEMBLOCK This should make it easier to catch/debug incorrect use when the CONFIG_ option isn't set. Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index dfa64494ced1..c24b27849096 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -2,6 +2,7 @@ #define _LINUX_MEMBLOCK_H #ifdef __KERNEL__ +#ifdef CONFIG_HAVE_MEMBLOCK /* * Logical memory blocks. * @@ -148,6 +149,8 @@ static inline unsigned long memblock_region_pages(const struct memblock_region * region++) +#endif /* CONFIG_HAVE_MEMBLOCK */ + #endif /* __KERNEL__ */ #endif /* _LINUX_MEMBLOCK_H */ -- cgit v1.2.3 From 10d0643988e976360eb3497dcafb55b393b8e480 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:43:02 +1000 Subject: memblock: Option for the architecture to put memblock into the .init section Arch code can define ARCH_DISCARD_MEMBLOCK in asm/memblock.h, which in turns causes memblock code and data to go respectively into the .init and .initdata sections. This will be used by the x86 architecture. If ARCH_DISCARD_MEMBLOCK is defined, the debugfs files to inspect the memblock arrays after boot are not created. 
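Opting in is a one-liner in the architecture's asm/memblock.h, e.g. what x86 is expected to do:

	#define ARCH_DISCARD_MEMBLOCK

With it defined, __init_memblock and __initdata_memblock expand to __init and __initdata, so the annotated memblock functions and the two static region arrays are freed together with the rest of the .init sections at the end of boot; without it they expand to nothing and everything stays resident.
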
Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 8 ++++++++ mm/memblock.c | 48 ++++++++++++++++++++++++------------------------ 2 files changed, 32 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c24b27849096..3978e6a8e824 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -149,6 +149,14 @@ static inline unsigned long memblock_region_pages(const struct memblock_region * region++) +#ifdef ARCH_DISCARD_MEMBLOCK +#define __init_memblock __init +#define __initdata_memblock __initdata +#else +#define __init_memblock +#define __initdata_memblock +#endif + #endif /* CONFIG_HAVE_MEMBLOCK */ #endif /* __KERNEL__ */ diff --git a/mm/memblock.c b/mm/memblock.c index cb520df2a414..a17faea37d47 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -20,12 +20,12 @@ #include #include -struct memblock memblock; +struct memblock memblock __initdata_memblock; -int memblock_debug; -int memblock_can_resize; -static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1]; -static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1]; +int memblock_debug __initdata_memblock; +int memblock_can_resize __initdata_memblock; +static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock; +static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock; /* inline so we don't get a warning when pr_debug is compiled out */ static inline const char *memblock_type_name(struct memblock_type *type) @@ -42,23 +42,23 @@ static inline const char *memblock_type_name(struct memblock_type *type) * Address comparison utilities */ -static phys_addr_t memblock_align_down(phys_addr_t addr, phys_addr_t size) +static phys_addr_t __init_memblock memblock_align_down(phys_addr_t addr, phys_addr_t size) { return addr & ~(size - 1); } -static phys_addr_t memblock_align_up(phys_addr_t addr, phys_addr_t size) +static phys_addr_t __init_memblock memblock_align_up(phys_addr_t addr, phys_addr_t size) { return (addr + (size - 1)) & ~(size - 1); } -static unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, +static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, phys_addr_t size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } -static long memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1, +static long __init_memblock memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, phys_addr_t size2) { if (base2 == base1 + size1) @@ -69,7 +69,7 @@ static long memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1, return 0; } -static long memblock_regions_adjacent(struct memblock_type *type, +static long __init_memblock memblock_regions_adjacent(struct memblock_type *type, unsigned long r1, unsigned long r2) { phys_addr_t base1 = type->regions[r1].base; @@ -80,7 +80,7 @@ static long memblock_regions_adjacent(struct memblock_type *type, return memblock_addrs_adjacent(base1, size1, base2, size2); } -long memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) +long __init_memblock memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long i; @@ -162,7 +162,7 @@ static phys_addr_t __init memblock_find_base(phys_addr_t size, phys_addr_t align return MEMBLOCK_ERROR; 
} -static void memblock_remove_region(struct memblock_type *type, unsigned long r) +static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) { unsigned long i; @@ -174,7 +174,7 @@ static void memblock_remove_region(struct memblock_type *type, unsigned long r) } /* Assumption: base addr of region 1 < base addr of region 2 */ -static void memblock_coalesce_regions(struct memblock_type *type, +static void __init_memblock memblock_coalesce_regions(struct memblock_type *type, unsigned long r1, unsigned long r2) { type->regions[r1].size += type->regions[r2].size; @@ -184,7 +184,7 @@ static void memblock_coalesce_regions(struct memblock_type *type, /* Defined below but needed now */ static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); -static int memblock_double_array(struct memblock_type *type) +static int __init_memblock memblock_double_array(struct memblock_type *type) { struct memblock_region *new_array, *old_array; phys_addr_t old_size, new_size, addr; @@ -255,13 +255,13 @@ static int memblock_double_array(struct memblock_type *type) return 0; } -extern int __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, +extern int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, phys_addr_t addr2, phys_addr_t size2) { return 1; } -static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) +static long __init_memblock memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long coalesced = 0; long adjacent, i; @@ -348,13 +348,13 @@ static long memblock_add_region(struct memblock_type *type, phys_addr_t base, ph return 0; } -long memblock_add(phys_addr_t base, phys_addr_t size) +long __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) { return memblock_add_region(&memblock.memory, base, size); } -static long __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size) +static long __init_memblock __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { phys_addr_t rgnbegin, rgnend; phys_addr_t end = base + size; @@ -402,7 +402,7 @@ static long __memblock_remove(struct memblock_type *type, phys_addr_t base, phys return memblock_add_region(type, end, rgnend - end); } -long memblock_remove(phys_addr_t base, phys_addr_t size) +long __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) { return __memblock_remove(&memblock.memory, base, size); } @@ -568,7 +568,7 @@ phys_addr_t __init memblock_phys_mem_size(void) return memblock.memory_size; } -phys_addr_t memblock_end_of_DRAM(void) +phys_addr_t __init_memblock memblock_end_of_DRAM(void) { int idx = memblock.memory.cnt - 1; @@ -655,7 +655,7 @@ int memblock_is_region_memory(phys_addr_t base, phys_addr_t size) memblock.reserved.regions[idx].size) >= (base + size); } -int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) +int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) { return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } @@ -666,7 +666,7 @@ void __init memblock_set_current_limit(phys_addr_t limit) memblock.current_limit = limit; } -static void memblock_dump(struct memblock_type *region, char *name) +static void __init_memblock memblock_dump(struct memblock_type *region, char *name) { unsigned long long base, size; int i; @@ -682,7 +682,7 @@ static void memblock_dump(struct memblock_type *region, char 
*name) } } -void memblock_dump_all(void) +void __init_memblock memblock_dump_all(void) { if (!memblock_debug) return; @@ -748,7 +748,7 @@ static int __init early_memblock(char *p) } early_param("memblock", early_memblock); -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) && !defined(ARCH_DISCARD_MEMBLOCK) static int memblock_debug_show(struct seq_file *m, void *private) { -- cgit v1.2.3 From 5303b68f57c227c27193a14e57dd12be27cd670f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 28 Jul 2010 15:38:40 +1000 Subject: memblock: Add memblock_find_in_range() This is a wrapper for memblock_find_base() using slightly different arguments (start,end instead of start,size for example) in order to make it easier to convert existing arch/x86 code. Signed-off-by: Yinghai Lu Signed-off-by: Benjamin Herrenschmidt --- include/linux/memblock.h | 2 ++ mm/memblock.c | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 3978e6a8e824..4df09bdcae42 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -47,6 +47,8 @@ extern int memblock_can_resize; #define memblock_dbg(fmt, ...) \ if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +u64 memblock_find_in_range(u64 start, u64 end, u64 size, u64 align); + extern void __init memblock_init(void); extern void __init memblock_analyze(void); extern long memblock_add(phys_addr_t base, phys_addr_t size); diff --git a/mm/memblock.c b/mm/memblock.c index a17faea37d47..b7ab10a2ef46 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -162,6 +162,14 @@ static phys_addr_t __init memblock_find_base(phys_addr_t size, phys_addr_t align return MEMBLOCK_ERROR; } +/* + * Find a free area with specified alignment in a specific range. + */ +u64 __init_memblock memblock_find_in_range(u64 start, u64 end, u64 size, u64 align) +{ + return memblock_find_base(size, align, start, end); +} + static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) { unsigned long i; -- cgit v1.2.3 From 18cb2aef91b37dbce2bec2f39bb1dddd0e9dd838 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 7 Aug 2010 03:26:23 +0900 Subject: percpu: handle __percpu notations in UP accessors UP accessors didn't take care of __percpu notations leading to a lot of spurious sparse warnings on UP configurations. Fix it. Signed-off-by: Namhyung Kim Signed-off-by: Tejun Heo --- include/asm-generic/percpu.h | 15 ++++++++++----- include/linux/percpu.h | 2 +- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index b5043a9890d8..08923b684768 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -70,11 +70,16 @@ extern void setup_per_cpu_areas(void); #else /* ! 
SMP */ -#define per_cpu(var, cpu) (*((void)(cpu), &(var))) -#define __get_cpu_var(var) (var) -#define __raw_get_cpu_var(var) (var) -#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) -#define __this_cpu_ptr(ptr) this_cpu_ptr(ptr) +#define VERIFY_PERCPU_PTR(__p) ({ \ + __verify_pcpu_ptr((__p)); \ + (typeof(*(__p)) __kernel __force *)(__p); \ +}) + +#define per_cpu(var, cpu) (*((void)(cpu), VERIFY_PERCPU_PTR(&(var)))) +#define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) +#define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) +#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) +#define __this_cpu_ptr(ptr) this_cpu_ptr(ptr) #endif /* SMP */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index b8b9084527b1..49466b13c5c6 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -149,7 +149,7 @@ extern void __init percpu_init_late(void); #else /* CONFIG_SMP */ -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) /* can't distinguish from other static vars, always false */ static inline bool is_kernel_percpu_address(unsigned long addr) -- cgit v1.2.3 From 1b5ad24878b7e5a543b98c5d2f8c0d8c0dd3088f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 7 Aug 2010 14:29:22 +0200 Subject: slub: add missing __percpu markup in mm/slub_def.h kmem_cache->cpu_slab is a percpu pointer but was missing __percpu markup. Add it. Signed-off-by: Namhyung Kim Acked-by: Tejun Heo Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 6447a723ecb1..5ec4bc0e45aa 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -68,7 +68,7 @@ struct kmem_cache_order_objects { * Slab cache management. */ struct kmem_cache { - struct kmem_cache_cpu *cpu_slab; + struct kmem_cache_cpu __percpu *cpu_slab; /* Used for retriving partial slabs etc */ unsigned long flags; int size; /* The size of an object including meta data */ -- cgit v1.2.3 From 73e4008ddddc84d5f2499c17012b340a0dae153e Mon Sep 17 00:00:00 2001 From: Nikolai Kondrashov Date: Fri, 6 Aug 2010 23:03:06 +0400 Subject: HID: allow resizing and replacing report descriptors Update hid_driver's report_fixup prototype to allow changing report descriptor size and/or returning completely different report descriptor. Update existing usage accordingly. This is to give more freedom in descriptor fixup and to allow having a whole fixed descriptor in the code for the sake of readability. 
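For example, a driver whose device ships an unsalvageable descriptor can now hand back a complete replacement (hypothetical driver; the descriptor bytes are made up):

	static __u8 foo_fixed_rdesc[] = {
		0x05, 0x01,		/* Usage Page (Generic Desktop) */
		/* ... full corrected descriptor ... */
	};

	static __u8 *foo_report_fixup(struct hid_device *hdev, __u8 *rdesc,
				      unsigned int *rsize)
	{
		if (*rsize >= 2 && rdesc[0] == 0x05 && rdesc[1] == 0x0c) {
			/* known-broken variant: use our own copy */
			*rsize = sizeof(foo_fixed_rdesc);
			return foo_fixed_rdesc;
		}
		return rdesc;
	}

Since hid_parse_report() kmemdup()s whatever pointer report_fixup() returns, handing back a static buffer is safe.
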
Signed-off-by: Nikolai Kondrashov Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 7 ++++--- drivers/hid/hid-cherry.c | 7 ++++--- drivers/hid/hid-core.c | 2 +- drivers/hid/hid-cypress.c | 9 +++++---- drivers/hid/hid-elecom.c | 7 ++++--- drivers/hid/hid-kye.c | 7 ++++--- drivers/hid/hid-lg.c | 9 +++++---- drivers/hid/hid-microsoft.c | 7 ++++--- drivers/hid/hid-monterey.c | 7 ++++--- drivers/hid/hid-ortek.c | 7 ++++--- drivers/hid/hid-petalynx.c | 7 ++++--- drivers/hid/hid-prodikeys.c | 7 ++++--- drivers/hid/hid-samsung.c | 20 +++++++++++--------- drivers/hid/hid-sony.c | 7 ++++--- drivers/hid/hid-sunplus.c | 7 ++++--- drivers/hid/hid-zydacron.c | 7 ++++--- include/linux/hid.h | 4 ++-- 17 files changed, 72 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index bba05d0a8980..eaeca564a8d3 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -246,17 +246,18 @@ static int apple_event(struct hid_device *hdev, struct hid_field *field, /* * MacBook JIS keyboard has wrong logical maximum */ -static void apple_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *apple_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { struct apple_sc *asc = hid_get_drvdata(hdev); - if ((asc->quirks & APPLE_RDESC_JIS) && rsize >= 60 && + if ((asc->quirks & APPLE_RDESC_JIS) && *rsize >= 60 && rdesc[53] == 0x65 && rdesc[59] == 0x65) { dev_info(&hdev->dev, "fixing up MacBook JIS keyboard report " "descriptor\n"); rdesc[53] = rdesc[59] = 0xe7; } + return rdesc; } static void apple_setup_input(struct input_dev *input) diff --git a/drivers/hid/hid-cherry.c b/drivers/hid/hid-cherry.c index 24663a8717b1..e880086c2311 100644 --- a/drivers/hid/hid-cherry.c +++ b/drivers/hid/hid-cherry.c @@ -26,15 +26,16 @@ * Cherry Cymotion keyboard have an invalid HID report descriptor, * that needs fixing before we can parse it. 
*/ -static void ch_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *ch_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 17 && rdesc[11] == 0x3c && rdesc[12] == 0x02) { + if (*rsize >= 17 && rdesc[11] == 0x3c && rdesc[12] == 0x02) { dev_info(&hdev->dev, "fixing up Cherry Cymotion report " "descriptor\n"); rdesc[11] = rdesc[16] = 0xff; rdesc[12] = rdesc[17] = 0x03; } + return rdesc; } #define ch_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index e635199a0cd2..ec20e83cbd61 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -651,7 +651,7 @@ int hid_parse_report(struct hid_device *device, __u8 *start, }; if (device->driver->report_fixup) - device->driver->report_fixup(device, start, size); + start = device->driver->report_fixup(device, start, &size); device->rdesc = kmemdup(start, size, GFP_KERNEL); if (device->rdesc == NULL) diff --git a/drivers/hid/hid-cypress.c b/drivers/hid/hid-cypress.c index 998b6f443d7d..4cd0e2345991 100644 --- a/drivers/hid/hid-cypress.c +++ b/drivers/hid/hid-cypress.c @@ -31,16 +31,16 @@ * Some USB barcode readers from cypress have usage min and usage max in * the wrong order */ -static void cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); unsigned int i; if (!(quirks & CP_RDESC_SWAPPED_MIN_MAX)) - return; + return rdesc; - for (i = 0; i < rsize - 4; i++) + for (i = 0; i < *rsize - 4; i++) if (rdesc[i] == 0x29 && rdesc[i + 2] == 0x19) { __u8 tmp; @@ -50,6 +50,7 @@ static void cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[i + 3] = rdesc[i + 1]; rdesc[i + 1] = tmp; } + return rdesc; } static int cp_input_mapped(struct hid_device *hdev, struct hid_input *hi, diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c index 7a40878f46b4..6e31f305397d 100644 --- a/drivers/hid/hid-elecom.c +++ b/drivers/hid/hid-elecom.c @@ -20,14 +20,15 @@ #include "hid-ids.h" -static void elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) { + if (*rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) { dev_info(&hdev->dev, "Fixing up Elecom BM084 " "report descriptor.\n"); rdesc[47] = 0x00; } + return rdesc; } static const struct hid_device_id elecom_devices[] = { diff --git a/drivers/hid/hid-kye.c b/drivers/hid/hid-kye.c index f8871712b7b5..817247ee006c 100644 --- a/drivers/hid/hid-kye.c +++ b/drivers/hid/hid-kye.c @@ -23,10 +23,10 @@ * - report size 8 count 1 must be size 1 count 8 for button bitfield * - change the button usage range to 4-7 for the extra buttons */ -static void kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 74 && + if (*rsize >= 74 && rdesc[61] == 0x05 && rdesc[62] == 0x08 && rdesc[63] == 0x19 && rdesc[64] == 0x08 && rdesc[65] == 0x29 && rdesc[66] == 0x0f && @@ -40,6 +40,7 @@ static void kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[72] = 0x01; rdesc[74] = 0x08; } + return rdesc; } static const struct hid_device_id kye_devices[] = { diff --git a/drivers/hid/hid-lg.c 
b/drivers/hid/hid-lg.c index f6433d8050a9..68c0b68856c7 100644 --- a/drivers/hid/hid-lg.c +++ b/drivers/hid/hid-lg.c @@ -41,25 +41,26 @@ * above the logical maximum described in descriptor. This extends * the original value of 0x28c of logical maximum to 0x104d */ -static void lg_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *lg_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); - if ((quirks & LG_RDESC) && rsize >= 90 && rdesc[83] == 0x26 && + if ((quirks & LG_RDESC) && *rsize >= 90 && rdesc[83] == 0x26 && rdesc[84] == 0x8c && rdesc[85] == 0x02) { dev_info(&hdev->dev, "fixing up Logitech keyboard report " "descriptor\n"); rdesc[84] = rdesc[89] = 0x4d; rdesc[85] = rdesc[90] = 0x10; } - if ((quirks & LG_RDESC_REL_ABS) && rsize >= 50 && + if ((quirks & LG_RDESC_REL_ABS) && *rsize >= 50 && rdesc[32] == 0x81 && rdesc[33] == 0x06 && rdesc[49] == 0x81 && rdesc[50] == 0x06) { dev_info(&hdev->dev, "fixing up rel/abs in Logitech " "report descriptor\n"); rdesc[33] = rdesc[50] = 0x02; } + return rdesc; } #define lg_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c index 359cc447c6c6..dc618c33d0a2 100644 --- a/drivers/hid/hid-microsoft.c +++ b/drivers/hid/hid-microsoft.c @@ -33,18 +33,19 @@ * Microsoft Wireless Desktop Receiver (Model 1028) has * 'Usage Min/Max' where it ought to have 'Physical Min/Max' */ -static void ms_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *ms_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); - if ((quirks & MS_RDESC) && rsize == 571 && rdesc[557] == 0x19 && + if ((quirks & MS_RDESC) && *rsize == 571 && rdesc[557] == 0x19 && rdesc[559] == 0x29) { dev_info(&hdev->dev, "fixing up Microsoft Wireless Receiver " "Model 1028 report descriptor\n"); rdesc[557] = 0x35; rdesc[559] = 0x45; } + return rdesc; } #define ms_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-monterey.c b/drivers/hid/hid-monterey.c index 2cd05aa244b9..c95c31e2d869 100644 --- a/drivers/hid/hid-monterey.c +++ b/drivers/hid/hid-monterey.c @@ -22,14 +22,15 @@ #include "hid-ids.h" -static void mr_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *mr_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 30 && rdesc[29] == 0x05 && rdesc[30] == 0x09) { + if (*rsize >= 30 && rdesc[29] == 0x05 && rdesc[30] == 0x09) { dev_info(&hdev->dev, "fixing up button/consumer in HID report " "descriptor\n"); rdesc[30] = 0x0c; } + return rdesc; } #define mr_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-ortek.c b/drivers/hid/hid-ortek.c index aa9a960f73a4..2e79716dca31 100644 --- a/drivers/hid/hid-ortek.c +++ b/drivers/hid/hid-ortek.c @@ -19,14 +19,15 @@ #include "hid-ids.h" -static void ortek_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *ortek_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 56 && rdesc[54] == 0x25 && rdesc[55] == 0x01) { + if (*rsize >= 56 && rdesc[54] == 0x25 && rdesc[55] == 0x01) { dev_info(&hdev->dev, "Fixing up Ortek WKB-2000 " "report descriptor.\n"); rdesc[55] = 0x92; } + return rdesc; } static const struct hid_device_id ortek_devices[] = { diff 
--git a/drivers/hid/hid-petalynx.c b/drivers/hid/hid-petalynx.c index 500fbd0652dc..308d6ae48a3e 100644 --- a/drivers/hid/hid-petalynx.c +++ b/drivers/hid/hid-petalynx.c @@ -23,10 +23,10 @@ #include "hid-ids.h" /* Petalynx Maxter Remote has maximum for consumer page set too low */ -static void pl_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *pl_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 60 && rdesc[39] == 0x2a && rdesc[40] == 0xf5 && + if (*rsize >= 60 && rdesc[39] == 0x2a && rdesc[40] == 0xf5 && rdesc[41] == 0x00 && rdesc[59] == 0x26 && rdesc[60] == 0xf9 && rdesc[61] == 0x00) { dev_info(&hdev->dev, "fixing up Petalynx Maxter Remote report " @@ -34,6 +34,7 @@ static void pl_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[60] = 0xfa; rdesc[40] = 0xfa; } + return rdesc; } #define pl_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c index 845f428b8090..48eab84f53b5 100644 --- a/drivers/hid/hid-prodikeys.c +++ b/drivers/hid/hid-prodikeys.c @@ -740,10 +740,10 @@ int pcmidi_snd_terminate(struct pcmidi_snd *pm) /* * PC-MIDI report descriptor for report id is wrong. */ -static void pk_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *pk_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize == 178 && + if (*rsize == 178 && rdesc[111] == 0x06 && rdesc[112] == 0x00 && rdesc[113] == 0xff) { dev_info(&hdev->dev, "fixing up pc-midi keyboard report " @@ -751,6 +751,7 @@ static void pk_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[144] = 0x18; /* report 4: was 0x10 report count */ } + return rdesc; } static int pk_input_mapping(struct hid_device *hdev, struct hid_input *hi, diff --git a/drivers/hid/hid-samsung.c b/drivers/hid/hid-samsung.c index bda0fd60c98d..35894444e000 100644 --- a/drivers/hid/hid-samsung.c +++ b/drivers/hid/hid-samsung.c @@ -61,10 +61,10 @@ static inline void samsung_irda_dev_trace(struct hid_device *hdev, "descriptor\n", rsize); } -static void samsung_irda_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *samsung_irda_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize == 184 && rdesc[175] == 0x25 && rdesc[176] == 0x40 && + if (*rsize == 184 && rdesc[175] == 0x25 && rdesc[176] == 0x40 && rdesc[177] == 0x75 && rdesc[178] == 0x30 && rdesc[179] == 0x95 && rdesc[180] == 0x01 && rdesc[182] == 0x40) { @@ -74,24 +74,25 @@ static void samsung_irda_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[180] = 0x06; rdesc[182] = 0x42; } else - if (rsize == 203 && rdesc[192] == 0x15 && rdesc[193] == 0x0 && + if (*rsize == 203 && rdesc[192] == 0x15 && rdesc[193] == 0x0 && rdesc[194] == 0x25 && rdesc[195] == 0x12) { samsung_irda_dev_trace(hdev, 203); rdesc[193] = 0x1; rdesc[195] = 0xf; } else - if (rsize == 135 && rdesc[124] == 0x15 && rdesc[125] == 0x0 && + if (*rsize == 135 && rdesc[124] == 0x15 && rdesc[125] == 0x0 && rdesc[126] == 0x25 && rdesc[127] == 0x11) { samsung_irda_dev_trace(hdev, 135); rdesc[125] = 0x1; rdesc[127] = 0xe; } else - if (rsize == 171 && rdesc[160] == 0x15 && rdesc[161] == 0x0 && + if (*rsize == 171 && rdesc[160] == 0x15 && rdesc[161] == 0x0 && rdesc[162] == 0x25 && rdesc[163] == 0x01) { samsung_irda_dev_trace(hdev, 171); rdesc[161] = 0x1; rdesc[163] = 0x3; } + return rdesc; } #define samsung_kbd_mouse_map_key_clear(c) \ @@ -130,11 +131,12 @@ 
static int samsung_kbd_mouse_input_mapping(struct hid_device *hdev, return 1; } -static void samsung_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *samsung_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { if (USB_DEVICE_ID_SAMSUNG_IR_REMOTE == hdev->product) - samsung_irda_report_fixup(hdev, rdesc, rsize); + rdesc = samsung_irda_report_fixup(hdev, rdesc, rsize); + return rdesc; } static int samsung_input_mapping(struct hid_device *hdev, struct hid_input *hi, diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 402d5574b574..9fa034915185 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -31,17 +31,18 @@ struct sony_sc { }; /* Sony Vaio VGX has wrongly mouse pointer declared as constant */ -static void sony_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *sony_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { struct sony_sc *sc = hid_get_drvdata(hdev); if ((sc->quirks & VAIO_RDESC_CONSTANT) && - rsize >= 56 && rdesc[54] == 0x81 && rdesc[55] == 0x07) { + *rsize >= 56 && rdesc[54] == 0x81 && rdesc[55] == 0x07) { dev_info(&hdev->dev, "Fixing up Sony Vaio VGX report " "descriptor\n"); rdesc[55] = 0x06; } + return rdesc; } /* diff --git a/drivers/hid/hid-sunplus.c b/drivers/hid/hid-sunplus.c index 438107d9f1b2..164ed568f6cf 100644 --- a/drivers/hid/hid-sunplus.c +++ b/drivers/hid/hid-sunplus.c @@ -22,16 +22,17 @@ #include "hid-ids.h" -static void sp_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *sp_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 107 && rdesc[104] == 0x26 && rdesc[105] == 0x80 && + if (*rsize >= 107 && rdesc[104] == 0x26 && rdesc[105] == 0x80 && rdesc[106] == 0x03) { dev_info(&hdev->dev, "fixing up Sunplus Wireless Desktop " "report descriptor\n"); rdesc[105] = rdesc[110] = 0x03; rdesc[106] = rdesc[111] = 0x21; } + return rdesc; } #define sp_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ diff --git a/drivers/hid/hid-zydacron.c b/drivers/hid/hid-zydacron.c index 9e8d35a203e4..aac1f9273149 100644 --- a/drivers/hid/hid-zydacron.c +++ b/drivers/hid/hid-zydacron.c @@ -27,10 +27,10 @@ struct zc_device { * Zydacron remote control has an invalid HID report descriptor, * that needs fixing before we can parse it. 
*/ -static void zc_report_fixup(struct hid_device *hdev, __u8 *rdesc, - unsigned int rsize) +static __u8 *zc_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) { - if (rsize >= 253 && + if (*rsize >= 253 && rdesc[0x96] == 0xbc && rdesc[0x97] == 0xff && rdesc[0xca] == 0xbc && rdesc[0xcb] == 0xff && rdesc[0xe1] == 0xbc && rdesc[0xe2] == 0xff) { @@ -40,6 +40,7 @@ static void zc_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[0x96] = rdesc[0xca] = rdesc[0xe1] = 0x0c; rdesc[0x97] = rdesc[0xcb] = rdesc[0xe2] = 0x00; } + return rdesc; } #define zc_map_key_clear(c) \ diff --git a/include/linux/hid.h b/include/linux/hid.h index 42a0f1d11365..0a34fb071379 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -626,8 +626,8 @@ struct hid_driver { int (*event)(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value); - void (*report_fixup)(struct hid_device *hdev, __u8 *buf, - unsigned int size); + __u8 *(*report_fixup)(struct hid_device *hdev, __u8 *buf, + unsigned int *size); int (*input_mapping)(struct hid_device *hdev, struct hid_input *hidinput, struct hid_field *field, -- cgit v1.2.3 From 92298e668372f2f6c8a79fb272f13d65161a4876 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 13 Aug 2010 10:22:17 +1000 Subject: PCI: provide stub pci_domain_nr function for !CONFIG_PCI configs Allows the new PCI domain aware DRM code to compile on m68k. Reported-by: Geert Uytterhoeven Signed-off-by: Dave Airlie Signed-off-by: Jesse Barnes --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index b1d17956a153..c8d95e369ff4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1214,6 +1214,9 @@ static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn) { return NULL; } +static inline int pci_domain_nr(struct pci_bus *bus) +{ return 0; } + #define dev_is_pci(d) (false) #define dev_is_pf(d) (false) #define dev_num_vf(d) (0) -- cgit v1.2.3 From c84e032e145775032fa9078b55e6333dd866603b Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Sat, 14 Aug 2010 09:43:22 -0700 Subject: include/video/vga.h: update web address. The below updates a broken web address to one that is (hopefully) the new correct address. Signed-off-by: Justin P. Mattock Signed-off-by: Jiri Kosina --- include/video/vga.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/video/vga.h b/include/video/vga.h index b49a5120ca2d..2b8691f7d256 100644 --- a/include/video/vga.h +++ b/include/video/vga.h @@ -5,7 +5,7 @@ * * Copyright history from vga16fb.c: * Copyright 1999 Ben Pfaff and Petr Vandrovec - * Based on VGA info at http://www.goodnet.com/~tinara/FreeVGA/home.htm + * Based on VGA info at http://www.osdever.net/FreeVGA/home.htm * Based on VESA framebuffer (c) 1998 Gerd Knorr * * This file is subject to the terms and conditions of the GNU General -- cgit v1.2.3 From 4e6cbfd09c66893e5134c9896e9af353c2322b66 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Thu, 29 Jul 2010 16:14:13 -0400 Subject: mac80211: support use of NAPI for bottom-half processing This patch implements basic infrastructure to support use of NAPI by mac80211-based hardware drivers. Because mac80211 devices can support multiple netdevs, a dummy netdev is used for interfacing with the NAPI code in the core of the network stack.
That structure is hidden from the hardware drivers, but the actual napi_struct is exposed in the ieee80211_hw structure so that the poll routines in drivers can retrieve that structure. Hardware drivers can also specify their own weight value for NAPI polling. Signed-off-by: John W. Linville --- include/net/mac80211.h | 24 ++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 5 +++++ net/mac80211/iface.c | 4 ++++ net/mac80211/main.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b0787a1dea90..3f1e03b521ec 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1102,6 +1102,10 @@ enum ieee80211_hw_flags { * * @max_rates: maximum number of alternate rate retry stages * @max_rate_tries: maximum number of tries for each stage + * + * @napi_weight: weight used for NAPI polling. You must specify an + * appropriate value here if a napi_poll operation is provided + * by your driver. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -1113,6 +1117,7 @@ struct ieee80211_hw { int channel_change_time; int vif_data_size; int sta_data_size; + int napi_weight; u16 queues; u16 max_listen_interval; s8 max_signal; @@ -1687,6 +1692,8 @@ enum ieee80211_ampdu_mlme_action { * switch operation for CSAs received from the AP may implement this * callback. They must then call ieee80211_chswitch_done() to indicate * completion of the channel switch. + * + * @napi_poll: Poll Rx queue for incoming data frames. */ struct ieee80211_ops { int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); @@ -1752,6 +1759,7 @@ struct ieee80211_ops { void (*flush)(struct ieee80211_hw *hw, bool drop); void (*channel_switch)(struct ieee80211_hw *hw, struct ieee80211_channel_switch *ch_switch); + int (*napi_poll)(struct ieee80211_hw *hw, int budget); }; /** @@ -1897,6 +1905,22 @@ void ieee80211_free_hw(struct ieee80211_hw *hw); */ void ieee80211_restart_hw(struct ieee80211_hw *hw); +/** ieee80211_napi_schedule - schedule NAPI poll + * + * Use this function to schedule NAPI polling on a device. + * + * @hw: the hardware to start polling + */ +void ieee80211_napi_schedule(struct ieee80211_hw *hw); + +/** ieee80211_napi_complete - complete NAPI polling + * + * Use this function to finish NAPI polling on a device. 
+ * + * @hw: the hardware to stop polling + */ +void ieee80211_napi_complete(struct ieee80211_hw *hw); + /** * ieee80211_rx - receive frame * diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 65e0ed6c2975..79d56454484a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -870,6 +870,11 @@ struct ieee80211_local { struct dentry *keys; } debugfs; #endif + + /* dummy netdev for use w/ NAPI */ + struct net_device napi_dev; + + struct napi_struct napi; }; static inline struct ieee80211_sub_if_data * diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ebbe264e2b0b..c1008a9d7bfb 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -187,6 +187,8 @@ static int ieee80211_open(struct net_device *dev) res = drv_start(local); if (res) goto err_del_bss; + if (local->ops->napi_poll) + napi_enable(&local->napi); /* we're brought up, everything changes */ hw_reconf_flags = ~0; ieee80211_led_radio(local, true); @@ -519,6 +521,8 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_recalc_ps(local, -1); if (local->open_count == 0) { + if (local->ops->napi_poll) + napi_disable(&local->napi); ieee80211_clear_tx_pending(local); ieee80211_stop_device(local); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 798a91b100cc..1ed956c9cb8b 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -390,6 +390,30 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, } #endif +static int ieee80211_napi_poll(struct napi_struct *napi, int budget) +{ + struct ieee80211_local *local = + container_of(napi, struct ieee80211_local, napi); + + return local->ops->napi_poll(&local->hw, budget); +} + +void ieee80211_napi_schedule(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + + napi_schedule(&local->napi); +} +EXPORT_SYMBOL(ieee80211_napi_schedule); + +void ieee80211_napi_complete(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + + napi_complete(&local->napi); +} +EXPORT_SYMBOL(ieee80211_napi_complete); + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -494,6 +518,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, skb_queue_head_init(&local->skb_queue); skb_queue_head_init(&local->skb_queue_unreliable); + /* init dummy netdev for use w/ NAPI */ + init_dummy_netdev(&local->napi_dev); + return local_to_hw(local); } EXPORT_SYMBOL(ieee80211_alloc_hw); @@ -683,6 +710,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_ifa; #endif + netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll, + local->hw.napi_weight); + return 0; #ifdef CONFIG_INET -- cgit v1.2.3 From 7da7cc1d42d8ce02cca16df8c021e6d657f1f8fd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Aug 2010 17:02:38 +0200 Subject: mac80211: per interface idle notification Sometimes we don't just need to know whether or not the device is idle, but also per interface. This adds that reporting capability to mac80211. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/net/mac80211.h | 6 ++++++ net/mac80211/ibss.c | 8 +++++-- net/mac80211/ieee80211_i.h | 3 +++ net/mac80211/iface.c | 53 +++++++++++++++++++++++++++++++++++++++------- net/mac80211/mlme.c | 17 +++++++++++++-- net/mac80211/scan.c | 2 ++ net/mac80211/work.c | 8 +++---- 7 files changed, 81 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3f1e03b521ec..3a3c26f647b7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -149,6 +149,7 @@ struct ieee80211_low_level_stats { * @BSS_CHANGED_ARP_FILTER: Hardware ARP filter address list or state changed. * @BSS_CHANGED_QOS: QoS for this association was enabled/disabled. Note * that it is only ever disabled for station mode. + * @BSS_CHANGED_IDLE: Idle changed for this BSS/interface. */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -165,6 +166,7 @@ enum ieee80211_bss_change { BSS_CHANGED_IBSS = 1<<11, BSS_CHANGED_ARP_FILTER = 1<<12, BSS_CHANGED_QOS = 1<<13, + BSS_CHANGED_IDLE = 1<<14, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -223,6 +225,9 @@ enum ieee80211_bss_change { * hardware must not perform any ARP filtering. Note, that the filter will * be enabled also in promiscuous mode. * @qos: This is a QoS-enabled BSS. + * @idle: This interface is idle. There's also a global idle flag in the + * hardware config which may be more appropriate depending on what + * your driver/device needs to do. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -247,6 +252,7 @@ struct ieee80211_bss_conf { u8 arp_addr_cnt; bool arp_filter_enabled; bool qos; + bool idle; }; /** diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index c691780725a7..32af97108425 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -920,12 +920,14 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN); sdata->u.ibss.ssid_len = params->ssid_len; + mutex_unlock(&sdata->u.ibss.mtx); + + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); ieee80211_queue_work(&sdata->local->hw, &sdata->work); - mutex_unlock(&sdata->u.ibss.mtx); - return 0; } @@ -980,7 +982,9 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) mutex_unlock(&sdata->u.ibss.mtx); + mutex_lock(&local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&local->mtx); return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b44e03a02da9..98e783c6a363 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -497,6 +497,9 @@ struct ieee80211_sub_if_data { */ bool ht_opmode_valid; + /* to detect idle changes */ + bool old_idle; + /* Fragment table for host-based reassembly */ struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; unsigned int fragment_next; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index c1008a9d7bfb..9459aeee0ddc 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -309,7 +309,9 @@ static int ieee80211_open(struct net_device *dev) if (sdata->flags & IEEE80211_SDATA_PROMISC) atomic_inc(&local->iff_promiscs); + mutex_lock(&local->mtx); hw_reconf_flags |= __ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); local->open_count++; if (hw_reconf_flags) { @@ -516,7 +518,9 @@ static int ieee80211_stop(struct net_device *dev) sdata->bss = NULL; + mutex_lock(&local->mtx); hw_reconf_flags |= __ieee80211_recalc_idle(local); + 
mutex_unlock(&local->mtx); ieee80211_recalc_ps(local, -1); @@ -1199,28 +1203,61 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; int count = 0; + bool working = false, scanning = false; + struct ieee80211_work *wk; - if (!list_empty(&local->work_list)) - return ieee80211_idle_off(local, "working"); - - if (local->scanning) - return ieee80211_idle_off(local, "scanning"); +#ifdef CONFIG_PROVE_LOCKING + WARN_ON(debug_locks && !lockdep_rtnl_is_held() && + !lockdep_is_held(&local->iflist_mtx)); +#endif + lockdep_assert_held(&local->mtx); list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) + if (!ieee80211_sdata_running(sdata)) { + sdata->vif.bss_conf.idle = true; continue; + } + + sdata->old_idle = sdata->vif.bss_conf.idle; + /* do not count disabled managed interfaces */ if (sdata->vif.type == NL80211_IFTYPE_STATION && - !sdata->u.mgd.associated) + !sdata->u.mgd.associated) { + sdata->vif.bss_conf.idle = true; continue; + } /* do not count unused IBSS interfaces */ if (sdata->vif.type == NL80211_IFTYPE_ADHOC && - !sdata->u.ibss.ssid_len) + !sdata->u.ibss.ssid_len) { + sdata->vif.bss_conf.idle = true; continue; + } /* count everything else */ count++; } + list_for_each_entry(wk, &local->work_list, list) { + working = true; + wk->sdata->vif.bss_conf.idle = false; + } + + if (local->scan_sdata) { + scanning = true; + local->scan_sdata->vif.bss_conf.idle = false; + } + + list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->old_idle == sdata->vif.bss_conf.idle) + continue; + if (!ieee80211_sdata_running(sdata)) + continue; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); + } + + if (working) + return ieee80211_idle_off(local, "working"); + if (scanning) + return ieee80211_idle_off(local, "scanning"); if (!count) return ieee80211_idle_on(local); else diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 17e9257a61d8..82e7cec5179c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1103,8 +1103,11 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid); ieee80211_set_disassoc(sdata, true); - ieee80211_recalc_idle(local); mutex_unlock(&ifmgd->mtx); + + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); /* * must be outside lock due to cfg80211, * but that's not a problem. @@ -1173,7 +1176,9 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, sdata->name, bssid, reason_code); ieee80211_set_disassoc(sdata, true); + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); return RX_MGMT_CFG80211_DEAUTH; } @@ -1203,7 +1208,9 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, sdata->name, mgmt->sa, reason_code); ieee80211_set_disassoc(sdata, true); + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); return RX_MGMT_CFG80211_DISASSOC; } @@ -1840,8 +1847,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) " after %dms, disconnecting.\n", bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); ieee80211_set_disassoc(sdata, true); - ieee80211_recalc_idle(local); mutex_unlock(&ifmgd->mtx); + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); /* * must be outside lock due to cfg80211, * but that's not a problem. 
@@ -2319,7 +2328,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, if (assoc_bss) sta_info_destroy_addr(sdata, bssid); + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); return 0; } @@ -2357,7 +2368,9 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, cookie, !req->local_state_change); sta_info_destroy_addr(sdata, bssid); + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); return 0; } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index f31f549733b1..31f233f7f51a 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -304,7 +304,9 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) ieee80211_offchannel_return(local, true); done: + mutex_lock(&local->mtx); ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); ieee80211_mesh_notify_scan_completed(local); diff --git a/net/mac80211/work.c b/net/mac80211/work.c index b98af64f5862..ae344d1ba056 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -888,10 +888,10 @@ static void ieee80211_work_work(struct work_struct *work) while ((skb = skb_dequeue(&local->work_skb_queue))) ieee80211_work_rx_queued_mgmt(local, skb); - ieee80211_recalc_idle(local); - mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + list_for_each_entry_safe(wk, tmp, &local->work_list, list) { bool started = wk->started; @@ -1001,10 +1001,10 @@ static void ieee80211_work_work(struct work_struct *work) &local->scan_work, round_jiffies_relative(0)); - mutex_unlock(&local->mtx); - ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); + list_for_each_entry_safe(wk, tmp, &free_work, list) { wk->done(wk, NULL); list_del(&wk->list); -- cgit v1.2.3 From d1f5b7a34aa5ff703c4966ea2652d4212ac75940 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Aug 2010 17:05:55 +0200 Subject: mac80211: allow drivers to request SM PS mode change Sometimes drivers have more information than the stack about how their antennas/chains are used, and may require that the SM PS mode be changed. This could happen, for example, when detecting that the user disconnected an antenna. Thus this patch introduces API to allow drivers to request SM PS mode changes. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 12 ++++++++++++ net/mac80211/ht.c | 28 ++++++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 6 +++++- net/mac80211/mlme.c | 3 +++ 4 files changed, 48 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3a3c26f647b7..871ed1de736a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2548,6 +2548,18 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, */ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); +/** + * ieee80211_request_smps - request SM PS transition + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @mode: new SM PS mode + * + * This allows the driver to request an SM PS transition in managed + * mode. This is useful when the driver has more information than + * the stack about possible interference, for example by bluetooth. 
+ */ +void ieee80211_request_smps(struct ieee80211_vif *vif, + enum ieee80211_smps_mode smps_mode); + /* Rate control API */ /** diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 9d101fb33861..11f74f5f7b2f 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -265,3 +265,31 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, return 0; } + +void ieee80211_request_smps_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + u.mgd.request_smps_work); + + mutex_lock(&sdata->u.mgd.mtx); + __ieee80211_request_smps(sdata, sdata->u.mgd.driver_smps_mode); + mutex_unlock(&sdata->u.mgd.mtx); +} + +void ieee80211_request_smps(struct ieee80211_vif *vif, + enum ieee80211_smps_mode smps_mode) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) + return; + + if (WARN_ON(smps_mode == IEEE80211_SMPS_OFF)) + smps_mode = IEEE80211_SMPS_AUTOMATIC; + + ieee80211_queue_work(&sdata->local->hw, + &sdata->u.mgd.request_smps_work); +} +/* this might change ... don't want non-open drivers using it */ +EXPORT_SYMBOL_GPL(ieee80211_request_smps); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 98e783c6a363..1bf05bfd149d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -343,7 +343,10 @@ struct ieee80211_if_managed { unsigned long timers_running; /* used for quiesce/restart */ bool powersave; /* powersave requested for this iface */ enum ieee80211_smps_mode req_smps, /* requested smps mode */ - ap_smps; /* smps mode AP thinks we're in */ + ap_smps, /* smps mode AP thinks we're in */ + driver_smps_mode; /* smps mode request */ + + struct work_struct request_smps_work; unsigned int flags; @@ -1113,6 +1116,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, const u8 *bssid); +void ieee80211_request_smps_work(struct work_struct *work); void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 82e7cec5179c..38996a44aa8e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1926,6 +1926,8 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) * time -- the code here is properly synchronised. */ + cancel_work_sync(&ifmgd->request_smps_work); + cancel_work_sync(&ifmgd->beacon_connection_loss_work); if (del_timer_sync(&ifmgd->timer)) set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); @@ -1961,6 +1963,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); INIT_WORK(&ifmgd->beacon_connection_loss_work, ieee80211_beacon_connection_loss_work); + INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_work); setup_timer(&ifmgd->timer, ieee80211_sta_timer, (unsigned long) sdata); setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, -- cgit v1.2.3 From 04600794958f1833f5571c6cde40f260ab557f55 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Aug 2010 17:45:15 +0200 Subject: cfg80211: support sysfs namespaces Enable using network namespaces with wireless devices even when sysfs is enabled using the same infrastructure that was built for netdevs. Signed-off-by: Johannes Berg Acked-by: "Eric W. Biederman" Signed-off-by: John W. 
Linville --- include/linux/netdevice.h | 2 ++ net/core/net-sysfs.c | 3 ++- net/wireless/core.c | 7 ++++++- net/wireless/sysfs.c | 9 +++++++++ 4 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 46c36ffe20ee..a4b14fd81c6a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2171,6 +2171,8 @@ extern void dev_seq_stop(struct seq_file *seq, void *v); extern int netdev_class_create_file(struct class_attribute *class_attr); extern void netdev_class_remove_file(struct class_attribute *class_attr); +extern struct kobj_ns_type_operations net_ns_type_operations; + extern char *netdev_drivername(const struct net_device *dev, char *buffer, int len); extern void linkwatch_run_queue(void); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index af4dfbadf2a0..7d748542d97e 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -789,12 +789,13 @@ static const void *net_netlink_ns(struct sock *sk) return sock_net(sk); } -static struct kobj_ns_type_operations net_ns_type_operations = { +struct kobj_ns_type_operations net_ns_type_operations = { .type = KOBJ_NS_TYPE_NET, .current_ns = net_current_ns, .netlink_ns = net_netlink_ns, .initial_ns = net_initial_ns, }; +EXPORT_SYMBOL_GPL(net_ns_type_operations); static void net_kobj_ns_exit(struct net *net) { diff --git a/net/wireless/core.c b/net/wireless/core.c index 541e2fff5e9c..c70909c3eae4 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -253,11 +253,16 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, WARN_ON(err); wdev->netdev->features |= NETIF_F_NETNS_LOCAL; } + + return err; } wiphy_net_set(&rdev->wiphy, net); - return err; + err = device_rename(&rdev->wiphy.dev, dev_name(&rdev->wiphy.dev)); + WARN_ON(err); + + return 0; } static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 9f2cef3e0ca0..74a9e3cce452 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -110,6 +110,13 @@ static int wiphy_resume(struct device *dev) return ret; } +static const void *wiphy_namespace(struct device *d) +{ + struct wiphy *wiphy = container_of(d, struct wiphy, dev); + + return wiphy_net(wiphy); +} + struct class ieee80211_class = { .name = "ieee80211", .owner = THIS_MODULE, @@ -120,6 +127,8 @@ struct class ieee80211_class = { #endif .suspend = wiphy_suspend, .resume = wiphy_resume, + .ns_type = &net_ns_type_operations, + .namespace = wiphy_namespace, }; int wiphy_sysfs_init(void) -- cgit v1.2.3 From 97359d1235eaf634fe706c9faa6e40181cc95fb8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Aug 2010 09:46:38 +0200 Subject: mac80211: use cipher suite selectors Currently, mac80211 translates the cfg80211 cipher suite selectors into ALG_* values. That isn't all too useful, and some drivers benefit from the distinction between WEP40 and WEP104 as well. Therefore, convert it all to use the cipher suite selectors. Signed-off-by: Johannes Berg Acked-by: Gertjan van Wingerde Signed-off-by: John W. 
Linville --- drivers/net/wireless/at76c50x-usb.c | 7 ++-- drivers/net/wireless/ath/ar9170/main.c | 31 ++++++++------- drivers/net/wireless/ath/ath5k/base.c | 9 +++-- drivers/net/wireless/ath/ath5k/pcu.c | 19 ++++----- drivers/net/wireless/ath/ath9k/common.c | 36 ++++++++++-------- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 5 ++- drivers/net/wireless/ath/ath9k/main.c | 5 ++- drivers/net/wireless/b43/main.c | 16 ++++---- drivers/net/wireless/iwlwifi/iwl-agn-tx.c | 16 ++++---- drivers/net/wireless/iwlwifi/iwl-agn.c | 4 +- drivers/net/wireless/iwlwifi/iwl-dev.h | 2 +- drivers/net/wireless/iwlwifi/iwl-sta.c | 24 ++++++------ drivers/net/wireless/iwlwifi/iwl3945-base.c | 43 ++++++++++++--------- drivers/net/wireless/p54/main.c | 9 +++-- drivers/net/wireless/p54/txrx.c | 17 +++++---- drivers/net/wireless/rt2x00/rt2500usb.c | 4 +- drivers/net/wireless/rt2x00/rt2x00crypto.c | 17 ++++----- drivers/net/wireless/wl12xx/wl1251_main.c | 13 ++++--- drivers/net/wireless/wl12xx/wl1251_tx.c | 4 +- drivers/net/wireless/wl12xx/wl1271_main.c | 13 ++++--- drivers/net/wireless/wl12xx/wl1271_tx.c | 4 +- include/net/mac80211.h | 18 +-------- net/mac80211/cfg.c | 44 ++++++--------------- net/mac80211/debugfs_key.c | 55 +++++++++++---------------- net/mac80211/driver-trace.h | 4 +- net/mac80211/key.c | 25 ++++++------ net/mac80211/key.h | 4 +- net/mac80211/rx.c | 18 +++++---- net/mac80211/tx.c | 22 ++++++----- net/mac80211/wep.c | 2 +- net/mac80211/wpa.c | 6 +-- 31 files changed, 236 insertions(+), 260 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c index d5140a87f073..a267bf55574c 100644 --- a/drivers/net/wireless/at76c50x-usb.c +++ b/drivers/net/wireless/at76c50x-usb.c @@ -2061,11 +2061,12 @@ static int at76_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, int i; - at76_dbg(DBG_MAC80211, "%s(): cmd %d key->alg %d key->keyidx %d " + at76_dbg(DBG_MAC80211, "%s(): cmd %d key->cipher %d key->keyidx %d " "key->keylen %d", - __func__, cmd, key->alg, key->keyidx, key->keylen); + __func__, cmd, key->cipher, key->keyidx, key->keylen); - if (key->alg != ALG_WEP) + if ((key->cipher != WLAN_CIPHER_SUITE_WEP40) && + (key->cipher != WLAN_CIPHER_SUITE_WEP104)) return -EOPNOTSUPP; key->hw_key_idx = key->keyidx; diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c index c67b05f3bcbd..29d2253ad7e4 100644 --- a/drivers/net/wireless/ath/ar9170/main.c +++ b/drivers/net/wireless/ath/ar9170/main.c @@ -1190,14 +1190,13 @@ static int ar9170_tx_prepare(struct ar9170 *ar, struct sk_buff *skb) if (info->control.hw_key) { icv = info->control.hw_key->icv_len; - switch (info->control.hw_key->alg) { - case ALG_WEP: + switch (info->control.hw_key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + case WLAN_CIPHER_SUITE_TKIP: keytype = AR9170_TX_MAC_ENCR_RC4; break; - case ALG_TKIP: - keytype = AR9170_TX_MAC_ENCR_RC4; - break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: keytype = AR9170_TX_MAC_ENCR_AES; break; default: @@ -1778,17 +1777,17 @@ static int ar9170_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if ((!ar->vif) || (ar->disable_offload)) return -EOPNOTSUPP; - switch (key->alg) { - case ALG_WEP: - if (key->keylen == WLAN_KEY_LEN_WEP40) - ktype = AR9170_ENC_ALG_WEP64; - else - ktype = AR9170_ENC_ALG_WEP128; + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + ktype = AR9170_ENC_ALG_WEP64; + break; + case WLAN_CIPHER_SUITE_WEP104: + ktype = AR9170_ENC_ALG_WEP128; break; - 
case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: ktype = AR9170_ENC_ALG_TKIP; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: ktype = AR9170_ENC_ALG_AESCCMP; break; default: @@ -1827,7 +1826,7 @@ static int ar9170_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (err) goto out; - if (key->alg == ALG_TKIP) { + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { err = ar9170_upload_key(ar, i, sta ? sta->addr : NULL, ktype, 1, key->key + 16, 16); if (err) @@ -1864,7 +1863,7 @@ static int ar9170_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (err) goto out; - if (key->alg == ALG_TKIP) { + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { err = ar9170_upload_key(ar, key->hw_key_idx, NULL, AR9170_ENC_ALG_NONE, 1, diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 373dcfec689c..a729b8774670 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -3297,11 +3297,12 @@ ath5k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (sc->opmode == NL80211_IFTYPE_AP) return -EOPNOTSUPP; - switch (key->alg) { - case ALG_WEP: - case ALG_TKIP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + case WLAN_CIPHER_SUITE_TKIP: break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: if (sc->ah->ah_aes_support) break; diff --git a/drivers/net/wireless/ath/ath5k/pcu.c b/drivers/net/wireless/ath/ath5k/pcu.c index 86fdb6ddfaaa..af273358c7cb 100644 --- a/drivers/net/wireless/ath/ath5k/pcu.c +++ b/drivers/net/wireless/ath/ath5k/pcu.c @@ -695,21 +695,18 @@ int ath5k_hw_reset_key(struct ath5k_hw *ah, u16 entry) static int ath5k_keycache_type(const struct ieee80211_key_conf *key) { - switch (key->alg) { - case ALG_TKIP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_TKIP: return AR5K_KEYTABLE_TYPE_TKIP; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: return AR5K_KEYTABLE_TYPE_CCM; - case ALG_WEP: - if (key->keylen == WLAN_KEY_LEN_WEP40) - return AR5K_KEYTABLE_TYPE_40; - else if (key->keylen == WLAN_KEY_LEN_WEP104) - return AR5K_KEYTABLE_TYPE_104; - return -EINVAL; + case WLAN_CIPHER_SUITE_WEP40: + return AR5K_KEYTABLE_TYPE_40; + case WLAN_CIPHER_SUITE_WEP104: + return AR5K_KEYTABLE_TYPE_104; default: return -EINVAL; } - return -EINVAL; } /* @@ -728,7 +725,7 @@ int ath5k_hw_set_key(struct ath5k_hw *ah, u16 entry, bool is_tkip; const u8 *key_ptr; - is_tkip = (key->alg == ALG_TKIP); + is_tkip = (key->cipher == WLAN_CIPHER_SUITE_TKIP); /* * key->keylen comes in from mac80211 in bytes. 
diff --git a/drivers/net/wireless/ath/ath9k/common.c b/drivers/net/wireless/ath/ath9k/common.c index c86f7d3593ab..3100c87a4fcd 100644 --- a/drivers/net/wireless/ath/ath9k/common.c +++ b/drivers/net/wireless/ath/ath9k/common.c @@ -46,12 +46,17 @@ int ath9k_cmn_get_hw_crypto_keytype(struct sk_buff *skb) struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); if (tx_info->control.hw_key) { - if (tx_info->control.hw_key->alg == ALG_WEP) + switch (tx_info->control.hw_key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: return ATH9K_KEY_TYPE_WEP; - else if (tx_info->control.hw_key->alg == ALG_TKIP) + case WLAN_CIPHER_SUITE_TKIP: return ATH9K_KEY_TYPE_TKIP; - else if (tx_info->control.hw_key->alg == ALG_CCMP) + case WLAN_CIPHER_SUITE_CCMP: return ATH9K_KEY_TYPE_AES; + default: + break; + } } return ATH9K_KEY_TYPE_CLEAR; @@ -212,11 +217,11 @@ static int ath_reserve_key_cache_slot_tkip(struct ath_common *common) } static int ath_reserve_key_cache_slot(struct ath_common *common, - enum ieee80211_key_alg alg) + u32 cipher) { int i; - if (alg == ALG_TKIP) + if (cipher == WLAN_CIPHER_SUITE_TKIP) return ath_reserve_key_cache_slot_tkip(common); /* First, try to find slots that would not be available for TKIP. */ @@ -293,14 +298,15 @@ int ath9k_cmn_key_config(struct ath_common *common, memset(&hk, 0, sizeof(hk)); - switch (key->alg) { - case ALG_WEP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: hk.kv_type = ATH9K_CIPHER_WEP; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: hk.kv_type = ATH9K_CIPHER_TKIP; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: hk.kv_type = ATH9K_CIPHER_AES_CCM; break; default: @@ -316,7 +322,7 @@ int ath9k_cmn_key_config(struct ath_common *common, memcpy(gmac, vif->addr, ETH_ALEN); gmac[0] |= 0x01; mac = gmac; - idx = ath_reserve_key_cache_slot(common, key->alg); + idx = ath_reserve_key_cache_slot(common, key->cipher); break; case NL80211_IFTYPE_ADHOC: if (!sta) { @@ -326,7 +332,7 @@ int ath9k_cmn_key_config(struct ath_common *common, memcpy(gmac, sta->addr, ETH_ALEN); gmac[0] |= 0x01; mac = gmac; - idx = ath_reserve_key_cache_slot(common, key->alg); + idx = ath_reserve_key_cache_slot(common, key->cipher); break; default: idx = key->keyidx; @@ -348,13 +354,13 @@ int ath9k_cmn_key_config(struct ath_common *common, return -EOPNOTSUPP; mac = sta->addr; - idx = ath_reserve_key_cache_slot(common, key->alg); + idx = ath_reserve_key_cache_slot(common, key->cipher); } if (idx < 0) return -ENOSPC; /* no free key cache entries */ - if (key->alg == ALG_TKIP) + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) ret = ath_setkey_tkip(common, idx, key->key, &hk, mac, vif->type == NL80211_IFTYPE_AP); else @@ -364,7 +370,7 @@ int ath9k_cmn_key_config(struct ath_common *common, return -EIO; set_bit(idx, common->keymap); - if (key->alg == ALG_TKIP) { + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { set_bit(idx + 64, common->keymap); if (common->splitmic) { set_bit(idx + 32, common->keymap); @@ -389,7 +395,7 @@ void ath9k_cmn_key_delete(struct ath_common *common, return; clear_bit(key->hw_key_idx, common->keymap); - if (key->alg != ALG_TKIP) + if (key->cipher != WLAN_CIPHER_SUITE_TKIP) return; clear_bit(key->hw_key_idx + 64, common->keymap); diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index 7d09b4b17bbd..4e345be62435 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -1585,9 +1585,10 @@ static int 
ath9k_htc_set_key(struct ieee80211_hw *hw, key->hw_key_idx = ret; /* push IV and Michael MIC generation to stack */ key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; - if (key->alg == ALG_TKIP) + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC; - if (priv->ah->sw_mgmt_crypto && key->alg == ALG_CCMP) + if (priv->ah->sw_mgmt_crypto && + key->cipher == WLAN_CIPHER_SUITE_CCMP) key->flags |= IEEE80211_KEY_FLAG_SW_MGMT; ret = 0; } diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index a3b0ea90439d..1165f909ef04 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1782,9 +1782,10 @@ static int ath9k_set_key(struct ieee80211_hw *hw, key->hw_key_idx = ret; /* push IV and Michael MIC generation to stack */ key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; - if (key->alg == ALG_TKIP) + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC; - if (sc->sc_ah->sw_mgmt_crypto && key->alg == ALG_CCMP) + if (sc->sc_ah->sw_mgmt_crypto && + key->cipher == WLAN_CIPHER_SUITE_CCMP) key->flags |= IEEE80211_KEY_FLAG_SW_MGMT; ret = 0; } diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index a3e2f2bfe3a7..a1186525c70d 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3759,17 +3759,17 @@ static int b43_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, } err = -EINVAL; - switch (key->alg) { - case ALG_WEP: - if (key->keylen == WLAN_KEY_LEN_WEP40) - algorithm = B43_SEC_ALGO_WEP40; - else - algorithm = B43_SEC_ALGO_WEP104; + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + algorithm = B43_SEC_ALGO_WEP40; + break; + case WLAN_CIPHER_SUITE_WEP104: + algorithm = B43_SEC_ALGO_WEP104; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: algorithm = B43_SEC_ALGO_TKIP; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: algorithm = B43_SEC_ALGO_AES; break; default: diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-tx.c b/drivers/net/wireless/iwlwifi/iwl-agn-tx.c index 69155aa448fb..3fc982e87921 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-tx.c @@ -470,8 +470,8 @@ static void iwlagn_tx_cmd_build_hwcrypto(struct iwl_priv *priv, { struct ieee80211_key_conf *keyconf = info->control.hw_key; - switch (keyconf->alg) { - case ALG_CCMP: + switch (keyconf->cipher) { + case WLAN_CIPHER_SUITE_CCMP: tx_cmd->sec_ctl = TX_CMD_SEC_CCM; memcpy(tx_cmd->key, keyconf->key, keyconf->keylen); if (info->flags & IEEE80211_TX_CTL_AMPDU) @@ -479,20 +479,20 @@ static void iwlagn_tx_cmd_build_hwcrypto(struct iwl_priv *priv, IWL_DEBUG_TX(priv, "tx_cmd with AES hwcrypto\n"); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: tx_cmd->sec_ctl = TX_CMD_SEC_TKIP; ieee80211_get_tkip_key(keyconf, skb_frag, IEEE80211_TKIP_P2_KEY, tx_cmd->key); IWL_DEBUG_TX(priv, "tx_cmd with tkip hwcrypto\n"); break; - case ALG_WEP: + case WLAN_CIPHER_SUITE_WEP104: + tx_cmd->sec_ctl |= TX_CMD_SEC_KEY128; + /* fall through */ + case WLAN_CIPHER_SUITE_WEP40: tx_cmd->sec_ctl |= (TX_CMD_SEC_WEP | (keyconf->keyidx & TX_CMD_SEC_MSK) << TX_CMD_SEC_SHIFT); - if (keyconf->keylen == WEP_KEY_LEN_128) - tx_cmd->sec_ctl |= TX_CMD_SEC_KEY128; - memcpy(&tx_cmd->key[3], keyconf->key, keyconf->keylen); IWL_DEBUG_TX(priv, "Configuring packet for WEP encryption " @@ -500,7 +500,7 @@ static void iwlagn_tx_cmd_build_hwcrypto(struct iwl_priv *priv, break; default: - IWL_ERR(priv, "Unknown encode alg 
%d\n", keyconf->alg); + IWL_ERR(priv, "Unknown encode cipher %x\n", keyconf->cipher); break; } } diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 43e078b87378..21a52ae05c0d 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -3389,7 +3389,9 @@ static int iwl_mac_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, * in 1X mode. * In legacy wep mode, we use another host command to the uCode. */ - if (key->alg == ALG_WEP && !sta && vif->type != NL80211_IFTYPE_AP) { + if ((key->cipher == WLAN_CIPHER_SUITE_WEP40 || + key->cipher == WLAN_CIPHER_SUITE_WEP104) && + !sta && vif->type != NL80211_IFTYPE_AP) { if (cmd == SET_KEY) is_default_wep_key = !priv->key_mapping_key; else diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h index 599635547021..149619fb1c07 100644 --- a/drivers/net/wireless/iwlwifi/iwl-dev.h +++ b/drivers/net/wireless/iwlwifi/iwl-dev.h @@ -420,7 +420,7 @@ struct iwl_tid_data { }; struct iwl_hw_key { - enum ieee80211_key_alg alg; + u32 cipher; int keylen; u8 keyidx; u8 key[32]; diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c index 7e0829be5e78..d5e8db37b86e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-sta.c +++ b/drivers/net/wireless/iwlwifi/iwl-sta.c @@ -818,7 +818,7 @@ int iwl_set_default_wep_key(struct iwl_priv *priv, keyconf->flags &= ~IEEE80211_KEY_FLAG_GENERATE_IV; keyconf->hw_key_idx = HW_KEY_DEFAULT; - priv->stations[IWL_AP_ID].keyinfo.alg = ALG_WEP; + priv->stations[IWL_AP_ID].keyinfo.cipher = keyconf->cipher; priv->wep_keys[keyconf->keyidx].key_size = keyconf->keylen; memcpy(&priv->wep_keys[keyconf->keyidx].key, &keyconf->key, @@ -856,7 +856,7 @@ static int iwl_set_wep_dynamic_key_info(struct iwl_priv *priv, spin_lock_irqsave(&priv->sta_lock, flags); - priv->stations[sta_id].keyinfo.alg = keyconf->alg; + priv->stations[sta_id].keyinfo.cipher = keyconf->cipher; priv->stations[sta_id].keyinfo.keylen = keyconf->keylen; priv->stations[sta_id].keyinfo.keyidx = keyconf->keyidx; @@ -906,7 +906,7 @@ static int iwl_set_ccmp_dynamic_key_info(struct iwl_priv *priv, keyconf->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; spin_lock_irqsave(&priv->sta_lock, flags); - priv->stations[sta_id].keyinfo.alg = keyconf->alg; + priv->stations[sta_id].keyinfo.cipher = keyconf->cipher; priv->stations[sta_id].keyinfo.keylen = keyconf->keylen; memcpy(priv->stations[sta_id].keyinfo.key, keyconf->key, @@ -955,7 +955,7 @@ static int iwl_set_tkip_dynamic_key_info(struct iwl_priv *priv, spin_lock_irqsave(&priv->sta_lock, flags); - priv->stations[sta_id].keyinfo.alg = keyconf->alg; + priv->stations[sta_id].keyinfo.cipher = keyconf->cipher; priv->stations[sta_id].keyinfo.keylen = 16; if ((priv->stations[sta_id].sta.key.key_flags & STA_KEY_FLG_ENCRYPT_MSK) @@ -1090,24 +1090,26 @@ int iwl_set_dynamic_key(struct iwl_priv *priv, priv->key_mapping_key++; keyconf->hw_key_idx = HW_KEY_DYNAMIC; - switch (keyconf->alg) { - case ALG_CCMP: + switch (keyconf->cipher) { + case WLAN_CIPHER_SUITE_CCMP: ret = iwl_set_ccmp_dynamic_key_info(priv, keyconf, sta_id); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: ret = iwl_set_tkip_dynamic_key_info(priv, keyconf, sta_id); break; - case ALG_WEP: + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: ret = iwl_set_wep_dynamic_key_info(priv, keyconf, sta_id); break; default: IWL_ERR(priv, - "Unknown alg: %s alg = %d\n", __func__, keyconf->alg); + "Unknown alg: %s cipher = %x\n", 
__func__, + keyconf->cipher); ret = -EINVAL; } - IWL_DEBUG_WEP(priv, "Set dynamic key: alg= %d len=%d idx=%d sta=%d ret=%d\n", - keyconf->alg, keyconf->keylen, keyconf->keyidx, + IWL_DEBUG_WEP(priv, "Set dynamic key: cipher=%x len=%d idx=%d sta=%d ret=%d\n", + keyconf->cipher, keyconf->keylen, keyconf->keyidx, sta_id, ret); return ret; diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index b14eaf91c7c2..faa2e0037e10 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -152,7 +152,7 @@ static int iwl3945_set_ccmp_dynamic_key_info(struct iwl_priv *priv, key_flags &= ~STA_KEY_FLG_INVALID; spin_lock_irqsave(&priv->sta_lock, flags); - priv->stations[sta_id].keyinfo.alg = keyconf->alg; + priv->stations[sta_id].keyinfo.cipher = keyconf->cipher; priv->stations[sta_id].keyinfo.keylen = keyconf->keylen; memcpy(priv->stations[sta_id].keyinfo.key, keyconf->key, keyconf->keylen); @@ -223,23 +223,25 @@ static int iwl3945_set_dynamic_key(struct iwl_priv *priv, keyconf->hw_key_idx = HW_KEY_DYNAMIC; - switch (keyconf->alg) { - case ALG_CCMP: + switch (keyconf->cipher) { + case WLAN_CIPHER_SUITE_CCMP: ret = iwl3945_set_ccmp_dynamic_key_info(priv, keyconf, sta_id); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: ret = iwl3945_set_tkip_dynamic_key_info(priv, keyconf, sta_id); break; - case ALG_WEP: + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: ret = iwl3945_set_wep_dynamic_key_info(priv, keyconf, sta_id); break; default: - IWL_ERR(priv, "Unknown alg: %s alg = %d\n", __func__, keyconf->alg); + IWL_ERR(priv, "Unknown alg: %s alg=%x\n", __func__, + keyconf->cipher); ret = -EINVAL; } - IWL_DEBUG_WEP(priv, "Set dynamic key: alg= %d len=%d idx=%d sta=%d ret=%d\n", - keyconf->alg, keyconf->keylen, keyconf->keyidx, + IWL_DEBUG_WEP(priv, "Set dynamic key: alg=%x len=%d idx=%d sta=%d ret=%d\n", + keyconf->cipher, keyconf->keylen, keyconf->keyidx, sta_id, ret); return ret; @@ -255,10 +257,11 @@ static int iwl3945_remove_static_key(struct iwl_priv *priv) static int iwl3945_set_static_key(struct iwl_priv *priv, struct ieee80211_key_conf *key) { - if (key->alg == ALG_WEP) + if (key->cipher == WLAN_CIPHER_SUITE_WEP40 || + key->cipher == WLAN_CIPHER_SUITE_WEP104) return -EOPNOTSUPP; - IWL_ERR(priv, "Static key invalid: alg %d\n", key->alg); + IWL_ERR(priv, "Static key invalid: cipher %x\n", key->cipher); return -EINVAL; } @@ -370,23 +373,25 @@ static void iwl3945_build_tx_cmd_hwcrypto(struct iwl_priv *priv, struct iwl3945_tx_cmd *tx_cmd = (struct iwl3945_tx_cmd *)cmd->cmd.payload; struct iwl_hw_key *keyinfo = &priv->stations[sta_id].keyinfo; - switch (keyinfo->alg) { - case ALG_CCMP: + tx_cmd->sec_ctl = 0; + + switch (keyinfo->cipher) { + case WLAN_CIPHER_SUITE_CCMP: tx_cmd->sec_ctl = TX_CMD_SEC_CCM; memcpy(tx_cmd->key, keyinfo->key, keyinfo->keylen); IWL_DEBUG_TX(priv, "tx_cmd with AES hwcrypto\n"); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: break; - case ALG_WEP: - tx_cmd->sec_ctl = TX_CMD_SEC_WEP | + case WLAN_CIPHER_SUITE_WEP104: + tx_cmd->sec_ctl |= TX_CMD_SEC_KEY128; + /* fall through */ + case WLAN_CIPHER_SUITE_WEP40: + tx_cmd->sec_ctl |= TX_CMD_SEC_WEP | (info->control.hw_key->hw_key_idx & TX_CMD_SEC_MSK) << TX_CMD_SEC_SHIFT; - if (keyinfo->keylen == 13) - tx_cmd->sec_ctl |= TX_CMD_SEC_KEY128; - memcpy(&tx_cmd->key[3], keyinfo->key, keyinfo->keylen); IWL_DEBUG_TX(priv, "Configuring packet for WEP encryption " @@ -394,7 +399,7 @@ static void 
iwl3945_build_tx_cmd_hwcrypto(struct iwl_priv *priv, break; default: - IWL_ERR(priv, "Unknown encode alg %d\n", keyinfo->alg); + IWL_ERR(priv, "Unknown encode cipher %x\n", keyinfo->cipher); break; } } diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index 47db439b63bf..622d27b6d8f2 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -429,8 +429,8 @@ static int p54_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, mutex_lock(&priv->conf_mutex); if (cmd == SET_KEY) { - switch (key->alg) { - case ALG_TKIP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_TKIP: if (!(priv->privacy_caps & (BR_DESC_PRIV_CAP_MICHAEL | BR_DESC_PRIV_CAP_TKIP))) { ret = -EOPNOTSUPP; @@ -439,7 +439,8 @@ static int p54_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; algo = P54_CRYPTO_TKIPMICHAEL; break; - case ALG_WEP: + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: if (!(priv->privacy_caps & BR_DESC_PRIV_CAP_WEP)) { ret = -EOPNOTSUPP; goto out_unlock; @@ -447,7 +448,7 @@ static int p54_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; algo = P54_CRYPTO_WEP; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: if (!(priv->privacy_caps & BR_DESC_PRIV_CAP_AESCCMP)) { ret = -EOPNOTSUPP; goto out_unlock; diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c index 427b46f558ed..e53f8cec7798 100644 --- a/drivers/net/wireless/p54/txrx.c +++ b/drivers/net/wireless/p54/txrx.c @@ -683,14 +683,15 @@ static void p54_tx_80211_header(struct p54_common *priv, struct sk_buff *skb, } } -static u8 p54_convert_algo(enum ieee80211_key_alg alg) +static u8 p54_convert_algo(u32 cipher) { - switch (alg) { - case ALG_WEP: + switch (cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: return P54_CRYPTO_WEP; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: return P54_CRYPTO_TKIPMICHAEL; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: return P54_CRYPTO_AESCCMP; default: return 0; @@ -731,7 +732,7 @@ int p54_tx_80211(struct ieee80211_hw *dev, struct sk_buff *skb) if (info->control.hw_key) { crypt_offset = ieee80211_get_hdrlen_from_skb(skb); - if (info->control.hw_key->alg == ALG_TKIP) { + if (info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { u8 *iv = (u8 *)(skb->data + crypt_offset); /* * The firmware excepts that the IV has to have @@ -827,10 +828,10 @@ int p54_tx_80211(struct ieee80211_hw *dev, struct sk_buff *skb) hdr->tries = ridx; txhdr->rts_rate_idx = 0; if (info->control.hw_key) { - txhdr->key_type = p54_convert_algo(info->control.hw_key->alg); + txhdr->key_type = p54_convert_algo(info->control.hw_key->cipher); txhdr->key_len = min((u8)16, info->control.hw_key->keylen); memcpy(txhdr->key, info->control.hw_key->key, txhdr->key_len); - if (info->control.hw_key->alg == ALG_TKIP) { + if (info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { /* reserve space for the MIC key */ len += 8; memcpy(skb_put(skb, 8), &(info->control.hw_key->key diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index cdaf93f48263..97cf72f8bc12 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -355,7 +355,9 @@ static int rt2500usb_config_key(struct rt2x00_dev *rt2x00dev, * it is known that not work at least on some hardware. * SW crypto will be used in that case. 
*/ - if (key->alg == ALG_WEP && key->keyidx != 0) + if ((key->cipher == WLAN_CIPHER_SUITE_WEP40 || + key->cipher == WLAN_CIPHER_SUITE_WEP104) && + key->keyidx != 0) return -EOPNOTSUPP; /* diff --git a/drivers/net/wireless/rt2x00/rt2x00crypto.c b/drivers/net/wireless/rt2x00/rt2x00crypto.c index 583dacd8d241..5e9074bf2b8e 100644 --- a/drivers/net/wireless/rt2x00/rt2x00crypto.c +++ b/drivers/net/wireless/rt2x00/rt2x00crypto.c @@ -31,15 +31,14 @@ enum cipher rt2x00crypto_key_to_cipher(struct ieee80211_key_conf *key) { - switch (key->alg) { - case ALG_WEP: - if (key->keylen == WLAN_KEY_LEN_WEP40) - return CIPHER_WEP64; - else - return CIPHER_WEP128; - case ALG_TKIP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + return CIPHER_WEP64; + case WLAN_CIPHER_SUITE_WEP104: + return CIPHER_WEP128; + case WLAN_CIPHER_SUITE_TKIP: return CIPHER_TKIP; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: return CIPHER_AES; default: return CIPHER_NONE; @@ -95,7 +94,7 @@ unsigned int rt2x00crypto_tx_overhead(struct rt2x00_dev *rt2x00dev, overhead += key->iv_len; if (!(key->flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { - if (key->alg == ALG_TKIP) + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) overhead += 8; } diff --git a/drivers/net/wireless/wl12xx/wl1251_main.c b/drivers/net/wireless/wl12xx/wl1251_main.c index 861a5f33761e..6d31c855fcfe 100644 --- a/drivers/net/wireless/wl12xx/wl1251_main.c +++ b/drivers/net/wireless/wl12xx/wl1251_main.c @@ -725,8 +725,9 @@ static int wl1251_set_key_type(struct wl1251 *wl, struct ieee80211_key_conf *mac80211_key, const u8 *addr) { - switch (mac80211_key->alg) { - case ALG_WEP: + switch (mac80211_key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: if (is_broadcast_ether_addr(addr)) key->key_type = KEY_WEP_DEFAULT; else @@ -734,7 +735,7 @@ static int wl1251_set_key_type(struct wl1251 *wl, mac80211_key->hw_key_idx = mac80211_key->keyidx; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: if (is_broadcast_ether_addr(addr)) key->key_type = KEY_TKIP_MIC_GROUP; else @@ -742,7 +743,7 @@ static int wl1251_set_key_type(struct wl1251 *wl, mac80211_key->hw_key_idx = mac80211_key->keyidx; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: if (is_broadcast_ether_addr(addr)) key->key_type = KEY_AES_GROUP; else @@ -750,7 +751,7 @@ static int wl1251_set_key_type(struct wl1251 *wl, mac80211_key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; break; default: - wl1251_error("Unknown key algo 0x%x", mac80211_key->alg); + wl1251_error("Unknown key cipher 0x%x", mac80211_key->cipher); return -EOPNOTSUPP; } @@ -783,7 +784,7 @@ static int wl1251_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, wl1251_debug(DEBUG_CRYPT, "CMD: 0x%x", cmd); wl1251_dump(DEBUG_CRYPT, "ADDR: ", addr, ETH_ALEN); wl1251_debug(DEBUG_CRYPT, "Key: algo:0x%x, id:%d, len:%d flags 0x%x", - key->alg, key->keyidx, key->keylen, key->flags); + key->cipher, key->keyidx, key->keylen, key->flags); wl1251_dump(DEBUG_CRYPT, "KEY: ", key->key, key->keylen); if (is_zero_ether_addr(addr)) { diff --git a/drivers/net/wireless/wl12xx/wl1251_tx.c b/drivers/net/wireless/wl12xx/wl1251_tx.c index a38ec199187a..6634b3e27cfc 100644 --- a/drivers/net/wireless/wl12xx/wl1251_tx.c +++ b/drivers/net/wireless/wl12xx/wl1251_tx.c @@ -189,7 +189,7 @@ static int wl1251_tx_send_packet(struct wl1251 *wl, struct sk_buff *skb, tx_hdr = (struct tx_double_buffer_desc *) skb->data; if (control->control.hw_key && - control->control.hw_key->alg == ALG_TKIP) { + control->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { int 
hdrlen; __le16 fc; u16 length; @@ -399,7 +399,7 @@ static void wl1251_tx_packet_cb(struct wl1251 *wl, */ frame = skb_pull(skb, sizeof(struct tx_double_buffer_desc)); if (info->control.hw_key && - info->control.hw_key->alg == ALG_TKIP) { + info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { hdrlen = ieee80211_get_hdrlen_from_skb(skb); memmove(frame + WL1251_TKIP_IV_SPACE, frame, hdrlen); skb_pull(skb, WL1251_TKIP_IV_SPACE); diff --git a/drivers/net/wireless/wl12xx/wl1271_main.c b/drivers/net/wireless/wl12xx/wl1271_main.c index 9d68f0012f05..30194c0f36a9 100644 --- a/drivers/net/wireless/wl12xx/wl1271_main.c +++ b/drivers/net/wireless/wl12xx/wl1271_main.c @@ -1439,7 +1439,7 @@ static int wl1271_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, wl1271_debug(DEBUG_CRYPT, "CMD: 0x%x", cmd); wl1271_dump(DEBUG_CRYPT, "ADDR: ", addr, ETH_ALEN); wl1271_debug(DEBUG_CRYPT, "Key: algo:0x%x, id:%d, len:%d flags 0x%x", - key_conf->alg, key_conf->keyidx, + key_conf->cipher, key_conf->keyidx, key_conf->keylen, key_conf->flags); wl1271_dump(DEBUG_CRYPT, "KEY: ", key_conf->key, key_conf->keylen); @@ -1455,20 +1455,21 @@ static int wl1271_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (ret < 0) goto out_unlock; - switch (key_conf->alg) { - case ALG_WEP: + switch (key_conf->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: key_type = KEY_WEP; key_conf->hw_key_idx = key_conf->keyidx; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: key_type = KEY_TKIP; key_conf->hw_key_idx = key_conf->keyidx; tx_seq_32 = WL1271_TX_SECURITY_HI32(wl->tx_security_seq); tx_seq_16 = WL1271_TX_SECURITY_LO16(wl->tx_security_seq); break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: key_type = KEY_AES; key_conf->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; @@ -1476,7 +1477,7 @@ static int wl1271_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, tx_seq_16 = WL1271_TX_SECURITY_LO16(wl->tx_security_seq); break; default: - wl1271_error("Unknown key algo 0x%x", key_conf->alg); + wl1271_error("Unknown key algo 0x%x", key_conf->cipher); ret = -EOPNOTSUPP; goto out_sleep; diff --git a/drivers/net/wireless/wl12xx/wl1271_tx.c b/drivers/net/wireless/wl12xx/wl1271_tx.c index c592cc2e9fe8..dc0b46c93c4b 100644 --- a/drivers/net/wireless/wl12xx/wl1271_tx.c +++ b/drivers/net/wireless/wl12xx/wl1271_tx.c @@ -193,7 +193,7 @@ static int wl1271_tx_frame(struct wl1271 *wl, struct sk_buff *skb) info = IEEE80211_SKB_CB(skb); if (info->control.hw_key && - info->control.hw_key->alg == ALG_TKIP) + info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) extra = WL1271_TKIP_IV_SPACE; if (info->control.hw_key) { @@ -347,7 +347,7 @@ static void wl1271_tx_complete_packet(struct wl1271 *wl, /* remove TKIP header space if present */ if (info->control.hw_key && - info->control.hw_key->alg == ALG_TKIP) { + info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { int hdrlen = ieee80211_get_hdrlen_from_skb(skb); memmove(skb->data + WL1271_TKIP_IV_SPACE, skb->data, hdrlen); skb_pull(skb, WL1271_TKIP_IV_SPACE); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 871ed1de736a..914c747ac3a0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -788,20 +788,6 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) return false; } -/** - * enum ieee80211_key_alg - key algorithm - * @ALG_WEP: WEP40 or WEP104 - * @ALG_TKIP: TKIP - * @ALG_CCMP: CCMP (AES) - * @ALG_AES_CMAC: AES-128-CMAC - */ -enum ieee80211_key_alg { - ALG_WEP, - ALG_TKIP, - ALG_CCMP, - ALG_AES_CMAC, -}; - 
/** * enum ieee80211_key_flags - key flags * @@ -839,7 +825,7 @@ enum ieee80211_key_flags { * @hw_key_idx: To be set by the driver, this is the key index the driver * wants to be given when a frame is transmitted and needs to be * encrypted in hardware. - * @alg: The key algorithm. + * @cipher: The key's cipher suite selector. * @flags: key flags, see &enum ieee80211_key_flags. * @keyidx: the key index (0-3) * @keylen: key material length @@ -852,7 +838,7 @@ enum ieee80211_key_flags { * @iv_len: The IV length for this key type */ struct ieee80211_key_conf { - enum ieee80211_key_alg alg; + u32 cipher; u8 icv_len; u8 iv_len; u8 hw_key_idx; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 19c6146010b7..9a35d9e7efd7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -116,7 +116,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata; struct sta_info *sta = NULL; - enum ieee80211_key_alg alg; struct ieee80211_key *key; int err; @@ -125,31 +124,20 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); + /* reject WEP and TKIP keys if WEP failed to initialize */ switch (params->cipher) { case WLAN_CIPHER_SUITE_WEP40: - case WLAN_CIPHER_SUITE_WEP104: - alg = ALG_WEP; - break; case WLAN_CIPHER_SUITE_TKIP: - alg = ALG_TKIP; - break; - case WLAN_CIPHER_SUITE_CCMP: - alg = ALG_CCMP; - break; - case WLAN_CIPHER_SUITE_AES_CMAC: - alg = ALG_AES_CMAC; + case WLAN_CIPHER_SUITE_WEP104: + if (IS_ERR(sdata->local->wep_tx_tfm)) + return -EINVAL; break; default: - return -EINVAL; + break; } - /* reject WEP and TKIP keys if WEP failed to initialize */ - if ((alg == ALG_WEP || alg == ALG_TKIP) && - IS_ERR(sdata->local->wep_tx_tfm)) - return -EINVAL; - - key = ieee80211_key_alloc(alg, key_idx, params->key_len, params->key, - params->seq_len, params->seq); + key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len, + params->key, params->seq_len, params->seq); if (IS_ERR(key)) return PTR_ERR(key); @@ -247,10 +235,10 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, memset(¶ms, 0, sizeof(params)); - switch (key->conf.alg) { - case ALG_TKIP: - params.cipher = WLAN_CIPHER_SUITE_TKIP; + params.cipher = key->conf.cipher; + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_TKIP: iv32 = key->u.tkip.tx.iv32; iv16 = key->u.tkip.tx.iv16; @@ -268,8 +256,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq = seq; params.seq_len = 6; break; - case ALG_CCMP: - params.cipher = WLAN_CIPHER_SUITE_CCMP; + case WLAN_CIPHER_SUITE_CCMP: seq[0] = key->u.ccmp.tx_pn[5]; seq[1] = key->u.ccmp.tx_pn[4]; seq[2] = key->u.ccmp.tx_pn[3]; @@ -279,14 +266,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq = seq; params.seq_len = 6; break; - case ALG_WEP: - if (key->conf.keylen == 5) - params.cipher = WLAN_CIPHER_SUITE_WEP40; - else - params.cipher = WLAN_CIPHER_SUITE_WEP104; - break; - case ALG_AES_CMAC: - params.cipher = WLAN_CIPHER_SUITE_AES_CMAC; + case WLAN_CIPHER_SUITE_AES_CMAC: seq[0] = key->u.aes_cmac.tx_pn[5]; seq[1] = key->u.aes_cmac.tx_pn[4]; seq[2] = key->u.aes_cmac.tx_pn[3]; diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index fa5e76e658ef..1647f8dc5cda 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -64,26 +64,13 @@ static ssize_t key_algorithm_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char *alg; + 
char buf[15]; struct ieee80211_key *key = file->private_data; + u32 c = key->conf.cipher; - switch (key->conf.alg) { - case ALG_WEP: - alg = "WEP\n"; - break; - case ALG_TKIP: - alg = "TKIP\n"; - break; - case ALG_CCMP: - alg = "CCMP\n"; - break; - case ALG_AES_CMAC: - alg = "AES-128-CMAC\n"; - break; - default: - return 0; - } - return simple_read_from_buffer(userbuf, count, ppos, alg, strlen(alg)); + sprintf(buf, "%.2x-%.2x-%.2x:%d\n", + c >> 24, (c >> 16) & 0xff, (c >> 8) & 0xff, c & 0xff); + return simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf)); } KEY_OPS(algorithm); @@ -95,21 +82,22 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, int len; struct ieee80211_key *key = file->private_data; - switch (key->conf.alg) { - case ALG_WEP: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: len = scnprintf(buf, sizeof(buf), "\n"); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: len = scnprintf(buf, sizeof(buf), "%08x %04x\n", key->u.tkip.tx.iv32, key->u.tkip.tx.iv16); break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: tpn = key->u.ccmp.tx_pn; len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]); break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: tpn = key->u.aes_cmac.tx_pn; len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], @@ -130,11 +118,12 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, int i, len; const u8 *rpn; - switch (key->conf.alg) { - case ALG_WEP: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: len = scnprintf(buf, sizeof(buf), "\n"); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: for (i = 0; i < NUM_RX_DATA_QUEUES; i++) p += scnprintf(p, sizeof(buf)+buf-p, "%08x %04x\n", @@ -142,7 +131,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, key->u.tkip.rx[i].iv16); len = p - buf; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) { rpn = key->u.ccmp.rx_pn[i]; p += scnprintf(p, sizeof(buf)+buf-p, @@ -152,7 +141,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, } len = p - buf; break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: rpn = key->u.aes_cmac.rx_pn; p += scnprintf(p, sizeof(buf)+buf-p, "%02x%02x%02x%02x%02x%02x\n", @@ -174,11 +163,11 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf, char buf[20]; int len; - switch (key->conf.alg) { - case ALG_CCMP: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.replays); break; @@ -196,8 +185,8 @@ static ssize_t key_icverrors_read(struct file *file, char __user *userbuf, char buf[20]; int len; - switch (key->conf.alg) { - case ALG_AES_CMAC: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_AES_CMAC: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.icverrors); break; diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 5d5d2a974668..b5a95582d816 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -336,7 +336,7 @@ TRACE_EVENT(drv_set_key, LOCAL_ENTRY VIF_ENTRY STA_ENTRY - __field(enum ieee80211_key_alg, alg) + __field(u32, cipher) __field(u8, hw_key_idx) 
__field(u8, flags) __field(s8, keyidx) @@ -346,7 +346,7 @@ TRACE_EVENT(drv_set_key, LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; - __entry->alg = key->alg; + __entry->cipher = key->cipher; __entry->flags = key->flags; __entry->keyidx = key->keyidx; __entry->hw_key_idx = key->hw_key_idx; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index d6dbc8ea4ead..3203d1d3cd38 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -227,9 +227,7 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, } } -struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, - int idx, - size_t key_len, +struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, const u8 *key_data, size_t seq_len, const u8 *seq) { @@ -249,15 +247,16 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, key->conf.flags = 0; key->flags = 0; - key->conf.alg = alg; + key->conf.cipher = cipher; key->conf.keyidx = idx; key->conf.keylen = key_len; - switch (alg) { - case ALG_WEP: + switch (cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: key->conf.iv_len = WEP_IV_LEN; key->conf.icv_len = WEP_ICV_LEN; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: key->conf.iv_len = TKIP_IV_LEN; key->conf.icv_len = TKIP_ICV_LEN; if (seq) { @@ -269,7 +268,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, } } break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: key->conf.iv_len = CCMP_HDR_LEN; key->conf.icv_len = CCMP_MIC_LEN; if (seq) { @@ -279,7 +278,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, seq[CCMP_PN_LEN - j - 1]; } break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: key->conf.iv_len = 0; key->conf.icv_len = sizeof(struct ieee80211_mmie); if (seq) @@ -290,7 +289,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, memcpy(key->conf.key, key_data, key_len); INIT_LIST_HEAD(&key->list); - if (alg == ALG_CCMP) { + if (cipher == WLAN_CIPHER_SUITE_CCMP) { /* * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. @@ -303,7 +302,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, } } - if (alg == ALG_AES_CMAC) { + if (cipher == WLAN_CIPHER_SUITE_AES_CMAC) { /* * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. 
@@ -328,9 +327,9 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key) if (key->local) ieee80211_key_disable_hw_accel(key); - if (key->conf.alg == ALG_CCMP) + if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP) ieee80211_aes_key_free(key->u.ccmp.tfm); - if (key->conf.alg == ALG_AES_CMAC) + if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); if (key->local) ieee80211_debugfs_key_remove(key); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index b665bbb7a471..53b5ce12536f 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -123,9 +123,7 @@ struct ieee80211_key { struct ieee80211_key_conf conf; }; -struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, - int idx, - size_t key_len, +struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, const u8 *key_data, size_t seq_len, const u8 *seq); /* diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f24a0a1cff1a..ad2427021b26 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -961,7 +961,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * pairwise or station-to-station keys, but for WEP we allow * using a key index as well. */ - if (rx->key && rx->key->conf.alg != ALG_WEP && + if (rx->key && rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 && + rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 && !is_multicast_ether_addr(hdr->addr1)) rx->key = NULL; } @@ -977,8 +978,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; /* the hdr variable is invalid now! */ - switch (rx->key->conf.alg) { - case ALG_WEP: + switch (rx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: /* Check for weak IVs if possible */ if (rx->sta && ieee80211_is_data(fc) && (!(status->flag & RX_FLAG_IV_STRIPPED) || @@ -988,13 +990,13 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) result = ieee80211_crypto_wep_decrypt(rx); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: result = ieee80211_crypto_tkip_decrypt(rx); break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: result = ieee80211_crypto_ccmp_decrypt(rx); break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: result = ieee80211_crypto_aes_cmac_decrypt(rx); break; } @@ -1291,7 +1293,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) /* This is the first fragment of a new frame. */ entry = ieee80211_reassemble_add(rx->sdata, frag, seq, rx->queue, &(rx->skb)); - if (rx->key && rx->key->conf.alg == ALG_CCMP && + if (rx->key && rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP && ieee80211_has_protected(fc)) { int queue = ieee80211_is_mgmt(fc) ? 
NUM_RX_DATA_QUEUES : rx->queue; @@ -1320,7 +1322,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) int i; u8 pn[CCMP_PN_LEN], *rpn; int queue; - if (!rx->key || rx->key->conf.alg != ALG_CCMP) + if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP) return RX_DROP_UNUSABLE; memcpy(pn, entry->last_pn, CCMP_PN_LEN); for (i = CCMP_PN_LEN - 1; i >= 0; i--) { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index c54db966926b..bc4fefc91663 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -543,15 +543,16 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key->tx_rx_count++; /* TODO: add threshold stuff again */ - switch (tx->key->conf.alg) { - case ALG_WEP: + switch (tx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: if (ieee80211_is_auth(hdr->frame_control)) break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: if (!ieee80211_is_data_present(hdr->frame_control)) tx->key = NULL; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: if (!ieee80211_is_data_present(hdr->frame_control) && !ieee80211_use_mfp(hdr->frame_control, tx->sta, tx->skb)) @@ -561,7 +562,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) IEEE80211_KEY_FLAG_SW_MGMT) && ieee80211_is_mgmt(hdr->frame_control); break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: if (!ieee80211_is_mgmt(hdr->frame_control)) tx->key = NULL; break; @@ -949,14 +950,15 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) if (!tx->key) return TX_CONTINUE; - switch (tx->key->conf.alg) { - case ALG_WEP: + switch (tx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: return ieee80211_crypto_wep_encrypt(tx); - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: return ieee80211_crypto_tkip_encrypt(tx); - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: return ieee80211_crypto_ccmp_encrypt(tx); - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: return ieee80211_crypto_aes_cmac_encrypt(tx); } diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 9ebc8d8a1f5b..f27484c22b9f 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -240,7 +240,7 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local, keyidx = skb->data[hdrlen + 3] >> 6; - if (!key || keyidx != key->conf.keyidx || key->conf.alg != ALG_WEP) + if (!key || keyidx != key->conf.keyidx) return -1; klen = 3 + key->conf.keylen; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 8d59d27d887e..b08ad94b56da 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -36,8 +36,8 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) int tail; hdr = (struct ieee80211_hdr *)skb->data; - if (!tx->key || tx->key->conf.alg != ALG_TKIP || skb->len < 24 || - !ieee80211_is_data_present(hdr->frame_control)) + if (!tx->key || tx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP || + skb->len < 24 || !ieee80211_is_data_present(hdr->frame_control)) return TX_CONTINUE; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -94,7 +94,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) if (status->flag & RX_FLAG_MMIC_STRIPPED) return RX_CONTINUE; - if (!rx->key || rx->key->conf.alg != ALG_TKIP || + if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP || !ieee80211_has_protected(hdr->frame_control) || !ieee80211_is_data_present(hdr->frame_control)) return RX_CONTINUE; -- cgit v1.2.3 From bfb564e7391340638afe4ad67744a8f3858e7566 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Wed, 4 Aug 2010 06:15:52 +0000 Subject: core: Factor out flow 
calculation from get_rps_cpu Factor out flow calculation code from get_rps_cpu, since other functions can use the same code. Revisions: v2 (Ben): Separate flow calculation out and use in select queue. v3 (Arnd): Don't re-implement MIN. v4 (Changli): skb->data points to ethernet header in macvtap, and add a fast path. Tested macvtap with this patch. v5 (Changli): - Cache skb->rxhash in skb_get_rxhash - macvtap may not have pow(2) queues, so change code for queue selection. (Arnd): - Use first available queue if all fails. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++ net/core/dev.c | 106 +++++++++++++++++++++++++++++-------------------- 2 files changed, 71 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 77eb60d2b496..d8050382b189 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -558,6 +558,15 @@ extern unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config, struct ts_state *state); +extern __u32 __skb_get_rxhash(struct sk_buff *skb); +static inline __u32 skb_get_rxhash(struct sk_buff *skb) +{ + if (!skb->rxhash) + skb->rxhash = __skb_get_rxhash(skb); + + return skb->rxhash; +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { diff --git a/net/core/dev.c b/net/core/dev.c index 1ae654391442..586a11cb4398 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2259,69 +2259,41 @@ static inline void ____napi_schedule(struct softnet_data *sd, __raise_softirq_irqoff(NET_RX_SOFTIRQ); } -#ifdef CONFIG_RPS - -/* One global table that all flow-based protocols share. */ -struct rps_sock_flow_table *rps_sock_flow_table __read_mostly; -EXPORT_SYMBOL(rps_sock_flow_table); - /* - * get_rps_cpu is called from netif_receive_skb and returns the target - * CPU from the RPS map of the receiving queue for a given skb. - * rcu_read_lock must be held on entry. + * __skb_get_rxhash: calculate a flow hash based on src/dst addresses + * and src/dst port numbers. Returns a non-zero hash number on success + * and 0 on failure. 
*/ -static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, - struct rps_dev_flow **rflowp) +__u32 __skb_get_rxhash(struct sk_buff *skb) { + int nhoff, hash = 0; struct ipv6hdr *ip6; struct iphdr *ip; - struct netdev_rx_queue *rxqueue; - struct rps_map *map; - struct rps_dev_flow_table *flow_table; - struct rps_sock_flow_table *sock_flow_table; - int cpu = -1; u8 ip_proto; - u16 tcpu; u32 addr1, addr2, ihl; union { u32 v32; u16 v16[2]; } ports; - if (skb_rx_queue_recorded(skb)) { - u16 index = skb_get_rx_queue(skb); - if (unlikely(index >= dev->num_rx_queues)) { - WARN_ONCE(dev->num_rx_queues > 1, "%s received packet " - "on queue %u, but number of RX queues is %u\n", - dev->name, index, dev->num_rx_queues); - goto done; - } - rxqueue = dev->_rx + index; - } else - rxqueue = dev->_rx; - - if (!rxqueue->rps_map && !rxqueue->rps_flow_table) - goto done; - - if (skb->rxhash) - goto got_hash; /* Skip hash computation on packet header */ + nhoff = skb_network_offset(skb); switch (skb->protocol) { case __constant_htons(ETH_P_IP): - if (!pskb_may_pull(skb, sizeof(*ip))) + if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) goto done; - ip = (struct iphdr *) skb->data; + ip = (struct iphdr *) skb->data + nhoff; ip_proto = ip->protocol; addr1 = (__force u32) ip->saddr; addr2 = (__force u32) ip->daddr; ihl = ip->ihl; break; case __constant_htons(ETH_P_IPV6): - if (!pskb_may_pull(skb, sizeof(*ip6))) + if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) goto done; - ip6 = (struct ipv6hdr *) skb->data; + ip6 = (struct ipv6hdr *) skb->data + nhoff; ip_proto = ip6->nexthdr; addr1 = (__force u32) ip6->saddr.s6_addr32[3]; addr2 = (__force u32) ip6->daddr.s6_addr32[3]; @@ -2330,6 +2302,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, default: goto done; } + switch (ip_proto) { case IPPROTO_TCP: case IPPROTO_UDP: @@ -2338,8 +2311,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, case IPPROTO_AH: case IPPROTO_SCTP: case IPPROTO_UDPLITE: - if (pskb_may_pull(skb, (ihl * 4) + 4)) { - ports.v32 = * (__force u32 *) (skb->data + (ihl * 4)); + if (pskb_may_pull(skb, (ihl * 4) + 4 + nhoff)) { + ports.v32 = * (__force u32 *) (skb->data + nhoff + + (ihl * 4)); if (ports.v16[1] < ports.v16[0]) swap(ports.v16[0], ports.v16[1]); break; @@ -2352,11 +2326,55 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, /* get a consistent hash (same value on both flow directions) */ if (addr2 < addr1) swap(addr1, addr2); - skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd); - if (!skb->rxhash) - skb->rxhash = 1; -got_hash: + hash = jhash_3words(addr1, addr2, ports.v32, hashrnd); + if (!hash) + hash = 1; + +done: + return hash; +} +EXPORT_SYMBOL(__skb_get_rxhash); + +#ifdef CONFIG_RPS + +/* One global table that all flow-based protocols share. */ +struct rps_sock_flow_table *rps_sock_flow_table __read_mostly; +EXPORT_SYMBOL(rps_sock_flow_table); + +/* + * get_rps_cpu is called from netif_receive_skb and returns the target + * CPU from the RPS map of the receiving queue for a given skb. + * rcu_read_lock must be held on entry. 
+ */ +static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, + struct rps_dev_flow **rflowp) +{ + struct netdev_rx_queue *rxqueue; + struct rps_map *map; + struct rps_dev_flow_table *flow_table; + struct rps_sock_flow_table *sock_flow_table; + int cpu = -1; + u16 tcpu; + + if (skb_rx_queue_recorded(skb)) { + u16 index = skb_get_rx_queue(skb); + if (unlikely(index >= dev->num_rx_queues)) { + WARN_ONCE(dev->num_rx_queues > 1, "%s received packet " + "on queue %u, but number of RX queues is %u\n", + dev->name, index, dev->num_rx_queues); + goto done; + } + rxqueue = dev->_rx + index; + } else + rxqueue = dev->_rx; + + if (!rxqueue->rps_map && !rxqueue->rps_flow_table) + goto done; + + if (!skb_get_rxhash(skb)) + goto done; + flow_table = rcu_dereference(rxqueue->rps_flow_table); sock_flow_table = rcu_dereference(rps_sock_flow_table); if (flow_table && sock_flow_table) { -- cgit v1.2.3 From 1565c7c1c4c8e931bdba66abc8aa6f141a406872 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Wed, 4 Aug 2010 06:15:59 +0000 Subject: macvtap: Implement multiqueue for macvtap driver Implement multiqueue facility for macvtap driver. The idea is that a macvtap device can be opened multiple times and the fds can be used to register, e.g., as backends for vhost. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 99 ++++++++++++++++++++++++++++++++++++++-------- include/linux/if_macvlan.h | 9 ++++- 2 files changed, 90 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 3b1c54a9c6ef..42567279843e 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -84,26 +84,45 @@ static const struct proto_ops macvtap_socket_ops; static DEFINE_SPINLOCK(macvtap_lock); /* - * Choose the next free queue, for now there is only one + * get_slot: return a [unused/occupied] slot in vlan->taps[]: + * - if 'q' is NULL, return the first empty slot; + * - otherwise, return the slot this pointer occupies. */ +static int get_slot(struct macvlan_dev *vlan, struct macvtap_queue *q) +{ + int i; + + for (i = 0; i < MAX_MACVTAP_QUEUES; i++) { + if (rcu_dereference(vlan->taps[i]) == q) + return i; + } + + /* Should never happen */ + BUG_ON(1); +} + static int macvtap_set_queue(struct net_device *dev, struct file *file, struct macvtap_queue *q) { struct macvlan_dev *vlan = netdev_priv(dev); + int index; int err = -EBUSY; spin_lock(&macvtap_lock); - if (rcu_dereference(vlan->tap)) + if (vlan->numvtaps == MAX_MACVTAP_QUEUES) goto out; err = 0; + index = get_slot(vlan, NULL); rcu_assign_pointer(q->vlan, vlan); - rcu_assign_pointer(vlan->tap, q); + rcu_assign_pointer(vlan->taps[index], q); sock_hold(&q->sk); q->file = file; file->private_data = q; + vlan->numvtaps++; + out: spin_unlock(&macvtap_lock); return err; @@ -124,9 +143,12 @@ static void macvtap_put_queue(struct macvtap_queue *q) spin_lock(&macvtap_lock); vlan = rcu_dereference(q->vlan); if (vlan) { - rcu_assign_pointer(vlan->tap, NULL); + int index = get_slot(vlan, q); + + rcu_assign_pointer(vlan->taps[index], NULL); rcu_assign_pointer(q->vlan, NULL); sock_put(&q->sk); + --vlan->numvtaps; } spin_unlock(&macvtap_lock); @@ -136,39 +158,82 @@ static void macvtap_put_queue(struct macvtap_queue *q) } /* - * Since we only support one queue, just dereference the pointer. + * Select a queue based on the rxq of the device on which this packet + * arrived. If the incoming device is not mq, calculate a flow hash + * to select a queue. If all fails, find the first available queue. 
+ * Cache vlan->numvtaps since it can become zero during the execution + * of this function. */ static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, struct sk_buff *skb) { struct macvlan_dev *vlan = netdev_priv(dev); + struct macvtap_queue *tap = NULL; + int numvtaps = vlan->numvtaps; + __u32 rxq; + + if (!numvtaps) + goto out; + + if (likely(skb_rx_queue_recorded(skb))) { + rxq = skb_get_rx_queue(skb); + + while (unlikely(rxq >= numvtaps)) + rxq -= numvtaps; + + tap = rcu_dereference(vlan->taps[rxq]); + if (tap) + goto out; + } + + /* Check if we can use flow to select a queue */ + rxq = skb_get_rxhash(skb); + if (rxq) { + tap = rcu_dereference(vlan->taps[rxq % numvtaps]); + if (tap) + goto out; + } - return rcu_dereference(vlan->tap); + /* Everything failed - find first available queue */ + for (rxq = 0; rxq < MAX_MACVTAP_QUEUES; rxq++) { + tap = rcu_dereference(vlan->taps[rxq]); + if (tap) + break; + } + +out: + return tap; } /* * The net_device is going away, give up the reference - * that it holds on the queue (all the queues one day) - * and safely set the pointer from the queues to NULL. + * that it holds on all queues and safely set the pointer + * from the queues to NULL. */ static void macvtap_del_queues(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); - struct macvtap_queue *q; + struct macvtap_queue *q, *qlist[MAX_MACVTAP_QUEUES]; + int i, j = 0; + /* macvtap_put_queue can free some slots, so go through all slots */ spin_lock(&macvtap_lock); - q = rcu_dereference(vlan->tap); - if (!q) { - spin_unlock(&macvtap_lock); - return; + for (i = 0; i < MAX_MACVTAP_QUEUES && vlan->numvtaps; i++) { + q = rcu_dereference(vlan->taps[i]); + if (q) { + qlist[j++] = q; + rcu_assign_pointer(vlan->taps[i], NULL); + rcu_assign_pointer(q->vlan, NULL); + vlan->numvtaps--; + } } - - rcu_assign_pointer(vlan->tap, NULL); - rcu_assign_pointer(q->vlan, NULL); + BUG_ON(vlan->numvtaps != 0); spin_unlock(&macvtap_lock); synchronize_rcu(); - sock_put(&q->sk); + + for (--j; j >= 0; j--) + sock_put(&qlist[j]->sk); } /* diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 35280b302290..8a2fd66a8b5f 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -40,6 +40,12 @@ struct macvlan_rx_stats { unsigned long rx_errors; }; +/* + * Maximum times a macvtap device can be opened. This can be used to + * configure the number of receive queues, e.g. for multiqueue virtio. + */ +#define MAX_MACVTAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16) + struct macvlan_dev { struct net_device *dev; struct list_head list; @@ -50,7 +56,8 @@ struct macvlan_dev { enum macvlan_mode mode; int (*receive)(struct sk_buff *skb); int (*forward)(struct net_device *dev, struct sk_buff *skb); - struct macvtap_queue *tap; + struct macvtap_queue *taps[MAX_MACVTAP_QUEUES]; + int numvtaps; }; static inline void macvlan_count_rx(const struct macvlan_dev *vlan, -- cgit v1.2.3 From 1b2f1489633888d4a06028315dc19d65768a1c05 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 14 Aug 2010 20:20:34 +1000 Subject: drm: block userspace under-allocating buffer and having drivers overwrite it (v2) With the current drm ioctls (screwed up, but they are ABI), Linus pointed out that we could allow userspace to specify the allocation size, but we pass it to the driver which then uses it blindly to store a struct. Now if userspace specifies the allocation size as smaller than the driver needs, the driver can possibly overwrite memory. 
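For illustration only, here is a minimal standalone sketch of the size-handling pattern the patch adopts; struct drv_args and drv_handler are hypothetical stand-ins, not code from the DRM tree:

#include <stdlib.h>
#include <string.h>

/* Hypothetical ioctl argument struct a driver expects to find in kdata. */
struct drv_args {
	unsigned int handles[8];	/* 32 bytes */
};

static void drv_handler(void *kdata)
{
	/* The driver fills its whole struct, ignoring whatever size
	 * userspace encoded in _IOC_SIZE(cmd). */
	memset(kdata, 0, sizeof(struct drv_args));
}

int main(void)
{
	size_t usize = 8;	/* allocation size chosen by userspace */
	size_t drv_size = sizeof(struct drv_args);	/* size the driver needs */
	size_t asize = usize > drv_size ? usize : drv_size;
	void *kdata;

	/* Allocating only usize, as before the patch, would let
	 * drv_handler() write 24 bytes past the end of the buffer.
	 * Allocating max(usize, drv_size) keeps the driver's blind
	 * write in bounds, while the copies in and out of the buffer
	 * remain limited to usize. */
	kdata = calloc(1, asize);
	if (!kdata)
		return 1;
	drv_handler(kdata);
	free(kdata);
	return 0;
}

Bounding the user copies by usize while allocating at least drv_size is what lets the ioctl structs grow on either side without old binaries overflowing the kernel buffer.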
This patch restructures the driver ioctls so we store the structure size we are expecting, and make sure we allocate at least that size. The copies from/to userspace are still restricted to the size the user specifies; this allows ioctl structs to grow on both sides of the equation. Up until now we didn't really use the DRM_IOCTL defines in the kernel, so this cleans them up and adds them for nouveau. v2: fix nouveau pushbuf arg (thanks to Ben for pointing it out) Reported-by: Linus Torvalds Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_drv.c | 23 ++++++--- drivers/gpu/drm/i810/i810_dma.c | 30 ++++++------ drivers/gpu/drm/i830/i830_dma.c | 28 +++++------ drivers/gpu/drm/i915/i915_dma.c | 80 +++++++++++++++---------------- drivers/gpu/drm/mga/mga_state.c | 26 +++++----- drivers/gpu/drm/nouveau/nouveau_channel.c | 24 +++++----- drivers/gpu/drm/r128/r128_state.c | 35 +++++++------- drivers/gpu/drm/radeon/radeon_kms.c | 78 +++++++++++++++--------------- drivers/gpu/drm/radeon/radeon_state.c | 56 +++++++++++----------- drivers/gpu/drm/savage/savage_bci.c | 8 ++-- drivers/gpu/drm/sis/sis_mm.c | 12 ++--- drivers/gpu/drm/via/via_dma.c | 28 +++++------ drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 34 ++++++------- include/drm/drmP.h | 6 ++- include/drm/i830_drm.h | 28 +++++------ include/drm/i915_drm.h | 1 + include/drm/mga_drm.h | 2 +- include/drm/nouveau_drm.h | 13 +++++ include/drm/radeon_drm.h | 4 +- include/drm/savage_drm.h | 8 ++-- 20 files changed, 275 insertions(+), 249 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 3644c94c0a17..84da748555bc 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -55,6 +55,9 @@ static int drm_version(struct drm_device *dev, void *data, struct drm_file *file_priv); +#define DRM_IOCTL_DEF(ioctl, _func, _flags) \ + [DRM_IOCTL_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0} + /** Ioctl table */ static struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, 0), @@ -421,6 +424,7 @@ long drm_ioctl(struct file *filp, int retcode = -EINVAL; char stack_kdata[128]; char *kdata = NULL; + unsigned int usize, asize; dev = file_priv->minor->dev; atomic_inc(&dev->ioctl_count); @@ -436,11 +440,18 @@ long drm_ioctl(struct file *filp, ((nr < DRM_COMMAND_BASE) || (nr >= DRM_COMMAND_END))) goto err_i1; if ((nr >= DRM_COMMAND_BASE) && (nr < DRM_COMMAND_END) && - (nr < DRM_COMMAND_BASE + dev->driver->num_ioctls)) + (nr < DRM_COMMAND_BASE + dev->driver->num_ioctls)) { + u32 drv_size; ioctl = &dev->driver->ioctls[nr - DRM_COMMAND_BASE]; + drv_size = _IOC_SIZE(ioctl->cmd_drv); + usize = asize = _IOC_SIZE(cmd); + if (drv_size > asize) + asize = drv_size; + } else if ((nr >= DRM_COMMAND_END) || (nr < DRM_COMMAND_BASE)) { ioctl = &drm_ioctls[nr]; cmd = ioctl->cmd; + usize = asize = _IOC_SIZE(cmd); } else goto err_i1; @@ -460,10 +471,10 @@ long drm_ioctl(struct file *filp, retcode = -EACCES; } else { if (cmd & (IOC_IN | IOC_OUT)) { - if (_IOC_SIZE(cmd) <= sizeof(stack_kdata)) { + if (asize <= sizeof(stack_kdata)) { kdata = stack_kdata; } else { - kdata = kmalloc(_IOC_SIZE(cmd), GFP_KERNEL); + kdata = kmalloc(asize, GFP_KERNEL); if (!kdata) { retcode = -ENOMEM; goto err_i1; } } if (cmd & IOC_IN) { if (copy_from_user(kdata, (void __user *)arg, - _IOC_SIZE(cmd)) != 0) { + usize) != 0) { retcode = -EFAULT; goto err_i1; } } else - memset(kdata, 0, _IOC_SIZE(cmd)); + memset(kdata, 0, usize); if (ioctl->flags & 
DRM_UNLOCKED) retcode = func(dev, kdata, file_priv); @@ -490,7 +501,7 @@ long drm_ioctl(struct file *filp, if (cmd & IOC_OUT) { if (copy_to_user((void __user *)arg, kdata, - _IOC_SIZE(cmd)) != 0) + usize) != 0) retcode = -EFAULT; } } diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c index 0e6c131313d9..61b4caf220fa 100644 --- a/drivers/gpu/drm/i810/i810_dma.c +++ b/drivers/gpu/drm/i810/i810_dma.c @@ -1255,21 +1255,21 @@ long i810_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } struct drm_ioctl_desc i810_ioctls[] = { - DRM_IOCTL_DEF(DRM_I810_INIT, i810_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_VERTEX, i810_dma_vertex, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_CLEAR, i810_clear_bufs, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_FLUSH, i810_flush_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_GETAGE, i810_getage, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_GETBUF, i810_getbuf, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_SWAP, i810_swap_bufs, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_COPY, i810_copybuf, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_DOCOPY, i810_docopy, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_OV0INFO, i810_ov0_info, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_FSTATUS, i810_fstatus, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_OV0FLIP, i810_ov0_flip, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_MC, i810_dma_mc, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_RSTATUS, i810_rstatus, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I810_FLIP, i810_flip_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_INIT, i810_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_VERTEX, i810_dma_vertex, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_CLEAR, i810_clear_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_FLUSH, i810_flush_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_GETAGE, i810_getage, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_GETBUF, i810_getbuf, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_SWAP, i810_swap_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_COPY, i810_copybuf, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_DOCOPY, i810_docopy, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_OV0INFO, i810_ov0_info, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_FSTATUS, i810_fstatus, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_OV0FLIP, i810_ov0_flip, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_MC, i810_dma_mc, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_RSTATUS, i810_rstatus, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I810_FLIP, i810_flip_bufs, DRM_AUTH|DRM_UNLOCKED), }; int i810_max_ioctl = DRM_ARRAY_SIZE(i810_ioctls); diff --git a/drivers/gpu/drm/i830/i830_dma.c b/drivers/gpu/drm/i830/i830_dma.c index 5168862c9227..671aa18415ac 100644 --- a/drivers/gpu/drm/i830/i830_dma.c +++ b/drivers/gpu/drm/i830/i830_dma.c @@ -1524,20 +1524,20 @@ long i830_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } struct drm_ioctl_desc i830_ioctls[] = { - DRM_IOCTL_DEF(DRM_I830_INIT, i830_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_VERTEX, i830_dma_vertex, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_CLEAR, i830_clear_bufs, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_FLUSH, i830_flush_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_GETAGE, i830_getage, 
DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_GETBUF, i830_getbuf, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_SWAP, i830_swap_bufs, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_COPY, i830_copybuf, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_DOCOPY, i830_docopy, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_FLIP, i830_flip_bufs, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_IRQ_EMIT, i830_irq_emit, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_IRQ_WAIT, i830_irq_wait, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_GETPARAM, i830_getparam, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I830_SETPARAM, i830_setparam, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_INIT, i830_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_VERTEX, i830_dma_vertex, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_CLEAR, i830_clear_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_FLUSH, i830_flush_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_GETAGE, i830_getage, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_GETBUF, i830_getbuf, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_SWAP, i830_swap_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_COPY, i830_copybuf, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_DOCOPY, i830_docopy, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_FLIP, i830_flip_bufs, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_IRQ_EMIT, i830_irq_emit, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_IRQ_WAIT, i830_irq_wait, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_GETPARAM, i830_getparam, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I830_SETPARAM, i830_setparam, DRM_AUTH|DRM_UNLOCKED), }; int i830_max_ioctl = DRM_ARRAY_SIZE(i830_ioctls); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index f19ffe87af3c..a2d3509c393b 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -2360,46 +2360,46 @@ void i915_driver_postclose(struct drm_device *dev, struct drm_file *file_priv) } struct drm_ioctl_desc i915_ioctls[] = { - DRM_IOCTL_DEF(DRM_I915_INIT, i915_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_I915_FLUSH, i915_flush_ioctl, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_FLIP, i915_flip_bufs, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_BATCHBUFFER, i915_batchbuffer, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_IRQ_EMIT, i915_irq_emit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_IRQ_WAIT, i915_irq_wait, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_GETPARAM, i915_getparam, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_SETPARAM, i915_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_I915_ALLOC, i915_mem_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_FREE, i915_mem_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_INIT_HEAP, i915_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_I915_CMDBUFFER, i915_cmdbuffer, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_DESTROY_HEAP, i915_mem_destroy_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY ), - DRM_IOCTL_DEF(DRM_I915_SET_VBLANK_PIPE, i915_vblank_pipe_set, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY ), - DRM_IOCTL_DEF(DRM_I915_GET_VBLANK_PIPE, i915_vblank_pipe_get, DRM_AUTH ), - DRM_IOCTL_DEF(DRM_I915_VBLANK_SWAP, i915_vblank_swap, DRM_AUTH), - DRM_IOCTL_DEF(DRM_I915_HWS_ADDR, i915_set_status_page, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_I915_GEM_INIT, i915_gem_init_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH|DRM_UNLOCKED), - 
DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_ENTERVT, i915_gem_entervt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_LEAVEVT, i915_gem_leavevt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_CREATE, i915_gem_create_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_SET_TILING, i915_gem_set_tiling, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_INIT, i915_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_FLUSH, i915_flush_ioctl, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_FLIP, i915_flip_bufs, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_BATCHBUFFER, i915_batchbuffer, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_IRQ_EMIT, i915_irq_emit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_IRQ_WAIT, i915_irq_wait, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_SETPARAM, i915_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_ALLOC, i915_mem_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_FREE, i915_mem_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_INIT_HEAP, i915_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_CMDBUFFER, i915_cmdbuffer, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_DESTROY_HEAP, i915_mem_destroy_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_SET_VBLANK_PIPE, i915_vblank_pipe_set, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_GET_VBLANK_PIPE, i915_vblank_pipe_get, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_VBLANK_SWAP, i915_vblank_swap, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, i915_set_status_page, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_GEM_INIT, i915_gem_init_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED), + 
DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_ENTERVT, i915_gem_entervt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_LEAVEVT, i915_gem_leavevt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_CREATE, i915_gem_create_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_GET_TILING, i915_gem_get_tiling, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), }; int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls); diff --git a/drivers/gpu/drm/mga/mga_state.c b/drivers/gpu/drm/mga/mga_state.c index fff82045c427..9ce2827f8c00 100644 --- a/drivers/gpu/drm/mga/mga_state.c +++ b/drivers/gpu/drm/mga/mga_state.c @@ -1085,19 +1085,19 @@ file_priv) } struct drm_ioctl_desc mga_ioctls[] = { - DRM_IOCTL_DEF(DRM_MGA_INIT, mga_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_MGA_FLUSH, mga_dma_flush, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_RESET, mga_dma_reset, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_SWAP, mga_dma_swap, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_CLEAR, mga_dma_clear, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_VERTEX, mga_dma_vertex, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_INDICES, mga_dma_indices, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_ILOAD, mga_dma_iload, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_BLIT, mga_dma_blit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_GETPARAM, mga_getparam, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_SET_FENCE, mga_set_fence, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_WAIT_FENCE, mga_wait_fence, DRM_AUTH), - DRM_IOCTL_DEF(DRM_MGA_DMA_BOOTSTRAP, mga_dma_bootstrap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(MGA_INIT, mga_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(MGA_FLUSH, mga_dma_flush, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_RESET, mga_dma_reset, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_SWAP, mga_dma_swap, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_CLEAR, mga_dma_clear, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_VERTEX, mga_dma_vertex, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_INDICES, mga_dma_indices, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_ILOAD, mga_dma_iload, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_BLIT, mga_dma_blit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_GETPARAM, mga_getparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_SET_FENCE, mga_set_fence, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_WAIT_FENCE, mga_wait_fence, DRM_AUTH), + DRM_IOCTL_DEF_DRV(MGA_DMA_BOOTSTRAP, mga_dma_bootstrap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), }; int mga_max_ioctl = 
DRM_ARRAY_SIZE(mga_ioctls); diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c index 90fdcda332be..0480f064f2c1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_channel.c +++ b/drivers/gpu/drm/nouveau/nouveau_channel.c @@ -426,18 +426,18 @@ nouveau_ioctl_fifo_free(struct drm_device *dev, void *data, ***********************************/ struct drm_ioctl_desc nouveau_ioctls[] = { - DRM_IOCTL_DEF(DRM_NOUVEAU_GETPARAM, nouveau_ioctl_getparam, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_SETPARAM, nouveau_ioctl_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_NOUVEAU_CHANNEL_ALLOC, nouveau_ioctl_fifo_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_CHANNEL_FREE, nouveau_ioctl_fifo_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GROBJ_ALLOC, nouveau_ioctl_grobj_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_ioctl_notifier_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GPUOBJ_FREE, nouveau_ioctl_gpuobj_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GETPARAM, nouveau_ioctl_getparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_SETPARAM, nouveau_ioctl_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(NOUVEAU_CHANNEL_ALLOC, nouveau_ioctl_fifo_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_CHANNEL_FREE, nouveau_ioctl_fifo_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GROBJ_ALLOC, nouveau_ioctl_grobj_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_ioctl_notifier_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_ioctl_gpuobj_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH), + DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH), }; int nouveau_max_ioctl = DRM_ARRAY_SIZE(nouveau_ioctls); diff --git a/drivers/gpu/drm/r128/r128_state.c b/drivers/gpu/drm/r128/r128_state.c index 077af1f2f9b4..a9e33ce65918 100644 --- a/drivers/gpu/drm/r128/r128_state.c +++ b/drivers/gpu/drm/r128/r128_state.c @@ -1639,30 +1639,29 @@ void r128_driver_preclose(struct drm_device *dev, struct drm_file *file_priv) r128_do_cleanup_pageflip(dev); } } - void r128_driver_lastclose(struct drm_device *dev) { r128_do_cleanup_cce(dev); } struct drm_ioctl_desc r128_ioctls[] = { - DRM_IOCTL_DEF(DRM_R128_INIT, r128_cce_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_R128_CCE_START, r128_cce_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_R128_CCE_STOP, r128_cce_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_R128_CCE_RESET, r128_cce_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_R128_CCE_IDLE, r128_cce_idle, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_RESET, r128_engine_reset, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_FULLSCREEN, r128_fullscreen, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_SWAP, r128_cce_swap, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_FLIP, r128_cce_flip, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_CLEAR, 
r128_cce_clear, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_VERTEX, r128_cce_vertex, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_INDICES, r128_cce_indices, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_BLIT, r128_cce_blit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_DEPTH, r128_cce_depth, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_STIPPLE, r128_cce_stipple, DRM_AUTH), - DRM_IOCTL_DEF(DRM_R128_INDIRECT, r128_cce_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_R128_GETPARAM, r128_getparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_INIT, r128_cce_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(R128_CCE_START, r128_cce_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(R128_CCE_STOP, r128_cce_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(R128_CCE_RESET, r128_cce_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(R128_CCE_IDLE, r128_cce_idle, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_RESET, r128_engine_reset, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_FULLSCREEN, r128_fullscreen, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_SWAP, r128_cce_swap, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_FLIP, r128_cce_flip, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_CLEAR, r128_cce_clear, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_VERTEX, r128_cce_vertex, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_INDICES, r128_cce_indices, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_BLIT, r128_cce_blit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_DEPTH, r128_cce_depth, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_STIPPLE, r128_cce_stipple, DRM_AUTH), + DRM_IOCTL_DEF_DRV(R128_INDIRECT, r128_cce_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(R128_GETPARAM, r128_getparam, DRM_AUTH), }; int r128_max_ioctl = DRM_ARRAY_SIZE(r128_ioctls); diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index b1c8ace5f080..27435db0aa48 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -323,45 +323,45 @@ KMS_INVALID_IOCTL(radeon_surface_free_kms) struct drm_ioctl_desc radeon_ioctls_kms[] = { - DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FREE, 
radeon_mem_free_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc_kms, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, radeon_cp_init_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_START, radeon_cp_start_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_STOP, radeon_cp_stop_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_RESET, radeon_cp_reset_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_IDLE, radeon_cp_idle_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CP_RESUME, radeon_cp_resume_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_RESET, radeon_engine_reset_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FULLSCREEN, radeon_fullscreen_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SWAP, radeon_cp_swap_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CLEAR, radeon_cp_clear_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_VERTEX, radeon_cp_vertex_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INDICES, radeon_cp_indices_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_TEXTURE, radeon_cp_texture_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_STIPPLE, radeon_cp_stipple_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INDIRECT, radeon_cp_indirect_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_VERTEX2, radeon_cp_vertex2_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CMDBUF, radeon_cp_cmdbuf_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_GETPARAM, radeon_cp_getparam_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FLIP, radeon_cp_flip_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_ALLOC, radeon_mem_alloc_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FREE, radeon_mem_free_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INIT_HEAP, radeon_mem_init_heap_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_IRQ_EMIT, radeon_irq_emit_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_IRQ_WAIT, radeon_irq_wait_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SETPARAM, radeon_cp_setparam_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SURF_ALLOC, radeon_surface_alloc_kms, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SURF_FREE, radeon_surface_free_kms, DRM_AUTH), /* KMS */ - DRM_IOCTL_DEF(DRM_RADEON_GEM_INFO, radeon_gem_info_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_CREATE, radeon_gem_create_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_MMAP, radeon_gem_mmap_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_DOMAIN, radeon_gem_set_domain_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_PREAD, radeon_gem_pread_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_PWRITE, radeon_gem_pwrite_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_INFO, radeon_info_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_RADEON_GEM_BUSY, radeon_gem_busy_ioctl, 
DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_INFO, radeon_gem_info_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_CREATE, radeon_gem_create_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_MMAP, radeon_gem_mmap_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_DOMAIN, radeon_gem_set_domain_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_PREAD, radeon_gem_pread_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_PWRITE, radeon_gem_pwrite_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_CS, radeon_cs_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_INFO, radeon_info_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), + DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED), }; int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms); diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index b3ba44c0a818..4ae5a3d1074e 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -3228,34 +3228,34 @@ void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv) } struct drm_ioctl_desc radeon_ioctls[] = { - DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH) + DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, radeon_cp_init, 
DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_RESET, radeon_engine_reset, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SWAP, radeon_cp_swap, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CLEAR, radeon_cp_clear, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INDICES, radeon_cp_indices, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FLIP, radeon_cp_flip, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_FREE, radeon_mem_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH) }; int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls); diff --git a/drivers/gpu/drm/savage/savage_bci.c b/drivers/gpu/drm/savage/savage_bci.c index f576232846c3..6756c97899f1 100644 --- a/drivers/gpu/drm/savage/savage_bci.c +++ b/drivers/gpu/drm/savage/savage_bci.c @@ -1082,10 +1082,10 @@ void savage_reclaim_buffers(struct drm_device *dev, struct drm_file *file_priv) } struct drm_ioctl_desc savage_ioctls[] = { - DRM_IOCTL_DEF(DRM_SAVAGE_BCI_INIT, savage_bci_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_SAVAGE_BCI_CMDBUF, savage_bci_cmdbuf, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SAVAGE_BCI_EVENT_EMIT, savage_bci_event_emit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SAVAGE_BCI_EVENT_WAIT, savage_bci_event_wait, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SAVAGE_BCI_INIT, savage_bci_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(SAVAGE_BCI_CMDBUF, savage_bci_cmdbuf, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SAVAGE_BCI_EVENT_EMIT, savage_bci_event_emit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SAVAGE_BCI_EVENT_WAIT, savage_bci_event_wait, DRM_AUTH), }; int savage_max_ioctl = DRM_ARRAY_SIZE(savage_ioctls); diff --git a/drivers/gpu/drm/sis/sis_mm.c b/drivers/gpu/drm/sis/sis_mm.c index 07d0f2979cac..7fe2b63412ce 100644 --- a/drivers/gpu/drm/sis/sis_mm.c +++ b/drivers/gpu/drm/sis/sis_mm.c @@ -320,12 +320,12 @@ void sis_reclaim_buffers_locked(struct drm_device *dev, } struct drm_ioctl_desc sis_ioctls[] = { - DRM_IOCTL_DEF(DRM_SIS_FB_ALLOC, sis_fb_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SIS_FB_FREE, sis_drm_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SIS_AGP_INIT, sis_ioctl_agp_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY), - 
DRM_IOCTL_DEF(DRM_SIS_AGP_ALLOC, sis_ioctl_agp_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SIS_AGP_FREE, sis_drm_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_SIS_FB_INIT, sis_fb_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(SIS_FB_ALLOC, sis_fb_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SIS_FB_FREE, sis_drm_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SIS_AGP_INIT, sis_ioctl_agp_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(SIS_AGP_ALLOC, sis_ioctl_agp_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SIS_AGP_FREE, sis_drm_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(SIS_FB_INIT, sis_fb_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY), }; int sis_max_ioctl = DRM_ARRAY_SIZE(sis_ioctls); diff --git a/drivers/gpu/drm/via/via_dma.c b/drivers/gpu/drm/via/via_dma.c index 68dda74a50ae..cc0ffa9abd00 100644 --- a/drivers/gpu/drm/via/via_dma.c +++ b/drivers/gpu/drm/via/via_dma.c @@ -722,20 +722,20 @@ static int via_cmdbuf_size(struct drm_device *dev, void *data, struct drm_file * } struct drm_ioctl_desc via_ioctls[] = { - DRM_IOCTL_DEF(DRM_VIA_ALLOCMEM, via_mem_alloc, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_FREEMEM, via_mem_free, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_AGP_INIT, via_agp_init, DRM_AUTH|DRM_MASTER), - DRM_IOCTL_DEF(DRM_VIA_FB_INIT, via_fb_init, DRM_AUTH|DRM_MASTER), - DRM_IOCTL_DEF(DRM_VIA_MAP_INIT, via_map_init, DRM_AUTH|DRM_MASTER), - DRM_IOCTL_DEF(DRM_VIA_DEC_FUTEX, via_decoder_futex, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_DMA_INIT, via_dma_init, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_CMDBUFFER, via_cmdbuffer, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_FLUSH, via_flush_ioctl, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_PCICMD, via_pci_cmdbuffer, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_CMDBUF_SIZE, via_cmdbuf_size, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_WAIT_IRQ, via_wait_irq, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_DMA_BLIT, via_dma_blit, DRM_AUTH), - DRM_IOCTL_DEF(DRM_VIA_BLIT_SYNC, via_dma_blit_sync, DRM_AUTH) + DRM_IOCTL_DEF_DRV(VIA_ALLOCMEM, via_mem_alloc, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_FREEMEM, via_mem_free, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_AGP_INIT, via_agp_init, DRM_AUTH|DRM_MASTER), + DRM_IOCTL_DEF_DRV(VIA_FB_INIT, via_fb_init, DRM_AUTH|DRM_MASTER), + DRM_IOCTL_DEF_DRV(VIA_MAP_INIT, via_map_init, DRM_AUTH|DRM_MASTER), + DRM_IOCTL_DEF_DRV(VIA_DEC_FUTEX, via_decoder_futex, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_DMA_INIT, via_dma_init, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_CMDBUFFER, via_cmdbuffer, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_FLUSH, via_flush_ioctl, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_PCICMD, via_pci_cmdbuffer, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_CMDBUF_SIZE, via_cmdbuf_size, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_WAIT_IRQ, via_wait_irq, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_DMA_BLIT, via_dma_blit, DRM_AUTH), + DRM_IOCTL_DEF_DRV(VIA_BLIT_SYNC, via_dma_blit_sync, DRM_AUTH) }; int via_max_ioctl = DRM_ARRAY_SIZE(via_ioctls); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 9dd395b90216..72ec2e2b6e97 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -99,47 +99,47 @@ */ #define VMW_IOCTL_DEF(ioctl, func, flags) \ - [DRM_IOCTL_NR(ioctl) - DRM_COMMAND_BASE] = {ioctl, flags, func} + [DRM_IOCTL_NR(DRM_IOCTL_##ioctl) - DRM_COMMAND_BASE] = {DRM_##ioctl, flags, func, DRM_IOCTL_##ioctl} /** * Ioctl definitions. 
*/ static struct drm_ioctl_desc vmw_ioctls[] = { - VMW_IOCTL_DEF(DRM_IOCTL_VMW_GET_PARAM, vmw_getparam_ioctl, + VMW_IOCTL_DEF(VMW_GET_PARAM, vmw_getparam_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_ALLOC_DMABUF, vmw_dmabuf_alloc_ioctl, + VMW_IOCTL_DEF(VMW_ALLOC_DMABUF, vmw_dmabuf_alloc_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_DMABUF, vmw_dmabuf_unref_ioctl, + VMW_IOCTL_DEF(VMW_UNREF_DMABUF, vmw_dmabuf_unref_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_CURSOR_BYPASS, + VMW_IOCTL_DEF(VMW_CURSOR_BYPASS, vmw_kms_cursor_bypass_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_CONTROL_STREAM, vmw_overlay_ioctl, + VMW_IOCTL_DEF(VMW_CONTROL_STREAM, vmw_overlay_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_CLAIM_STREAM, vmw_stream_claim_ioctl, + VMW_IOCTL_DEF(VMW_CLAIM_STREAM, vmw_stream_claim_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_STREAM, vmw_stream_unref_ioctl, + VMW_IOCTL_DEF(VMW_UNREF_STREAM, vmw_stream_unref_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_CREATE_CONTEXT, vmw_context_define_ioctl, + VMW_IOCTL_DEF(VMW_CREATE_CONTEXT, vmw_context_define_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_CONTEXT, vmw_context_destroy_ioctl, + VMW_IOCTL_DEF(VMW_UNREF_CONTEXT, vmw_context_destroy_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_CREATE_SURFACE, vmw_surface_define_ioctl, + VMW_IOCTL_DEF(VMW_CREATE_SURFACE, vmw_surface_define_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_UNREF_SURFACE, vmw_surface_destroy_ioctl, + VMW_IOCTL_DEF(VMW_UNREF_SURFACE, vmw_surface_destroy_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_REF_SURFACE, vmw_surface_reference_ioctl, + VMW_IOCTL_DEF(VMW_REF_SURFACE, vmw_surface_reference_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_EXECBUF, vmw_execbuf_ioctl, + VMW_IOCTL_DEF(VMW_EXECBUF, vmw_execbuf_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_FIFO_DEBUG, vmw_fifo_debug_ioctl, + VMW_IOCTL_DEF(VMW_FIFO_DEBUG, vmw_fifo_debug_ioctl, DRM_AUTH | DRM_ROOT_ONLY | DRM_MASTER | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_FENCE_WAIT, vmw_fence_wait_ioctl, + VMW_IOCTL_DEF(VMW_FENCE_WAIT, vmw_fence_wait_ioctl, DRM_AUTH | DRM_UNLOCKED), - VMW_IOCTL_DEF(DRM_IOCTL_VMW_UPDATE_LAYOUT, vmw_kms_update_layout_ioctl, + VMW_IOCTL_DEF(VMW_UPDATE_LAYOUT, vmw_kms_update_layout_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED) }; diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 2a512bc0d4ab..7809d230adee 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -305,14 +305,16 @@ struct drm_ioctl_desc { unsigned int cmd; int flags; drm_ioctl_t *func; + unsigned int cmd_drv; }; /** * Creates a driver or general drm_ioctl_desc array entry for the given * ioctl, for use by drm_ioctl(). 
*/ -#define DRM_IOCTL_DEF(ioctl, _func, _flags) \ - [DRM_IOCTL_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags} + +#define DRM_IOCTL_DEF_DRV(ioctl, _func, _flags) \ + [DRM_IOCTL_NR(DRM_##ioctl)] = {.cmd = DRM_##ioctl, .func = _func, .flags = _flags, .cmd_drv = DRM_IOCTL_##ioctl} struct drm_magic_entry { struct list_head head; diff --git a/include/drm/i830_drm.h b/include/drm/i830_drm.h index 4b00d2dd4f68..61315c29b8f3 100644 --- a/include/drm/i830_drm.h +++ b/include/drm/i830_drm.h @@ -264,20 +264,20 @@ typedef struct _drm_i830_sarea { #define DRM_I830_GETPARAM 0x0c #define DRM_I830_SETPARAM 0x0d -#define DRM_IOCTL_I830_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_INIT, drm_i830_init_t) -#define DRM_IOCTL_I830_VERTEX DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_VERTEX, drm_i830_vertex_t) -#define DRM_IOCTL_I830_CLEAR DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_CLEAR, drm_i830_clear_t) -#define DRM_IOCTL_I830_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_FLUSH) -#define DRM_IOCTL_I830_GETAGE DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_GETAGE) -#define DRM_IOCTL_I830_GETBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_IOCTL_I830_GETBUF, drm_i830_dma_t) -#define DRM_IOCTL_I830_SWAP DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_SWAP) -#define DRM_IOCTL_I830_COPY DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_COPY, drm_i830_copy_t) -#define DRM_IOCTL_I830_DOCOPY DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_DOCOPY) -#define DRM_IOCTL_I830_FLIP DRM_IO ( DRM_COMMAND_BASE + DRM_IOCTL_I830_FLIP) -#define DRM_IOCTL_I830_IRQ_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_IOCTL_I830_IRQ_EMIT, drm_i830_irq_emit_t) -#define DRM_IOCTL_I830_IRQ_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_IOCTL_I830_IRQ_WAIT, drm_i830_irq_wait_t) -#define DRM_IOCTL_I830_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_IOCTL_I830_GETPARAM, drm_i830_getparam_t) -#define DRM_IOCTL_I830_SETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_IOCTL_I830_SETPARAM, drm_i830_setparam_t) +#define DRM_IOCTL_I830_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I830_INIT, drm_i830_init_t) +#define DRM_IOCTL_I830_VERTEX DRM_IOW( DRM_COMMAND_BASE + DRM_I830_VERTEX, drm_i830_vertex_t) +#define DRM_IOCTL_I830_CLEAR DRM_IOW( DRM_COMMAND_BASE + DRM_I830_CLEAR, drm_i830_clear_t) +#define DRM_IOCTL_I830_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I830_FLUSH) +#define DRM_IOCTL_I830_GETAGE DRM_IO ( DRM_COMMAND_BASE + DRM_I830_GETAGE) +#define DRM_IOCTL_I830_GETBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_I830_GETBUF, drm_i830_dma_t) +#define DRM_IOCTL_I830_SWAP DRM_IO ( DRM_COMMAND_BASE + DRM_I830_SWAP) +#define DRM_IOCTL_I830_COPY DRM_IOW( DRM_COMMAND_BASE + DRM_I830_COPY, drm_i830_copy_t) +#define DRM_IOCTL_I830_DOCOPY DRM_IO ( DRM_COMMAND_BASE + DRM_I830_DOCOPY) +#define DRM_IOCTL_I830_FLIP DRM_IO ( DRM_COMMAND_BASE + DRM_I830_FLIP) +#define DRM_IOCTL_I830_IRQ_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I830_IRQ_EMIT, drm_i830_irq_emit_t) +#define DRM_IOCTL_I830_IRQ_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_I830_IRQ_WAIT, drm_i830_irq_wait_t) +#define DRM_IOCTL_I830_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_I830_GETPARAM, drm_i830_getparam_t) +#define DRM_IOCTL_I830_SETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_I830_SETPARAM, drm_i830_setparam_t) typedef struct _drm_i830_clear { int clear_color; diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 7f0028e1010b..000357bf3ac8 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -206,6 +206,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_SET_VBLANK_PIPE DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SET_VBLANK_PIPE, drm_i915_vblank_pipe_t) #define 
DRM_IOCTL_I915_GET_VBLANK_PIPE DRM_IOR( DRM_COMMAND_BASE + DRM_I915_GET_VBLANK_PIPE, drm_i915_vblank_pipe_t) #define DRM_IOCTL_I915_VBLANK_SWAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, drm_i915_vblank_swap_t) +#define DRM_IOCTL_I915_HWS_ADDR DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) #define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) diff --git a/include/drm/mga_drm.h b/include/drm/mga_drm.h index 3ffbc4798afa..c16097f99be0 100644 --- a/include/drm/mga_drm.h +++ b/include/drm/mga_drm.h @@ -248,7 +248,7 @@ typedef struct _drm_mga_sarea { #define DRM_MGA_DMA_BOOTSTRAP 0x0c #define DRM_IOCTL_MGA_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_INIT, drm_mga_init_t) -#define DRM_IOCTL_MGA_FLUSH DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_FLUSH, drm_lock_t) +#define DRM_IOCTL_MGA_FLUSH DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_FLUSH, struct drm_lock) #define DRM_IOCTL_MGA_RESET DRM_IO( DRM_COMMAND_BASE + DRM_MGA_RESET) #define DRM_IOCTL_MGA_SWAP DRM_IO( DRM_COMMAND_BASE + DRM_MGA_SWAP) #define DRM_IOCTL_MGA_CLEAR DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_CLEAR, drm_mga_clear_t) diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h index fe917dee723a..01a714119506 100644 --- a/include/drm/nouveau_drm.h +++ b/include/drm/nouveau_drm.h @@ -197,4 +197,17 @@ struct drm_nouveau_sarea { #define DRM_NOUVEAU_GEM_CPU_FINI 0x43 #define DRM_NOUVEAU_GEM_INFO 0x44 +#define DRM_IOCTL_NOUVEAU_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GETPARAM, struct drm_nouveau_getparam) +#define DRM_IOCTL_NOUVEAU_SETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SETPARAM, struct drm_nouveau_setparam) +#define DRM_IOCTL_NOUVEAU_CHANNEL_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_ALLOC, struct drm_nouveau_channel_alloc) +#define DRM_IOCTL_NOUVEAU_CHANNEL_FREE DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_FREE, struct drm_nouveau_channel_free) +#define DRM_IOCTL_NOUVEAU_GROBJ_ALLOC DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GROBJ_ALLOC, struct drm_nouveau_grobj_alloc) +#define DRM_IOCTL_NOUVEAU_NOTIFIEROBJ_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_NOTIFIEROBJ_ALLOC, struct drm_nouveau_notifierobj_alloc) +#define DRM_IOCTL_NOUVEAU_GPUOBJ_FREE DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GPUOBJ_FREE, struct drm_nouveau_gpuobj_free) +#define DRM_IOCTL_NOUVEAU_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_NEW, struct drm_nouveau_gem_new) +#define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, struct drm_nouveau_gem_pushbuf) +#define DRM_IOCTL_NOUVEAU_GEM_CPU_PREP DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_PREP, struct drm_nouveau_gem_cpu_prep) +#define DRM_IOCTL_NOUVEAU_GEM_CPU_FINI DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_FINI, struct drm_nouveau_gem_cpu_fini) +#define DRM_IOCTL_NOUVEAU_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_INFO, struct drm_nouveau_gem_info) + #endif /* __NOUVEAU_DRM_H__ */ diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index 0acaf8f91437..10f8b53bdd40 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -547,8 +547,8 @@ typedef struct { #define DRM_IOCTL_RADEON_GEM_WAIT_IDLE DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_IDLE, struct drm_radeon_gem_wait_idle) #define 
DRM_IOCTL_RADEON_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS, struct drm_radeon_cs)
#define DRM_IOCTL_RADEON_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info)
-#define DRM_IOCTL_RADEON_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling)
-#define DRM_IOCTL_RADEON_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
+#define DRM_IOCTL_RADEON_GEM_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling)
+#define DRM_IOCTL_RADEON_GEM_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
#define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)

typedef struct drm_radeon_init {
diff --git a/include/drm/savage_drm.h b/include/drm/savage_drm.h
index 8a576ef01821..4863cf6bf96f 100644
--- a/include/drm/savage_drm.h
+++ b/include/drm/savage_drm.h
@@ -63,10 +63,10 @@ typedef struct _drm_savage_sarea {
#define DRM_SAVAGE_BCI_EVENT_EMIT 0x02
#define DRM_SAVAGE_BCI_EVENT_WAIT 0x03

-#define DRM_IOCTL_SAVAGE_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_INIT, drm_savage_init_t)
-#define DRM_IOCTL_SAVAGE_CMDBUF DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_CMDBUF, drm_savage_cmdbuf_t)
-#define DRM_IOCTL_SAVAGE_EVENT_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_EMIT, drm_savage_event_emit_t)
-#define DRM_IOCTL_SAVAGE_EVENT_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_WAIT, drm_savage_event_wait_t)
+#define DRM_IOCTL_SAVAGE_BCI_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_INIT, drm_savage_init_t)
+#define DRM_IOCTL_SAVAGE_BCI_CMDBUF DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_CMDBUF, drm_savage_cmdbuf_t)
+#define DRM_IOCTL_SAVAGE_BCI_EVENT_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_EMIT, drm_savage_event_emit_t)
+#define DRM_IOCTL_SAVAGE_BCI_EVENT_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_WAIT, drm_savage_event_wait_t)

#define SAVAGE_DMA_PCI 1
#define SAVAGE_DMA_AGP 3
-- cgit v1.2.3

From f1b499f029c5dde85d46a8811353c62f29157541 Mon Sep 17 00:00:00 2001
From: John Kacur
Date: Thu, 5 Aug 2010 17:10:53 +0200
Subject: lockdep: Remove __debug_show_held_locks

There is no longer any functional difference between
__debug_show_held_locks() and debug_show_held_locks(), so remove
the former.
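For illustration, a minimal sketch of what a call site looks like after
this change. The helper below is hypothetical and not part of this patch;
sched_show_task() and debug_show_held_locks() are the in-tree interfaces,
and the !CONFIG_LOCKDEP stub in <linux/debug_locks.h> keeps the second
call a no-op on kernels without lockdep:

#include <linux/sched.h>
#include <linux/debug_locks.h>

/*
 * Hypothetical diagnostic helper: dump a stuck task's backtrace and
 * the locks it holds.  As the comment in kernel/lockdep.c warns, this
 * is only safe when @t is known not to be running in parallel.
 */
static void report_stuck_task(struct task_struct *t)
{
	sched_show_task(t);
	debug_show_held_locks(t);	/* was __debug_show_held_locks(t) */
}

This mirrors the kernel/hung_task.c hunk below, where the __-prefixed
variant is replaced one for one.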
Signed-off-by: John Kacur
Cc: Peter Zijlstra
LKML-Reference: <1281021054-4228-1-git-send-email-jkacur@redhat.com>
Signed-off-by: Ingo Molnar
---
 include/linux/debug_locks.h | 5 -----
 kernel/hung_task.c | 2 +-
 kernel/lockdep.c | 8 +-------
 3 files changed, 2 insertions(+), 13 deletions(-)
(limited to 'include')

diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 29b3ce3f2a1d..2833452ea01c 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -49,7 +49,6 @@ struct task_struct;
#ifdef CONFIG_LOCKDEP
extern void debug_show_all_locks(void);
-extern void __debug_show_held_locks(struct task_struct *task);
extern void debug_show_held_locks(struct task_struct *task);
extern void debug_check_no_locks_freed(const void *from, unsigned long len);
extern void debug_check_no_locks_held(struct task_struct *task);
@@ -58,10 +57,6 @@ static inline void debug_show_all_locks(void)
{
}
-static inline void __debug_show_held_locks(struct task_struct *task)
-{
-}
-
static inline void debug_show_held_locks(struct task_struct *task)
{
}
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 0c642d51aac2..bca942379559 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -98,7 +98,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" " disables this message.\n");
sched_show_task(t);
- __debug_show_held_locks(t);
+ debug_show_held_locks(t);
touch_nmi_watchdog();
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index f2852a510232..84baa71cfda5 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3775,7 +3775,7 @@ EXPORT_SYMBOL_GPL(debug_show_all_locks);
 * Careful: only use this function if you are sure that
 * the task cannot run in parallel!
 */
-void __debug_show_held_locks(struct task_struct *task)
+void debug_show_held_locks(struct task_struct *task)
{
if (unlikely(!debug_locks)) {
printk("INFO: lockdep is turned off.\n");
@@ -3783,12 +3783,6 @@ void __debug_show_held_locks(struct task_struct *task)
}
lockdep_print_held_locks(task);
}
-EXPORT_SYMBOL_GPL(__debug_show_held_locks);
-
-void debug_show_held_locks(struct task_struct *task)
-{
-	__debug_show_held_locks(task);
-}
EXPORT_SYMBOL_GPL(debug_show_held_locks);

void lockdep_sys_exit(void)
-- cgit v1.2.3

From 99c796df94afca5256860dd4760017f1dbb3480c Mon Sep 17 00:00:00 2001
From: Russell King
Date: Tue, 17 Aug 2010 22:13:22 +0100
Subject: VIDEO: amba clcd: don't disable an already disabled clock

Fix the clock enable/disable tracking in the AMBA CLCD driver so that
the driver doesn't try to disable an already disabled clock, thereby
causing the clock (if shared) to become unbalanced. This resolves a
problem with CLCD on LPC32xx ARM platforms.

Reported-by: Kevin Wells
Signed-off-by: Russell King
---
 drivers/video/amba-clcd.c | 10 ++++++++--
 include/linux/amba/clcd.h | 1 +
 2 files changed, 9 insertions(+), 2 deletions(-)
(limited to 'include')

diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c
index afe21e6eb544..1c2c68356ea7 100644
--- a/drivers/video/amba-clcd.c
+++ b/drivers/video/amba-clcd.c
@@ -80,7 +80,10 @@ static void clcdfb_disable(struct clcd_fb *fb)
/*
 * Disable CLCD clock source.
 */
- clk_disable(fb->clk);
+ if (fb->clk_enabled) {
+ fb->clk_enabled = false;
+ clk_disable(fb->clk);
+ }
}

static void clcdfb_enable(struct clcd_fb *fb, u32 cntl)
@@ -88,7 +91,10 @@ static void clcdfb_enable(struct clcd_fb *fb, u32 cntl)
{
/*
 * Enable the CLCD clock source.
 */
- clk_enable(fb->clk);
+ if (!fb->clk_enabled) {
+ fb->clk_enabled = true;
+ clk_enable(fb->clk);
+ }

/*
 * Bring up by first enabling..
diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h
index ca16c3801a1e..be33b3affc8a 100644
--- a/include/linux/amba/clcd.h
+++ b/include/linux/amba/clcd.h
@@ -150,6 +150,7 @@ struct clcd_fb {
 u16 off_cntl;
 u32 clcd_cntl;
 u32 cmap[16];
+ bool clk_enabled;
};

static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs)
-- cgit v1.2.3

From d7627467b7a8dd6944885290a03a07ceb28c10eb Mon Sep 17 00:00:00 2001
From: David Howells
Date: Tue, 17 Aug 2010 23:52:56 +0100
Subject: Make do_execve() take a const filename pointer

Make do_execve() take a const filename pointer so that kernel_execve()
compiles correctly on ARM:

arch/arm/kernel/sys_arm.c:88: warning: passing argument 1 of 'do_execve' discards qualifiers from pointer target type

This also requires the argv and envp arguments to be consted twice, once
for the pointer array and once for the strings the array points to. This
is because do_execve() passes a pointer to the filename (now const) to
copy_strings_kernel(). A simpler alternative would be to cast the
filename pointer in do_execve() when it's passed to copy_strings_kernel().

do_execve() may not change any of the strings it is passed as part of
the argv or envp lists, as some of them are in .rodata, so marking these
strings as const should be fine.

Further, kernel_execve() and sys_execve() need to be changed to match.

This has been test-built on x86_64, frv, arm and mips.

Signed-off-by: David Howells
Tested-by: Ralf Baechle
Acked-by: Russell King
Signed-off-by: Linus Torvalds
---
 arch/alpha/kernel/process.c | 5 +++--
 arch/arm/kernel/sys_arm.c | 14 +++++++++-----
 arch/avr32/kernel/process.c | 5 +++--
 arch/avr32/kernel/sys_avr32.c | 4 +++-
 arch/blackfin/kernel/process.c | 4 +++-
 arch/cris/arch-v10/kernel/process.c | 4 +++-
 arch/cris/arch-v32/kernel/process.c | 6 ++++--
 arch/frv/kernel/process.c | 5 +++--
 arch/h8300/kernel/process.c | 5 ++++-
 arch/h8300/kernel/sys_h8300.c | 4 +++-
 arch/ia64/kernel/process.c | 4 +++-
 arch/m32r/kernel/process.c | 4 ++--
 arch/m32r/kernel/sys_m32r.c | 4 +++-
 arch/m68k/kernel/process.c | 4 +++-
 arch/m68k/kernel/sys_m68k.c | 4 +++-
 arch/m68knommu/kernel/process.c | 4 +++-
 arch/m68knommu/kernel/sys_m68k.c | 4 +++-
 arch/microblaze/kernel/sys_microblaze.c | 10 +++++++---
 arch/mips/kernel/syscall.c | 10 +++++++---
 arch/mn10300/kernel/process.c | 4 ++--
 arch/parisc/hpux/fs.c | 6 ++++--
 arch/parisc/kernel/process.c | 15 ++++++++++-----
 arch/powerpc/kernel/process.c | 5 +++--
 arch/s390/kernel/process.c | 5 +++--
 arch/score/kernel/sys_score.c | 10 +++++++---
 arch/sh/kernel/process_32.c | 7 ++++---
 arch/sh/kernel/process_64.c | 4 ++--
 arch/sh/kernel/sys_sh32.c | 4 +++-
 arch/sh/kernel/sys_sh64.c | 4 +++-
 arch/sparc/kernel/process_32.c | 6 ++++--
 arch/sparc/kernel/process_64.c | 4 ++--
 arch/sparc/kernel/sys_sparc_32.c | 4 +++-
 arch/sparc/kernel/sys_sparc_64.c | 4 +++-
 arch/tile/kernel/process.c | 5 +++--
 arch/um/kernel/exec.c | 5 +++--
 arch/um/kernel/syscall.c | 4 +++-
 arch/x86/include/asm/syscalls.h | 5 +++--
 arch/x86/kernel/process.c | 5 +++--
 arch/x86/kernel/sys_i386_32.c | 4 +++-
 arch/xtensa/kernel/process.c | 5 +++--
 fs/binfmt_misc.c | 2 +-
 fs/binfmt_script.c | 3 ++-
 fs/exec.c | 21 +++++++++++----------
 include/linux/binfmts.h | 7 ++++---
 include/linux/sched.h | 4 +++-
 include/linux/syscalls.h | 2 +-
 init/do_mounts_initrd.c | 7 ++++---
 init/main.c | 6 +++---
 kernel/kmod.c | 4 +++-
security/commoncap.c | 2 +- 50 files changed, 179 insertions(+), 98 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 88e608aebc8c..842dba308eab 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -387,8 +387,9 @@ EXPORT_SYMBOL(dump_elf_task_fp); * sys_execve() executes a new program. */ asmlinkage int -do_sys_execve(const char __user *ufilename, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) +do_sys_execve(const char __user *ufilename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { int error; char *filename; diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index 5b7c541a4c63..62e7c61d0342 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -62,8 +62,9 @@ asmlinkage int sys_vfork(struct pt_regs *regs) /* sys_execve() executes a new program. * This is called indirectly via a small wrapper */ -asmlinkage int sys_execve(const char __user *filenamei, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) +asmlinkage int sys_execve(const char __user *filenamei, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { int error; char * filename; @@ -78,14 +79,17 @@ out: return error; } -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { struct pt_regs regs; int ret; memset(®s, 0, sizeof(struct pt_regs)); - ret = do_execve(filename, (char __user * __user *)argv, - (char __user * __user *)envp, ®s); + ret = do_execve(filename, + (const char __user *const __user *)argv, + (const char __user *const __user *)envp, ®s); if (ret < 0) goto out; diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index e5daddff397d..9c46aaad11ce 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -384,8 +384,9 @@ asmlinkage int sys_vfork(struct pt_regs *regs) } asmlinkage int sys_execve(const char __user *ufilename, - char __user *__user *uargv, - char __user *__user *uenvp, struct pt_regs *regs) + const char __user *const __user *uargv, + const char __user *const __user *uenvp, + struct pt_regs *regs) { int error; char *filename; diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c index 459349b5ed5a..62635a09ae3e 100644 --- a/arch/avr32/kernel/sys_avr32.c +++ b/arch/avr32/kernel/sys_avr32.c @@ -7,7 +7,9 @@ */ #include -int kernel_execve(const char *file, char **argv, char **envp) +int kernel_execve(const char *file, + const char *const *argv, + const char *const *envp) { register long scno asm("r8") = __NR_execve; register long sc1 asm("r12") = (long)file; diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index a566f61c002a..01f98cb964d2 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -209,7 +209,9 @@ copy_thread(unsigned long clone_flags, /* * sys_execve() executes a new program. 
*/ -asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, char __user * __user *envp) +asmlinkage int sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp) { int error; char *filename; diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 93f0f64b1326..9a57db6907f5 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -204,7 +204,9 @@ asmlinkage int sys_vfork(long r10, long r11, long r12, long r13, long mof, long /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char *fname, char **argv, char **envp, +asmlinkage int sys_execve(const char *fname, + const char *const *argv, + const char *const *envp, long r13, long mof, long srp, struct pt_regs *regs) { diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index 2661a9529d70..562f84718906 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -218,8 +218,10 @@ sys_vfork(long r10, long r11, long r12, long r13, long mof, long srp, /* sys_execve() executes a new program. */ asmlinkage int -sys_execve(const char *fname, char **argv, char **envp, long r13, long mof, long srp, - struct pt_regs *regs) +sys_execve(const char *fname, + const char *const *argv, + const char *const *envp, long r13, long mof, long srp, + struct pt_regs *regs) { int error; char *filename; diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 428931cf2f0c..2b63b0191f52 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -250,8 +250,9 @@ int copy_thread(unsigned long clone_flags, /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, - char __user * __user *envp) +asmlinkage int sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp) { int error; char * filename; diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 8b7b78d77d5c..97478138e361 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -212,7 +212,10 @@ int copy_thread(unsigned long clone_flags, /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char *name, char **argv, char **envp,int dummy,...) +asmlinkage int sys_execve(const char *name, + const char *const *argv, + const char *const *envp, + int dummy, ...) { int error; char * filename; diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c index f9b3f44da69f..dc1ac0243b78 100644 --- a/arch/h8300/kernel/sys_h8300.c +++ b/arch/h8300/kernel/sys_h8300.c @@ -51,7 +51,9 @@ asmlinkage void syscall_print(void *dummy,...) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. 
*/ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long res __asm__("er0"); register char *const *_c __asm__("er3") = envp; diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index a879c03b7f1c..16f1c7b04c69 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -633,7 +633,9 @@ dump_fpu (struct pt_regs *pt, elf_fpregset_t dst) } long -sys_execve (const char __user *filename, char __user * __user *argv, char __user * __user *envp, +sys_execve (const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { char *fname; diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 8665a4d868ec..422bea9f1dbc 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -289,8 +289,8 @@ asmlinkage int sys_vfork(unsigned long r0, unsigned long r1, unsigned long r2, * sys_execve() executes a new program. */ asmlinkage int sys_execve(const char __user *ufilename, - char __user * __user *uargv, - char __user * __user *uenvp, + const char __user *const __user *uargv, + const char __user *const __user *uenvp, unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, struct pt_regs regs) { diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c index 0a00f467edfa..d841fb6cc703 100644 --- a/arch/m32r/kernel/sys_m32r.c +++ b/arch/m32r/kernel/sys_m32r.c @@ -93,7 +93,9 @@ asmlinkage int sys_cachectl(char *addr, int nbytes, int op) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __scno __asm__ ("r7") = __NR_execve; register long __arg3 __asm__ ("r2") = (long)(envp); diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 221d0b71ce39..18732ab23292 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -315,7 +315,9 @@ EXPORT_SYMBOL(dump_fpu); /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, char __user * __user *envp) +asmlinkage int sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp) { int error; char * filename; diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index 77896692eb0a..2f431ece7b5f 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -459,7 +459,9 @@ asmlinkage int sys_getpagesize(void) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __res asm ("%d0") = __NR_execve; register long __a asm ("%d1") = (long)(filename); diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c index 6350f68cd026..4d090d3c0897 100644 --- a/arch/m68knommu/kernel/process.c +++ b/arch/m68knommu/kernel/process.c @@ -350,7 +350,9 @@ void dump(struct pt_regs *fp) /* * sys_execve() executes a new program. 
*/ -asmlinkage int sys_execve(const char *name, char **argv, char **envp) +asmlinkage int sys_execve(const char *name, + const char *const *argv, + const char *const *envp) { int error; char * filename; diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c index d65e9c4c930c..68488ae47f0a 100644 --- a/arch/m68knommu/kernel/sys_m68k.c +++ b/arch/m68knommu/kernel/sys_m68k.c @@ -44,7 +44,9 @@ asmlinkage int sys_getpagesize(void) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __res asm ("%d0") = __NR_execve; register long __a asm ("%d1") = (long)(filename); diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c index 6abab6ebedbe..2250fe9d269b 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -47,8 +47,10 @@ asmlinkage long microblaze_clone(int flags, unsigned long stack, struct pt_regs return do_fork(flags, stack, regs, 0, NULL, NULL); } -asmlinkage long microblaze_execve(const char __user *filenamei, char __user *__user *argv, - char __user *__user *envp, struct pt_regs *regs) +asmlinkage long microblaze_execve(const char __user *filenamei, + const char __user *const __user *argv, + const char __user *const __user *envp, + struct pt_regs *regs) { int error; char *filename; @@ -77,7 +79,9 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register const char *__a __asm__("r5") = filename; register const void *__b __asm__("r6") = argv; diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index bddce0bca195..1dc6edff45e0 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -258,8 +258,10 @@ asmlinkage int sys_execve(nabi_no_regargs struct pt_regs regs) error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, (char __user *__user *) (long)regs.regs[5], - (char __user *__user *) (long)regs.regs[6], ®s); + error = do_execve(filename, + (const char __user *const __user *) (long)regs.regs[5], + (const char __user *const __user *) (long)regs.regs[6], + ®s); putname(filename); out: @@ -436,7 +438,9 @@ asmlinkage void bad_stack(void) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. 
*/ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register unsigned long __a0 asm("$4") = (unsigned long) filename; register unsigned long __a1 asm("$5") = (unsigned long) argv; diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 762eb325b949..f48373e2bc1c 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -269,8 +269,8 @@ asmlinkage long sys_vfork(void) } asmlinkage long sys_execve(const char __user *name, - char __user * __user *argv, - char __user * __user *envp) + const char __user *const __user *argv, + const char __user *const __user *envp) { char *filename; int error; diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index 1444875a7611..0dc8543acb4f 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -41,8 +41,10 @@ int hpux_execve(struct pt_regs *regs) if (IS_ERR(filename)) goto out; - error = do_execve(filename, (char __user * __user *) regs->gr[25], - (char __user * __user *) regs->gr[24], regs); + error = do_execve(filename, + (const char __user *const __user *) regs->gr[25], + (const char __user *const __user *) regs->gr[24], + regs); putname(filename); diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 76332dadc6e9..4b4b9181a1a0 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -348,17 +348,22 @@ asmlinkage int sys_execve(struct pt_regs *regs) error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, (char __user * __user *) regs->gr[25], - (char __user * __user *) regs->gr[24], regs); + error = do_execve(filename, + (const char __user *const __user *) regs->gr[25], + (const char __user *const __user *) regs->gr[24], + regs); putname(filename); out: return error; } -extern int __execve(const char *filename, char *const argv[], - char *const envp[], struct task_struct *task); -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +extern int __execve(const char *filename, + const char *const argv[], + const char *const envp[], struct task_struct *task); +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { return __execve(filename, argv, envp, current); } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index feacfb789686..91356ffda2ca 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1034,8 +1034,9 @@ int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, flush_fp_to_thread(current); flush_altivec_to_thread(current); flush_spe_to_thread(current); - error = do_execve(filename, (char __user * __user *) a1, - (char __user * __user *) a2, regs); + error = do_execve(filename, + (const char __user *const __user *) a1, + (const char __user *const __user *) a2, regs); putname(filename); out: return error; diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 7eafaf2662b9..d3a2d1c6438e 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -267,8 +267,9 @@ asmlinkage void execve_tail(void) /* * sys_execve() executes a new program. 
*/ -SYSCALL_DEFINE3(execve, const char __user *, name, char __user * __user *, argv, - char __user * __user *, envp) +SYSCALL_DEFINE3(execve, const char __user *, name, + const char __user *const __user *, argv, + const char __user *const __user *, envp) { struct pt_regs *regs = task_pt_regs(current); char *filename; diff --git a/arch/score/kernel/sys_score.c b/arch/score/kernel/sys_score.c index 651096ff8db4..e478bf9a7e91 100644 --- a/arch/score/kernel/sys_score.c +++ b/arch/score/kernel/sys_score.c @@ -99,8 +99,10 @@ score_execve(struct pt_regs *regs) if (IS_ERR(filename)) return error; - error = do_execve(filename, (char __user *__user*)regs->regs[5], - (char __user *__user *) regs->regs[6], regs); + error = do_execve(filename, + (const char __user *const __user *)regs->regs[5], + (const char __user *const __user *)regs->regs[6], + regs); putname(filename); return error; @@ -110,7 +112,9 @@ score_execve(struct pt_regs *regs) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register unsigned long __r4 asm("r4") = (unsigned long) filename; register unsigned long __r5 asm("r5") = (unsigned long) argv; diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 052981972ae6..762a13984bbd 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -296,9 +296,10 @@ asmlinkage int sys_vfork(unsigned long r4, unsigned long r5, /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(char __user *ufilename, char __user * __user *uargv, - char __user * __user *uenvp, unsigned long r7, - struct pt_regs __regs) +asmlinkage int sys_execve(const char __user *ufilename, + const char __user *const __user *uargv, + const char __user *const __user *uenvp, + unsigned long r7, struct pt_regs __regs) { struct pt_regs *regs = RELOC_HIDE(&__regs, 0); int error; diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 68d128d651b3..210c1cabcb7f 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -497,8 +497,8 @@ asmlinkage int sys_execve(const char *ufilename, char **uargv, goto out; error = do_execve(filename, - (char __user * __user *)uargv, - (char __user * __user *)uenvp, + (const char __user *const __user *)uargv, + (const char __user *const __user *)uenvp, pregs); putname(filename); out: diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index eb68bfdd86e6..f56b6fe5c5d0 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -71,7 +71,9 @@ asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1, * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __sc0 __asm__ ("r3") = __NR_execve; register long __sc4 __asm__ ("r4") = (long) filename; diff --git a/arch/sh/kernel/sys_sh64.c b/arch/sh/kernel/sys_sh64.c index 287235768bc5..c5a38c4bf410 100644 --- a/arch/sh/kernel/sys_sh64.c +++ b/arch/sh/kernel/sys_sh64.c @@ -33,7 +33,9 @@ * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. 
*/ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register unsigned long __sc0 __asm__ ("r9") = ((0x13 << 16) | __NR_execve); register unsigned long __sc2 __asm__ ("r2") = (unsigned long) filename; diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 40e29fc8a4d6..17529298c50a 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -633,8 +633,10 @@ asmlinkage int sparc_execve(struct pt_regs *regs) if(IS_ERR(filename)) goto out; error = do_execve(filename, - (char __user * __user *)regs->u_regs[base + UREG_I1], - (char __user * __user *)regs->u_regs[base + UREG_I2], + (const char __user *const __user *) + regs->u_regs[base + UREG_I1], + (const char __user *const __user *) + regs->u_regs[base + UREG_I2], regs); putname(filename); out: diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index dbe81a368b45..485f54748384 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -739,9 +739,9 @@ asmlinkage int sparc_execve(struct pt_regs *regs) if (IS_ERR(filename)) goto out; error = do_execve(filename, - (char __user * __user *) + (const char __user *const __user *) regs->u_regs[base + UREG_I1], - (char __user * __user *) + (const char __user *const __user *) regs->u_regs[base + UREG_I2], regs); putname(filename); if (!error) { diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index ee995b7dae7e..50794137d710 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -282,7 +282,9 @@ out: * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { long __res; register long __g1 __asm__ ("g1") = __NR_execve; diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 3d435c42e6db..f836f4e93afe 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -758,7 +758,9 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act, * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { long __res; register long __g1 __asm__ ("g1") = __NR_execve; diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index ed590ad0acdc..985cc28c74c5 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -543,8 +543,9 @@ long _sys_vfork(struct pt_regs *regs) /* * sys_execve() executes a new program. 
*/ -long _sys_execve(char __user *path, char __user *__user *argv, - char __user *__user *envp, struct pt_regs *regs) +long _sys_execve(const char __user *path, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { long error; char *filename; diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 59b20d93b6d4..cd145eda3579 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -44,8 +44,9 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) PT_REGS_SP(regs) = esp; } -static long execve1(const char *file, char __user * __user *argv, - char __user *__user *env) +static long execve1(const char *file, + const char __user *const __user *argv, + const char __user *const __user *env) { long error; diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index 7427c0b1930c..5ddb246626db 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -51,7 +51,9 @@ long old_mmap(unsigned long addr, unsigned long len, return err; } -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { mm_segment_t fs; int ret; diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index feb2ff9bfc2d..f1d8b441fc77 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -23,8 +23,9 @@ long sys_iopl(unsigned int, struct pt_regs *); /* kernel/process.c */ int sys_fork(struct pt_regs *); int sys_vfork(struct pt_regs *); -long sys_execve(const char __user *, char __user * __user *, - char __user * __user *, struct pt_regs *); +long sys_execve(const char __user *, + const char __user *const __user *, + const char __user *const __user *, struct pt_regs *); long sys_clone(unsigned long, unsigned long, void __user *, void __user *, struct pt_regs *); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 64ecaf0af9af..57d1868a86aa 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -301,8 +301,9 @@ EXPORT_SYMBOL(kernel_thread); /* * sys_execve() executes a new program. */ -long sys_execve(const char __user *name, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) +long sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { long error; char *filename; diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 196552bb412c..d5e06624e34a 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -28,7 +28,9 @@ * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. 
*/ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { long __res; asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 7c2f38f68ebb..e3558b9a58ba 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -318,8 +318,9 @@ long xtensa_clone(unsigned long clone_flags, unsigned long newsp, */ asmlinkage -long xtensa_execve(const char __user *name, char __user * __user *argv, - char __user * __user *envp, +long xtensa_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp, long a3, long a4, long a5, struct pt_regs *regs) { diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 9e60fd201716..a7528b913936 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -108,7 +108,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) Node *fmt; struct file * interp_file = NULL; char iname[BINPRM_BUF_SIZE]; - char *iname_addr = iname; + const char *iname_addr = iname; int retval; int fd_binary = -1; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index aca9d55afb22..396a9884591f 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -16,7 +16,8 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) { - char *cp, *i_name, *i_arg; + const char *i_arg, *i_name; + char *cp; struct file *file; char interp[BINPRM_BUF_SIZE]; int retval; diff --git a/fs/exec.c b/fs/exec.c index 7761837e4500..05c7d6b84df7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -361,13 +361,13 @@ err: /* * count() counts the number of strings in array ARGV. */ -static int count(char __user * __user * argv, int max) +static int count(const char __user * const __user * argv, int max) { int i = 0; if (argv != NULL) { for (;;) { - char __user * p; + const char __user * p; if (get_user(p, argv)) return -EFAULT; @@ -387,7 +387,7 @@ static int count(char __user * __user * argv, int max) * processes's memory to the new process's stack. The call to get_user_pages() * ensures the destination page is created and not swapped out. */ -static int copy_strings(int argc, char __user * __user * argv, +static int copy_strings(int argc, const char __user *const __user *argv, struct linux_binprm *bprm) { struct page *kmapped_page = NULL; @@ -396,7 +396,7 @@ static int copy_strings(int argc, char __user * __user * argv, int ret; while (argc-- > 0) { - char __user *str; + const char __user *str; int len; unsigned long pos; @@ -470,12 +470,13 @@ out: /* * Like copy_strings, but get argv and its values from kernel memory. */ -int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm) +int copy_strings_kernel(int argc, const char *const *argv, + struct linux_binprm *bprm) { int r; mm_segment_t oldfs = get_fs(); set_fs(KERNEL_DS); - r = copy_strings(argc, (char __user * __user *)argv, bprm); + r = copy_strings(argc, (const char __user *const __user *)argv, bprm); set_fs(oldfs); return r; } @@ -997,7 +998,7 @@ EXPORT_SYMBOL(flush_old_exec); void setup_new_exec(struct linux_binprm * bprm) { int i, ch; - char * name; + const char *name; char tcomm[sizeof(current->comm)]; arch_pick_mmap_layout(current->mm); @@ -1316,9 +1317,9 @@ EXPORT_SYMBOL(search_binary_handler); /* * sys_execve() executes a new program. 
*/ -int do_execve(char * filename, - char __user *__user *argv, - char __user *__user *envp, +int do_execve(const char * filename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs * regs) { struct linux_binprm *bprm; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index c809e286d213..a065612fc928 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -50,8 +50,8 @@ struct linux_binprm{ int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */ unsigned int per_clear; /* bits to clear in current->personality */ int argc, envc; - char * filename; /* Name of binary as seen by procps */ - char * interp; /* Name of the binary really executed. Most + const char * filename; /* Name of binary as seen by procps */ + const char * interp; /* Name of the binary really executed. Most of the time same as filename, but could be different for binfmt_{misc,script} */ unsigned interp_flags; @@ -126,7 +126,8 @@ extern int setup_arg_pages(struct linux_binprm * bprm, unsigned long stack_top, int executable_stack); extern int bprm_mm_init(struct linux_binprm *bprm); -extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); +extern int copy_strings_kernel(int argc, const char *const *argv, + struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); diff --git a/include/linux/sched.h b/include/linux/sched.h index ce160d68f5e7..1e2a6db2d7dd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2109,7 +2109,9 @@ extern void daemonize(const char *, ...); extern int allow_signal(int); extern int disallow_signal(int); -extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *); +extern int do_execve(const char *, + const char __user * const __user *, + const char __user * const __user *, struct pt_regs *); extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 6e5d19788634..e6319d18a55d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -820,7 +820,7 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, u64 mask, int fd, const char __user *pathname); -int kernel_execve(const char *filename, char *const argv[], char *const envp[]); +int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]); asmlinkage long sys_perf_event_open( diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 2b108538d0d9..3098a38f3ae1 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -24,10 +24,11 @@ static int __init no_initrd(char *str) __setup("noinitrd", no_initrd); -static int __init do_linuxrc(void * shell) +static int __init do_linuxrc(void *_shell) { - static char *argv[] = { "linuxrc", NULL, }; - extern char * envp_init[]; + static const char *argv[] = { "linuxrc", NULL, }; + extern const char *envp_init[]; + const char *shell = _shell; sys_close(old_fd);sys_close(root_fd); sys_setsid(); diff --git a/init/main.c b/init/main.c index 22d61cb06f98..94ab488039aa 100644 --- a/init/main.c +++ b/init/main.c @@ -197,8 +197,8 @@ static int __init set_reset_devices(char *str) __setup("reset_devices", set_reset_devices); -static char * argv_init[MAX_INIT_ARGS+2] = { 
"init", NULL, }; -char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; +static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; +const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; static const char *panic_later, *panic_param; extern const struct obs_kernel_param __setup_start[], __setup_end[]; @@ -809,7 +809,7 @@ static void __init do_pre_smp_initcalls(void) do_one_initcall(*fn); } -static void run_init_process(char *init_filename) +static void run_init_process(const char *init_filename) { argv_init[0] = init_filename; kernel_execve(init_filename, argv_init, envp_init); diff --git a/kernel/kmod.c b/kernel/kmod.c index 6e9b19667a8d..9cd0591c96a2 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -153,7 +153,9 @@ static int ____call_usermodehelper(void *data) goto fail; } - retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp); + retval = kernel_execve(sub_info->path, + (const char *const *)sub_info->argv, + (const char *const *)sub_info->envp); /* Exec failed? */ fail: diff --git a/security/commoncap.c b/security/commoncap.c index 4e015996dd4d..9d172e6e330c 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -40,7 +40,7 @@ * * Warn if that happens, once per boot. */ -static void warn_setuid_and_fcaps_mixed(char *fname) +static void warn_setuid_and_fcaps_mixed(const char *fname) { static int warned; if (!warned) { -- cgit v1.2.3 From 5c79a5ae23e72fa12f1c7c528f62bf3ea35da0dc Mon Sep 17 00:00:00 2001 From: Ernst Schwab Date: Mon, 16 Aug 2010 15:10:11 +0200 Subject: spi.h: missing kernel-doc notation, please fix Added comments in kernel-doc notation for previously added struct fields. Signed-off-by: Ernst Schwab Acked-by: Randy Dunlap Signed-off-by: Grant Likely --- include/linux/spi/spi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index ae0a5286f558..92e52a1e6af3 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -213,6 +213,9 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @dma_alignment: SPI controller constraint on DMA buffers alignment. * @mode_bits: flags understood by this controller driver * @flags: other constraints relevant to this driver + * @bus_lock_spinlock: spinlock for SPI bus locking + * @bus_lock_mutex: mutex for SPI bus locking + * @bus_lock_flag: indicates that the SPI bus is locked for exclusive use * @setup: updates the device mode and clocking records used by a * device's SPI controller; protocol code may call this. This * must fail if an unrecognized or unsupported mode is requested. -- cgit v1.2.3 From 87e99511ea54510ffb60b98001d108794d5037f8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Aug 2010 17:05:45 +0200 Subject: kill BH_Ordered flag Instead of abusing a buffer_head flag just add a variant of sync_dirty_buffer which allows passing the exact type of write flag required. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/buffer.c | 17 ++++++++-------- fs/jbd/commit.c | 49 +++++++++++++++++++++++---------------------- fs/jbd2/commit.c | 39 ++++++++++++++---------------------- fs/nilfs2/super.c | 28 +++++++++++++------------- include/linux/buffer_head.h | 3 +-- 5 files changed, 63 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/fs/buffer.c b/fs/buffer.c index 50efa339e051..6c8ad977f3d4 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2911,13 +2911,6 @@ int submit_bh(int rw, struct buffer_head * bh) BUG_ON(buffer_delay(bh)); BUG_ON(buffer_unwritten(bh)); - /* - * Mask in barrier bit for a write (could be either a WRITE or a - * WRITE_SYNC - */ - if (buffer_ordered(bh) && (rw & WRITE)) - rw |= WRITE_BARRIER; - /* * Only clear out a write error when rewriting */ @@ -3021,7 +3014,7 @@ EXPORT_SYMBOL(ll_rw_block); * and then start new I/O and then wait upon it. The caller must have a ref on * the buffer_head. */ -int sync_dirty_buffer(struct buffer_head *bh) +int __sync_dirty_buffer(struct buffer_head *bh, int rw) { int ret = 0; @@ -3030,7 +3023,7 @@ int sync_dirty_buffer(struct buffer_head *bh) if (test_clear_buffer_dirty(bh)) { get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(WRITE_SYNC, bh); + ret = submit_bh(rw, bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); @@ -3043,6 +3036,12 @@ int sync_dirty_buffer(struct buffer_head *bh) } return ret; } +EXPORT_SYMBOL(__sync_dirty_buffer); + +int sync_dirty_buffer(struct buffer_head *bh) +{ + return __sync_dirty_buffer(bh, WRITE_SYNC); +} EXPORT_SYMBOL(sync_dirty_buffer); /* diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 28a9ddaa0c49..95d8c11c929e 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -119,7 +119,6 @@ static int journal_write_commit_record(journal_t *journal, struct buffer_head *bh; journal_header_t *header; int ret; - int barrier_done = 0; if (is_journal_aborted(journal)) return 0; @@ -137,34 +136,36 @@ static int journal_write_commit_record(journal_t *journal, JBUFFER_TRACE(descriptor, "write commit block"); set_buffer_dirty(bh); + if (journal->j_flags & JFS_BARRIER) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = sync_dirty_buffer(bh); - if (barrier_done) - clear_buffer_ordered(bh); - /* is it possible for another commit to fail at roughly - * the same time as this one? If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - char b[BDEVNAME_SIZE]; + ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER); - printk(KERN_WARNING - "JBD: barrier-based sync failed on %s - " - "disabling barriers\n", - bdevname(journal->j_dev, b)); - spin_lock(&journal->j_state_lock); - journal->j_flags &= ~JFS_BARRIER; - spin_unlock(&journal->j_state_lock); + /* + * Is it possible for another commit to fail at roughly + * the same time as this one? 
If so, we don't want to + * trust the barrier flag in the super, but instead want + * to remember if we sent a barrier request + */ + if (ret == -EOPNOTSUPP) { + char b[BDEVNAME_SIZE]; - /* And try again, without the barrier */ - set_buffer_uptodate(bh); - set_buffer_dirty(bh); + printk(KERN_WARNING + "JBD: barrier-based sync failed on %s - " + "disabling barriers\n", + bdevname(journal->j_dev, b)); + spin_lock(&journal->j_state_lock); + journal->j_flags &= ~JFS_BARRIER; + spin_unlock(&journal->j_state_lock); + + /* And try again, without the barrier */ + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + ret = sync_dirty_buffer(bh); + } + } else { ret = sync_dirty_buffer(bh); } + put_bh(bh); /* One for getblk() */ journal_put_journal_head(descriptor); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f52e5e8049f1..7c068c189d80 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -101,7 +101,6 @@ static int journal_submit_commit_record(journal_t *journal, struct commit_header *tmp; struct buffer_head *bh; int ret; - int barrier_done = 0; struct timespec now = current_kernel_time(); if (is_journal_aborted(journal)) @@ -136,30 +135,22 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = submit_bh(WRITE_SYNC_PLUG, bh); - if (barrier_done) - clear_buffer_ordered(bh); - - /* is it possible for another commit to fail at roughly - * the same time as this one? If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - printk(KERN_WARNING - "JBD2: Disabling barriers on %s, " - "not supported by device\n", journal->j_devname); - write_lock(&journal->j_state_lock); - journal->j_flags &= ~JBD2_BARRIER; - write_unlock(&journal->j_state_lock); + ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh); + if (ret == -EOPNOTSUPP) { + printk(KERN_WARNING + "JBD2: Disabling barriers on %s, " + "not supported by device\n", journal->j_devname); + write_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_BARRIER; + write_unlock(&journal->j_state_lock); - /* And try again, without the barrier */ - lock_buffer(bh); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); + /* And try again, without the barrier */ + lock_buffer(bh); + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + ret = submit_bh(WRITE_SYNC_PLUG, bh); + } + } else { ret = submit_bh(WRITE_SYNC_PLUG, bh); } *cbh = bh; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1fa86b9df73b..68345430fb48 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -175,24 +175,24 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) { struct the_nilfs *nilfs = sbi->s_nilfs; int err; - int barrier_done = 0; - if (nilfs_test_opt(sbi, BARRIER)) { - set_buffer_ordered(nilfs->ns_sbh[0]); - barrier_done = 1; - } retry: set_buffer_dirty(nilfs->ns_sbh[0]); - err = sync_dirty_buffer(nilfs->ns_sbh[0]); - if (err == -EOPNOTSUPP && barrier_done) { - nilfs_warning(sbi->s_super, __func__, - "barrier-based sync failed. 
" - "disabling barriers\n"); - nilfs_clear_opt(sbi, BARRIER); - barrier_done = 0; - clear_buffer_ordered(nilfs->ns_sbh[0]); - goto retry; + + if (nilfs_test_opt(sbi, BARRIER)) { + err = __sync_dirty_buffer(nilfs->ns_sbh[0], + WRITE_SYNC | WRITE_BARRIER); + if (err == -EOPNOTSUPP) { + nilfs_warning(sbi->s_super, __func__, + "barrier-based sync failed. " + "disabling barriers\n"); + nilfs_clear_opt(sbi, BARRIER); + goto retry; + } + } else { + err = sync_dirty_buffer(nilfs->ns_sbh[0]); } + if (unlikely(err)) { printk(KERN_ERR "NILFS: unable to write superblock (err=%d)\n", err); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 43e649a72529..72c1cf83eb85 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,6 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Ordered, /* ordered write */ BH_Eopnotsupp, /* operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ @@ -125,7 +124,6 @@ BUFFER_FNS(Async_Write, async_write) BUFFER_FNS(Delay, delay) BUFFER_FNS(Boundary, boundary) BUFFER_FNS(Write_EIO, write_io_error) -BUFFER_FNS(Ordered, ordered) BUFFER_FNS(Eopnotsupp, eopnotsupp) BUFFER_FNS(Unwritten, unwritten) @@ -183,6 +181,7 @@ void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); +int __sync_dirty_buffer(struct buffer_head *bh, int rw); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); -- cgit v1.2.3 From 9cb569d601e0b93e01c20a22872270ec663b75f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Aug 2010 17:06:24 +0200 Subject: remove SWRITE* I/O types These flags aren't real I/O types, but tell ll_rw_block to always lock the buffer instead of giving up on a failed trylock. Instead add a new write_dirty_buffer helper that implements this semantic and use it from the existing SWRITE* callers. Note that the ll_rw_block code had a bug where it didn't promote WRITE_SYNC_PLUG properly, which this patch fixes. In the ufs code clean up the helper that used to call ll_rw_block to mirror sync_dirty_buffer, which is the function it implements for compound buffers. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/buffer.c | 52 +++++++++++++++++++++++++-------------------- fs/fat/misc.c | 4 +++- fs/jbd/checkpoint.c | 4 +++- fs/jbd/journal.c | 2 +- fs/jbd/revoke.c | 2 +- fs/jbd2/checkpoint.c | 4 +++- fs/jbd2/journal.c | 2 +- fs/jbd2/revoke.c | 2 +- fs/reiserfs/journal.c | 2 +- fs/ufs/balloc.c | 24 +++++++-------------- fs/ufs/ialloc.c | 18 ++++++---------- fs/ufs/truncate.c | 18 ++++++---------- fs/ufs/util.c | 20 +++++++---------- fs/ufs/util.h | 3 +-- include/linux/buffer_head.h | 1 + include/linux/fs.h | 9 -------- 16 files changed, 73 insertions(+), 94 deletions(-) (limited to 'include') diff --git a/fs/buffer.c b/fs/buffer.c index 6c8ad977f3d4..3e7dca279d1c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) spin_unlock(lock); /* * Ensure any pending I/O completes so that - * ll_rw_block() actually writes the current - * contents - it is a noop if I/O is still in - * flight on potentially older contents. 
+ * write_dirty_buffer() actually writes the + * current contents - it is a noop if I/O is + * still in flight on potentially older + * contents. */ - ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, WRITE_SYNC_PLUG); /* * Kick off IO for the previous mapping. Note @@ -2949,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh); /** * ll_rw_block: low-level access to block devices (DEPRECATED) - * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead) + * @rw: whether to %READ or %WRITE or maybe %READA (readahead) * @nr: number of &struct buffer_heads in the array * @bhs: array of pointers to &struct buffer_head * * ll_rw_block() takes an array of pointers to &struct buffer_heads, and * requests an I/O operation on them, either a %READ or a %WRITE. The third - * %SWRITE is like %WRITE only we make sure that the *current* data in buffers - * are sent to disk. The fourth %READA option is described in the documentation - * for generic_make_request() which ll_rw_block() calls. + * %READA option is described in the documentation for generic_make_request() + * which ll_rw_block() calls. * * This function drops any buffer that it cannot get a lock on (with the - * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be - * clean when doing a write request, and any buffer that appears to be - * up-to-date when doing read request. Further it marks as clean buffers that - * are processed for writing (the buffer cache won't assume that they are - * actually clean until the buffer gets unlocked). + * BH_Lock state bit), any buffer that appears to be clean when doing a write + * request, and any buffer that appears to be up-to-date when doing read + * request. Further it marks as clean buffers that are processed for + * writing (the buffer cache won't assume that they are actually clean + * until the buffer gets unlocked). * * ll_rw_block sets b_end_io to simple completion handler that marks * the buffer up-to-date (if approriate), unlocks the buffer and wakes @@ -2980,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG) - lock_buffer(bh); - else if (!trylock_buffer(bh)) + if (!trylock_buffer(bh)) continue; - - if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC || - rw == SWRITE_SYNC_PLUG) { + if (rw == WRITE) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); - if (rw == SWRITE_SYNC) - submit_bh(WRITE_SYNC, bh); - else - submit_bh(WRITE, bh); + submit_bh(WRITE, bh); continue; } } else { @@ -3009,6 +3002,19 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) } EXPORT_SYMBOL(ll_rw_block); +void write_dirty_buffer(struct buffer_head *bh, int rw) +{ + lock_buffer(bh); + if (!test_clear_buffer_dirty(bh)) { + unlock_buffer(bh); + return; + } + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + submit_bh(rw, bh); +} +EXPORT_SYMBOL(write_dirty_buffer); + /* * For a data-integrity writeout, we need to wait upon any in-progress I/O * and then start new I/O and then wait upon it. 
The caller must have a ref on diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1fa23f6ffba5..1736f2356388 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -250,7 +250,9 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) { int i, err = 0; - ll_rw_block(SWRITE, nr_bhs, bhs); + for (i = 0; i < nr_bhs; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < nr_bhs; i++) { wait_on_buffer(bhs[i]); if (buffer_eopnotsupp(bhs[i])) { diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index b0435dd0654d..05a38b9c4c0e 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index f19ce94693d8..2c4b1f109da9 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1024,7 +1024,7 @@ void journal_update_superblock(journal_t *journal, int wait) if (wait) sync_dirty_buffer(bh); else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index ad717328343a..d29018307e2e 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 1c23a0f4e8a3..5247e7ffdcb4 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -255,7 +255,9 @@ __flush_batch(journal_t *journal, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = journal->j_chkpt_bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ad5866aaf0f9..0e8014ea6b94 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1124,7 +1124,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) set_buffer_uptodate(bh); } } else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index a360b06af2e3..9ad321fd63fd 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -625,7 +625,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? 
SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 1ec952b1f036..812e2c05aa29 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2311,7 +2311,7 @@ static int journal_read_transaction(struct super_block *sb, /* flush out the real blocks */ for (i = 0; i < get_desc_trans_len(desc); i++) { set_buffer_dirty(real_blocks[i]); - ll_rw_block(SWRITE, 1, real_blocks + i); + write_dirty_buffer(real_blocks[i], WRITE); } for (i = 0; i < get_desc_trans_len(desc); i++) { wait_on_buffer(real_blocks[i]); diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 048484fb10d2..46f7a807bbc1 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -114,10 +114,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -207,10 +205,8 @@ do_more: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); if (overflow) { fragment += count; @@ -558,10 +554,8 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); @@ -680,10 +674,8 @@ cg_found: succed: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; result += cgno * uspi->s_fpg; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 428017e018fe..2eabf04af3de 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -113,10 +113,8 @@ void ufs_free_inode (struct inode * inode) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -156,10 +154,8 @@ static void ufs2_init_inodes_chunk(struct super_block *sb, fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer(UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); UFSD("EXIT\n"); } @@ -290,10 +286,8 @@ cg_found: } ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; inode->i_ino = cg * uspi->s_ipg + bit; diff --git 
a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 34d5cb135320..a58f9155fc9a 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -243,10 +243,8 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) ubh_bforget(ind_ubh); ind_ubh = NULL; } - if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { - ubh_ll_rw_block(SWRITE, ind_ubh); - ubh_wait_on_buffer (ind_ubh); - } + if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) + ubh_sync_block(ind_ubh); ubh_brelse (ind_ubh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -307,10 +305,8 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) ubh_bforget(dind_bh); dind_bh = NULL; } - if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { - ubh_ll_rw_block(SWRITE, dind_bh); - ubh_wait_on_buffer (dind_bh); - } + if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) + ubh_sync_block(dind_bh); ubh_brelse (dind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -367,10 +363,8 @@ static int ufs_trunc_tindirect(struct inode *inode) ubh_bforget(tind_bh); tind_bh = NULL; } - if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { - ubh_ll_rw_block(SWRITE, tind_bh); - ubh_wait_on_buffer (tind_bh); - } + if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) + ubh_sync_block(tind_bh); ubh_brelse (tind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 85a7fc9e4a4e..d2c36d53fe66 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -113,21 +113,17 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag) } } -void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh) +void ubh_sync_block(struct ufs_buffer_head *ubh) { - if (!ubh) - return; + if (ubh) { + unsigned i; - ll_rw_block(rw, ubh->count, ubh->bh); -} + for (i = 0; i < ubh->count; i++) + write_dirty_buffer(ubh->bh[i], WRITE); -void ubh_wait_on_buffer (struct ufs_buffer_head * ubh) -{ - unsigned i; - if (!ubh) - return; - for ( i = 0; i < ubh->count; i++ ) - wait_on_buffer (ubh->bh[i]); + for (i = 0; i < ubh->count; i++) + wait_on_buffer(ubh->bh[i]); + } } void ubh_bforget (struct ufs_buffer_head * ubh) diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 0466036912f1..9f8775ce381c 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -269,8 +269,7 @@ extern void ubh_brelse (struct ufs_buffer_head *); extern void ubh_brelse_uspi (struct ufs_sb_private_info *); extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *); extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int); -extern void ubh_ll_rw_block(int, struct ufs_buffer_head *); -extern void ubh_wait_on_buffer (struct ufs_buffer_head *); +extern void ubh_sync_block(struct ufs_buffer_head *); extern void ubh_bforget (struct ufs_buffer_head *); extern int ubh_buffer_dirty (struct ufs_buffer_head *); #define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 72c1cf83eb85..ec94c12f21da 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -182,6 +182,7 @@ void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, int rw); +void write_dirty_buffer(struct buffer_head *bh, int rw); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); diff --git a/include/linux/fs.h b/include/linux/fs.h index 
9a96b4d83fc1..29f7c975304c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -125,9 +125,6 @@ struct inodes_stat_t { * block layer could (in theory) choose to ignore this * request if it runs into resource problems. * WRITE A normal async write. Device will be plugged. - * SWRITE Like WRITE, but a special case for ll_rw_block() that - * tells it to lock the buffer first. Normally a buffer - * must be locked before doing IO. * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down * the hint that someone will be waiting on this IO * shortly. The device must still be unplugged explicitly, @@ -138,9 +135,6 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * SWRITE_SYNC - * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. - * See SWRITE. * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all * previously submitted writes must be safely on storage * before this one is started. Also guarantees that when @@ -155,7 +149,6 @@ struct inodes_stat_t { #define READ 0 #define WRITE RW_MASK #define READA RWA_MASK -#define SWRITE (WRITE | READA) #define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) #define READ_META (READ | REQ_META) @@ -165,8 +158,6 @@ struct inodes_stat_t { #define WRITE_META (WRITE | REQ_META) #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_HARDBARRIER) -#define SWRITE_SYNC_PLUG (SWRITE | REQ_SYNC | REQ_NOIDLE) -#define SWRITE_SYNC (SWRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) /* * These aren't really reads or writes, they pass down information about -- cgit v1.2.3 From 3d529946ce292336793b85198bd59afc75e16bd4 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 18 Aug 2010 09:48:31 +1000 Subject: Fix spelling mistake in jhash Fix a spelling mistake. Signed-off-by: Anton Blanchard Signed-off-by: Jiri Kosina --- include/linux/jhash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jhash.h b/include/linux/jhash.h index 2a2f99fbcb16..ced1159fa4f2 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -116,7 +116,7 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) /* A special ultra-optimized versions that knows they are hashing exactly * 3, 2 or 1 word(s). * - * NOTE: In partilar the "c += length; __jhash_mix(a,b,c);" normally + * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally * done at the end is not done here. */ static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) -- cgit v1.2.3 From 2a4419b5b2a77f3f4537c14f7ad7df95770655dd Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:33 +1000 Subject: fs: fs_struct rwlock to spinlock fs: fs_struct rwlock to spinlock struct fs_struct.lock is an rwlock with the read-side used to protect root and pwd members while taking references to them. Taking a reference to a path typically requires just 2 atomic ops, so the critical section is very small. Parallel read-side operations would have cacheline contention on the lock, the dentry, and the vfsmount cachelines, so the rwlock is unlikely to ever give a real parallelism increase. Replace it with a spinlock to avoid one or two atomic operations in typical path lookup fastpath. 
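A sketch of the converted reader fastpath, mirroring the get_fs_root() hunk below: the critical section is just a struct copy plus two reference bumps, which is why the rwlock's extra read-side atomics buy no real parallelism:

	static inline void get_fs_root(struct fs_struct *fs, struct path *root)
	{
		spin_lock(&fs->lock);	/* was read_lock(&fs->lock) */
		*root = fs->root;	/* tiny critical section: one struct copy */
		path_get(root);		/* grabs dentry + vfsmount references */
		spin_unlock(&fs->lock);	/* was read_unlock(&fs->lock) */
	}
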
Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- drivers/staging/pohmelfs/path_entry.c | 8 ++++---- fs/exec.c | 4 ++-- fs/fs_struct.c | 32 ++++++++++++++++---------------- include/linux/fs_struct.h | 14 +++++++------- kernel/fork.c | 10 +++++----- 5 files changed, 34 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c index cdc4dd50d638..8ec83d2dffb7 100644 --- a/drivers/staging/pohmelfs/path_entry.c +++ b/drivers/staging/pohmelfs/path_entry.c @@ -44,9 +44,9 @@ int pohmelfs_construct_path_string(struct pohmelfs_inode *pi, void *data, int le return -ENOENT; } - read_lock(¤t->fs->lock); + spin_lock(¤t->fs->lock); path.mnt = mntget(current->fs->root.mnt); - read_unlock(¤t->fs->lock); + spin_unlock(¤t->fs->lock); path.dentry = d; @@ -91,9 +91,9 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi) return -ENOENT; } - read_lock(¤t->fs->lock); + spin_lock(¤t->fs->lock); root = dget(current->fs->root.dentry); - read_unlock(¤t->fs->lock); + spin_unlock(¤t->fs->lock); spin_lock(&dcache_lock); diff --git a/fs/exec.c b/fs/exec.c index 7761837e4500..5adab2c93eca 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1117,7 +1117,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) bprm->unsafe = tracehook_unsafe_exec(p); n_fs = 1; - write_lock(&p->fs->lock); + spin_lock(&p->fs->lock); rcu_read_lock(); for (t = next_thread(p); t != p; t = next_thread(t)) { if (t->fs == p->fs) @@ -1134,7 +1134,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) res = 1; } } - write_unlock(&p->fs->lock); + spin_unlock(&p->fs->lock); return res; } diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 1ee40eb9a2c0..ed45a9cf5f3d 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -13,11 +13,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) { struct path old_root; - write_lock(&fs->lock); + spin_lock(&fs->lock); old_root = fs->root; fs->root = *path; path_get(path); - write_unlock(&fs->lock); + spin_unlock(&fs->lock); if (old_root.dentry) path_put(&old_root); } @@ -30,11 +30,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) { struct path old_pwd; - write_lock(&fs->lock); + spin_lock(&fs->lock); old_pwd = fs->pwd; fs->pwd = *path; path_get(path); - write_unlock(&fs->lock); + spin_unlock(&fs->lock); if (old_pwd.dentry) path_put(&old_pwd); @@ -51,7 +51,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) task_lock(p); fs = p->fs; if (fs) { - write_lock(&fs->lock); + spin_lock(&fs->lock); if (fs->root.dentry == old_root->dentry && fs->root.mnt == old_root->mnt) { path_get(new_root); @@ -64,7 +64,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) fs->pwd = *new_root; count++; } - write_unlock(&fs->lock); + spin_unlock(&fs->lock); } task_unlock(p); } while_each_thread(g, p); @@ -87,10 +87,10 @@ void exit_fs(struct task_struct *tsk) if (fs) { int kill; task_lock(tsk); - write_lock(&fs->lock); + spin_lock(&fs->lock); tsk->fs = NULL; kill = !--fs->users; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(tsk); if (kill) free_fs_struct(fs); @@ -104,7 +104,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) if (fs) { fs->users = 1; fs->in_exec = 0; - rwlock_init(&fs->lock); + spin_lock_init(&fs->lock); fs->umask = old->umask; get_fs_root_and_pwd(old, &fs->root, &fs->pwd); } @@ -121,10 +121,10 @@ int unshare_fs_struct(void) return -ENOMEM; task_lock(current); - write_lock(&fs->lock); + spin_lock(&fs->lock); kill = !--fs->users; current->fs = new_fs; - 
write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(current); if (kill) @@ -143,7 +143,7 @@ EXPORT_SYMBOL(current_umask); /* to be mentioned only in INIT_TASK */ struct fs_struct init_fs = { .users = 1, - .lock = __RW_LOCK_UNLOCKED(init_fs.lock), + .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), .umask = 0022, }; @@ -156,14 +156,14 @@ void daemonize_fs_struct(void) task_lock(current); - write_lock(&init_fs.lock); + spin_lock(&init_fs.lock); init_fs.users++; - write_unlock(&init_fs.lock); + spin_unlock(&init_fs.lock); - write_lock(&fs->lock); + spin_lock(&fs->lock); current->fs = &init_fs; kill = !--fs->users; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(current); if (kill) diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index eca3d5202138..a42b5bf02f8b 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -5,7 +5,7 @@ struct fs_struct { int users; - rwlock_t lock; + spinlock_t lock; int umask; int in_exec; struct path root, pwd; @@ -23,29 +23,29 @@ extern int unshare_fs_struct(void); static inline void get_fs_root(struct fs_struct *fs, struct path *root) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *root = fs->root; path_get(root); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *pwd = fs->pwd; path_get(pwd); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root, struct path *pwd) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *root = fs->root; path_get(root); *pwd = fs->pwd; path_get(pwd); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } #endif /* _LINUX_FS_STRUCT_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 98b450876f93..856eac3ec52e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -752,13 +752,13 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) struct fs_struct *fs = current->fs; if (clone_flags & CLONE_FS) { /* tsk->fs is already what we want */ - write_lock(&fs->lock); + spin_lock(&fs->lock); if (fs->in_exec) { - write_unlock(&fs->lock); + spin_unlock(&fs->lock); return -EAGAIN; } fs->users++; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); return 0; } tsk->fs = copy_fs_struct(fs); @@ -1676,13 +1676,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) if (new_fs) { fs = current->fs; - write_lock(&fs->lock); + spin_lock(&fs->lock); current->fs = new_fs; if (--fs->users) new_fs = NULL; else new_fs = fs; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); } if (new_mm) { -- cgit v1.2.3 From ee2ffa0dfdd2db19705f2ba1c6a4c0bfe8122dd8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:35 +1000 Subject: fs: cleanup files_lock locking fs: cleanup files_lock locking Lock tty_files with a new spinlock, tty_files_lock; provide helpers to manipulate the per-sb files list; unexport the files_lock spinlock. 
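A sketch of the resulting tty open path, distilled from the ptmx_open() hunk below; file_sb_list_del() and tty_files_lock are the helper and lock this patch introduces:

	filp->private_data = tty;
	file_sb_list_del(filp);	/* __dentry_open() put it on sb->s_files */
	spin_lock(&tty_files_lock);
	list_add(&filp->f_u.fu_list, &tty->tty_files);
	spin_unlock(&tty_files_lock);
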
Cc: linux-kernel@vger.kernel.org Cc: Christoph Hellwig Cc: Alan Cox Acked-by: Andi Kleen Acked-by: Greg Kroah-Hartman Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- drivers/char/pty.c | 6 +++++- drivers/char/tty_io.c | 26 ++++++++++++++++++-------- fs/file_table.c | 42 ++++++++++++++++++------------------------ fs/open.c | 4 ++-- include/linux/fs.h | 7 ++----- include/linux/tty.h | 1 + security/selinux/hooks.c | 4 ++-- 7 files changed, 48 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/drivers/char/pty.c b/drivers/char/pty.c index ad46eae1f9bb..2c64faa8efa4 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -676,7 +676,11 @@ static int ptmx_open(struct inode *inode, struct file *filp) set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ filp->private_data = tty; - file_move(filp, &tty->tty_files); + + file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ + spin_lock(&tty_files_lock); + list_add(&filp->f_u.fu_list, &tty->tty_files); + spin_unlock(&tty_files_lock); retval = devpts_pty_new(inode, tty->link); if (retval) diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 0350c42375a2..cd5b829634ea 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -136,6 +136,9 @@ LIST_HEAD(tty_drivers); /* linked list of tty drivers */ DEFINE_MUTEX(tty_mutex); EXPORT_SYMBOL(tty_mutex); +/* Spinlock to protect the tty->tty_files list */ +DEFINE_SPINLOCK(tty_files_lock); + static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *); static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *); ssize_t redirected_tty_write(struct file *, const char __user *, @@ -235,11 +238,11 @@ static int check_tty_count(struct tty_struct *tty, const char *routine) struct list_head *p; int count = 0; - file_list_lock(); + spin_lock(&tty_files_lock); list_for_each(p, &tty->tty_files) { count++; } - file_list_unlock(); + spin_unlock(&tty_files_lock); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_SLAVE && tty->link && tty->link->count) @@ -519,7 +522,7 @@ void __tty_hangup(struct tty_struct *tty) workqueue with the lock held */ check_tty_count(tty, "tty_hangup"); - file_list_lock(); + spin_lock(&tty_files_lock); /* This breaks for file handles being sent over AF_UNIX sockets ? */ list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) { if (filp->f_op->write == redirected_tty_write) @@ -530,7 +533,7 @@ void __tty_hangup(struct tty_struct *tty) __tty_fasync(-1, filp, 0); /* can't block */ filp->f_op = &hung_up_tty_fops; } - file_list_unlock(); + spin_unlock(&tty_files_lock); tty_ldisc_hangup(tty); @@ -1424,9 +1427,9 @@ static void release_one_tty(struct work_struct *work) tty_driver_kref_put(driver); module_put(driver->owner); - file_list_lock(); + spin_lock(&tty_files_lock); list_del_init(&tty->tty_files); - file_list_unlock(); + spin_unlock(&tty_files_lock); put_pid(tty->pgrp); put_pid(tty->session); @@ -1671,7 +1674,10 @@ int tty_release(struct inode *inode, struct file *filp) * - do_tty_hangup no longer sees this file descriptor as * something that needs to be handled for hangups. 
*/ - file_kill(filp); + spin_lock(&tty_files_lock); + BUG_ON(list_empty(&filp->f_u.fu_list)); + list_del_init(&filp->f_u.fu_list); + spin_unlock(&tty_files_lock); filp->private_data = NULL; /* @@ -1840,7 +1846,11 @@ got_driver: } filp->private_data = tty; - file_move(filp, &tty->tty_files); + BUG_ON(list_empty(&filp->f_u.fu_list)); + file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ + spin_lock(&tty_files_lock); + list_add(&filp->f_u.fu_list, &tty->tty_files); + spin_unlock(&tty_files_lock); check_tty_count(tty, "tty_open"); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) diff --git a/fs/file_table.c b/fs/file_table.c index edecd36fed9b..6f0e62ecfddd 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -32,8 +32,7 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -/* public. Not pretty! */ -__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -249,7 +248,7 @@ static void __fput(struct file *file) cdev_put(inode->i_cdev); fops_put(file->f_op); put_pid(file->f_owner.pid); - file_kill(file); + file_sb_list_del(file); if (file->f_mode & FMODE_WRITE) drop_file_write_access(file); file->f_path.dentry = NULL; @@ -328,31 +327,29 @@ struct file *fget_light(unsigned int fd, int *fput_needed) return file; } - void put_filp(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); - file_kill(file); + file_sb_list_del(file); file_free(file); } } -void file_move(struct file *file, struct list_head *list) +void file_sb_list_add(struct file *file, struct super_block *sb) { - if (!list) - return; - file_list_lock(); - list_move(&file->f_u.fu_list, list); - file_list_unlock(); + spin_lock(&files_lock); + BUG_ON(!list_empty(&file->f_u.fu_list)); + list_add(&file->f_u.fu_list, &sb->s_files); + spin_unlock(&files_lock); } -void file_kill(struct file *file) +void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - file_list_lock(); + spin_lock(&files_lock); list_del_init(&file->f_u.fu_list); - file_list_unlock(); + spin_unlock(&files_lock); } } @@ -361,7 +358,7 @@ int fs_may_remount_ro(struct super_block *sb) struct file *file; /* Check that no files are currently opened for writing. */ - file_list_lock(); + spin_lock(&files_lock); list_for_each_entry(file, &sb->s_files, f_u.fu_list) { struct inode *inode = file->f_path.dentry->d_inode; @@ -373,10 +370,10 @@ int fs_may_remount_ro(struct super_block *sb) if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) goto too_bad; } - file_list_unlock(); + spin_unlock(&files_lock); return 1; /* Tis' cool bro. */ too_bad: - file_list_unlock(); + spin_unlock(&files_lock); return 0; } @@ -392,7 +389,7 @@ void mark_files_ro(struct super_block *sb) struct file *f; retry: - file_list_lock(); + spin_lock(&files_lock); list_for_each_entry(f, &sb->s_files, f_u.fu_list) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) @@ -408,16 +405,13 @@ retry: continue; file_release_write(f); mnt = mntget(f->f_path.mnt); - file_list_unlock(); - /* - * This can sleep, so we can't hold - * the file_list_lock() spinlock. - */ + /* This can sleep, so we can't hold the spinlock. 
*/ + spin_unlock(&files_lock); mnt_drop_write(mnt); mntput(mnt); goto retry; } - file_list_unlock(); + spin_unlock(&files_lock); } void __init files_init(unsigned long mempages) diff --git a/fs/open.c b/fs/open.c index 630715f9f73d..d74e1983e8dc 100644 --- a/fs/open.c +++ b/fs/open.c @@ -675,7 +675,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, f->f_path.mnt = mnt; f->f_pos = 0; f->f_op = fops_get(inode->i_fop); - file_move(f, &inode->i_sb->s_files); + file_sb_list_add(f, inode->i_sb); error = security_dentry_open(f, cred); if (error) @@ -721,7 +721,7 @@ cleanup_all: mnt_drop_write(mnt); } } - file_kill(f); + file_sb_list_del(f); f->f_path.dentry = NULL; f->f_path.mnt = NULL; cleanup_file: diff --git a/include/linux/fs.h b/include/linux/fs.h index 29f7c975304c..5a9a9e5a3705 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -944,9 +944,6 @@ struct file { unsigned long f_mnt_write_state; #endif }; -extern spinlock_t files_lock; -#define file_list_lock() spin_lock(&files_lock); -#define file_list_unlock() spin_unlock(&files_lock); #define get_file(x) atomic_long_inc(&(x)->f_count) #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) @@ -2188,8 +2185,8 @@ static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } -extern void file_move(struct file *f, struct list_head *list); -extern void file_kill(struct file *f); +extern void file_sb_list_add(struct file *f, struct super_block *sb); +extern void file_sb_list_del(struct file *f); #ifdef CONFIG_BLOCK extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); diff --git a/include/linux/tty.h b/include/linux/tty.h index 1437da3ddc62..f6b371a2514e 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -470,6 +470,7 @@ extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty); extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); extern struct mutex tty_mutex; +extern spinlock_t tty_files_lock; extern void tty_write_unlock(struct tty_struct *tty); extern int tty_write_lock(struct tty_struct *tty, int ndelay); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 42043f96e54f..bd7da0f0ccf3 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2170,7 +2170,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, tty = get_current_tty(); if (tty) { - file_list_lock(); + spin_lock(&tty_files_lock); if (!list_empty(&tty->tty_files)) { struct inode *inode; @@ -2186,7 +2186,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, drop_tty = 1; } } - file_list_unlock(); + spin_unlock(&tty_files_lock); tty_kref_put(tty); } /* Reset controlling tty. */ -- cgit v1.2.3 From d996b62a8df1d935b01319bf8defb95b5709f7b8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:36 +1000 Subject: tty: fix fu_list abuse tty: fix fu_list abuse tty code abuses fu_list, which causes a bug in remount,ro handling. If a tty device node is opened on a filesystem, then the last link to the inode removed, the filesystem will be allowed to be remounted readonly. This is because fs_may_remount_ro does not find the 0 link tty inode on the file sb list (because the tty code incorrectly removed it to use for its own purpose). This can result in a filesystem with errors after it is marked "clean". 
Taking idea from Christoph's initial patch, allocate a tty private struct at file->private_data and put our required list fields in there, linking file and tty. This makes tty nodes behave the same way as other device nodes and avoid meddling with the vfs, and avoids this bug. The error handling is not trivial in the tty code, so for this bugfix, I take the simple approach of using __GFP_NOFAIL and don't worry about memory errors. This is not a problem because our allocator doesn't fail small allocs as a rule anyway. So proper error handling is left as an exercise for tty hackers. [ Arguably filesystem's device inode would ideally be divorced from the driver's pseudo inode when it is opened, but in practice it's not clear whether that will ever be worth implementing. ] Cc: linux-kernel@vger.kernel.org Cc: Christoph Hellwig Cc: Alan Cox Cc: Greg Kroah-Hartman Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- drivers/char/pty.c | 6 +--- drivers/char/tty_io.c | 84 +++++++++++++++++++++++++++++++----------------- fs/internal.h | 2 ++ include/linux/fs.h | 2 -- include/linux/tty.h | 8 +++++ security/selinux/hooks.c | 5 ++- 6 files changed, 69 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 2c64faa8efa4..c350d01716bd 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -675,12 +675,8 @@ static int ptmx_open(struct inode *inode, struct file *filp) } set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ - filp->private_data = tty; - file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ - spin_lock(&tty_files_lock); - list_add(&filp->f_u.fu_list, &tty->tty_files); - spin_unlock(&tty_files_lock); + tty_add_file(tty, filp); retval = devpts_pty_new(inode, tty->link); if (retval) diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index cd5b829634ea..949067a0bd47 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -188,6 +188,41 @@ void free_tty_struct(struct tty_struct *tty) kfree(tty); } +static inline struct tty_struct *file_tty(struct file *file) +{ + return ((struct tty_file_private *)file->private_data)->tty; +} + +/* Associate a new file with the tty structure */ +void tty_add_file(struct tty_struct *tty, struct file *file) +{ + struct tty_file_private *priv; + + /* XXX: must implement proper error handling in callers */ + priv = kmalloc(sizeof(*priv), GFP_KERNEL|__GFP_NOFAIL); + + priv->tty = tty; + priv->file = file; + file->private_data = priv; + + spin_lock(&tty_files_lock); + list_add(&priv->list, &tty->tty_files); + spin_unlock(&tty_files_lock); +} + +/* Delete file from its tty */ +void tty_del_file(struct file *file) +{ + struct tty_file_private *priv = file->private_data; + + spin_lock(&tty_files_lock); + list_del(&priv->list); + spin_unlock(&tty_files_lock); + file->private_data = NULL; + kfree(priv); +} + + #define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base) /** @@ -500,6 +535,7 @@ void __tty_hangup(struct tty_struct *tty) struct file *cons_filp = NULL; struct file *filp, *f = NULL; struct task_struct *p; + struct tty_file_private *priv; int closecount = 0, n; unsigned long flags; int refs = 0; @@ -509,7 +545,7 @@ void __tty_hangup(struct tty_struct *tty) spin_lock(&redirect_lock); - if (redirect && redirect->private_data == tty) { + if (redirect && file_tty(redirect) == tty) { f = redirect; redirect = NULL; } @@ -524,7 +560,8 @@ void __tty_hangup(struct tty_struct *tty) spin_lock(&tty_files_lock); /* This breaks for file handles being sent over AF_UNIX 
sockets ? */ - list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) { + list_for_each_entry(priv, &tty->tty_files, list) { + filp = priv->file; if (filp->f_op->write == redirected_tty_write) cons_filp = filp; if (filp->f_op->write != tty_write) @@ -892,12 +929,10 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { int i; - struct tty_struct *tty; - struct inode *inode; + struct inode *inode = file->f_path.dentry->d_inode; + struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; - tty = file->private_data; - inode = file->f_path.dentry->d_inode; if (tty_paranoia_check(tty, inode, "tty_read")) return -EIO; if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags))) @@ -1068,12 +1103,11 @@ void tty_write_message(struct tty_struct *tty, char *msg) static ssize_t tty_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct tty_struct *tty; struct inode *inode = file->f_path.dentry->d_inode; + struct tty_struct *tty = file_tty(file); + struct tty_ldisc *ld; ssize_t ret; - struct tty_ldisc *ld; - tty = file->private_data; if (tty_paranoia_check(tty, inode, "tty_write")) return -EIO; if (!tty || !tty->ops->write || @@ -1510,13 +1544,13 @@ static void release_tty(struct tty_struct *tty, int idx) int tty_release(struct inode *inode, struct file *filp) { - struct tty_struct *tty, *o_tty; + struct tty_struct *tty = file_tty(filp); + struct tty_struct *o_tty; int pty_master, tty_closing, o_tty_closing, do_sleep; int devpts; int idx; char buf[64]; - tty = filp->private_data; if (tty_paranoia_check(tty, inode, "tty_release_dev")) return 0; @@ -1674,11 +1708,7 @@ int tty_release(struct inode *inode, struct file *filp) * - do_tty_hangup no longer sees this file descriptor as * something that needs to be handled for hangups. */ - spin_lock(&tty_files_lock); - BUG_ON(list_empty(&filp->f_u.fu_list)); - list_del_init(&filp->f_u.fu_list); - spin_unlock(&tty_files_lock); - filp->private_data = NULL; + tty_del_file(filp); /* * Perform some housekeeping before deciding whether to return. 
@@ -1845,12 +1875,8 @@ got_driver: return PTR_ERR(tty); } - filp->private_data = tty; - BUG_ON(list_empty(&filp->f_u.fu_list)); - file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ - spin_lock(&tty_files_lock); - list_add(&filp->f_u.fu_list, &tty->tty_files); - spin_unlock(&tty_files_lock); + tty_add_file(tty, filp); + check_tty_count(tty, "tty_open"); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) @@ -1926,11 +1952,10 @@ got_driver: static unsigned int tty_poll(struct file *filp, poll_table *wait) { - struct tty_struct *tty; + struct tty_struct *tty = file_tty(filp); struct tty_ldisc *ld; int ret = 0; - tty = filp->private_data; if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_poll")) return 0; @@ -1943,11 +1968,10 @@ static unsigned int tty_poll(struct file *filp, poll_table *wait) static int __tty_fasync(int fd, struct file *filp, int on) { - struct tty_struct *tty; + struct tty_struct *tty = file_tty(filp); unsigned long flags; int retval = 0; - tty = filp->private_data; if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_fasync")) goto out; @@ -2501,13 +2525,13 @@ EXPORT_SYMBOL(tty_pair_get_pty); */ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct tty_struct *tty, *real_tty; + struct tty_struct *tty = file_tty(file); + struct tty_struct *real_tty; void __user *p = (void __user *)arg; int retval; struct tty_ldisc *ld; struct inode *inode = file->f_dentry->d_inode; - tty = file->private_data; if (tty_paranoia_check(tty, inode, "tty_ioctl")) return -EINVAL; @@ -2629,7 +2653,7 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file->f_dentry->d_inode; - struct tty_struct *tty = file->private_data; + struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; int retval = -ENOIOCTLCMD; @@ -2721,7 +2745,7 @@ void __do_SAK(struct tty_struct *tty) if (!filp) continue; if (filp->f_op->read == tty_read && - filp->private_data == tty) { + file_tty(filp) == tty) { printk(KERN_NOTICE "SAK: killed process %d" " (%s): fd#%d opened to the tty\n", task_pid_nr(p), p->comm, i); diff --git a/fs/internal.h b/fs/internal.h index 6b706bc60a66..6a5c13a80660 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -80,6 +80,8 @@ extern void chroot_fs_refs(struct path *, struct path *); /* * file_table.c */ +extern void file_sb_list_add(struct file *f, struct super_block *sb); +extern void file_sb_list_del(struct file *f); extern void mark_files_ro(struct super_block *); extern struct file *get_empty_filp(void); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5a9a9e5a3705..5e65add0f163 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2185,8 +2185,6 @@ static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } -extern void file_sb_list_add(struct file *f, struct super_block *sb); -extern void file_sb_list_del(struct file *f); #ifdef CONFIG_BLOCK extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); diff --git a/include/linux/tty.h b/include/linux/tty.h index f6b371a2514e..67d64e6efe7a 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -329,6 +329,13 @@ struct tty_struct { struct tty_port *port; }; +/* Each of a tty's open files has private_data pointing to tty_file_private */ +struct tty_file_private { + struct tty_struct *tty; + struct file *file; + struct list_head list; +}; + /* tty magic number */ #define TTY_MAGIC 
0x5401 @@ -458,6 +465,7 @@ extern void proc_clear_tty(struct task_struct *p); extern struct tty_struct *get_current_tty(void); extern void tty_default_fops(struct file_operations *fops); extern struct tty_struct *alloc_tty_struct(void); +extern void tty_add_file(struct tty_struct *tty, struct file *file); extern void free_tty_struct(struct tty_struct *tty); extern void initialize_tty_struct(struct tty_struct *tty, struct tty_driver *driver, int idx); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index bd7da0f0ccf3..4796ddd4e721 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2172,6 +2172,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, if (tty) { spin_lock(&tty_files_lock); if (!list_empty(&tty->tty_files)) { + struct tty_file_private *file_priv; struct inode *inode; /* Revalidate access to controlling tty. @@ -2179,7 +2180,9 @@ static inline void flush_unauthorized_files(const struct cred *cred, than using file_has_perm, as this particular open file may belong to another process and we are only interested in the inode-based check here. */ - file = list_first_entry(&tty->tty_files, struct file, f_u.fu_list); + file_priv = list_first_entry(&tty->tty_files, + struct tty_file_private, list); + file = file_priv->file; inode = file->f_path.dentry->d_inode; if (inode_has_perm(cred, inode, FILE__READ | FILE__WRITE, NULL)) { -- cgit v1.2.3 From 2dc91abe03d8ce6dd7f9251faffafca5f6b9e85d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:37 +1000 Subject: lglock: introduce special lglock and brlock spin locks lglock: introduce special lglock and brlock spin locks This patch introduces "local-global" locks (lglocks). These can be used to: - Provide fast exclusive access to per-CPU data, with exclusive access to another CPU's data allowed but possibly subject to contention, and to provide very slow exclusive access to all per-CPU data. - Or to provide very fast and scalable read serialisation, and to provide very slow exclusive serialisation of data (not necessarily per-CPU data). Brlocks are also implemented as a short-hand notation for the latter use case. Thanks to Paul for local/global naming convention. Cc: linux-kernel@vger.kernel.org Cc: Al Viro Cc: "Paul E. McKenney" Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- include/linux/lglock.h | 172 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 include/linux/lglock.h (limited to 'include') diff --git a/include/linux/lglock.h b/include/linux/lglock.h new file mode 100644 index 000000000000..b288cb713b90 --- /dev/null +++ b/include/linux/lglock.h @@ -0,0 +1,172 @@ +/* + * Specialised local-global spinlock. Can only be declared as global variables + * to avoid overhead and keep things simple (and we don't want to start using + * these inside dynamically allocated structures). + * + * "local/global locks" (lglocks) can be used to: + * + * - Provide fast exclusive access to per-CPU data, with exclusive access to + * another CPU's data allowed but possibly subject to contention, and to + * provide very slow exclusive access to all per-CPU data. + * - Or to provide very fast and scalable read serialisation, and to provide + * very slow exclusive serialisation of data (not necessarily per-CPU data). + * + * Brlocks are also implemented as a short-hand notation for the latter use + * case. + * + * Copyright 2009, 2010, Nick Piggin, Novell Inc. 
+ */ +#ifndef __LINUX_LGLOCK_H +#define __LINUX_LGLOCK_H + +#include +#include +#include + +/* can make br locks by using local lock for read side, global lock for write */ +#define br_lock_init(name) name##_lock_init() +#define br_read_lock(name) name##_local_lock() +#define br_read_unlock(name) name##_local_unlock() +#define br_write_lock(name) name##_global_lock_online() +#define br_write_unlock(name) name##_global_unlock_online() + +#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) +#define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) + + +#define lg_lock_init(name) name##_lock_init() +#define lg_local_lock(name) name##_local_lock() +#define lg_local_unlock(name) name##_local_unlock() +#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) +#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) +#define lg_global_lock(name) name##_global_lock() +#define lg_global_unlock(name) name##_global_unlock() +#define lg_global_lock_online(name) name##_global_lock_online() +#define lg_global_unlock_online(name) name##_global_unlock_online() + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#define LOCKDEP_INIT_MAP lockdep_init_map + +#define DEFINE_LGLOCK_LOCKDEP(name) \ + struct lock_class_key name##_lock_key; \ + struct lockdep_map name##_lock_dep_map; \ + EXPORT_SYMBOL(name##_lock_dep_map) + +#else +#define LOCKDEP_INIT_MAP(a, b, c, d) + +#define DEFINE_LGLOCK_LOCKDEP(name) +#endif + + +#define DECLARE_LGLOCK(name) \ + extern void name##_lock_init(void); \ + extern void name##_local_lock(void); \ + extern void name##_local_unlock(void); \ + extern void name##_local_lock_cpu(int cpu); \ + extern void name##_local_unlock_cpu(int cpu); \ + extern void name##_global_lock(void); \ + extern void name##_global_unlock(void); \ + extern void name##_global_lock_online(void); \ + extern void name##_global_unlock_online(void); \ + +#define DEFINE_LGLOCK(name) \ + \ + DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ + DEFINE_LGLOCK_LOCKDEP(name); \ + \ + void name##_lock_init(void) { \ + int i; \ + LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ + for_each_possible_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ + } \ + } \ + EXPORT_SYMBOL(name##_lock_init); \ + \ + void name##_local_lock(void) { \ + arch_spinlock_t *lock; \ + preempt_disable(); \ + rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ + lock = &__get_cpu_var(name##_lock); \ + arch_spin_lock(lock); \ + } \ + EXPORT_SYMBOL(name##_local_lock); \ + \ + void name##_local_unlock(void) { \ + arch_spinlock_t *lock; \ + rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ + lock = &__get_cpu_var(name##_lock); \ + arch_spin_unlock(lock); \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_local_unlock); \ + \ + void name##_local_lock_cpu(int cpu) { \ + arch_spinlock_t *lock; \ + preempt_disable(); \ + rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ + lock = &per_cpu(name##_lock, cpu); \ + arch_spin_lock(lock); \ + } \ + EXPORT_SYMBOL(name##_local_lock_cpu); \ + \ + void name##_local_unlock_cpu(int cpu) { \ + arch_spinlock_t *lock; \ + rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ + lock = &per_cpu(name##_lock, cpu); \ + arch_spin_unlock(lock); \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_local_unlock_cpu); \ + \ + void name##_global_lock_online(void) { \ + int i; \ + preempt_disable(); \ + rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + 
lock = &per_cpu(name##_lock, i); \ + arch_spin_lock(lock); \ + } \ + } \ + EXPORT_SYMBOL(name##_global_lock_online); \ + \ + void name##_global_unlock_online(void) { \ + int i; \ + rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_unlock(lock); \ + } \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_global_unlock_online); \ + \ + void name##_global_lock(void) { \ + int i; \ + preempt_disable(); \ + rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_lock(lock); \ + } \ + } \ + EXPORT_SYMBOL(name##_global_lock); \ + \ + void name##_global_unlock(void) { \ + int i; \ + rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_unlock(lock); \ + } \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_global_unlock); +#endif -- cgit v1.2.3 From 6416ccb7899960868f5016751fb81bf25213d24f Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Aug 2010 04:37:38 +1000 Subject: fs: scale files_lock fs: scale files_lock Improve scalability of files_lock by adding per-cpu, per-sb files lists, protected with an lglock. The lglock provides fast access to the per-cpu lists to add and remove files. It also provides a snapshot of all the per-cpu lists (although this is very slow). One difficulty with this approach is that a file can be removed from the list by another CPU. We must track which per-cpu list the file is on with a new variable in the file struct (packed into a hole on 64-bit archs). Scalability could suffer if files are frequently removed from a different CPU's list. However, loads with frequent removal of files imply a short interval between adding and removing the files, and the scheduler attempts to avoid moving processes too far away. Also, even in the case of cross-CPU removal, the hardware has much more opportunity to parallelise cacheline transfers with N cachelines than with 1. A worst-case test of 1 CPU allocating files subsequently being freed by N CPUs degenerates to contending on a single lock, which is no worse than before. When more than one CPU is allocating files, even if they are always freed by different CPUs, there will be more parallelism than the single-lock case. Testing results: On a 2 socket, 8 core opteron, I measure the number of times the lock is taken to remove the file, the number of times it is removed by the same CPU that added it, and the number of times it is removed by the same node that added it.

Booting:    locks= 25049  cpu-hits= 23174 (92.5%)  node-hits= 23945 (95.6%)
kbuild -j16 locks=2281913 cpu-hits=2208126 (96.8%) node-hits=2252674 (98.7%)
dbench 64   locks=4306582 cpu-hits=4287247 (99.6%) node-hits=4299527 (99.8%)

So a file is removed from the same CPU it was added by over 90% of the time. It remains within the same node 95% of the time. Tim Chen ran some numbers for a 64 thread Nehalem system performing a compile.

            throughput
2.6.34-rc2  24.5
+patch      24.9

            us     sys    idle   IO wait (in %)
2.6.34-rc2  51.25  28.25  17.25  3.25
+patch      53.75  18.5   19     8.75

So significantly less CPU time spent in kernel code, higher idle time and slightly higher throughput. Single threaded performance difference was within the noise of microbenchmarks. That is not to say a penalty does not exist: the code is larger and requires more memory accesses, so it will be slightly slower.
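[Editor's illustration, not part of the patch: a pthreads sketch of the per-CPU list idea above. Adding to the local list takes a single lock; a global snapshot takes them all. NLISTS stands in for the CPU count, and all names are invented for the demo.]

#include <pthread.h>
#include <stdio.h>

#define NLISTS 4                        /* stand-in for nr_cpu_ids */

struct node { int val; struct node *next; };

static pthread_spinlock_t locks[NLISTS];
static struct node *lists[NLISTS];

static void add_local(int cpu, struct node *n)
{
	pthread_spin_lock(&locks[cpu]); /* fast path: one per-CPU lock */
	n->next = lists[cpu];
	lists[cpu] = n;
	pthread_spin_unlock(&locks[cpu]);
}

static int scan_all(void)               /* slow path: "global" lock */
{
	int i, count = 0;

	for (i = 0; i < NLISTS; i++)
		pthread_spin_lock(&locks[i]);
	for (i = 0; i < NLISTS; i++)
		for (struct node *n = lists[i]; n; n = n->next)
			count++;
	for (i = NLISTS - 1; i >= 0; i--)
		pthread_spin_unlock(&locks[i]);
	return count;
}

int main(void)
{
	static struct node a = { .val = 1 }, b = { .val = 2 };

	for (int i = 0; i < NLISTS; i++)
		pthread_spin_init(&locks[i], PTHREAD_PROCESS_PRIVATE);
	add_local(0, &a);
	add_local(3, &b);
	printf("files on all lists: %d\n", scan_all());
	return 0;
}

Removal works like add but may take a remote list's lock, which is why the patch records f_sb_list_cpu in struct file.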
Cc: linux-kernel@vger.kernel.org Cc: Tim Chen Cc: Andi Kleen Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- fs/file_table.c | 108 ++++++++++++++++++++++++++++++++++++++++++++--------- fs/super.c | 18 +++++++++ include/linux/fs.h | 7 ++++ 3 files changed, 115 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/file_table.c b/fs/file_table.c index 6f0e62ecfddd..a04bdd81c11c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include @@ -32,7 +34,8 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +DECLARE_LGLOCK(files_lglock); +DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -336,30 +339,98 @@ void put_filp(struct file *file) } } +static inline int file_list_cpu(struct file *file) +{ +#ifdef CONFIG_SMP + return file->f_sb_list_cpu; +#else + return smp_processor_id(); +#endif +} + +/* helper for file_sb_list_add to reduce ifdefs */ +static inline void __file_sb_list_add(struct file *file, struct super_block *sb) +{ + struct list_head *list; +#ifdef CONFIG_SMP + int cpu; + cpu = smp_processor_id(); + file->f_sb_list_cpu = cpu; + list = per_cpu_ptr(sb->s_files, cpu); +#else + list = &sb->s_files; +#endif + list_add(&file->f_u.fu_list, list); +} + +/** + * file_sb_list_add - add a file to the sb's file list + * @file: file to add + * @sb: sb to add it to + * + * Use this function to associate a file with the superblock of the inode it + * refers to. + */ void file_sb_list_add(struct file *file, struct super_block *sb) { - spin_lock(&files_lock); - BUG_ON(!list_empty(&file->f_u.fu_list)); - list_add(&file->f_u.fu_list, &sb->s_files); - spin_unlock(&files_lock); + lg_local_lock(files_lglock); + __file_sb_list_add(file, sb); + lg_local_unlock(files_lglock); } +/** + * file_sb_list_del - remove a file from the sb's file list + * @file: file to remove + * @sb: sb to remove it from + * + * Use this function to remove a file from its superblock. + */ void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - spin_lock(&files_lock); + lg_local_lock_cpu(files_lglock, file_list_cpu(file)); list_del_init(&file->f_u.fu_list); - spin_unlock(&files_lock); + lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); } } +#ifdef CONFIG_SMP + +/* + * These macros iterate all files on all CPUs for a given superblock. + * files_lglock must be held globally. + */ +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + int i; \ + for_each_possible_cpu(i) { \ + struct list_head *list; \ + list = per_cpu_ptr((__sb)->s_files, i); \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ + } \ +} + +#else + +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + struct list_head *list; \ + list = &(sb)->s_files; \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ +} + +#endif + int fs_may_remount_ro(struct super_block *sb) { struct file *file; - /* Check that no files are currently opened for writing. */ - spin_lock(&files_lock); - list_for_each_entry(file, &sb->s_files, f_u.fu_list) { + lg_global_lock(files_lglock); + do_file_list_for_each_entry(sb, file) { struct inode *inode = file->f_path.dentry->d_inode; /* File with pending delete? */ @@ -369,11 +440,11 @@ int fs_may_remount_ro(struct super_block *sb) /* Writeable file? 
*/ if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) goto too_bad; - } - spin_unlock(&files_lock); + } while_file_list_for_each_entry; + lg_global_unlock(files_lglock); return 1; /* Tis' cool bro. */ too_bad: - spin_unlock(&files_lock); + lg_global_unlock(files_lglock); return 0; } @@ -389,8 +460,8 @@ void mark_files_ro(struct super_block *sb) struct file *f; retry: - spin_lock(&files_lock); - list_for_each_entry(f, &sb->s_files, f_u.fu_list) { + lg_global_lock(files_lglock); + do_file_list_for_each_entry(sb, f) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) continue; @@ -406,12 +477,12 @@ retry: file_release_write(f); mnt = mntget(f->f_path.mnt); /* This can sleep, so we can't hold the spinlock. */ - spin_unlock(&files_lock); + lg_global_unlock(files_lglock); mnt_drop_write(mnt); mntput(mnt); goto retry; - } - spin_unlock(&files_lock); + } while_file_list_for_each_entry; + lg_global_unlock(files_lglock); } void __init files_init(unsigned long mempages) @@ -431,5 +502,6 @@ void __init files_init(unsigned long mempages) if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; files_defer_init(); + lg_lock_init(files_lglock); percpu_counter_init(&nr_files, 0); } diff --git a/fs/super.c b/fs/super.c index 9674ab2c8718..8819e3a7ff20 100644 --- a/fs/super.c +++ b/fs/super.c @@ -54,7 +54,22 @@ static struct super_block *alloc_super(struct file_system_type *type) s = NULL; goto out; } +#ifdef CONFIG_SMP + s->s_files = alloc_percpu(struct list_head); + if (!s->s_files) { + security_sb_free(s); + kfree(s); + s = NULL; + goto out; + } else { + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); + } +#else INIT_LIST_HEAD(&s->s_files); +#endif INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); @@ -108,6 +123,9 @@ out: */ static inline void destroy_super(struct super_block *s) { +#ifdef CONFIG_SMP + free_percpu(s->s_files); +#endif security_sb_free(s); kfree(s->s_subtype); kfree(s->s_options); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5e65add0f163..76041b614758 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -920,6 +920,9 @@ struct file { #define f_vfsmnt f_path.mnt const struct file_operations *f_op; spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ +#ifdef CONFIG_SMP + int f_sb_list_cpu; +#endif atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; @@ -1334,7 +1337,11 @@ struct super_block { struct list_head s_inodes; /* all inodes */ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ +#ifdef CONFIG_SMP + struct list_head __percpu *s_files; +#else struct list_head s_files; +#endif /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ struct list_head s_dentry_lru; /* unused dentry lru */ int s_nr_dentry_unused; /* # of dentry on lru */ -- cgit v1.2.3 From 56385a12d9bb9e173751f74b6c430742018cafc0 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 18 Aug 2010 14:08:17 +0200 Subject: ALSA: emu10k1 - delay the PCM interrupts (add pcm_irq_delay parameter) With some hardware combinations, the PCM interrupts from the emu10k1 chip are acknowledged before the period boundary. The midlevel PCM code gets confused and the playback stream is interrupted. It seems that shifting the interrupt processing by 2 samples is enough to fix this issue. This default value does not harm other, non-affected hardware.
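[Editor's illustration, not part of the patch: a rough arithmetic sketch of the workaround, with made-up numbers. If the chip raises the period IRQ a couple of samples early, a naive period count at IRQ time is off by one; programming the IRQ point delay_pcm_irq samples past the boundary hides the early acknowledgement.]

#include <stdio.h>

static unsigned int periods_elapsed(unsigned int hw_ptr,
				    unsigned int period_size)
{
	return hw_ptr / period_size;
}

int main(void)
{
	unsigned int period_size = 1024;   /* frames per period */
	unsigned int delay = 2;            /* pcm_irq_delay */

	/* IRQ acked 1 frame early: mid-level sees 0 complete periods */
	printf("early irq:   %u periods\n",
	       periods_elapsed(period_size - 1, period_size));
	/* IRQ point shifted by 'delay': still acked 1 frame early,
	   but the pointer is now past the period boundary */
	printf("delayed irq: %u periods\n",
	       periods_elapsed(period_size + delay - 1, period_size));
	return 0;
}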
More information: Kernel bugzilla bug#16300 [A compile warning fixed by tiwai] Signed-off-by: Jaroslav Kysela Cc: Signed-off-by: Takashi Iwai --- include/sound/emu10k1.h | 1 + sound/core/pcm_native.c | 4 ++++ sound/pci/emu10k1/emu10k1.c | 4 ++++ sound/pci/emu10k1/emupcm.c | 30 ++++++++++++++++++++++++++---- sound/pci/emu10k1/memory.c | 4 +++- 5 files changed, 38 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 6a664c3f7c1e..7dc97d12253c 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -1707,6 +1707,7 @@ struct snd_emu10k1 { unsigned int card_type; /* EMU10K1_CARD_* */ unsigned int ecard_ctrl; /* ecard control bits */ unsigned long dma_mask; /* PCI DMA mask */ + unsigned int delay_pcm_irq; /* in samples */ int max_cache_pages; /* max memory size / PAGE_SIZE */ struct snd_dma_buffer silent_page; /* silent page */ struct snd_dma_buffer ptb_pages; /* page table pages */ diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index a3b2a6479246..134fc6c2e08d 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -978,6 +978,10 @@ static int snd_pcm_do_pause(struct snd_pcm_substream *substream, int push) { if (substream->runtime->trigger_master != substream) return 0; + /* some drivers might use hw_ptr to recover from the pause - update the hw_ptr now */ + if (push) + snd_pcm_update_hw_ptr(substream); /* The jiffies check in snd_pcm_update_hw_ptr*() is done by * a delta betwen the current jiffies, this gives a large enough * delta, effectively to skip the check once. diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c index 4203782d7cb7..aff8387c45cf 100644 --- a/sound/pci/emu10k1/emu10k1.c +++ b/sound/pci/emu10k1/emu10k1.c @@ -52,6 +52,7 @@ static int max_synth_voices[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 64}; static int max_buffer_size[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 128}; static int enable_ir[SNDRV_CARDS]; static uint subsystem[SNDRV_CARDS]; /* Force card subsystem model */ +static uint delay_pcm_irq[SNDRV_CARDS] = {[0 ...
(SNDRV_CARDS - 1)] = 2}; module_param_array(index, int, NULL, 0444); MODULE_PARM_DESC(index, "Index value for the EMU10K1 soundcard."); @@ -73,6 +74,8 @@ module_param_array(enable_ir, bool, NULL, 0444); MODULE_PARM_DESC(enable_ir, "Enable IR."); module_param_array(subsystem, uint, NULL, 0444); MODULE_PARM_DESC(subsystem, "Force card subsystem model."); +module_param_array(delay_pcm_irq, uint, NULL, 0444); +MODULE_PARM_DESC(delay_pcm_irq, "Delay PCM interrupt by specified number of samples (default 0)."); /* * Class 0401: 1102:0008 (rev 00) Subsystem: 1102:1001 -> Audigy2 Value Model:SB0400 */ @@ -127,6 +130,7 @@ static int __devinit snd_card_emu10k1_probe(struct pci_dev *pci, &emu)) < 0) goto error; card->private_data = emu; + emu->delay_pcm_irq = delay_pcm_irq[dev] & 0x1f; if ((err = snd_emu10k1_pcm(emu, 0, NULL)) < 0) goto error; if ((err = snd_emu10k1_pcm_mic(emu, 1, NULL)) < 0) diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index 55b83ef73c63..622bace148e3 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -332,7 +332,7 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, evoice->epcm->ccca_start_addr = start_addr + ccis; if (extra) { start_addr += ccis; - end_addr += ccis; + end_addr += ccis + emu->delay_pcm_irq; } if (stereo && !extra) { snd_emu10k1_ptr_write(emu, CPF, voice, CPF_STEREO_MASK); @@ -360,7 +360,9 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, /* Assumption that PT is already 0 so no harm overwriting */ snd_emu10k1_ptr_write(emu, PTRX, voice, (send_amount[0] << 8) | send_amount[1]); snd_emu10k1_ptr_write(emu, DSL, voice, end_addr | (send_amount[3] << 24)); - snd_emu10k1_ptr_write(emu, PSST, voice, start_addr | (send_amount[2] << 24)); + snd_emu10k1_ptr_write(emu, PSST, voice, + (start_addr + (extra ? 
emu->delay_pcm_irq : 0)) | + (send_amount[2] << 24)); if (emu->card_capabilities->emu_model) pitch_target = PITCH_48000; /* Disable interpolators on emu1010 card */ else @@ -732,6 +734,23 @@ static void snd_emu10k1_playback_stop_voice(struct snd_emu10k1 *emu, struct snd_ snd_emu10k1_ptr_write(emu, IP, voice, 0); } +static inline void snd_emu10k1_playback_mangle_extra(struct snd_emu10k1 *emu, + struct snd_emu10k1_pcm *epcm, + struct snd_pcm_substream *substream, + struct snd_pcm_runtime *runtime) +{ + unsigned int ptr, period_pos; + + /* try to sychronize the current position for the interrupt + source voice */ + period_pos = runtime->status->hw_ptr - runtime->hw_ptr_interrupt; + period_pos %= runtime->period_size; + ptr = snd_emu10k1_ptr_read(emu, CCCA, epcm->extra->number); + ptr &= ~0x00ffffff; + ptr |= epcm->ccca_start_addr + period_pos; + snd_emu10k1_ptr_write(emu, CCCA, epcm->extra->number, ptr); +} + static int snd_emu10k1_playback_trigger(struct snd_pcm_substream *substream, int cmd) { @@ -753,6 +772,8 @@ static int snd_emu10k1_playback_trigger(struct snd_pcm_substream *substream, /* follow thru */ case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: case SNDRV_PCM_TRIGGER_RESUME: + if (cmd == SNDRV_PCM_TRIGGER_PAUSE_RELEASE) + snd_emu10k1_playback_mangle_extra(emu, epcm, substream, runtime); mix = &emu->pcm_mixer[substream->number]; snd_emu10k1_playback_prepare_voice(emu, epcm->voices[0], 1, 0, mix); snd_emu10k1_playback_prepare_voice(emu, epcm->voices[1], 0, 0, mix); @@ -869,8 +890,9 @@ static snd_pcm_uframes_t snd_emu10k1_playback_pointer(struct snd_pcm_substream * #endif /* printk(KERN_DEBUG - "ptr = 0x%x, buffer_size = 0x%x, period_size = 0x%x\n", - ptr, runtime->buffer_size, runtime->period_size); + "ptr = 0x%lx, buffer_size = 0x%lx, period_size = 0x%lx\n", + (long)ptr, (long)runtime->buffer_size, + (long)runtime->period_size); */ return ptr; } diff --git a/sound/pci/emu10k1/memory.c b/sound/pci/emu10k1/memory.c index ffb1ddb8dc28..957a311514c8 100644 --- a/sound/pci/emu10k1/memory.c +++ b/sound/pci/emu10k1/memory.c @@ -310,8 +310,10 @@ snd_emu10k1_alloc_pages(struct snd_emu10k1 *emu, struct snd_pcm_substream *subst if (snd_BUG_ON(!hdr)) return NULL; + idx = runtime->period_size >= runtime->buffer_size ? + (emu->delay_pcm_irq * 2) : 0; mutex_lock(&hdr->block_mutex); - blk = search_empty(emu, runtime->dma_bytes); + blk = search_empty(emu, runtime->dma_bytes + idx); if (blk == NULL) { mutex_unlock(&hdr->block_mutex); return NULL; -- cgit v1.2.3 From bd76af0f87f7a1815b311bde269a3f18305b3169 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 18 Aug 2010 14:16:54 +0200 Subject: ALSA: pcm midlevel code - add time check for double interrupt acknowledge The current code in pcm_lib.c does all checks using only the position in the ring buffer. Unfortunately, when the interrupts get delayed or merged into one, we need another timing source to check when the buffer boundary has been crossed, to avoid wrongly updating the ring buffer pointers. This code uses jiffies to check the right time window without any performance impact.
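[Editor's illustration, not part of the patch: a simplified userspace rendering of the new check in snd_pcm_update_hw_ptr0(). A pointer that appears to have gone backwards is only treated as a full buffer wrap if at least half a buffer's worth of wall time has passed; otherwise it was the same interrupt acknowledged twice. Types and numbers are invented.]

#include <stdio.h>

struct rt {
	unsigned long hw_base, buffer_size, period_size;
	unsigned long hw_ptr_interrupt;       /* position at last irq */
	unsigned long hw_ptr_jiffies;         /* time of last update */
	unsigned long hw_ptr_buffer_jiffies;  /* buffer length in jiffies */
};

static unsigned long update_hw_ptr(struct rt *rt, unsigned long pos,
				   unsigned long now)
{
	unsigned long new_hw_ptr = rt->hw_base + pos;
	unsigned long expected = rt->hw_ptr_interrupt + rt->period_size;

	if (expected > new_hw_ptr) {
		/* double-acked irq, or a real wrap? ask the clock */
		unsigned long hdelta = now - rt->hw_ptr_jiffies;

		if (hdelta > rt->hw_ptr_buffer_jiffies / 2) {
			rt->hw_base += rt->buffer_size;   /* real wrap */
			new_hw_ptr = rt->hw_base + pos;
		}
	}
	return new_hw_ptr;
}

int main(void)
{
	struct rt rt = {
		.buffer_size = 4096, .period_size = 1024,
		.hw_ptr_interrupt = 3072, .hw_ptr_jiffies = 1000,
		.hw_ptr_buffer_jiffies = 85, /* ~4096 frames at 48 kHz, HZ=1000 */
	};

	/* same position reported 1 jiffy later: a doubled irq, no wrap */
	printf("%lu\n", update_hw_ptr(&rt, 3072, 1001));
	/* low position reported 60 jiffies later: a genuine wrap */
	printf("%lu\n", update_hw_ptr(&rt, 100, 1060));
	return 0;
}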
Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 1 + sound/core/pcm_lib.c | 14 +++++++++----- sound/core/pcm_native.c | 2 ++ 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 85f1c6bf8566..dfd9b76b1853 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -278,6 +278,7 @@ struct snd_pcm_runtime { snd_pcm_uframes_t hw_ptr_base; /* Position at buffer restart */ snd_pcm_uframes_t hw_ptr_interrupt; /* Position at interrupt time */ unsigned long hw_ptr_jiffies; /* Time when hw_ptr is updated */ + unsigned long hw_ptr_buffer_jiffies; /* buffer time in jiffies */ snd_pcm_sframes_t delay; /* extra delay; typically FIFO size */ /* -- HW params -- */ diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index e23e0e7ab26f..a1707cca9c66 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -334,11 +334,15 @@ static int snd_pcm_update_hw_ptr0(struct snd_pcm_substream *substream, /* delta = "expected next hw_ptr" for in_interrupt != 0 */ delta = runtime->hw_ptr_interrupt + runtime->period_size; if (delta > new_hw_ptr) { - hw_base += runtime->buffer_size; - if (hw_base >= runtime->boundary) - hw_base = 0; - new_hw_ptr = hw_base + pos; - goto __delta; + /* check for double acknowledged interrupts */ + hdelta = jiffies - runtime->hw_ptr_jiffies; + if (hdelta > runtime->hw_ptr_buffer_jiffies/2) { + hw_base += runtime->buffer_size; + if (hw_base >= runtime->boundary) + hw_base = 0; + new_hw_ptr = hw_base + pos; + goto __delta; + } } } /* new_hw_ptr might be lower than old_hw_ptr in case when */ diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 134fc6c2e08d..e2e73895db12 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -864,6 +864,8 @@ static void snd_pcm_post_start(struct snd_pcm_substream *substream, int state) struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); runtime->hw_ptr_jiffies = jiffies; + runtime->hw_ptr_buffer_jiffies = (runtime->buffer_size * HZ) / + runtime->rate; runtime->status->state = state; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && runtime->silence_size > 0) -- cgit v1.2.3 From d15ca3203754359cfe5d18910722d3089b204cc4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Aug 2010 18:55:33 +0100 Subject: Fix the declaration of sys_execve() in asm-generic/syscalls.h Fix the declaration of sys_execve() in asm-generic/syscalls.h to have various consts applied to its pointers. 
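[Editor's illustration, not part of the patch: a userspace sketch of what the added qualifiers promise, namely that the callee may not modify the argv/envp vectors or the strings they point to. my_execve() is a hypothetical stand-in, not the real syscall.]

#include <stdio.h>

static long my_execve(const char *filename,
		      const char *const *argv,
		      const char *const *envp)
{
	/* argv[0] = "oops";   would not compile: the vector is const */
	/* argv[0][0] = 'O';   would not compile: the strings are const */
	printf("exec %s with env %s\n", filename, envp[0]);
	return 0;
}

int main(void)
{
	const char *argv[] = { "/bin/true", NULL };
	const char *envp[] = { "HOME=/", NULL };

	return (int)my_execve(argv[0], argv, envp);
}

The extra const costs nothing at runtime; it only lets the compiler reject accidental writes through the pointers.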
Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/asm-generic/syscalls.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h index df84e3b04555..d89dec864d42 100644 --- a/include/asm-generic/syscalls.h +++ b/include/asm-generic/syscalls.h @@ -23,8 +23,10 @@ asmlinkage long sys_vfork(struct pt_regs *regs); #endif #ifndef sys_execve -asmlinkage long sys_execve(char __user *filename, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs); +asmlinkage long sys_execve(const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp, + struct pt_regs *regs); #endif #ifndef sys_mmap2 -- cgit v1.2.3 From 70791ce9ba68a5921c9905ef05d23f62a90bc10c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 29 Jun 2010 19:34:05 +0200 Subject: perf: Generalize callchain_store() callchain_store() is the same on every arch, inline it in perf_event.h and rename it to perf_callchain_store() to avoid any collision. This removes repetitive code. Signed-off-by: Frederic Weisbecker Acked-by: Paul Mackerras Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: David Miller Cc: Paul Mundt Cc: Borislav Petkov --- arch/arm/kernel/perf_event.c | 15 ++++---------- arch/powerpc/kernel/perf_callchain.c | 40 +++++++++++++----------------------- arch/sh/kernel/perf_callchain.c | 11 +++------- arch/sparc/kernel/perf_event.c | 26 +++++++++-------------- arch/x86/kernel/cpu/perf_event.c | 20 +++++++----------- include/linux/perf_event.h | 7 +++++++ 6 files changed, 45 insertions(+), 74 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index fdcb0be47df1..a07c3b1955f0 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -3001,13 +3001,6 @@ arch_initcall(init_hw_perf_events); /* * Callchain handling code.
*/ -static inline void -callchain_store(struct perf_callchain_entry *entry, - u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} /* * The registers we're interested in are at the end of the variable @@ -3039,7 +3032,7 @@ user_backtrace(struct frame_tail *tail, if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail))) return NULL; - callchain_store(entry, buftail.lr); + perf_callchain_store(entry, buftail.lr); /* * Frame pointers should strictly progress back up the stack @@ -3057,7 +3050,7 @@ perf_callchain_user(struct pt_regs *regs, { struct frame_tail *tail; - callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, PERF_CONTEXT_USER); if (!user_mode(regs)) regs = task_pt_regs(current); @@ -3078,7 +3071,7 @@ callchain_trace(struct stackframe *fr, void *data) { struct perf_callchain_entry *entry = data; - callchain_store(entry, fr->pc); + perf_callchain_store(entry, fr->pc); return 0; } @@ -3088,7 +3081,7 @@ perf_callchain_kernel(struct pt_regs *regs, { struct stackframe fr; - callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); fr.fp = regs->ARM_fp; fr.sp = regs->ARM_sp; fr.lr = regs->ARM_lr; diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index 95ad9dad298e..a286c2e5a3ea 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -23,18 +23,6 @@ #include "ppc32.h" #endif -/* - * Store another value in a callchain_entry. - */ -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - unsigned int nr = entry->nr; - - if (nr < PERF_MAX_STACK_DEPTH) { - entry->ip[nr] = ip; - entry->nr = nr + 1; - } -} /* * Is sp valid as the address of the next kernel stack frame after prev_sp? 
@@ -69,8 +57,8 @@ static void perf_callchain_kernel(struct pt_regs *regs, lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->nip); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, regs->nip); if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) return; @@ -89,7 +77,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; level = 0; - callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); } else { if (level == 0) @@ -111,7 +99,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, ++level; } - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); if (!valid_next_sp(next_sp, sp)) return; sp = next_sp; @@ -246,8 +234,8 @@ static void perf_callchain_user_64(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); for (;;) { fp = (unsigned long __user *) sp; @@ -276,14 +264,14 @@ static void perf_callchain_user_64(struct pt_regs *regs, read_user_stack_64(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } @@ -447,8 +435,8 @@ static void perf_callchain_user_32(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); while (entry->nr < PERF_MAX_STACK_DEPTH) { fp = (unsigned int __user *) (unsigned long) sp; @@ -470,14 +458,14 @@ static void perf_callchain_user_32(struct pt_regs *regs, read_user_stack_32(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c index 1d6dbce7a3bc..00143f3dd196 100644 --- a/arch/sh/kernel/perf_callchain.c +++ b/arch/sh/kernel/perf_callchain.c @@ -14,11 +14,6 @@ #include #include -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} static void callchain_warning(void *data, char *msg) { @@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable) struct perf_callchain_entry *entry = data; if (reliable) - callchain_store(entry, addr); + perf_callchain_store(entry, addr); } static const struct stacktrace_ops callchain_ops = { @@ -52,8 +47,8 @@ static const struct stacktrace_ops callchain_ops = { static void perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) { - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->pc); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, regs->pc); unwind_stack(NULL, regs, NULL, 
&callchain_ops, entry); } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 357ced3c33ff..2a95a9079862 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1283,12 +1283,6 @@ void __init init_hw_perf_events(void) register_die_notifier(&perf_event_nmi_notifier); } -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} - static void perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) { @@ -1297,8 +1291,8 @@ static void perf_callchain_kernel(struct pt_regs *regs, int graph = 0; #endif - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, regs->tpc); ksp = regs->u_regs[UREG_I6]; fp = ksp + STACK_BIAS; @@ -1322,13 +1316,13 @@ static void perf_callchain_kernel(struct pt_regs *regs, pc = sf->callers_pc; fp = (unsigned long)sf->fp + STACK_BIAS; } - callchain_store(entry, pc); + perf_callchain_store(entry, pc); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if ((pc + 8UL) == (unsigned long) &return_to_handler) { int index = current->curr_ret_stack; if (current->ret_stack && index >= graph) { pc = current->ret_stack[index - graph].ret; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); graph++; } } @@ -1341,8 +1335,8 @@ static void perf_callchain_user_64(struct pt_regs *regs, { unsigned long ufp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, regs->tpc); ufp = regs->u_regs[UREG_I6] + STACK_BIAS; do { @@ -1355,7 +1349,7 @@ static void perf_callchain_user_64(struct pt_regs *regs, pc = sf.callers_pc; ufp = (unsigned long)sf.fp + STACK_BIAS; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); } while (entry->nr < PERF_MAX_STACK_DEPTH); } @@ -1364,8 +1358,8 @@ static void perf_callchain_user_32(struct pt_regs *regs, { unsigned long ufp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, regs->tpc); ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; do { @@ -1378,7 +1372,7 @@ static void perf_callchain_user_32(struct pt_regs *regs, pc = sf.callers_pc; ufp = (unsigned long)sf.fp; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); } while (entry->nr < PERF_MAX_STACK_DEPTH); } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4a4d191f9492..8af28caeafc1 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1571,12 +1571,6 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) * callchain support */ -static inline -void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); @@ -1602,7 +1596,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - callchain_store(entry, addr); + perf_callchain_store(entry, addr); } static const struct stacktrace_ops backtrace_ops = { @@ -1616,8 +1610,8 @@ static const struct stacktrace_ops backtrace_ops = { static void perf_callchain_kernel(struct pt_regs *regs, struct 
perf_callchain_entry *entry) { - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->ip); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, regs->ip); dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); } @@ -1646,7 +1640,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) if (fp < compat_ptr(regs->sp)) break; - callchain_store(entry, frame.return_address); + perf_callchain_store(entry, frame.return_address); fp = compat_ptr(frame.next_frame); } return 1; @@ -1670,8 +1664,8 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) fp = (void __user *)regs->bp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, regs->ip); if (perf_callchain_user32(regs, entry)) return; @@ -1688,7 +1682,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) if ((unsigned long)fp < regs->sp) break; - callchain_store(entry, frame.return_address); + perf_callchain_store(entry, frame.return_address); fp = frame.next_frame; } } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 937495c25073..358880404b42 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -978,6 +978,13 @@ extern void perf_event_fork(struct task_struct *tsk); extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); +static inline void +perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + if (entry->nr < PERF_MAX_STACK_DEPTH) + entry->ip[entry->nr++] = ip; +} + extern int sysctl_perf_event_paranoid; extern int sysctl_perf_event_mlock; extern int sysctl_perf_event_sample_rate; -- cgit v1.2.3 From 56962b4449af34070bb1994621ef4f0265eed4d8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Jun 2010 23:03:51 +0200 Subject: perf: Generalize some arch callchain code - Most archs use one callchain buffer per cpu, except x86, which needs to deal with NMIs. Provide a default perf_callchain_buffer() implementation that x86 overrides. - Centralize all the kernel/user regs handling and invoke new arch handlers from there: perf_callchain_user() / perf_callchain_kernel() That avoids all the user_mode() and current->mm checks, and so on. - Invert some parameters in perf_callchain_*() helpers: entry to the left, regs to the right, following the traditional (dst, src).
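[Editor's illustration, not part of the patch: a single-file sketch of the __weak pattern used here, for GCC or Clang. The generic code ships an empty weak hook; an architecture that implements unwinding provides a strong definition in another object file, which wins at link time. The names below are invented.]

#include <stdio.h>

struct cc_entry { unsigned long ip[8]; int nr; };

/* generic fallback: an arch without unwind support contributes nothing */
__attribute__((weak))
void cc_kernel(struct cc_entry *entry, const void *regs)
{
	(void)entry;
	(void)regs;
}

int main(void)
{
	struct cc_entry e = { .nr = 0 };

	/* note the (dst, src) argument order adopted by the patch */
	cc_kernel(&e, NULL);
	printf("frames collected: %d\n", e.nr);
	return 0;
}

Linking in a second object file that defines a non-weak cc_kernel() would silently replace the fallback, which is how perf_callchain_kernel()/perf_callchain_user() get their per-arch bodies.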
Signed-off-by: Frederic Weisbecker Acked-by: Paul Mackerras Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: David Miller Cc: Paul Mundt Cc: Borislav Petkov --- arch/arm/kernel/perf_event.c | 43 +++---------------------------- arch/powerpc/kernel/perf_callchain.c | 49 +++++++++++------------------------- arch/sh/kernel/perf_callchain.c | 37 ++------------------------- arch/sparc/kernel/perf_event.c | 46 +++++++++++---------------------- arch/x86/kernel/cpu/perf_event.c | 45 ++++++--------------------------- include/linux/perf_event.h | 10 +++++++- kernel/perf_event.c | 40 +++++++++++++++++++++++++++-- 7 files changed, 90 insertions(+), 180 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index a07c3b1955f0..0e3bbdb15927 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -3044,17 +3044,13 @@ user_backtrace(struct frame_tail *tail, return buftail.fp - 1; } -static void -perf_callchain_user(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct frame_tail *tail; perf_callchain_store(entry, PERF_CONTEXT_USER); - if (!user_mode(regs)) - regs = task_pt_regs(current); - tail = (struct frame_tail *)regs->ARM_fp - 1; while (tail && !((unsigned long)tail & 0x3)) @@ -3075,9 +3071,8 @@ callchain_trace(struct stackframe *fr, return 0; } -static void -perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stackframe fr; @@ -3088,33 +3083,3 @@ perf_callchain_kernel(struct pt_regs *regs, fr.pc = regs->ARM_pc; walk_stackframe(&fr, callchain_trace, entry); } - -static void -perf_do_callchain(struct pt_regs *regs, - struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); - -struct perf_callchain_entry * -perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - perf_do_callchain(regs, entry); - return entry; -} diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index a286c2e5a3ea..f7a85ede8407 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -46,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) return 0; } -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -221,8 +221,8 @@ static int sane_signal_64_frame(unsigned long sp) puc == (unsigned long) &sf->uc; } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -303,8 +303,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) return __get_user_inatomic(*ret, ptr); } -static inline void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static 
inline void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { } @@ -423,8 +423,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp, return mctx->mc_gregs; } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned int sp, next_sp; unsigned int next_ip; @@ -471,32 +471,11 @@ static void perf_callchain_user_32(struct pt_regs *regs, } } -/* - * Since we can't get PMU interrupts inside a PMU interrupt handler, - * we don't need separate irq and nmi entries here. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain); - - entry->nr = 0; - - if (!user_mode(regs)) { - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - if (current_is_64bit()) - perf_callchain_user_64(regs, entry); - else - perf_callchain_user_32(regs, entry); - } - - return entry; + if (current_is_64bit()) + perf_callchain_user_64(entry, regs); + else + perf_callchain_user_32(entry, regs); } diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c index 00143f3dd196..ef076a91292a 100644 --- a/arch/sh/kernel/perf_callchain.c +++ b/arch/sh/kernel/perf_callchain.c @@ -44,44 +44,11 @@ static const struct stacktrace_ops callchain_ops = { .address = callchain_address, }; -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { perf_callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_store(entry, regs->pc); unwind_stack(NULL, regs, NULL, &callchain_ops, entry); } - -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - /* - * Only the kernel side is implemented for now. - */ - if (!is_user) - perf_callchain_kernel(regs, entry); -} - -/* - * No need for separate IRQ and NMI entries. 
- */ -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); - - entry->nr = 0; - - perf_do_callchain(regs, entry); - - return entry; -} diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 2a95a9079862..460162d74aba 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1283,14 +1283,16 @@ void __init init_hw_perf_events(void) register_die_notifier(&perf_event_nmi_notifier); } -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ksp, fp; #ifdef CONFIG_FUNCTION_GRAPH_TRACER int graph = 0; #endif + stack_trace_flush(); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_store(entry, regs->tpc); @@ -1330,8 +1332,8 @@ static void perf_callchain_kernel(struct pt_regs *regs, } while (entry->nr < PERF_MAX_STACK_DEPTH); } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ufp; @@ -1353,8 +1355,8 @@ static void perf_callchain_user_64(struct pt_regs *regs, } while (entry->nr < PERF_MAX_STACK_DEPTH); } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ufp; @@ -1376,30 +1378,12 @@ static void perf_callchain_user_32(struct pt_regs *regs, } while (entry->nr < PERF_MAX_STACK_DEPTH); } -/* Like powerpc we can't get PMU interrupts within the PMU handler, - * so no need for separate NMI and IRQ chains as on x86. 
- */ -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); - - entry->nr = 0; - if (!user_mode(regs)) { - stack_trace_flush(); - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - if (regs) { - flushw_user(); - if (test_thread_flag(TIF_32BIT)) - perf_callchain_user_32(regs, entry); - else - perf_callchain_user_64(regs, entry); - } - return entry; + flushw_user(); + if (test_thread_flag(TIF_32BIT)) + perf_callchain_user_32(entry, regs); + else + perf_callchain_user_64(entry, regs); } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8af28caeafc1..39f8421b86e6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1571,9 +1571,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) * callchain support */ - -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); +static DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry_nmi); static void @@ -1607,8 +1605,8 @@ static const struct stacktrace_ops backtrace_ops = { .walk_stack = print_context_stack_bp, }; -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { perf_callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_store(entry, regs->ip); @@ -1653,14 +1651,12 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) } #endif -static void -perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stack_frame frame; const void __user *fp; - if (!user_mode(regs)) - regs = task_pt_regs(current); fp = (void __user *)regs->bp; @@ -1687,42 +1683,17 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) } } -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +struct perf_callchain_entry *perf_callchain_buffer(void) { - struct perf_callchain_entry *entry; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { /* TODO: We don't support guest os callchain now */ return NULL; } if (in_nmi()) - entry = &__get_cpu_var(pmc_nmi_entry); - else - entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - - perf_do_callchain(regs, entry); + return &__get_cpu_var(perf_callchain_entry_nmi); - return entry; + return &__get_cpu_var(perf_callchain_entry); } unsigned long perf_instruction_pointer(struct pt_regs *regs) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 358880404b42..4db61dded388 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -976,7 +976,15 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_comm(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); -extern struct 
perf_callchain_entry *perf_callchain(struct pt_regs *regs); +/* Callchains */ +DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); + +extern void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs); +extern struct perf_callchain_entry *perf_callchain_buffer(void); + static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index c772a3d4000d..02efde6c8798 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -2937,13 +2937,49 @@ void perf_event_do_pending(void) __perf_pending_run(); } +DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); + /* * Callchain support -- arch specific */ -__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +__weak struct perf_callchain_entry *perf_callchain_buffer(void) { - return NULL; + return &__get_cpu_var(perf_callchain_entry); +} + +__weak void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +__weak void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + struct perf_callchain_entry *entry; + + entry = perf_callchain_buffer(); + if (!entry) + return NULL; + + entry->nr = 0; + + if (!user_mode(regs)) { + perf_callchain_kernel(entry, regs); + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) + perf_callchain_user(entry, regs); + + return entry; } -- cgit v1.2.3 From 927c7a9e92c4f69097a6e9e086d11fc2f8a5b40b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 1 Jul 2010 16:20:36 +0200 Subject: perf: Fix race in callchains Now that software events no longer run with interrupts disabled in the event path, callchains can nest on any context. So separating NMI and other contexts in two buffers has become racy. Fix this by providing one buffer per nesting level. Given the size of the callchain entries (2040 bytes * 4), we now need to allocate them dynamically. v2: Fixed put_callchain_entry call after recursion. Fix the type of the recursion, it must be an array. v3: Use a manual per-cpu allocation (temporary solution until NMIs can safely access vmalloc'ed memory). Do a better separation between callchain reference tracking and allocation. Make the "put" path lockless for non-release cases. v4: Protect the callchain buffers with rcu. v5: Make the per-cpu buffer allocations node-affine.
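A minimal user-space sketch of the scheme (illustrative names only, not the kernel implementation): one buffer per nesting level, with a per-level recursion flag that rejects reentry on the same level.

#include <stdio.h>

#define NR_LEVELS 4	/* task, softirq, hardirq, nmi */

static int recursion[NR_LEVELS];
static char buffers[NR_LEVELS][2048];

static int get_level(int level)
{
	if (recursion[level])
		return -1;	/* this level is already in use: nested event */
	recursion[level]++;
	return level;
}

static void put_level(int level)
{
	recursion[level]--;
}

int main(void)
{
	int l = get_level(2);	/* pretend we are in hardirq context */

	if (l >= 0) {
		buffers[l][0] = '\0';	/* the callchain would be stored here */
		printf("using buffer for level %d\n", l);
		put_level(l);
	}
	return 0;
}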
Signed-off-by: Frederic Weisbecker Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian Cc: Paul Mundt Cc: David Miller Cc: Borislav Petkov --- arch/x86/kernel/cpu/perf_event.c | 22 ++- include/linux/perf_event.h | 1 - kernel/perf_event.c | 298 ++++++++++++++++++++++++++++++--------- 3 files changed, 238 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index a3c922288cc0..8e91cf34a9c8 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1608,6 +1608,11 @@ static const struct stacktrace_ops backtrace_ops = { void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return; + } + perf_callchain_store(entry, regs->ip); dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); @@ -1656,6 +1661,10 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) struct stack_frame frame; const void __user *fp; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return; + } fp = (void __user *)regs->bp; @@ -1681,19 +1690,6 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) } } -struct perf_callchain_entry *perf_callchain_buffer(void) -{ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* TODO: We don't support guest os callchain now */ - return NULL; - } - - if (in_nmi()) - return &__get_cpu_var(perf_callchain_entry_nmi); - - return &__get_cpu_var(perf_callchain_entry); -} - unsigned long perf_instruction_pointer(struct pt_regs *regs) { unsigned long ip; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4db61dded388..d7e8ea690864 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -983,7 +983,6 @@ extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); -extern struct perf_callchain_entry *perf_callchain_buffer(void); static inline void diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 615d024894cf..75ab8a2df6b2 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1763,6 +1763,216 @@ static u64 perf_event_read(struct perf_event *event) return perf_event_count(event); } +/* + * Callchain support + */ + +struct callchain_cpus_entries { + struct rcu_head rcu_head; + struct perf_callchain_entry *cpu_entries[0]; +}; + +static DEFINE_PER_CPU(int, callchain_recursion[4]); +static atomic_t nr_callchain_events; +static DEFINE_MUTEX(callchain_mutex); +struct callchain_cpus_entries *callchain_cpus_entries; + + +__weak void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +__weak void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +static void release_callchain_buffers_rcu(struct rcu_head *head) +{ + struct callchain_cpus_entries *entries; + int cpu; + + entries = container_of(head, struct callchain_cpus_entries, rcu_head); + + for_each_possible_cpu(cpu) + kfree(entries->cpu_entries[cpu]); + + kfree(entries); +} + +static void release_callchain_buffers(void) +{ + struct callchain_cpus_entries *entries; + + entries = callchain_cpus_entries; +
rcu_assign_pointer(callchain_cpus_entries, NULL); + call_rcu(&entries->rcu_head, release_callchain_buffers_rcu); +} + +static int alloc_callchain_buffers(void) +{ + int cpu; + int size; + struct callchain_cpus_entries *entries; + + /* + * We can't use the percpu allocation API for data that can be + * accessed from NMI. Use a temporary manual per cpu allocation + * until that gets sorted out. + */ + size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * + num_possible_cpus(); + + entries = kzalloc(size, GFP_KERNEL); + if (!entries) + return -ENOMEM; + + size = sizeof(struct perf_callchain_entry) * 4; + + for_each_possible_cpu(cpu) { + entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, + cpu_to_node(cpu)); + if (!entries->cpu_entries[cpu]) + goto fail; + } + + rcu_assign_pointer(callchain_cpus_entries, entries); + + return 0; + +fail: + for_each_possible_cpu(cpu) + kfree(entries->cpu_entries[cpu]); + kfree(entries); + + return -ENOMEM; +} + +static int get_callchain_buffers(void) +{ + int err = 0; + int count; + + mutex_lock(&callchain_mutex); + + count = atomic_inc_return(&nr_callchain_events); + if (WARN_ON_ONCE(count < 1)) { + err = -EINVAL; + goto exit; + } + + if (count > 1) { + /* If the allocation failed, give up */ + if (!callchain_cpus_entries) + err = -ENOMEM; + goto exit; + } + + err = alloc_callchain_buffers(); + if (err) + release_callchain_buffers(); +exit: + mutex_unlock(&callchain_mutex); + + return err; +} + +static void put_callchain_buffers(void) +{ + if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) { + release_callchain_buffers(); + mutex_unlock(&callchain_mutex); + } +} + +static int get_recursion_context(int *recursion) +{ + int rctx; + + if (in_nmi()) + rctx = 3; + else if (in_irq()) + rctx = 2; + else if (in_softirq()) + rctx = 1; + else + rctx = 0; + + if (recursion[rctx]) + return -1; + + recursion[rctx]++; + barrier(); + + return rctx; +} + +static inline void put_recursion_context(int *recursion, int rctx) +{ + barrier(); + recursion[rctx]--; +} + +static struct perf_callchain_entry *get_callchain_entry(int *rctx) +{ + int cpu; + struct callchain_cpus_entries *entries; + + *rctx = get_recursion_context(__get_cpu_var(callchain_recursion)); + if (*rctx == -1) + return NULL; + + entries = rcu_dereference(callchain_cpus_entries); + if (!entries) + return NULL; + + cpu = smp_processor_id(); + + return &entries->cpu_entries[cpu][*rctx]; +} + +static void +put_callchain_entry(int rctx) +{ + put_recursion_context(__get_cpu_var(callchain_recursion), rctx); +} + +static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + int rctx; + struct perf_callchain_entry *entry; + + + entry = get_callchain_entry(&rctx); + if (rctx == -1) + return NULL; + + if (!entry) + goto exit_put; + + entry->nr = 0; + + if (!user_mode(regs)) { + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_kernel(entry, regs); + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) { + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_user(entry, regs); + } + +exit_put: + put_callchain_entry(rctx); + + return entry; +} + /* * Initialize the perf_event context in a task_struct: */ @@ -1895,6 +2105,8 @@ static void free_event(struct perf_event *event) atomic_dec(&nr_comm_events); if (event->attr.task) atomic_dec(&nr_task_events); + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + put_callchain_buffers(); } if (event->buffer) { @@ -2937,55 +3149,6 @@ void 
perf_event_do_pending(void) __perf_pending_run(); } -DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); - -/* - * Callchain support -- arch specific - */ - -__weak struct perf_callchain_entry *perf_callchain_buffer(void) -{ - return &__get_cpu_var(perf_callchain_entry); -} - -__weak void perf_callchain_kernel(struct perf_callchain_entry *entry, - struct pt_regs *regs) -{ -} - -__weak void perf_callchain_user(struct perf_callchain_entry *entry, - struct pt_regs *regs) -{ -} - -static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry; - - entry = perf_callchain_buffer(); - if (!entry) - return NULL; - - entry->nr = 0; - - if (!user_mode(regs)) { - perf_callchain_store(entry, PERF_CONTEXT_KERNEL); - perf_callchain_kernel(entry, regs); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - perf_callchain_store(entry, PERF_CONTEXT_USER); - perf_callchain_user(entry, regs); - } - - return entry; -} - - /* * We assume there is only KVM supporting the callbacks. * Later on, we might change it to a list if there is @@ -3480,14 +3643,20 @@ static void perf_event_output(struct perf_event *event, int nmi, struct perf_output_handle handle; struct perf_event_header header; + /* protect the callchain buffers */ + rcu_read_lock(); + perf_prepare_sample(&header, data, event, regs); if (perf_output_begin(&handle, event, header.size, nmi, 1)) - return; + goto exit; perf_output_sample(&handle, &header, data, event); perf_output_end(&handle); + +exit: + rcu_read_unlock(); } /* @@ -4243,32 +4412,16 @@ end: int perf_swevent_get_recursion_context(void) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - int rctx; - if (in_nmi()) - rctx = 3; - else if (in_irq()) - rctx = 2; - else if (in_softirq()) - rctx = 1; - else - rctx = 0; - - if (cpuctx->recursion[rctx]) - return -1; - - cpuctx->recursion[rctx]++; - barrier(); - - return rctx; + return get_recursion_context(cpuctx->recursion); } EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); void inline perf_swevent_put_recursion_context(int rctx) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - barrier(); - cpuctx->recursion[rctx]--; + + put_recursion_context(cpuctx->recursion, rctx); } void __perf_sw_event(u32 event_id, u64 nr, int nmi, @@ -4968,6 +5121,13 @@ done: atomic_inc(&nr_comm_events); if (event->attr.task) atomic_inc(&nr_task_events); + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { + err = get_callchain_buffers(); + if (err) { + free_event(event); + return ERR_PTR(err); + } + } } return event; -- cgit v1.2.3 From 7ae07ea3a48d30689ee037cb136bc21f0b37d8ae Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 14 Aug 2010 20:45:13 +0200 Subject: perf: Humanize the number of contexts Instead of hardcoding the number of contexts for the recursion barriers, define a cpp constant to make the code more self-explanatory.
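For illustration only (standard C sketch, not part of the patch), the named constant documents at every use site what a bare "4" leaves implicit:

#include <stdio.h>

#define PERF_NR_CONTEXTS 4	/* task, softirq, hardirq, nmi */

static int recursion[PERF_NR_CONTEXTS];	/* was: int recursion[4]; */

int main(void)
{
	int i;

	/* every loop bound now names its meaning */
	for (i = 0; i < PERF_NR_CONTEXTS; i++)
		recursion[i] = 0;
	printf("tracking %d contexts\n", PERF_NR_CONTEXTS);
	return 0;
}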
Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian --- include/linux/perf_event.h | 14 ++++++++------ kernel/perf_event.c | 4 ++-- kernel/trace/trace_event_perf.c | 8 ++++---- 3 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d7e8ea690864..ae6fa6050925 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -808,6 +808,12 @@ struct perf_event_context { struct rcu_head rcu_head; }; +/* + * Number of contexts where an event can trigger: + * task, softirq, hardirq, nmi. + */ +#define PERF_NR_CONTEXTS 4 + /** * struct perf_event_cpu_context - per cpu event context structure */ @@ -821,12 +827,8 @@ struct perf_cpu_context { struct mutex hlist_mutex; int hlist_refcount; - /* - * Recursion avoidance: - * - * task, softirq, irq, nmi context - */ - int recursion[4]; + /* Recursion avoidance in each contexts */ + int recursion[PERF_NR_CONTEXTS]; }; struct perf_output_handle { diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 75ab8a2df6b2..f416aef242c3 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1772,7 +1772,7 @@ struct callchain_cpus_entries { struct perf_callchain_entry *cpu_entries[0]; }; -static DEFINE_PER_CPU(int, callchain_recursion[4]); +static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); static atomic_t nr_callchain_events; static DEFINE_MUTEX(callchain_mutex); struct callchain_cpus_entries *callchain_cpus_entries; @@ -1828,7 +1828,7 @@ static int alloc_callchain_buffers(void) if (!entries) return -ENOMEM; - size = sizeof(struct perf_callchain_entry) * 4; + size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS; for_each_possible_cpu(cpu) { entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 000e6e85b445..db2eae2efcf2 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -9,7 +9,7 @@ #include #include "trace.h" -static char *perf_trace_buf[4]; +static char *perf_trace_buf[PERF_NR_CONTEXTS]; /* * Force it to be aligned to unsigned long to avoid misaligned accesses @@ -45,7 +45,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, char *buf; int i; - for (i = 0; i < 4; i++) { + for (i = 0; i < PERF_NR_CONTEXTS; i++) { buf = (char *)alloc_percpu(perf_trace_t); if (!buf) goto fail; @@ -65,7 +65,7 @@ fail: if (!total_ref_count) { int i; - for (i = 0; i < 4; i++) { + for (i = 0; i < PERF_NR_CONTEXTS; i++) { free_percpu(perf_trace_buf[i]); perf_trace_buf[i] = NULL; } @@ -140,7 +140,7 @@ void perf_trace_destroy(struct perf_event *p_event) tp_event->perf_events = NULL; if (!--total_ref_count) { - for (i = 0; i < 4; i++) { + for (i = 0; i < PERF_NR_CONTEXTS; i++) { free_percpu(perf_trace_buf[i]); perf_trace_buf[i] = NULL; } -- cgit v1.2.3 From 6016ee13db518ab1cd0cbf43fc2ad5712021e338 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Aug 2010 12:47:59 +0900 Subject: perf, tracing: add missing __percpu markups ftrace_event_call->perf_events, perf_trace_buf, fgraph_data->cpu_data and some local variables are percpu pointers missing __percpu markups. Add them. 
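A sketch of what the annotation buys (kernel context assumed; example_buf and example_init are made-up names, not part of the patch): sparse can then warn when a __percpu pointer is dereferenced without going through the per-cpu accessors.

#include <linux/errno.h>
#include <linux/percpu.h>

static char __percpu *example_buf;

static int example_init(void)
{
	char *p;

	example_buf = alloc_percpu(char);
	if (!example_buf)
		return -ENOMEM;

	p = this_cpu_ptr(example_buf);	/* ok: converted via accessor */
	*p = 0;				/* plain pointer from here on */

	/* *example_buf = 0; -- sparse would warn: wrong address space */

	free_percpu(example_buf);
	return 0;
}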
Signed-off-by: Namhyung Kim Acked-by: Tejun Heo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Stephane Eranian LKML-Reference: <1281498479-28551-1-git-send-email-namhyung@gmail.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 4 ++-- kernel/trace/trace_event_perf.c | 15 ++++++++------- kernel/trace/trace_functions_graph.c | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 02b8b24f8f51..5f8ad7bec636 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -191,8 +191,8 @@ struct ftrace_event_call { unsigned int flags; #ifdef CONFIG_PERF_EVENTS - int perf_refcount; - struct hlist_head *perf_events; + int perf_refcount; + struct hlist_head __percpu *perf_events; #endif }; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index db2eae2efcf2..92f5477a006a 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -9,7 +9,7 @@ #include #include "trace.h" -static char *perf_trace_buf[PERF_NR_CONTEXTS]; +static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; /* * Force it to be aligned to unsigned long to avoid misaligned accesses @@ -24,7 +24,7 @@ static int total_ref_count; static int perf_trace_event_init(struct ftrace_event_call *tp_event, struct perf_event *p_event) { - struct hlist_head *list; + struct hlist_head __percpu *list; int ret = -ENOMEM; int cpu; @@ -42,11 +42,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, tp_event->perf_events = list; if (!total_ref_count) { - char *buf; + char __percpu *buf; int i; for (i = 0; i < PERF_NR_CONTEXTS; i++) { - buf = (char *)alloc_percpu(perf_trace_t); + buf = (char __percpu *)alloc_percpu(perf_trace_t); if (!buf) goto fail; @@ -102,13 +102,14 @@ int perf_trace_init(struct perf_event *p_event) int perf_trace_enable(struct perf_event *p_event) { struct ftrace_event_call *tp_event = p_event->tp_event; + struct hlist_head __percpu *pcpu_list; struct hlist_head *list; - list = tp_event->perf_events; - if (WARN_ON_ONCE(!list)) + pcpu_list = tp_event->perf_events; + if (WARN_ON_ONCE(!pcpu_list)) return -EINVAL; - list = this_cpu_ptr(list); + list = this_cpu_ptr(pcpu_list); hlist_add_head_rcu(&p_event->hlist_entry, list); return 0; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 6bff23625781..fcb5a542cd21 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -23,7 +23,7 @@ struct fgraph_cpu_data { }; struct fgraph_data { - struct fgraph_cpu_data *cpu_data; + struct fgraph_cpu_data __percpu *cpu_data; /* Place to preserve last processed entry. */ struct ftrace_graph_ent_entry ent; -- cgit v1.2.3 From a49f37eed22b74221f271811ea41323654e40dad Mon Sep 17 00:00:00 2001 From: Sachin Sanap Date: Fri, 13 Aug 2010 21:22:49 +0000 Subject: net: add Fast Ethernet driver for PXA168. Signed-off-by: Sachin Sanap Signed-off-by: David S. 
Miller --- drivers/net/Kconfig | 10 + drivers/net/Makefile | 1 + drivers/net/pxa168_eth.c | 1666 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/pxa168_eth.h | 30 + 4 files changed, 1707 insertions(+) create mode 100644 drivers/net/pxa168_eth.c create mode 100644 include/linux/pxa168_eth.h (limited to 'include') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index ebe68395ecf8..fe581566cb26 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -928,6 +928,16 @@ config SMC91X The module will be called smc91x. If you want to compile it as a module, say M here and read . +config PXA168_ETH + tristate "Marvell pxa168 ethernet support" + depends on CPU_PXA168 + select PHYLIB + help + This driver supports the pxa168 Ethernet ports. + + To compile this driver as a module, choose M here. The module + will be called pxa168_eth. + config NET_NETX tristate "NetX Ethernet support" select MII diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 56e8c27f77ce..3e8f150c4b14 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -244,6 +244,7 @@ obj-$(CONFIG_MYRI10GE) += myri10ge/ obj-$(CONFIG_SMC91X) += smc91x.o obj-$(CONFIG_SMC911X) += smc911x.o obj-$(CONFIG_SMSC911X) += smsc911x.o +obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o obj-$(CONFIG_BFIN_MAC) += bfin_mac.o obj-$(CONFIG_DM9000) += dm9000.o obj-$(CONFIG_PASEMI_MAC) += pasemi_mac_driver.o diff --git a/drivers/net/pxa168_eth.c b/drivers/net/pxa168_eth.c new file mode 100644 index 000000000000..ecc64d750cce --- /dev/null +++ b/drivers/net/pxa168_eth.c @@ -0,0 +1,1666 @@ +/* + * PXA168 ethernet driver. + * Most of the code is derived from mv643xx ethernet driver. + * + * Copyright (C) 2010 Marvell International Ltd. + * Sachin Sanap + * Philip Rakity + * Mark Brown + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "pxa168-eth" +#define DRIVER_VERSION "0.3" + +/* + * Registers + */ + +#define PHY_ADDRESS 0x0000 +#define SMI 0x0010 +#define PORT_CONFIG 0x0400 +#define PORT_CONFIG_EXT 0x0408 +#define PORT_COMMAND 0x0410 +#define PORT_STATUS 0x0418 +#define HTPR 0x0428 +#define SDMA_CONFIG 0x0440 +#define SDMA_CMD 0x0448 +#define INT_CAUSE 0x0450 +#define INT_W_CLEAR 0x0454 +#define INT_MASK 0x0458 +#define ETH_F_RX_DESC_0 0x0480 +#define ETH_C_RX_DESC_0 0x04A0 +#define ETH_C_TX_DESC_1 0x04E4 + +/* smi register */ +#define SMI_BUSY (1 << 28) /* 0 - Write, 1 - Read */ +#define SMI_R_VALID (1 << 27) /* 0 - Write, 1 - Read */ +#define SMI_OP_W (0 << 26) /* Write operation */ +#define SMI_OP_R (1 << 26) /* Read operation */ + +#define PHY_WAIT_ITERATIONS 10 + +#define PXA168_ETH_PHY_ADDR_DEFAULT 0 +/* RX & TX descriptor command */ +#define BUF_OWNED_BY_DMA (1 << 31) + +/* RX descriptor status */ +#define RX_EN_INT (1 << 23) +#define RX_FIRST_DESC (1 << 17) +#define RX_LAST_DESC (1 << 16) +#define RX_ERROR (1 << 15) + +/* TX descriptor command */ +#define TX_EN_INT (1 << 23) +#define TX_GEN_CRC (1 << 22) +#define TX_ZERO_PADDING (1 << 18) +#define TX_FIRST_DESC (1 << 17) +#define TX_LAST_DESC (1 << 16) +#define TX_ERROR (1 << 15) + +/* SDMA_CMD */ +#define SDMA_CMD_AT (1 << 31) +#define SDMA_CMD_TXDL (1 << 24) +#define SDMA_CMD_TXDH (1 << 23) +#define SDMA_CMD_AR (1 << 15) +#define SDMA_CMD_ERD (1 << 7) + +/* Bit definitions of the Port Config Reg */ +#define PCR_HS (1 << 12) +#define PCR_EN (1 << 7) +#define PCR_PM (1 << 0) + +/* Bit definitions of the Port Config Extend Reg */ +#define PCXR_2BSM (1 << 28) +#define PCXR_DSCP_EN (1 << 21) +#define PCXR_MFL_1518 (0 << 14) +#define PCXR_MFL_1536 (1 << 14) +#define PCXR_MFL_2048 (2 << 14) +#define PCXR_MFL_64K (3 << 14) +#define PCXR_FLP (1 << 11) +#define PCXR_PRIO_TX_OFF 3 +#define PCXR_TX_HIGH_PRI (7 << PCXR_PRIO_TX_OFF) + +/* Bit definitions of the SDMA Config Reg */ +#define SDCR_BSZ_OFF 12 +#define SDCR_BSZ8 (3 << SDCR_BSZ_OFF) +#define SDCR_BSZ4 (2 << SDCR_BSZ_OFF) +#define SDCR_BSZ2 (1 << SDCR_BSZ_OFF) +#define SDCR_BSZ1 (0 << SDCR_BSZ_OFF) +#define SDCR_BLMR (1 << 6) +#define SDCR_BLMT (1 << 7) +#define SDCR_RIFB (1 << 9) +#define SDCR_RC_OFF 2 +#define SDCR_RC_MAX_RETRANS (0xf << SDCR_RC_OFF) + +/* + * Bit definitions of the Interrupt Cause Reg + * and Interrupt MASK Reg is the same + */ +#define ICR_RXBUF (1 << 0) +#define ICR_TXBUF_H (1 << 2) +#define ICR_TXBUF_L (1 << 3) +#define ICR_TXEND_H (1 << 6) +#define ICR_TXEND_L (1 << 7) +#define ICR_RXERR (1 << 8) +#define ICR_TXERR_H (1 << 10) +#define ICR_TXERR_L (1 << 11) +#define ICR_TX_UDR (1 << 13) +#define ICR_MII_CH (1 << 28) + +#define ALL_INTS (ICR_TXBUF_H | ICR_TXBUF_L | ICR_TX_UDR |\ + ICR_TXERR_H | ICR_TXERR_L |\ + ICR_TXEND_H | ICR_TXEND_L |\ + ICR_RXBUF | ICR_RXERR | ICR_MII_CH) + +#define ETH_HW_IP_ALIGN 2 /* hw aligns IP header */ + +#define NUM_RX_DESCS 64 +#define NUM_TX_DESCS 64 + +#define HASH_ADD 0 +#define HASH_DELETE 1 +#define HASH_ADDR_TABLE_SIZE 0x4000 /* 16K (1/2K address - PCR_HS == 1) */ +#define HOP_NUMBER 12 + +/* Bit definitions for Port status */ +#define PORT_SPEED_100 (1 << 0) +#define FULL_DUPLEX (1 << 1) +#define FLOW_CONTROL_ENABLED (1 << 2) +#define LINK_UP (1 << 3) + +/* Bit definitions for work to 
be done */ +#define WORK_LINK (1 << 0) +#define WORK_TX_DONE (1 << 1) + +/* + * Misc definitions. + */ +#define SKB_DMA_REALIGN ((PAGE_SIZE - NET_SKB_PAD) % SMP_CACHE_BYTES) + +struct rx_desc { + u32 cmd_sts; /* Descriptor command status */ + u16 byte_cnt; /* Descriptor buffer byte count */ + u16 buf_size; /* Buffer size */ + u32 buf_ptr; /* Descriptor buffer pointer */ + u32 next_desc_ptr; /* Next descriptor pointer */ +}; + +struct tx_desc { + u32 cmd_sts; /* Command/status field */ + u16 reserved; + u16 byte_cnt; /* buffer byte count */ + u32 buf_ptr; /* pointer to buffer for this descriptor */ + u32 next_desc_ptr; /* Pointer to next descriptor */ +}; + +struct pxa168_eth_private { + int port_num; /* User Ethernet port number */ + + int rx_resource_err; /* Rx ring resource error flag */ + + /* Next available and first returning Rx resource */ + int rx_curr_desc_q, rx_used_desc_q; + + /* Next available and first returning Tx resource */ + int tx_curr_desc_q, tx_used_desc_q; + + struct rx_desc *p_rx_desc_area; + dma_addr_t rx_desc_dma; + int rx_desc_area_size; + struct sk_buff **rx_skb; + + struct tx_desc *p_tx_desc_area; + dma_addr_t tx_desc_dma; + int tx_desc_area_size; + struct sk_buff **tx_skb; + + struct work_struct tx_timeout_task; + + struct net_device *dev; + struct napi_struct napi; + u8 work_todo; + int skb_size; + + struct net_device_stats stats; + /* Size of Tx Ring per queue */ + int tx_ring_size; + /* Number of tx descriptors in use */ + int tx_desc_count; + /* Size of Rx Ring per queue */ + int rx_ring_size; + /* Number of rx descriptors in use */ + int rx_desc_count; + + /* + * Used in case RX Ring is empty, which can occur when + * system does not have resources (skb's) + */ + struct timer_list timeout; + struct mii_bus *smi_bus; + struct phy_device *phy; + + /* clock */ + struct clk *clk; + struct pxa168_eth_platform_data *pd; + /* + * Ethernet controller base address. 
+ */ + void __iomem *base; + + /* Pointer to the hardware address filter table */ + void *htpr; + dma_addr_t htpr_dma; +}; + +struct addr_table_entry { + __le32 lo; + __le32 hi; +}; + +/* Bit fields of a Hash Table Entry */ +enum hash_table_entry { + HASH_ENTRY_VALID = 1, + SKIP = 2, + HASH_ENTRY_RECEIVE_DISCARD = 4, + HASH_ENTRY_RECEIVE_DISCARD_BIT = 2 +}; + +static int pxa168_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); +static int pxa168_set_settings(struct net_device *dev, struct ethtool_cmd *cmd); +static int pxa168_init_hw(struct pxa168_eth_private *pep); +static void eth_port_reset(struct net_device *dev); +static void eth_port_start(struct net_device *dev); +static int pxa168_eth_open(struct net_device *dev); +static int pxa168_eth_stop(struct net_device *dev); +static int ethernet_phy_setup(struct net_device *dev); + +static inline u32 rdl(struct pxa168_eth_private *pep, int offset) +{ + return readl(pep->base + offset); +} + +static inline void wrl(struct pxa168_eth_private *pep, int offset, u32 data) +{ + writel(data, pep->base + offset); +} + +static void abort_dma(struct pxa168_eth_private *pep) +{ + int delay; + int max_retries = 40; + + do { + wrl(pep, SDMA_CMD, SDMA_CMD_AR | SDMA_CMD_AT); + udelay(100); + + delay = 10; + while ((rdl(pep, SDMA_CMD) & (SDMA_CMD_AR | SDMA_CMD_AT)) + && delay-- > 0) { + udelay(10); + } + } while (max_retries-- > 0 && delay <= 0); + + if (max_retries <= 0) + printk(KERN_ERR "%s : DMA Stuck\n", __func__); +} + +static int ethernet_phy_get(struct pxa168_eth_private *pep) +{ + unsigned int reg_data; + + reg_data = rdl(pep, PHY_ADDRESS); + + return (reg_data >> (5 * pep->port_num)) & 0x1f; +} + +static void ethernet_phy_set_addr(struct pxa168_eth_private *pep, int phy_addr) +{ + u32 reg_data; + int addr_shift = 5 * pep->port_num; + + reg_data = rdl(pep, PHY_ADDRESS); + reg_data &= ~(0x1f << addr_shift); + reg_data |= (phy_addr & 0x1f) << addr_shift; + wrl(pep, PHY_ADDRESS, reg_data); +} + +static void ethernet_phy_reset(struct pxa168_eth_private *pep) +{ + int data; + + data = phy_read(pep->phy, MII_BMCR); + if (data < 0) + return; + + data |= BMCR_RESET; + if (phy_write(pep->phy, MII_BMCR, data) < 0) + return; + + do { + data = phy_read(pep->phy, MII_BMCR); + } while (data >= 0 && data & BMCR_RESET); +} + +static void rxq_refill(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct sk_buff *skb; + struct rx_desc *p_used_rx_desc; + int used_rx_desc; + + while (pep->rx_desc_count < pep->rx_ring_size) { + int size; + + skb = dev_alloc_skb(pep->skb_size); + if (!skb) + break; + if (SKB_DMA_REALIGN) + skb_reserve(skb, SKB_DMA_REALIGN); + pep->rx_desc_count++; + /* Get 'used' Rx descriptor */ + used_rx_desc = pep->rx_used_desc_q; + p_used_rx_desc = &pep->p_rx_desc_area[used_rx_desc]; + size = skb->end - skb->data; + p_used_rx_desc->buf_ptr = dma_map_single(NULL, + skb->data, + size, + DMA_FROM_DEVICE); + p_used_rx_desc->buf_size = size; + pep->rx_skb[used_rx_desc] = skb; + + /* Return the descriptor to DMA ownership */ + wmb(); + p_used_rx_desc->cmd_sts = BUF_OWNED_BY_DMA | RX_EN_INT; + wmb(); + + /* Move the used descriptor pointer to the next descriptor */ + pep->rx_used_desc_q = (used_rx_desc + 1) % pep->rx_ring_size; + + /* Any Rx return cancels the Rx resource error status */ + pep->rx_resource_err = 0; + + skb_reserve(skb, ETH_HW_IP_ALIGN); + } + + /* + * If RX ring is empty of SKB, set a timer to try allocating + * again at a later time. 
+ */ + if (pep->rx_desc_count == 0) { + pep->timeout.expires = jiffies + (HZ / 10); + add_timer(&pep->timeout); + } +} + +static inline void rxq_refill_timer_wrapper(unsigned long data) +{ + struct pxa168_eth_private *pep = (void *)data; + napi_schedule(&pep->napi); +} + +static inline u8 flip_8_bits(u8 x) +{ + return (((x) & 0x01) << 3) | (((x) & 0x02) << 1) + | (((x) & 0x04) >> 1) | (((x) & 0x08) >> 3) + | (((x) & 0x10) << 3) | (((x) & 0x20) << 1) + | (((x) & 0x40) >> 1) | (((x) & 0x80) >> 3); +} + +static void nibble_swap_every_byte(unsigned char *mac_addr) +{ + int i; + for (i = 0; i < ETH_ALEN; i++) { + mac_addr[i] = ((mac_addr[i] & 0x0f) << 4) | + ((mac_addr[i] & 0xf0) >> 4); + } +} + +static void inverse_every_nibble(unsigned char *mac_addr) +{ + int i; + for (i = 0; i < ETH_ALEN; i++) + mac_addr[i] = flip_8_bits(mac_addr[i]); +} + +/* + * ---------------------------------------------------------------------------- + * This function will calculate the hash function of the address. + * Inputs + * mac_addr_orig - MAC address. + * Outputs + * return the calculated entry. + */ +static u32 hash_function(unsigned char *mac_addr_orig) +{ + u32 hash_result; + u32 addr0; + u32 addr1; + u32 addr2; + u32 addr3; + unsigned char mac_addr[ETH_ALEN]; + + /* Make a copy of MAC address since we are going to performe bit + * operations on it + */ + memcpy(mac_addr, mac_addr_orig, ETH_ALEN); + + nibble_swap_every_byte(mac_addr); + inverse_every_nibble(mac_addr); + + addr0 = (mac_addr[5] >> 2) & 0x3f; + addr1 = (mac_addr[5] & 0x03) | (((mac_addr[4] & 0x7f)) << 2); + addr2 = ((mac_addr[4] & 0x80) >> 7) | mac_addr[3] << 1; + addr3 = (mac_addr[2] & 0xff) | ((mac_addr[1] & 1) << 8); + + hash_result = (addr0 << 9) | (addr1 ^ addr2 ^ addr3); + hash_result = hash_result & 0x07ff; + return hash_result; +} + +/* + * ---------------------------------------------------------------------------- + * This function will add/del an entry to the address table. + * Inputs + * pep - ETHERNET . + * mac_addr - MAC address. + * skip - if 1, skip this address.Used in case of deleting an entry which is a + * part of chain in the hash table.We cant just delete the entry since + * that will break the chain.We need to defragment the tables time to + * time. + * rd - 0 Discard packet upon match. + * - 1 Receive packet upon match. + * Outputs + * address table entry is added/deleted. + * 0 if success. 
+ * -ENOSPC if table full + */ +static int add_del_hash_entry(struct pxa168_eth_private *pep, + unsigned char *mac_addr, + u32 rd, u32 skip, int del) +{ + struct addr_table_entry *entry, *start; + u32 new_high; + u32 new_low; + u32 i; + + new_low = (((mac_addr[1] >> 4) & 0xf) << 15) + | (((mac_addr[1] >> 0) & 0xf) << 11) + | (((mac_addr[0] >> 4) & 0xf) << 7) + | (((mac_addr[0] >> 0) & 0xf) << 3) + | (((mac_addr[3] >> 4) & 0x1) << 31) + | (((mac_addr[3] >> 0) & 0xf) << 27) + | (((mac_addr[2] >> 4) & 0xf) << 23) + | (((mac_addr[2] >> 0) & 0xf) << 19) + | (skip << SKIP) | (rd << HASH_ENTRY_RECEIVE_DISCARD_BIT) + | HASH_ENTRY_VALID; + + new_high = (((mac_addr[5] >> 4) & 0xf) << 15) + | (((mac_addr[5] >> 0) & 0xf) << 11) + | (((mac_addr[4] >> 4) & 0xf) << 7) + | (((mac_addr[4] >> 0) & 0xf) << 3) + | (((mac_addr[3] >> 5) & 0x7) << 0); + + /* + * Pick the appropriate table, start scanning for free/reusable + * entries at the index obtained by hashing the specified MAC address + */ + start = (struct addr_table_entry *)(pep->htpr); + entry = start + hash_function(mac_addr); + for (i = 0; i < HOP_NUMBER; i++) { + if (!(le32_to_cpu(entry->lo) & HASH_ENTRY_VALID)) { + break; + } else { + /* if same address put in same position */ + if (((le32_to_cpu(entry->lo) & 0xfffffff8) == + (new_low & 0xfffffff8)) && + (le32_to_cpu(entry->hi) == new_high)) { + break; + } + } + if (entry == start + 0x7ff) + entry = start; + else + entry++; + } + + if (((le32_to_cpu(entry->lo) & 0xfffffff8) != (new_low & 0xfffffff8)) && + (le32_to_cpu(entry->hi) != new_high) && del) + return 0; + + if (i == HOP_NUMBER) { + if (!del) { + printk(KERN_INFO "%s: table section is full, need to " + "move to 16kB implementation?\n", + __FILE__); + return -ENOSPC; + } else + return 0; + } + + /* + * Update the selected entry + */ + if (del) { + entry->hi = 0; + entry->lo = 0; + } else { + entry->hi = cpu_to_le32(new_high); + entry->lo = cpu_to_le32(new_low); + } + + return 0; +} + +/* + * ---------------------------------------------------------------------------- + * Create an addressTable entry from MAC address info + * found in the specifed net_device struct + * + * Input : pointer to ethernet interface network device structure + * Output : N/A + */ +static void update_hash_table_mac_address(struct pxa168_eth_private *pep, + unsigned char *oaddr, + unsigned char *addr) +{ + /* Delete old entry */ + if (oaddr) + add_del_hash_entry(pep, oaddr, 1, 0, HASH_DELETE); + /* Add new entry */ + add_del_hash_entry(pep, addr, 1, 0, HASH_ADD); +} + +static int init_hash_table(struct pxa168_eth_private *pep) +{ + /* + * Hardware expects CPU to build a hash table based on a predefined + * hash function and populate it based on hardware address. The + * location of the hash table is identified by 32-bit pointer stored + * in HTPR internal register. Two possible sizes exists for the hash + * table 8kB (256kB of DRAM required (4 x 64 kB banks)) and 1/2kB + * (16kB of DRAM required (4 x 4 kB banks)).We currently only support + * 1/2kB. + */ + /* TODO: Add support for 8kB hash table and alternative hash + * function.Driver can dynamically switch to them if the 1/2kB hash + * table is full. 
+ */ + if (pep->htpr == NULL) { + pep->htpr = dma_alloc_coherent(pep->dev->dev.parent, + HASH_ADDR_TABLE_SIZE, + &pep->htpr_dma, GFP_KERNEL); + if (pep->htpr == NULL) + return -ENOMEM; + } + memset(pep->htpr, 0, HASH_ADDR_TABLE_SIZE); + wrl(pep, HTPR, pep->htpr_dma); + return 0; +} + +static void pxa168_eth_set_rx_mode(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct netdev_hw_addr *ha; + u32 val; + + val = rdl(pep, PORT_CONFIG); + if (dev->flags & IFF_PROMISC) + val |= PCR_PM; + else + val &= ~PCR_PM; + wrl(pep, PORT_CONFIG, val); + + /* + * Remove the old list of MAC address and add dev->addr + * and multicast address. + */ + memset(pep->htpr, 0, HASH_ADDR_TABLE_SIZE); + update_hash_table_mac_address(pep, NULL, dev->dev_addr); + + netdev_for_each_mc_addr(ha, dev) + update_hash_table_mac_address(pep, NULL, ha->addr); +} + +static int pxa168_eth_set_mac_address(struct net_device *dev, void *addr) +{ + struct sockaddr *sa = addr; + struct pxa168_eth_private *pep = netdev_priv(dev); + unsigned char oldMac[ETH_ALEN]; + + if (!is_valid_ether_addr(sa->sa_data)) + return -EINVAL; + memcpy(oldMac, dev->dev_addr, ETH_ALEN); + memcpy(dev->dev_addr, sa->sa_data, ETH_ALEN); + netif_addr_lock_bh(dev); + update_hash_table_mac_address(pep, oldMac, dev->dev_addr); + netif_addr_unlock_bh(dev); + return 0; +} + +static void eth_port_start(struct net_device *dev) +{ + unsigned int val = 0; + struct pxa168_eth_private *pep = netdev_priv(dev); + int tx_curr_desc, rx_curr_desc; + + /* Perform PHY reset, if there is a PHY. */ + if (pep->phy != NULL) { + struct ethtool_cmd cmd; + + pxa168_get_settings(pep->dev, &cmd); + ethernet_phy_reset(pep); + pxa168_set_settings(pep->dev, &cmd); + } + + /* Assignment of Tx CTRP of given queue */ + tx_curr_desc = pep->tx_curr_desc_q; + wrl(pep, ETH_C_TX_DESC_1, + (u32) ((struct tx_desc *)pep->tx_desc_dma + tx_curr_desc)); + + /* Assignment of Rx CRDP of given queue */ + rx_curr_desc = pep->rx_curr_desc_q; + wrl(pep, ETH_C_RX_DESC_0, + (u32) ((struct rx_desc *)pep->rx_desc_dma + rx_curr_desc)); + + wrl(pep, ETH_F_RX_DESC_0, + (u32) ((struct rx_desc *)pep->rx_desc_dma + rx_curr_desc)); + + /* Clear all interrupts */ + wrl(pep, INT_CAUSE, 0); + + /* Enable all interrupts for receive, transmit and error. */ + wrl(pep, INT_MASK, ALL_INTS); + + val = rdl(pep, PORT_CONFIG); + val |= PCR_EN; + wrl(pep, PORT_CONFIG, val); + + /* Start RX DMA engine */ + val = rdl(pep, SDMA_CMD); + val |= SDMA_CMD_ERD; + wrl(pep, SDMA_CMD, val); +} + +static void eth_port_reset(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + unsigned int val = 0; + + /* Stop all interrupts for receive, transmit and error. */ + wrl(pep, INT_MASK, 0); + + /* Clear all interrupts */ + wrl(pep, INT_CAUSE, 0); + + /* Stop RX DMA */ + val = rdl(pep, SDMA_CMD); + val &= ~SDMA_CMD_ERD; /* abort dma command */ + + /* Abort any transmit and receive operations and put DMA + * in idle state. 
+ */ + abort_dma(pep); + + /* Disable port */ + val = rdl(pep, PORT_CONFIG); + val &= ~PCR_EN; + wrl(pep, PORT_CONFIG, val); +} + +/* + * txq_reclaim - Free the tx desc data for completed descriptors + * If force is non-zero, frees uncompleted descriptors as well + */ +static int txq_reclaim(struct net_device *dev, int force) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct tx_desc *desc; + u32 cmd_sts; + struct sk_buff *skb; + int tx_index; + dma_addr_t addr; + int count; + int released = 0; + + netif_tx_lock(dev); + + pep->work_todo &= ~WORK_TX_DONE; + while (pep->tx_desc_count > 0) { + tx_index = pep->tx_used_desc_q; + desc = &pep->p_tx_desc_area[tx_index]; + cmd_sts = desc->cmd_sts; + if (!force && (cmd_sts & BUF_OWNED_BY_DMA)) { + if (released > 0) { + goto txq_reclaim_end; + } else { + released = -1; + goto txq_reclaim_end; + } + } + pep->tx_used_desc_q = (tx_index + 1) % pep->tx_ring_size; + pep->tx_desc_count--; + addr = desc->buf_ptr; + count = desc->byte_cnt; + skb = pep->tx_skb[tx_index]; + if (skb) + pep->tx_skb[tx_index] = NULL; + + if (cmd_sts & TX_ERROR) { + if (net_ratelimit()) + printk(KERN_ERR "%s: Error in TX\n", dev->name); + dev->stats.tx_errors++; + } + dma_unmap_single(NULL, addr, count, DMA_TO_DEVICE); + if (skb) + dev_kfree_skb_irq(skb); + released++; + } +txq_reclaim_end: + netif_tx_unlock(dev); + return released; +} + +static void pxa168_eth_tx_timeout(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + + printk(KERN_INFO "%s: TX timeout desc_count %d\n", + dev->name, pep->tx_desc_count); + + schedule_work(&pep->tx_timeout_task); +} + +static void pxa168_eth_tx_timeout_task(struct work_struct *work) +{ + struct pxa168_eth_private *pep = container_of(work, + struct pxa168_eth_private, + tx_timeout_task); + struct net_device *dev = pep->dev; + pxa168_eth_stop(dev); + pxa168_eth_open(dev); +} + +static int rxq_process(struct net_device *dev, int budget) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; + unsigned int received_packets = 0; + struct sk_buff *skb; + + while (budget-- > 0) { + int rx_next_curr_desc, rx_curr_desc, rx_used_desc; + struct rx_desc *rx_desc; + unsigned int cmd_sts; + + /* Do not process Rx ring in case of Rx ring resource error */ + if (pep->rx_resource_err) + break; + rx_curr_desc = pep->rx_curr_desc_q; + rx_used_desc = pep->rx_used_desc_q; + rx_desc = &pep->p_rx_desc_area[rx_curr_desc]; + cmd_sts = rx_desc->cmd_sts; + rmb(); + if (cmd_sts & (BUF_OWNED_BY_DMA)) + break; + skb = pep->rx_skb[rx_curr_desc]; + pep->rx_skb[rx_curr_desc] = NULL; + + rx_next_curr_desc = (rx_curr_desc + 1) % pep->rx_ring_size; + pep->rx_curr_desc_q = rx_next_curr_desc; + + /* Rx descriptors exhausted. */ + /* Set the Rx ring resource error flag */ + if (rx_next_curr_desc == rx_used_desc) + pep->rx_resource_err = 1; + pep->rx_desc_count--; + dma_unmap_single(NULL, rx_desc->buf_ptr, + rx_desc->buf_size, + DMA_FROM_DEVICE); + received_packets++; + /* + * Update statistics. + * Note byte count includes 4 byte CRC count + */ + stats->rx_packets++; + stats->rx_bytes += rx_desc->byte_cnt; + /* + * In case received a packet without first / last bits on OR + * the error summary bit is on, the packets needs to be droped. 
+ */ + if (((cmd_sts & (RX_FIRST_DESC | RX_LAST_DESC)) != + (RX_FIRST_DESC | RX_LAST_DESC)) + || (cmd_sts & RX_ERROR)) { + + stats->rx_dropped++; + if ((cmd_sts & (RX_FIRST_DESC | RX_LAST_DESC)) != + (RX_FIRST_DESC | RX_LAST_DESC)) { + if (net_ratelimit()) + printk(KERN_ERR + "%s: Rx pkt on multiple desc\n", + dev->name); + } + if (cmd_sts & RX_ERROR) + stats->rx_errors++; + dev_kfree_skb_irq(skb); + } else { + /* + * The -4 is for the CRC in the trailer of the + * received packet + */ + skb_put(skb, rx_desc->byte_cnt - 4); + skb->protocol = eth_type_trans(skb, dev); + netif_receive_skb(skb); + } + dev->last_rx = jiffies; + } + /* Fill RX ring with skb's */ + rxq_refill(dev); + return received_packets; +} + +static int pxa168_eth_collect_events(struct pxa168_eth_private *pep, + struct net_device *dev) +{ + u32 icr; + int ret = 0; + + icr = rdl(pep, INT_CAUSE); + if (icr == 0) + return IRQ_NONE; + + wrl(pep, INT_CAUSE, ~icr); + if (icr & (ICR_TXBUF_H | ICR_TXBUF_L)) { + pep->work_todo |= WORK_TX_DONE; + ret = 1; + } + if (icr & ICR_RXBUF) + ret = 1; + if (icr & ICR_MII_CH) { + pep->work_todo |= WORK_LINK; + ret = 1; + } + return ret; +} + +static void handle_link_event(struct pxa168_eth_private *pep) +{ + struct net_device *dev = pep->dev; + u32 port_status; + int speed; + int duplex; + int fc; + + port_status = rdl(pep, PORT_STATUS); + if (!(port_status & LINK_UP)) { + if (netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link down\n", dev->name); + netif_carrier_off(dev); + txq_reclaim(dev, 1); + } + return; + } + if (port_status & PORT_SPEED_100) + speed = 100; + else + speed = 10; + + duplex = (port_status & FULL_DUPLEX) ? 1 : 0; + fc = (port_status & FLOW_CONTROL_ENABLED) ? 1 : 0; + printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, " + "flow control %sabled\n", dev->name, + speed, duplex ? "full" : "half", fc ? "en" : "dis"); + if (!netif_carrier_ok(dev)) + netif_carrier_on(dev); +} + +static irqreturn_t pxa168_eth_int_handler(int irq, void *dev_id) +{ + struct net_device *dev = (struct net_device *)dev_id; + struct pxa168_eth_private *pep = netdev_priv(dev); + + if (unlikely(!pxa168_eth_collect_events(pep, dev))) + return IRQ_NONE; + /* Disable interrupts */ + wrl(pep, INT_MASK, 0); + napi_schedule(&pep->napi); + return IRQ_HANDLED; +} + +static void pxa168_eth_recalc_skb_size(struct pxa168_eth_private *pep) +{ + int skb_size; + + /* + * Reserve 2+14 bytes for an ethernet header (the hardware + * automatically prepends 2 bytes of dummy data to each + * received packet), 16 bytes for up to four VLAN tags, and + * 4 bytes for the trailing FCS -- 36 bytes total. + */ + skb_size = pep->dev->mtu + 36; + + /* + * Make sure that the skb size is a multiple of 8 bytes, as + * the lower three bits of the receive descriptor's buffer + * size field are ignored by the hardware. + */ + pep->skb_size = (skb_size + 7) & ~7; + + /* + * If NET_SKB_PAD is smaller than a cache line, + * netdev_alloc_skb() will cause skb->data to be misaligned + * to a cache line boundary. If this is the case, include + * some extra space to allow re-aligning the data area. 
+ */ + pep->skb_size += SKB_DMA_REALIGN; + +} + +static int set_port_config_ext(struct pxa168_eth_private *pep) +{ + int skb_size; + + pxa168_eth_recalc_skb_size(pep); + if (pep->skb_size <= 1518) + skb_size = PCXR_MFL_1518; + else if (pep->skb_size <= 1536) + skb_size = PCXR_MFL_1536; + else if (pep->skb_size <= 2048) + skb_size = PCXR_MFL_2048; + else + skb_size = PCXR_MFL_64K; + + /* Extended Port Configuration */ + wrl(pep, + PORT_CONFIG_EXT, PCXR_2BSM | /* Two byte prefix aligns IP hdr */ + PCXR_DSCP_EN | /* Enable DSCP in IP */ + skb_size | PCXR_FLP | /* do not force link pass */ + PCXR_TX_HIGH_PRI); /* Transmit - high priority queue */ + + return 0; +} + +static int pxa168_init_hw(struct pxa168_eth_private *pep) +{ + int err = 0; + + /* Disable interrupts */ + wrl(pep, INT_MASK, 0); + wrl(pep, INT_CAUSE, 0); + /* Write to ICR to clear interrupts. */ + wrl(pep, INT_W_CLEAR, 0); + /* Abort any transmit and receive operations and put DMA + * in idle state. + */ + abort_dma(pep); + /* Initialize address hash table */ + err = init_hash_table(pep); + if (err) + return err; + /* SDMA configuration */ + wrl(pep, SDMA_CONFIG, SDCR_BSZ8 | /* Burst size = 32 bytes */ + SDCR_RIFB | /* Rx interrupt on frame */ + SDCR_BLMT | /* Little endian transmit */ + SDCR_BLMR | /* Little endian receive */ + SDCR_RC_MAX_RETRANS); /* Max retransmit count */ + /* Port Configuration */ + wrl(pep, PORT_CONFIG, PCR_HS); /* Hash size is 1/2kb */ + set_port_config_ext(pep); + + return err; +} + +static int rxq_init(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct rx_desc *p_rx_desc; + int size = 0, i = 0; + int rx_desc_num = pep->rx_ring_size; + + /* Allocate RX skb rings */ + pep->rx_skb = kmalloc(sizeof(*pep->rx_skb) * pep->rx_ring_size, + GFP_KERNEL); + if (!pep->rx_skb) { + printk(KERN_ERR "%s: Cannot alloc RX skb ring\n", dev->name); + return -ENOMEM; + } + /* Allocate RX ring */ + pep->rx_desc_count = 0; + size = pep->rx_ring_size * sizeof(struct rx_desc); + pep->rx_desc_area_size = size; + pep->p_rx_desc_area = dma_alloc_coherent(pep->dev->dev.parent, size, + &pep->rx_desc_dma, GFP_KERNEL); + if (!pep->p_rx_desc_area) { + printk(KERN_ERR "%s: Cannot alloc RX ring (size %d bytes)\n", + dev->name, size); + goto out; + } + memset((void *)pep->p_rx_desc_area, 0, size); + /* initialize the next_desc_ptr links in the Rx descriptors ring */ + p_rx_desc = (struct rx_desc *)pep->p_rx_desc_area; + for (i = 0; i < rx_desc_num; i++) { + p_rx_desc[i].next_desc_ptr = pep->rx_desc_dma + + ((i + 1) % rx_desc_num) * sizeof(struct rx_desc); + } + /* Save Rx desc pointer to driver struct. */ + pep->rx_curr_desc_q = 0; + pep->rx_used_desc_q = 0; + pep->rx_desc_area_size = rx_desc_num * sizeof(struct rx_desc); + return 0; +out: + kfree(pep->rx_skb); + return -ENOMEM; +} + +static void rxq_deinit(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + int curr; + + /* Free preallocated skb's on RX rings */ + for (curr = 0; pep->rx_desc_count && curr < pep->rx_ring_size; curr++) { + if (pep->rx_skb[curr]) { + dev_kfree_skb(pep->rx_skb[curr]); + pep->rx_desc_count--; + } + } + if (pep->rx_desc_count) + printk(KERN_ERR + "Error in freeing Rx Ring. 
%d skb's still\n", + pep->rx_desc_count); + /* Free RX ring */ + if (pep->p_rx_desc_area) + dma_free_coherent(pep->dev->dev.parent, pep->rx_desc_area_size, + pep->p_rx_desc_area, pep->rx_desc_dma); + kfree(pep->rx_skb); +} + +static int txq_init(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct tx_desc *p_tx_desc; + int size = 0, i = 0; + int tx_desc_num = pep->tx_ring_size; + + pep->tx_skb = kmalloc(sizeof(*pep->tx_skb) * pep->tx_ring_size, + GFP_KERNEL); + if (!pep->tx_skb) { + printk(KERN_ERR "%s: Cannot alloc TX skb ring\n", dev->name); + return -ENOMEM; + } + /* Allocate TX ring */ + pep->tx_desc_count = 0; + size = pep->tx_ring_size * sizeof(struct tx_desc); + pep->tx_desc_area_size = size; + pep->p_tx_desc_area = dma_alloc_coherent(pep->dev->dev.parent, size, + &pep->tx_desc_dma, GFP_KERNEL); + if (!pep->p_tx_desc_area) { + printk(KERN_ERR "%s: Cannot allocate Tx Ring (size %d bytes)\n", + dev->name, size); + goto out; + } + memset((void *)pep->p_tx_desc_area, 0, pep->tx_desc_area_size); + /* Initialize the next_desc_ptr links in the Tx descriptors ring */ + p_tx_desc = (struct tx_desc *)pep->p_tx_desc_area; + for (i = 0; i < tx_desc_num; i++) { + p_tx_desc[i].next_desc_ptr = pep->tx_desc_dma + + ((i + 1) % tx_desc_num) * sizeof(struct tx_desc); + } + pep->tx_curr_desc_q = 0; + pep->tx_used_desc_q = 0; + pep->tx_desc_area_size = tx_desc_num * sizeof(struct tx_desc); + return 0; +out: + kfree(pep->tx_skb); + return -ENOMEM; +} + +static void txq_deinit(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + + /* Free outstanding skb's on TX ring */ + txq_reclaim(dev, 1); + BUG_ON(pep->tx_used_desc_q != pep->tx_curr_desc_q); + /* Free TX ring */ + if (pep->p_tx_desc_area) + dma_free_coherent(pep->dev->dev.parent, pep->tx_desc_area_size, + pep->p_tx_desc_area, pep->tx_desc_dma); + kfree(pep->tx_skb); +} + +static int pxa168_eth_open(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + int err; + + err = request_irq(dev->irq, pxa168_eth_int_handler, + IRQF_DISABLED, dev->name, dev); + if (err) { + dev_printk(KERN_ERR, &dev->dev, "can't assign irq\n"); + return -EAGAIN; + } + pep->rx_resource_err = 0; + err = rxq_init(dev); + if (err != 0) + goto out_free_irq; + err = txq_init(dev); + if (err != 0) + goto out_free_rx_skb; + pep->rx_used_desc_q = 0; + pep->rx_curr_desc_q = 0; + + /* Fill RX ring with skb's */ + rxq_refill(dev); + pep->rx_used_desc_q = 0; + pep->rx_curr_desc_q = 0; + netif_carrier_off(dev); + eth_port_start(dev); + napi_enable(&pep->napi); + return 0; +out_free_rx_skb: + rxq_deinit(dev); +out_free_irq: + free_irq(dev->irq, dev); + return err; +} + +static int pxa168_eth_stop(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + eth_port_reset(dev); + + /* Disable interrupts */ + wrl(pep, INT_MASK, 0); + wrl(pep, INT_CAUSE, 0); + /* Write to ICR to clear interrupts. */ + wrl(pep, INT_W_CLEAR, 0); + napi_disable(&pep->napi); + del_timer_sync(&pep->timeout); + netif_carrier_off(dev); + free_irq(dev->irq, dev); + rxq_deinit(dev); + txq_deinit(dev); + + return 0; +} + +static int pxa168_eth_change_mtu(struct net_device *dev, int mtu) +{ + int retval; + struct pxa168_eth_private *pep = netdev_priv(dev); + + if ((mtu > 9500) || (mtu < 68)) + return -EINVAL; + + dev->mtu = mtu; + retval = set_port_config_ext(pep); + + if (!netif_running(dev)) + return 0; + + /* + * Stop and then re-open the interface. This will allocate RX + * skbs of the new MTU. 
+ * There is a possible danger that the open will not succeed, + * due to memory being full. + */ + pxa168_eth_stop(dev); + if (pxa168_eth_open(dev)) { + dev_printk(KERN_ERR, &dev->dev, + "fatal error on re-opening device after " + "MTU change\n"); + } + + return 0; +} + +static int eth_alloc_tx_desc_index(struct pxa168_eth_private *pep) +{ + int tx_desc_curr; + + tx_desc_curr = pep->tx_curr_desc_q; + pep->tx_curr_desc_q = (tx_desc_curr + 1) % pep->tx_ring_size; + BUG_ON(pep->tx_curr_desc_q == pep->tx_used_desc_q); + pep->tx_desc_count++; + + return tx_desc_curr; +} + +static int pxa168_rx_poll(struct napi_struct *napi, int budget) +{ + struct pxa168_eth_private *pep = + container_of(napi, struct pxa168_eth_private, napi); + struct net_device *dev = pep->dev; + int work_done = 0; + + if (unlikely(pep->work_todo & WORK_LINK)) { + pep->work_todo &= ~(WORK_LINK); + handle_link_event(pep); + } + /* + * We call txq_reclaim every time since in NAPI interupts are disabled + * and due to this we miss the TX_DONE interrupt,which is not updated in + * interrupt status register. + */ + txq_reclaim(dev, 0); + if (netif_queue_stopped(dev) + && pep->tx_ring_size - pep->tx_desc_count > 1) { + netif_wake_queue(dev); + } + work_done = rxq_process(dev, budget); + if (work_done < budget) { + napi_complete(napi); + wrl(pep, INT_MASK, ALL_INTS); + } + + return work_done; +} + +static int pxa168_eth_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; + struct tx_desc *desc; + int tx_index; + int length; + + tx_index = eth_alloc_tx_desc_index(pep); + desc = &pep->p_tx_desc_area[tx_index]; + length = skb->len; + pep->tx_skb[tx_index] = skb; + desc->byte_cnt = length; + desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE); + wmb(); + desc->cmd_sts = BUF_OWNED_BY_DMA | TX_GEN_CRC | TX_FIRST_DESC | + TX_ZERO_PADDING | TX_LAST_DESC | TX_EN_INT; + wmb(); + wrl(pep, SDMA_CMD, SDMA_CMD_TXDH | SDMA_CMD_ERD); + + stats->tx_bytes += skb->len; + stats->tx_packets++; + dev->trans_start = jiffies; + if (pep->tx_ring_size - pep->tx_desc_count <= 1) { + /* We handled the current skb, but now we are out of space.*/ + netif_stop_queue(dev); + } + + return NETDEV_TX_OK; +} + +static int smi_wait_ready(struct pxa168_eth_private *pep) +{ + int i = 0; + + /* wait for the SMI register to become available */ + for (i = 0; rdl(pep, SMI) & SMI_BUSY; i++) { + if (i == PHY_WAIT_ITERATIONS) + return -ETIMEDOUT; + msleep(10); + } + + return 0; +} + +static int pxa168_smi_read(struct mii_bus *bus, int phy_addr, int regnum) +{ + struct pxa168_eth_private *pep = bus->priv; + int i = 0; + int val; + + if (smi_wait_ready(pep)) { + printk(KERN_WARNING "pxa168_eth: SMI bus busy timeout\n"); + return -ETIMEDOUT; + } + wrl(pep, SMI, (phy_addr << 16) | (regnum << 21) | SMI_OP_R); + /* now wait for the data to be valid */ + for (i = 0; !((val = rdl(pep, SMI)) & SMI_R_VALID); i++) { + if (i == PHY_WAIT_ITERATIONS) { + printk(KERN_WARNING + "pxa168_eth: SMI bus read not valid\n"); + return -ENODEV; + } + msleep(10); + } + + return val & 0xffff; +} + +static int pxa168_smi_write(struct mii_bus *bus, int phy_addr, int regnum, + u16 value) +{ + struct pxa168_eth_private *pep = bus->priv; + + if (smi_wait_ready(pep)) { + printk(KERN_WARNING "pxa168_eth: SMI bus busy timeout\n"); + return -ETIMEDOUT; + } + + wrl(pep, SMI, (phy_addr << 16) | (regnum << 21) | + SMI_OP_W | (value & 0xffff)); + + if (smi_wait_ready(pep)) { + 
printk(KERN_ERR "pxa168_eth: SMI bus busy timeout\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static int pxa168_eth_do_ioctl(struct net_device *dev, struct ifreq *ifr, + int cmd) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + if (pep->phy != NULL) + return phy_mii_ioctl(pep->phy, if_mii(ifr), cmd); + + return -EOPNOTSUPP; +} + +static struct phy_device *phy_scan(struct pxa168_eth_private *pep, int phy_addr) +{ + struct mii_bus *bus = pep->smi_bus; + struct phy_device *phydev; + int start; + int num; + int i; + + if (phy_addr == PXA168_ETH_PHY_ADDR_DEFAULT) { + /* Scan entire range */ + start = ethernet_phy_get(pep); + num = 32; + } else { + /* Use phy addr specific to platform */ + start = phy_addr & 0x1f; + num = 1; + } + phydev = NULL; + for (i = 0; i < num; i++) { + int addr = (start + i) & 0x1f; + if (bus->phy_map[addr] == NULL) + mdiobus_scan(bus, addr); + + if (phydev == NULL) { + phydev = bus->phy_map[addr]; + if (phydev != NULL) + ethernet_phy_set_addr(pep, addr); + } + } + + return phydev; +} + +static void phy_init(struct pxa168_eth_private *pep, int speed, int duplex) +{ + struct phy_device *phy = pep->phy; + ethernet_phy_reset(pep); + + phy_attach(pep->dev, dev_name(&phy->dev), 0, PHY_INTERFACE_MODE_MII); + + if (speed == 0) { + phy->autoneg = AUTONEG_ENABLE; + phy->speed = 0; + phy->duplex = 0; + phy->supported &= PHY_BASIC_FEATURES; + phy->advertising = phy->supported | ADVERTISED_Autoneg; + } else { + phy->autoneg = AUTONEG_DISABLE; + phy->advertising = 0; + phy->speed = speed; + phy->duplex = duplex; + } + phy_start_aneg(phy); +} + +static int ethernet_phy_setup(struct net_device *dev) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + + if (pep->pd != NULL) { + if (pep->pd->init) + pep->pd->init(); + } + pep->phy = phy_scan(pep, pep->pd->phy_addr & 0x1f); + if (pep->phy != NULL) + phy_init(pep, pep->pd->speed, pep->pd->duplex); + update_hash_table_mac_address(pep, NULL, dev->dev_addr); + + return 0; +} + +static int pxa168_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + int err; + + err = phy_read_status(pep->phy); + if (err == 0) + err = phy_ethtool_gset(pep->phy, cmd); + + return err; +} + +static int pxa168_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct pxa168_eth_private *pep = netdev_priv(dev); + + return phy_ethtool_sset(pep->phy, cmd); +} + +static void pxa168_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strncpy(info->driver, DRIVER_NAME, 32); + strncpy(info->version, DRIVER_VERSION, 32); + strncpy(info->fw_version, "N/A", 32); + strncpy(info->bus_info, "N/A", 32); +} + +static u32 pxa168_get_link(struct net_device *dev) +{ + return !!netif_carrier_ok(dev); +} + +static const struct ethtool_ops pxa168_ethtool_ops = { + .get_settings = pxa168_get_settings, + .set_settings = pxa168_set_settings, + .get_drvinfo = pxa168_get_drvinfo, + .get_link = pxa168_get_link, +}; + +static const struct net_device_ops pxa168_eth_netdev_ops = { + .ndo_open = pxa168_eth_open, + .ndo_stop = pxa168_eth_stop, + .ndo_start_xmit = pxa168_eth_start_xmit, + .ndo_set_rx_mode = pxa168_eth_set_rx_mode, + .ndo_set_mac_address = pxa168_eth_set_mac_address, + .ndo_validate_addr = eth_validate_addr, + .ndo_do_ioctl = pxa168_eth_do_ioctl, + .ndo_change_mtu = pxa168_eth_change_mtu, + .ndo_tx_timeout = pxa168_eth_tx_timeout, +}; + +static int pxa168_eth_probe(struct platform_device *pdev) +{ + struct pxa168_eth_private *pep = NULL; + struct net_device 
*dev = NULL; + struct resource *res; + struct clk *clk; + int err; + + printk(KERN_NOTICE "PXA168 10/100 Ethernet Driver\n"); + + clk = clk_get(&pdev->dev, "MFUCLK"); + if (IS_ERR(clk)) { + printk(KERN_ERR "%s: Fast Ethernet failed to get clock\n", + DRIVER_NAME); + return -ENODEV; + } + clk_enable(clk); + + dev = alloc_etherdev(sizeof(struct pxa168_eth_private)); + if (!dev) { + err = -ENOMEM; + goto out; + } + + platform_set_drvdata(pdev, dev); + pep = netdev_priv(dev); + pep->dev = dev; + pep->clk = clk; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res == NULL) { + err = -ENODEV; + goto out; + } + pep->base = ioremap(res->start, res->end - res->start + 1); + if (pep->base == NULL) { + err = -ENOMEM; + goto out; + } + res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + BUG_ON(!res); + dev->irq = res->start; + dev->netdev_ops = &pxa168_eth_netdev_ops; + dev->watchdog_timeo = 2 * HZ; + dev->base_addr = 0; + SET_ETHTOOL_OPS(dev, &pxa168_ethtool_ops); + + INIT_WORK(&pep->tx_timeout_task, pxa168_eth_tx_timeout_task); + + printk(KERN_INFO "%s:Using random mac address\n", DRIVER_NAME); + random_ether_addr(dev->dev_addr); + + pep->pd = pdev->dev.platform_data; + pep->rx_ring_size = NUM_RX_DESCS; + if (pep->pd->rx_queue_size) + pep->rx_ring_size = pep->pd->rx_queue_size; + + pep->tx_ring_size = NUM_TX_DESCS; + if (pep->pd->tx_queue_size) + pep->tx_ring_size = pep->pd->tx_queue_size; + + pep->port_num = pep->pd->port_number; + /* Hardware supports only 3 ports */ + BUG_ON(pep->port_num > 2); + netif_napi_add(dev, &pep->napi, pxa168_rx_poll, pep->rx_ring_size); + + memset(&pep->timeout, 0, sizeof(struct timer_list)); + init_timer(&pep->timeout); + pep->timeout.function = rxq_refill_timer_wrapper; + pep->timeout.data = (unsigned long)pep; + + pep->smi_bus = mdiobus_alloc(); + if (pep->smi_bus == NULL) { + err = -ENOMEM; + goto out; + } + pep->smi_bus->priv = pep; + pep->smi_bus->name = "pxa168_eth smi"; + pep->smi_bus->read = pxa168_smi_read; + pep->smi_bus->write = pxa168_smi_write; + snprintf(pep->smi_bus->id, MII_BUS_ID_SIZE, "%d", pdev->id); + pep->smi_bus->parent = &pdev->dev; + pep->smi_bus->phy_mask = 0xffffffff; + if (mdiobus_register(pep->smi_bus) < 0) { + err = -ENOMEM; + goto out; + } + pxa168_init_hw(pep); + err = ethernet_phy_setup(dev); + if (err) + goto out; + SET_NETDEV_DEV(dev, &pdev->dev); + err = register_netdev(dev); + if (err) + goto out; + return 0; +out: + if (pep->clk) { + clk_disable(pep->clk); + clk_put(pep->clk); + pep->clk = NULL; + } + if (pep->base) { + iounmap(pep->base); + pep->base = NULL; + } + if (dev) + free_netdev(dev); + return err; +} + +static int pxa168_eth_remove(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + struct pxa168_eth_private *pep = netdev_priv(dev); + + if (pep->htpr) { + dma_free_coherent(pep->dev->dev.parent, HASH_ADDR_TABLE_SIZE, + pep->htpr, pep->htpr_dma); + pep->htpr = NULL; + } + if (pep->clk) { + clk_disable(pep->clk); + clk_put(pep->clk); + pep->clk = NULL; + } + if (pep->phy != NULL) + phy_detach(pep->phy); + + iounmap(pep->base); + pep->base = NULL; + unregister_netdev(dev); + flush_scheduled_work(); + free_netdev(dev); + platform_set_drvdata(pdev, NULL); + return 0; +} + +static void pxa168_eth_shutdown(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + eth_port_reset(dev); +} + +#ifdef CONFIG_PM +static int pxa168_eth_resume(struct platform_device *pdev) +{ + return -ENOSYS; +} + +static int pxa168_eth_suspend(struct platform_device 
*pdev, pm_message_t state) +{ + return -ENOSYS; +} + +#else +#define pxa168_eth_resume NULL +#define pxa168_eth_suspend NULL +#endif + +static struct platform_driver pxa168_eth_driver = { + .probe = pxa168_eth_probe, + .remove = pxa168_eth_remove, + .shutdown = pxa168_eth_shutdown, + .resume = pxa168_eth_resume, + .suspend = pxa168_eth_suspend, + .driver = { + .name = DRIVER_NAME, + }, +}; + +static int __init pxa168_init_module(void) +{ + return platform_driver_register(&pxa168_eth_driver); +} + +static void __exit pxa168_cleanup_module(void) +{ + platform_driver_unregister(&pxa168_eth_driver); +} + +module_init(pxa168_init_module); +module_exit(pxa168_cleanup_module); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ethernet driver for Marvell PXA168"); +MODULE_ALIAS("platform:pxa168_eth"); diff --git a/include/linux/pxa168_eth.h b/include/linux/pxa168_eth.h new file mode 100644 index 000000000000..18d75e795606 --- /dev/null +++ b/include/linux/pxa168_eth.h @@ -0,0 +1,30 @@ +/* + *pxa168 ethernet platform device data definition file. + */ +#ifndef __LINUX_PXA168_ETH_H +#define __LINUX_PXA168_ETH_H + +struct pxa168_eth_platform_data { + int port_number; + int phy_addr; + + /* + * If speed is 0, then speed and duplex are autonegotiated. + */ + int speed; /* 0, SPEED_10, SPEED_100 */ + int duplex; /* DUPLEX_HALF or DUPLEX_FULL */ + + /* + * Override default RX/TX queue sizes if nonzero. + */ + int rx_queue_size; + int tx_queue_size; + + /* + * init callback is used for board specific initialization + * e.g on Aspenite its used to initialize the PHY transceiver. + */ + int (*init)(void); +}; + +#endif /* __LINUX_PXA168_ETH_H */ -- cgit v1.2.3 From e243f5b6de35b6fc394bc2e1e1737afe538e7e0c Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 15 Aug 2010 10:03:57 +0000 Subject: netfilter: fix userspace header warning "make headers_check" issued the following warning: CHECK include/linux/netfilter (64 files) usr/include/linux/netfilter/xt_ipvs.h:19: found __[us]{8,16,32,64} type without #include Fix this by as suggested including linux/types.h. Signed-off-by: Sam Ravnborg Signed-off-by: David S. Miller --- include/linux/netfilter/xt_ipvs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/xt_ipvs.h b/include/linux/netfilter/xt_ipvs.h index 1167aeb7a347..eff34ac18808 100644 --- a/include/linux/netfilter/xt_ipvs.h +++ b/include/linux/netfilter/xt_ipvs.h @@ -1,6 +1,8 @@ #ifndef _XT_IPVS_H #define _XT_IPVS_H +#include + enum { XT_IPVS_IPVS_PROPERTY = 1 << 0, /* all other options imply this one */ XT_IPVS_PROTO = 1 << 1, -- cgit v1.2.3 From 2244d07bfa2097cb00600da91c715a8aa547917e Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 17 Aug 2010 08:59:14 +0000 Subject: net: simplify flags for tx timestamping This patch removes the abstraction introduced by the union skb_shared_tx in the shared skb data. The access of the different union elements at several places led to some confusion about accessing the shared tx_flags e.g. in skb_orphan_try(). http://marc.info/?l=linux-netdev&m=128084897415886&w=2 Signed-off-by: Oliver Hartkopp Signed-off-by: David S. 
Miller
---
 Documentation/networking/timestamping.txt | 22 +++++++++-------
 drivers/net/bfin_mac.c                    | 10 +++----
 drivers/net/gianfar.c                     | 15 +++++------
 drivers/net/igb/igb.h                     |  2 +-
 drivers/net/igb/igb_main.c                | 11 ++++----
 include/linux/skbuff.h                    | 44 +++++++++++--------------------
 include/net/ip.h                          |  2 +-
 include/net/sock.h                        |  8 ++----
 net/can/raw.c                             |  4 +--
 net/core/dev.c                            |  6 ++---
 net/core/skbuff.c                         |  2 +-
 net/ipv4/icmp.c                           |  4 +--
 net/ipv4/ip_output.c                      |  6 ++---
 net/ipv4/raw.c                            |  2 +-
 net/ipv4/udp.c                            |  4 +--
 net/packet/af_packet.c                    |  4 +--
 net/socket.c                              |  9 +++----
 17 files changed, 68 insertions(+), 87 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
index e8c8f4f06c67..98097d8cb910 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -172,15 +172,19 @@ struct skb_shared_hwtstamps {
 };
 
 Time stamps for outgoing packets are to be generated as follows:
-- In hard_start_xmit(), check if skb_tx(skb)->hardware is set no-zero.
-  If yes, then the driver is expected to do hardware time stamping.
+- In hard_start_xmit(), check if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)
+  is set non-zero. If yes, then the driver is expected to do hardware time
+  stamping.
 - If this is possible for the skb and requested, then declare
-  that the driver is doing the time stamping by setting the field
-  skb_tx(skb)->in_progress non-zero. You might want to keep a pointer
-  to the associated skb for the next step and not free the skb. A driver
-  not supporting hardware time stamping doesn't do that. A driver must
-  never touch sk_buff::tstamp! It is used to store software generated
-  time stamps by the network subsystem.
+  that the driver is doing the time stamping by setting the flag
+  SKBTX_IN_PROGRESS in skb_shinfo(skb)->tx_flags, e.g. with
+
+      skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+  You might want to keep a pointer to the associated skb for the next step
+  and not free the skb. A driver not supporting hardware time stamping doesn't
+  do that. A driver must never touch sk_buff::tstamp! It is used to store
+  software generated time stamps by the network subsystem.
 - As soon as the driver has sent the packet and/or obtained a
   hardware time stamp for it, it passes the time stamp back by
   calling skb_hwtstamp_tx() with the original skb, the raw
@@ -191,6 +195,6 @@ Time stamps for outgoing packets are to be generated as follows:
   this would occur at a later time in the processing pipeline than other
   software time stamping and therefore could lead to unexpected deltas
   between time stamps.
-- If the driver did not call set skb_tx(skb)->in_progress, then
+- If the driver did not set the SKBTX_IN_PROGRESS flag (see above), then
   dev_hard_start_xmit() checks whether software time stamping
   is wanted as fallback and potentially generates the time stamp.
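
To make the driver-side contract above concrete, here is a minimal sketch; it
is not taken from the patch itself, and my_xmit(), my_tx_complete() and the
completion hook are hypothetical names, while the SKBTX_* flags,
skb_shinfo() and skb_tstamp_tx() are the real interfaces this series uses:

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/ktime.h>

static netdev_tx_t my_xmit(struct sk_buff *skb, struct net_device *dev)
{
	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
		/* claim the stamp so the stack skips the SW fallback */
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
		/* keep a reference to skb until the HW stamp arrives */
	}
	/* ... hand the packet to the hardware queue ... */
	return NETDEV_TX_OK;
}

/* Completion path: feed the raw hardware stamp back to the socket. */
static void my_tx_complete(struct sk_buff *skb, u64 raw_ns)
{
	struct skb_shared_hwtstamps hwtstamps;

	if (!(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
		return;
	memset(&hwtstamps, 0, sizeof(hwtstamps));
	hwtstamps.hwtstamp = ns_to_ktime(raw_ns);
	skb_tstamp_tx(skb, &hwtstamps);	/* clones skb, queues the stamp */
}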
diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c index 012613fde3f4..7a0e4156fade 100644 --- a/drivers/net/bfin_mac.c +++ b/drivers/net/bfin_mac.c @@ -803,15 +803,14 @@ static void bfin_dump_hwtamp(char *s, ktime_t *hw, ktime_t *ts, struct timecompa static void bfin_tx_hwtstamp(struct net_device *netdev, struct sk_buff *skb) { struct bfin_mac_local *lp = netdev_priv(netdev); - union skb_shared_tx *shtx = skb_tx(skb); - if (shtx->hardware) { + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { int timeout_cnt = MAX_TIMEOUT_CNT; /* When doing time stamping, keep the connection to the socket * a while longer */ - shtx->in_progress = 1; + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; /* * The timestamping is done at the EMAC module's MII/RMII interface @@ -991,7 +990,6 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb, struct bfin_mac_local *lp = netdev_priv(dev); u16 *data; u32 data_align = (unsigned long)(skb->data) & 0x3; - union skb_shared_tx *shtx = skb_tx(skb); current_tx_ptr->skb = skb; @@ -1005,7 +1003,7 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb, * of this field are the length of the packet payload in bytes and the higher * 4 bits are the timestamping enable field. */ - if (shtx->hardware) + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) *data |= 0x1000; current_tx_ptr->desc_a.start_addr = (u32)data; @@ -1015,7 +1013,7 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb, } else { *((u16 *)(current_tx_ptr->packet)) = (u16)(skb->len); /* enable timestamping for the sent packet */ - if (shtx->hardware) + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) *((u16 *)(current_tx_ptr->packet)) |= 0x1000; memcpy((u8 *)(current_tx_ptr->packet + 2), skb->data, skb->len); diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 3d9f958ebd2c..e6048d6ab0ea 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -2048,7 +2048,6 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) u32 bufaddr; unsigned long flags; unsigned int nr_frags, nr_txbds, length; - union skb_shared_tx *shtx; /* * TOE=1 frames larger than 2500 bytes may see excess delays @@ -2069,10 +2068,10 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) txq = netdev_get_tx_queue(dev, rq); base = tx_queue->tx_bd_base; regs = tx_queue->grp->regs; - shtx = skb_tx(skb); /* check if time stamp should be generated */ - if (unlikely(shtx->hardware && priv->hwts_tx_en)) + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + priv->hwts_tx_en)) do_tstamp = 1; /* make space for additional header when fcb is needed */ @@ -2174,7 +2173,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Setup tx hardware time stamping if requested */ if (unlikely(do_tstamp)) { - shtx->in_progress = 1; + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; if (fcb == NULL) fcb = gfar_add_fcb(skb); fcb->ptp = 1; @@ -2446,7 +2445,6 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) int howmany = 0; u32 lstatus; size_t buflen; - union skb_shared_tx *shtx; rx_queue = priv->rx_queue[tx_queue->qindex]; bdp = tx_queue->dirty_tx; @@ -2461,8 +2459,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) * When time stamping, one additional TxBD must be freed. * Also, we need to dma_unmap_single() the TxPAL. 
*/ - shtx = skb_tx(skb); - if (unlikely(shtx->in_progress)) + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) nr_txbds = frags + 2; else nr_txbds = frags + 1; @@ -2476,7 +2473,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) (lstatus & BD_LENGTH_MASK)) break; - if (unlikely(shtx->in_progress)) { + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { next = next_txbd(bdp, base, tx_ring_size); buflen = next->length + GMAC_FCB_LEN; } else @@ -2485,7 +2482,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) dma_unmap_single(&priv->ofdev->dev, bdp->bufPtr, buflen, DMA_TO_DEVICE); - if (unlikely(shtx->in_progress)) { + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { struct skb_shared_hwtstamps shhwtstamps; u64 *ns = (u64*) (((u32)skb->data + 0x10) & ~0x7); memset(&shhwtstamps, 0, sizeof(shhwtstamps)); diff --git a/drivers/net/igb/igb.h b/drivers/net/igb/igb.h index 6e63d9a7fc75..44e0ff1494e0 100644 --- a/drivers/net/igb/igb.h +++ b/drivers/net/igb/igb.h @@ -143,7 +143,7 @@ struct igb_buffer { u16 next_to_watch; unsigned int bytecount; u16 gso_segs; - union skb_shared_tx shtx; + u8 tx_flags; u8 mapped_as_page; }; /* RX */ diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index 9b4e5895f5f9..985e37cf17b6 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -3954,7 +3954,7 @@ static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb, } tx_ring->buffer_info[i].skb = skb; - tx_ring->buffer_info[i].shtx = skb_shinfo(skb)->tx_flags; + tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags; /* multiply data chunks by size of headers */ tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len; tx_ring->buffer_info[i].gso_segs = gso_segs; @@ -4088,7 +4088,6 @@ netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb, u32 tx_flags = 0; u16 first; u8 hdr_len = 0; - union skb_shared_tx *shtx = skb_tx(skb); /* need: 1 descriptor per page, * + 2 desc gap to keep tail from touching head, @@ -4100,8 +4099,8 @@ netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb, return NETDEV_TX_BUSY; } - if (unlikely(shtx->hardware)) { - shtx->in_progress = 1; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGB_TX_FLAGS_TSTAMP; } @@ -5319,7 +5318,7 @@ static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *bu u64 regval; /* if skb does not support hw timestamp or TX stamp not valid exit */ - if (likely(!buffer_info->shtx.hardware) || + if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) || !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID)) return; @@ -5500,7 +5499,7 @@ static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr, * values must belong to this one here and therefore we don't need to * compare any of the additional attributes stored for it. * - * If nothing went wrong, then it should have a skb_shared_tx that we + * If nothing went wrong, then it should have a shared tx_flags that we * can turn into a skb_shared_hwtstamps. 
*/ if (staterr & E1000_RXDADV_STAT_TSIP) { diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d8050382b189..f067c95cf18a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -163,26 +163,19 @@ struct skb_shared_hwtstamps { ktime_t syststamp; }; -/** - * struct skb_shared_tx - instructions for time stamping of outgoing packets - * @hardware: generate hardware time stamp - * @software: generate software time stamp - * @in_progress: device driver is going to provide - * hardware time stamp - * @prevent_sk_orphan: make sk reference available on driver level - * @flags: all shared_tx flags - * - * These flags are attached to packets as part of the - * &skb_shared_info. Use skb_tx() to get a pointer. - */ -union skb_shared_tx { - struct { - __u8 hardware:1, - software:1, - in_progress:1, - prevent_sk_orphan:1; - }; - __u8 flags; +/* Definitions for tx_flags in struct skb_shared_info */ +enum { + /* generate hardware time stamp */ + SKBTX_HW_TSTAMP = 1 << 0, + + /* generate software time stamp */ + SKBTX_SW_TSTAMP = 1 << 1, + + /* device driver is going to provide hardware time stamp */ + SKBTX_IN_PROGRESS = 1 << 2, + + /* ensure the originating sk reference is available on driver level */ + SKBTX_DRV_NEEDS_SK_REF = 1 << 3, }; /* This data is invariant across clones and lives at @@ -195,7 +188,7 @@ struct skb_shared_info { unsigned short gso_segs; unsigned short gso_type; __be32 ip6_frag_id; - union skb_shared_tx tx_flags; + __u8 tx_flags; struct sk_buff *frag_list; struct skb_shared_hwtstamps hwtstamps; @@ -587,11 +580,6 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) return &skb_shinfo(skb)->hwtstamps; } -static inline union skb_shared_tx *skb_tx(struct sk_buff *skb) -{ - return &skb_shinfo(skb)->tx_flags; -} - /** * skb_queue_empty - check if a queue is empty * @list: queue head @@ -1996,8 +1984,8 @@ extern void skb_tstamp_tx(struct sk_buff *orig_skb, static inline void sw_tx_timestamp(struct sk_buff *skb) { - union skb_shared_tx *shtx = skb_tx(skb); - if (shtx->software && !shtx->in_progress) + if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP && + !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) skb_tstamp_tx(skb, NULL); } diff --git a/include/net/ip.h b/include/net/ip.h index 890f9725d681..7691aca133db 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -53,7 +53,7 @@ struct ipcm_cookie { __be32 addr; int oif; struct ip_options *opt; - union skb_shared_tx shtx; + __u8 tx_flags; }; #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) diff --git a/include/net/sock.h b/include/net/sock.h index ac53bfbdfe16..100e43bf95fb 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1669,17 +1669,13 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, /** * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped - * @msg: outgoing packet * @sk: socket sending this packet - * @shtx: filled with instructions for time stamping + * @tx_flags: filled with instructions for time stamping * * Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if * parameters are invalid. 
*/ -extern int sock_tx_timestamp(struct msghdr *msg, - struct sock *sk, - union skb_shared_tx *shtx); - +extern int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); /** * sk_eat_skb - Release a skb if it is no longer needed diff --git a/net/can/raw.c b/net/can/raw.c index a10e3338f084..7d77e67e57af 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -647,12 +647,12 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) goto free_skb; - err = sock_tx_timestamp(msg, sk, skb_tx(skb)); + err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (err < 0) goto free_skb; /* to be able to check the received tx sock reference in raw_rcv() */ - skb_tx(skb)->prevent_sk_orphan = 1; + skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF; skb->dev = dev; skb->sk = sk; diff --git a/net/core/dev.c b/net/core/dev.c index 586a11cb4398..c1dc8a95f6ff 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1902,14 +1902,14 @@ static int dev_gso_segment(struct sk_buff *skb) /* * Try to orphan skb early, right before transmission by the device. - * We cannot orphan skb if tx timestamp is requested, since - * drivers need to call skb_tstamp_tx() to send the timestamp. + * We cannot orphan skb if tx timestamp is requested or the sk-reference + * is needed on driver level for other reasons, e.g. see net/can/raw.c */ static inline void skb_orphan_try(struct sk_buff *skb) { struct sock *sk = skb->sk; - if (sk && !skb_tx(skb)->flags) { + if (sk && !skb_shinfo(skb)->tx_flags) { /* skb_tx_hash() wont be able to get sk. * We copy sk_hash into skb->rxhash */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3a2513f0d0c3..99ef721f773d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3016,7 +3016,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, } else { /* * no hardware time stamps available, - * so keep the skb_shared_tx and only + * so keep the shared tx_flags and only * store software time stamp */ skb->tstamp = ktime_get_real(); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a0d847c7cba5..96bc7f9475a3 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -379,7 +379,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) inet->tos = ip_hdr(skb)->tos; daddr = ipc.addr = rt->rt_src; ipc.opt = NULL; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; if (icmp_param->replyopts.optlen) { ipc.opt = &icmp_param->replyopts; if (ipc.opt->srr) @@ -538,7 +538,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) inet_sk(sk)->tos = tos; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; { struct flowi fl = { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 04b69896df5f..e807492f1777 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -953,7 +953,7 @@ alloc_new_skb: else /* only the initial fragment is time stamped */ - ipc->shtx.flags = 0; + ipc->tx_flags = 0; } if (skb == NULL) goto error; @@ -964,7 +964,7 @@ alloc_new_skb: skb->ip_summed = csummode; skb->csum = 0; skb_reserve(skb, hh_len); - *skb_tx(skb) = ipc->shtx; + skb_shinfo(skb)->tx_flags = ipc->tx_flags; /* * Find where to start putting bytes. 
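
For reference, a minimal sketch of the converted call pattern used throughout
these net/ipv4 hunks; example_sendmsg_prep() is a hypothetical name, while
sock_tx_timestamp() with its new signature and the plain tx_flags byte are
what this patch introduces:

#include <linux/skbuff.h>
#include <net/sock.h>

static int example_sendmsg_prep(struct sock *sk, struct sk_buff *skb)
{
	__u8 tx_flags;		/* plays the role of ipc.tx_flags above */
	int err;

	err = sock_tx_timestamp(sk, &tx_flags);	/* sets SKBTX_{HW,SW}_TSTAMP */
	if (err)
		return err;
	skb_shinfo(skb)->tx_flags = tx_flags;	/* plain byte copy, no union */
	return 0;
}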
@@ -1384,7 +1384,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar daddr = ipc.addr = rt->rt_src; ipc.opt = NULL; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; if (replyopts.opt.optlen) { ipc.opt = &replyopts.opt; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 009a7b2aa1ef..1f85ef289895 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -505,7 +505,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->inet_saddr; ipc.opt = NULL; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32e0bef60d0a..86e757e162ee 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -797,7 +797,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return -EOPNOTSUPP; ipc.opt = NULL; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; if (up->pending) { /* @@ -845,7 +845,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; - err = sock_tx_timestamp(msg, sk, &ipc.shtx); + err = sock_tx_timestamp(sk, &ipc.tx_flags); if (err) return err; if (msg->msg_controllen) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9a17f28b1253..3616f27b9d46 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -488,7 +488,7 @@ retry: skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - err = sock_tx_timestamp(msg, sk, skb_tx(skb)); + err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (err < 0) goto out_unlock; @@ -1209,7 +1209,7 @@ static int packet_snd(struct socket *sock, err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); if (err) goto out_free; - err = sock_tx_timestamp(msg, sk, skb_tx(skb)); + err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (err < 0) goto out_free; diff --git a/net/socket.c b/net/socket.c index 2270b941bcc7..7848d12f5e4d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -535,14 +535,13 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, - union skb_shared_tx *shtx) +int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) { - shtx->flags = 0; + *tx_flags = 0; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) - shtx->hardware = 1; + *tx_flags |= SKBTX_HW_TSTAMP; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) - shtx->software = 1; + *tx_flags |= SKBTX_SW_TSTAMP; return 0; } EXPORT_SYMBOL(sock_tx_timestamp); -- cgit v1.2.3 From ede1b4290781ae82ccf0f2ecc6dada8d3dd35779 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 18 Aug 2010 15:33:13 -0700 Subject: tracing: Fix timer tracing PowerTOP would like to be able to trace timers. Unfortunately, the current timer tracing is not very useful: the actual timer function is not recorded in the trace at the start of timer execution. Although this is recorded for timer "start" time (when it gets armed), this is not useful; most timers get started early, and a tracer like PowerTOP will never see this event, but will only see the actual running of the timer. This patch just adds the function to the timer tracing; I've verified with PowerTOP that now it can get useful information about timers. 
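
As a hedged illustration of what the enriched event buys a tracer, consider
this sketch; the demo_timer* names are hypothetical, and the trace line shown
in the comment follows the format added by the diff below:

#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list demo_timer;

static void demo_timer_fn(unsigned long data)
{
	/*
	 * With this patch, timer_expire_entry reports roughly:
	 *   timer_expire_entry: timer=<addr> function=demo_timer_fn now=<jiffies>
	 * so a tracer like PowerTOP can attribute the wakeup to demo_timer_fn.
	 */
}

static void demo_arm(void)
{
	setup_timer(&demo_timer, demo_timer_fn, 0);
	mod_timer(&demo_timer, jiffies + HZ);
}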
Signed-off-by: Arjan van de Ven Cc: xiaoguangrong@cn.fujitsu.com Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: # .35.x, .34.x, .33.x LKML-Reference: <4C6C5FA9.3000405@linux.intel.com> Signed-off-by: Ingo Molnar --- include/trace/events/timer.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index c624126a9c8a..425bcfe56c62 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -81,14 +81,16 @@ TRACE_EVENT(timer_expire_entry, TP_STRUCT__entry( __field( void *, timer ) __field( unsigned long, now ) + __field( void *, function) ), TP_fast_assign( __entry->timer = timer; __entry->now = jiffies; + __entry->function = timer->function; ), - TP_printk("timer=%p now=%lu", __entry->timer, __entry->now) + TP_printk("timer=%p function=%pf now=%lu", __entry->timer, __entry->function,__entry->now) ); /** @@ -200,14 +202,16 @@ TRACE_EVENT(hrtimer_expire_entry, TP_STRUCT__entry( __field( void *, hrtimer ) __field( s64, now ) + __field( void *, function) ), TP_fast_assign( __entry->hrtimer = hrtimer; __entry->now = now->tv64; + __entry->function = hrtimer->function; ), - TP_printk("hrtimer=%p now=%llu", __entry->hrtimer, + TP_printk("hrtimer=%p function=%pf now=%llu", __entry->hrtimer, __entry->function, (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now })) ); -- cgit v1.2.3 From e760702ed8333588f9f21e7bf6597873993006f1 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 17 Aug 2010 19:03:44 +0000 Subject: net: introduce proto_ports_offset() Introduce proto_ports_offset() for getting the position of the ports or SPI in the message of a protocol. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/in.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/in.h b/include/linux/in.h index 41d88a4689af..beeb6dee2b49 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -250,6 +250,25 @@ struct sockaddr_in { #ifdef __KERNEL__ +#include + +static inline int proto_ports_offset(int proto) +{ + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: /* SPI */ + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + return 0; + case IPPROTO_AH: /* SPI */ + return 4; + default: + return -EINVAL; + } +} + static inline bool ipv4_is_loopback(__be32 addr) { return (addr & htonl(0xff000000)) == htonl(0x7f000000); -- cgit v1.2.3 From d34a16661ed0fed433c9469d7cfa3ca4d30ca42e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 14 Jun 2010 17:06:21 -0700 Subject: net: convert to rcu_dereference_index_check() The task_cls_classid() function applies rcu_dereference() to integers, which does not work with the shiny new sparse-based checking in rcu_dereference(). This commit therefore moves to the new RCU API rcu_dereference_index_check(). Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Acked-by: David S. 
Miller Acked-by: Herbert Xu --- include/net/cls_cgroup.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 726cc3536409..dd1fdb8293f5 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -45,7 +45,8 @@ static inline u32 task_cls_classid(struct task_struct *p) return 0; rcu_read_lock(); - id = rcu_dereference(net_cls_subsys_id); + id = rcu_dereference_index_check(net_cls_subsys_id, + rcu_read_lock_held()); if (id >= 0) classid = container_of(task_subsys_state(p, id), struct cgroup_cls_state, css)->classid; -- cgit v1.2.3 From ca5ecddfa8fcbd948c95530e7e817cee9fb43a3d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 28 Apr 2010 14:39:09 -0700 Subject: rcu: define __rcu address space modifier for sparse This commit provides definitions for the __rcu annotation defined earlier. This annotation permits sparse to check for correct use of RCU-protected pointers. If a pointer that is annotated with __rcu is accessed directly (as opposed to via rcu_dereference(), rcu_assign_pointer(), or one of their variants), sparse can be made to complain. To enable such complaints, use the new default-disabled CONFIG_SPARSE_RCU_POINTER kernel configuration option. Please note that these sparse complaints are intended to be a debugging aid, -not- a code-style-enforcement mechanism. There are special rcu_dereference_protected() and rcu_access_pointer() accessors for use when RCU read-side protection is not required, for example, when no other CPU has access to the data structure in question or while the current CPU hold the update-side lock. This patch also updates a number of docbook comments that were showing their age. Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Christopher Li Reviewed-by: Josh Triplett --- include/linux/compiler.h | 4 + include/linux/rcupdate.h | 352 ++++++++++++++++++++++++++++------------------- include/linux/srcu.h | 27 +++- kernel/rcupdate.c | 6 +- lib/Kconfig.debug | 13 ++ 5 files changed, 257 insertions(+), 145 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c1a62c56a660..320d6c94ff84 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -16,7 +16,11 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) +#ifdef CONFIG_SPARSE_RCU_POINTER +# define __rcu __attribute__((noderef, address_space(4))) +#else # define __rcu +#endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); #else diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9fbc54a2585d..b973dea2d6b0 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -41,6 +41,7 @@ #include #include #include +#include #ifdef CONFIG_RCU_TORTURE_TEST extern int rcutorture_runnable; /* for sysctl */ @@ -120,14 +121,15 @@ extern struct lockdep_map rcu_sched_lock_map; extern int debug_lockdep_rcu_enabled(void); /** - * rcu_read_lock_held - might we be in RCU read-side critical section? + * rcu_read_lock_held() - might we be in RCU read-side critical section? * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, * this assumes we are in an RCU read-side critical section unless it can - * prove otherwise. + * prove otherwise. 
This is useful for debug checks in functions that + * require that they be called within an RCU read-side critical section. * - * Check debug_lockdep_rcu_enabled() to prevent false positives during boot + * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. */ static inline int rcu_read_lock_held(void) @@ -144,14 +146,16 @@ static inline int rcu_read_lock_held(void) extern int rcu_read_lock_bh_held(void); /** - * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section? + * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an * RCU-sched read-side critical section. In absence of * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side * critical section unless it can prove otherwise. Note that disabling * of preemption (including disabling irqs) counts as an RCU-sched - * read-side critical section. + * read-side critical section. This is useful for debug checks in functions + * that required that they be called within an RCU-sched read-side + * critical section. * * Check debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. @@ -220,41 +224,155 @@ extern int rcu_my_thread_group_empty(void); } \ } while (0) +#else /* #ifdef CONFIG_PROVE_RCU */ + +#define __do_rcu_dereference_check(c) do { } while (0) + +#endif /* #else #ifdef CONFIG_PROVE_RCU */ + +/* + * Helper functions for rcu_dereference_check(), rcu_dereference_protected() + * and rcu_assign_pointer(). Some of these could be folded into their + * callers, but they are left separate in order to ease introduction of + * multiple flavors of pointers to match the multiple flavors of RCU + * (e.g., __rcu_bh, * __rcu_sched, and __srcu), should this make sense in + * the future. + */ +#define __rcu_access_pointer(p, space) \ + ({ \ + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ + (void) (((typeof (*p) space *)p) == p); \ + ((typeof(*p) __force __kernel *)(_________p1)); \ + }) +#define __rcu_dereference_check(p, c, space) \ + ({ \ + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ + __do_rcu_dereference_check(c); \ + (void) (((typeof (*p) space *)p) == p); \ + smp_read_barrier_depends(); \ + ((typeof(*p) __force __kernel *)(_________p1)); \ + }) +#define __rcu_dereference_protected(p, c, space) \ + ({ \ + __do_rcu_dereference_check(c); \ + (void) (((typeof (*p) space *)p) == p); \ + ((typeof(*p) __force __kernel *)(p)); \ + }) + +#define __rcu_dereference_index_check(p, c) \ + ({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + __do_rcu_dereference_check(c); \ + smp_read_barrier_depends(); \ + (_________p1); \ + }) +#define __rcu_assign_pointer(p, v, space) \ + ({ \ + if (!__builtin_constant_p(v) || \ + ((v) != NULL)) \ + smp_wmb(); \ + (p) = (typeof(*v) __force space *)(v); \ + }) + + +/** + * rcu_access_pointer() - fetch RCU pointer with no dereferencing + * @p: The pointer to read + * + * Return the value of the specified RCU-protected pointer, but omit the + * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful + * when the value of this pointer is accessed, but the pointer is not + * dereferenced, for example, when testing an RCU-protected pointer against + * NULL. 
Although rcu_access_pointer() may also be used in cases where + * update-side locks prevent the value of the pointer from changing, you + * should instead use rcu_dereference_protected() for this use case. + */ +#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) + /** - * rcu_dereference_check - rcu_dereference with debug checking + * rcu_dereference_check() - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * Do an rcu_dereference(), but check that the conditions under which the - * dereference will take place are correct. Typically the conditions indicate - * the various locking conditions that should be held at that point. The check - * should return true if the conditions are satisfied. + * dereference will take place are correct. Typically the conditions + * indicate the various locking conditions that should be held at that + * point. The check should return true if the conditions are satisfied. + * An implicit check for being in an RCU read-side critical section + * (rcu_read_lock()) is included. * * For example: * - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || - * lockdep_is_held(&foo->lock)); + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock)); * * could be used to indicate to lockdep that foo->bar may only be dereferenced - * if either the RCU read lock is held, or that the lock required to replace + * if either rcu_read_lock() is held, or that the lock required to replace * the bar struct at foo->bar is held. * * Note that the list of conditions may also include indications of when a lock * need not be held, for example during initialisation or destruction of the * target struct: * - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || - * lockdep_is_held(&foo->lock) || + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) || * atomic_read(&foo->usage) == 0); + * + * Inserts memory barriers on architectures that require them + * (currently only the Alpha), prevents the compiler from refetching + * (and from merging fetches), and, more importantly, documents exactly + * which pointers are protected by RCU and checks that the pointer is + * annotated as __rcu. */ #define rcu_dereference_check(p, c) \ - ({ \ - __do_rcu_dereference_check(c); \ - rcu_dereference_raw(p); \ - }) + __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu) + +/** + * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-bh counterpart to rcu_dereference_check(). + */ +#define rcu_dereference_bh_check(p, c) \ + __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu) /** - * rcu_dereference_protected - fetch RCU pointer when updates prevented + * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-sched counterpart to rcu_dereference_check(). + */ +#define rcu_dereference_sched_check(p, c) \ + __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \ + __rcu) + +#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? 
@@@*/ + +/** + * rcu_dereference_index_check() - rcu_dereference for indices with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * Similar to rcu_dereference_check(), but omits the sparse checking. + * This allows rcu_dereference_index_check() to be used on integers, + * which can then be used as array indices. Attempting to use + * rcu_dereference_check() on an integer will give compiler warnings + * because the sparse address-space mechanism relies on dereferencing + * the RCU-protected pointer. Dereferencing integers is not something + * that even gcc will put up with. + * + * Note that this function does not implicitly check for RCU read-side + * critical sections. If this function gains lots of uses, it might + * make sense to provide versions for each flavor of RCU, but it does + * not make sense as of early 2010. + */ +#define rcu_dereference_index_check(p, c) \ + __rcu_dereference_index_check((p), (c)) + +/** + * rcu_dereference_protected() - fetch RCU pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place * * Return the value of the specified RCU-protected pointer, but omit * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This @@ -263,35 +381,61 @@ extern int rcu_my_thread_group_empty(void); * prevent the compiler from repeating this reference or combining it * with other references, so it should not be used without protection * of appropriate locks. + * + * This function is only for update-side use. Using this function + * when protected only by rcu_read_lock() will result in infrequent + * but very ugly failures. */ #define rcu_dereference_protected(p, c) \ - ({ \ - __do_rcu_dereference_check(c); \ - (p); \ - }) + __rcu_dereference_protected((p), (c), __rcu) -#else /* #ifdef CONFIG_PROVE_RCU */ +/** + * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-bh counterpart to rcu_dereference_protected(). + */ +#define rcu_dereference_bh_protected(p, c) \ + __rcu_dereference_protected((p), (c), __rcu) -#define rcu_dereference_check(p, c) rcu_dereference_raw(p) -#define rcu_dereference_protected(p, c) (p) +/** + * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-sched counterpart to rcu_dereference_protected(). + */ +#define rcu_dereference_sched_protected(p, c) \ + __rcu_dereference_protected((p), (c), __rcu) -#endif /* #else #ifdef CONFIG_PROVE_RCU */ /** - * rcu_access_pointer - fetch RCU pointer with no dereferencing + * rcu_dereference() - fetch RCU-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing * - * Return the value of the specified RCU-protected pointer, but omit the - * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful - * when the value of this pointer is accessed, but the pointer is not - * dereferenced, for example, when testing an RCU-protected pointer against - * NULL. This may also be used in cases where update-side locks prevent - * the value of the pointer from changing, but rcu_dereference_protected() - * is a lighter-weight primitive for this use case. 
+ * This is a simple wrapper around rcu_dereference_check(). */ -#define rcu_access_pointer(p) ACCESS_ONCE(p) +#define rcu_dereference(p) rcu_dereference_check(p, 0) /** - * rcu_read_lock - mark the beginning of an RCU read-side critical section. + * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. + */ +#define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0) + +/** + * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. + */ +#define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) + +/** + * rcu_read_lock() - mark the beginning of an RCU read-side critical section * * When synchronize_rcu() is invoked on one CPU while other CPUs * are within RCU read-side critical sections, then the @@ -337,7 +481,7 @@ static inline void rcu_read_lock(void) */ /** - * rcu_read_unlock - marks the end of an RCU read-side critical section. + * rcu_read_unlock() - marks the end of an RCU read-side critical section. * * See rcu_read_lock() for more information. */ @@ -349,15 +493,16 @@ static inline void rcu_read_unlock(void) } /** - * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section + * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section * * This is equivalent of rcu_read_lock(), but to be used when updates - * are being done using call_rcu_bh(). Since call_rcu_bh() callbacks - * consider completion of a softirq handler to be a quiescent state, - * a process in RCU read-side critical section must be protected by - * disabling softirqs. Read-side critical sections in interrupt context - * can use just rcu_read_lock(). - * + * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since + * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a + * softirq handler to be a quiescent state, a process in RCU read-side + * critical section must be protected by disabling softirqs. Read-side + * critical sections in interrupt context can use just rcu_read_lock(), + * though this should at least be commented to avoid confusing people + * reading the code. */ static inline void rcu_read_lock_bh(void) { @@ -379,13 +524,12 @@ static inline void rcu_read_unlock_bh(void) } /** - * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section + * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section * - * Should be used with either - * - synchronize_sched() - * or - * - call_rcu_sched() and rcu_barrier_sched() - * on the write-side to insure proper synchronization. + * This is equivalent of rcu_read_lock(), but to be used when updates + * are being done using call_rcu_sched() or synchronize_rcu_sched(). + * Read-side critical sections can also be introduced by anything that + * disables preemption, including local_irq_disable() and friends. 
*/ static inline void rcu_read_lock_sched(void) { @@ -420,54 +564,14 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) preempt_enable_notrace(); } - /** - * rcu_dereference_raw - fetch an RCU-protected pointer + * rcu_assign_pointer() - assign to RCU-protected pointer + * @p: pointer to assign to + * @v: value to assign (publish) * - * The caller must be within some flavor of RCU read-side critical - * section, or must be otherwise preventing the pointer from changing, - * for example, by holding an appropriate lock. This pointer may later - * be safely dereferenced. It is the caller's responsibility to have - * done the right thing, as this primitive does no checking of any kind. - * - * Inserts memory barriers on architectures that require them - * (currently only the Alpha), and, more importantly, documents - * exactly which pointers are protected by RCU. - */ -#define rcu_dereference_raw(p) ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) - -/** - * rcu_dereference - fetch an RCU-protected pointer, checking for RCU - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference(p) \ - rcu_dereference_check(p, rcu_read_lock_held()) - -/** - * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference_bh(p) \ - rcu_dereference_check(p, rcu_read_lock_bh_held()) - -/** - * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference_sched(p) \ - rcu_dereference_check(p, rcu_read_lock_sched_held()) - -/** - * rcu_assign_pointer - assign (publicize) a pointer to a newly - * initialized structure that will be dereferenced by RCU read-side - * critical sections. Returns the value assigned. + * Assigns the specified value to the specified RCU-protected + * pointer, ensuring that any concurrent RCU readers will see + * any prior initialization. Returns the value assigned. * * Inserts memory barriers on architectures that require them * (pretty much all of them other than x86), and also prevents @@ -476,14 +580,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * call documents which pointers will be dereferenced by RCU read-side * code. */ - #define rcu_assign_pointer(p, v) \ - ({ \ - if (!__builtin_constant_p(v) || \ - ((v) != NULL)) \ - smp_wmb(); \ - (p) = (v); \ - }) + __rcu_assign_pointer((p), (v), __rcu) + +/** + * RCU_INIT_POINTER() - initialize an RCU protected pointer + * + * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep + * splats. + */ +#define RCU_INIT_POINTER(p, v) \ + p = (typeof(*v) __force __rcu *)(v) /* Infrastructure to implement the synchronize_() primitives. */ @@ -495,7 +602,7 @@ struct rcu_synchronize { extern void wakeme_after_rcu(struct rcu_head *head); /** - * call_rcu - Queue an RCU callback for invocation after a grace period. + * call_rcu() - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. * @func: actual update function to be invoked after the grace period * @@ -509,7 +616,7 @@ extern void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)); /** - * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. + * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. 
* @head: structure to be used for queueing the RCU updates. * @func: actual update function to be invoked after the grace period * @@ -566,37 +673,4 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -#ifndef CONFIG_PROVE_RCU -#define __do_rcu_dereference_check(c) do { } while (0) -#endif /* #ifdef CONFIG_PROVE_RCU */ - -#define __rcu_dereference_index_check(p, c) \ - ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - __do_rcu_dereference_check(c); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) - -/** - * rcu_dereference_index_check() - rcu_dereference for indices with debug checking - * @p: The pointer to read, prior to dereferencing - * @c: The conditions under which the dereference will take place - * - * Similar to rcu_dereference_check(), but omits the sparse checking. - * This allows rcu_dereference_index_check() to be used on integers, - * which can then be used as array indices. Attempting to use - * rcu_dereference_check() on an integer will give compiler warnings - * because the sparse address-space mechanism relies on dereferencing - * the RCU-protected pointer. Dereferencing integers is not something - * that even gcc will put up with. - * - * Note that this function does not implicitly check for RCU read-side - * critical sections. If this function gains lots of uses, it might - * make sense to provide versions for each flavor of RCU, but it does - * not make sense as of early 2010. - */ -#define rcu_dereference_index_check(p, c) \ - __rcu_dereference_index_check((p), (c)) - #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4d5d2f546dbf..6f456a720ff0 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -108,12 +108,31 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ /** - * srcu_dereference - fetch SRCU-protected pointer with checking + * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing + * @p: the pointer to fetch and protect for later dereferencing + * @sp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. + * @c: condition to check for update-side use * - * Makes rcu_dereference_check() do the dirty work. + * If PROVE_RCU is enabled, invoking this outside of an RCU read-side + * critical section will result in an RCU-lockdep splat, unless @c evaluates + * to 1. The @c argument will normally be a logical expression containing + * lockdep_is_held() calls. */ -#define srcu_dereference(p, sp) \ - rcu_dereference_check(p, srcu_read_lock_held(sp)) +#define srcu_dereference_check(p, sp, c) \ + __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu) + +/** + * srcu_dereference - fetch SRCU-protected pointer for later dereferencing + * @p: the pointer to fetch and protect for later dereferencing + * @sp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. + * + * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU + * is enabled, invoking this outside of an RCU read-side critical + * section will result in an RCU-lockdep splat. + */ +#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0) /** * srcu_read_lock - register a new reader for an SRCU-protected structure. 
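
A brief usage sketch for the checked accessors above; the demo_* names and
the mutex are illustrative only, while srcu_dereference(),
srcu_dereference_check(), rcu_assign_pointer() and the __rcu annotation are
the interfaces defined in this series:

#include <linux/srcu.h>
#include <linux/mutex.h>

struct demo_cfg { int val; };

static struct srcu_struct demo_srcu;	/* init_srcu_struct() at boot */
static struct demo_cfg __rcu *demo_cfg;
static DEFINE_MUTEX(demo_lock);

static int demo_read(void)
{
	struct demo_cfg *p;
	int idx, val;

	idx = srcu_read_lock(&demo_srcu);
	p = srcu_dereference(demo_cfg, &demo_srcu);	/* checked fetch */
	val = p ? p->val : -1;
	srcu_read_unlock(&demo_srcu, idx);
	return val;
}

/* Update side: holds demo_lock, so pass that fact to lockdep. */
static struct demo_cfg *demo_replace(struct demo_cfg *newp)
{
	struct demo_cfg *old;

	old = srcu_dereference_check(demo_cfg, &demo_srcu,
				     lockdep_is_held(&demo_lock));
	rcu_assign_pointer(demo_cfg, newp);
	return old;	/* free only after synchronize_srcu(&demo_srcu) */
}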
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 4d169835fb36..6c79e851521c 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -73,12 +73,14 @@ int debug_lockdep_rcu_enabled(void) EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); /** - * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? + * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? * * Check for bottom half being disabled, which covers both the * CONFIG_PROVE_RCU and not cases. Note that if someone uses * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) - * will show the situation. + * will show the situation. This is useful for debug checks in functions + * that require that they be called within an RCU read-side critical + * section. * * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1b4afd2e6ca0..12465f2ef766 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -539,6 +539,19 @@ config PROVE_RCU_REPEATEDLY disabling, allowing multiple RCU-lockdep warnings to be printed on a single reboot. +config SPARSE_RCU_POINTER + bool "RCU debugging: sparse-based checks for pointer usage" + default n + help + This feature enables the __rcu sparse annotation for + RCU-protected pointers. This annotation will cause sparse + to flag any non-RCU used of annotated pointers. This can be + helpful when debugging RCU usage. Please note that this feature + is not intended to enforce code cleanliness; it is instead merely + a debugging aid. + + Say Y to make sparse flag questionable use of RCU-protected pointers + Say N if you are unsure. config LOCKDEP -- cgit v1.2.3 From 67bdbffd696f29a0b68aa8daa285783a06651583 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 25 Feb 2010 16:55:13 +0100 Subject: rculist: avoid __rcu annotations This avoids warnings from missing __rcu annotations in the rculist implementation, making it possible to use the same lists in both RCU and non-RCU cases. We can add rculist annotations later, together with lockdep support for rculist, which is missing as well, but that may involve changing all the users. Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Pavel Emelyanov Cc: Sukadev Bhattiprolu Reviewed-by: Josh Triplett --- include/linux/rculist.h | 53 +++++++++++++++++++++++++++---------------- include/linux/rculist_nulls.h | 16 +++++++++---- kernel/pid.c | 2 +- 3 files changed, 46 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 4ec3b38ce9c5..c10b1050dbe6 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -9,6 +9,12 @@ #include #include +/* + * return the ->next pointer of a list_head in an rcu safe + * way, we must not access it directly + */ +#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) + /* * Insert a new entry between two known consecutive entries. 
* @@ -20,7 +26,7 @@ static inline void __list_add_rcu(struct list_head *new, { new->next = next; new->prev = prev; - rcu_assign_pointer(prev->next, new); + rcu_assign_pointer(list_next_rcu(prev), new); next->prev = new; } @@ -138,7 +144,7 @@ static inline void list_replace_rcu(struct list_head *old, { new->next = old->next; new->prev = old->prev; - rcu_assign_pointer(new->prev->next, new); + rcu_assign_pointer(list_next_rcu(new->prev), new); new->next->prev = new; old->prev = LIST_POISON2; } @@ -193,7 +199,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ last->next = at; - rcu_assign_pointer(head->next, first); + rcu_assign_pointer(list_next_rcu(head), first); first->prev = head; at->prev = last; } @@ -208,7 +214,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_entry_rcu(ptr, type, member) \ - container_of(rcu_dereference_raw(ptr), type, member) + ({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \ + container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ + }) /** * list_first_entry_rcu - get the first element from a list @@ -225,9 +233,9 @@ static inline void list_splice_init_rcu(struct list_head *list, list_entry_rcu((ptr)->next, type, member) #define __list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference_raw((head)->next); \ + for (pos = rcu_dereference_raw(list_next_rcu(head)); \ pos != (head); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(list_next_rcu((pos))) /** * list_for_each_entry_rcu - iterate over rcu list of given type @@ -257,9 +265,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_continue_rcu(pos, head) \ - for ((pos) = rcu_dereference_raw((pos)->next); \ + for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \ prefetch((pos)->next), (pos) != (head); \ - (pos) = rcu_dereference_raw((pos)->next)) + (pos) = rcu_dereference_raw(list_next_rcu(pos))) /** * list_for_each_entry_continue_rcu - continue iteration over list of given type @@ -314,12 +322,19 @@ static inline void hlist_replace_rcu(struct hlist_node *old, new->next = next; new->pprev = old->pprev; - rcu_assign_pointer(*new->pprev, new); + rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new); if (next) new->next->pprev = &new->next; old->pprev = LIST_POISON2; } +/* + * return the first or the next element in an RCU protected hlist + */ +#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first))) +#define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next))) +#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev))) + /** * hlist_add_head_rcu * @n: the element to add to the hash list. 
@@ -346,7 +361,7 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, n->next = first; n->pprev = &h->first; - rcu_assign_pointer(h->first, n); + rcu_assign_pointer(hlist_first_rcu(h), n); if (first) first->pprev = &n->next; } @@ -374,7 +389,7 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, { n->pprev = next->pprev; n->next = next; - rcu_assign_pointer(*(n->pprev), n); + rcu_assign_pointer(hlist_pprev_rcu(n), n); next->pprev = &n->next; } @@ -401,15 +416,15 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, { n->next = prev->next; n->pprev = &prev->next; - rcu_assign_pointer(prev->next, n); + rcu_assign_pointer(hlist_next_rcu(prev), n); if (n->next) n->next->pprev = &n->next; } -#define __hlist_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->first); \ - pos && ({ prefetch(pos->next); 1; }); \ - pos = rcu_dereference(pos->next)) +#define __hlist_for_each_rcu(pos, head) \ + for (pos = rcu_dereference(hlist_first_rcu(head)); \ + pos && ({ prefetch(pos->next); 1; }); \ + pos = rcu_dereference(hlist_next_rcu(pos))) /** * hlist_for_each_entry_rcu - iterate over rcu list of given type @@ -422,11 +437,11 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw((head)->first); \ +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ pos && ({ prefetch(pos->next); 1; }) && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_next_rcu(pos))) /** * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index b70ffe53cb9f..2ae13714828b 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -37,6 +37,12 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) } } +#define hlist_nulls_first_rcu(head) \ + (*((struct hlist_nulls_node __rcu __force **)&(head)->first)) + +#define hlist_nulls_next_rcu(node) \ + (*((struct hlist_nulls_node __rcu __force **)&(node)->next)) + /** * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. @@ -88,7 +94,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, n->next = first; n->pprev = &h->first; - rcu_assign_pointer(h->first, n); + rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) first->pprev = &n->next; } @@ -100,11 +106,11 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, * @member: the name of the hlist_nulls_node within the struct. 
* */ -#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw((head)->first); \ - (!is_a_nulls(pos)) && \ +#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ + (!is_a_nulls(pos)) && \ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) #endif #endif diff --git a/kernel/pid.c b/kernel/pid.c index d55c6fb8d087..0f90c2f713f1 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -401,7 +401,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type) struct task_struct *result = NULL; if (pid) { struct hlist_node *first; - first = rcu_dereference_check(pid->tasks[type].first, + first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), rcu_read_lock_held() || lockdep_tasklist_lock_is_held()); if (first) -- cgit v1.2.3 From 2c392b8c3450ceb69ba1b93cb0cddb3998fb8cdc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 24 Feb 2010 19:41:39 +0100 Subject: cgroups: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Acked-by: Paul Menage Cc: Li Zefan Reviewed-by: Josh Triplett --- include/linux/cgroup.h | 4 ++-- include/linux/sched.h | 2 +- kernel/cgroup.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e41c6e..3cb7d04308cd 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -75,7 +75,7 @@ struct cgroup_subsys_state { unsigned long flags; /* ID for this css, if possible */ - struct css_id *id; + struct css_id __rcu *id; }; /* bits in struct cgroup_subsys_state flags field */ @@ -205,7 +205,7 @@ struct cgroup { struct list_head children; /* my children */ struct cgroup *parent; /* my parent */ - struct dentry *dentry; /* cgroup fs entry, RCU protected */ + struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ /* Private pointers for each registered subsystem */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7dd..bbffd087476c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1418,7 +1418,7 @@ struct task_struct { #endif #ifdef CONFIG_CGROUPS /* Control Group info protected by css_set_lock */ - struct css_set *cgroups; + struct css_set __rcu *cgroups; /* cg_list protected by css_set_lock and tsk->alloc_lock */ struct list_head cg_list; #endif diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 192f88c5b0f9..e5c5497a7dca 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -138,7 +138,7 @@ struct css_id { * is called after synchronize_rcu(). But for safe use, css_is_removed() * css_tryget() should be used for avoiding race. */ - struct cgroup_subsys_state *css; + struct cgroup_subsys_state __rcu *css; /* * ID of this css. */ -- cgit v1.2.3 From 1b0ba1c9037b2265d6e5d0165d31e4c0269b603b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 24 Feb 2010 19:45:09 +0100 Subject: credentials: rcu annotation Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Ingo Molnar Acked-by: David Howells Reviewed-by: Josh Triplett --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index bbffd087476c..2c756666c111 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1288,9 +1288,9 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - const struct cred *real_cred; /* objective and real subjective task + const struct cred __rcu *real_cred; /* objective and real subjective task * credentials (COW) */ - const struct cred *cred; /* effective (overridable) subjective task + const struct cred __rcu *cred; /* effective (overridable) subjective task * credentials (COW) */ struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations -- cgit v1.2.3 From e63ba744a64d234c8a07c469ab1806443cb0a6ff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Feb 2010 18:01:20 +0100 Subject: keys: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Acked-by: David Howells Reviewed-by: Josh Triplett --- include/linux/cred.h | 2 +- include/linux/key.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index 4d2c39573f36..4aaeab376446 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -84,7 +84,7 @@ struct thread_group_cred { atomic_t usage; pid_t tgid; /* thread group process ID */ spinlock_t lock; - struct key *session_keyring; /* keyring inherited over fork */ + struct key __rcu *session_keyring; /* keyring inherited over fork */ struct key *process_keyring; /* keyring private to this process */ struct rcu_head rcu; /* RCU deletion hook */ }; diff --git a/include/linux/key.h b/include/linux/key.h index cd50dfa1d4c2..3db0adce1fda 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -178,8 +178,9 @@ struct key { */ union { unsigned long value; + void __rcu *rcudata; void *data; - struct keyring_list *subscriptions; + struct keyring_list __rcu *subscriptions; } payload; }; -- cgit v1.2.3 From 5b22216e11f717adc344abc7f97b42e03127c6c0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Mar 2010 10:20:10 +0100 Subject: nfs: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. 
McKenney Acked-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 +- include/linux/sunrpc/auth_gss.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 508f8cf6da37..d0edf7d823ae 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -185,7 +185,7 @@ struct nfs_inode { struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ struct list_head open_states; - struct nfs_delegation *delegation; + struct nfs_delegation __rcu *delegation; fmode_t delegation_state; struct rw_semaphore rwsem; #endif /* CONFIG_NFS_V4*/ diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 671538d25bc1..8eee9dbbfe7a 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -69,7 +69,7 @@ struct gss_cl_ctx { enum rpc_gss_proc gc_proc; u32 gc_seq; spinlock_t gc_seq_lock; - struct gss_ctx *gc_gss_ctx; + struct gss_ctx __rcu *gc_gss_ctx; struct xdr_netobj gc_wire_ctx; u32 gc_win; unsigned long gc_expiry; @@ -80,7 +80,7 @@ struct gss_upcall_msg; struct gss_cred { struct rpc_cred gc_base; enum rpc_gss_svc gc_service; - struct gss_cl_ctx *gc_ctx; + struct gss_cl_ctx __rcu *gc_ctx; struct gss_upcall_msg *gc_upcall; unsigned long gc_upcall_timestamp; unsigned char gc_machine_cred : 1; -- cgit v1.2.3 From 2be85279281bafe7de808ca99de59af4fd474c49 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Mar 2010 15:50:28 +0100 Subject: input: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Dmitry Torokhov Acked-by: Dmitry Torokhov Reviewed-by: Josh Triplett --- drivers/input/evdev.c | 2 +- include/linux/input.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index c908c5f83645..5808731f72d2 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -28,7 +28,7 @@ struct evdev { int minor; struct input_handle handle; wait_queue_head_t wait; - struct evdev_client *grab; + struct evdev_client __rcu *grab; struct list_head client_list; spinlock_t client_lock; /* protects client_list */ struct mutex mutex; diff --git a/include/linux/input.h b/include/linux/input.h index 896a92227bc4..d6ae1761be97 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1196,7 +1196,7 @@ struct input_dev { int (*flush)(struct input_dev *dev, struct file *file); int (*event)(struct input_dev *dev, unsigned int type, unsigned int code, int value); - struct input_handle *grab; + struct input_handle __rcu *grab; spinlock_t event_lock; struct mutex mutex; -- cgit v1.2.3 From 0906a372f2aa0fec1e59bd12b896883b6e41307a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 9 Mar 2010 20:59:15 +0100 Subject: net/netfilter: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Acked-by: Patrick McHardy Cc: "David S. 
Miller" Cc: Eric Dumazet Reviewed-by: Josh Triplett --- include/net/netfilter/nf_conntrack.h | 2 +- net/ipv4/netfilter/nf_nat_core.c | 2 +- net/netfilter/core.c | 2 +- net/netfilter/nf_conntrack_ecache.c | 4 ++-- net/netfilter/nf_conntrack_extend.c | 2 +- net/netfilter/nf_conntrack_proto.c | 4 ++-- net/netfilter/nf_log.c | 2 +- net/netfilter/nf_queue.c | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index e624dae54fa4..caf17db87dbc 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -75,7 +75,7 @@ struct nf_conntrack_helper; /* nf_conn feature for connections that have a helper */ struct nf_conn_help { /* Helper. if any */ - struct nf_conntrack_helper *helper; + struct nf_conntrack_helper __rcu *helper; union nf_conntrack_help help; diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 8c8632d9b93c..957c9241fb0c 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -38,7 +38,7 @@ static DEFINE_SPINLOCK(nf_nat_lock); static struct nf_conntrack_l3proto *l3proto __read_mostly; #define MAX_IP_NAT_PROTO 256 -static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] +static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO] __read_mostly; static inline const struct nf_nat_protocol * diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 78b505d33bfb..fdaec7daff1d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -27,7 +27,7 @@ static DEFINE_MUTEX(afinfo_mutex); -const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; +const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; EXPORT_SYMBOL(nf_afinfo); int nf_register_afinfo(const struct nf_afinfo *afinfo) diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index cdcc7649476b..5702de35e2bb 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -26,10 +26,10 @@ static DEFINE_MUTEX(nf_ct_ecache_mutex); -struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly; +struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); -struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly; +struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly; EXPORT_SYMBOL_GPL(nf_expect_event_cb); /* deliver cached events and clear cache entry - must be called with locally diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 7dcf7a404190..1d9bdae06161 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -16,7 +16,7 @@ #include #include -static struct nf_ct_ext_type *nf_ct_ext_types[NF_CT_EXT_NUM]; +static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM]; static DEFINE_MUTEX(nf_ct_ext_type_mutex); void __nf_ct_ext_destroy(struct nf_conn *ct) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 5886ba1d52a0..ed6d92958023 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -28,8 +28,8 @@ #include #include -static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly; -struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly; +static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly; +struct nf_conntrack_l3proto 
__rcu *nf_ct_l3protos[AF_MAX] __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_l3protos); static DEFINE_MUTEX(nf_ct_proto_mutex); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 7df37fd786bc..b07393eab88e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -16,7 +16,7 @@ #define NF_LOG_PREFIXLEN 128 #define NFLOGGER_NAME_LEN 64 -static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; +static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 78b3cf9c519c..74aebed5bd28 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -18,7 +18,7 @@ * long term mutex. The handler must provide an an outfn() to accept packets * for queueing and must reinject all packets it receives, no matter what. */ -static const struct nf_queue_handler *queue_handler[NFPROTO_NUMPROTO] __read_mostly; +static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(queue_handler_mutex); -- cgit v1.2.3 From 4b6a2872a2a00042ee50024822ab706e5456aad8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Mar 2010 15:59:23 +0100 Subject: kvm: add __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Avi Kivity Cc: Marcelo Tosatti Reviewed-by: Josh Triplett --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c13cc48697aa..ac740b26eb10 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -205,7 +205,7 @@ struct kvm { struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP - struct kvm_irq_routing_table *irq_routing; + struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; struct hlist_head irq_ack_notifier_list; #endif -- cgit v1.2.3 From 4221a9918e38b7494cee341dda7b7b4bb8c04bde Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 26 Jun 2010 01:08:19 +0900 Subject: Add RCU check for find_task_by_vpid(). find_task_by_vpid() says "Must be called under rcu_read_lock().". But due to commit 3120438 "rcu: Disable lockdep checking in RCU list-traversal primitives", we are currently unable to catch "find_task_by_vpid() with tasklist_lock held but RCU lock not held" errors due to the RCU-lockdep checks being suppressed in the RCU variants of the struct list_head traversals. This commit therefore places an explicit check for being in an RCU read-side critical section in find_task_by_pid_ns(). =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- kernel/pid.c:386 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 1 1 lock held by rc.sysinit/1102: #0: (tasklist_lock){.+.+..}, at: [] sys_setpgid+0x40/0x160 stack backtrace: Pid: 1102, comm: rc.sysinit Not tainted 2.6.35-rc3-dirty #1 Call Trace: [] lockdep_rcu_dereference+0x94/0xb0 [] find_task_by_pid_ns+0x6d/0x70 [] find_task_by_vpid+0x18/0x20 [] sys_setpgid+0x47/0x160 [] sysenter_do_call+0x12/0x36 Commit updated to use a new rcu_lockdep_assert() exported API rather than the old internal __do_rcu_dereference(). Signed-off-by: Tetsuo Handa Signed-off-by: Paul E. 
McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 14 +++++++++----- kernel/pid.c | 1 + 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b973dea2d6b0..b124bc6a75ad 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -215,7 +215,11 @@ static inline int rcu_read_lock_sched_held(void) extern int rcu_my_thread_group_empty(void); -#define __do_rcu_dereference_check(c) \ +/** + * rcu_lockdep_assert - emit lockdep splat if specified condition not met + * @c: condition to check + */ +#define rcu_lockdep_assert(c) \ do { \ static bool __warned; \ if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ @@ -226,7 +230,7 @@ extern int rcu_my_thread_group_empty(void); #else /* #ifdef CONFIG_PROVE_RCU */ -#define __do_rcu_dereference_check(c) do { } while (0) +#define rcu_lockdep_assert(c) do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ @@ -247,14 +251,14 @@ extern int rcu_my_thread_group_empty(void); #define __rcu_dereference_check(p, c, space) \ ({ \ typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ - __do_rcu_dereference_check(c); \ + rcu_lockdep_assert(c); \ (void) (((typeof (*p) space *)p) == p); \ smp_read_barrier_depends(); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ - __do_rcu_dereference_check(c); \ + rcu_lockdep_assert(c); \ (void) (((typeof (*p) space *)p) == p); \ ((typeof(*p) __force __kernel *)(p)); \ }) @@ -262,7 +266,7 @@ extern int rcu_my_thread_group_empty(void); #define __rcu_dereference_index_check(p, c) \ ({ \ typeof(p) _________p1 = ACCESS_ONCE(p); \ - __do_rcu_dereference_check(c); \ + rcu_lockdep_assert(c); \ smp_read_barrier_depends(); \ (_________p1); \ }) diff --git a/kernel/pid.c b/kernel/pid.c index 0f90c2f713f1..39b65b69584f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -416,6 +416,7 @@ EXPORT_SYMBOL(pid_task); */ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) { + rcu_lockdep_assert(rcu_read_lock_held()); return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); } -- cgit v1.2.3 From 77d8485a8b5416c615b6acd95f01bfcacd7d81ff Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 8 Jul 2010 17:38:59 -0700 Subject: rcu: improve kerneldoc for rcu_read_lock(), call_rcu(), and synchronize_rcu() Make it explicit that new RCU read-side critical sections that start after call_rcu() and synchronize_rcu() start might still be running after the end of the relevant grace period. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 16 +++++++++------- kernel/rcutree_plugin.h | 8 +++++--- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b124bc6a75ad..3e1b6625553b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -450,7 +450,7 @@ extern int rcu_my_thread_group_empty(void); * until after the all the other CPUs exit their critical sections. * * Note, however, that RCU callbacks are permitted to run concurrently - * with RCU read-side critical sections. One way that this can happen + * with new RCU read-side critical sections. 
One way that this can happen * is via the following sequence of events: (1) CPU 0 enters an RCU * read-side critical section, (2) CPU 1 invokes call_rcu() to register * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, @@ -608,11 +608,13 @@ extern void wakeme_after_rcu(struct rcu_head *head); /** * call_rcu() - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period + * @func: actual callback function to be invoked after the grace period * - * The update function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. However, the callback function + * might well execute concurrently with RCU read-side critical sections + * that started after call_rcu() was invoked. RCU read-side critical * sections are delimited by rcu_read_lock() and rcu_read_unlock(), * and may be nested. */ @@ -622,9 +624,9 @@ extern void call_rcu(struct rcu_head *head, /** * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period + * @func: actual callback function to be invoked after the grace period * - * The update function will be invoked some time after a full grace + * The callback function will be invoked some time after a full grace * period elapses, in other words after all currently executing RCU * read-side critical sections have completed. call_rcu_bh() assumes * that the read-side critical sections end on completion of a softirq diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 9906f85c7780..63bb7714fdeb 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -546,9 +546,11 @@ EXPORT_SYMBOL_GPL(call_rcu); * * Control will return to the caller some time after a full grace * period has elapsed, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical - * sections are delimited by rcu_read_lock() and rcu_read_unlock(), - * and may be nested. + * read-side critical sections have completed. Note, however, that + * upon return from synchronize_rcu(), the caller might well be executing + * concurrently with new RCU read-side critical sections that began while + * synchronize_rcu() was waiting. RCU read-side critical sections are + * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. */ void synchronize_rcu(void) { -- cgit v1.2.3 From 374a8e0dc33c984fac284de7d57d77af3cfdbfb7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 24 Feb 2010 20:00:13 +0100 Subject: notifiers: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. 
McKenney Cc: Alan Cox Reviewed-by: Josh Triplett --- include/linux/notifier.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index b2f1a4d83550..2026f9e1ceb8 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -49,28 +49,28 @@ struct notifier_block { int (*notifier_call)(struct notifier_block *, unsigned long, void *); - struct notifier_block *next; + struct notifier_block __rcu *next; int priority; }; struct atomic_notifier_head { spinlock_t lock; - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct blocking_notifier_head { struct rw_semaphore rwsem; - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct raw_notifier_head { - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct srcu_notifier_head { struct mutex mutex; struct srcu_struct srcu; - struct notifier_block *head; + struct notifier_block __rcu *head; }; #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ -- cgit v1.2.3 From a1115570b31091f3e3ab9e6cf7ee8d320a42be84 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 25 Feb 2010 23:43:52 +0100 Subject: radix-tree: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Nick Piggin Reviewed-by: Josh Triplett --- include/linux/radix-tree.h | 4 +++- lib/radix-tree.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 634b8e674ac5..a39cbed9ee17 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -47,6 +47,8 @@ static inline void *radix_tree_indirect_to_ptr(void *ptr) { return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } +#define radix_tree_indirect_to_ptr(ptr) \ + radix_tree_indirect_to_ptr((void __force *)(ptr)) static inline int radix_tree_is_indirect_ptr(void *ptr) { @@ -61,7 +63,7 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) struct radix_tree_root { unsigned int height; gfp_t gfp_mask; - struct radix_tree_node *rnode; + struct radix_tree_node __rcu *rnode; }; #define RADIX_TREE_INIT(mask) { \ diff --git a/lib/radix-tree.c b/lib/radix-tree.c index e907858498a6..899fb750946f 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -49,7 +49,7 @@ struct radix_tree_node { unsigned int height; /* Height from the bottom */ unsigned int count; struct rcu_head rcu_head; - void *slots[RADIX_TREE_MAP_SIZE]; + void __rcu *slots[RADIX_TREE_MAP_SIZE]; unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; }; -- cgit v1.2.3 From d2c2486bc8e185548490e8edbc84d185de9eaff1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Feb 2010 14:53:26 +0100 Subject: idr: __rcu annotations Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. McKenney Cc: Manfred Spraul Reviewed-by: Josh Triplett --- include/linux/idr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/idr.h b/include/linux/idr.h index e968db71e33a..cdb715e58e3e 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -50,14 +50,14 @@ struct idr_layer { unsigned long bitmap; /* A zero bit means "space here" */ - struct idr_layer *ary[1< Date: Wed, 24 Feb 2010 20:01:56 +0100 Subject: kernel: __rcu annotations This adds annotations for RCU operations in core kernel components Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. 
McKenney Cc: Al Viro Cc: Jens Axboe Cc: Andrew Morton Reviewed-by: Josh Triplett --- include/linux/fdtable.h | 6 +++--- include/linux/fs.h | 2 +- include/linux/genhd.h | 6 +++--- include/linux/init_task.h | 4 ++-- include/linux/iocontext.h | 2 +- include/linux/mm_types.h | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index f59ed297b661..133c0ba25e30 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -31,7 +31,7 @@ struct embedded_fd_set { struct fdtable { unsigned int max_fds; - struct file ** fd; /* current fd array */ + struct file __rcu **fd; /* current fd array */ fd_set *close_on_exec; fd_set *open_fds; struct rcu_head rcu; @@ -46,7 +46,7 @@ struct files_struct { * read mostly part */ atomic_t count; - struct fdtable *fdt; + struct fdtable __rcu *fdt; struct fdtable fdtab; /* * written part on a separate cache line in SMP @@ -55,7 +55,7 @@ struct files_struct { int next_fd; struct embedded_fd_set close_on_exec_init; struct embedded_fd_set open_fds_init; - struct file * fd_array[NR_OPEN_DEFAULT]; + struct file __rcu * fd_array[NR_OPEN_DEFAULT]; }; #define rcu_dereference_check_fdtable(files, fdtfd) \ diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b614758..aa3dc8d20436 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1380,7 +1380,7 @@ struct super_block { * Saved mount options for lazy filesystems using * generic_show_options() */ - char *s_options; + char __rcu *s_options; }; extern struct timespec current_fs_time(struct super_block *sb); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5f2f4c4d8fb0..af3f06b41dc1 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -129,8 +129,8 @@ struct blk_scsi_cmd_filter { struct disk_part_tbl { struct rcu_head rcu_head; int len; - struct hd_struct *last_lookup; - struct hd_struct *part[]; + struct hd_struct __rcu *last_lookup; + struct hd_struct __rcu *part[]; }; struct gendisk { @@ -149,7 +149,7 @@ struct gendisk { * non-critical accesses use RCU. Always access through * helpers. 
*/ - struct disk_part_tbl *part_tbl; + struct disk_part_tbl __rcu *part_tbl; struct hd_struct part0; const struct block_device_operations *fops; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 1f43fa56f600..6460fc65ed6b 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -137,8 +137,8 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - .real_cred = &init_cred, \ - .cred = &init_cred, \ + RCU_INIT_POINTER(.real_cred, &init_cred), \ + RCU_INIT_POINTER(.cred, &init_cred), \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ .comm = "swapper", \ diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 64d529133031..3e70b21884a9 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -53,7 +53,7 @@ struct io_context { struct radix_tree_root radix_root; struct hlist_head cic_list; - void *ioc_data; + void __rcu *ioc_data; }; static inline struct io_context *ioc_task_link(struct io_context *ioc) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b8bb9a6a1f37..05537a5eb855 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -299,7 +299,7 @@ struct mm_struct { * new_owner->mm == mm * new_owner->alloc_lock is held */ - struct task_struct *owner; + struct task_struct __rcu *owner; #endif #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From 5e8067adfdbaf97039a97540765b1e16eb8d61cc Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 17 Apr 2010 08:48:41 -0400 Subject: rcu head remove init RCU heads really don't need to be initialized. Their state before call_rcu() really does not matter. We need to keep init/destroy_rcu_head_on_stack() though, since we want debugobjects to be able to keep track of these objects. Signed-off-by: Alexey Dobriyan Signed-off-by: Mathieu Desnoyers CC: David S. Miller CC: "Paul E. McKenney" CC: akpm@linux-foundation.org CC: mingo@elte.hu CC: laijs@cn.fujitsu.com CC: dipankar@in.ibm.com CC: josh@joshtriplett.org CC: dvhltc@us.ibm.com CC: niv@us.ibm.com CC: tglx@linutronix.de CC: peterz@infradead.org CC: rostedt@goodmis.org CC: Valdis.Kletnieks@vt.edu CC: dhowells@redhat.com CC: eric.dumazet@gmail.com CC: Alexey Dobriyan Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3e1b6625553b..27b44b3e3024 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -75,12 +75,6 @@ extern void rcu_init(void); #error "Unknown RCU implementation specified to kernel configuration" #endif -#define RCU_HEAD_INIT { .next = NULL, .func = NULL } -#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT -#define INIT_RCU_HEAD(ptr) do { \ - (ptr)->next = NULL; (ptr)->func = NULL; \ -} while (0) - /* * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic * initialization and destruction of rcu_head on the stack. rcu_head structures -- cgit v1.2.3 From 1495cc9df4e81f5a8fa9b0b8f1034b14d24b7d8c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 17 Aug 2010 21:15:46 -0700 Subject: Input: sysrq - drop tty argument from sysrq ops handlers No one is using the tty argument, so let's get rid of it.
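[Editor's note: for illustration, a minimal handler written against the new single-argument signature might look like the sketch below. The handler name, key choice and messages are hypothetical; sysrq_key_op, register_sysrq_key() and SYSRQ_ENABLE_DUMP are the interfaces this patch actually touches.]

#include <linux/kernel.h>
#include <linux/sysrq.h>

/* A sysrq callback now receives only the key that triggered it. */
static void sysrq_handle_example(int key)
{
	printk(KERN_INFO "sysrq: example handler for key %d\n", key);
}

static struct sysrq_key_op sysrq_example_op = {
	.handler	= sysrq_handle_example,
	.help_msg	= "example(y)",
	.action_msg	= "Running example handler",
	.enable_mask	= SYSRQ_ENABLE_DUMP,
};

/* Registration itself is unchanged: register_sysrq_key('y', &sysrq_example_op); */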
Acked-by: Alan Cox Acked-by: Jason Wessel Acked-by: Greg Kroah-Hartman Signed-off-by: Dmitry Torokhov --- arch/arm/kernel/etm.c | 2 +- arch/powerpc/xmon/xmon.c | 5 ++--- arch/sparc/kernel/process_64.c | 2 +- drivers/char/sysrq.c | 42 ++++++++++++++++++++--------------------- drivers/gpu/drm/drm_fb_helper.c | 2 +- drivers/net/ibm_newemac/debug.c | 2 +- include/linux/sysrq.h | 6 +++++- kernel/debug/debug_core.c | 2 +- kernel/power/poweroff.c | 2 +- 9 files changed, 34 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/etm.c b/arch/arm/kernel/etm.c index 56418f98cd01..33c7077174db 100644 --- a/arch/arm/kernel/etm.c +++ b/arch/arm/kernel/etm.c @@ -230,7 +230,7 @@ static void etm_dump(void) etb_lock(t); } -static void sysrq_etm_dump(int key, struct tty_struct *tty) +static void sysrq_etm_dump(int key) { dev_dbg(tracer.dev, "Dumping ETB buffer\n"); etm_dump(); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 0554445200bf..d17d04cfb2cd 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2880,15 +2880,14 @@ static void xmon_init(int enable) } #ifdef CONFIG_MAGIC_SYSRQ -static void sysrq_handle_xmon(int key, struct tty_struct *tty) +static void sysrq_handle_xmon(int key) { /* ensure xmon is enabled */ xmon_init(1); debugger(get_irq_regs()); } -static struct sysrq_key_op sysrq_xmon_op = -{ +static struct sysrq_key_op sysrq_xmon_op = { .handler = sysrq_handle_xmon, .help_msg = "Xmon", .action_msg = "Entering xmon", diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index dbe81a368b45..25b01b43b40d 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -303,7 +303,7 @@ void arch_trigger_all_cpu_backtrace(void) #ifdef CONFIG_MAGIC_SYSRQ -static void sysrq_handle_globreg(int key, struct tty_struct *tty) +static void sysrq_handle_globreg(int key) { arch_trigger_all_cpu_backtrace(); } diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 878ac0c2cc68..a892a3c249dd 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -76,7 +76,7 @@ static int __init sysrq_always_enabled_setup(char *str) __setup("sysrq_always_enabled", sysrq_always_enabled_setup); -static void sysrq_handle_loglevel(int key, struct tty_struct *tty) +static void sysrq_handle_loglevel(int key) { int i; @@ -93,7 +93,7 @@ static struct sysrq_key_op sysrq_loglevel_op = { }; #ifdef CONFIG_VT -static void sysrq_handle_SAK(int key, struct tty_struct *tty) +static void sysrq_handle_SAK(int key) { struct work_struct *SAK_work = &vc_cons[fg_console].SAK_work; schedule_work(SAK_work); @@ -109,7 +109,7 @@ static struct sysrq_key_op sysrq_SAK_op = { #endif #ifdef CONFIG_VT -static void sysrq_handle_unraw(int key, struct tty_struct *tty) +static void sysrq_handle_unraw(int key) { struct kbd_struct *kbd = &kbd_table[fg_console]; @@ -126,7 +126,7 @@ static struct sysrq_key_op sysrq_unraw_op = { #define sysrq_unraw_op (*(struct sysrq_key_op *)NULL) #endif /* CONFIG_VT */ -static void sysrq_handle_crash(int key, struct tty_struct *tty) +static void sysrq_handle_crash(int key) { char *killer = NULL; @@ -141,7 +141,7 @@ static struct sysrq_key_op sysrq_crash_op = { .enable_mask = SYSRQ_ENABLE_DUMP, }; -static void sysrq_handle_reboot(int key, struct tty_struct *tty) +static void sysrq_handle_reboot(int key) { lockdep_off(); local_irq_enable(); @@ -154,7 +154,7 @@ static struct sysrq_key_op sysrq_reboot_op = { .enable_mask = SYSRQ_ENABLE_BOOT, }; -static void sysrq_handle_sync(int key, struct tty_struct *tty) 
+static void sysrq_handle_sync(int key) { emergency_sync(); } @@ -165,7 +165,7 @@ static struct sysrq_key_op sysrq_sync_op = { .enable_mask = SYSRQ_ENABLE_SYNC, }; -static void sysrq_handle_show_timers(int key, struct tty_struct *tty) +static void sysrq_handle_show_timers(int key) { sysrq_timer_list_show(); } @@ -176,7 +176,7 @@ static struct sysrq_key_op sysrq_show_timers_op = { .action_msg = "Show clockevent devices & pending hrtimers (no others)", }; -static void sysrq_handle_mountro(int key, struct tty_struct *tty) +static void sysrq_handle_mountro(int key) { emergency_remount(); } @@ -188,7 +188,7 @@ static struct sysrq_key_op sysrq_mountro_op = { }; #ifdef CONFIG_LOCKDEP -static void sysrq_handle_showlocks(int key, struct tty_struct *tty) +static void sysrq_handle_showlocks(int key) { debug_show_all_locks(); } @@ -226,7 +226,7 @@ static void sysrq_showregs_othercpus(struct work_struct *dummy) static DECLARE_WORK(sysrq_showallcpus, sysrq_showregs_othercpus); -static void sysrq_handle_showallcpus(int key, struct tty_struct *tty) +static void sysrq_handle_showallcpus(int key) { /* * Fall back to the workqueue based printing if the @@ -252,7 +252,7 @@ static struct sysrq_key_op sysrq_showallcpus_op = { }; #endif -static void sysrq_handle_showregs(int key, struct tty_struct *tty) +static void sysrq_handle_showregs(int key) { struct pt_regs *regs = get_irq_regs(); if (regs) @@ -266,7 +266,7 @@ static struct sysrq_key_op sysrq_showregs_op = { .enable_mask = SYSRQ_ENABLE_DUMP, }; -static void sysrq_handle_showstate(int key, struct tty_struct *tty) +static void sysrq_handle_showstate(int key) { show_state(); } @@ -277,7 +277,7 @@ static struct sysrq_key_op sysrq_showstate_op = { .enable_mask = SYSRQ_ENABLE_DUMP, }; -static void sysrq_handle_showstate_blocked(int key, struct tty_struct *tty) +static void sysrq_handle_showstate_blocked(int key) { show_state_filter(TASK_UNINTERRUPTIBLE); } @@ -291,7 +291,7 @@ static struct sysrq_key_op sysrq_showstate_blocked_op = { #ifdef CONFIG_TRACING #include -static void sysrq_ftrace_dump(int key, struct tty_struct *tty) +static void sysrq_ftrace_dump(int key) { ftrace_dump(DUMP_ALL); } @@ -305,7 +305,7 @@ static struct sysrq_key_op sysrq_ftrace_dump_op = { #define sysrq_ftrace_dump_op (*(struct sysrq_key_op *)NULL) #endif -static void sysrq_handle_showmem(int key, struct tty_struct *tty) +static void sysrq_handle_showmem(int key) { show_mem(); } @@ -330,7 +330,7 @@ static void send_sig_all(int sig) } } -static void sysrq_handle_term(int key, struct tty_struct *tty) +static void sysrq_handle_term(int key) { send_sig_all(SIGTERM); console_loglevel = 8; @@ -349,7 +349,7 @@ static void moom_callback(struct work_struct *ignored) static DECLARE_WORK(moom_work, moom_callback); -static void sysrq_handle_moom(int key, struct tty_struct *tty) +static void sysrq_handle_moom(int key) { schedule_work(&moom_work); } @@ -361,7 +361,7 @@ static struct sysrq_key_op sysrq_moom_op = { }; #ifdef CONFIG_BLOCK -static void sysrq_handle_thaw(int key, struct tty_struct *tty) +static void sysrq_handle_thaw(int key) { emergency_thaw_all(); } @@ -373,7 +373,7 @@ static struct sysrq_key_op sysrq_thaw_op = { }; #endif -static void sysrq_handle_kill(int key, struct tty_struct *tty) +static void sysrq_handle_kill(int key) { send_sig_all(SIGKILL); console_loglevel = 8; @@ -385,7 +385,7 @@ static struct sysrq_key_op sysrq_kill_op = { .enable_mask = SYSRQ_ENABLE_SIGNAL, }; -static void sysrq_handle_unrt(int key, struct tty_struct *tty) +static void sysrq_handle_unrt(int key) { 
normalize_rt_tasks(); } @@ -520,7 +520,7 @@ void __handle_sysrq(int key, struct tty_struct *tty, int check_mask) if (!check_mask || sysrq_on_mask(op_p->enable_mask)) { printk("%s\n", op_p->action_msg); console_loglevel = orig_log_level; - op_p->handler(key, tty); + op_p->handler(key); } else { printk("This sysrq operation is disabled.\n"); } diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index de82e201d682..5efd6d6742ec 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -369,7 +369,7 @@ static void drm_fb_helper_restore_work_fn(struct work_struct *ignored) } static DECLARE_WORK(drm_fb_helper_restore_work, drm_fb_helper_restore_work_fn); -static void drm_fb_helper_sysrq(int dummy1, struct tty_struct *dummy3) +static void drm_fb_helper_sysrq(int dummy1) { schedule_work(&drm_fb_helper_restore_work); } diff --git a/drivers/net/ibm_newemac/debug.c b/drivers/net/ibm_newemac/debug.c index 3995fafc1e08..8c6c1e2a8750 100644 --- a/drivers/net/ibm_newemac/debug.c +++ b/drivers/net/ibm_newemac/debug.c @@ -238,7 +238,7 @@ void emac_dbg_dump_all(void) } #if defined(CONFIG_MAGIC_SYSRQ) -static void emac_sysrq_handler(int key, struct tty_struct *tty) +static void emac_sysrq_handler(int key) { emac_dbg_dump_all(); } diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 609e8ca5f534..4ee650315119 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -31,7 +31,7 @@ struct tty_struct; #define SYSRQ_ENABLE_RTNICE 0x0100 struct sysrq_key_op { - void (*handler)(int, struct tty_struct *); + void (*handler)(int); char *help_msg; char *action_msg; int enable_mask; @@ -58,6 +58,10 @@ static inline void handle_sysrq(int key, struct tty_struct *tty) { } +static inline void __handle_sysrq(int key, struct tty_struct *tty, int check_mask) +{ +} + static inline int register_sysrq_key(int key, struct sysrq_key_op *op) { return -EINVAL; diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 3c2d4972d235..de407c78178d 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -741,7 +741,7 @@ static struct console kgdbcons = { }; #ifdef CONFIG_MAGIC_SYSRQ -static void sysrq_handle_dbg(int key, struct tty_struct *tty) +static void sysrq_handle_dbg(int key) { if (!dbg_io_ops) { printk(KERN_CRIT "ERROR: No KGDB I/O module available\n"); diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c index e8b337006276..d52359374e85 100644 --- a/kernel/power/poweroff.c +++ b/kernel/power/poweroff.c @@ -24,7 +24,7 @@ static void do_poweroff(struct work_struct *dummy) static DECLARE_WORK(poweroff_work, do_poweroff); -static void handle_poweroff(int key, struct tty_struct *tty) +static void handle_poweroff(int key) { /* run sysrq poweroff on boot cpu */ schedule_work_on(cpumask_first(cpu_online_mask), &poweroff_work); -- cgit v1.2.3 From eb4d40654505e47aa9d2035bb97f631fa61d14b4 Mon Sep 17 00:00:00 2001 From: Grégoire Baron Date: Wed, 18 Aug 2010 13:10:35 +0000 Subject: net/sched: add ACT_CSUM action to update packet checksums ACT_CSUM can be called just after ACT_PEDIT in order to re-compute some altered checksums in IPv4 and IPv6 packets. The following checksums are supported by this patch: - IPv4: IPv4 header, ICMP, IGMP, TCP, UDP & UDPLite - IPv6: ICMPv6, TCP, UDP & UDPLite It is possible to request, in a single action, updates to several kinds of checksums, e.g. when the packet flow mixes TCP, UDP and UDPLite.
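[Editor's note: a hedged sketch of how a netlink client might fill in the parameter block that tcf_csum_init() below parses out of the TCA_CSUM_PARMS attribute. The chosen flags and the TC_ACT_OK disposition are illustrative; struct tc_csum and the TCA_CSUM_UPDATE_FLAG_* bits are the ones added by this patch.]

#include <linux/pkt_cls.h>
#include <linux/tc_act/tc_csum.h>

/* Recompute the IPv4 header, TCP and UDP checksums, then let the
 * packet continue through the remaining actions. */
static const struct tc_csum csum_parm = {
	.action		= TC_ACT_OK,	/* field provided by tc_gen */
	.update_flags	= TCA_CSUM_UPDATE_FLAG_IPV4HDR |
			  TCA_CSUM_UPDATE_FLAG_TCP |
			  TCA_CSUM_UPDATE_FLAG_UDP,
};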
An example of usage is done in the associated iproute2 patch. Version 3 changes: - remove useless goto instructions - improve IPv6 hop options decoding Version 2 changes: - coding style correction - remove useless arguments of some functions - use stack in tcf_csum_dump() - add tcf_csum_skb_nextlayer() to factor code Signed-off-by: Gregoire Baron Acked-by: jamal Signed-off-by: David S. Miller --- include/linux/tc_act/Kbuild | 1 + include/linux/tc_act/tc_csum.h | 32 +++ include/net/tc_act/tc_csum.h | 15 ++ net/sched/Kconfig | 10 + net/sched/Makefile | 1 + net/sched/act_csum.c | 595 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 654 insertions(+) create mode 100644 include/linux/tc_act/tc_csum.h create mode 100644 include/net/tc_act/tc_csum.h create mode 100644 net/sched/act_csum.c (limited to 'include') diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild index 76990937f4c9..67b501c302b2 100644 --- a/include/linux/tc_act/Kbuild +++ b/include/linux/tc_act/Kbuild @@ -4,3 +4,4 @@ header-y += tc_mirred.h header-y += tc_pedit.h header-y += tc_nat.h header-y += tc_skbedit.h +header-y += tc_csum.h diff --git a/include/linux/tc_act/tc_csum.h b/include/linux/tc_act/tc_csum.h new file mode 100644 index 000000000000..a047c49a3153 --- /dev/null +++ b/include/linux/tc_act/tc_csum.h @@ -0,0 +1,32 @@ +#ifndef __LINUX_TC_CSUM_H +#define __LINUX_TC_CSUM_H + +#include +#include + +#define TCA_ACT_CSUM 16 + +enum { + TCA_CSUM_UNSPEC, + TCA_CSUM_PARMS, + TCA_CSUM_TM, + __TCA_CSUM_MAX +}; +#define TCA_CSUM_MAX (__TCA_CSUM_MAX - 1) + +enum { + TCA_CSUM_UPDATE_FLAG_IPV4HDR = 1, + TCA_CSUM_UPDATE_FLAG_ICMP = 2, + TCA_CSUM_UPDATE_FLAG_IGMP = 4, + TCA_CSUM_UPDATE_FLAG_TCP = 8, + TCA_CSUM_UPDATE_FLAG_UDP = 16, + TCA_CSUM_UPDATE_FLAG_UDPLITE = 32 +}; + +struct tc_csum { + tc_gen; + + __u32 update_flags; +}; + +#endif /* __LINUX_TC_CSUM_H */ diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h new file mode 100644 index 000000000000..9e8710be7a04 --- /dev/null +++ b/include/net/tc_act/tc_csum.h @@ -0,0 +1,15 @@ +#ifndef __NET_TC_CSUM_H +#define __NET_TC_CSUM_H + +#include +#include + +struct tcf_csum { + struct tcf_common common; + + u32 update_flags; +}; +#define to_tcf_csum(pc) \ + container_of(pc,struct tcf_csum,common) + +#endif /* __NET_TC_CSUM_H */ diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 2f691fb180d1..522d5a9a2825 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -518,6 +518,16 @@ config NET_ACT_SKBEDIT To compile this code as a module, choose M here: the module will be called act_skbedit. +config NET_ACT_CSUM + tristate "Checksum Updating" + depends on NET_CLS_ACT + ---help--- + Say Y here to update some common checksum after some direct + packet alterations. + + To compile this code as a module, choose M here: the + module will be called act_csum. 
+ config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index f14e71bfa58f..960f5dba6304 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o +obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c new file mode 100644 index 000000000000..58d7f36949da --- /dev/null +++ b/net/sched/act_csum.c @@ -0,0 +1,595 @@ +/* + * Checksum updating actions + * + * Copyright (c) 2010 Gregoire Baron + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#define CSUM_TAB_MASK 15 +static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1]; +static u32 csum_idx_gen; +static DEFINE_RWLOCK(csum_lock); + +static struct tcf_hashinfo csum_hash_info = { + .htab = tcf_csum_ht, + .hmask = CSUM_TAB_MASK, + .lock = &csum_lock, +}; + +static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { + [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, +}; + +static int tcf_csum_init(struct nlattr *nla, struct nlattr *est, + struct tc_action *a, int ovr, int bind) +{ + struct nlattr *tb[TCA_CSUM_MAX + 1]; + struct tc_csum *parm; + struct tcf_common *pc; + struct tcf_csum *p; + int ret = 0, err; + + if (nla == NULL) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_CSUM_MAX, nla,csum_policy); + if (err < 0) + return err; + + if (tb[TCA_CSUM_PARMS] == NULL) + return -EINVAL; + parm = nla_data(tb[TCA_CSUM_PARMS]); + + pc = tcf_hash_check(parm->index, a, bind, &csum_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, &csum_idx_gen, &csum_hash_info); + if (IS_ERR(pc)) + return PTR_ERR(pc); + p = to_tcf_csum(pc); + ret = ACT_P_CREATED; + } else { + p = to_tcf_csum(pc); + if (!ovr) { + tcf_hash_release(pc, bind, &csum_hash_info); + return -EEXIST; + } + } + + spin_lock_bh(&p->tcf_lock); + p->tcf_action = parm->action; + p->update_flags = parm->update_flags; + spin_unlock_bh(&p->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(pc, &csum_hash_info); + + return ret; +} + +static int tcf_csum_cleanup(struct tc_action *a, int bind) +{ + struct tcf_csum *p = a->priv; + return tcf_hash_release(&p->common, bind, &csum_hash_info); +} + +/** + * tcf_csum_skb_nextlayer - Get next layer pointer + * @skb: sk_buff to use + * @ihl: previous summed headers length + * @ipl: complete packet length + * @jhl: next header length + * + * Check the expected next layer availability in the specified sk_buff. + * Return the next layer pointer if pass, NULL otherwise. 
+ */ +static void *tcf_csum_skb_nextlayer(struct sk_buff *skb, + unsigned int ihl, unsigned int ipl, + unsigned int jhl) +{ + int ntkoff = skb_network_offset(skb); + int hl = ihl + jhl; + + if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) || + (skb_cloned(skb) && + !skb_clone_writable(skb, hl + ntkoff) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + return NULL; + else + return (void *)(skb_network_header(skb) + ihl); +} + +static int tcf_csum_ipv4_icmp(struct sk_buff *skb, + unsigned int ihl, unsigned int ipl) +{ + struct icmphdr *icmph; + + icmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmph)); + if (icmph == NULL) + return 0; + + icmph->checksum = 0; + skb->csum = csum_partial(icmph, ipl - ihl, 0); + icmph->checksum = csum_fold(skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv4_igmp(struct sk_buff *skb, + unsigned int ihl, unsigned int ipl) +{ + struct igmphdr *igmph; + + igmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*igmph)); + if (igmph == NULL) + return 0; + + igmph->csum = 0; + skb->csum = csum_partial(igmph, ipl - ihl, 0); + igmph->csum = csum_fold(skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int ihl, unsigned int ipl) +{ + struct icmp6hdr *icmp6h; + + icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h)); + if (icmp6h == NULL) + return 0; + + icmp6h->icmp6_cksum = 0; + skb->csum = csum_partial(icmp6h, ipl - ihl, 0); + icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + ipl - ihl, IPPROTO_ICMPV6, + skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph, + unsigned int ihl, unsigned int ipl) +{ + struct tcphdr *tcph; + + tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); + if (tcph == NULL) + return 0; + + tcph->check = 0; + skb->csum = csum_partial(tcph, ipl - ihl, 0); + tcph->check = tcp_v4_check(ipl - ihl, + iph->saddr, iph->daddr, skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int ihl, unsigned int ipl) +{ + struct tcphdr *tcph; + + tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); + if (tcph == NULL) + return 0; + + tcph->check = 0; + skb->csum = csum_partial(tcph, ipl - ihl, 0); + tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + ipl - ihl, IPPROTO_TCP, + skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph, + unsigned int ihl, unsigned int ipl, int udplite) +{ + struct udphdr *udph; + u16 ul; + + /* Support both UDP and UDPLITE checksum algorithms, + * Don't use udph->len to get the real length without any protocol check, + * UDPLITE uses udph->len for another thing, + * Use iph->tot_len, or just ipl. 
+ */ + + udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph)); + if (udph == NULL) + return 0; + + ul = ntohs(udph->len); + + if (udplite || udph->check) { + + udph->check = 0; + + if (udplite) { + if (ul == 0) + skb->csum = csum_partial(udph, ipl - ihl, 0); + + else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl)) + skb->csum = csum_partial(udph, ul, 0); + + else + goto ignore_obscure_skb; + } else { + if (ul != ipl - ihl) + goto ignore_obscure_skb; + + skb->csum = csum_partial(udph, ul, 0); + } + + udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, + ul, iph->protocol, + skb->csum); + + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } + + skb->ip_summed = CHECKSUM_NONE; + +ignore_obscure_skb: + return 1; +} + +static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int ihl, unsigned int ipl, int udplite) +{ + struct udphdr *udph; + u16 ul; + + /* Support both UDP and UDPLITE checksum algorithms, + * Don't use udph->len to get the real length without any protocol check, + * UDPLITE uses udph->len for another thing, + * Use ip6h->payload_len + sizeof(*ip6h) ... , or just ipl. + */ + + udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph)); + if (udph == NULL) + return 0; + + ul = ntohs(udph->len); + + udph->check = 0; + + if (udplite) { + if (ul == 0) + skb->csum = csum_partial(udph, ipl - ihl, 0); + + else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl)) + skb->csum = csum_partial(udph, ul, 0); + + else + goto ignore_obscure_skb; + } else { + if (ul != ipl - ihl) + goto ignore_obscure_skb; + + skb->csum = csum_partial(udph, ul, 0); + } + + udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul, + udplite ? IPPROTO_UDPLITE : IPPROTO_UDP, + skb->csum); + + if (!udph->check) + udph->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_NONE; + +ignore_obscure_skb: + return 1; +} + +static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) +{ + struct iphdr *iph; + int ntkoff; + + ntkoff = skb_network_offset(skb); + + if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff)) + goto fail; + + iph = ip_hdr(skb); + + switch (iph->frag_off & htons(IP_OFFSET) ? 
0 : iph->protocol) { + case IPPROTO_ICMP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) + if (!tcf_csum_ipv4_icmp(skb, + iph->ihl * 4, ntohs(iph->tot_len))) + goto fail; + break; + case IPPROTO_IGMP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP) + if (!tcf_csum_ipv4_igmp(skb, + iph->ihl * 4, ntohs(iph->tot_len))) + goto fail; + break; + case IPPROTO_TCP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) + if (!tcf_csum_ipv4_tcp(skb, iph, + iph->ihl * 4, ntohs(iph->tot_len))) + goto fail; + break; + case IPPROTO_UDP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) + if (!tcf_csum_ipv4_udp(skb, iph, + iph->ihl * 4, ntohs(iph->tot_len), 0)) + goto fail; + break; + case IPPROTO_UDPLITE: + if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) + if (!tcf_csum_ipv4_udp(skb, iph, + iph->ihl * 4, ntohs(iph->tot_len), 1)) + goto fail; + break; + } + + if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) { + if (skb_cloned(skb) && + !skb_clone_writable(skb, sizeof(*iph) + ntkoff) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + goto fail; + + ip_send_check(iph); + } + + return 1; + +fail: + return 0; +} + +static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, + unsigned int ixhl, unsigned int *pl) +{ + int off, len, optlen; + unsigned char *xh = (void *)ip6xh; + + off = sizeof(*ip6xh); + len = ixhl - off; + + while (len > 1) { + switch (xh[off]) + { + case IPV6_TLV_PAD0: + optlen = 1; + break; + case IPV6_TLV_JUMBO: + optlen = xh[off + 1] + 2; + if (optlen != 6 || len < 6 || (off & 3) != 2) + /* wrong jumbo option length/alignment */ + return 0; + *pl = ntohl(*(__be32 *)(xh + off + 2)); + goto done; + default: + optlen = xh[off + 1] + 2; + if (optlen > len) + /* ignore obscure options */ + goto done; + break; + } + off += optlen; + len -= optlen; + } + +done: + return 1; +} + +static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags) +{ + struct ipv6hdr *ip6h; + struct ipv6_opt_hdr *ip6xh; + unsigned int hl, ixhl; + unsigned int pl; + int ntkoff; + u8 nexthdr; + + ntkoff = skb_network_offset(skb); + + hl = sizeof(*ip6h); + + if (!pskb_may_pull(skb, hl + ntkoff)) + goto fail; + + ip6h = ipv6_hdr(skb); + + pl = ntohs(ip6h->payload_len); + nexthdr = ip6h->nexthdr; + + do { + switch (nexthdr) { + case NEXTHDR_FRAGMENT: + goto ignore_skb; + case NEXTHDR_ROUTING: + case NEXTHDR_HOP: + case NEXTHDR_DEST: + if (!pskb_may_pull(skb, hl + sizeof(*ip6xh) + ntkoff)) + goto fail; + ip6xh = (void *)(skb_network_header(skb) + hl); + ixhl = ipv6_optlen(ip6xh); + if (!pskb_may_pull(skb, hl + ixhl + ntkoff)) + goto fail; + if ((nexthdr == NEXTHDR_HOP) && + !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl))) + goto fail; + nexthdr = ip6xh->nexthdr; + hl += ixhl; + break; + case IPPROTO_ICMPV6: + if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) + if (!tcf_csum_ipv6_icmp(skb, ip6h, + hl, pl + sizeof(*ip6h))) + goto fail; + goto done; + case IPPROTO_TCP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) + if (!tcf_csum_ipv6_tcp(skb, ip6h, + hl, pl + sizeof(*ip6h))) + goto fail; + goto done; + case IPPROTO_UDP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) + if (!tcf_csum_ipv6_udp(skb, ip6h, + hl, pl + sizeof(*ip6h), 0)) + goto fail; + goto done; + case IPPROTO_UDPLITE: + if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) + if (!tcf_csum_ipv6_udp(skb, ip6h, + hl, pl + sizeof(*ip6h), 1)) + goto fail; + goto done; + default: + goto ignore_skb; + } + } while (pskb_may_pull(skb, hl + 1 + ntkoff)); + +done: +ignore_skb: + return 1; + +fail: + return 0; +} + +static int tcf_csum(struct sk_buff *skb, + struct tc_action *a, struct tcf_result 
*res) +{ + struct tcf_csum *p = a->priv; + int action; + u32 update_flags; + + spin_lock(&p->tcf_lock); + p->tcf_tm.lastuse = jiffies; + p->tcf_bstats.bytes += qdisc_pkt_len(skb); + p->tcf_bstats.packets++; + action = p->tcf_action; + update_flags = p->update_flags; + spin_unlock(&p->tcf_lock); + + if (unlikely(action == TC_ACT_SHOT)) + goto drop; + + switch (skb->protocol) { + case cpu_to_be16(ETH_P_IP): + if (!tcf_csum_ipv4(skb, update_flags)) + goto drop; + break; + case cpu_to_be16(ETH_P_IPV6): + if (!tcf_csum_ipv6(skb, update_flags)) + goto drop; + break; + } + + return action; + +drop: + spin_lock(&p->tcf_lock); + p->tcf_qstats.drops++; + spin_unlock(&p->tcf_lock); + return TC_ACT_SHOT; +} + +static int tcf_csum_dump(struct sk_buff *skb, + struct tc_action *a, int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_csum *p = a->priv; + struct tc_csum opt = { + .update_flags = p->update_flags, + + .index = p->tcf_index, + .action = p->tcf_action, + .refcnt = p->tcf_refcnt - ref, + .bindcnt = p->tcf_bindcnt - bind, + }; + struct tcf_t t; + + NLA_PUT(skb, TCA_CSUM_PARMS, sizeof(opt), &opt); + t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(p->tcf_tm.expires); + NLA_PUT(skb, TCA_CSUM_TM, sizeof(t), &t); + + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tc_action_ops act_csum_ops = { + .kind = "csum", + .hinfo = &csum_hash_info, + .type = TCA_ACT_CSUM, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_csum, + .dump = tcf_csum_dump, + .cleanup = tcf_csum_cleanup, + .lookup = tcf_hash_search, + .init = tcf_csum_init, + .walk = tcf_generic_walker +}; + +MODULE_DESCRIPTION("Checksum updating actions"); +MODULE_LICENSE("GPL"); + +static int __init csum_init_module(void) +{ + return tcf_register_action(&act_csum_ops); +} + +static void __exit csum_cleanup_module(void) +{ + tcf_unregister_action(&act_csum_ops); +} + +module_init(csum_init_module); +module_exit(csum_cleanup_module); -- cgit v1.2.3 From a57eb940d130477a799dfb24a570ee04979c0f7f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 Jun 2010 16:49:16 -0700 Subject: rcu: Add a TINY_PREEMPT_RCU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement a small-memory-footprint uniprocessor-only implementation of preemptible RCU. This implementation uses but a single blocked-tasks list rather than the combinatorial number used per leaf rcu_node by TREE_PREEMPT_RCU, which reduces memory consumption and greatly simplifies processing. This version also takes advantage of uniprocessor execution to accelerate grace periods in the case where there are no readers. The general design is otherwise broadly similar to that of TREE_PREEMPT_RCU. This implementation is a step towards having RCU implementation driven off of the SMP and PREEMPT kernel configuration variables, which can happen once this implementation has accumulated sufficient experience. Removed ACCESS_ONCE() from __rcu_read_unlock() and added barrier() as suggested by Steve Rostedt in order to avoid the compiler-reordering issue noted by Mathieu Desnoyers (http://lkml.org/lkml/2010/8/16/183). As can be seen below, CONFIG_TINY_PREEMPT_RCU represents almost 5Kbyte savings compared to CONFIG_TREE_PREEMPT_RCU. Of course, for non-real-time workloads, CONFIG_TINY_RCU is even better. 
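For reference, the reader/updater pattern that any preemptible-RCU implementation must support is sketched below. This is a minimal illustration only: the "foo" structure, its updater, and all names are hypothetical and not part of this patch.

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int a;
	struct rcu_head rcu;
};
static struct foo *global_foo;	/* assumed initialized before readers run */

static int foo_get_a(void)
{
	int a;

	rcu_read_lock();	/* under TINY_PREEMPT_RCU the reader may be preempted here */
	a = rcu_dereference(global_foo)->a;
	rcu_read_unlock();	/* outermost unlock may invoke rcu_read_unlock_special() */
	return a;
}

static void foo_reclaim(struct rcu_head *rp)
{
	kfree(container_of(rp, struct foo, rcu));
}

static int foo_update_a(int new_a)	/* caller must serialize updaters */
{
	struct foo *newp = kmalloc(sizeof(*newp), GFP_KERNEL);
	struct foo *oldp;

	if (newp == NULL)
		return -ENOMEM;
	newp->a = new_a;
	oldp = global_foo;
	rcu_assign_pointer(global_foo, newp);
	call_rcu(&oldp->rcu, foo_reclaim);	/* queued at ->nexttail in this implementation */
	return 0;
}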
CONFIG_TREE_PREEMPT_RCU

   text    data     bss     dec  filename
     13       0       0      13  kernel/rcupdate.o
   6170     825      28    7023  kernel/rcutree.o
                           ----
                           7036 Total

CONFIG_TINY_PREEMPT_RCU

   text    data     bss     dec  filename
     13       0       0      13  kernel/rcupdate.o
   2081      81       8    2170  kernel/rcutiny.o
                           ----
                           2183 Total

CONFIG_TINY_RCU (non-preemptible)

   text    data     bss     dec  filename
     13       0       0      13  kernel/rcupdate.o
    719      25       0     744  kernel/rcutiny.o
                            ---
                            757 Total

Requested-by: Loïc Minier Signed-off-by: Paul E. McKenney --- include/linux/hardirq.h | 2 +- include/linux/init_task.h | 10 +- include/linux/rcupdate.h | 3 +- include/linux/rcutiny.h | 126 +++++++--- include/linux/rcutree.h | 2 + include/linux/sched.h | 10 +- init/Kconfig | 16 +- kernel/Makefile | 1 + kernel/rcutiny.c | 33 ++- kernel/rcutiny_plugin.h | 582 +++++++++++++++++++++++++++++++++++++++++++++- 10 files changed, 717 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index d5b387669dab..1f4517d55b19 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -139,7 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk) #endif #if defined(CONFIG_NO_HZ) -#if defined(CONFIG_TINY_RCU) +#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) extern void rcu_enter_nohz(void); extern void rcu_exit_nohz(void); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6460fc65ed6b..2fea6c8ef6ba 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -82,11 +82,17 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_FULL_SET #ifdef CONFIG_TREE_PREEMPT_RCU +#define INIT_TASK_RCU_TREE_PREEMPT() \ + .rcu_blocked_node = NULL, +#else +#define INIT_TASK_RCU_TREE_PREEMPT(tsk) +#endif +#ifdef CONFIG_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ .rcu_read_lock_nesting = 0, \ .rcu_read_unlock_special = 0, \ - .rcu_blocked_node = NULL, \ - .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), + .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ + INIT_TASK_RCU_TREE_PREEMPT() #else #define INIT_TASK_RCU_PREEMPT(tsk) #endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 27b44b3e3024..24b896649384 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -58,7 +58,6 @@ struct rcu_head { }; /* Exported common interfaces */ -extern void rcu_barrier(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); extern void synchronize_sched_expedited(void); @@ -69,7 +68,7 @@ extern void rcu_init(void); #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include -#elif defined(CONFIG_TINY_RCU) +#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) #include #else #error "Unknown RCU implementation specified to kernel configuration" diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index e2e893144a84..4cc5eba41616 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -29,66 +29,51 @@ void rcu_sched_qs(int cpu); void rcu_bh_qs(int cpu); -static inline void rcu_note_context_switch(int cpu) -{ - rcu_sched_qs(cpu); -} +#ifdef CONFIG_TINY_RCU #define __rcu_read_lock() preempt_disable() #define __rcu_read_unlock() preempt_enable() +#else /* #ifdef CONFIG_TINY_RCU */ +void __rcu_read_lock(void); +void __rcu_read_unlock(void); +#endif /* #else #ifdef CONFIG_TINY_RCU */ #define __rcu_read_lock_bh() local_bh_disable() #define __rcu_read_unlock_bh() local_bh_enable() -#define call_rcu_sched call_rcu +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct
rcu_head *rcu)); #define rcu_init_sched() do { } while (0) -extern void rcu_check_callbacks(int cpu, int user); -static inline int rcu_needs_cpu(int cpu) -{ - return 0; -} +extern void synchronize_sched(void); -/* - * Return the number of grace periods. - */ -static inline long rcu_batches_completed(void) -{ - return 0; -} +#ifdef CONFIG_TINY_RCU -/* - * Return the number of bottom-half grace periods. - */ -static inline long rcu_batches_completed_bh(void) -{ - return 0; -} +#define call_rcu call_rcu_sched -static inline void rcu_force_quiescent_state(void) +static inline void synchronize_rcu(void) { + synchronize_sched(); } -static inline void rcu_bh_force_quiescent_state(void) +static inline void synchronize_rcu_expedited(void) { + synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */ } -static inline void rcu_sched_force_quiescent_state(void) +static inline void rcu_barrier(void) { + rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */ } -extern void synchronize_sched(void); +#else /* #ifdef CONFIG_TINY_RCU */ -static inline void synchronize_rcu(void) -{ - synchronize_sched(); -} +void synchronize_rcu(void); +void rcu_barrier(void); +void synchronize_rcu_expedited(void); -static inline void synchronize_rcu_bh(void) -{ - synchronize_sched(); -} +#endif /* #else #ifdef CONFIG_TINY_RCU */ -static inline void synchronize_rcu_expedited(void) +static inline void synchronize_rcu_bh(void) { synchronize_sched(); } @@ -117,15 +102,82 @@ static inline void rcu_exit_nohz(void) #endif /* #else #ifdef CONFIG_NO_HZ */ +#ifdef CONFIG_TINY_RCU + +static inline void rcu_preempt_note_context_switch(void) +{ +} + static inline void exit_rcu(void) { } +static inline int rcu_needs_cpu(int cpu) +{ + return 0; +} + static inline int rcu_preempt_depth(void) { return 0; } +#else /* #ifdef CONFIG_TINY_RCU */ + +void rcu_preempt_note_context_switch(void); +extern void exit_rcu(void); +int rcu_preempt_needs_cpu(void); + +static inline int rcu_needs_cpu(int cpu) +{ + return rcu_preempt_needs_cpu(); +} + +/* + * Defined as macro as it is a very low level header + * included from areas that don't even know about current + * FIXME: combine with include/linux/rcutree.h into rcupdate.h. + */ +#define rcu_preempt_depth() (current->rcu_read_lock_nesting) + +#endif /* #else #ifdef CONFIG_TINY_RCU */ + +static inline void rcu_note_context_switch(int cpu) +{ + rcu_sched_qs(cpu); + rcu_preempt_note_context_switch(); +} + +extern void rcu_check_callbacks(int cpu, int user); + +/* + * Return the number of grace periods. + */ +static inline long rcu_batches_completed(void) +{ + return 0; +} + +/* + * Return the number of bottom-half grace periods. 
+ */ +static inline long rcu_batches_completed_bh(void) +{ + return 0; +} + +static inline void rcu_force_quiescent_state(void) +{ +} + +static inline void rcu_bh_force_quiescent_state(void) +{ +} + +static inline void rcu_sched_force_quiescent_state(void) +{ +} + #ifdef CONFIG_DEBUG_LOCK_ALLOC extern int rcu_scheduler_active __read_mostly; diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c0ed1c056f29..c13b85dd22bc 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -95,6 +95,8 @@ static inline void synchronize_rcu_bh_expedited(void) synchronize_sched_expedited(); } +extern void rcu_barrier(void); + extern void rcu_check_callbacks(int cpu, int user); extern long rcu_batches_completed(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c756666c111..e18473f0eb78 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1202,11 +1202,13 @@ struct task_struct { unsigned int policy; cpumask_t cpus_allowed; -#ifdef CONFIG_TREE_PREEMPT_RCU +#ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; char rcu_read_unlock_special; - struct rcu_node *rcu_blocked_node; struct list_head rcu_node_entry; +#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TREE_PREEMPT_RCU + struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) @@ -1740,7 +1742,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) -#ifdef CONFIG_TREE_PREEMPT_RCU +#ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ @@ -1749,7 +1751,9 @@ static inline void rcu_copy_process(struct task_struct *p) { p->rcu_read_lock_nesting = 0; p->rcu_read_unlock_special = 0; +#ifdef CONFIG_TREE_PREEMPT_RCU p->rcu_blocked_node = NULL; +#endif INIT_LIST_HEAD(&p->rcu_node_entry); } diff --git a/init/Kconfig b/init/Kconfig index dbc08baad77e..a619a1ac7f4c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -348,7 +348,7 @@ config TREE_RCU smaller systems. config TREE_PREEMPT_RCU - bool "Preemptable tree-based hierarchical RCU" + bool "Preemptible tree-based hierarchical RCU" depends on PREEMPT help This option selects the RCU implementation that is @@ -366,8 +366,22 @@ config TINY_RCU is not required. This option greatly reduces the memory footprint of RCU. +config TINY_PREEMPT_RCU + bool "Preemptible UP-only small-memory-footprint RCU" + depends on !SMP && PREEMPT + help + This option selects the RCU implementation that is designed + for real-time UP systems. This option greatly reduces the + memory footprint of RCU. + endchoice +config PREEMPT_RCU + def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU ) + help + This option enables preemptible-RCU code that is common between + the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. 
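One payoff of this umbrella symbol is that code shared by the two preemptible flavors can test a single Kconfig option. A minimal sketch of the pattern, mirroring what a later commit in this series does in include/linux/rcupdate.h:

/* Sketch only: common code keys off CONFIG_PREEMPT_RCU rather than
 * enumerating TREE_PREEMPT_RCU and TINY_PREEMPT_RCU separately. */
#ifdef CONFIG_PREEMPT_RCU
extern void __rcu_read_lock(void);
extern void __rcu_read_unlock(void);
#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
#else /* #ifdef CONFIG_PREEMPT_RCU */
static inline void __rcu_read_lock(void)
{
	preempt_disable();
}
static inline void __rcu_read_unlock(void)
{
	preempt_enable();
}
static inline int rcu_preempt_depth(void)
{
	return 0;
}
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */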
+ config RCU_TRACE bool "Enable tracing for RCU" depends on TREE_RCU || TREE_PREEMPT_RCU diff --git a/kernel/Makefile b/kernel/Makefile index 0b72d1a74be0..17046b6e7c90 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_TREE_RCU) += rcutree.o obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o obj-$(CONFIG_TINY_RCU) += rcutiny.o +obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 196ec02f8be0..d806735342ac 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -59,6 +59,14 @@ int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* Forward declarations for rcutiny_plugin.h. */ +static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); +static void __call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu), + struct rcu_ctrlblk *rcp); + +#include "rcutiny_plugin.h" + #ifdef CONFIG_NO_HZ static long rcu_dynticks_nesting = 1; @@ -140,6 +148,7 @@ void rcu_check_callbacks(int cpu, int user) rcu_sched_qs(cpu); else if (!in_softirq()) rcu_bh_qs(cpu); + rcu_preempt_check_callbacks(); } /* @@ -162,6 +171,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) *rcp->donetail = NULL; if (rcp->curtail == rcp->donetail) rcp->curtail = &rcp->rcucblist; + rcu_preempt_remove_callbacks(rcp); rcp->donetail = &rcp->rcucblist; local_irq_restore(flags); @@ -182,6 +192,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) { __rcu_process_callbacks(&rcu_sched_ctrlblk); __rcu_process_callbacks(&rcu_bh_ctrlblk); + rcu_preempt_process_callbacks(); } /* @@ -223,15 +234,15 @@ static void __call_rcu(struct rcu_head *head, } /* - * Post an RCU callback to be invoked after the end of an RCU grace + * Post an RCU callback to be invoked after the end of an RCU-sched grace * period. But since we have but one CPU, that would be after any * quiescent state. */ -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { __call_rcu(head, func, &rcu_sched_ctrlblk); } -EXPORT_SYMBOL_GPL(call_rcu); +EXPORT_SYMBOL_GPL(call_rcu_sched); /* * Post an RCU bottom-half callback to be invoked after any subsequent @@ -243,20 +254,6 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu_bh); -void rcu_barrier(void) -{ - struct rcu_synchronize rcu; - - init_rcu_head_on_stack(&rcu.head); - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); - destroy_rcu_head_on_stack(&rcu.head); -} -EXPORT_SYMBOL_GPL(rcu_barrier); - void rcu_barrier_bh(void) { struct rcu_synchronize rcu; @@ -289,5 +286,3 @@ void __init rcu_init(void) { open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); } - -#include "rcutiny_plugin.h" diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index d223a92bc742..e6bc1b447c6c 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -1,7 +1,7 @@ /* - * Read-Copy Update mechanism for mutual exclusion (tree-based version) + * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition * Internal non-public definitions that provide either classic - * or preemptable semantics. 
+ * or preemptible semantics. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,11 +17,587 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * Copyright IBM Corporation, 2009 + * Copyright (c) 2010 Linaro * * Author: Paul E. McKenney */ +#ifdef CONFIG_TINY_PREEMPT_RCU + +#include + +/* FIXME: merge with definitions in kernel/rcutree.h. */ +#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) +#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) + +/* Global control variables for preemptible RCU. */ +struct rcu_preempt_ctrlblk { + struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */ + struct rcu_head **nexttail; + /* Tasks blocked in a preemptible RCU */ + /* read-side critical section while an */ + /* preemptible-RCU grace period is in */ + /* progress must wait for a later grace */ + /* period. This pointer points to the */ + /* ->next pointer of the last task that */ + /* must wait for a later grace period, or */ + /* to &->rcb.rcucblist if there is no */ + /* such task. */ + struct list_head blkd_tasks; + /* Tasks blocked in RCU read-side critical */ + /* section. Tasks are placed at the head */ + /* of this list and age towards the tail. */ + struct list_head *gp_tasks; + /* Pointer to the first task blocking the */ + /* current grace period, or NULL if there */ + /* is not such task. */ + struct list_head *exp_tasks; + /* Pointer to first task blocking the */ + /* current expedited grace period, or NULL */ + /* if there is no such task. If there */ + /* is no current expedited grace period, */ + /* then there cannot be any such task. */ + u8 gpnum; /* Current grace period. */ + u8 gpcpu; /* Last grace period blocked by the CPU. */ + u8 completed; /* Last grace period completed. */ + /* If all three are equal, RCU is idle. */ +}; + +static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { + .rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist, + .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, + .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, + .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), +}; + +static int rcu_preempted_readers_exp(void); +static void rcu_report_exp_done(void); + +/* + * Return true if the CPU has not yet responded to the current grace period. + */ +static int rcu_cpu_cur_gp(void) +{ + return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum; +} + +/* + * Check for a running RCU reader. Because there is only one CPU, + * there can be but one running RCU reader at a time. ;-) + */ +static int rcu_preempt_running_reader(void) +{ + return current->rcu_read_lock_nesting; +} + +/* + * Check for preempted RCU readers blocking any grace period. + * If the caller needs a reliable answer, it must disable hard irqs. + */ +static int rcu_preempt_blocked_readers_any(void) +{ + return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks); +} + +/* + * Check for preempted RCU readers blocking the current grace period. + * If the caller needs a reliable answer, it must disable hard irqs. + */ +static int rcu_preempt_blocked_readers_cgp(void) +{ + return rcu_preempt_ctrlblk.gp_tasks != NULL; +} + +/* + * Return true if another preemptible-RCU grace period is needed. + */ +static int rcu_preempt_needs_another_gp(void) +{ + return *rcu_preempt_ctrlblk.rcb.curtail != NULL; +} + +/* + * Return true if a preemptible-RCU grace period is in progress. 
+ * The caller must disable hardirqs. + */ +static int rcu_preempt_gp_in_progress(void) +{ + return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum; +} + +/* + * Record a preemptible-RCU quiescent state for the specified CPU. Note + * that this just means that the task currently running on the CPU is + * in a quiescent state. There might be any number of tasks blocked + * while in an RCU read-side critical section. + * + * Unlike the other rcu_*_qs() functions, callers to this function + * must disable irqs in order to protect the assignment to + * ->rcu_read_unlock_special. + * + * Because this is a single-CPU implementation, the only way a grace + * period can end is if the CPU is in a quiescent state. The reason is + * that a blocked preemptible-RCU reader can exit its critical section + * only if the CPU is running it at the time. Therefore, when the + * last task blocking the current grace period exits its RCU read-side + * critical section, neither the CPU nor blocked tasks will be stopping + * the current grace period. (In contrast, SMP implementations + * might have CPUs running in RCU read-side critical sections that + * block later grace periods -- but this is not possible given only + * one CPU.) + */ +static void rcu_preempt_cpu_qs(void) +{ + /* Record both CPU and task as having responded to current GP. */ + rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; + current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; + + /* + * If there is no GP, or if blocked readers are still blocking GP, + * then there is nothing more to do. + */ + if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) + return; + + /* Advance callbacks. */ + rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum; + rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail; + rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail; + + /* If there are no blocked readers, next GP is done instantly. */ + if (!rcu_preempt_blocked_readers_any()) + rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; + + /* If there are done callbacks, make RCU_SOFTIRQ process them. */ + if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) + raise_softirq(RCU_SOFTIRQ); +} + +/* + * Start a new RCU grace period if warranted. Hard irqs must be disabled. + */ +static void rcu_preempt_start_gp(void) +{ + if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) { + + /* Official start of GP. */ + rcu_preempt_ctrlblk.gpnum++; + + /* Any blocked RCU readers block new GP. */ + if (rcu_preempt_blocked_readers_any()) + rcu_preempt_ctrlblk.gp_tasks = + rcu_preempt_ctrlblk.blkd_tasks.next; + + /* If there is no running reader, CPU is done with GP. */ + if (!rcu_preempt_running_reader()) + rcu_preempt_cpu_qs(); + } +} + +/* + * We have entered the scheduler, and the current task might soon be + * context-switched away from. If this task is in an RCU read-side + * critical section, we will no longer be able to rely on the CPU to + * record that fact, so we enqueue the task on the blkd_tasks list. + * If the task started after the current grace period began, as recorded + * by ->gpcpu, we enqueue at the beginning of the list. Otherwise + * before the element referenced by ->gp_tasks (or at the tail if + * ->gp_tasks is NULL) and point ->gp_tasks at the newly added element. + * The task will dequeue itself when it exits the outermost enclosing + * RCU read-side critical section. 
Therefore, the current grace period + * cannot be permitted to complete until the ->gp_tasks pointer becomes + * NULL. + * + * Caller must disable preemption. + */ +void rcu_preempt_note_context_switch(void) +{ + struct task_struct *t = current; + unsigned long flags; + + local_irq_save(flags); /* must exclude scheduler_tick(). */ + if (rcu_preempt_running_reader() && + (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { + + /* Possibly blocking in an RCU read-side critical section. */ + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; + + /* + * If this CPU has already checked in, then this task + * will hold up the next grace period rather than the + * current grace period. Queue the task accordingly. + * If the task is queued for the current grace period + * (i.e., this CPU has not yet passed through a quiescent + * state for the current grace period), then as long + * as that task remains queued, the current grace period + * cannot end. + */ + list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks); + if (rcu_cpu_cur_gp()) + rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry; + } + + /* + * Either we were not in an RCU read-side critical section to + * begin with, or we have now recorded that critical section + * globally. Either way, we can now note a quiescent state + * for this CPU. Again, if we were in an RCU read-side critical + * section, and if that critical section was blocking the current + * grace period, then the fact that the task has been enqueued + * means that current grace period continues to be blocked. + */ + rcu_preempt_cpu_qs(); + local_irq_restore(flags); +} + +/* + * Tiny-preemptible RCU implementation for rcu_read_lock(). + * Just increment ->rcu_read_lock_nesting, shared state will be updated + * if we block. + */ +void __rcu_read_lock(void) +{ + current->rcu_read_lock_nesting++; + barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */ +} +EXPORT_SYMBOL_GPL(__rcu_read_lock); + +/* + * Handle special cases during rcu_read_unlock(), such as needing to + * notify RCU core processing or task having blocked during the RCU + * read-side critical section. + */ +static void rcu_read_unlock_special(struct task_struct *t) +{ + int empty; + int empty_exp; + unsigned long flags; + struct list_head *np; + int special; + + /* + * NMI handlers cannot block and cannot safely manipulate state. + * They therefore cannot possibly be special, so just leave. + */ + if (in_nmi()) + return; + + local_irq_save(flags); + + /* + * If RCU core is waiting for this CPU to exit critical section, + * let it know that we have done so. + */ + special = t->rcu_read_unlock_special; + if (special & RCU_READ_UNLOCK_NEED_QS) + rcu_preempt_cpu_qs(); + + /* Hardware IRQ handlers cannot block. */ + if (in_irq()) { + local_irq_restore(flags); + return; + } + + /* Clean up if blocked during RCU read-side critical section. */ + if (special & RCU_READ_UNLOCK_BLOCKED) { + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; + + /* + * Remove this task from the ->blkd_tasks list and adjust + * any pointers that might have been referencing it. 
+ */ + empty = !rcu_preempt_blocked_readers_cgp(); + empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; + np = t->rcu_node_entry.next; + if (np == &rcu_preempt_ctrlblk.blkd_tasks) + np = NULL; + list_del(&t->rcu_node_entry); + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) + rcu_preempt_ctrlblk.gp_tasks = np; + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) + rcu_preempt_ctrlblk.exp_tasks = np; + INIT_LIST_HEAD(&t->rcu_node_entry); + + /* + * If this was the last task on the current list, and if + * we aren't waiting on the CPU, report the quiescent state + * and start a new grace period if needed. + */ + if (!empty && !rcu_preempt_blocked_readers_cgp()) { + rcu_preempt_cpu_qs(); + rcu_preempt_start_gp(); + } + + /* + * If this was the last task on the expedited lists, + * then we need wake up the waiting task. + */ + if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) + rcu_report_exp_done(); + } + local_irq_restore(flags); +} + +/* + * Tiny-preemptible RCU implementation for rcu_read_unlock(). + * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost + * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then + * invoke rcu_read_unlock_special() to clean up after a context switch + * in an RCU read-side critical section and other special cases. + */ +void __rcu_read_unlock(void) +{ + struct task_struct *t = current; + + barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */ + --t->rcu_read_lock_nesting; + barrier(); /* decrement before load of ->rcu_read_unlock_special */ + if (t->rcu_read_lock_nesting == 0 && + unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) + rcu_read_unlock_special(t); +#ifdef CONFIG_PROVE_LOCKING + WARN_ON_ONCE(t->rcu_read_lock_nesting < 0); +#endif /* #ifdef CONFIG_PROVE_LOCKING */ +} +EXPORT_SYMBOL_GPL(__rcu_read_unlock); + +/* + * Check for a quiescent state from the current CPU. When a task blocks, + * the task is recorded in the rcu_preempt_ctrlblk structure, which is + * checked elsewhere. This is called from the scheduling-clock interrupt. + * + * Caller must disable hard irqs. + */ +static void rcu_preempt_check_callbacks(void) +{ + struct task_struct *t = current; + + if (!rcu_preempt_running_reader() && rcu_preempt_gp_in_progress()) + rcu_preempt_cpu_qs(); + if (&rcu_preempt_ctrlblk.rcb.rcucblist != + rcu_preempt_ctrlblk.rcb.donetail) + raise_softirq(RCU_SOFTIRQ); + if (rcu_preempt_gp_in_progress() && rcu_preempt_running_reader()) + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; +} + +/* + * TINY_PREEMPT_RCU has an extra callback-list tail pointer to + * update, so this is invoked from __rcu_process_callbacks() to + * handle that case. Of course, it is invoked for all flavors of + * RCU, but RCU callbacks can appear only on one of the lists, and + * neither ->nexttail nor ->donetail can possibly be NULL, so there + * is no need for an explicit check. + */ +static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) +{ + if (rcu_preempt_ctrlblk.nexttail == rcp->donetail) + rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist; +} + +/* + * Process callbacks for preemptible RCU. + */ +static void rcu_preempt_process_callbacks(void) +{ + __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); +} + +/* + * Queue a preemptible -RCU callback for invocation after a grace period. 
+ */ +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + unsigned long flags; + + debug_rcu_head_queue(head); + head->func = func; + head->next = NULL; + + local_irq_save(flags); + *rcu_preempt_ctrlblk.nexttail = head; + rcu_preempt_ctrlblk.nexttail = &head->next; + rcu_preempt_start_gp(); /* checks to see if GP needed. */ + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(call_rcu); + +void rcu_barrier(void) +{ + struct rcu_synchronize rcu; + + init_rcu_head_on_stack(&rcu.head); + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); +} +EXPORT_SYMBOL_GPL(rcu_barrier); + +/* + * synchronize_rcu - wait until a grace period has elapsed. + * + * Control will return to the caller some time after a full grace + * period has elapsed, in other words after all currently executing RCU + * read-side critical sections have completed. RCU read-side critical + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), + * and may be nested. + */ +void synchronize_rcu(void) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (!rcu_scheduler_active) + return; +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + + WARN_ON_ONCE(rcu_preempt_running_reader()); + if (!rcu_preempt_blocked_readers_any()) + return; + + /* Once we get past the fastpath checks, same code as rcu_barrier(). */ + rcu_barrier(); +} +EXPORT_SYMBOL_GPL(synchronize_rcu); + +static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); +static unsigned long sync_rcu_preempt_exp_count; +static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); + +/* + * Return non-zero if there are any tasks in RCU read-side critical + * sections blocking the current preemptible-RCU expedited grace period. + * If there is no preemptible-RCU expedited grace period currently in + * progress, returns zero unconditionally. + */ +static int rcu_preempted_readers_exp(void) +{ + return rcu_preempt_ctrlblk.exp_tasks != NULL; +} + +/* + * Report the exit from RCU read-side critical section for the last task + * that queued itself during or before the current expedited preemptible-RCU + * grace period. + */ +static void rcu_report_exp_done(void) +{ + wake_up(&sync_rcu_preempt_exp_wq); +} + +/* + * Wait for an rcu-preempt grace period, but expedite it. The basic idea + * is to rely in the fact that there is but one CPU, and that it is + * illegal for a task to invoke synchronize_rcu_expedited() while in a + * preemptible-RCU read-side critical section. Therefore, any such + * critical sections must correspond to blocked tasks, which must therefore + * be on the ->blkd_tasks list. So just record the current head of the + * list in the ->exp_tasks pointer, and wait for all tasks including and + * after the task pointed to by ->exp_tasks to drain. + */ +void synchronize_rcu_expedited(void) +{ + unsigned long flags; + struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk; + unsigned long snap; + + barrier(); /* ensure prior action seen before grace period. */ + + WARN_ON_ONCE(rcu_preempt_running_reader()); + + /* + * Acquire lock so that there is only one preemptible RCU grace + * period in flight. Of course, if someone does the expedited + * grace period for us while we are acquiring the lock, just leave. + */ + snap = sync_rcu_preempt_exp_count + 1; + mutex_lock(&sync_rcu_preempt_exp_mutex); + if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count)) + goto unlock_mb_ret; /* Others did our work for us. 
*/ + + local_irq_save(flags); + + /* + * All RCU readers have to already be on blkd_tasks because + * we cannot legally be executing in an RCU read-side critical + * section. + */ + + /* Snapshot current head of ->blkd_tasks list. */ + rpcp->exp_tasks = rpcp->blkd_tasks.next; + if (rpcp->exp_tasks == &rpcp->blkd_tasks) + rpcp->exp_tasks = NULL; + local_irq_restore(flags); + + /* Wait for tail of ->blkd_tasks list to drain. */ + if (rcu_preempted_readers_exp()) + wait_event(sync_rcu_preempt_exp_wq, + !rcu_preempted_readers_exp()); + + /* Clean up and exit. */ + barrier(); /* ensure expedited GP seen before counter increment. */ + sync_rcu_preempt_exp_count++; +unlock_mb_ret: + mutex_unlock(&sync_rcu_preempt_exp_mutex); + barrier(); /* ensure subsequent action seen after grace period. */ +} +EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); + +/* + * Does preemptible RCU need the CPU to stay out of dynticks mode? + */ +int rcu_preempt_needs_cpu(void) +{ + if (!rcu_preempt_running_reader()) + rcu_preempt_cpu_qs(); + return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; +} + +/* + * Check for a task exiting while in a preemptible -RCU read-side + * critical section, clean up if so. No need to issue warnings, + * as debug_check_no_locks_held() already does this if lockdep + * is enabled. + */ +void exit_rcu(void) +{ + struct task_struct *t = current; + + if (t->rcu_read_lock_nesting == 0) + return; + t->rcu_read_lock_nesting = 1; + rcu_read_unlock(); +} + +#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ + +/* + * Because preemptible RCU does not exist, it never has any callbacks + * to check. + */ +static void rcu_preempt_check_callbacks(void) +{ +} + +/* + * Because preemptible RCU does not exist, it never has any callbacks + * to remove. + */ +static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) +{ +} + +/* + * Because preemptible RCU does not exist, it never has any callbacks + * to process. + */ +static void rcu_preempt_process_callbacks(void) +{ +} + +#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ + #ifdef CONFIG_DEBUG_LOCK_ALLOC #include -- cgit v1.2.3 From 9079fd7c2e06a92cf27d05224a1f478581916c5b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 7 Aug 2010 21:59:54 -0700 Subject: rcu: update obsolete rcu_read_lock() comment. The comment says that blocking is illegal in rcu_read_lock()-style RCU read-side critical sections, which is no longer entirely true given preemptible RCU. This commit provides a fix. Suggested-by: David Miller Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 24b896649384..d7af96ef6fcf 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -458,7 +458,20 @@ extern int rcu_my_thread_group_empty(void); * will be deferred until the outermost RCU read-side critical section * completes. * - * It is illegal to block while in an RCU read-side critical section. + * You can avoid reading and understanding the next paragraph by + * following this rule: don't put anything in an rcu_read_lock() RCU + * read-side critical section that would block in a !PREEMPT kernel. + * But if you want the full story, read on! + * + * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it + * is illegal to block while in an RCU read-side critical section. 
In + * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU) + * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may + * be preempted, but explicit blocking is illegal. Finally, in preemptible + * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds, + * RCU read-side critical sections may be preempted and they may also + * block, but only when acquiring spinlocks that are subject to priority + * inheritance. */ static inline void rcu_read_lock(void) { -- cgit v1.2.3 From 53d84e004d5e8c018be395c4330dc72fd60bd13e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Aug 2010 14:28:53 -0700 Subject: rcu: permit suppressing current grace period's CPU stall warnings When using a kernel debugger, a long sojourn in the debugger can get you lots of RCU CPU stall warnings once you resume. This might not be helpful, especially if you are using the system console. This patch therefore allows RCU CPU stall warnings to be suppressed, but only for the duration of the current set of grace periods. This differs from Jason's original patch in that it adds support for tiny RCU and preemptible RCU, and uses a slightly different method for suppressing the RCU CPU stall warning messages. Signed-off-by: Jason Wessel Signed-off-by: Paul E. McKenney Tested-by: Jason Wessel --- include/linux/rcutiny.h | 4 ++++ include/linux/rcutree.h | 1 + kernel/rcutree.c | 20 ++++++++++++++++++++ kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 18 ++++++++++++++++++ 5 files changed, 44 insertions(+) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 4cc5eba41616..3fa179784e18 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -178,6 +178,10 @@ static inline void rcu_sched_force_quiescent_state(void) { } +static inline void rcu_cpu_stall_reset(void) +{ +} + #ifdef CONFIG_DEBUG_LOCK_ALLOC extern int rcu_scheduler_active __read_mostly; diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c13b85dd22bc..0726809497ba 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -36,6 +36,7 @@ extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); +extern void rcu_cpu_stall_reset(void); #ifdef CONFIG_TREE_PREEMPT_RCU diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ff214118e4b8..42140a860bb9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -565,6 +565,22 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) return NOTIFY_DONE; } +/** + * rcu_cpu_stall_reset - prevent further stall warnings in current grace period + * + * Set the stall-warning timeout way off into the future, thus preventing + * any RCU CPU stall-warning messages from appearing in the current set of + * RCU grace periods. + * + * The caller must disable hard irqs. 
+ */ +void rcu_cpu_stall_reset(void) +{ + rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; + rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; + rcu_preempt_stall_reset(); +} + static struct notifier_block rcu_panic_block = { .notifier_call = rcu_panic, }; @@ -584,6 +600,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) { } +void rcu_cpu_stall_reset(void) +{ +} + static void __init check_cpu_stall_init(void) { } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index bb4d08695c45..7abd439a7573 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -372,6 +372,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, #ifdef CONFIG_RCU_CPU_STALL_DETECTOR static void rcu_print_detail_task_stall(struct rcu_state *rsp); static void rcu_print_task_stall(struct rcu_node *rnp); +static void rcu_preempt_stall_reset(void); #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 63bb7714fdeb..561410f70d4a 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -417,6 +417,16 @@ static void rcu_print_task_stall(struct rcu_node *rnp) } } +/* + * Suppress preemptible RCU's CPU stall warnings by pushing the + * time of the next stall-warning message comfortably far into the + * future. + */ +static void rcu_preempt_stall_reset(void) +{ + rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; +} + #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* @@ -867,6 +877,14 @@ static void rcu_print_task_stall(struct rcu_node *rnp) { } +/* + * Because preemptible RCU does not exist, there is no need to suppress + * its CPU stall warnings. + */ +static void rcu_preempt_stall_reset(void) +{ +} + #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* -- cgit v1.2.3 From a3dc3fb161f9b4066c0fce22db72638af8baf83b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 13 Aug 2010 16:16:25 -0700 Subject: rcu: repair code-duplication FIXMEs Combine the duplicate definitions of ULONG_CMP_GE(), ULONG_CMP_LT(), and rcu_preempt_depth() into include/linux/rcupdate.h. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 15 +++++++++++++++ include/linux/rcutiny.h | 7 ------- include/linux/rcutree.h | 6 ------ kernel/rcutiny_plugin.h | 4 ---- kernel/rcutree.h | 3 --- 5 files changed, 15 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d7af96ef6fcf..325bad7bbca9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -47,6 +47,9 @@ extern int rcutorture_runnable; /* for sysctl */ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ +#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) +#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) + /** * struct rcu_head - callback structure for use with RCU * @next: next update requests in a list @@ -66,6 +69,18 @@ extern int sched_expedited_torture_stats(char *page); /* Internal to kernel */ extern void rcu_init(void); +#ifdef CONFIG_PREEMPT_RCU + +/* + * Defined as a macro as it is a very low level header included from + * areas that don't even know about current. This gives the rcu_read_lock() + * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other + * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. 
+ */ +#define rcu_preempt_depth() (current->rcu_read_lock_nesting) + +#endif /* #ifdef CONFIG_PREEMPT_RCU */ + #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include #elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 3fa179784e18..c6b11dc5ba0a 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -133,13 +133,6 @@ static inline int rcu_needs_cpu(int cpu) return rcu_preempt_needs_cpu(); } -/* - * Defined as macro as it is a very low level header - * included from areas that don't even know about current - * FIXME: combine with include/linux/rcutree.h into rcupdate.h. - */ -#define rcu_preempt_depth() (current->rcu_read_lock_nesting) - #endif /* #else #ifdef CONFIG_TINY_RCU */ static inline void rcu_note_context_switch(int cpu) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 0726809497ba..54a20c11f98d 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -45,12 +45,6 @@ extern void __rcu_read_unlock(void); extern void synchronize_rcu(void); extern void exit_rcu(void); -/* - * Defined as macro as it is a very low level header - * included from areas that don't even know about current - */ -#define rcu_preempt_depth() (current->rcu_read_lock_nesting) - #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ static inline void __rcu_read_lock(void) diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index e6bc1b447c6c..c5bea1137dcb 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -26,10 +26,6 @@ #include -/* FIXME: merge with definitions in kernel/rcutree.h. */ -#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) -#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) - /* Global control variables for preemptible RCU. */ struct rcu_preempt_ctrlblk { struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */ diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7abd439a7573..7918ba61873f 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -272,9 +272,6 @@ struct rcu_data { #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ -#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) -#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) - /* * RCU global state, including node hierarchy. This hierarchy is * represented in "heap" form in a dense array. The root (first level) -- cgit v1.2.3 From 73d4da4d360136826b36f78f5cf72b29da82c8a6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Aug 2010 10:50:54 -0700 Subject: rcu: Upgrade srcu_read_lock() docbook about SRCU grace periods It is illegal to wait for an SRCU grace period while within the corresponding flavor of SRCU read-side critical section. Therefore, this commit updates the srcu_read_lock() docbook accordingly. Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 6f456a720ff0..58971e891f48 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -139,7 +139,12 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) * @sp: srcu_struct in which to register the new reader. * * Enter an SRCU read-side critical section. Note that SRCU read-side - * critical sections may be nested. + * critical sections may be nested. However, it is illegal to + * call anything that waits on an SRCU grace period for the same + * srcu_struct, whether directly or indirectly. 
Please note that + * one way to indirectly wait on an SRCU grace period is to acquire + * a mutex that is held elsewhere while calling synchronize_srcu() or + * synchronize_srcu_expedited(). */ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) { -- cgit v1.2.3 From 7b0b759b65247cbc66384a912be9acf8d4800636 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 17 Aug 2010 14:18:46 -0700 Subject: rcu: combine duplicate code, courtesy of CONFIG_PREEMPT_RCU The CONFIG_PREEMPT_RCU kernel configuration parameter was recently re-introduced, but as an indication of the type of RCU (preemptible vs. non-preemptible) instead of as selecting a given implementation. This commit uses CONFIG_PREEMPT_RCU to combine duplicate code from include/linux/rcutiny.h and include/linux/rcutree.h into include/linux/rcupdate.h. This commit also combines a few other pieces of duplicate code that have accumulated. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 75 ++++++++++++++++++++++++++++++++++++++++++++++-- include/linux/rcutiny.h | 51 -------------------------------- include/linux/rcutree.h | 50 -------------------------------- kernel/rcutree_plugin.h | 9 ------ 4 files changed, 72 insertions(+), 113 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 325bad7bbca9..89414d67d961 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -61,16 +61,30 @@ struct rcu_head { }; /* Exported common interfaces */ +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)); +extern void synchronize_sched(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); extern void synchronize_sched_expedited(void); extern int sched_expedited_torture_stats(char *page); -/* Internal to kernel */ -extern void rcu_init(void); +static inline void __rcu_read_lock_bh(void) +{ + local_bh_disable(); +} + +static inline void __rcu_read_unlock_bh(void) +{ + local_bh_enable(); +} #ifdef CONFIG_PREEMPT_RCU +extern void __rcu_read_lock(void); +extern void __rcu_read_unlock(void); +void synchronize_rcu(void); + /* * Defined as a macro as it is a very low level header included from * areas that don't even know about current. 
This gives the rcu_read_lock() @@ -79,7 +93,53 @@ extern void rcu_init(void); */ #define rcu_preempt_depth() (current->rcu_read_lock_nesting) -#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#else /* #ifdef CONFIG_PREEMPT_RCU */ + +static inline void __rcu_read_lock(void) +{ + preempt_disable(); +} + +static inline void __rcu_read_unlock(void) +{ + preempt_enable(); +} + +static inline void synchronize_rcu(void) +{ + synchronize_sched(); +} + +static inline int rcu_preempt_depth(void) +{ + return 0; +} + +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + +/* Internal to kernel */ +extern void rcu_init(void); +extern void rcu_sched_qs(int cpu); +extern void rcu_bh_qs(int cpu); +extern void rcu_check_callbacks(int cpu, int user); +struct notifier_block; + +#ifdef CONFIG_NO_HZ + +extern void rcu_enter_nohz(void); +extern void rcu_exit_nohz(void); + +#else /* #ifdef CONFIG_NO_HZ */ + +static inline void rcu_enter_nohz(void) +{ +} + +static inline void rcu_exit_nohz(void) +{ +} + +#endif /* #else #ifdef CONFIG_NO_HZ */ #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include @@ -626,6 +686,8 @@ struct rcu_synchronize { extern void wakeme_after_rcu(struct rcu_head *head); +#ifdef CONFIG_PREEMPT_RCU + /** * call_rcu() - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. @@ -642,6 +704,13 @@ extern void wakeme_after_rcu(struct rcu_head *head); extern void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)); +#else /* #ifdef CONFIG_PREEMPT_RCU */ + +/* In classic RCU, call_rcu() is just call_rcu_sched(). */ +#define call_rcu call_rcu_sched + +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + /** * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index c6b11dc5ba0a..13877cb93a60 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -27,34 +27,10 @@ #include -void rcu_sched_qs(int cpu); -void rcu_bh_qs(int cpu); - -#ifdef CONFIG_TINY_RCU -#define __rcu_read_lock() preempt_disable() -#define __rcu_read_unlock() preempt_enable() -#else /* #ifdef CONFIG_TINY_RCU */ -void __rcu_read_lock(void); -void __rcu_read_unlock(void); -#endif /* #else #ifdef CONFIG_TINY_RCU */ -#define __rcu_read_lock_bh() local_bh_disable() -#define __rcu_read_unlock_bh() local_bh_enable() -extern void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)); - #define rcu_init_sched() do { } while (0) -extern void synchronize_sched(void); - #ifdef CONFIG_TINY_RCU -#define call_rcu call_rcu_sched - -static inline void synchronize_rcu(void) -{ - synchronize_sched(); -} - static inline void synchronize_rcu_expedited(void) { synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! 
*/ @@ -67,7 +43,6 @@ static inline void rcu_barrier(void) #else /* #ifdef CONFIG_TINY_RCU */ -void synchronize_rcu(void); void rcu_barrier(void); void synchronize_rcu_expedited(void); @@ -83,25 +58,6 @@ static inline void synchronize_rcu_bh_expedited(void) synchronize_sched(); } -struct notifier_block; - -#ifdef CONFIG_NO_HZ - -extern void rcu_enter_nohz(void); -extern void rcu_exit_nohz(void); - -#else /* #ifdef CONFIG_NO_HZ */ - -static inline void rcu_enter_nohz(void) -{ -} - -static inline void rcu_exit_nohz(void) -{ -} - -#endif /* #else #ifdef CONFIG_NO_HZ */ - #ifdef CONFIG_TINY_RCU static inline void rcu_preempt_note_context_switch(void) @@ -117,11 +73,6 @@ static inline int rcu_needs_cpu(int cpu) return 0; } -static inline int rcu_preempt_depth(void) -{ - return 0; -} - #else /* #ifdef CONFIG_TINY_RCU */ void rcu_preempt_note_context_switch(void); @@ -141,8 +92,6 @@ static inline void rcu_note_context_switch(int cpu) rcu_preempt_note_context_switch(); } -extern void rcu_check_callbacks(int cpu, int user); - /* * Return the number of grace periods. */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 54a20c11f98d..95518e628794 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,59 +30,23 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H -struct notifier_block; - -extern void rcu_sched_qs(int cpu); -extern void rcu_bh_qs(int cpu); extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); extern void rcu_cpu_stall_reset(void); #ifdef CONFIG_TREE_PREEMPT_RCU -extern void __rcu_read_lock(void); -extern void __rcu_read_unlock(void); -extern void synchronize_rcu(void); extern void exit_rcu(void); #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -static inline void __rcu_read_lock(void) -{ - preempt_disable(); -} - -static inline void __rcu_read_unlock(void) -{ - preempt_enable(); -} - -#define synchronize_rcu synchronize_sched - static inline void exit_rcu(void) { } -static inline int rcu_preempt_depth(void) -{ - return 0; -} - #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ -static inline void __rcu_read_lock_bh(void) -{ - local_bh_disable(); -} -static inline void __rcu_read_unlock_bh(void) -{ - local_bh_enable(); -} - -extern void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)); extern void synchronize_rcu_bh(void); -extern void synchronize_sched(void); extern void synchronize_rcu_expedited(void); static inline void synchronize_rcu_bh_expedited(void) @@ -92,8 +56,6 @@ static inline void synchronize_rcu_bh_expedited(void) extern void rcu_barrier(void); -extern void rcu_check_callbacks(int cpu, int user); - extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); extern long rcu_batches_completed_sched(void); @@ -101,18 +63,6 @@ extern void rcu_force_quiescent_state(void); extern void rcu_bh_force_quiescent_state(void); extern void rcu_sched_force_quiescent_state(void); -#ifdef CONFIG_NO_HZ -void rcu_enter_nohz(void); -void rcu_exit_nohz(void); -#else /* CONFIG_NO_HZ */ -static inline void rcu_enter_nohz(void) -{ -} -static inline void rcu_exit_nohz(void) -{ -} -#endif /* CONFIG_NO_HZ */ - /* A context switch is a grace period for RCU-sched and RCU-bh. 
*/ static inline int rcu_blocking_is_gp(void) { diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 561410f70d4a..87f60f06b18e 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -938,15 +938,6 @@ static void rcu_preempt_process_callbacks(void) { } -/* - * In classic RCU, call_rcu() is just call_rcu_sched(). - */ -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) -{ - call_rcu_sched(head, func); -} -EXPORT_SYMBOL_GPL(call_rcu); - /* * Wait for an rcu-preempt grace period, but make it happen quickly. * But because preemptable RCU does not exist, map to rcu-sched. -- cgit v1.2.3 From 65e6bf484c497f02d47a0faae69ee398cd59cfda Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 19 Aug 2010 21:43:09 -0700 Subject: rcu: add comment stating that list_empty() applies to RCU-protected lists Because list_empty() does not dereference any RCU-protected pointers, and further does not pass such pointers to the caller (so that the caller does not dereference them either), it is safe to use list_empty() on RCU-protected lists. There is no need for a list_empty_rcu(). This commit adds a comment stating this explicitly. Requested-by: Andrew Morton Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index c10b1050dbe6..f31ef61f1c65 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -9,6 +9,15 @@ #include #include +/* + * Why is there no list_empty_rcu()? Because list_empty() serves this + * purpose. The list_empty() function fetches the RCU-protected pointer + * and compares it to the address of the list head, but neither dereferences + * this pointer itself nor provides this pointer to the caller. Therefore, + * it is not necessary to use rcu_dereference(), so that list_empty() can + * be used anywhere you would want to use a list_empty_rcu(). + */ + /* * return the ->next pointer of a list_head in an rcu safe * way, we must not access it directly -- cgit v1.2.3 From b35de43b31040828f83046f40fd34ba33146409d Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 19 Aug 2010 14:13:27 -0700 Subject: kfifo: implement missing __kfifo_skip_r() kfifo_skip() is currently broken because its internal helper function, __kfifo_skip_r(), is missing. Add it.
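A minimal usage sketch of the record-fifo path that this helper completes, in the spirit of samples/kfifo/record-example.c; the fifo name, record contents, and sizes are illustrative only:

#include <linux/kfifo.h>

static STRUCT_KFIFO_REC_1(128) test_fifo;	/* record fifo with 1-byte length headers */

static void kfifo_skip_example(void)
{
	static const unsigned char hello[] = "hello";
	static const unsigned char world[] = "world";
	unsigned char buf[16];
	unsigned int len;

	INIT_KFIFO(test_fifo);
	kfifo_in(&test_fifo, hello, 5);
	kfifo_in(&test_fifo, world, 5);
	kfifo_skip(&test_fifo);		/* drops the "hello" record; expands to __kfifo_skip_r() for record fifos */
	len = kfifo_out(&test_fifo, buf, sizeof(buf));	/* copies one record, "world", so len == 5 */
}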
Signed-off-by: Andrea Righi Cc: Greg KH Acked-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 2 ++ kernel/kfifo.c | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 311f8753d713..4aa95f203f3e 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -836,6 +836,8 @@ extern void __kfifo_dma_out_finish_r(struct __kfifo *fifo, size_t recsize); extern unsigned int __kfifo_len_r(struct __kfifo *fifo, size_t recsize); +extern void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize); + extern unsigned int __kfifo_out_peek_r(struct __kfifo *fifo, void *buf, unsigned int len, size_t recsize); diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 4502604ecadf..6b5580c57644 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -503,6 +503,15 @@ unsigned int __kfifo_out_r(struct __kfifo *fifo, void *buf, } EXPORT_SYMBOL(__kfifo_out_r); +void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize) +{ + unsigned int n; + + n = __kfifo_peek_n(fifo, recsize); + fifo->out += n + recsize; +} +EXPORT_SYMBOL(__kfifo_skip_r); + int __kfifo_from_user_r(struct __kfifo *fifo, const void __user *from, unsigned long len, unsigned int *copied, size_t recsize) { -- cgit v1.2.3 From f335397d177c906256ee1bba28e8c49e8ec63817 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 17 Aug 2010 21:15:47 -0700 Subject: Input: sysrq - drop tty argument from handle_sysrq() Sysrq operations do not accept a tty argument anymore, so there is no need to pass it to us. [Stephen Rothwell : fix build breakage in drm code caused by sysrq using bool but not including linux/types.h] [Sachin Sant : fix build breakage in s390 keyboard driver] Acked-by: Alan Cox Acked-by: Jason Wessel Acked-by: Greg Kroah-Hartman Signed-off-by: Dmitry Torokhov --- arch/ia64/hp/sim/simserial.c | 2 +- arch/um/drivers/mconsole_kern.c | 2 +- drivers/char/hangcheck-timer.c | 2 +- drivers/char/hvc_console.c | 2 +- drivers/char/hvsi.c | 2 +- drivers/char/sysrq.c | 11 +++++------ drivers/s390/char/ctrlchar.c | 4 +--- drivers/s390/char/keyboard.c | 2 +- drivers/serial/sn_console.c | 2 +- drivers/usb/serial/generic.c | 2 +- drivers/xen/manage.c | 2 +- include/linux/serial_core.h | 2 +- include/linux/sysrq.h | 12 +++++------- kernel/debug/kdb/kdb_main.c | 2 +- 14 files changed, 22 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index 2bef5261d96d..1e8d71ad93ef 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -149,7 +149,7 @@ static void receive_chars(struct tty_struct *tty) ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR); while (!ch); - handle_sysrq(ch, NULL); + handle_sysrq(ch); } #endif seen_esc = 0; diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index de317d0c3294..ebc680717e59 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -690,7 +690,7 @@ static void with_console(struct mc_request *req, void (*proc)(void *), static void sysrq_proc(void *arg) { char *op = arg; - handle_sysrq(*op, NULL); + handle_sysrq(*op); } void mconsole_sysrq(struct mc_request *req) diff --git a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c index e0249722d25f..f953c96efc86 100644 --- a/drivers/char/hangcheck-timer.c +++ b/drivers/char/hangcheck-timer.c @@ -159,7 +159,7 @@ static void hangcheck_fire(unsigned long data) if (hangcheck_dump_tasks) { printk(KERN_CRIT "Hangcheck:
Task state:\n"); #ifdef CONFIG_MAGIC_SYSRQ - handle_sysrq('t', NULL); + handle_sysrq('t'); #endif /* CONFIG_MAGIC_SYSRQ */ } if (hangcheck_reboot) { diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index fa27d1676ee5..3afd62e856eb 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -651,7 +651,7 @@ int hvc_poll(struct hvc_struct *hp) if (sysrq_pressed) continue; } else if (sysrq_pressed) { - handle_sysrq(buf[i], tty); + handle_sysrq(buf[i]); sysrq_pressed = 0; continue; } diff --git a/drivers/char/hvsi.c b/drivers/char/hvsi.c index 1f4b6de65a2d..a2bc885ce60a 100644 --- a/drivers/char/hvsi.c +++ b/drivers/char/hvsi.c @@ -403,7 +403,7 @@ static void hvsi_insert_chars(struct hvsi_struct *hp, const char *buf, int len) hp->sysrq = 1; continue; } else if (hp->sysrq) { - handle_sysrq(c, hp->tty); + handle_sysrq(c); hp->sysrq = 0; continue; } diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index a892a3c249dd..ef31bb81e843 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -493,7 +492,7 @@ static void __sysrq_put_key_op(int key, struct sysrq_key_op *op_p) sysrq_key_table[i] = op_p; } -void __handle_sysrq(int key, struct tty_struct *tty, int check_mask) +void __handle_sysrq(int key, bool check_mask) { struct sysrq_key_op *op_p; int orig_log_level; @@ -545,10 +544,10 @@ void __handle_sysrq(int key, struct tty_struct *tty, int check_mask) spin_unlock_irqrestore(&sysrq_key_table_lock, flags); } -void handle_sysrq(int key, struct tty_struct *tty) +void handle_sysrq(int key) { if (sysrq_on()) - __handle_sysrq(key, tty, 1); + __handle_sysrq(key, true); } EXPORT_SYMBOL(handle_sysrq); @@ -597,7 +596,7 @@ static bool sysrq_filter(struct input_handle *handle, unsigned int type, default: if (sysrq_down && value && value != 2) - __handle_sysrq(sysrq_xlate[code], NULL, 1); + __handle_sysrq(sysrq_xlate[code], true); break; } @@ -765,7 +764,7 @@ static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf, if (get_user(c, buf)) return -EFAULT; - __handle_sysrq(c, NULL, 0); + __handle_sysrq(c, false); } return count; diff --git a/drivers/s390/char/ctrlchar.c b/drivers/s390/char/ctrlchar.c index c6cbcb3f925e..0e9a309b9669 100644 --- a/drivers/s390/char/ctrlchar.c +++ b/drivers/s390/char/ctrlchar.c @@ -16,12 +16,11 @@ #ifdef CONFIG_MAGIC_SYSRQ static int ctrlchar_sysrq_key; -static struct tty_struct *sysrq_tty; static void ctrlchar_handle_sysrq(struct work_struct *work) { - handle_sysrq(ctrlchar_sysrq_key, sysrq_tty); + handle_sysrq(ctrlchar_sysrq_key); } static DECLARE_WORK(ctrlchar_work, ctrlchar_handle_sysrq); @@ -54,7 +53,6 @@ ctrlchar_handle(const unsigned char *buf, int len, struct tty_struct *tty) /* racy */ if (len == 3 && buf[1] == '-') { ctrlchar_sysrq_key = buf[2]; - sysrq_tty = tty; schedule_work(&ctrlchar_work); return CTRLCHAR_SYSRQ; } diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c index 18d9a497863b..8cd58e412b5e 100644 --- a/drivers/s390/char/keyboard.c +++ b/drivers/s390/char/keyboard.c @@ -305,7 +305,7 @@ kbd_keycode(struct kbd_data *kbd, unsigned int keycode) if (kbd->sysrq) { if (kbd->sysrq == K(KT_LATIN, '-')) { kbd->sysrq = 0; - handle_sysrq(value, kbd->tty); + handle_sysrq(value); return; } if (value == '-') { diff --git a/drivers/serial/sn_console.c b/drivers/serial/sn_console.c index 7e5e5efea4e2..cff9a306660f 100644 --- a/drivers/serial/sn_console.c +++ b/drivers/serial/sn_console.c @@ -492,7 +492,7 @@ 
sn_receive_chars(struct sn_cons_port *port, unsigned long flags) sysrq_requested = 0; if (ch && time_before(jiffies, sysrq_timeout)) { spin_unlock_irqrestore(&port->sc_port.lock, flags); - handle_sysrq(ch, NULL); + handle_sysrq(ch); spin_lock_irqsave(&port->sc_port.lock, flags); /* ignore actual sysrq command char */ continue; diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index ca92f67747cc..1e846cc3c7a4 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -453,7 +453,7 @@ int usb_serial_handle_sysrq_char(struct tty_struct *tty, { if (port->sysrq && port->port.console) { if (ch && time_before(jiffies, port->sysrq)) { - handle_sysrq(ch, tty); + handle_sysrq(ch); port->sysrq = 0; return 1; } diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 1799bd890315..ef9c7db52077 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -237,7 +237,7 @@ static void sysrq_handler(struct xenbus_watch *watch, const char **vec, goto again; if (sysrq_key != '\0') - handle_sysrq(sysrq_key, NULL); + handle_sysrq(sysrq_key); } static struct xenbus_watch sysrq_watch = { diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 3c2ad99fed34..64458a9a8938 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -465,7 +465,7 @@ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) #ifdef SUPPORT_SYSRQ if (port->sysrq) { if (ch && time_before(jiffies, port->sysrq)) { - handle_sysrq(ch, port->state->port.tty); + handle_sysrq(ch); port->sysrq = 0; return 1; } diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 4ee650315119..387fa7d05c98 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -15,9 +15,7 @@ #define _LINUX_SYSRQ_H #include - -struct pt_regs; -struct tty_struct; +#include /* Possible values of bitmask for enabling sysrq functions */ /* 0x0001 is reserved for enable everything */ @@ -44,8 +42,8 @@ struct sysrq_key_op { * are available -- else NULL's). */ -void handle_sysrq(int key, struct tty_struct *tty); -void __handle_sysrq(int key, struct tty_struct *tty, int check_mask); +void handle_sysrq(int key); +void __handle_sysrq(int key, bool check_mask); int register_sysrq_key(int key, struct sysrq_key_op *op); int unregister_sysrq_key(int key, struct sysrq_key_op *op); struct sysrq_key_op *__sysrq_get_key_op(int key); @@ -54,11 +52,11 @@ int sysrq_toggle_support(int enable_mask); #else -static inline void handle_sysrq(int key, struct tty_struct *tty) +static inline void handle_sysrq(int key) { } -static inline void __handle_sysrq(int key, struct tty_struct *tty, int check_mask); +static inline void __handle_sysrq(int key, bool check_mask) { } diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 28b844118bbd..caf057a3de0e 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1929,7 +1929,7 @@ static int kdb_sr(int argc, const char **argv) if (argc != 1) return KDB_ARGCOUNT; kdb_trap_printk++; - __handle_sysrq(*argv[1], NULL, 0); + __handle_sysrq(*argv[1], false); kdb_trap_printk--; return 0; -- cgit v1.2.3 From 6ee9f4b4affe751d313d2538999aeec134d413a6 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 17 Aug 2010 21:15:47 -0700 Subject: USB: drop tty argument from usb_serial_handle_sysrq_char() Since handle_sysrq() does not take a tty argument anymore, we can drop it from usb_serial_handle_sysrq_char() as well.
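For illustration, this is the calling convention drivers are left with after this series; the helper below is a hypothetical sketch (the function name is invented), with the two API calls mirroring the generic driver:

	#include <linux/tty_flip.h>
	#include <linux/usb/serial.h>

	/* push a received byte to the tty unless sysrq consumed it */
	static void example_process_char(struct usb_serial_port *port,
					 struct tty_struct *tty, unsigned int ch)
	{
		if (!usb_serial_handle_sysrq_char(port, ch))
			tty_insert_flip_char(tty, ch, TTY_NORMAL);
	}

The tty is still needed to queue the character for regular input; only the sysrq path stops caring about it.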
Acked-by: Alan Cox Acked-by: Jason Wessel Acked-by: Greg Kroah-Hartman Signed-off-by: Dmitry Torokhov --- drivers/usb/serial/ftdi_sio.c | 2 +- drivers/usb/serial/generic.c | 8 +++----- drivers/usb/serial/pl2303.c | 2 +- drivers/usb/serial/ssu100.c | 2 +- include/linux/usb/serial.h | 3 +-- 5 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index eb12d9b096b4..5d47983b533c 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1831,7 +1831,7 @@ static int ftdi_process_packet(struct tty_struct *tty, if (port->port.console && port->sysrq) { for (i = 0; i < len; i++, ch++) { - if (!usb_serial_handle_sysrq_char(tty, port, *ch)) + if (!usb_serial_handle_sysrq_char(port, *ch)) tty_insert_flip_char(tty, *ch, flag); } } else { diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 1e846cc3c7a4..1abe34c38c08 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -343,7 +343,7 @@ void usb_serial_generic_process_read_urb(struct urb *urb) tty_insert_flip_string(tty, ch, urb->actual_length); else { for (i = 0; i < urb->actual_length; i++, ch++) { - if (!usb_serial_handle_sysrq_char(tty, port, *ch)) + if (!usb_serial_handle_sysrq_char(port, *ch)) tty_insert_flip_char(tty, *ch, TTY_NORMAL); } } @@ -448,8 +448,7 @@ void usb_serial_generic_unthrottle(struct tty_struct *tty) EXPORT_SYMBOL_GPL(usb_serial_generic_unthrottle); #ifdef CONFIG_MAGIC_SYSRQ -int usb_serial_handle_sysrq_char(struct tty_struct *tty, - struct usb_serial_port *port, unsigned int ch) +int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch) { if (port->sysrq && port->port.console) { if (ch && time_before(jiffies, port->sysrq)) { @@ -462,8 +461,7 @@ int usb_serial_handle_sysrq_char(struct tty_struct *tty, return 0; } #else -int usb_serial_handle_sysrq_char(struct tty_struct *tty, - struct usb_serial_port *port, unsigned int ch) +int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch) { return 0; } diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 6b6001822279..34ad7b3d5948 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -788,7 +788,7 @@ static void pl2303_process_read_urb(struct urb *urb) if (port->port.console && port->sysrq) { for (i = 0; i < urb->actual_length; ++i) - if (!usb_serial_handle_sysrq_char(tty, port, data[i])) + if (!usb_serial_handle_sysrq_char(port, data[i])) tty_insert_flip_char(tty, data[i], tty_flag); } else { tty_insert_flip_string_fixed_flag(tty, data, tty_flag, diff --git a/drivers/usb/serial/ssu100.c b/drivers/usb/serial/ssu100.c index 6e82d4f54bc8..819de4740388 100644 --- a/drivers/usb/serial/ssu100.c +++ b/drivers/usb/serial/ssu100.c @@ -596,7 +596,7 @@ static int ssu100_process_packet(struct tty_struct *tty, if (port->port.console && port->sysrq) { for (i = 0; i < len; i++, ch++) { - if (!usb_serial_handle_sysrq_char(tty, port, *ch)) + if (!usb_serial_handle_sysrq_char(port, *ch)) tty_insert_flip_char(tty, *ch, flag); } } else diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 84a4c44c208b..55675b1efb28 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -342,8 +342,7 @@ extern int usb_serial_generic_submit_read_urb(struct usb_serial_port *port, extern void usb_serial_generic_process_read_urb(struct urb *urb); extern int usb_serial_generic_prepare_write_buffer(struct usb_serial_port *port, 
void *dest, size_t size); -extern int usb_serial_handle_sysrq_char(struct tty_struct *tty, - struct usb_serial_port *port, +extern int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch); extern int usb_serial_handle_break(struct usb_serial_port *port); -- cgit v1.2.3 From 8905aaafb4b5d9764c5b4b54c7d03eb41bb0a7e9 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 19 Aug 2010 09:52:28 -0700 Subject: Input: uinput - add devname alias to allow module on-demand load Recent modprobe and udev versions allow creating device nodes for modules which are not loaded. Only the first access will cause the in-kernel module loader to pull in the module. Systems which never access the device node will not needlessly load the module, and no longer need init scripts or other facilities to unconditionally load it. Signed-off-by: Kay Sievers Signed-off-by: Dmitry Torokhov --- drivers/input/misc/uinput.c | 2 ++ include/linux/miscdevice.h | 1 + include/linux/uinput.h | 1 - 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index bb53fd33cd1c..0d4266a533a5 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -811,6 +811,8 @@ static struct miscdevice uinput_misc = { .minor = UINPUT_MINOR, .name = UINPUT_NAME, }; +MODULE_ALIAS_MISCDEV(UINPUT_MINOR); +MODULE_ALIAS("devname:" UINPUT_NAME); static int __init uinput_init(void) { diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index bafffc737903..18fd13028ba1 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -33,6 +33,7 @@ #define MWAVE_MINOR 219 /* ACP/Mwave Modem */ #define MPT_MINOR 220 #define MPT2SAS_MINOR 221 +#define UINPUT_MINOR 223 #define HPET_MINOR 228 #define FUSE_MINOR 229 #define KVM_MINOR 232 diff --git a/include/linux/uinput.h b/include/linux/uinput.h index 60c81da77f0f..05f7fed2b173 100644 --- a/include/linux/uinput.h +++ b/include/linux/uinput.h @@ -37,7 +37,6 @@ #define UINPUT_VERSION 3 #ifdef __KERNEL__ -#define UINPUT_MINOR 223 #define UINPUT_NAME "uinput" #define UINPUT_BUFFER_SIZE 16 #define UINPUT_NUM_REQUESTS 16 -- cgit v1.2.3 From 297c5eee372478fc32fec5fe8eed711eedb13f3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 20 Aug 2010 16:24:55 -0700 Subject: mm: make the vma list be doubly linked It's a really simple list, and several of the users want to go backwards in it to find the previous vma. So rather than have to look up the previous entry with 'find_vma_prev()' or something similar, just make it doubly linked instead. Tested-by: Ian Campbell Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 +- kernel/fork.c | 7 +++++-- mm/mmap.c | 21 +++++++++++++++++---- mm/nommu.c | 7 +++++-- 4 files changed, 28 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b8bb9a6a1f37..ee7e258627f9 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -134,7 +134,7 @@ struct vm_area_struct { within vm_mm. */ /* linked list of VM areas per task, sorted by address */ - struct vm_area_struct *vm_next; + struct vm_area_struct *vm_next, *vm_prev; pgprot_t vm_page_prot; /* Access permissions of this VMA. */ unsigned long vm_flags; /* Flags, see mm.h.
*/ diff --git a/kernel/fork.c b/kernel/fork.c index 856eac3ec52e..b7e9d60a675d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -300,7 +300,7 @@ out: #ifdef CONFIG_MMU static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) { - struct vm_area_struct *mpnt, *tmp, **pprev; + struct vm_area_struct *mpnt, *tmp, *prev, **pprev; struct rb_node **rb_link, *rb_parent; int retval; unsigned long charge; @@ -328,6 +328,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) if (retval) goto out; + prev = NULL; for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { struct file *file; @@ -359,7 +360,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) goto fail_nomem_anon_vma_fork; tmp->vm_flags &= ~VM_LOCKED; tmp->vm_mm = mm; - tmp->vm_next = NULL; + tmp->vm_next = tmp->vm_prev = NULL; file = tmp->vm_file; if (file) { struct inode *inode = file->f_path.dentry->d_inode; @@ -392,6 +393,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) */ *pprev = tmp; pprev = &tmp->vm_next; + tmp->vm_prev = prev; + prev = tmp; __vma_link_rb(mm, tmp, rb_link, rb_parent); rb_link = &tmp->vm_rb.rb_right; diff --git a/mm/mmap.c b/mm/mmap.c index 31003338b978..331e51af38c9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -388,17 +388,23 @@ static inline void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, struct rb_node *rb_parent) { + struct vm_area_struct *next; + + vma->vm_prev = prev; if (prev) { - vma->vm_next = prev->vm_next; + next = prev->vm_next; prev->vm_next = vma; } else { mm->mmap = vma; if (rb_parent) - vma->vm_next = rb_entry(rb_parent, + next = rb_entry(rb_parent, struct vm_area_struct, vm_rb); else - vma->vm_next = NULL; + next = NULL; } + vma->vm_next = next; + if (next) + next->vm_prev = vma; } void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, @@ -483,7 +489,11 @@ static inline void __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev) { - prev->vm_next = vma->vm_next; + struct vm_area_struct *next = vma->vm_next; + + prev->vm_next = next; + if (next) + next->vm_prev = prev; rb_erase(&vma->vm_rb, &mm->mm_rb); if (mm->mmap_cache == vma) mm->mmap_cache = prev; @@ -1915,6 +1925,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr; insertion_point = (prev ? &prev->vm_next : &mm->mmap); + vma->vm_prev = NULL; do { rb_erase(&vma->vm_rb, &mm->mm_rb); mm->map_count--; @@ -1922,6 +1933,8 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, vma = vma->vm_next; } while (vma && vma->vm_start < end); *insertion_point = vma; + if (vma) + vma->vm_prev = prev; tail_vma->vm_next = NULL; if (mm->unmap_area == arch_unmap_area) addr = prev ? 
prev->vm_end : mm->mmap_base; diff --git a/mm/nommu.c b/mm/nommu.c index efa9a380335e..88ff091eb07a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -604,7 +604,7 @@ static void protect_vma(struct vm_area_struct *vma, unsigned long flags) */ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) { - struct vm_area_struct *pvma, **pp; + struct vm_area_struct *pvma, **pp, *next; struct address_space *mapping; struct rb_node **p, *parent; @@ -664,8 +664,11 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) break; } - vma->vm_next = *pp; + next = *pp; *pp = vma; + vma->vm_next = next; + if (next) + next->vm_prev = vma; } /* -- cgit v1.2.3 From e36c886a0f9d624377977fa6cae309cfd7f362fa Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 21 Aug 2010 13:07:26 -0700 Subject: workqueue: Add basic tracepoints to track workqueue execution With the introduction of the new unified work queue thread pools, we lost one feature: It's no longer possible to know which worker is causing the CPU to wake out of idle. The result is that PowerTOP now reports a lot of "kworker/a:b" instead of more readable results. This patch adds a pair of tracepoints to the new workqueue code, similar in style to the timer/hrtimer tracepoints. With this pair of tracepoints, the next PowerTOP can correctly report which work item caused the wakeup (and how long it took):

Interrupt (43) i915 time 3.51ms wakeups 141
Work ieee80211_iface_work time 0.81ms wakeups 29
Work do_dbs_timer time 0.55ms wakeups 24
Process Xorg time 21.36ms wakeups 4
Timer sched_rt_period_timer time 0.01ms wakeups 1

Signed-off-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- include/trace/events/workqueue.h | 62 ++++++++++++++++++++++++++++++++++++++++ kernel/workqueue.c | 9 ++++++ 2 files changed, 71 insertions(+) create mode 100644 include/trace/events/workqueue.h (limited to 'include') diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h new file mode 100644 index 000000000000..49682d7e9d60 --- /dev/null +++ b/include/trace/events/workqueue.h @@ -0,0 +1,62 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM workqueue + +#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_WORKQUEUE_H + +#include <linux/tracepoint.h> +#include <linux/workqueue.h> + +/** + * workqueue_execute_start - called immediately before the workqueue callback + * @work: pointer to struct work_struct + * + * Allows to track workqueue execution. + */ +TRACE_EVENT(workqueue_execute_start, + + TP_PROTO(struct work_struct *work), + + TP_ARGS(work), + + TP_STRUCT__entry( + __field( void *, work ) + __field( void *, function) + ), + + TP_fast_assign( + __entry->work = work; + __entry->function = work->func; + ), + + TP_printk("work struct %p: function %pf", __entry->work, __entry->function) +); + +/** + * workqueue_execute_end - called immediately after the workqueue callback + * @work: pointer to struct work_struct + * + * Allows to track workqueue execution.
+ */ +TRACE_EVENT(workqueue_execute_end, + + TP_PROTO(struct work_struct *work), + + TP_ARGS(work), + + TP_STRUCT__entry( + __field( void *, work ) + ), + + TP_fast_assign( + __entry->work = work; + ), + + TP_printk("work struct %p", __entry->work) +); + + +#endif /* _TRACE_WORKQUEUE_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2994a0e3a61c..8bd600c020e5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -35,6 +35,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include <trace/events/workqueue.h> + #include "workqueue_sched.h" enum { @@ -1790,7 +1793,13 @@ static void process_one_work(struct worker *worker, struct work_struct *work) work_clear_pending(work); lock_map_acquire(&cwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); + trace_workqueue_execute_start(work); f(work); + /* + * While we must be careful to not use "work" after this, the trace + * point will only record its address. + */ + trace_workqueue_execute_end(work); lock_map_release(&lockdep_map); lock_map_release(&cwq->wq->lockdep_map); -- cgit v1.2.3 From 00959ade36acadc00e757f87060bf6e4501d545f Mon Sep 17 00:00:00 2001 From: Dmitry Kozlov Date: Sat, 21 Aug 2010 23:05:39 -0700 Subject: PPTP: PPP over IPv4 (Point-to-Point Tunneling Protocol) PPP: introduce the "pptp" module, which implements the Point-to-Point Tunneling Protocol using the pppox framework. NET: introduce the "gre" module for demultiplexing GRE packets on version criteria (required so that pptp and ip_gre may coexist). NET: ip_gre: update to use the "gre" module. This patch introduces PPTP support to the Linux kernel, which dramatically speeds up PPTP VPN connections and decreases CPU usage in comparison with the existing user-space implementation (poptop/pptpclient). There is the accel-pptp project (https://sourceforge.net/projects/accel-pptp/) to utilize this module; it contains a plugin for pppd to use PPTP in client mode and a modified pptpd (poptop) to build a high-performance PPTP NAS. There were many changes from the initially submitted patch; the most important are: 1. using rcu instead of read-write locks 2. using a static bitmap instead of a dynamically allocated one 3. using vmalloc for memory allocation instead of BITS_PER_LONG + __get_free_pages 4. fixed many coding style issues Thanks to Eric Dumazet. Signed-off-by: Dmitry Kozlov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- MAINTAINERS | 14 + drivers/net/Kconfig | 11 + drivers/net/Makefile | 1 + drivers/net/pptp.c | 726 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/if_pppox.h | 52 ++-- include/net/gre.h | 18 ++ net/ipv4/Kconfig | 7 + net/ipv4/Makefile | 1 + net/ipv4/gre.c | 151 ++++++++++ net/ipv4/ip_gre.c | 14 +- 10 files changed, 971 insertions(+), 24 deletions(-) create mode 100644 drivers/net/pptp.c create mode 100644 include/net/gre.h create mode 100644 net/ipv4/gre.c (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index b5b8baa1d70e..43c9efcd8e10 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6528,6 +6528,20 @@ M: "Maciej W.
Rozycki" S: Maintained F: drivers/serial/zs.* +GRE DEMULTIPLEXER DRIVER +M: Dmitry Kozlov +L: netdev@vger.kernel.org +S: Maintained +F: net/ipv4/gre.c +F: include/net/gre.h + +PPTP DRIVER +M: Dmitry Kozlov +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/pptp.c +W: http://sourceforge.net/projects/accel-pptp + THE REST M: Linus Torvalds L: linux-kernel@vger.kernel.org diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 5a6895320b48..9b2a72089a68 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -3192,6 +3192,17 @@ config PPPOE which contains instruction on how to use this driver (under the heading "Kernel mode PPPoE"). +config PPTP + tristate "PPP over IPv4 (PPTP) (EXPERIMENTAL)" + depends on EXPERIMENTAL && PPP && NET_IPGRE_DEMUX + help + Support for PPP over IPv4.(Point-to-Point Tunneling Protocol) + + This driver requires pppd plugin to work in client mode or + modified pptpd (poptop) to work in server mode. + See http://accel-pptp.sourceforge.net/ for information how to + utilize this module. + config PPPOATM tristate "PPP over ATM" depends on ATM && PPP diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 56e8c27f77ce..0b371083d481 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -162,6 +162,7 @@ obj-$(CONFIG_PPP_BSDCOMP) += bsd_comp.o obj-$(CONFIG_PPP_MPPE) += ppp_mppe.o obj-$(CONFIG_PPPOE) += pppox.o pppoe.o obj-$(CONFIG_PPPOL2TP) += pppox.o +obj-$(CONFIG_PPTP) += pppox.o pptp.o obj-$(CONFIG_SLIP) += slip.o obj-$(CONFIG_SLHC) += slhc.o diff --git a/drivers/net/pptp.c b/drivers/net/pptp.c new file mode 100644 index 000000000000..761f0eced724 --- /dev/null +++ b/drivers/net/pptp.c @@ -0,0 +1,726 @@ +/* + * Point-to-Point Tunneling Protocol for Linux + * + * Authors: Dmitry Kozlov + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#define PPTP_DRIVER_VERSION "0.8.5" + +#define MAX_CALLID 65535 + +static DECLARE_BITMAP(callid_bitmap, MAX_CALLID + 1); +static struct pppox_sock **callid_sock; + +static DEFINE_SPINLOCK(chan_lock); + +static struct proto pptp_sk_proto __read_mostly; +static struct ppp_channel_ops pptp_chan_ops; +static const struct proto_ops pptp_ops; + +#define PPP_LCP_ECHOREQ 0x09 +#define PPP_LCP_ECHOREP 0x0A +#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) + +#define MISSING_WINDOW 20 +#define WRAPPED(curseq, lastseq)\ + ((((curseq) & 0xffffff00) == 0) &&\ + (((lastseq) & 0xffffff00) == 0xffffff00)) + +#define PPTP_GRE_PROTO 0x880B +#define PPTP_GRE_VER 0x1 + +#define PPTP_GRE_FLAG_C 0x80 +#define PPTP_GRE_FLAG_R 0x40 +#define PPTP_GRE_FLAG_K 0x20 +#define PPTP_GRE_FLAG_S 0x10 +#define PPTP_GRE_FLAG_A 0x80 + +#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C) +#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R) +#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K) +#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S) +#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A) + +#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header)) +struct pptp_gre_header { + u8 flags; + u8 ver; + u16 protocol; + u16 payload_len; + u16 call_id; + u32 seq; + u32 ack; +} __packed; + +static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr) +{ + struct pppox_sock *sock; + struct pptp_opt *opt; + + rcu_read_lock(); + sock = rcu_dereference(callid_sock[call_id]); + if (sock) { + opt = &sock->proto.pptp; + if (opt->dst_addr.sin_addr.s_addr != s_addr) + sock = NULL; + else + sock_hold(sk_pppox(sock)); + } + rcu_read_unlock(); + + return sock; +} + +static int lookup_chan_dst(u16 call_id, __be32 d_addr) +{ + struct pppox_sock *sock; + struct pptp_opt *opt; + int i; + + rcu_read_lock(); + for (i = find_next_bit(callid_bitmap, MAX_CALLID, 1); i < MAX_CALLID; + i = find_next_bit(callid_bitmap, MAX_CALLID, i + 1)) { + sock = rcu_dereference(callid_sock[i]); + if (!sock) + continue; + opt = &sock->proto.pptp; + if (opt->dst_addr.call_id == call_id && + opt->dst_addr.sin_addr.s_addr == d_addr) + break; + } + rcu_read_unlock(); + + return i < MAX_CALLID; +} + +static int add_chan(struct pppox_sock *sock) +{ + static int call_id; + + spin_lock(&chan_lock); + if (!sock->proto.pptp.src_addr.call_id) { + call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, call_id + 1); + if (call_id == MAX_CALLID) { + call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, 1); + if (call_id == MAX_CALLID) + goto out_err; + } + sock->proto.pptp.src_addr.call_id = call_id; + } else if (test_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap)) + goto out_err; + + set_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap); + rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], sock); + spin_unlock(&chan_lock); + + return 0; + +out_err: + spin_unlock(&chan_lock); + return -1; +} + +static void del_chan(struct pppox_sock *sock) +{ + spin_lock(&chan_lock); + clear_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap); + rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], NULL); + spin_unlock(&chan_lock); + synchronize_rcu(); +} + +static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) +{ + 
struct sock *sk = (struct sock *) chan->private; + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + struct pptp_gre_header *hdr; + unsigned int header_len = sizeof(*hdr); + int err = 0; + int islcp; + int len; + unsigned char *data; + __u32 seq_recv; + + + struct rtable *rt; + struct net_device *tdev; + struct iphdr *iph; + int max_headroom; + + if (sk_pppox(po)->sk_state & PPPOX_DEAD) + goto tx_error; + + { + struct flowi fl = { .oif = 0, + .nl_u = { + .ip4_u = { + .daddr = opt->dst_addr.sin_addr.s_addr, + .saddr = opt->src_addr.sin_addr.s_addr, + .tos = RT_TOS(0) } }, + .proto = IPPROTO_GRE }; + err = ip_route_output_key(&init_net, &rt, &fl); + if (err) + goto tx_error; + } + tdev = rt->dst.dev; + + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(*iph) + sizeof(*hdr) + 2; + + if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { + struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); + if (!new_skb) { + ip_rt_put(rt); + goto tx_error; + } + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); + kfree_skb(skb); + skb = new_skb; + } + + data = skb->data; + islcp = ((data[0] << 8) + data[1]) == PPP_LCP && 1 <= data[2] && data[2] <= 7; + + /* compress protocol field */ + if ((opt->ppp_flags & SC_COMP_PROT) && data[0] == 0 && !islcp) + skb_pull(skb, 1); + + /* Put in the address/control bytes if necessary */ + if ((opt->ppp_flags & SC_COMP_AC) == 0 || islcp) { + data = skb_push(skb, 2); + data[0] = PPP_ALLSTATIONS; + data[1] = PPP_UI; + } + + len = skb->len; + + seq_recv = opt->seq_recv; + + if (opt->ack_sent == seq_recv) + header_len -= sizeof(hdr->ack); + + /* Push down and install GRE header */ + skb_push(skb, header_len); + hdr = (struct pptp_gre_header *)(skb->data); + + hdr->flags = PPTP_GRE_FLAG_K; + hdr->ver = PPTP_GRE_VER; + hdr->protocol = htons(PPTP_GRE_PROTO); + hdr->call_id = htons(opt->dst_addr.call_id); + + hdr->flags |= PPTP_GRE_FLAG_S; + hdr->seq = htonl(++opt->seq_sent); + if (opt->ack_sent != seq_recv) { + /* send ack with this message */ + hdr->ver |= PPTP_GRE_FLAG_A; + hdr->ack = htonl(seq_recv); + opt->ack_sent = seq_recv; + } + hdr->payload_len = htons(len); + + /* Push down and install the IP header. 
*/ + + skb_reset_transport_header(skb); + skb_push(skb, sizeof(*iph)); + skb_reset_network_header(skb); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); + + iph = ip_hdr(skb); + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + if (ip_dont_fragment(sk, &rt->dst)) + iph->frag_off = htons(IP_DF); + else + iph->frag_off = 0; + iph->protocol = IPPROTO_GRE; + iph->tos = 0; + iph->daddr = rt->rt_dst; + iph->saddr = rt->rt_src; + iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT); + iph->tot_len = htons(skb->len); + + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + + nf_reset(skb); + + skb->ip_summed = CHECKSUM_NONE; + ip_select_ident(iph, &rt->dst, NULL); + ip_send_check(iph); + + ip_local_out(skb); + +tx_error: + return 1; +} + +static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb) +{ + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + int headersize, payload_len, seq; + __u8 *payload; + struct pptp_gre_header *header; + + if (!(sk->sk_state & PPPOX_CONNECTED)) { + if (sock_queue_rcv_skb(sk, skb)) + goto drop; + return NET_RX_SUCCESS; + } + + header = (struct pptp_gre_header *)(skb->data); + + /* test if acknowledgement present */ + if (PPTP_GRE_IS_A(header->ver)) { + __u32 ack = (PPTP_GRE_IS_S(header->flags)) ? + header->ack : header->seq; /* ack in different place if S = 0 */ + + ack = ntohl(ack); + + if (ack > opt->ack_recv) + opt->ack_recv = ack; + /* also handle sequence number wrap-around */ + if (WRAPPED(ack, opt->ack_recv)) + opt->ack_recv = ack; + } + + /* test if payload present */ + if (!PPTP_GRE_IS_S(header->flags)) + goto drop; + + headersize = sizeof(*header); + payload_len = ntohs(header->payload_len); + seq = ntohl(header->seq); + + /* no ack present? 
*/ + if (!PPTP_GRE_IS_A(header->ver)) + headersize -= sizeof(header->ack); + /* check for incomplete packet (length smaller than expected) */ + if (skb->len - headersize < payload_len) + goto drop; + + payload = skb->data + headersize; + /* check for expected sequence number */ + if (seq < opt->seq_recv + 1 || WRAPPED(opt->seq_recv, seq)) { + if ((payload[0] == PPP_ALLSTATIONS) && (payload[1] == PPP_UI) && + (PPP_PROTOCOL(payload) == PPP_LCP) && + ((payload[4] == PPP_LCP_ECHOREQ) || (payload[4] == PPP_LCP_ECHOREP))) + goto allow_packet; + } else { + opt->seq_recv = seq; +allow_packet: + skb_pull(skb, headersize); + + if (payload[0] == PPP_ALLSTATIONS && payload[1] == PPP_UI) { + /* chop off address/control */ + if (skb->len < 3) + goto drop; + skb_pull(skb, 2); + } + + if ((*skb->data) & 1) { + /* protocol is compressed */ + skb_push(skb, 1)[0] = 0; + } + + skb->ip_summed = CHECKSUM_NONE; + skb_set_network_header(skb, skb->head-skb->data); + ppp_input(&po->chan, skb); + + return NET_RX_SUCCESS; + } +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static int pptp_rcv(struct sk_buff *skb) +{ + struct pppox_sock *po; + struct pptp_gre_header *header; + struct iphdr *iph; + + if (skb->pkt_type != PACKET_HOST) + goto drop; + + if (!pskb_may_pull(skb, 12)) + goto drop; + + iph = ip_hdr(skb); + + header = (struct pptp_gre_header *)skb->data; + + if (ntohs(header->protocol) != PPTP_GRE_PROTO || /* PPTP-GRE protocol for PPTP */ + PPTP_GRE_IS_C(header->flags) || /* flag C should be clear */ + PPTP_GRE_IS_R(header->flags) || /* flag R should be clear */ + !PPTP_GRE_IS_K(header->flags) || /* flag K should be set */ + (header->flags&0xF) != 0) /* routing and recursion ctrl = 0 */ + /* if invalid, discard this packet */ + goto drop; + + po = lookup_chan(htons(header->call_id), iph->saddr); + if (po) { + skb_dst_drop(skb); + nf_reset(skb); + return sk_receive_skb(sk_pppox(po), skb, 0); + } +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr, + int sockaddr_len) +{ + struct sock *sk = sock->sk; + struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr; + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + int error = 0; + + lock_sock(sk); + + opt->src_addr = sp->sa_addr.pptp; + if (add_chan(po)) { + release_sock(sk); + error = -EBUSY; + } + + release_sock(sk); + return error; +} + +static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, + int sockaddr_len, int flags) +{ + struct sock *sk = sock->sk; + struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr; + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + struct rtable *rt; + int error = 0; + + if (sp->sa_protocol != PX_PROTO_PPTP) + return -EINVAL; + + if (lookup_chan_dst(sp->sa_addr.pptp.call_id, sp->sa_addr.pptp.sin_addr.s_addr)) + return -EALREADY; + + lock_sock(sk); + /* Check for already bound sockets */ + if (sk->sk_state & PPPOX_CONNECTED) { + error = -EBUSY; + goto end; + } + + /* Check for already disconnected sockets, on attempts to disconnect */ + if (sk->sk_state & PPPOX_DEAD) { + error = -EALREADY; + goto end; + } + + if (!opt->src_addr.sin_addr.s_addr || !sp->sa_addr.pptp.sin_addr.s_addr) { + error = -EINVAL; + goto end; + } + + po->chan.private = sk; + po->chan.ops = &pptp_chan_ops; + + { + struct flowi fl = { + .nl_u = { + .ip4_u = { + .daddr = opt->dst_addr.sin_addr.s_addr, + .saddr = opt->src_addr.sin_addr.s_addr, + .tos = RT_CONN_FLAGS(sk) } }, + .proto = IPPROTO_GRE 
}; + security_sk_classify_flow(sk, &fl); + if (ip_route_output_key(&init_net, &rt, &fl)) { + error = -EHOSTUNREACH; + goto end; + } + sk_setup_caps(sk, &rt->dst); + } + po->chan.mtu = dst_mtu(&rt->dst); + if (!po->chan.mtu) + po->chan.mtu = PPP_MTU; + ip_rt_put(rt); + po->chan.mtu -= PPTP_HEADER_OVERHEAD; + + po->chan.hdrlen = 2 + sizeof(struct pptp_gre_header); + error = ppp_register_channel(&po->chan); + if (error) { + pr_err("PPTP: failed to register PPP channel (%d)\n", error); + goto end; + } + + opt->dst_addr = sp->sa_addr.pptp; + sk->sk_state = PPPOX_CONNECTED; + + end: + release_sock(sk); + return error; +} + +static int pptp_getname(struct socket *sock, struct sockaddr *uaddr, + int *usockaddr_len, int peer) +{ + int len = sizeof(struct sockaddr_pppox); + struct sockaddr_pppox sp; + + sp.sa_family = AF_PPPOX; + sp.sa_protocol = PX_PROTO_PPTP; + sp.sa_addr.pptp = pppox_sk(sock->sk)->proto.pptp.src_addr; + + memcpy(uaddr, &sp, len); + + *usockaddr_len = len; + + return 0; +} + +static int pptp_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct pppox_sock *po; + struct pptp_opt *opt; + int error = 0; + + if (!sk) + return 0; + + lock_sock(sk); + + if (sock_flag(sk, SOCK_DEAD)) { + release_sock(sk); + return -EBADF; + } + + po = pppox_sk(sk); + opt = &po->proto.pptp; + del_chan(po); + + pppox_unbind_sock(sk); + sk->sk_state = PPPOX_DEAD; + + sock_orphan(sk); + sock->sk = NULL; + + release_sock(sk); + sock_put(sk); + + return error; +} + +static void pptp_sock_destruct(struct sock *sk) +{ + if (!(sk->sk_state & PPPOX_DEAD)) { + del_chan(pppox_sk(sk)); + pppox_unbind_sock(sk); + } + skb_queue_purge(&sk->sk_receive_queue); +} + +static int pptp_create(struct net *net, struct socket *sock) +{ + int error = -ENOMEM; + struct sock *sk; + struct pppox_sock *po; + struct pptp_opt *opt; + + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pptp_sk_proto); + if (!sk) + goto out; + + sock_init_data(sock, sk); + + sock->state = SS_UNCONNECTED; + sock->ops = &pptp_ops; + + sk->sk_backlog_rcv = pptp_rcv_core; + sk->sk_state = PPPOX_NONE; + sk->sk_type = SOCK_STREAM; + sk->sk_family = PF_PPPOX; + sk->sk_protocol = PX_PROTO_PPTP; + sk->sk_destruct = pptp_sock_destruct; + + po = pppox_sk(sk); + opt = &po->proto.pptp; + + opt->seq_sent = 0; opt->seq_recv = 0; + opt->ack_recv = 0; opt->ack_sent = 0; + + error = 0; +out: + return error; +} + +static int pptp_ppp_ioctl(struct ppp_channel *chan, unsigned int cmd, + unsigned long arg) +{ + struct sock *sk = (struct sock *) chan->private; + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + void __user *argp = (void __user *)arg; + int __user *p = argp; + int err, val; + + err = -EFAULT; + switch (cmd) { + case PPPIOCGFLAGS: + val = opt->ppp_flags; + if (put_user(val, p)) + break; + err = 0; + break; + case PPPIOCSFLAGS: + if (get_user(val, p)) + break; + opt->ppp_flags = val & ~SC_RCV_BITS; + err = 0; + break; + default: + err = -ENOTTY; + } + + return err; +} + +static struct ppp_channel_ops pptp_chan_ops = { + .start_xmit = pptp_xmit, + .ioctl = pptp_ppp_ioctl, +}; + +static struct proto pptp_sk_proto __read_mostly = { + .name = "PPTP", + .owner = THIS_MODULE, + .obj_size = sizeof(struct pppox_sock), +}; + +static const struct proto_ops pptp_ops = { + .family = AF_PPPOX, + .owner = THIS_MODULE, + .release = pptp_release, + .bind = pptp_bind, + .connect = pptp_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = pptp_getname, + .poll = sock_no_poll, + .listen = sock_no_listen, + 
.shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = sock_no_sendmsg, + .recvmsg = sock_no_recvmsg, + .mmap = sock_no_mmap, + .ioctl = pppox_ioctl, +}; + +static struct pppox_proto pppox_pptp_proto = { + .create = pptp_create, + .owner = THIS_MODULE, +}; + +static struct gre_protocol gre_pptp_protocol = { + .handler = pptp_rcv, +}; + +static int __init pptp_init_module(void) +{ + int err = 0; + pr_info("PPTP driver version " PPTP_DRIVER_VERSION "\n"); + + callid_sock = __vmalloc((MAX_CALLID + 1) * sizeof(void *), + GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); + if (!callid_sock) { + pr_err("PPTP: can't allocate memory\n"); + return -ENOMEM; + } + + err = gre_add_protocol(&gre_pptp_protocol, GREPROTO_PPTP); + if (err) { + pr_err("PPTP: can't add gre protocol\n"); + goto out_mem_free; + } + + err = proto_register(&pptp_sk_proto, 0); + if (err) { + pr_err("PPTP: can't register sk_proto\n"); + goto out_gre_del_protocol; + } + + err = register_pppox_proto(PX_PROTO_PPTP, &pppox_pptp_proto); + if (err) { + pr_err("PPTP: can't register pppox_proto\n"); + goto out_unregister_sk_proto; + } + + return 0; + +out_unregister_sk_proto: + proto_unregister(&pptp_sk_proto); +out_gre_del_protocol: + gre_del_protocol(&gre_pptp_protocol, GREPROTO_PPTP); +out_mem_free: + vfree(callid_sock); + + return err; +} + +static void __exit pptp_exit_module(void) +{ + unregister_pppox_proto(PX_PROTO_PPTP); + proto_unregister(&pptp_sk_proto); + gre_del_protocol(&gre_pptp_protocol, GREPROTO_PPTP); + vfree(callid_sock); +} + +module_init(pptp_init_module); +module_exit(pptp_exit_module); + +MODULE_DESCRIPTION("Point-to-Point Tunneling Protocol"); +MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 1925e0c3f162..1525b2156b2a 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -40,25 +40,35 @@ * PPPoE addressing definition */ typedef __be16 sid_t; -struct pppoe_addr{ - sid_t sid; /* Session identifier */ - unsigned char remote[ETH_ALEN]; /* Remote address */ - char dev[IFNAMSIZ]; /* Local device to use */ +struct pppoe_addr { + sid_t sid; /* Session identifier */ + unsigned char remote[ETH_ALEN]; /* Remote address */ + char dev[IFNAMSIZ]; /* Local device to use */ }; /************************************************************************ - * Protocols supported by AF_PPPOX - */ + * PPTP addressing definition + */ +struct pptp_addr { + u16 call_id; + struct in_addr sin_addr; +}; + +/************************************************************************ + * Protocols supported by AF_PPPOX + */ #define PX_PROTO_OE 0 /* Currently just PPPoE */ #define PX_PROTO_OL2TP 1 /* Now L2TP also */ -#define PX_MAX_PROTO 2 - -struct sockaddr_pppox { - sa_family_t sa_family; /* address family, AF_PPPOX */ - unsigned int sa_protocol; /* protocol identifier */ - union{ - struct pppoe_addr pppoe; - }sa_addr; +#define PX_PROTO_PPTP 2 +#define PX_MAX_PROTO 3 + +struct sockaddr_pppox { + sa_family_t sa_family; /* address family, AF_PPPOX */ + unsigned int sa_protocol; /* protocol identifier */ + union { + struct pppoe_addr pppoe; + struct pptp_addr pptp; + } sa_addr; } __packed; /* The use of the above union isn't viable because the size of this @@ -101,7 +111,7 @@ struct pppoe_tag { __be16 tag_type; __be16 tag_len; char tag_data[0]; -} __attribute ((packed)); +} __packed; /* Tag identifiers */ #define PTT_EOL __cpu_to_be16(0x0000) @@ -150,15 +160,23 @@ struct pppoe_opt { relayed to
(PPPoE relaying) */ }; +struct pptp_opt { + struct pptp_addr src_addr; + struct pptp_addr dst_addr; + u32 ack_sent, ack_recv; + u32 seq_sent, seq_recv; + int ppp_flags; +}; #include struct pppox_sock { /* struct sock must be the first member of pppox_sock */ - struct sock sk; - struct ppp_channel chan; + struct sock sk; + struct ppp_channel chan; struct pppox_sock *next; /* for hash table */ union { struct pppoe_opt pppoe; + struct pptp_opt pptp; } proto; __be16 num; }; diff --git a/include/net/gre.h b/include/net/gre.h new file mode 100644 index 000000000000..82665474bcb7 --- /dev/null +++ b/include/net/gre.h @@ -0,0 +1,18 @@ +#ifndef __LINUX_GRE_H +#define __LINUX_GRE_H + +#include + +#define GREPROTO_CISCO 0 +#define GREPROTO_PPTP 1 +#define GREPROTO_MAX 2 + +struct gre_protocol { + int (*handler)(struct sk_buff *skb); + void (*err_handler)(struct sk_buff *skb, u32 info); +}; + +int gre_add_protocol(const struct gre_protocol *proto, u8 version); +int gre_del_protocol(const struct gre_protocol *proto, u8 version); + +#endif diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 7c3a7d191249..7458bdae7e9f 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -215,8 +215,15 @@ config NET_IPIP be inserted in and removed from the running kernel whenever you want). Most people won't need this and can say N. +config NET_IPGRE_DEMUX + tristate "IP: GRE demultiplexer" + help + This is a helper module to demultiplex GRE packets on the GRE version field. + It is required by the ip_gre and pptp modules. + config NET_IPGRE tristate "IP: GRE tunnels over IP" + depends on NET_IPGRE_DEMUX help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 80ff87ce43aa..4978d22f9a75 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o obj-$(CONFIG_NET_IPIP) += ipip.o +obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_AH) += ah4.o diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c new file mode 100644 index 000000000000..b546736da2e1 --- /dev/null +++ b/net/ipv4/gre.c @@ -0,0 +1,151 @@ +/* + * GRE over IPv4 demultiplexer driver + * + * Authors: Dmitry Kozlov (xeb@mail.ru) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version.
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +const struct gre_protocol *gre_proto[GREPROTO_MAX] __read_mostly; +static DEFINE_SPINLOCK(gre_proto_lock); + +int gre_add_protocol(const struct gre_protocol *proto, u8 version) +{ + if (version >= GREPROTO_MAX) + goto err_out; + + spin_lock(&gre_proto_lock); + if (gre_proto[version]) + goto err_out_unlock; + + rcu_assign_pointer(gre_proto[version], proto); + spin_unlock(&gre_proto_lock); + return 0; + +err_out_unlock: + spin_unlock(&gre_proto_lock); +err_out: + return -1; +} +EXPORT_SYMBOL_GPL(gre_add_protocol); + +int gre_del_protocol(const struct gre_protocol *proto, u8 version) +{ + if (version >= GREPROTO_MAX) + goto err_out; + + spin_lock(&gre_proto_lock); + if (gre_proto[version] != proto) + goto err_out_unlock; + rcu_assign_pointer(gre_proto[version], NULL); + spin_unlock(&gre_proto_lock); + synchronize_rcu(); + return 0; + +err_out_unlock: + spin_unlock(&gre_proto_lock); +err_out: + return -1; +} +EXPORT_SYMBOL_GPL(gre_del_protocol); + +static int gre_rcv(struct sk_buff *skb) +{ + const struct gre_protocol *proto; + u8 ver; + int ret; + + if (!pskb_may_pull(skb, 12)) + goto drop; + + ver = skb->data[1]&0x7f; + if (ver >= GREPROTO_MAX) + goto drop; + + rcu_read_lock(); + proto = rcu_dereference(gre_proto[ver]); + if (!proto || !proto->handler) + goto drop_unlock; + ret = proto->handler(skb); + rcu_read_unlock(); + return ret; + +drop_unlock: + rcu_read_unlock(); +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static void gre_err(struct sk_buff *skb, u32 info) +{ + const struct gre_protocol *proto; + u8 ver; + + if (!pskb_may_pull(skb, 12)) + goto drop; + + ver = skb->data[1]&0x7f; + if (ver >= GREPROTO_MAX) + goto drop; + + rcu_read_lock(); + proto = rcu_dereference(gre_proto[ver]); + if (!proto || !proto->err_handler) + goto drop_unlock; + proto->err_handler(skb, info); + rcu_read_unlock(); + return; + +drop_unlock: + rcu_read_unlock(); +drop: + kfree_skb(skb); +} + +static const struct net_protocol net_gre_protocol = { + .handler = gre_rcv, + .err_handler = gre_err, + .netns_ok = 1, +}; + +static int __init gre_init(void) +{ + pr_info("GRE over IPv4 demultiplexor driver"); + + if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { + pr_err("gre: can't add protocol\n"); + return -EAGAIN; + } + + return 0; +} + +static void __exit gre_exit(void) +{ + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); +} + +module_init(gre_init); +module_exit(gre_exit); + +MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver"); +MODULE_AUTHOR("D. 
Kozlov (xeb@mail.ru)"); +MODULE_LICENSE("GPL"); + diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 945b20a5ad50..85176895495a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -44,6 +44,7 @@ #include #include #include +#include #ifdef CONFIG_IPV6 #include @@ -1278,10 +1279,9 @@ static void ipgre_fb_tunnel_init(struct net_device *dev) } -static const struct net_protocol ipgre_protocol = { - .handler = ipgre_rcv, - .err_handler = ipgre_err, - .netns_ok = 1, +static const struct gre_protocol ipgre_protocol = { + .handler = ipgre_rcv, + .err_handler = ipgre_err, }; static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) @@ -1663,7 +1663,7 @@ static int __init ipgre_init(void) if (err < 0) return err; - err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); + err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); if (err < 0) { printk(KERN_INFO "ipgre init: can't add protocol\n"); goto add_proto_failed; @@ -1683,7 +1683,7 @@ out: tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: - inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); + gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); add_proto_failed: unregister_pernet_device(&ipgre_net_ops); goto out; @@ -1693,7 +1693,7 @@ static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); - if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) + if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) printk(KERN_INFO "ipgre close: can't remove protocol\n"); unregister_pernet_device(&ipgre_net_ops); } -- cgit v1.2.3 From 81ce790bd75d49a0d119f5d7b27405e1d9b1bd57 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Aug 2010 23:51:33 +0000 Subject: irda: use net_device_stats from struct net_device struct net_device has its own struct net_device_stats member, so use this one instead of a private copy in the irlan_cb struct. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/irda/irlan_common.h | 1 - net/irda/irlan/irlan_eth.c | 32 +++++++++----------------------- 2 files changed, 9 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/irda/irlan_common.h b/include/net/irda/irlan_common.h index 73cacb3ac16c..0af8b8dfbc22 100644 --- a/include/net/irda/irlan_common.h +++ b/include/net/irda/irlan_common.h @@ -171,7 +171,6 @@ struct irlan_cb { int magic; struct list_head dev_list; struct net_device *dev; /* Ethernet device structure*/ - struct net_device_stats stats; __u32 saddr; /* Source device address */ __u32 daddr; /* Destination device address */ diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 5bb8353105cc..8ee1ff6c742f 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -45,13 +45,11 @@ static int irlan_eth_close(struct net_device *dev); static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, struct net_device *dev); static void irlan_eth_set_multicast_list( struct net_device *dev); -static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev); static const struct net_device_ops irlan_eth_netdev_ops = { .ndo_open = irlan_eth_open, .ndo_stop = irlan_eth_close, .ndo_start_xmit = irlan_eth_xmit, - .ndo_get_stats = irlan_eth_get_stats, .ndo_set_multicast_list = irlan_eth_set_multicast_list, .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, @@ -208,10 +206,10 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, * tried :-) DB */ /* irttp_data_request already free the packet */ - self->stats.tx_dropped++; + dev->stats.tx_dropped++; } else { - self->stats.tx_packets++; - self->stats.tx_bytes += len; + dev->stats.tx_packets++; + dev->stats.tx_bytes += len; } return NETDEV_TX_OK; @@ -226,15 +224,16 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb) { struct irlan_cb *self = instance; + struct net_device *dev = self->dev; if (skb == NULL) { - ++self->stats.rx_dropped; + dev->stats.rx_dropped++; return 0; } if (skb->len < ETH_HLEN) { IRDA_DEBUG(0, "%s() : IrLAN frame too short (%d)\n", __func__, skb->len); - ++self->stats.rx_dropped; + dev->stats.rx_dropped++; dev_kfree_skb(skb); return 0; } @@ -244,10 +243,10 @@ int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb) * might have been previously set by the low level IrDA network * device driver */ - skb->protocol = eth_type_trans(skb, self->dev); /* Remove eth header */ + skb->protocol = eth_type_trans(skb, dev); /* Remove eth header */ - self->stats.rx_packets++; - self->stats.rx_bytes += skb->len; + dev->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; netif_rx(skb); /* Eat it! */ @@ -348,16 +347,3 @@ static void irlan_eth_set_multicast_list(struct net_device *dev) else irlan_set_broadcast_filter(self, FALSE); } - -/* - * Function irlan_get_stats (dev) - * - * Get the current statistics for this device - * - */ -static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev) -{ - struct irlan_cb *self = netdev_priv(dev); - - return &self->stats; -} -- cgit v1.2.3 From 739a91ef0625e0e4a40b835f4f891313c47915df Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 21 Aug 2010 06:23:15 +0000 Subject: net_sched: cls_flow: add key rxhash We can use rxhash to classify the traffic into flows. As rxhash may be supplied by the NIC or by RPS, it is cheaper. Signed-off-by: Changli Gao Acked-by: Jamal Hadi Salim Signed-off-by: David S.
Miller --- include/linux/pkt_cls.h | 1 + net/sched/cls_flow.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index 7f6ba8658abe..defbde203d07 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -332,6 +332,7 @@ enum { FLOW_KEY_SKUID, FLOW_KEY_SKGID, FLOW_KEY_VLAN_TAG, + FLOW_KEY_RXHASH, __FLOW_KEY_MAX, }; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index cd709f1294df..5b271a18bc3a 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -306,6 +306,11 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb) return tag & VLAN_VID_MASK; } +static u32 flow_get_rxhash(struct sk_buff *skb) +{ + return skb_get_rxhash(skb); +} + static u32 flow_key_get(struct sk_buff *skb, int key) { switch (key) { @@ -343,6 +348,8 @@ static u32 flow_key_get(struct sk_buff *skb, int key) return flow_get_skgid(skb); case FLOW_KEY_VLAN_TAG: return flow_get_vlan_tag(skb); + case FLOW_KEY_RXHASH: + return flow_get_rxhash(skb); default: WARN_ON(1); return 0; -- cgit v1.2.3 From 2eebf582c9b3106abb9c33f4fc0a347fb9391037 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 18 Aug 2010 12:25:50 -0400 Subject: fanotify: flush outstanding perm requests on group destroy When an fanotify listener is closing, it may cause a deadlock between the listener and the original task doing an fs operation. If the original task is waiting for a permissions response it will be holding the srcu lock. The listener cannot clean up and exit until after that srcu lock is synchronized. Thus deadlock. The fix introduced here is to stop accepting new permissions events when a listener is shutting down and to grant permission for all outstanding events. Thus the original task will eventually release the srcu lock and the listener can complete shutdown.
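A minimal sketch of that shutdown ordering, with simplified locking and hypothetical names (send_event_and_wait and fsnotify_grp_srcu are illustrative only; the list-draining half mirrors the actual diff below):

/* Task doing the fs operation: sleeps for a verdict while holding
 * the SRCU read lock, so the listener cannot synchronize_srcu(). */
idx = srcu_read_lock(&fsnotify_grp_srcu);
send_event_and_wait(group, event);	/* blocks until event->response is set */
srcu_read_unlock(&fsnotify_grp_srcu, idx);

/* Listener release path: answer every queued event first, so the
 * waiter above wakes, drops the read lock, and the SRCU
 * synchronization in group teardown can finally complete. */
mutex_lock(&group->fanotify_data.access_mutex);
group->fanotify_data.bypass_perm = true;	/* refuse new permission events */
list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) {
	list_del_init(&re->list);
	re->event->response = FAN_ALLOW;	/* grant the outstanding request */
}
mutex_unlock(&group->fanotify_data.access_mutex);
wake_up(&group->fanotify_data.access_waitq);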
Reported-by: Andreas Gruenbacher Cc: Andreas Gruenbacher Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 27 +++++++++++++++++++++++++++ include/linux/fanotify.h | 7 ------- include/linux/fsnotify_backend.h | 1 + 3 files changed, 28 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 032b837fcd11..b966b7230f47 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -195,6 +195,14 @@ static int prepare_for_access_response(struct fsnotify_group *group, re->fd = fd; mutex_lock(&group->fanotify_data.access_mutex); + + if (group->fanotify_data.bypass_perm) { + mutex_unlock(&group->fanotify_data.access_mutex); + kmem_cache_free(fanotify_response_event_cache, re); + event->response = FAN_ALLOW; + return 0; + } + list_add_tail(&re->list, &group->fanotify_data.access_list); mutex_unlock(&group->fanotify_data.access_mutex); @@ -364,9 +372,28 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; + struct fanotify_response_event *re, *lre; pr_debug("%s: file=%p group=%p\n", __func__, file, group); +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + mutex_lock(&group->fanotify_data.access_mutex); + + group->fanotify_data.bypass_perm = true; + + list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { + pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, + re, re->event); + + list_del_init(&re->list); + re->event->response = FAN_ALLOW; + + kmem_cache_free(fanotify_response_event_cache, re); + } + mutex_unlock(&group->fanotify_data.access_mutex); + + wake_up(&group->fanotify_data.access_waitq); +#endif /* matches the fanotify_init->fsnotify_alloc_group */ fsnotify_put_group(group); diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index f0949a57ca9d..985435622ecd 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -95,11 +95,4 @@ struct fanotify_response { (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \ (long)(meta)->event_len <= (long)(len)) -#ifdef __KERNEL__ - -struct fanotify_wait { - struct fsnotify_event *event; - __s32 fd; -}; -#endif /* __KERNEL__ */ #endif /* _LINUX_FANOTIFY_H */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index ed36fb57c426..e40190d16878 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -156,6 +156,7 @@ struct fsnotify_group { struct mutex access_mutex; struct list_head access_list; wait_queue_head_t access_waitq; + bool bypass_perm; /* protected by access_mutex */ #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; } fanotify_data; -- cgit v1.2.3 From d8287fc864643beaf1623c92aceb1ab38eae0648 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 18:37:27 -0700 Subject: net: use __be16 instead of u16 for the userspace code Signed-off-by: Changli Gao Signed-off-by: David S. 
Miller --- include/linux/if_pppox.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 1525b2156b2a..770e8fa669d2 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -50,8 +50,8 @@ struct pppoe_addr { * PPTP addressing definition */ struct pptp_addr { - u16 call_id; - struct in_addr sin_addr; + __be16 call_id; + struct in_addr sin_addr; }; /************************************************************************ -- cgit v1.2.3 From 05532121da0728eaedac2a0a5c3cecad3a95d765 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 21:03:33 -0700 Subject: net: 802.1q: make vlan_hwaccel_do_receive() return void vlan_hwaccel_do_receive() always returns 0, so make it return void. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 5 ++--- net/8021q/vlan_core.c | 3 +-- net/core/dev.c | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 3d870fda8c4f..a52320751bfc 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -119,7 +119,7 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); -extern int vlan_hwaccel_do_receive(struct sk_buff *skb); +extern void vlan_hwaccel_do_receive(struct sk_buff *skb); extern gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb); @@ -147,9 +147,8 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, return NET_XMIT_SUCCESS; } -static inline int vlan_hwaccel_do_receive(struct sk_buff *skb) +static inline void vlan_hwaccel_do_receive(struct sk_buff *skb) { - return 0; } static inline gro_result_t diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 01ddb0472f86..07eeb5b99dce 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -35,7 +35,7 @@ drop: } EXPORT_SYMBOL(__vlan_hwaccel_rx); -int vlan_hwaccel_do_receive(struct sk_buff *skb) +void vlan_hwaccel_do_receive(struct sk_buff *skb) { struct net_device *dev = skb->dev; struct vlan_rx_stats *rx_stats; @@ -69,7 +69,6 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb) break; } u64_stats_update_end(&rx_stats->syncp); - return 0; } struct net_device *vlan_dev_real_dev(const struct net_device *dev) diff --git a/net/core/dev.c b/net/core/dev.c index 7cd5237d9822..d569f88bcf80 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2841,8 +2841,8 @@ static int __netif_receive_skb(struct sk_buff *skb) if (!netdev_tstamp_prequeue) net_timestamp_check(skb); - if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) - return NET_RX_SUCCESS; + if (vlan_tx_tag_present(skb)) + vlan_hwaccel_do_receive(skb); /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) -- cgit v1.2.3 From fcb12fd2236f49aa8fdc1568ed4ebdfe4fddc6b5 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 16:41:59 +0000 Subject: net: rds: remove duplicate type definitions __be* are defined in linux/types.h now, and in fact, rds.h isn't even exported to user space. Signed-off-by: Changli Gao Signed-off-by: David S.
Miller --- include/linux/rds.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 24bce3ded9ea..7f3971d9fc5c 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -36,15 +36,6 @@ #include -/* These sparse annotated types shouldn't be in any user - * visible header file. We should clean this up rather - * than kludging around them. */ -#ifndef __KERNEL__ -#define __be16 u_int16_t -#define __be32 u_int32_t -#define __be64 u_int64_t -#endif - #define RDS_IB_ABI_VERSION 0x301 /* -- cgit v1.2.3 From 09cd2b99c6cdd1e14e84c1febca2fb91e9f4e5ba Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 22 Aug 2010 17:25:05 +0000 Subject: header: fix broken headers for user space __packed is only defined in kernel space, so we should use __attribute__((packed)) for the code shared between kernel and user space. Two __attribute() annotations are replaced with __attribute__() too. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/if_ether.h | 2 +- include/linux/if_fddi.h | 8 ++++---- include/linux/if_hippi.h | 8 ++++---- include/linux/if_pppox.h | 10 +++++----- include/linux/ipv6.h | 4 ++-- include/linux/nbd.h | 2 +- include/linux/ncp.h | 10 +++++----- include/linux/netfilter/xt_IDLETIMER.h | 2 +- include/linux/phonet.h | 4 ++-- include/linux/rfkill.h | 2 +- 10 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index c831467774d0..bed7a4682b90 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -119,7 +119,7 @@ struct ethhdr { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ unsigned char h_source[ETH_ALEN]; /* source ether addr */ __be16 h_proto; /* packet type ID field */ -} __packed; +} __attribute__((packed)); #ifdef __KERNEL__ #include diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h index 9947c39e62f6..e6dc11e7f9a5 100644 --- a/include/linux/if_fddi.h +++ b/include/linux/if_fddi.h @@ -67,7 +67,7 @@ struct fddi_8022_1_hdr { __u8 dsap; /* destination service access point */ __u8 ssap; /* source service access point */ __u8 ctrl; /* control byte #1 */ -} __packed; +} __attribute__((packed)); /* Define 802.2 Type 2 header */ struct fddi_8022_2_hdr { @@ -75,7 +75,7 @@ struct fddi_8022_2_hdr { __u8 ssap; /* source service access point */ __u8 ctrl_1; /* control byte #1 */ __u8 ctrl_2; /* control byte #2 */ -} __packed; +} __attribute__((packed)); /* Define 802.2 SNAP header */ #define FDDI_K_OUI_LEN 3 @@ -85,7 +85,7 @@ struct fddi_snap_hdr { __u8 ctrl; /* always 0x03 */ __u8 oui[FDDI_K_OUI_LEN]; /* organizational universal id */ __be16 ethertype; /* packet type ID field */ -} __packed; +} __attribute__((packed)); /* Define FDDI LLC frame header */ struct fddihdr { @@ -98,7 +98,7 @@ struct fddihdr { struct fddi_8022_2_hdr llc_8022_2; struct fddi_snap_hdr llc_snap; } hdr; -} __packed; +} __attribute__((packed)); #ifdef __KERNEL__ #include diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h index 5fe5f307c6f5..cdc049f1829a 100644 --- a/include/linux/if_hippi.h +++ b/include/linux/if_hippi.h @@ -104,7 +104,7 @@ struct hippi_fp_hdr { __be32 fixed; #endif __be32 d2_size; -} __packed; +} __attribute__((packed)); struct hippi_le_hdr { #if defined (__BIG_ENDIAN_BITFIELD) @@ -129,7 +129,7 @@ struct hippi_le_hdr { __u8 daddr[HIPPI_ALEN]; __u16 locally_administered; __u8 saddr[HIPPI_ALEN]; -} __packed; +} __attribute__((packed)); #define HIPPI_OUI_LEN 3 /* @@ -142,12 
+142,12 @@ struct hippi_snap_hdr { __u8 ctrl; /* always 0x03 */ __u8 oui[HIPPI_OUI_LEN]; /* organizational universal id (zero)*/ __be16 ethertype; /* packet type ID field */ -} __packed; +} __attribute__((packed)); struct hippi_hdr { struct hippi_fp_hdr fp; struct hippi_le_hdr le; struct hippi_snap_hdr snap; -} __packed; +} __attribute__((packed)); #endif /* _LINUX_IF_HIPPI_H */ diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 1925e0c3f162..27741e05446f 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -59,7 +59,7 @@ struct sockaddr_pppox { union{ struct pppoe_addr pppoe; }sa_addr; -} __packed; +} __attribute__((packed)); /* The use of the above union isn't viable because the size of this * struct must stay fixed over time -- applications use sizeof(struct @@ -70,7 +70,7 @@ struct sockaddr_pppol2tp { sa_family_t sa_family; /* address family, AF_PPPOX */ unsigned int sa_protocol; /* protocol identifier */ struct pppol2tp_addr pppol2tp; -} __packed; +} __attribute__((packed)); /* The L2TPv3 protocol changes tunnel and session ids from 16 to 32 * bits. So we need a different sockaddr structure. @@ -79,7 +79,7 @@ struct sockaddr_pppol2tpv3 { sa_family_t sa_family; /* address family, AF_PPPOX */ unsigned int sa_protocol; /* protocol identifier */ struct pppol2tpv3_addr pppol2tp; -} __packed; +} __attribute__((packed)); /********************************************************************* * @@ -101,7 +101,7 @@ struct pppoe_tag { __be16 tag_type; __be16 tag_len; char tag_data[0]; -} __attribute ((packed)); +} __attribute__ ((packed)); /* Tag identifiers */ #define PTT_EOL __cpu_to_be16(0x0000) @@ -129,7 +129,7 @@ struct pppoe_hdr { __be16 sid; __be16 length; struct pppoe_tag tag[0]; -} __packed; +} __attribute__((packed)); /* Length of entire PPPoE + PPP header */ #define PPPOE_SES_HLEN 8 diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ab9e9e89e407..e62683ba88e6 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -58,7 +58,7 @@ struct ipv6_opt_hdr { /* * TLV encoded option data follows. 
*/ -} __packed; /* required for some archs */ +} __attribute__((packed)); /* required for some archs */ #define ipv6_destopt_hdr ipv6_opt_hdr #define ipv6_hopopt_hdr ipv6_opt_hdr @@ -99,7 +99,7 @@ struct ipv6_destopt_hao { __u8 type; __u8 length; struct in6_addr addr; -} __packed; +} __attribute__((packed)); /* * IPv6 fixed header diff --git a/include/linux/nbd.h b/include/linux/nbd.h index bb58854a8061..d146ca10c0f5 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -88,7 +88,7 @@ struct nbd_request { char handle[8]; __be64 from; __be32 len; -} __packed; +} __attribute__((packed)); /* * This is the reply packet that nbd-server sends back to the client after diff --git a/include/linux/ncp.h b/include/linux/ncp.h index 3ace8370e61e..99f0adeeb3f3 100644 --- a/include/linux/ncp.h +++ b/include/linux/ncp.h @@ -27,7 +27,7 @@ struct ncp_request_header { __u8 conn_high; __u8 function; __u8 data[0]; -} __packed; +} __attribute__((packed)); #define NCP_REPLY (0x3333) #define NCP_WATCHDOG (0x3E3E) @@ -42,7 +42,7 @@ struct ncp_reply_header { __u8 completion_code; __u8 connection_state; __u8 data[0]; -} __packed; +} __attribute__((packed)); #define NCP_VOLNAME_LEN (16) #define NCP_NUMBER_OF_VOLUMES (256) @@ -158,7 +158,7 @@ struct nw_info_struct { #ifdef __KERNEL__ struct nw_nfs_info nfs; #endif -} __packed; +} __attribute__((packed)); /* modify mask - use with MODIFY_DOS_INFO structure */ #define DM_ATTRIBUTES (cpu_to_le32(0x02)) @@ -190,12 +190,12 @@ struct nw_modify_dos_info { __u16 inheritanceGrantMask; __u16 inheritanceRevokeMask; __u32 maximumSpace; -} __packed; +} __attribute__((packed)); struct nw_search_sequence { __u8 volNumber; __u32 dirBase; __u32 sequence; -} __packed; +} __attribute__((packed)); #endif /* _LINUX_NCP_H */ diff --git a/include/linux/netfilter/xt_IDLETIMER.h b/include/linux/netfilter/xt_IDLETIMER.h index 3e1aa1be942e..208ae9387331 100644 --- a/include/linux/netfilter/xt_IDLETIMER.h +++ b/include/linux/netfilter/xt_IDLETIMER.h @@ -39,7 +39,7 @@ struct idletimer_tg_info { char label[MAX_IDLETIMER_LABEL_SIZE]; /* for kernel module internal use only */ - struct idletimer_tg *timer __attribute((aligned(8))); + struct idletimer_tg *timer __attribute__((aligned(8))); }; #endif diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 24426c3d6b5a..76edadf046d3 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -56,7 +56,7 @@ struct phonethdr { __be16 pn_length; __u8 pn_robj; __u8 pn_sobj; -} __packed; +} __attribute__((packed)); /* Common Phonet payload header */ struct phonetmsg { @@ -98,7 +98,7 @@ struct sockaddr_pn { __u8 spn_dev; __u8 spn_resource; __u8 spn_zero[sizeof(struct sockaddr) - sizeof(sa_family_t) - 3]; -} __packed; +} __attribute__((packed)); /* Well known address */ #define PN_DEV_PC 0x10 diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 4f82326eb294..08c32e4f261a 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -81,7 +81,7 @@ struct rfkill_event { __u8 type; __u8 op; __u8 soft, hard; -} __packed; +} __attribute__((packed)); /* * We are planning to be backward and forward compatible with changes -- cgit v1.2.3 From 21dc330157454046dd7c494961277d76e1c957fe Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 23 Aug 2010 00:13:46 -0700 Subject: net: Rename skb_has_frags to skb_has_frag_list SKBs can be "fragmented" in two ways, via a page array (called skb_shinfo(skb)->frags[]) and via a list of SKBs (called skb_shinfo(skb)->frag_list). 
Since skb_has_frags() tests the latter, its name is confusing since it sounds more like it's testing the former. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- include/linux/skbuff.h | 4 ++-- net/core/dev.c | 4 ++-- net/core/skbuff.c | 18 +++++++++--------- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/reassembly.c | 2 +- 9 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 46c36ffe20ee..ce2de8b64083 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2191,7 +2191,7 @@ static inline int net_gso_ok(int features, int gso_type) static inline int skb_gso_ok(struct sk_buff *skb, int features) { return net_gso_ok(features, skb_shinfo(skb)->gso_type) && - (!skb_has_frags(skb) || (features & NETIF_F_FRAGLIST)); + (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); } static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f067c95cf18a..f900ffcd847e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1120,7 +1120,7 @@ extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size); #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) -#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frags(skb)) +#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frag_list(skb)) #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) #ifdef NET_SKBUFF_DATA_USES_OFFSET @@ -1784,7 +1784,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) skb = skb->prev) -static inline bool skb_has_frags(const struct sk_buff *skb) +static inline bool skb_has_frag_list(const struct sk_buff *skb) { return skb_shinfo(skb)->frag_list != NULL; } diff --git a/net/core/dev.c b/net/core/dev.c index d569f88bcf80..859e30ff044a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1930,7 +1930,7 @@ static inline int skb_needs_linearize(struct sk_buff *skb, struct net_device *dev) { return skb_is_nonlinear(skb) && - ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || + ((skb_has_frag_list(skb) && !(dev->features & NETIF_F_FRAGLIST)) || (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)))); } @@ -3090,7 +3090,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) goto normal; - if (skb_is_gso(skb) || skb_has_frags(skb)) + if (skb_is_gso(skb) || skb_has_frag_list(skb)) goto normal; rcu_read_lock(); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 99ef721f773d..e2535fb4985d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -340,7 +340,7 @@ static void skb_release_data(struct sk_buff *skb) put_page(skb_shinfo(skb)->frags[i].page); } - if (skb_has_frags(skb)) + if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); kfree(skb->head); @@ -759,7 +759,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) skb_shinfo(n)->nr_frags = i; } - if (skb_has_frags(skb)) { + if (skb_has_frag_list(skb)) { skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; skb_clone_fraglist(n); } @@ -822,7 +822,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) get_page(skb_shinfo(skb)->frags[i].page); - if (skb_has_frags(skb)) + if
(skb_has_frag_list(skb)) skb_clone_fraglist(skb); skb_release_data(skb); @@ -1099,7 +1099,7 @@ drop_pages: for (; i < nfrags; i++) put_page(skb_shinfo(skb)->frags[i].page); - if (skb_has_frags(skb)) + if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); goto done; } @@ -1194,7 +1194,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) /* Optimization: no fragments, no reasons to preestimate * size of pulled pages. Superb. */ - if (!skb_has_frags(skb)) + if (!skb_has_frag_list(skb)) goto pull_pages; /* Estimate size of pulled pages. */ @@ -2323,7 +2323,7 @@ next_skb: st->frag_data = NULL; } - if (st->root_skb == st->cur_skb && skb_has_frags(st->root_skb)) { + if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) { st->cur_skb = skb_shinfo(st->root_skb)->frag_list; st->frag_idx = 0; goto next_skb; @@ -2889,7 +2889,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) return -ENOMEM; /* Easy case. Most of packets will go this way. */ - if (!skb_has_frags(skb)) { + if (!skb_has_frag_list(skb)) { /* A little of trouble, not enough of space for trailer. * This should not happen, when stack is tuned to generate * good frames. OK, on miss we reallocate and reserve even more @@ -2924,7 +2924,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) if (skb1->next == NULL && tailbits) { if (skb_shinfo(skb1)->nr_frags || - skb_has_frags(skb1) || + skb_has_frag_list(skb1) || skb_tailroom(skb1) < tailbits) ntail = tailbits + 128; } @@ -2933,7 +2933,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) skb_cloned(skb1) || ntail || skb_shinfo(skb1)->nr_frags || - skb_has_frags(skb1)) { + skb_has_frag_list(skb1)) { struct sk_buff *skb2; /* Fuck, we are miserable poor guys... */ diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b7c41654dde5..f4dc879e258e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -542,7 +542,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ - if (skb_has_frags(head)) { + if (skb_has_frag_list(head)) { struct sk_buff *clone; int i, plen = 0; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e807492f1777..6d2753c7ffdd 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -487,7 +487,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) * LATER: this step can be merged to real generation of fragments, * we can switch to copy when see the first bad fragment. 
*/ - if (skb_has_frags(skb)) { + if (skb_has_frag_list(skb)) { struct sk_buff *frag; int first_len = skb_pagelen(skb); int truesizes = 0; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d40b330c0ee6..1838927a2243 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -637,7 +637,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } mtu -= hlen + sizeof(struct frag_hdr); - if (skb_has_frags(skb)) { + if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); int truesizes = 0; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 13ef5bc05cf5..089c598773c7 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -413,7 +413,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ - if (skb_has_frags(head)) { + if (skb_has_frag_list(head)) { struct sk_buff *clone; int i, plen = 0; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 545c4141b755..8aea3f3f18d7 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -499,7 +499,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ - if (skb_has_frags(head)) { + if (skb_has_frag_list(head)) { struct sk_buff *clone; int i, plen = 0; -- cgit v1.2.3 From c93a4dfb31f2c023da3ad1238c352452f2cc0e05 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 23 Aug 2010 11:59:28 +0100 Subject: xen: pvhvm: allow user to request no emulated device unplug This allows the user to disable pvhvm and revert to emulated devices in case of a system misconfiguration (e.g. initramfs with only emulated drivers in it). Signed-off-by: Ian Campbell Acked-by: Jeremy Fitzhardinge Acked-by: Stefano Stabellini --- Documentation/kernel-parameters.txt | 1 + arch/x86/xen/platform-pci-unplug.c | 5 +++++ include/xen/platform_pci.h | 1 + 3 files changed, 7 insertions(+) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2c85c0692b01..8bbe83b9d0b2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2631,6 +2631,7 @@ and is between 256 and 4096 characters. It is defined in the file all -- unplug all emulated devices (NICs and IDE disks) ignore -- continue loading the Xen platform PCI driver even if the version check failed + never -- do not unplug even if version check succeeds xirc2ps_cs= [NET,PCMCIA] Format: diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 554c002a1e1a..070dfa0654bd 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -72,6 +72,9 @@ void __init xen_unplug_emulated_devices(void) { int r; + /* user explicitly requested no unplug */ + if (xen_emul_unplug & XEN_UNPLUG_NEVER) + return; /* check the version of the xen platform PCI device */ r = check_platform_magic(); /* If the version matches enable the Xen platform PCI driver.
@@ -127,6 +130,8 @@ static int __init parse_xen_emul_unplug(char *arg) xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; else if (!strncmp(p, "ignore", l)) xen_emul_unplug |= XEN_UNPLUG_IGNORE; + else if (!strncmp(p, "never", l)) + xen_emul_unplug |= XEN_UNPLUG_NEVER; else printk(KERN_WARNING "unrecognised option '%s' " "in parameter 'xen_emul_unplug'\n", p); diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h index ce9d671c636c..123b7752fa6a 100644 --- a/include/xen/platform_pci.h +++ b/include/xen/platform_pci.h @@ -21,6 +21,7 @@ #define XEN_UNPLUG_AUX_IDE_DISKS 4 #define XEN_UNPLUG_ALL 7 #define XEN_UNPLUG_IGNORE 8 +#define XEN_UNPLUG_NEVER 16 static inline int xen_must_unplug_nics(void) { #if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \ -- cgit v1.2.3 From 1dc7ce99b091a11cce0f34456c1ffcb928f17edd Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 23 Aug 2010 11:59:29 +0100 Subject: xen: pvhvm: rename xen_emul_unplug=ignore to =unnecessary It is not immediately clear what this option causes to become ignored. The actual meaning is that it is not necessary to unplug the emulated devices to safely use the PV ones, even if the platform does not support the unplug protocol. (presumably the user will only add this option if they have ensured that their domain configuration is safe). I think xen_emul_unplug=unnecessary better captures this. Signed-off-by: Ian Campbell Acked-by: Jeremy Fitzhardinge Acked-by: Stefano Stabellini --- Documentation/kernel-parameters.txt | 5 +++-- arch/x86/xen/platform-pci-unplug.c | 13 +++++++------ drivers/block/xen-blkfront.c | 2 +- include/xen/platform_pci.h | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8bbe83b9d0b2..f084af0cb8e0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2629,8 +2629,9 @@ and is between 256 and 4096 characters. It is defined in the file aux-ide-disks -- unplug non-primary-master IDE devices nics -- unplug network devices all -- unplug all emulated devices (NICs and IDE disks) - ignore -- continue loading the Xen platform PCI driver even - if the version check failed + unnecessary -- unplugging emulated devices is + unnecessary even if the host did not respond to + the unplug protocol never -- do not unplug even if version check succeeds xirc2ps_cs= [NET,PCMCIA] diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 070dfa0654bd..0f456386cce5 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -78,10 +78,11 @@ void __init xen_unplug_emulated_devices(void) /* check the version of the xen platform PCI device */ r = check_platform_magic(); /* If the version matches enable the Xen platform PCI driver. - * Also enable the Xen platform PCI driver if the version is really old - * and the user told us to ignore it. */ + * Also enable the Xen platform PCI driver if the host does + * not support the unplug protocol (XEN_PLATFORM_ERR_MAGIC) + * but the user told us that unplugging is unnecessary.
*/ if (r && !(r == XEN_PLATFORM_ERR_MAGIC && - (xen_emul_unplug & XEN_UNPLUG_IGNORE))) + (xen_emul_unplug & XEN_UNPLUG_UNNECESSARY))) return; /* Set the default value of xen_emul_unplug depending on whether or * not the Xen PV frontends and the Xen platform PCI driver have @@ -102,7 +103,7 @@ void __init xen_unplug_emulated_devices(void) } } /* Now unplug the emulated devices */ - if (!(xen_emul_unplug & XEN_UNPLUG_IGNORE)) + if (!(xen_emul_unplug & XEN_UNPLUG_UNNECESSARY)) outw(xen_emul_unplug, XEN_IOPORT_UNPLUG); xen_platform_pci_unplug = xen_emul_unplug; } @@ -128,8 +129,8 @@ static int __init parse_xen_emul_unplug(char *arg) xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS; else if (!strncmp(p, "nics", l)) xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; - else if (!strncmp(p, "ignore", l)) - xen_emul_unplug |= XEN_UNPLUG_IGNORE; + else if (!strncmp(p, "unnecessary", l)) + xen_emul_unplug |= XEN_UNPLUG_UNNECESSARY; else if (!strncmp(p, "never", l)) xen_emul_unplug |= XEN_UNPLUG_NEVER; else diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ac1b682edecb..ab735a605cf3 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -834,7 +834,7 @@ static int blkfront_probe(struct xenbus_device *dev, char *type; int len; /* no unplug has been done: do not hook devices != xen vbds */ - if (xen_platform_pci_unplug & XEN_UNPLUG_IGNORE) { + if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) { int major; if (!VDEV_IS_EXTENDED(vdevice)) diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h index 123b7752fa6a..590ccfd82645 100644 --- a/include/xen/platform_pci.h +++ b/include/xen/platform_pci.h @@ -20,7 +20,7 @@ #define XEN_UNPLUG_ALL_NICS 2 #define XEN_UNPLUG_AUX_IDE_DISKS 4 #define XEN_UNPLUG_ALL 7 -#define XEN_UNPLUG_IGNORE 8 +#define XEN_UNPLUG_UNNECESSARY 8 #define XEN_UNPLUG_NEVER 16 static inline int xen_must_unplug_nics(void) { #if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \ -- cgit v1.2.3 From 9c35e90c6fcf7f5baf27a63d9565e9f47633f299 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 23 Aug 2010 12:01:35 +0100 Subject: xen: pvhvm: make it clearer that XEN_UNPLUG_* define bits in a bitfield by defining in terms of (1<<N). Signed-off-by: Ian Campbell Acked-by: Jeremy Fitzhardinge Acked-by: Stefano Stabellini --- include/xen/platform_pci.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h index 590ccfd82645..a785a3b0c8c7 100644 --- a/include/xen/platform_pci.h +++ b/include/xen/platform_pci.h @@ -16,12 +16,15 @@ #define XEN_IOPORT_PROTOVER (XEN_IOPORT_BASE + 2) /* 1 byte access (R) */ #define XEN_IOPORT_PRODNUM (XEN_IOPORT_BASE + 2) /* 2 byte access (W) */ -#define XEN_UNPLUG_ALL_IDE_DISKS 1 -#define XEN_UNPLUG_ALL_NICS 2 -#define XEN_UNPLUG_AUX_IDE_DISKS 4 -#define XEN_UNPLUG_ALL 7 -#define XEN_UNPLUG_UNNECESSARY 8 -#define XEN_UNPLUG_NEVER 16 +#define XEN_UNPLUG_ALL_IDE_DISKS (1<<0) +#define XEN_UNPLUG_ALL_NICS (1<<1) +#define XEN_UNPLUG_AUX_IDE_DISKS (1<<2) +#define XEN_UNPLUG_ALL (XEN_UNPLUG_ALL_IDE_DISKS|\ + XEN_UNPLUG_ALL_NICS|\ + XEN_UNPLUG_AUX_IDE_DISKS) + +#define XEN_UNPLUG_UNNECESSARY (1<<16) +#define XEN_UNPLUG_NEVER (1<<17) static inline int xen_must_unplug_nics(void) { #if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \ -- cgit v1.2.3 From 5dd531a03ad721b41911ddb32e6e0481404e7aaf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 23 Aug 2010 13:52:19 +0200 Subject: block: add function call to switch the IO scheduler from a driver Currently drivers must do an elevator_exit() + elevator_init() to
switch IO schedulers. There are a few problems with this: - Since commit 1abec4fdbb142e3ccb6ce99832fae42129134a96, elevator_init() requires a zeroed out q->elevator pointer. The two existing in-kernel users don't do that. - It will only work at initialization time, since using the above two-stage construct does not properly quiesce the queue. So add elevator_change() which takes care of this, and convert the elv_iosched_store() sysfs interface to use this helper as well. Reported-by: Peter Oberparleiter Reported-by: Kevin Vigor Signed-off-by: Jens Axboe --- block/elevator.c | 44 +++++++++++++++++++++++++++++++------------- include/linux/elevator.h | 1 + 2 files changed, 32 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/block/elevator.c b/block/elevator.c index ec585c9554d3..205b09a5bd9e 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -1009,18 +1009,19 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) { struct elevator_queue *old_elevator, *e; void *data; + int err; /* * Allocate new elevator */ e = elevator_alloc(q, new_e); if (!e) - return 0; + return -ENOMEM; data = elevator_init_queue(q, e); if (!data) { kobject_put(&e->kobj); - return -ENOMEM; } /* @@ -1043,7 +1044,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) __elv_unregister_queue(old_elevator); - if (elv_register_queue(q)) + err = elv_register_queue(q); + if (err) goto fail_register; /* @@ -1056,7 +1058,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); - return 1; + return 0; fail_register: /* @@ -1071,17 +1073,19 @@ fail_register: queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); spin_unlock_irq(q->queue_lock); - return 0; + return err; } -ssize_t elv_iosched_store(struct request_queue *q, const char *name, - size_t count) +/* + * Switch this queue to the given IO scheduler.
+ */ +int elevator_change(struct request_queue *q, const char *name) { char elevator_name[ELV_NAME_MAX]; struct elevator_type *e; if (!q->elevator) - return count; + return -ENXIO; strlcpy(elevator_name, name, sizeof(elevator_name)); e = elevator_get(strstrip(elevator_name)); @@ -1092,13 +1096,27 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) { elevator_put(e); - return count; + return 0; } - if (!elevator_switch(q, e)) - printk(KERN_ERR "elevator: switch to %s failed\n", - elevator_name); - return count; + return elevator_switch(q, e); +} +EXPORT_SYMBOL(elevator_change); + +ssize_t elv_iosched_store(struct request_queue *q, const char *name, + size_t count) +{ + int ret; + + if (!q->elevator) + return count; + + ret = elevator_change(q, name); + if (!ret) + return count; + + printk(KERN_ERR "elevator: switch to %s failed\n", name); + return ret; } ssize_t elv_iosched_show(struct request_queue *q, char *name) diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 2c958f4fce1e..926b50322a46 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -136,6 +136,7 @@ extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); extern int elevator_init(struct request_queue *, char *); extern void elevator_exit(struct elevator_queue *); +extern int elevator_change(struct request_queue *, const char *); extern int elv_rq_merge_ok(struct request *, struct bio *); /* -- cgit v1.2.3 From 61c77326d1df079f202fa79403c3ccd8c5966a81 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 16 Aug 2010 09:16:55 +0800 Subject: x86, mm: Avoid unnecessary TLB flush On x86, the access and dirty bits are set automatically by the CPU when it accesses memory. By the time we reach the flush_tlb_fix_spurious_fault() code path below, we have already set the dirty bit in the pte and don't need a TLB flush. This might mean the TLB entry on some CPUs doesn't have the dirty bit set, but that doesn't matter: when those CPUs write to the page, the hardware checks the bit itself, with no software involvement. On the other hand, a TLB flush at this point is harmful. A test creates one thread per CPU; each thread writes to the same randomly chosen address within one vma, and we measure the total time. On a 4-socket system the original time is 1.96s, while with the patch it is 0.8s. On a 2-socket system there is a 20% time reduction as well. perf shows that much of the time is spent sending and handling IPIs for the TLB flush. Signed-off-by: Shaohua Li LKML-Reference: <20100816011655.GA362@sli10-desk.sh.intel.com> Acked-by: Suresh Siddha Cc: Andrea Arcangeli Signed-off-by: H.
Peter Anvin --- arch/x86/include/asm/pgtable.h | 2 ++ include/asm-generic/pgtable.h | 4 ++++ mm/memory.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a34c785c5a63..2d0a33bd2971 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -603,6 +603,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, pte_update(mm, addr, ptep); } +#define flush_tlb_fix_spurious_fault(vma, address) + /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index e2bd73e8f9c0..f4d4120e5128 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres #define move_pte(pte, prot, old_addr, new_addr) (pte) #endif +#ifndef flush_tlb_fix_spurious_fault +#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address) +#endif + #ifndef pgprot_noncached #define pgprot_noncached(prot) (prot) #endif diff --git a/mm/memory.c b/mm/memory.c index 2ed2267439df..a40da6983961 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3147,7 +3147,7 @@ static inline int handle_pte_fault(struct mm_struct *mm, * with threads. */ if (flags & FAULT_FLAG_WRITE) - flush_tlb_page(vma, address); + flush_tlb_fix_spurious_fault(vma, address); } unlock: pte_unmap_unlock(pte, ptl); -- cgit v1.2.3 From 8488a38f4d2f43bd55a3e0db4cd57a5bef3af6d6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 11 Aug 2010 15:01:02 +0100 Subject: kobject: Break the kobject namespace defs into their own header Break the kobject namespace defs into their own header to avoid a header file inclusion ordering problem between linux/sysfs.h and linux/kobject.h. This fixes the build breakage on older versions of gcc. Signed-off-by: David Howells Cc: Eric Biederman Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 35 +---------------------------- include/linux/kobject_ns.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sysfs.h | 1 + 3 files changed, 58 insertions(+), 34 deletions(-) create mode 100644 include/linux/kobject_ns.h (limited to 'include') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index cf343a852534..7950a37a7146 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -136,42 +137,8 @@ struct kobj_attribute { extern const struct sysfs_ops kobj_sysfs_ops; -/* - * Namespace types which are used to tag kobjects and sysfs entries. - * Network namespace will likely be the first. - */ -enum kobj_ns_type { - KOBJ_NS_TYPE_NONE = 0, - KOBJ_NS_TYPE_NET, - KOBJ_NS_TYPES -}; - struct sock; -/* - * Callbacks so sysfs can determine namespaces - * @current_ns: return calling task's namespace - * @netlink_ns: return namespace to which a sock belongs (right?) - * @initial_ns: return the initial namespace (i.e. 
init_net_ns) - */ -struct kobj_ns_type_operations { - enum kobj_ns_type type; - const void *(*current_ns)(void); - const void *(*netlink_ns)(struct sock *sk); - const void *(*initial_ns)(void); -}; - -int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); -int kobj_ns_type_registered(enum kobj_ns_type type); -const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); -const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); - -const void *kobj_ns_current(enum kobj_ns_type type); -const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); -const void *kobj_ns_initial(enum kobj_ns_type type); -void kobj_ns_exit(enum kobj_ns_type type, const void *ns); - - /** * struct kset - a set of kobjects of a specific type, belonging to a specific subsystem. * diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h new file mode 100644 index 000000000000..82cb5bf461fb --- /dev/null +++ b/include/linux/kobject_ns.h @@ -0,0 +1,56 @@ +/* Kernel object name space definitions + * + * Copyright (c) 2002-2003 Patrick Mochel + * Copyright (c) 2002-2003 Open Source Development Labs + * Copyright (c) 2006-2008 Greg Kroah-Hartman + * Copyright (c) 2006-2008 Novell Inc. + * + * Split from kobject.h by David Howells (dhowells@redhat.com) + * + * This file is released under the GPLv2. + * + * Please read Documentation/kobject.txt before using the kobject + * interface, ESPECIALLY the parts about reference counts and object + * destructors. + */ + +#ifndef _LINUX_KOBJECT_NS_H +#define _LINUX_KOBJECT_NS_H + +struct sock; +struct kobject; + +/* + * Namespace types which are used to tag kobjects and sysfs entries. + * Network namespace will likely be the first. + */ +enum kobj_ns_type { + KOBJ_NS_TYPE_NONE = 0, + KOBJ_NS_TYPE_NET, + KOBJ_NS_TYPES +}; + +/* + * Callbacks so sysfs can determine namespaces + * @current_ns: return calling task's namespace + * @netlink_ns: return namespace to which a sock belongs (right?) + * @initial_ns: return the initial namespace (i.e. init_net_ns) + */ +struct kobj_ns_type_operations { + enum kobj_ns_type type; + const void *(*current_ns)(void); + const void *(*netlink_ns)(struct sock *sk); + const void *(*initial_ns)(void); +}; + +int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); +int kobj_ns_type_registered(enum kobj_ns_type type); +const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); +const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); + +const void *kobj_ns_current(enum kobj_ns_type type); +const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); +const void *kobj_ns_initial(enum kobj_ns_type type); +void kobj_ns_exit(enum kobj_ns_type type, const void *ns); + +#endif /* _LINUX_KOBJECT_NS_H */ diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 3c92121ba9af..96eb576d82fd 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -16,6 +16,7 @@ #include #include #include +#include #include struct kobject; -- cgit v1.2.3 From 8b230ed8ec96c933047dd0625cf95f739e4939a6 Mon Sep 17 00:00:00 2001 From: Rasesh Mody Date: Mon, 23 Aug 2010 20:24:12 -0700 Subject: bna: Brocade 10Gb Ethernet device driver This is patch 1/6 which contains linux driver source for Brocade's BR1010/BR1020 10Gb CEE capable ethernet adapter. Signed-off-by: Debashis Dutt Signed-off-by: Rasesh Mody Signed-off-by: David S. 
Miller --- MAINTAINERS | 7 + drivers/net/Kconfig | 14 + drivers/net/Makefile | 1 + drivers/net/bna/Makefile | 11 + drivers/net/bna/bfa_cee.c | 407 ++++ drivers/net/bna/bfa_cee.h | 72 + drivers/net/bna/bfa_defs.h | 243 ++ drivers/net/bna/bfa_defs_cna.h | 223 ++ drivers/net/bna/bfa_defs_mfg_comm.h | 244 ++ drivers/net/bna/bfa_defs_status.h | 216 ++ drivers/net/bna/bfa_ioc.c | 1839 +++++++++++++++ drivers/net/bna/bfa_ioc.h | 343 +++ drivers/net/bna/bfa_ioc_ct.c | 391 ++++ drivers/net/bna/bfa_sm.h | 88 + drivers/net/bna/bfa_wc.h | 69 + drivers/net/bna/bfi.h | 392 ++++ drivers/net/bna/bfi_cna.h | 199 ++ drivers/net/bna/bfi_ctreg.h | 637 ++++++ drivers/net/bna/bfi_ll.h | 438 ++++ drivers/net/bna/bna.h | 654 ++++++ drivers/net/bna/bna_ctrl.c | 3626 ++++++++++++++++++++++++++++++ drivers/net/bna/bna_hw.h | 1491 +++++++++++++ drivers/net/bna/bna_txrx.c | 4209 +++++++++++++++++++++++++++++++++++ drivers/net/bna/bna_types.h | 1128 ++++++++++ drivers/net/bna/bnad.c | 3270 +++++++++++++++++++++++++++ drivers/net/bna/bnad.h | 334 +++ drivers/net/bna/bnad_ethtool.c | 1282 +++++++++++ drivers/net/bna/cna.h | 81 + drivers/net/bna/cna_fwimg.c | 64 + include/linux/pci_ids.h | 3 + 30 files changed, 21976 insertions(+) create mode 100644 drivers/net/bna/Makefile create mode 100644 drivers/net/bna/bfa_cee.c create mode 100644 drivers/net/bna/bfa_cee.h create mode 100644 drivers/net/bna/bfa_defs.h create mode 100644 drivers/net/bna/bfa_defs_cna.h create mode 100644 drivers/net/bna/bfa_defs_mfg_comm.h create mode 100644 drivers/net/bna/bfa_defs_status.h create mode 100644 drivers/net/bna/bfa_ioc.c create mode 100644 drivers/net/bna/bfa_ioc.h create mode 100644 drivers/net/bna/bfa_ioc_ct.c create mode 100644 drivers/net/bna/bfa_sm.h create mode 100644 drivers/net/bna/bfa_wc.h create mode 100644 drivers/net/bna/bfi.h create mode 100644 drivers/net/bna/bfi_cna.h create mode 100644 drivers/net/bna/bfi_ctreg.h create mode 100644 drivers/net/bna/bfi_ll.h create mode 100644 drivers/net/bna/bna.h create mode 100644 drivers/net/bna/bna_ctrl.c create mode 100644 drivers/net/bna/bna_hw.h create mode 100644 drivers/net/bna/bna_txrx.c create mode 100644 drivers/net/bna/bna_types.h create mode 100644 drivers/net/bna/bnad.c create mode 100644 drivers/net/bna/bnad.h create mode 100644 drivers/net/bna/bnad_ethtool.c create mode 100644 drivers/net/bna/cna.h create mode 100644 drivers/net/bna/cna_fwimg.c (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 43c9efcd8e10..93198c1980a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1387,6 +1387,13 @@ L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/bfa/ +BROCADE BNA 10 GIGABIT ETHERNET DRIVER +M: Rasesh Mody +M: Debashis Dutt +L: netdev@vger.kernel.org +S: Supported +F: drivers/net/bna/ + BSG (block layer generic sg v4 driver) M: FUJITA Tomonori L: linux-scsi@vger.kernel.org diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 85485287192a..53c4810b119e 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2869,6 +2869,20 @@ config QLGE To compile this driver as a module, choose M here: the module will be called qlge. +config BNA + tristate "Brocade 1010/1020 10Gb Ethernet Driver support" + depends on PCI + ---help--- + This driver supports Brocade 1010/1020 10Gb CEE capable Ethernet + cards. + To compile this driver as a module, choose M here: the module + will be called bna. 
+ + For general information and support, go to the Brocade support + website at: + + + source "drivers/net/sfc/Kconfig" source "drivers/net/benet/Kconfig" diff --git a/drivers/net/Makefile b/drivers/net/Makefile index f34b3baf7ee6..18a277709a2a 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_ENIC) += enic/ obj-$(CONFIG_JME) += jme.o obj-$(CONFIG_BE2NET) += benet/ obj-$(CONFIG_VMXNET3) += vmxnet3/ +obj-$(CONFIG_BNA) += bna/ gianfar_driver-objs := gianfar.o \ gianfar_ethtool.o \ diff --git a/drivers/net/bna/Makefile b/drivers/net/bna/Makefile new file mode 100644 index 000000000000..a5d604de7fea --- /dev/null +++ b/drivers/net/bna/Makefile @@ -0,0 +1,11 @@ +# +# Copyright (c) 2005-2010 Brocade Communications Systems, Inc. +# All rights reserved. +# + +obj-$(CONFIG_BNA) += bna.o + +bna-objs := bnad.o bnad_ethtool.o bna_ctrl.o bna_txrx.o +bna-objs += bfa_ioc.o bfa_ioc_ct.o bfa_cee.o cna_fwimg.o + +EXTRA_CFLAGS := -Idrivers/net/bna diff --git a/drivers/net/bna/bfa_cee.c b/drivers/net/bna/bfa_cee.c new file mode 100644 index 000000000000..1545fc9720f8 --- /dev/null +++ b/drivers/net/bna/bfa_cee.c @@ -0,0 +1,407 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#include "bfa_defs_cna.h" +#include "cna.h" +#include "bfa_cee.h" +#include "bfi_cna.h" +#include "bfa_ioc.h" + +#define bfa_ioc_portid(__ioc) ((__ioc)->port_id) +#define bfa_lpuid(__arg) bfa_ioc_portid(&(__arg)->ioc) + +static void bfa_cee_format_lldp_cfg(struct bfa_cee_lldp_cfg *lldp_cfg); +static void bfa_cee_format_cee_cfg(void *buffer); + +static void +bfa_cee_format_cee_cfg(void *buffer) +{ + struct bfa_cee_attr *cee_cfg = buffer; + bfa_cee_format_lldp_cfg(&cee_cfg->lldp_remote); +} + +static void +bfa_cee_stats_swap(struct bfa_cee_stats *stats) +{ + u32 *buffer = (u32 *)stats; + int i; + + for (i = 0; i < (sizeof(struct bfa_cee_stats) / sizeof(u32)); + i++) { + buffer[i] = ntohl(buffer[i]); + } +} + +static void +bfa_cee_format_lldp_cfg(struct bfa_cee_lldp_cfg *lldp_cfg) +{ + lldp_cfg->time_to_live = + ntohs(lldp_cfg->time_to_live); + lldp_cfg->enabled_system_cap = + ntohs(lldp_cfg->enabled_system_cap); +} + +/** + * bfa_cee_attr_meminfo() + * + * @brief Returns the size of the DMA memory needed by CEE attributes + * + * @param[in] void + * + * @return Size of DMA region + */ +static u32 +bfa_cee_attr_meminfo(void) +{ + return roundup(sizeof(struct bfa_cee_attr), BFA_DMA_ALIGN_SZ); +} +/** + * bfa_cee_stats_meminfo() + * + * @brief Returns the size of the DMA memory needed by CEE stats + * + * @param[in] void + * + * @return Size of DMA region + */ +static u32 +bfa_cee_stats_meminfo(void) +{ + return roundup(sizeof(struct bfa_cee_stats), BFA_DMA_ALIGN_SZ); +} + +/** + * bfa_cee_get_attr_isr() + * + * @brief CEE ISR for get-attributes responses from f/w + * + * @param[in] cee - Pointer to the CEE module + * status - Return status from the f/w + * + * @return void + */ +static void 
+bfa_cee_get_attr_isr(struct bfa_cee *cee, enum bfa_status status) +{ + cee->get_attr_status = status; + if (status == BFA_STATUS_OK) { + memcpy(cee->attr, cee->attr_dma.kva, + sizeof(struct bfa_cee_attr)); + bfa_cee_format_cee_cfg(cee->attr); + } + cee->get_attr_pending = false; + if (cee->cbfn.get_attr_cbfn) + cee->cbfn.get_attr_cbfn(cee->cbfn.get_attr_cbarg, status); +} + +/** + * bfa_cee_get_stats_isr() + * + * @brief CEE ISR for get-stats responses from f/w + * + * @param[in] cee - Pointer to the CEE module + * status - Return status from the f/w + * + * @return void + */ +static void +bfa_cee_get_stats_isr(struct bfa_cee *cee, enum bfa_status status) +{ + cee->get_stats_status = status; + if (status == BFA_STATUS_OK) { + memcpy(cee->stats, cee->stats_dma.kva, + sizeof(struct bfa_cee_stats)); + bfa_cee_stats_swap(cee->stats); + } + cee->get_stats_pending = false; + if (cee->cbfn.get_stats_cbfn) + cee->cbfn.get_stats_cbfn(cee->cbfn.get_stats_cbarg, status); +} + +/** + * bfa_cee_reset_stats_isr() + * + * @brief CEE ISR for reset-stats responses from f/w + * + * @param[in] cee - Pointer to the CEE module + * status - Return status from the f/w + * + * @return void + */ +static void +bfa_cee_reset_stats_isr(struct bfa_cee *cee, enum bfa_status status) +{ + cee->reset_stats_status = status; + cee->reset_stats_pending = false; + if (cee->cbfn.reset_stats_cbfn) + cee->cbfn.reset_stats_cbfn(cee->cbfn.reset_stats_cbarg, status); +} +/** + * bfa_cee_meminfo() + * + * @brief Returns the size of the DMA memory needed by CEE module + * + * @param[in] void + * + * @return Size of DMA region + */ +u32 +bfa_cee_meminfo(void) +{ + return bfa_cee_attr_meminfo() + bfa_cee_stats_meminfo(); +} + +/** + * bfa_cee_mem_claim() + * + * @brief Initializes CEE DMA Memory + * + * @param[in] cee CEE module pointer + * dma_kva Kernel Virtual Address of CEE DMA Memory + * dma_pa Physical Address of CEE DMA Memory + * + * @return void + */ +void +bfa_cee_mem_claim(struct bfa_cee *cee, u8 *dma_kva, u64 dma_pa) +{ + cee->attr_dma.kva = dma_kva; + cee->attr_dma.pa = dma_pa; + cee->stats_dma.kva = dma_kva + bfa_cee_attr_meminfo(); + cee->stats_dma.pa = dma_pa + bfa_cee_attr_meminfo(); + cee->attr = (struct bfa_cee_attr *) dma_kva; + cee->stats = (struct bfa_cee_stats *) + (dma_kva + bfa_cee_attr_meminfo()); +} + +/** + * bfa_cee_get_attr() + * + * @brief + * Send the request to the f/w to fetch CEE attributes. + * + * @param[in] Pointer to the CEE module data structure. + * + * @return Status + */ + +enum bfa_status +bfa_cee_get_attr(struct bfa_cee *cee, struct bfa_cee_attr *attr, + bfa_cee_get_attr_cbfn_t cbfn, void *cbarg) +{ + struct bfi_cee_get_req *cmd; + + BUG_ON(!((cee != NULL) && (cee->ioc != NULL))); + if (!bfa_ioc_is_operational(cee->ioc)) + return BFA_STATUS_IOC_FAILURE; + if (cee->get_attr_pending == true) + return BFA_STATUS_DEVBUSY; + cee->get_attr_pending = true; + cmd = (struct bfi_cee_get_req *) cee->get_cfg_mb.msg; + cee->attr = attr; + cee->cbfn.get_attr_cbfn = cbfn; + cee->cbfn.get_attr_cbarg = cbarg; + bfi_h2i_set(cmd->mh, BFI_MC_CEE, BFI_CEE_H2I_GET_CFG_REQ, + bfa_ioc_portid(cee->ioc)); + bfa_dma_be_addr_set(cmd->dma_addr, cee->attr_dma.pa); + bfa_ioc_mbox_queue(cee->ioc, &cee->get_cfg_mb); + + return BFA_STATUS_OK; +} + +/** + * bfa_cee_get_stats() + * + * @brief + * Send the request to the f/w to fetch CEE statistics. + * + * @param[in] Pointer to the CEE module data structure.
+ * + * @return Status + */ + +enum bfa_status +bfa_cee_get_stats(struct bfa_cee *cee, struct bfa_cee_stats *stats, + bfa_cee_get_stats_cbfn_t cbfn, void *cbarg) +{ + struct bfi_cee_get_req *cmd; + + BUG_ON(!((cee != NULL) && (cee->ioc != NULL))); + + if (!bfa_ioc_is_operational(cee->ioc)) + return BFA_STATUS_IOC_FAILURE; + if (cee->get_stats_pending == true) + return BFA_STATUS_DEVBUSY; + cee->get_stats_pending = true; + cmd = (struct bfi_cee_get_req *) cee->get_stats_mb.msg; + cee->stats = stats; + cee->cbfn.get_stats_cbfn = cbfn; + cee->cbfn.get_stats_cbarg = cbarg; + bfi_h2i_set(cmd->mh, BFI_MC_CEE, BFI_CEE_H2I_GET_STATS_REQ, + bfa_ioc_portid(cee->ioc)); + bfa_dma_be_addr_set(cmd->dma_addr, cee->stats_dma.pa); + bfa_ioc_mbox_queue(cee->ioc, &cee->get_stats_mb); + + return BFA_STATUS_OK; +} + +/** + * bfa_cee_reset_stats() + * + * @brief Clears CEE Stats in the f/w. + * + * @param[in] Pointer to the CEE module data structure. + * + * @return Status + */ + +enum bfa_status +bfa_cee_reset_stats(struct bfa_cee *cee, bfa_cee_reset_stats_cbfn_t cbfn, + void *cbarg) +{ + struct bfi_cee_reset_stats *cmd; + + BUG_ON(!((cee != NULL) && (cee->ioc != NULL))); + if (!bfa_ioc_is_operational(cee->ioc)) + return BFA_STATUS_IOC_FAILURE; + if (cee->reset_stats_pending == true) + return BFA_STATUS_DEVBUSY; + cee->reset_stats_pending = true; + cmd = (struct bfi_cee_reset_stats *) cee->reset_stats_mb.msg; + cee->cbfn.reset_stats_cbfn = cbfn; + cee->cbfn.reset_stats_cbarg = cbarg; + bfi_h2i_set(cmd->mh, BFI_MC_CEE, BFI_CEE_H2I_RESET_STATS, + bfa_ioc_portid(cee->ioc)); + bfa_ioc_mbox_queue(cee->ioc, &cee->reset_stats_mb); + return BFA_STATUS_OK; +} + +/** + * bfa_cee_isr() + * + * @brief Handles Mail-box interrupts for CEE module. + * + * @param[in] Pointer to the CEE module data structure. + * + * @return void + */ + +void +bfa_cee_isr(void *cbarg, struct bfi_mbmsg *m) +{ + union bfi_cee_i2h_msg_u *msg; + struct bfi_cee_get_rsp *get_rsp; + struct bfa_cee *cee = (struct bfa_cee *) cbarg; + msg = (union bfi_cee_i2h_msg_u *) m; + get_rsp = (struct bfi_cee_get_rsp *) m; + switch (msg->mh.msg_id) { + case BFI_CEE_I2H_GET_CFG_RSP: + bfa_cee_get_attr_isr(cee, get_rsp->cmd_status); + break; + case BFI_CEE_I2H_GET_STATS_RSP: + bfa_cee_get_stats_isr(cee, get_rsp->cmd_status); + break; + case BFI_CEE_I2H_RESET_STATS_RSP: + bfa_cee_reset_stats_isr(cee, get_rsp->cmd_status); + break; + default: + BUG_ON(1); + } +} + +/** + * bfa_cee_hbfail() + * + * @brief CEE module heart-beat failure handler. + * + * @param[in] Pointer to the CEE module data structure.
+ * + * @return void + */ + +void +bfa_cee_hbfail(void *arg) +{ + struct bfa_cee *cee; + cee = (struct bfa_cee *) arg; + + if (cee->get_attr_pending == true) { + cee->get_attr_status = BFA_STATUS_FAILED; + cee->get_attr_pending = false; + if (cee->cbfn.get_attr_cbfn) { + cee->cbfn.get_attr_cbfn(cee->cbfn.get_attr_cbarg, + BFA_STATUS_FAILED); + } + } + if (cee->get_stats_pending == true) { + cee->get_stats_status = BFA_STATUS_FAILED; + cee->get_stats_pending = false; + if (cee->cbfn.get_stats_cbfn) { + cee->cbfn.get_stats_cbfn(cee->cbfn.get_stats_cbarg, + BFA_STATUS_FAILED); + } + } + if (cee->reset_stats_pending == true) { + cee->reset_stats_status = BFA_STATUS_FAILED; + cee->reset_stats_pending = false; + if (cee->cbfn.reset_stats_cbfn) { + cee->cbfn.reset_stats_cbfn(cee->cbfn.reset_stats_cbarg, + BFA_STATUS_FAILED); + } + } +} + +/** + * bfa_cee_attach() + * + * @brief CEE module-attach API + * + * @param[in] cee - Pointer to the CEE module data structure + * ioc - Pointer to the ioc module data structure + * dev - Pointer to the device driver module data structure + * The device driver specific mbox ISR functions have + * this pointer as one of the parameters. + * + * @return void + */ +void +bfa_cee_attach(struct bfa_cee *cee, struct bfa_ioc *ioc, + void *dev) +{ + BUG_ON(!(cee != NULL)); + cee->dev = dev; + cee->ioc = ioc; + + bfa_ioc_mbox_regisr(cee->ioc, BFI_MC_CEE, bfa_cee_isr, cee); + bfa_ioc_hbfail_init(&cee->hbfail, bfa_cee_hbfail, cee); + bfa_ioc_hbfail_register(cee->ioc, &cee->hbfail); +} + +/** + * bfa_cee_detach() + * + * @brief CEE module-detach API + * + * @param[in] cee - Pointer to the CEE module data structure + * + * @return void + */ +void +bfa_cee_detach(struct bfa_cee *cee) +{ +} diff --git a/drivers/net/bna/bfa_cee.h b/drivers/net/bna/bfa_cee.h new file mode 100644 index 000000000000..1208cadeceed --- /dev/null +++ b/drivers/net/bna/bfa_cee.h @@ -0,0 +1,72 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. 
+ * All rights reserved + * www.brocade.com + */ + +#ifndef __BFA_CEE_H__ +#define __BFA_CEE_H__ + +#include "bfa_defs_cna.h" +#include "bfa_ioc.h" + +typedef void (*bfa_cee_get_attr_cbfn_t) (void *dev, enum bfa_status status); +typedef void (*bfa_cee_get_stats_cbfn_t) (void *dev, enum bfa_status status); +typedef void (*bfa_cee_reset_stats_cbfn_t) (void *dev, enum bfa_status status); +typedef void (*bfa_cee_hbfail_cbfn_t) (void *dev, enum bfa_status status); + +struct bfa_cee_cbfn { + bfa_cee_get_attr_cbfn_t get_attr_cbfn; + void *get_attr_cbarg; + bfa_cee_get_stats_cbfn_t get_stats_cbfn; + void *get_stats_cbarg; + bfa_cee_reset_stats_cbfn_t reset_stats_cbfn; + void *reset_stats_cbarg; +}; + +struct bfa_cee { + void *dev; + bool get_attr_pending; + bool get_stats_pending; + bool reset_stats_pending; + enum bfa_status get_attr_status; + enum bfa_status get_stats_status; + enum bfa_status reset_stats_status; + struct bfa_cee_cbfn cbfn; + struct bfa_ioc_hbfail_notify hbfail; + struct bfa_cee_attr *attr; + struct bfa_cee_stats *stats; + struct bfa_dma attr_dma; + struct bfa_dma stats_dma; + struct bfa_ioc *ioc; + struct bfa_mbox_cmd get_cfg_mb; + struct bfa_mbox_cmd get_stats_mb; + struct bfa_mbox_cmd reset_stats_mb; +}; + +u32 bfa_cee_meminfo(void); +void bfa_cee_mem_claim(struct bfa_cee *cee, u8 *dma_kva, + u64 dma_pa); +void bfa_cee_attach(struct bfa_cee *cee, struct bfa_ioc *ioc, void *dev); +void bfa_cee_detach(struct bfa_cee *cee); +enum bfa_status bfa_cee_get_attr(struct bfa_cee *cee, + struct bfa_cee_attr *attr, bfa_cee_get_attr_cbfn_t cbfn, void *cbarg); +enum bfa_status bfa_cee_get_stats(struct bfa_cee *cee, + struct bfa_cee_stats *stats, bfa_cee_get_stats_cbfn_t cbfn, + void *cbarg); +enum bfa_status bfa_cee_reset_stats(struct bfa_cee *cee, + bfa_cee_reset_stats_cbfn_t cbfn, void *cbarg); + +#endif /* __BFA_CEE_H__ */ diff --git a/drivers/net/bna/bfa_defs.h b/drivers/net/bna/bfa_defs.h new file mode 100644 index 000000000000..29c1b8de2c2d --- /dev/null +++ b/drivers/net/bna/bfa_defs.h @@ -0,0 +1,243 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#ifndef __BFA_DEFS_H__ +#define __BFA_DEFS_H__ + +#include "cna.h" +#include "bfa_defs_status.h" +#include "bfa_defs_mfg_comm.h" + +#define BFA_STRING_32 32 +#define BFA_VERSION_LEN 64 + +/** + * ---------------------- adapter definitions ------------ + */ + +/** + * BFA adapter level attributes. 
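+ * + * Note (added for clarity): the serial number length below comes from + * STRSZ(), which pads the raw size, plus room for a NUL terminator, up + * to the next 4-byte boundary (see bfa_defs_mfg_comm.h).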
+ */ +enum { + BFA_ADAPTER_SERIAL_NUM_LEN = STRSZ(BFA_MFG_SERIALNUM_SIZE), + /* + *!< adapter serial num length + */ + BFA_ADAPTER_MODEL_NAME_LEN = 16, /*!< model name length */ + BFA_ADAPTER_MODEL_DESCR_LEN = 128, /*!< model description length */ + BFA_ADAPTER_MFG_NAME_LEN = 8, /*!< manufacturer name length */ + BFA_ADAPTER_SYM_NAME_LEN = 64, /*!< adapter symbolic name length */ + BFA_ADAPTER_OS_TYPE_LEN = 64, /*!< adapter os type length */ +}; + +struct bfa_adapter_attr { + char manufacturer[BFA_ADAPTER_MFG_NAME_LEN]; + char serial_num[BFA_ADAPTER_SERIAL_NUM_LEN]; + u32 card_type; + char model[BFA_ADAPTER_MODEL_NAME_LEN]; + char model_descr[BFA_ADAPTER_MODEL_DESCR_LEN]; + u64 pwwn; + char node_symname[FC_SYMNAME_MAX]; + char hw_ver[BFA_VERSION_LEN]; + char fw_ver[BFA_VERSION_LEN]; + char optrom_ver[BFA_VERSION_LEN]; + char os_type[BFA_ADAPTER_OS_TYPE_LEN]; + struct bfa_mfg_vpd vpd; + struct mac mac; + + u8 nports; + u8 max_speed; + u8 prototype; + char asic_rev; + + u8 pcie_gen; + u8 pcie_lanes_orig; + u8 pcie_lanes; + u8 cna_capable; + + u8 is_mezz; + u8 trunk_capable; +}; + +/** + * ---------------------- IOC definitions ------------ + */ + +enum { + BFA_IOC_DRIVER_LEN = 16, + BFA_IOC_CHIP_REV_LEN = 8, +}; + +/** + * Driver and firmware versions. + */ +struct bfa_ioc_driver_attr { + char driver[BFA_IOC_DRIVER_LEN]; /*!< driver name */ + char driver_ver[BFA_VERSION_LEN]; /*!< driver version */ + char fw_ver[BFA_VERSION_LEN]; /*!< firmware version */ + char bios_ver[BFA_VERSION_LEN]; /*!< bios version */ + char efi_ver[BFA_VERSION_LEN]; /*!< EFI version */ + char ob_ver[BFA_VERSION_LEN]; /*!< openboot version */ +}; + +/** + * IOC PCI device attributes + */ +struct bfa_ioc_pci_attr { + u16 vendor_id; /*!< PCI vendor ID */ + u16 device_id; /*!< PCI device ID */ + u16 ssid; /*!< subsystem ID */ + u16 ssvid; /*!< subsystem vendor ID */ + u32 pcifn; /*!< PCI device function */ + u32 rsvd; /* padding */ + char chip_rev[BFA_IOC_CHIP_REV_LEN]; /*!< chip revision */ +}; + +/** + * IOC states + */ +enum bfa_ioc_state { + BFA_IOC_RESET = 1, /*!< IOC is in reset state */ + BFA_IOC_SEMWAIT = 2, /*!< Waiting for IOC h/w semaphore */ + BFA_IOC_HWINIT = 3, /*!< IOC h/w is being initialized */ + BFA_IOC_GETATTR = 4, /*!< IOC is being configured */ + BFA_IOC_OPERATIONAL = 5, /*!< IOC is operational */ + BFA_IOC_INITFAIL = 6, /*!< IOC hardware failure */ + BFA_IOC_HBFAIL = 7, /*!< IOC heart-beat failure */ + BFA_IOC_DISABLING = 8, /*!< IOC is being disabled */ + BFA_IOC_DISABLED = 9, /*!< IOC is disabled */ + BFA_IOC_FWMISMATCH = 10, /*!< IOC f/w different from drivers */ +}; + +/** + * IOC firmware stats + */ +struct bfa_fw_ioc_stats { + u32 enable_reqs; + u32 disable_reqs; + u32 get_attr_reqs; + u32 dbg_sync; + u32 dbg_dump; + u32 unknown_reqs; +}; + +/** + * IOC driver stats + */ +struct bfa_ioc_drv_stats { + u32 ioc_isrs; + u32 ioc_enables; + u32 ioc_disables; + u32 ioc_hbfails; + u32 ioc_boots; + u32 stats_tmos; + u32 hb_count; + u32 disable_reqs; + u32 enable_reqs; + u32 disable_replies; + u32 enable_replies; +}; + +/** + * IOC statistics + */ +struct bfa_ioc_stats { + struct bfa_ioc_drv_stats drv_stats; /*!< driver IOC stats */ + struct bfa_fw_ioc_stats fw_stats; /*!< firmware IOC stats */ +}; + +enum bfa_ioc_type { + BFA_IOC_TYPE_FC = 1, + BFA_IOC_TYPE_FCoE = 2, + BFA_IOC_TYPE_LL = 3, +}; + +/** + * IOC attributes returned in queries + */ +struct bfa_ioc_attr { + enum bfa_ioc_type ioc_type; + enum bfa_ioc_state state; /*!< IOC state */ + struct bfa_adapter_attr adapter_attr; /*!< HBA attributes */ 
+ struct bfa_ioc_driver_attr driver_attr; /*!< driver attr */ + struct bfa_ioc_pci_attr pci_attr; + u8 port_id; /*!< port number */ + u8 rsvd[7]; /*!< 64bit align */ +}; + +/** + * ---------------------- mfg definitions ------------ + */ + +/** + * Checksum size + */ +#define BFA_MFG_CHKSUM_SIZE 16 + +#define BFA_MFG_PARTNUM_SIZE 14 +#define BFA_MFG_SUPPLIER_ID_SIZE 10 +#define BFA_MFG_SUPPLIER_PARTNUM_SIZE 20 +#define BFA_MFG_SUPPLIER_SERIALNUM_SIZE 20 +#define BFA_MFG_SUPPLIER_REVISION_SIZE 4 + +#pragma pack(1) + +/** + * @brief BFA adapter manufacturing block definition. + * + * All numerical fields are in big-endian format. + */ +struct bfa_mfg_block { + u8 version; /*!< manufacturing block version */ + u8 mfg_sig[3]; /*!< characters 'M', 'F', 'G' */ + u16 mfgsize; /*!< mfg block size */ + u16 u16_chksum; /*!< old u16 checksum */ + char brcd_serialnum[STRSZ(BFA_MFG_SERIALNUM_SIZE)]; + char brcd_partnum[STRSZ(BFA_MFG_PARTNUM_SIZE)]; + u8 mfg_day; /*!< manufacturing day */ + u8 mfg_month; /*!< manufacturing month */ + u16 mfg_year; /*!< manufacturing year */ + u64 mfg_wwn; /*!< wwn base for this adapter */ + u8 num_wwn; /*!< number of wwns assigned */ + u8 mfg_speeds; /*!< speeds allowed for this adapter */ + u8 rsv[2]; + char supplier_id[STRSZ(BFA_MFG_SUPPLIER_ID_SIZE)]; + char supplier_partnum[STRSZ(BFA_MFG_SUPPLIER_PARTNUM_SIZE)]; + char + supplier_serialnum[STRSZ(BFA_MFG_SUPPLIER_SERIALNUM_SIZE)]; + char + supplier_revision[STRSZ(BFA_MFG_SUPPLIER_REVISION_SIZE)]; + mac_t mfg_mac; /*!< mac address */ + u8 num_mac; /*!< number of mac addresses */ + u8 rsv2; + u32 mfg_type; /*!< card type */ + u8 rsv3[108]; + u8 md5_chksum[BFA_MFG_CHKSUM_SIZE]; /*!< md5 checksum */ +}; + +#pragma pack() + +/** + * ---------------------- pci definitions ------------ + */ + +#define bfa_asic_id_ct(devid) \ + ((devid) == PCI_DEVICE_ID_BROCADE_CT || \ + (devid) == PCI_DEVICE_ID_BROCADE_CT_FC) + +#endif /* __BFA_DEFS_H__ */ diff --git a/drivers/net/bna/bfa_defs_cna.h b/drivers/net/bna/bfa_defs_cna.h new file mode 100644 index 000000000000..7e0a9187bdd5 --- /dev/null +++ b/drivers/net/bna/bfa_defs_cna.h @@ -0,0 +1,223 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#ifndef __BFA_DEFS_CNA_H__ +#define __BFA_DEFS_CNA_H__ + +#include "bfa_defs.h" + +/** + * @brief + * FC physical port statistics. 
+ */ +struct bfa_port_fc_stats { + u64 secs_reset; /*!< Seconds since stats is reset */ + u64 tx_frames; /*!< Tx frames */ + u64 tx_words; /*!< Tx words */ + u64 tx_lip; /*!< Tx LIP */ + u64 tx_nos; /*!< Tx NOS */ + u64 tx_ols; /*!< Tx OLS */ + u64 tx_lr; /*!< Tx LR */ + u64 tx_lrr; /*!< Tx LRR */ + u64 rx_frames; /*!< Rx frames */ + u64 rx_words; /*!< Rx words */ + u64 lip_count; /*!< Rx LIP */ + u64 nos_count; /*!< Rx NOS */ + u64 ols_count; /*!< Rx OLS */ + u64 lr_count; /*!< Rx LR */ + u64 lrr_count; /*!< Rx LRR */ + u64 invalid_crcs; /*!< Rx CRC err frames */ + u64 invalid_crc_gd_eof; /*!< Rx CRC err good EOF frames */ + u64 undersized_frm; /*!< Rx undersized frames */ + u64 oversized_frm; /*!< Rx oversized frames */ + u64 bad_eof_frm; /*!< Rx frames with bad EOF */ + u64 error_frames; /*!< Errored frames */ + u64 dropped_frames; /*!< Dropped frames */ + u64 link_failures; /*!< Link Failure (LF) count */ + u64 loss_of_syncs; /*!< Loss of sync count */ + u64 loss_of_signals; /*!< Loss of signal count */ + u64 primseq_errs; /*!< Primitive sequence protocol err. */ + u64 bad_os_count; /*!< Invalid ordered sets */ + u64 err_enc_out; /*!< Encoding err nonframe_8b10b */ + u64 err_enc; /*!< Encoding err frame_8b10b */ +}; + +/** + * @brief + * Eth Physical Port statistics. + */ +struct bfa_port_eth_stats { + u64 secs_reset; /*!< Seconds since stats is reset */ + u64 frame_64; /*!< Frames 64 bytes */ + u64 frame_65_127; /*!< Frames 65-127 bytes */ + u64 frame_128_255; /*!< Frames 128-255 bytes */ + u64 frame_256_511; /*!< Frames 256-511 bytes */ + u64 frame_512_1023; /*!< Frames 512-1023 bytes */ + u64 frame_1024_1518; /*!< Frames 1024-1518 bytes */ + u64 frame_1519_1522; /*!< Frames 1519-1522 bytes */ + u64 tx_bytes; /*!< Tx bytes */ + u64 tx_packets; /*!< Tx packets */ + u64 tx_mcast_packets; /*!< Tx multicast packets */ + u64 tx_bcast_packets; /*!< Tx broadcast packets */ + u64 tx_control_frame; /*!< Tx control frame */ + u64 tx_drop; /*!< Tx drops */ + u64 tx_jabber; /*!< Tx jabber */ + u64 tx_fcs_error; /*!< Tx FCS errors */ + u64 tx_fragments; /*!< Tx fragments */ + u64 rx_bytes; /*!< Rx bytes */ + u64 rx_packets; /*!< Rx packets */ + u64 rx_mcast_packets; /*!< Rx multicast packets */ + u64 rx_bcast_packets; /*!< Rx broadcast packets */ + u64 rx_control_frames; /*!< Rx control frames */ + u64 rx_unknown_opcode; /*!< Rx unknown opcode */ + u64 rx_drop; /*!< Rx drops */ + u64 rx_jabber; /*!< Rx jabber */ + u64 rx_fcs_error; /*!< Rx FCS errors */ + u64 rx_alignment_error; /*!< Rx alignment errors */ + u64 rx_frame_length_error; /*!< Rx frame len errors */ + u64 rx_code_error; /*!< Rx code errors */ + u64 rx_fragments; /*!< Rx fragments */ + u64 rx_pause; /*!< Rx pause */ + u64 rx_zero_pause; /*!< Rx zero pause */ + u64 tx_pause; /*!< Tx pause */ + u64 tx_zero_pause; /*!< Tx zero pause */ + u64 rx_fcoe_pause; /*!< Rx FCoE pause */ + u64 rx_fcoe_zero_pause; /*!< Rx FCoE zero pause */ + u64 tx_fcoe_pause; /*!< Tx FCoE pause */ + u64 tx_fcoe_zero_pause; /*!< Tx FCoE zero pause */ +}; + +/** + * @brief + * Port statistics. 
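+ * + * Note (added for clarity): the union below is interpreted according to + * the port type -- FC ports report through the fc member, Ethernet (CNA) + * ports through the eth member.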
+ */ +union bfa_port_stats_u { + struct bfa_port_fc_stats fc; + struct bfa_port_eth_stats eth; +}; + +#pragma pack(1) + +#define BFA_CEE_LLDP_MAX_STRING_LEN (128) +#define BFA_CEE_DCBX_MAX_PRIORITY (8) +#define BFA_CEE_DCBX_MAX_PGID (8) + +#define BFA_CEE_LLDP_SYS_CAP_OTHER 0x0001 +#define BFA_CEE_LLDP_SYS_CAP_REPEATER 0x0002 +#define BFA_CEE_LLDP_SYS_CAP_MAC_BRIDGE 0x0004 +#define BFA_CEE_LLDP_SYS_CAP_WLAN_AP 0x0008 +#define BFA_CEE_LLDP_SYS_CAP_ROUTER 0x0010 +#define BFA_CEE_LLDP_SYS_CAP_TELEPHONE 0x0020 +#define BFA_CEE_LLDP_SYS_CAP_DOCSIS_CD 0x0040 +#define BFA_CEE_LLDP_SYS_CAP_STATION 0x0080 +#define BFA_CEE_LLDP_SYS_CAP_CVLAN 0x0100 +#define BFA_CEE_LLDP_SYS_CAP_SVLAN 0x0200 +#define BFA_CEE_LLDP_SYS_CAP_TPMR 0x0400 + +/* LLDP string type */ +struct bfa_cee_lldp_str { + u8 sub_type; + u8 len; + u8 rsvd[2]; + u8 value[BFA_CEE_LLDP_MAX_STRING_LEN]; +}; + +/* LLDP parameters */ +struct bfa_cee_lldp_cfg { + struct bfa_cee_lldp_str chassis_id; + struct bfa_cee_lldp_str port_id; + struct bfa_cee_lldp_str port_desc; + struct bfa_cee_lldp_str sys_name; + struct bfa_cee_lldp_str sys_desc; + struct bfa_cee_lldp_str mgmt_addr; + u16 time_to_live; + u16 enabled_system_cap; +}; + +enum bfa_cee_dcbx_version { + DCBX_PROTOCOL_PRECEE = 1, + DCBX_PROTOCOL_CEE = 2, +}; + +enum bfa_cee_lls { + /* LLS is down because the TLV not sent by the peer */ + CEE_LLS_DOWN_NO_TLV = 0, + /* LLS is down as advertised by the peer */ + CEE_LLS_DOWN = 1, + CEE_LLS_UP = 2, +}; + +/* CEE/DCBX parameters */ +struct bfa_cee_dcbx_cfg { + u8 pgid[BFA_CEE_DCBX_MAX_PRIORITY]; + u8 pg_percentage[BFA_CEE_DCBX_MAX_PGID]; + u8 pfc_primap; /* bitmap of priorities with PFC enabled */ + u8 fcoe_primap; /* bitmap of priorities used for FCoE traffic */ + u8 iscsi_primap; /* bitmap of priorities used for iSCSI traffic */ + u8 dcbx_version; /* operating version: CEE or preCEE */ + u8 lls_fcoe; /* FCoE Logical Link Status */ + u8 lls_lan; /* LAN Logical Link Status */ + u8 rsvd[2]; +}; + +/* CEE status */ +/* Tri-state for the benefit of the port list command */ +enum bfa_cee_status { + CEE_UP = 0, + CEE_PHY_UP = 1, + CEE_LOOPBACK = 2, + CEE_PHY_DOWN = 3, +}; + +/* CEE Query */ +struct bfa_cee_attr { + u8 cee_status; + u8 error_reason; + struct bfa_cee_lldp_cfg lldp_remote; + struct bfa_cee_dcbx_cfg dcbx_remote; + mac_t src_mac; + u8 link_speed; + u8 nw_priority; + u8 filler[2]; +}; + +/* LLDP/DCBX/CEE Statistics */ +struct bfa_cee_stats { + u32 lldp_tx_frames; /*!< LLDP Tx Frames */ + u32 lldp_rx_frames; /*!< LLDP Rx Frames */ + u32 lldp_rx_frames_invalid; /*!< LLDP Rx Frames invalid */ + u32 lldp_rx_frames_new; /*!< LLDP Rx Frames new */ + u32 lldp_tlvs_unrecognized; /*!< LLDP Rx unrecognized TLVs */ + u32 lldp_rx_shutdown_tlvs; /*!< LLDP Rx shutdown TLVs */ + u32 lldp_info_aged_out; /*!< LLDP remote info aged out */ + u32 dcbx_phylink_ups; /*!< DCBX phy link ups */ + u32 dcbx_phylink_downs; /*!< DCBX phy link downs */ + u32 dcbx_rx_tlvs; /*!< DCBX Rx TLVs */ + u32 dcbx_rx_tlvs_invalid; /*!< DCBX Rx TLVs invalid */ + u32 dcbx_control_tlv_error; /*!< DCBX control TLV errors */ + u32 dcbx_feature_tlv_error; /*!< DCBX feature TLV errors */ + u32 dcbx_cee_cfg_new; /*!< DCBX new CEE cfg rcvd */ + u32 cee_status_down; /*!< CEE status down */ + u32 cee_status_up; /*!< CEE status up */ + u32 cee_hw_cfg_changed; /*!< CEE hw cfg changed */ + u32 cee_rx_invalid_cfg; /*!< CEE invalid cfg */ +}; + +#pragma pack() + +#endif /* __BFA_DEFS_CNA_H__ */ diff --git a/drivers/net/bna/bfa_defs_mfg_comm.h b/drivers/net/bna/bfa_defs_mfg_comm.h new
file mode 100644 index 000000000000..987978fcb3fe --- /dev/null +++ b/drivers/net/bna/bfa_defs_mfg_comm.h @@ -0,0 +1,244 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#ifndef __BFA_DEFS_MFG_COMM_H__ +#define __BFA_DEFS_MFG_COMM_H__ + +#include "cna.h" + +/** + * Manufacturing block version + */ +#define BFA_MFG_VERSION 2 +#define BFA_MFG_VERSION_UNINIT 0xFF + +/** + * Manufacturing block encrypted version + */ +#define BFA_MFG_ENC_VER 2 + +/** + * Manufacturing block version 1 length + */ +#define BFA_MFG_VER1_LEN 128 + +/** + * Manufacturing block header length + */ +#define BFA_MFG_HDR_LEN 4 + +#define BFA_MFG_SERIALNUM_SIZE 11 +#define STRSZ(_n) (((_n) + 4) & ~3) + +/** + * Manufacturing card type + */ +enum { + BFA_MFG_TYPE_CB_MAX = 825, /*!< Crossbow card type max */ + BFA_MFG_TYPE_FC8P2 = 825, /*!< 8G 2port FC card */ + BFA_MFG_TYPE_FC8P1 = 815, /*!< 8G 1port FC card */ + BFA_MFG_TYPE_FC4P2 = 425, /*!< 4G 2port FC card */ + BFA_MFG_TYPE_FC4P1 = 415, /*!< 4G 1port FC card */ + BFA_MFG_TYPE_CNA10P2 = 1020, /*!< 10G 2port CNA card */ + BFA_MFG_TYPE_CNA10P1 = 1010, /*!< 10G 1port CNA card */ + BFA_MFG_TYPE_JAYHAWK = 804, /*!< Jayhawk mezz card */ + BFA_MFG_TYPE_WANCHESE = 1007, /*!< Wanchese mezz card */ + BFA_MFG_TYPE_ASTRA = 807, /*!< Astra mezz card */ + BFA_MFG_TYPE_LIGHTNING_P0 = 902, /*!< Lightning mezz card - old */ + BFA_MFG_TYPE_LIGHTNING = 1741, /*!< Lightning mezz card */ + BFA_MFG_TYPE_INVALID = 0, /*!< Invalid card type */ +}; + +#pragma pack(1) + +/** + * Check if 1-port card + */ +#define bfa_mfg_is_1port(type) (( \ + (type) == BFA_MFG_TYPE_FC8P1 || \ + (type) == BFA_MFG_TYPE_FC4P1 || \ + (type) == BFA_MFG_TYPE_CNA10P1)) + +/** + * Check if Mezz card + */ +#define bfa_mfg_is_mezz(type) (( \ + (type) == BFA_MFG_TYPE_JAYHAWK || \ + (type) == BFA_MFG_TYPE_WANCHESE || \ + (type) == BFA_MFG_TYPE_ASTRA || \ + (type) == BFA_MFG_TYPE_LIGHTNING_P0 || \ + (type) == BFA_MFG_TYPE_LIGHTNING)) + +/** + * Check if the card type is valid + */ +#define bfa_mfg_is_card_type_valid(type) (( \ + (type) == BFA_MFG_TYPE_FC8P2 || \ + (type) == BFA_MFG_TYPE_FC8P1 || \ + (type) == BFA_MFG_TYPE_FC4P2 || \ + (type) == BFA_MFG_TYPE_FC4P1 || \ + (type) == BFA_MFG_TYPE_CNA10P2 || \ + (type) == BFA_MFG_TYPE_CNA10P1 || \ + bfa_mfg_is_mezz(type))) + +/** + * Check if the card has old wwn/mac handling + */ +#define bfa_mfg_is_old_wwn_mac_model(type) (( \ + (type) == BFA_MFG_TYPE_FC8P2 || \ + (type) == BFA_MFG_TYPE_FC8P1 || \ + (type) == BFA_MFG_TYPE_FC4P2 || \ + (type) == BFA_MFG_TYPE_FC4P1 || \ + (type) == BFA_MFG_TYPE_CNA10P2 || \ + (type) == BFA_MFG_TYPE_CNA10P1 || \ + (type) == BFA_MFG_TYPE_JAYHAWK || \ + (type) == BFA_MFG_TYPE_WANCHESE)) + +#define bfa_mfg_increment_wwn_mac(m, i) \ +do { \ + u32 t = ((m)[0] << 16) | ((m)[1] << 8) | (m)[2]; \ + t += (i); \ + (m)[0] = (t >> 16) & 0xFF; \ + (m)[1] = (t >> 8) & 0xFF; \ + (m)[2] = t & 0xFF; \ +} while (0) + +#define bfa_mfg_adapter_prop_init_flash(card_type, prop) \ +do {
\ + switch ((card_type)) { \ + case BFA_MFG_TYPE_FC8P2: \ + case BFA_MFG_TYPE_JAYHAWK: \ + case BFA_MFG_TYPE_ASTRA: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 2) | \ + BFI_ADAPTER_SETP(SPEED, 8); \ + break; \ + case BFA_MFG_TYPE_FC8P1: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 1) | \ + BFI_ADAPTER_SETP(SPEED, 8); \ + break; \ + case BFA_MFG_TYPE_FC4P2: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 2) | \ + BFI_ADAPTER_SETP(SPEED, 4); \ + break; \ + case BFA_MFG_TYPE_FC4P1: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 1) | \ + BFI_ADAPTER_SETP(SPEED, 4); \ + break; \ + case BFA_MFG_TYPE_CNA10P2: \ + case BFA_MFG_TYPE_WANCHESE: \ + case BFA_MFG_TYPE_LIGHTNING_P0: \ + case BFA_MFG_TYPE_LIGHTNING: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 2); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 10); \ + break; \ + case BFA_MFG_TYPE_CNA10P1: \ + (prop) = BFI_ADAPTER_SETP(NPORTS, 1); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 10); \ + break; \ + default: \ + (prop) = BFI_ADAPTER_UNSUPP; \ + } \ +} while (0) + +enum { + CB_GPIO_TTV = (1), /*!< TTV debug capable cards */ + CB_GPIO_FC8P2 = (2), /*!< 8G 2port FC card */ + CB_GPIO_FC8P1 = (3), /*!< 8G 1port FC card */ + CB_GPIO_FC4P2 = (4), /*!< 4G 2port FC card */ + CB_GPIO_FC4P1 = (5), /*!< 4G 1port FC card */ + CB_GPIO_DFLY = (6), /*!< 8G 2port FC mezzanine card */ + CB_GPIO_PROTO = (1 << 7) /*!< 8G 2port FC prototypes */ +}; + +#define bfa_mfg_adapter_prop_init_gpio(gpio, card_type, prop) \ +do { \ + if ((gpio) & CB_GPIO_PROTO) { \ + (prop) |= BFI_ADAPTER_PROTO; \ + (gpio) &= ~CB_GPIO_PROTO; \ + } \ + switch ((gpio)) { \ + case CB_GPIO_TTV: \ + (prop) |= BFI_ADAPTER_TTV; \ + case CB_GPIO_DFLY: \ + case CB_GPIO_FC8P2: \ + (prop) |= BFI_ADAPTER_SETP(NPORTS, 2); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 8); \ + (card_type) = BFA_MFG_TYPE_FC8P2; \ + break; \ + case CB_GPIO_FC8P1: \ + (prop) |= BFI_ADAPTER_SETP(NPORTS, 1); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 8); \ + (card_type) = BFA_MFG_TYPE_FC8P1; \ + break; \ + case CB_GPIO_FC4P2: \ + (prop) |= BFI_ADAPTER_SETP(NPORTS, 2); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 4); \ + (card_type) = BFA_MFG_TYPE_FC4P2; \ + break; \ + case CB_GPIO_FC4P1: \ + (prop) |= BFI_ADAPTER_SETP(NPORTS, 1); \ + (prop) |= BFI_ADAPTER_SETP(SPEED, 4); \ + (card_type) = BFA_MFG_TYPE_FC4P1; \ + break; \ + default: \ + (prop) |= BFI_ADAPTER_UNSUPP; \ + (card_type) = BFA_MFG_TYPE_INVALID; \ + } \ +} while (0) + +/** + * VPD data length + */ +#define BFA_MFG_VPD_LEN 512 +#define BFA_MFG_VPD_LEN_INVALID 0 + +#define BFA_MFG_VPD_PCI_HDR_OFF 137 +#define BFA_MFG_VPD_PCI_VER_MASK 0x07 /*!< version mask 3 bits */ +#define BFA_MFG_VPD_PCI_VDR_MASK 0xf8 /*!< vendor mask 5 bits */ + +/** + * VPD vendor tag + */ +enum { + BFA_MFG_VPD_UNKNOWN = 0, /*!< vendor unknown */ + BFA_MFG_VPD_IBM = 1, /*!< vendor IBM */ + BFA_MFG_VPD_HP = 2, /*!< vendor HP */ + BFA_MFG_VPD_DELL = 3, /*!< vendor DELL */ + BFA_MFG_VPD_PCI_IBM = 0x08, /*!< PCI VPD IBM */ + BFA_MFG_VPD_PCI_HP = 0x10, /*!< PCI VPD HP */ + BFA_MFG_VPD_PCI_DELL = 0x20, /*!< PCI VPD DELL */ + BFA_MFG_VPD_PCI_BRCD = 0xf8, /*!< PCI VPD Brocade */ +}; + +/** + * @brief BFA adapter flash vpd data definition. + * + * All numerical fields are in big-endian format. 
+ */ +struct bfa_mfg_vpd { + u8 version; /*!< vpd data version */ + u8 vpd_sig[3]; /*!< characters 'V', 'P', 'D' */ + u8 chksum; /*!< u8 checksum */ + u8 vendor; /*!< vendor */ + u8 len; /*!< vpd data length excluding header */ + u8 rsv; + u8 data[BFA_MFG_VPD_LEN]; /*!< vpd data */ +}; + +#pragma pack() + +#endif /* __BFA_DEFS_MFG_COMM_H__ */ diff --git a/drivers/net/bna/bfa_defs_status.h b/drivers/net/bna/bfa_defs_status.h new file mode 100644 index 000000000000..af951126375c --- /dev/null +++ b/drivers/net/bna/bfa_defs_status.h @@ -0,0 +1,216 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#ifndef __BFA_DEFS_STATUS_H__ +#define __BFA_DEFS_STATUS_H__ + +/** + * API status return values + * + * NOTE: The error msgs are auto-generated from the comments. Only single-line + * comments are supported + */ +enum bfa_status { + BFA_STATUS_OK = 0, + BFA_STATUS_FAILED = 1, + BFA_STATUS_EINVAL = 2, + BFA_STATUS_ENOMEM = 3, + BFA_STATUS_ENOSYS = 4, + BFA_STATUS_ETIMER = 5, + BFA_STATUS_EPROTOCOL = 6, + BFA_STATUS_ENOFCPORTS = 7, + BFA_STATUS_NOFLASH = 8, + BFA_STATUS_BADFLASH = 9, + BFA_STATUS_SFP_UNSUPP = 10, + BFA_STATUS_UNKNOWN_VFID = 11, + BFA_STATUS_DATACORRUPTED = 12, + BFA_STATUS_DEVBUSY = 13, + BFA_STATUS_ABORTED = 14, + BFA_STATUS_NODEV = 15, + BFA_STATUS_HDMA_FAILED = 16, + BFA_STATUS_FLASH_BAD_LEN = 17, + BFA_STATUS_UNKNOWN_LWWN = 18, + BFA_STATUS_UNKNOWN_RWWN = 19, + BFA_STATUS_FCPT_LS_RJT = 20, + BFA_STATUS_VPORT_EXISTS = 21, + BFA_STATUS_VPORT_MAX = 22, + BFA_STATUS_UNSUPP_SPEED = 23, + BFA_STATUS_INVLD_DFSZ = 24, + BFA_STATUS_CNFG_FAILED = 25, + BFA_STATUS_CMD_NOTSUPP = 26, + BFA_STATUS_NO_ADAPTER = 27, + BFA_STATUS_LINKDOWN = 28, + BFA_STATUS_FABRIC_RJT = 29, + BFA_STATUS_UNKNOWN_VWWN = 30, + BFA_STATUS_NSLOGIN_FAILED = 31, + BFA_STATUS_NO_RPORTS = 32, + BFA_STATUS_NSQUERY_FAILED = 33, + BFA_STATUS_PORT_OFFLINE = 34, + BFA_STATUS_RPORT_OFFLINE = 35, + BFA_STATUS_TGTOPEN_FAILED = 36, + BFA_STATUS_BAD_LUNS = 37, + BFA_STATUS_IO_FAILURE = 38, + BFA_STATUS_NO_FABRIC = 39, + BFA_STATUS_EBADF = 40, + BFA_STATUS_EINTR = 41, + BFA_STATUS_EIO = 42, + BFA_STATUS_ENOTTY = 43, + BFA_STATUS_ENXIO = 44, + BFA_STATUS_EFOPEN = 45, + BFA_STATUS_VPORT_WWN_BP = 46, + BFA_STATUS_PORT_NOT_DISABLED = 47, + BFA_STATUS_BADFRMHDR = 48, + BFA_STATUS_BADFRMSZ = 49, + BFA_STATUS_MISSINGFRM = 50, + BFA_STATUS_LINKTIMEOUT = 51, + BFA_STATUS_NO_FCPIM_NEXUS = 52, + BFA_STATUS_CHECKSUM_FAIL = 53, + BFA_STATUS_GZME_FAILED = 54, + BFA_STATUS_SCSISTART_REQD = 55, + BFA_STATUS_IOC_FAILURE = 56, + BFA_STATUS_INVALID_WWN = 57, + BFA_STATUS_MISMATCH = 58, + BFA_STATUS_IOC_ENABLED = 59, + BFA_STATUS_ADAPTER_ENABLED = 60, + BFA_STATUS_IOC_NON_OP = 61, + BFA_STATUS_ADDR_MAP_FAILURE = 62, + BFA_STATUS_SAME_NAME = 63, + BFA_STATUS_PENDING = 64, + BFA_STATUS_8G_SPD = 65, + BFA_STATUS_4G_SPD = 66, + BFA_STATUS_AD_IS_ENABLE = 67, + BFA_STATUS_EINVAL_TOV = 68, + BFA_STATUS_EINVAL_QDEPTH = 69, + BFA_STATUS_VERSION_FAIL = 70, + BFA_STATUS_DIAG_BUSY =
71, + BFA_STATUS_BEACON_ON = 72, + BFA_STATUS_BEACON_OFF = 73, + BFA_STATUS_LBEACON_ON = 74, + BFA_STATUS_LBEACON_OFF = 75, + BFA_STATUS_PORT_NOT_INITED = 76, + BFA_STATUS_RPSC_ENABLED = 77, + BFA_STATUS_ENOFSAVE = 78, + BFA_STATUS_BAD_FILE = 79, + BFA_STATUS_RLIM_EN = 80, + BFA_STATUS_RLIM_DIS = 81, + BFA_STATUS_IOC_DISABLED = 82, + BFA_STATUS_ADAPTER_DISABLED = 83, + BFA_STATUS_BIOS_DISABLED = 84, + BFA_STATUS_AUTH_ENABLED = 85, + BFA_STATUS_AUTH_DISABLED = 86, + BFA_STATUS_ERROR_TRL_ENABLED = 87, + BFA_STATUS_ERROR_QOS_ENABLED = 88, + BFA_STATUS_NO_SFP_DEV = 89, + BFA_STATUS_MEMTEST_FAILED = 90, + BFA_STATUS_INVALID_DEVID = 91, + BFA_STATUS_QOS_ENABLED = 92, + BFA_STATUS_QOS_DISABLED = 93, + BFA_STATUS_INCORRECT_DRV_CONFIG = 94, + BFA_STATUS_REG_FAIL = 95, + BFA_STATUS_IM_INV_CODE = 96, + BFA_STATUS_IM_INV_VLAN = 97, + BFA_STATUS_IM_INV_ADAPT_NAME = 98, + BFA_STATUS_IM_LOW_RESOURCES = 99, + BFA_STATUS_IM_VLANID_IS_PVID = 100, + BFA_STATUS_IM_VLANID_EXISTS = 101, + BFA_STATUS_IM_FW_UPDATE_FAIL = 102, + BFA_STATUS_PORTLOG_ENABLED = 103, + BFA_STATUS_PORTLOG_DISABLED = 104, + BFA_STATUS_FILE_NOT_FOUND = 105, + BFA_STATUS_QOS_FC_ONLY = 106, + BFA_STATUS_RLIM_FC_ONLY = 107, + BFA_STATUS_CT_SPD = 108, + BFA_STATUS_LEDTEST_OP = 109, + BFA_STATUS_CEE_NOT_DN = 110, + BFA_STATUS_10G_SPD = 111, + BFA_STATUS_IM_INV_TEAM_NAME = 112, + BFA_STATUS_IM_DUP_TEAM_NAME = 113, + BFA_STATUS_IM_ADAPT_ALREADY_IN_TEAM = 114, + BFA_STATUS_IM_ADAPT_HAS_VLANS = 115, + BFA_STATUS_IM_PVID_MISMATCH = 116, + BFA_STATUS_IM_LINK_SPEED_MISMATCH = 117, + BFA_STATUS_IM_MTU_MISMATCH = 118, + BFA_STATUS_IM_RSS_MISMATCH = 119, + BFA_STATUS_IM_HDS_MISMATCH = 120, + BFA_STATUS_IM_OFFLOAD_MISMATCH = 121, + BFA_STATUS_IM_PORT_PARAMS = 122, + BFA_STATUS_IM_PORT_NOT_IN_TEAM = 123, + BFA_STATUS_IM_CANNOT_REM_PRI = 124, + BFA_STATUS_IM_MAX_PORTS_REACHED = 125, + BFA_STATUS_IM_LAST_PORT_DELETE = 126, + BFA_STATUS_IM_NO_DRIVER = 127, + BFA_STATUS_IM_MAX_VLANS_REACHED = 128, + BFA_STATUS_TOMCAT_SPD_NOT_ALLOWED = 129, + BFA_STATUS_NO_MINPORT_DRIVER = 130, + BFA_STATUS_CARD_TYPE_MISMATCH = 131, + BFA_STATUS_BAD_ASICBLK = 132, + BFA_STATUS_NO_DRIVER = 133, + BFA_STATUS_INVALID_MAC = 134, + BFA_STATUS_IM_NO_VLAN = 135, + BFA_STATUS_IM_ETH_LB_FAILED = 136, + BFA_STATUS_IM_PVID_REMOVE = 137, + BFA_STATUS_IM_PVID_EDIT = 138, + BFA_STATUS_CNA_NO_BOOT = 139, + BFA_STATUS_IM_PVID_NON_ZERO = 140, + BFA_STATUS_IM_INETCFG_LOCK_FAILED = 141, + BFA_STATUS_IM_GET_INETCFG_FAILED = 142, + BFA_STATUS_IM_NOT_BOUND = 143, + BFA_STATUS_INSUFFICIENT_PERMS = 144, + BFA_STATUS_IM_INV_VLAN_NAME = 145, + BFA_STATUS_CMD_NOTSUPP_CNA = 146, + BFA_STATUS_IM_PASSTHRU_EDIT = 147, + BFA_STATUS_IM_BIND_FAILED = 148, + BFA_STATUS_IM_UNBIND_FAILED = 149, + BFA_STATUS_IM_PORT_IN_TEAM = 150, + BFA_STATUS_IM_VLAN_NOT_FOUND = 151, + BFA_STATUS_IM_TEAM_NOT_FOUND = 152, + BFA_STATUS_IM_TEAM_CFG_NOT_ALLOWED = 153, + BFA_STATUS_PBC = 154, + BFA_STATUS_DEVID_MISSING = 155, + BFA_STATUS_BAD_FWCFG = 156, + BFA_STATUS_CREATE_FILE = 157, + BFA_STATUS_INVALID_VENDOR = 158, + BFA_STATUS_SFP_NOT_READY = 159, + BFA_STATUS_FLASH_UNINIT = 160, + BFA_STATUS_FLASH_EMPTY = 161, + BFA_STATUS_FLASH_CKFAIL = 162, + BFA_STATUS_TRUNK_UNSUPP = 163, + BFA_STATUS_TRUNK_ENABLED = 164, + BFA_STATUS_TRUNK_DISABLED = 165, + BFA_STATUS_TRUNK_ERROR_TRL_ENABLED = 166, + BFA_STATUS_BOOT_CODE_UPDATED = 167, + BFA_STATUS_BOOT_VERSION = 168, + BFA_STATUS_CARDTYPE_MISSING = 169, + BFA_STATUS_INVALID_CARDTYPE = 170, + BFA_STATUS_NO_TOPOLOGY_FOR_CNA = 171, + BFA_STATUS_IM_VLAN_OVER_TEAM_DELETE_FAILED = 172, + 
BFA_STATUS_ETHBOOT_ENABLED = 173, + BFA_STATUS_ETHBOOT_DISABLED = 174, + BFA_STATUS_IOPROFILE_OFF = 175, + BFA_STATUS_NO_PORT_INSTANCE = 176, + BFA_STATUS_BOOT_CODE_TIMEDOUT = 177, + BFA_STATUS_NO_VPORT_LOCK = 178, + BFA_STATUS_VPORT_NO_CNFG = 179, + BFA_STATUS_MAX_VAL +}; + +enum bfa_eproto_status { + BFA_EPROTO_BAD_ACCEPT = 0, + BFA_EPROTO_UNKNOWN_RSP = 1 +}; + +#endif /* __BFA_DEFS_STATUS_H__ */ diff --git a/drivers/net/bna/bfa_ioc.c b/drivers/net/bna/bfa_ioc.c new file mode 100644 index 000000000000..cdc2cb1597ec --- /dev/null +++ b/drivers/net/bna/bfa_ioc.c @@ -0,0 +1,1839 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#include "bfa_ioc.h" +#include "cna.h" +#include "bfi.h" +#include "bfi_ctreg.h" +#include "bfa_defs.h" + +/** + * IOC local definitions + */ + +#define bfa_ioc_timer_start(__ioc) \ + mod_timer(&(__ioc)->ioc_timer, jiffies + \ + msecs_to_jiffies(BFA_IOC_TOV)) +#define bfa_ioc_timer_stop(__ioc) del_timer(&(__ioc)->ioc_timer) + +#define bfa_ioc_recovery_timer_start(__ioc) \ + mod_timer(&(__ioc)->ioc_timer, jiffies + \ + msecs_to_jiffies(BFA_IOC_TOV_RECOVER)) + +#define bfa_sem_timer_start(__ioc) \ + mod_timer(&(__ioc)->sem_timer, jiffies + \ + msecs_to_jiffies(BFA_IOC_HWSEM_TOV)) +#define bfa_sem_timer_stop(__ioc) del_timer(&(__ioc)->sem_timer) + +#define bfa_hb_timer_start(__ioc) \ + mod_timer(&(__ioc)->hb_timer, jiffies + \ + msecs_to_jiffies(BFA_IOC_HB_TOV)) +#define bfa_hb_timer_stop(__ioc) del_timer(&(__ioc)->hb_timer) + +/** + * Asic specific macros : see bfa_hw_cb.c and bfa_hw_ct.c for details. 
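+ * + * Note (added for clarity): each macro below dispatches through the + * per-ASIC ops table referenced by ioc->ioc_hwif, keeping the common + * IOC code independent of the CB/CT register layouts.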
+ */ + +#define bfa_ioc_firmware_lock(__ioc) \ + ((__ioc)->ioc_hwif->ioc_firmware_lock(__ioc)) +#define bfa_ioc_firmware_unlock(__ioc) \ + ((__ioc)->ioc_hwif->ioc_firmware_unlock(__ioc)) +#define bfa_ioc_reg_init(__ioc) ((__ioc)->ioc_hwif->ioc_reg_init(__ioc)) +#define bfa_ioc_map_port(__ioc) ((__ioc)->ioc_hwif->ioc_map_port(__ioc)) +#define bfa_ioc_notify_hbfail(__ioc) \ + ((__ioc)->ioc_hwif->ioc_notify_hbfail(__ioc)) + +#define bfa_ioc_is_optrom(__ioc) \ + (bfa_cb_image_get_size(BFA_IOC_FWIMG_TYPE(__ioc)) < BFA_IOC_FWIMG_MINSZ) + +#define bfa_ioc_mbox_cmd_pending(__ioc) \ + (!list_empty(&((__ioc)->mbox_mod.cmd_q)) || \ + readl((__ioc)->ioc_regs.hfn_mbox_cmd)) + +bool bfa_auto_recover = true; + +/* + * forward declarations + */ +static void bfa_ioc_hw_sem_get(struct bfa_ioc *ioc); +static void bfa_ioc_hw_sem_get_cancel(struct bfa_ioc *ioc); +static void bfa_ioc_hwinit(struct bfa_ioc *ioc, bool force); +static void bfa_ioc_send_enable(struct bfa_ioc *ioc); +static void bfa_ioc_send_disable(struct bfa_ioc *ioc); +static void bfa_ioc_send_getattr(struct bfa_ioc *ioc); +static void bfa_ioc_hb_monitor(struct bfa_ioc *ioc); +static void bfa_ioc_hb_stop(struct bfa_ioc *ioc); +static void bfa_ioc_reset(struct bfa_ioc *ioc, bool force); +static void bfa_ioc_mbox_poll(struct bfa_ioc *ioc); +static void bfa_ioc_mbox_hbfail(struct bfa_ioc *ioc); +static void bfa_ioc_recover(struct bfa_ioc *ioc); +static void bfa_ioc_check_attr_wwns(struct bfa_ioc *ioc); +static void bfa_ioc_disable_comp(struct bfa_ioc *ioc); +static void bfa_ioc_lpu_stop(struct bfa_ioc *ioc); + +/** + * IOC state machine events + */ +enum ioc_event { + IOC_E_ENABLE = 1, /*!< IOC enable request */ + IOC_E_DISABLE = 2, /*!< IOC disable request */ + IOC_E_TIMEOUT = 3, /*!< f/w response timeout */ + IOC_E_FWREADY = 4, /*!< f/w initialization done */ + IOC_E_FWRSP_GETATTR = 5, /*!< IOC get attribute response */ + IOC_E_FWRSP_ENABLE = 6, /*!< enable f/w response */ + IOC_E_FWRSP_DISABLE = 7, /*!< disable f/w response */ + IOC_E_HBFAIL = 8, /*!< heartbeat failure */ + IOC_E_HWERROR = 9, /*!< hardware error interrupt */ + IOC_E_SEMLOCKED = 10, /*!< h/w semaphore is locked */ + IOC_E_DETACH = 11, /*!< driver detach cleanup */ +}; + +bfa_fsm_state_decl(bfa_ioc, reset, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, fwcheck, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, mismatch, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, semwait, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, hwinit, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, enabling, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, getattr, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, op, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, initfail, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, hbfail, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, disabling, struct bfa_ioc, enum ioc_event); +bfa_fsm_state_decl(bfa_ioc, disabled, struct bfa_ioc, enum ioc_event); + +static struct bfa_sm_table ioc_sm_table[] = { + {BFA_SM(bfa_ioc_sm_reset), BFA_IOC_RESET}, + {BFA_SM(bfa_ioc_sm_fwcheck), BFA_IOC_FWMISMATCH}, + {BFA_SM(bfa_ioc_sm_mismatch), BFA_IOC_FWMISMATCH}, + {BFA_SM(bfa_ioc_sm_semwait), BFA_IOC_SEMWAIT}, + {BFA_SM(bfa_ioc_sm_hwinit), BFA_IOC_HWINIT}, + {BFA_SM(bfa_ioc_sm_enabling), BFA_IOC_HWINIT}, + {BFA_SM(bfa_ioc_sm_getattr), BFA_IOC_GETATTR}, + {BFA_SM(bfa_ioc_sm_op), BFA_IOC_OPERATIONAL}, + {BFA_SM(bfa_ioc_sm_initfail), 
BFA_IOC_INITFAIL}, + {BFA_SM(bfa_ioc_sm_hbfail), BFA_IOC_HBFAIL}, + {BFA_SM(bfa_ioc_sm_disabling), BFA_IOC_DISABLING}, + {BFA_SM(bfa_ioc_sm_disabled), BFA_IOC_DISABLED}, +}; + +/** + * Reset entry actions -- initialize state machine + */ +static void +bfa_ioc_sm_reset_entry(struct bfa_ioc *ioc) +{ + ioc->retry_count = 0; + ioc->auto_recover = bfa_auto_recover; +} + +/** + * Beginning state. IOC is in reset state. + */ +static void +bfa_ioc_sm_reset(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_ENABLE: + bfa_fsm_set_state(ioc, bfa_ioc_sm_fwcheck); + break; + + case IOC_E_DISABLE: + bfa_ioc_disable_comp(ioc); + break; + + case IOC_E_DETACH: + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +/** + * Semaphore should be acquired for version check. + */ +static void +bfa_ioc_sm_fwcheck_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_hw_sem_get(ioc); +} + +/** + * Awaiting h/w semaphore to continue with version check. + */ +static void +bfa_ioc_sm_fwcheck(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_SEMLOCKED: + if (bfa_ioc_firmware_lock(ioc)) { + ioc->retry_count = 0; + bfa_fsm_set_state(ioc, bfa_ioc_sm_hwinit); + } else { + bfa_ioc_hw_sem_release(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_mismatch); + } + break; + + case IOC_E_DISABLE: + bfa_ioc_disable_comp(ioc); + /* fall through */ + + case IOC_E_DETACH: + bfa_ioc_hw_sem_get_cancel(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_reset); + break; + + case IOC_E_FWREADY: + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +/** + * Notify enable completion callback and generate mismatch AEN. + */ +static void +bfa_ioc_sm_mismatch_entry(struct bfa_ioc *ioc) +{ + /** + * Provide enable completion callback and AEN notification only once. + */ + if (ioc->retry_count == 0) + ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_IOC_FAILURE); + ioc->retry_count++; + bfa_ioc_timer_start(ioc); +} + +/** + * Awaiting firmware version match. + */ +static void +bfa_ioc_sm_mismatch(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_TIMEOUT: + bfa_fsm_set_state(ioc, bfa_ioc_sm_fwcheck); + break; + + case IOC_E_DISABLE: + bfa_ioc_disable_comp(ioc); + /* fall through */ + + case IOC_E_DETACH: + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_reset); + break; + + case IOC_E_FWREADY: + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +/** + * Request for semaphore. + */ +static void +bfa_ioc_sm_semwait_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_hw_sem_get(ioc); +} + +/** + * Awaiting semaphore for h/w initialization. + */ +static void +bfa_ioc_sm_semwait(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_SEMLOCKED: + ioc->retry_count = 0; + bfa_fsm_set_state(ioc, bfa_ioc_sm_hwinit); + break; + + case IOC_E_DISABLE: + bfa_ioc_hw_sem_get_cancel(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_hwinit_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_timer_start(ioc); + bfa_ioc_reset(ioc, false); +} + +/** + * @brief + * Hardware is being initialized. Interrupts are enabled. + * Holding hardware semaphore lock.
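+ * + * Note (added for clarity): on h/w error or timeout the initialization + * is retried up to BFA_IOC_HWINIT_MAX times before the semaphore is + * released and the state machine moves to initfail.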
+ */ +static void +bfa_ioc_sm_hwinit(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_FWREADY: + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_enabling); + break; + + case IOC_E_HWERROR: + bfa_ioc_timer_stop(ioc); + /* fall through */ + + case IOC_E_TIMEOUT: + ioc->retry_count++; + if (ioc->retry_count < BFA_IOC_HWINIT_MAX) { + bfa_ioc_timer_start(ioc); + bfa_ioc_reset(ioc, true); + break; + } + + bfa_ioc_hw_sem_release(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_initfail); + break; + + case IOC_E_DISABLE: + bfa_ioc_hw_sem_release(ioc); + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_enabling_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_timer_start(ioc); + bfa_ioc_send_enable(ioc); +} + +/** + * Host IOC function is being enabled, awaiting response from firmware. + * Semaphore is acquired. + */ +static void +bfa_ioc_sm_enabling(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_FWRSP_ENABLE: + bfa_ioc_timer_stop(ioc); + bfa_ioc_hw_sem_release(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_getattr); + break; + + case IOC_E_HWERROR: + bfa_ioc_timer_stop(ioc); + /* fall through */ + + case IOC_E_TIMEOUT: + ioc->retry_count++; + if (ioc->retry_count < BFA_IOC_HWINIT_MAX) { + writel(BFI_IOC_UNINIT, + ioc->ioc_regs.ioc_fwstate); + bfa_fsm_set_state(ioc, bfa_ioc_sm_hwinit); + break; + } + + bfa_ioc_hw_sem_release(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_initfail); + break; + + case IOC_E_DISABLE: + bfa_ioc_timer_stop(ioc); + bfa_ioc_hw_sem_release(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + case IOC_E_FWREADY: + bfa_ioc_send_enable(ioc); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_getattr_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_timer_start(ioc); + bfa_ioc_send_getattr(ioc); +} + +/** + * @brief + * IOC configuration in progress. Timer is active. + */ +static void +bfa_ioc_sm_getattr(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_FWRSP_GETATTR: + bfa_ioc_timer_stop(ioc); + bfa_ioc_check_attr_wwns(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_op); + break; + + case IOC_E_HWERROR: + bfa_ioc_timer_stop(ioc); + /* fall through */ + + case IOC_E_TIMEOUT: + bfa_fsm_set_state(ioc, bfa_ioc_sm_initfail); + break; + + case IOC_E_DISABLE: + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_op_entry(struct bfa_ioc *ioc) +{ + ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_OK); + bfa_ioc_hb_monitor(ioc); +} + +static void +bfa_ioc_sm_op(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_ENABLE: + break; + + case IOC_E_DISABLE: + bfa_ioc_hb_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabling); + break; + + case IOC_E_HWERROR: + case IOC_E_FWREADY: + /** + * Hard error or IOC recovery by other function. + * Treat it same as heartbeat failure. + */ + bfa_ioc_hb_stop(ioc); + /* !!! fall through !!! 
*/ + + case IOC_E_HBFAIL: + bfa_fsm_set_state(ioc, bfa_ioc_sm_hbfail); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_disabling_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_timer_start(ioc); + bfa_ioc_send_disable(ioc); +} + +/** + * IOC is being disabled + */ +static void +bfa_ioc_sm_disabling(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_FWRSP_DISABLE: + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + case IOC_E_HWERROR: + bfa_ioc_timer_stop(ioc); + /* + * !!! fall through !!! + */ + + case IOC_E_TIMEOUT: + writel(BFI_IOC_FAIL, ioc->ioc_regs.ioc_fwstate); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +/** + * IOC disable completion entry. + */ +static void +bfa_ioc_sm_disabled_entry(struct bfa_ioc *ioc) +{ + bfa_ioc_disable_comp(ioc); +} + +static void +bfa_ioc_sm_disabled(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_ENABLE: + bfa_fsm_set_state(ioc, bfa_ioc_sm_semwait); + break; + + case IOC_E_DISABLE: + ioc->cbfn->disable_cbfn(ioc->bfa); + break; + + case IOC_E_FWREADY: + break; + + case IOC_E_DETACH: + bfa_ioc_firmware_unlock(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_reset); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_initfail_entry(struct bfa_ioc *ioc) +{ + ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_IOC_FAILURE); + bfa_ioc_timer_start(ioc); +} + +/** + * @brief + * Hardware initialization failed. + */ +static void +bfa_ioc_sm_initfail(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + case IOC_E_DISABLE: + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + case IOC_E_DETACH: + bfa_ioc_timer_stop(ioc); + bfa_ioc_firmware_unlock(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_reset); + break; + + case IOC_E_TIMEOUT: + bfa_fsm_set_state(ioc, bfa_ioc_sm_semwait); + break; + + default: + bfa_sm_fault(ioc, event); + } +} + +static void +bfa_ioc_sm_hbfail_entry(struct bfa_ioc *ioc) +{ + struct list_head *qe; + struct bfa_ioc_hbfail_notify *notify; + + /** + * Mark IOC as failed in hardware and stop firmware. + */ + bfa_ioc_lpu_stop(ioc); + writel(BFI_IOC_FAIL, ioc->ioc_regs.ioc_fwstate); + + /** + * Notify other functions on HB failure. + */ + bfa_ioc_notify_hbfail(ioc); + + /** + * Notify driver and common modules registered for notification. + */ + ioc->cbfn->hbfail_cbfn(ioc->bfa); + list_for_each(qe, &ioc->hb_notify_q) { + notify = (struct bfa_ioc_hbfail_notify *) qe; + notify->cbfn(notify->cbarg); + } + + /** + * Flush any queued up mailbox requests. + */ + bfa_ioc_mbox_hbfail(ioc); + + /** + * Trigger auto-recovery after a delay. + */ + if (ioc->auto_recover) + mod_timer(&ioc->ioc_timer, jiffies + + msecs_to_jiffies(BFA_IOC_TOV_RECOVER)); +} + +/** + * @brief + * IOC heartbeat failure. + */ +static void +bfa_ioc_sm_hbfail(struct bfa_ioc *ioc, enum ioc_event event) +{ + switch (event) { + + case IOC_E_ENABLE: + ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_IOC_FAILURE); + break; + + case IOC_E_DISABLE: + if (ioc->auto_recover) + bfa_ioc_timer_stop(ioc); + bfa_fsm_set_state(ioc, bfa_ioc_sm_disabled); + break; + + case IOC_E_TIMEOUT: + bfa_fsm_set_state(ioc, bfa_ioc_sm_semwait); + break; + + case IOC_E_FWREADY: + /** + * Recovery is already initiated by other function. + */ + break; + + case IOC_E_HWERROR: + /* + * HB failure notification, ignore. 
+ */ + break; + default: + bfa_sm_fault(ioc, event); + } +} + +/** + * BFA IOC private functions + */ + +static void +bfa_ioc_disable_comp(struct bfa_ioc *ioc) +{ + struct list_head *qe; + struct bfa_ioc_hbfail_notify *notify; + + ioc->cbfn->disable_cbfn(ioc->bfa); + + /** + * Notify common modules registered for notification. + */ + list_for_each(qe, &ioc->hb_notify_q) { + notify = (struct bfa_ioc_hbfail_notify *) qe; + notify->cbfn(notify->cbarg); + } +} + +void +bfa_ioc_sem_timeout(void *ioc_arg) +{ + struct bfa_ioc *ioc = (struct bfa_ioc *) ioc_arg; + + bfa_ioc_hw_sem_get(ioc); +} + +bool +bfa_ioc_sem_get(void __iomem *sem_reg) +{ + u32 r32; + int cnt = 0; +#define BFA_SEM_SPINCNT 3000 + + r32 = readl(sem_reg); + + while (r32 && (cnt < BFA_SEM_SPINCNT)) { + cnt++; + udelay(2); + r32 = readl(sem_reg); + } + + if (r32 == 0) + return true; + + BUG_ON(!(cnt < BFA_SEM_SPINCNT)); + return false; +} + +void +bfa_ioc_sem_release(void __iomem *sem_reg) +{ + writel(1, sem_reg); +} + +static void +bfa_ioc_hw_sem_get(struct bfa_ioc *ioc) +{ + u32 r32; + + /** + * First read to the semaphore register will return 0, subsequent reads + * will return 1. Semaphore is released by writing 1 to the register + */ + r32 = readl(ioc->ioc_regs.ioc_sem_reg); + if (r32 == 0) { + bfa_fsm_send_event(ioc, IOC_E_SEMLOCKED); + return; + } + + mod_timer(&ioc->sem_timer, jiffies + + msecs_to_jiffies(BFA_IOC_HWSEM_TOV)); +} + +void +bfa_ioc_hw_sem_release(struct bfa_ioc *ioc) +{ + writel(1, ioc->ioc_regs.ioc_sem_reg); +} + +static void +bfa_ioc_hw_sem_get_cancel(struct bfa_ioc *ioc) +{ + del_timer(&ioc->sem_timer); +} + +/** + * @brief + * Initialize LPU local memory (aka secondary memory / SRAM) + */ +static void +bfa_ioc_lmem_init(struct bfa_ioc *ioc) +{ + u32 pss_ctl; + int i; +#define PSS_LMEM_INIT_TIME 10000 + + pss_ctl = readl(ioc->ioc_regs.pss_ctl_reg); + pss_ctl &= ~__PSS_LMEM_RESET; + pss_ctl |= __PSS_LMEM_INIT_EN; + + /* + * i2c workaround 12.5khz clock + */ + pss_ctl |= __PSS_I2C_CLK_DIV(3UL); + writel(pss_ctl, ioc->ioc_regs.pss_ctl_reg); + + /** + * wait for memory initialization to be complete + */ + i = 0; + do { + pss_ctl = readl(ioc->ioc_regs.pss_ctl_reg); + i++; + } while (!(pss_ctl & __PSS_LMEM_INIT_DONE) && (i < PSS_LMEM_INIT_TIME)); + + /** + * If memory initialization is not successful, IOC timeout will catch + * such failures. + */ + BUG_ON(!(pss_ctl & __PSS_LMEM_INIT_DONE)); + + pss_ctl &= ~(__PSS_LMEM_INIT_DONE | __PSS_LMEM_INIT_EN); + writel(pss_ctl, ioc->ioc_regs.pss_ctl_reg); +} + +static void +bfa_ioc_lpu_start(struct bfa_ioc *ioc) +{ + u32 pss_ctl; + + /** + * Take processor out of reset. + */ + pss_ctl = readl(ioc->ioc_regs.pss_ctl_reg); + pss_ctl &= ~__PSS_LPU0_RESET; + + writel(pss_ctl, ioc->ioc_regs.pss_ctl_reg); +} + +static void +bfa_ioc_lpu_stop(struct bfa_ioc *ioc) +{ + u32 pss_ctl; + + /** + * Put processors in reset. + */ + pss_ctl = readl(ioc->ioc_regs.pss_ctl_reg); + pss_ctl |= (__PSS_LPU0_RESET | __PSS_LPU1_RESET); + + writel(pss_ctl, ioc->ioc_regs.pss_ctl_reg); +} + +/** + * Get driver and firmware versions. 
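+ * + * Note (added for clarity): the firmware image header is read word by + * word from IOC shared memory (smem) and byte-swapped into the + * caller-supplied bfi_ioc_image_hdr.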
+ */ +void +bfa_ioc_fwver_get(struct bfa_ioc *ioc, struct bfi_ioc_image_hdr *fwhdr) +{ + u32 pgnum, pgoff; + u32 loff = 0; + int i; + u32 *fwsig = (u32 *) fwhdr; + + pgnum = bfa_ioc_smem_pgnum(ioc, loff); + pgoff = bfa_ioc_smem_pgoff(ioc, loff); + writel(pgnum, ioc->ioc_regs.host_page_num_fn); + + for (i = 0; i < (sizeof(struct bfi_ioc_image_hdr) / sizeof(u32)); + i++) { + fwsig[i] = + swab32(readl((loff) + (ioc->ioc_regs.smem_page_start))); + loff += sizeof(u32); + } +} + +/** + * Returns TRUE if same. + */ +bool +bfa_ioc_fwver_cmp(struct bfa_ioc *ioc, struct bfi_ioc_image_hdr *fwhdr) +{ + struct bfi_ioc_image_hdr *drv_fwhdr; + int i; + + drv_fwhdr = (struct bfi_ioc_image_hdr *) + bfa_cb_image_get_chunk(BFA_IOC_FWIMG_TYPE(ioc), 0); + + for (i = 0; i < BFI_IOC_MD5SUM_SZ; i++) { + if (fwhdr->md5sum[i] != drv_fwhdr->md5sum[i]) + return false; + } + + return true; +} + +/** + * Return true if current running version is valid. Firmware signature and + * execution context (driver/bios) must match. + */ +static bool +bfa_ioc_fwver_valid(struct bfa_ioc *ioc) +{ + struct bfi_ioc_image_hdr fwhdr, *drv_fwhdr; + + /** + * If bios/efi boot (flash based) -- return true + */ + if (bfa_ioc_is_optrom(ioc)) + return true; + + bfa_ioc_fwver_get(ioc, &fwhdr); + drv_fwhdr = (struct bfi_ioc_image_hdr *) + bfa_cb_image_get_chunk(BFA_IOC_FWIMG_TYPE(ioc), 0); + + if (fwhdr.signature != drv_fwhdr->signature) + return false; + + if (fwhdr.exec != drv_fwhdr->exec) + return false; + + return bfa_ioc_fwver_cmp(ioc, &fwhdr); +} + +/** + * Conditionally flush any pending message from firmware at start. + */ +static void +bfa_ioc_msgflush(struct bfa_ioc *ioc) +{ + u32 r32; + + r32 = readl(ioc->ioc_regs.lpu_mbox_cmd); + if (r32) + writel(1, ioc->ioc_regs.lpu_mbox_cmd); +} + +/** + * @img ioc_init_logic.jpg + */ +static void +bfa_ioc_hwinit(struct bfa_ioc *ioc, bool force) +{ + enum bfi_ioc_state ioc_fwstate; + bool fwvalid; + + ioc_fwstate = readl(ioc->ioc_regs.ioc_fwstate); + + if (force) + ioc_fwstate = BFI_IOC_UNINIT; + + /** + * check if firmware is valid + */ + fwvalid = (ioc_fwstate == BFI_IOC_UNINIT) ? + false : bfa_ioc_fwver_valid(ioc); + + if (!fwvalid) { + bfa_ioc_boot(ioc, BFI_BOOT_TYPE_NORMAL, ioc->pcidev.device_id); + return; + } + + /** + * If hardware initialization is in progress (initialized by other IOC), + * just wait for an initialization completion interrupt. + */ + if (ioc_fwstate == BFI_IOC_INITING) { + ioc->cbfn->reset_cbfn(ioc->bfa); + return; + } + + /** + * If IOC function is disabled and firmware version is same, + * just re-enable IOC. + * + * If option rom, IOC must not be in operational state. With + * convergence, IOC will be in operational state when 2nd driver + * is loaded. + */ + if (ioc_fwstate == BFI_IOC_DISABLED || + (!bfa_ioc_is_optrom(ioc) && ioc_fwstate == BFI_IOC_OP)) { + /** + * When using MSI-X any pending firmware ready event should + * be flushed. Otherwise MSI-X interrupts are not delivered. + */ + bfa_ioc_msgflush(ioc); + ioc->cbfn->reset_cbfn(ioc->bfa); + bfa_fsm_send_event(ioc, IOC_E_FWREADY); + return; + } + + /** + * Initialize the h/w for any other states. 
+ */ + bfa_ioc_boot(ioc, BFI_BOOT_TYPE_NORMAL, ioc->pcidev.device_id); +} + +void +bfa_ioc_timeout(void *ioc_arg) +{ + struct bfa_ioc *ioc = (struct bfa_ioc *) ioc_arg; + + bfa_fsm_send_event(ioc, IOC_E_TIMEOUT); +} + +void +bfa_ioc_mbox_send(struct bfa_ioc *ioc, void *ioc_msg, int len) +{ + u32 *msgp = (u32 *) ioc_msg; + u32 i; + + BUG_ON(!(len <= BFI_IOC_MSGLEN_MAX)); + + /* + * first write msg to mailbox registers + */ + for (i = 0; i < len / sizeof(u32); i++) + writel(cpu_to_le32(msgp[i]), + ioc->ioc_regs.hfn_mbox + i * sizeof(u32)); + + for (; i < BFI_IOC_MSGLEN_MAX / sizeof(u32); i++) + writel(0, ioc->ioc_regs.hfn_mbox + i * sizeof(u32)); + + /* + * write 1 to mailbox CMD to trigger LPU event + */ + writel(1, ioc->ioc_regs.hfn_mbox_cmd); + (void) readl(ioc->ioc_regs.hfn_mbox_cmd); +} + +static void +bfa_ioc_send_enable(struct bfa_ioc *ioc) +{ + struct bfi_ioc_ctrl_req enable_req; + struct timeval tv; + + bfi_h2i_set(enable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_ENABLE_REQ, + bfa_ioc_portid(ioc)); + enable_req.ioc_class = ioc->ioc_mc; + do_gettimeofday(&tv); + enable_req.tv_sec = ntohl(tv.tv_sec); + bfa_ioc_mbox_send(ioc, &enable_req, sizeof(struct bfi_ioc_ctrl_req)); +} + +static void +bfa_ioc_send_disable(struct bfa_ioc *ioc) +{ + struct bfi_ioc_ctrl_req disable_req; + + bfi_h2i_set(disable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_DISABLE_REQ, + bfa_ioc_portid(ioc)); + bfa_ioc_mbox_send(ioc, &disable_req, sizeof(struct bfi_ioc_ctrl_req)); +} + +static void +bfa_ioc_send_getattr(struct bfa_ioc *ioc) +{ + struct bfi_ioc_getattr_req attr_req; + + bfi_h2i_set(attr_req.mh, BFI_MC_IOC, BFI_IOC_H2I_GETATTR_REQ, + bfa_ioc_portid(ioc)); + bfa_dma_be_addr_set(attr_req.attr_addr, ioc->attr_dma.pa); + bfa_ioc_mbox_send(ioc, &attr_req, sizeof(attr_req)); +} + +void +bfa_ioc_hb_check(void *cbarg) +{ + struct bfa_ioc *ioc = cbarg; + u32 hb_count; + + hb_count = readl(ioc->ioc_regs.heartbeat); + if (ioc->hb_count == hb_count) { + pr_crit("Firmware heartbeat failure at %d\n", hb_count); + bfa_ioc_recover(ioc); + return; + } else { + ioc->hb_count = hb_count; + } + + bfa_ioc_mbox_poll(ioc); + mod_timer(&ioc->hb_timer, jiffies + + msecs_to_jiffies(BFA_IOC_HB_TOV)); +} + +static void +bfa_ioc_hb_monitor(struct bfa_ioc *ioc) +{ + ioc->hb_count = readl(ioc->ioc_regs.heartbeat); + mod_timer(&ioc->hb_timer, jiffies + + msecs_to_jiffies(BFA_IOC_HB_TOV)); +} + +static void +bfa_ioc_hb_stop(struct bfa_ioc *ioc) +{ + del_timer(&ioc->hb_timer); +} + +/** + * @brief + * Initiate a full firmware download.
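+ * + * Note (added for clarity): the image is written to IOC shared memory + * word by word, fetching a fresh image chunk whenever the flash chunk + * number changes and advancing the smem page register on every page + * offset wrap; the boot type and parameter are written last.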
+ */ +static void +bfa_ioc_download_fw(struct bfa_ioc *ioc, u32 boot_type, + u32 boot_param) +{ + u32 *fwimg; + u32 pgnum, pgoff; + u32 loff = 0; + u32 chunkno = 0; + u32 i; + + /** + * Initialize LMEM first before code download + */ + bfa_ioc_lmem_init(ioc); + + /** + * Flash based firmware boot + */ + if (bfa_ioc_is_optrom(ioc)) + boot_type = BFI_BOOT_TYPE_FLASH; + fwimg = bfa_cb_image_get_chunk(BFA_IOC_FWIMG_TYPE(ioc), chunkno); + + pgnum = bfa_ioc_smem_pgnum(ioc, loff); + pgoff = bfa_ioc_smem_pgoff(ioc, loff); + + writel(pgnum, ioc->ioc_regs.host_page_num_fn); + + for (i = 0; i < bfa_cb_image_get_size(BFA_IOC_FWIMG_TYPE(ioc)); i++) { + if (BFA_IOC_FLASH_CHUNK_NO(i) != chunkno) { + chunkno = BFA_IOC_FLASH_CHUNK_NO(i); + fwimg = bfa_cb_image_get_chunk(BFA_IOC_FWIMG_TYPE(ioc), + BFA_IOC_FLASH_CHUNK_ADDR(chunkno)); + } + + /** + * write smem + */ + writel((swab32(fwimg[BFA_IOC_FLASH_OFFSET_IN_CHUNK(i)])), + ((ioc->ioc_regs.smem_page_start) + (loff))); + + loff += sizeof(u32); + + /** + * handle page offset wrap around + */ + loff = PSS_SMEM_PGOFF(loff); + if (loff == 0) { + pgnum++; + writel(pgnum, + ioc->ioc_regs.host_page_num_fn); + } + } + + writel(bfa_ioc_smem_pgnum(ioc, 0), + ioc->ioc_regs.host_page_num_fn); + + /* + * Set boot type and boot param at the end. + */ + writel((swab32(swab32(boot_type))), ((ioc->ioc_regs.smem_page_start) + + (BFI_BOOT_TYPE_OFF))); + writel((swab32(swab32(boot_param))), ((ioc->ioc_regs.smem_page_start) + + (BFI_BOOT_PARAM_OFF))); +} + +static void +bfa_ioc_reset(struct bfa_ioc *ioc, bool force) +{ + bfa_ioc_hwinit(ioc, force); +} + +/** + * @brief + * Update BFA configuration from firmware configuration. + */ +static void +bfa_ioc_getattr_reply(struct bfa_ioc *ioc) +{ + struct bfi_ioc_attr *attr = ioc->attr; + + attr->adapter_prop = ntohl(attr->adapter_prop); + attr->card_type = ntohl(attr->card_type); + attr->maxfrsize = ntohs(attr->maxfrsize); + + bfa_fsm_send_event(ioc, IOC_E_FWRSP_GETATTR); +} + +/** + * Attach time initialization of mbox logic. + */ +static void +bfa_ioc_mbox_attach(struct bfa_ioc *ioc) +{ + struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod; + int mc; + + INIT_LIST_HEAD(&mod->cmd_q); + for (mc = 0; mc < BFI_MC_MAX; mc++) { + mod->mbhdlr[mc].cbfn = NULL; + mod->mbhdlr[mc].cbarg = ioc->bfa; + } +} + +/** + * Mbox poll timer -- restarts any pending mailbox requests. + */ +static void +bfa_ioc_mbox_poll(struct bfa_ioc *ioc) +{ + struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod; + struct bfa_mbox_cmd *cmd; + u32 stat; + + /** + * If no command pending, do nothing + */ + if (list_empty(&mod->cmd_q)) + return; + + /** + * If previous command is not yet fetched by firmware, do nothing + */ + stat = readl(ioc->ioc_regs.hfn_mbox_cmd); + if (stat) + return; + + /** + * Enqueue command to firmware. + */ + bfa_q_deq(&mod->cmd_q, &cmd); + bfa_ioc_mbox_send(ioc, cmd->msg, sizeof(cmd->msg)); +} + +/** + * Cleanup any pending requests. + */ +static void +bfa_ioc_mbox_hbfail(struct bfa_ioc *ioc) +{ + struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod; + struct bfa_mbox_cmd *cmd; + + while (!list_empty(&mod->cmd_q)) + bfa_q_deq(&mod->cmd_q, &cmd); +} + +/** + * IOC public + */ +enum bfa_status +bfa_ioc_pll_init(struct bfa_ioc *ioc) +{ + /* + * Hold semaphore so that nobody can access the chip during init. + */ + bfa_ioc_sem_get(ioc->ioc_regs.ioc_init_sem_reg); + + bfa_ioc_pll_init_asic(ioc); + + ioc->pllinit = true; + /* + * release semaphore. 
+ */ + bfa_ioc_sem_release(ioc->ioc_regs.ioc_init_sem_reg); + + return BFA_STATUS_OK; +} + +/** + * Interface used by diag module to do firmware boot with memory test + * as the entry vector. + */ +void +bfa_ioc_boot(struct bfa_ioc *ioc, u32 boot_type, u32 boot_param) +{ + void __iomem *rb; + + bfa_ioc_stats(ioc, ioc_boots); + + if (bfa_ioc_pll_init(ioc) != BFA_STATUS_OK) + return; + + /** + * Initialize IOC state of all functions on a chip reset. + */ + rb = ioc->pcidev.pci_bar_kva; + if (boot_param == BFI_BOOT_TYPE_MEMTEST) { + writel(BFI_IOC_MEMTEST, (rb + BFA_IOC0_STATE_REG)); + writel(BFI_IOC_MEMTEST, (rb + BFA_IOC1_STATE_REG)); + } else { + writel(BFI_IOC_INITING, (rb + BFA_IOC0_STATE_REG)); + writel(BFI_IOC_INITING, (rb + BFA_IOC1_STATE_REG)); + } + + bfa_ioc_msgflush(ioc); + bfa_ioc_download_fw(ioc, boot_type, boot_param); + + /** + * Enable interrupts just before starting LPU + */ + ioc->cbfn->reset_cbfn(ioc->bfa); + bfa_ioc_lpu_start(ioc); +} + +/** + * Enable/disable IOC failure auto recovery. + */ +void +bfa_ioc_auto_recover(bool auto_recover) +{ + bfa_auto_recover = auto_recover; +} + +bool +bfa_ioc_is_operational(struct bfa_ioc *ioc) +{ + return bfa_fsm_cmp_state(ioc, bfa_ioc_sm_op); +} + +bool +bfa_ioc_is_initialized(struct bfa_ioc *ioc) +{ + u32 r32 = readl(ioc->ioc_regs.ioc_fwstate); + + return ((r32 != BFI_IOC_UNINIT) && + (r32 != BFI_IOC_INITING) && + (r32 != BFI_IOC_MEMTEST)); +} + +void +bfa_ioc_msgget(struct bfa_ioc *ioc, void *mbmsg) +{ + u32 *msgp = mbmsg; + u32 r32; + int i; + + /** + * read the MBOX msg + */ + for (i = 0; i < (sizeof(union bfi_ioc_i2h_msg_u) / sizeof(u32)); + i++) { + r32 = readl(ioc->ioc_regs.lpu_mbox + + i * sizeof(u32)); + msgp[i] = htonl(r32); + } + + /** + * turn off mailbox interrupt by clearing mailbox status + */ + writel(1, ioc->ioc_regs.lpu_mbox_cmd); + readl(ioc->ioc_regs.lpu_mbox_cmd); +} + +void +bfa_ioc_isr(struct bfa_ioc *ioc, struct bfi_mbmsg *m) +{ + union bfi_ioc_i2h_msg_u *msg; + + msg = (union bfi_ioc_i2h_msg_u *) m; + + bfa_ioc_stats(ioc, ioc_isrs); + + switch (msg->mh.msg_id) { + case BFI_IOC_I2H_HBEAT: + break; + + case BFI_IOC_I2H_READY_EVENT: + bfa_fsm_send_event(ioc, IOC_E_FWREADY); + break; + + case BFI_IOC_I2H_ENABLE_REPLY: + bfa_fsm_send_event(ioc, IOC_E_FWRSP_ENABLE); + break; + + case BFI_IOC_I2H_DISABLE_REPLY: + bfa_fsm_send_event(ioc, IOC_E_FWRSP_DISABLE); + break; + + case BFI_IOC_I2H_GETATTR_REPLY: + bfa_ioc_getattr_reply(ioc); + break; + + default: + BUG_ON(1); + } +} + +/** + * IOC attach time initialization and setup. + * + * @param[in] ioc memory for IOC + * @param[in] bfa driver instance structure + */ +void +bfa_ioc_attach(struct bfa_ioc *ioc, void *bfa, struct bfa_ioc_cbfn *cbfn) +{ + ioc->bfa = bfa; + ioc->cbfn = cbfn; + ioc->fcmode = false; + ioc->pllinit = false; + ioc->dbg_fwsave_once = true; + + bfa_ioc_mbox_attach(ioc); + INIT_LIST_HEAD(&ioc->hb_notify_q); + + bfa_fsm_set_state(ioc, bfa_ioc_sm_reset); +} + +/** + * Driver detach time IOC cleanup. + */ +void +bfa_ioc_detach(struct bfa_ioc *ioc) +{ + bfa_fsm_send_event(ioc, IOC_E_DETACH); +} + +/** + * Setup IOC PCI properties. 
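+ *
+ * (Copies the PCI info, derives ctdev/cna from the device id, hooks up
+ * the Catapult hw interface, then maps the port and register addresses;
+ * callers are expected to run this before any register or mailbox use.)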
+ *
+ * @param[in]	pcidev	PCI device information for this IOC
+ */
+void
+bfa_ioc_pci_init(struct bfa_ioc *ioc, struct bfa_pcidev *pcidev,
+		 enum bfi_mclass mc)
+{
+	ioc->ioc_mc	= mc;
+	ioc->pcidev	= *pcidev;
+	ioc->ctdev	= bfa_asic_id_ct(ioc->pcidev.device_id);
+	ioc->cna	= ioc->ctdev && !ioc->fcmode;
+
+	bfa_ioc_set_ct_hwif(ioc);
+
+	bfa_ioc_map_port(ioc);
+	bfa_ioc_reg_init(ioc);
+}
+
+/**
+ * Initialize IOC dma memory
+ *
+ * @param[in]	dm_kva	kernel virtual address of IOC dma memory
+ * @param[in]	dm_pa	physical address of IOC dma memory
+ */
+void
+bfa_ioc_mem_claim(struct bfa_ioc *ioc, u8 *dm_kva, u64 dm_pa)
+{
+	/**
+	 * dma memory for firmware attribute
+	 */
+	ioc->attr_dma.kva = dm_kva;
+	ioc->attr_dma.pa = dm_pa;
+	ioc->attr = (struct bfi_ioc_attr *) dm_kva;
+}
+
+/**
+ * Return size of dma memory required.
+ */
+u32
+bfa_ioc_meminfo(void)
+{
+	return roundup(sizeof(struct bfi_ioc_attr), BFA_DMA_ALIGN_SZ);
+}
+
+void
+bfa_ioc_enable(struct bfa_ioc *ioc)
+{
+	bfa_ioc_stats(ioc, ioc_enables);
+	ioc->dbg_fwsave_once = true;
+
+	bfa_fsm_send_event(ioc, IOC_E_ENABLE);
+}
+
+void
+bfa_ioc_disable(struct bfa_ioc *ioc)
+{
+	bfa_ioc_stats(ioc, ioc_disables);
+	bfa_fsm_send_event(ioc, IOC_E_DISABLE);
+}
+
+u32
+bfa_ioc_smem_pgnum(struct bfa_ioc *ioc, u32 fmaddr)
+{
+	return PSS_SMEM_PGNUM(ioc->ioc_regs.smem_pg0, fmaddr);
+}
+
+u32
+bfa_ioc_smem_pgoff(struct bfa_ioc *ioc, u32 fmaddr)
+{
+	return PSS_SMEM_PGOFF(fmaddr);
+}
+
+/**
+ * Register mailbox message handler functions
+ *
+ * @param[in]	ioc		IOC instance
+ * @param[in]	mcfuncs		message class handler functions
+ */
+void
+bfa_ioc_mbox_register(struct bfa_ioc *ioc, bfa_ioc_mbox_mcfunc_t *mcfuncs)
+{
+	struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod;
+	int mc;
+
+	for (mc = 0; mc < BFI_MC_MAX; mc++)
+		mod->mbhdlr[mc].cbfn = mcfuncs[mc];
+}
+
+/**
+ * Register mailbox message handler function, to be called by common modules
+ */
+void
+bfa_ioc_mbox_regisr(struct bfa_ioc *ioc, enum bfi_mclass mc,
+		    bfa_ioc_mbox_mcfunc_t cbfn, void *cbarg)
+{
+	struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod;
+
+	mod->mbhdlr[mc].cbfn = cbfn;
+	mod->mbhdlr[mc].cbarg = cbarg;
+}
+
+/**
+ * Queue a mailbox command request to firmware. The command is queued
+ * when the mailbox is busy. It is the caller's responsibility to
+ * serialize calls.
+ *
+ * @param[in]	ioc	IOC instance
+ * @param[in]	cmd	Mailbox command
+ */
+void
+bfa_ioc_mbox_queue(struct bfa_ioc *ioc, struct bfa_mbox_cmd *cmd)
+{
+	struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod;
+	u32 stat;
+
+	/**
+	 * If a previous command is pending, queue new command
+	 */
+	if (!list_empty(&mod->cmd_q)) {
+		list_add_tail(&cmd->qe, &mod->cmd_q);
+		return;
+	}
+
+	/**
+	 * If mailbox is busy, queue command for poll timer
+	 */
+	stat = readl(ioc->ioc_regs.hfn_mbox_cmd);
+	if (stat) {
+		list_add_tail(&cmd->qe, &mod->cmd_q);
+		return;
+	}
+
+	/**
+	 * mailbox is free -- queue command to firmware
+	 */
+	bfa_ioc_mbox_send(ioc, cmd->msg, sizeof(cmd->msg));
+}
+
+/**
+ * Handle mailbox interrupts
+ */
+void
+bfa_ioc_mbox_isr(struct bfa_ioc *ioc)
+{
+	struct bfa_ioc_mbox_mod *mod = &ioc->mbox_mod;
+	struct bfi_mbmsg m;
+	int mc;
+
+	bfa_ioc_msgget(ioc, &m);
+
+	/**
+	 * Treat IOC message class as special.
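+	 * IOC-class replies drive the IOC state machine directly through
+	 * bfa_ioc_isr() rather than a handler registered in mbhdlr[].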
+	 */
+	mc = m.mh.msg_class;
+	if (mc == BFI_MC_IOC) {
+		bfa_ioc_isr(ioc, &m);
+		return;
+	}
+
+	if ((mc >= BFI_MC_MAX) || (mod->mbhdlr[mc].cbfn == NULL))
+		return;
+
+	mod->mbhdlr[mc].cbfn(mod->mbhdlr[mc].cbarg, &m);
+}
+
+void
+bfa_ioc_error_isr(struct bfa_ioc *ioc)
+{
+	bfa_fsm_send_event(ioc, IOC_E_HWERROR);
+}
+
+void
+bfa_ioc_set_fcmode(struct bfa_ioc *ioc)
+{
+	ioc->fcmode = true;
+	ioc->port_id = bfa_ioc_pcifn(ioc);
+}
+
+/**
+ * return true if IOC is disabled
+ */
+bool
+bfa_ioc_is_disabled(struct bfa_ioc *ioc)
+{
+	return bfa_fsm_cmp_state(ioc, bfa_ioc_sm_disabling) ||
+		bfa_fsm_cmp_state(ioc, bfa_ioc_sm_disabled);
+}
+
+/**
+ * return true if IOC firmware is different.
+ */
+bool
+bfa_ioc_fw_mismatch(struct bfa_ioc *ioc)
+{
+	return bfa_fsm_cmp_state(ioc, bfa_ioc_sm_reset) ||
+		bfa_fsm_cmp_state(ioc, bfa_ioc_sm_fwcheck) ||
+		bfa_fsm_cmp_state(ioc, bfa_ioc_sm_mismatch);
+}
+
+#define bfa_ioc_state_disabled(__sm)		\
+	(((__sm) == BFI_IOC_UNINIT) ||		\
+	((__sm) == BFI_IOC_INITING) ||		\
+	((__sm) == BFI_IOC_HWINIT) ||		\
+	((__sm) == BFI_IOC_DISABLED) ||		\
+	((__sm) == BFI_IOC_FAIL) ||		\
+	((__sm) == BFI_IOC_CFG_DISABLED))
+
+/**
+ * Check if adapter is disabled -- both IOCs should be in a disabled
+ * state.
+ */
+bool
+bfa_ioc_adapter_is_disabled(struct bfa_ioc *ioc)
+{
+	u32 ioc_state;
+	void __iomem *rb = ioc->pcidev.pci_bar_kva;
+
+	if (!bfa_fsm_cmp_state(ioc, bfa_ioc_sm_disabled))
+		return false;
+
+	ioc_state = readl(rb + BFA_IOC0_STATE_REG);
+	if (!bfa_ioc_state_disabled(ioc_state))
+		return false;
+
+	if (ioc->pcidev.device_id != PCI_DEVICE_ID_BROCADE_FC_8G1P) {
+		ioc_state = readl(rb + BFA_IOC1_STATE_REG);
+		if (!bfa_ioc_state_disabled(ioc_state))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * Add to IOC heartbeat failure notification queue. To be used by common
+ * modules such as cee, port, diag.
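+ *
+ * (Hypothetical usage sketch -- the handler and cbarg names are
+ * illustrative only:
+ *
+ *	struct bfa_ioc_hbfail_notify hb_notify;
+ *
+ *	bfa_ioc_hbfail_init(&hb_notify, my_hbfail_cb, my_module);
+ *	bfa_ioc_hbfail_register(ioc, &hb_notify);
+ *
+ * my_hbfail_cb(my_module) then runs when a heartbeat failure is
+ * detected.)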
+ */ +void +bfa_ioc_hbfail_register(struct bfa_ioc *ioc, + struct bfa_ioc_hbfail_notify *notify) +{ + list_add_tail(¬ify->qe, &ioc->hb_notify_q); +} + +#define BFA_MFG_NAME "Brocade" +void +bfa_ioc_get_adapter_attr(struct bfa_ioc *ioc, + struct bfa_adapter_attr *ad_attr) +{ + struct bfi_ioc_attr *ioc_attr; + + ioc_attr = ioc->attr; + + bfa_ioc_get_adapter_serial_num(ioc, ad_attr->serial_num); + bfa_ioc_get_adapter_fw_ver(ioc, ad_attr->fw_ver); + bfa_ioc_get_adapter_optrom_ver(ioc, ad_attr->optrom_ver); + bfa_ioc_get_adapter_manufacturer(ioc, ad_attr->manufacturer); + memcpy(&ad_attr->vpd, &ioc_attr->vpd, + sizeof(struct bfa_mfg_vpd)); + + ad_attr->nports = bfa_ioc_get_nports(ioc); + ad_attr->max_speed = bfa_ioc_speed_sup(ioc); + + bfa_ioc_get_adapter_model(ioc, ad_attr->model); + /* For now, model descr uses same model string */ + bfa_ioc_get_adapter_model(ioc, ad_attr->model_descr); + + ad_attr->card_type = ioc_attr->card_type; + ad_attr->is_mezz = bfa_mfg_is_mezz(ioc_attr->card_type); + + if (BFI_ADAPTER_IS_SPECIAL(ioc_attr->adapter_prop)) + ad_attr->prototype = 1; + else + ad_attr->prototype = 0; + + ad_attr->pwwn = bfa_ioc_get_pwwn(ioc); + ad_attr->mac = bfa_ioc_get_mac(ioc); + + ad_attr->pcie_gen = ioc_attr->pcie_gen; + ad_attr->pcie_lanes = ioc_attr->pcie_lanes; + ad_attr->pcie_lanes_orig = ioc_attr->pcie_lanes_orig; + ad_attr->asic_rev = ioc_attr->asic_rev; + + bfa_ioc_get_pci_chip_rev(ioc, ad_attr->hw_ver); + + ad_attr->cna_capable = ioc->cna; + ad_attr->trunk_capable = (ad_attr->nports > 1) && !ioc->cna; +} + +enum bfa_ioc_type +bfa_ioc_get_type(struct bfa_ioc *ioc) +{ + if (!ioc->ctdev || ioc->fcmode) + return BFA_IOC_TYPE_FC; + else if (ioc->ioc_mc == BFI_MC_IOCFC) + return BFA_IOC_TYPE_FCoE; + else if (ioc->ioc_mc == BFI_MC_LL) + return BFA_IOC_TYPE_LL; + else { + BUG_ON(!(ioc->ioc_mc == BFI_MC_LL)); + return BFA_IOC_TYPE_LL; + } +} + +void +bfa_ioc_get_adapter_serial_num(struct bfa_ioc *ioc, char *serial_num) +{ + memset(serial_num, 0, BFA_ADAPTER_SERIAL_NUM_LEN); + memcpy(serial_num, + (void *)ioc->attr->brcd_serialnum, + BFA_ADAPTER_SERIAL_NUM_LEN); +} + +void +bfa_ioc_get_adapter_fw_ver(struct bfa_ioc *ioc, char *fw_ver) +{ + memset(fw_ver, 0, BFA_VERSION_LEN); + memcpy(fw_ver, ioc->attr->fw_version, BFA_VERSION_LEN); +} + +void +bfa_ioc_get_pci_chip_rev(struct bfa_ioc *ioc, char *chip_rev) +{ + BUG_ON(!(chip_rev)); + + memset(chip_rev, 0, BFA_IOC_CHIP_REV_LEN); + + chip_rev[0] = 'R'; + chip_rev[1] = 'e'; + chip_rev[2] = 'v'; + chip_rev[3] = '-'; + chip_rev[4] = ioc->attr->asic_rev; + chip_rev[5] = '\0'; +} + +void +bfa_ioc_get_adapter_optrom_ver(struct bfa_ioc *ioc, char *optrom_ver) +{ + memset(optrom_ver, 0, BFA_VERSION_LEN); + memcpy(optrom_ver, ioc->attr->optrom_version, + BFA_VERSION_LEN); +} + +void +bfa_ioc_get_adapter_manufacturer(struct bfa_ioc *ioc, char *manufacturer) +{ + memset(manufacturer, 0, BFA_ADAPTER_MFG_NAME_LEN); + memcpy(manufacturer, BFA_MFG_NAME, BFA_ADAPTER_MFG_NAME_LEN); +} + +void +bfa_ioc_get_adapter_model(struct bfa_ioc *ioc, char *model) +{ + struct bfi_ioc_attr *ioc_attr; + + BUG_ON(!(model)); + memset(model, 0, BFA_ADAPTER_MODEL_NAME_LEN); + + ioc_attr = ioc->attr; + + /** + * model name + */ + snprintf(model, BFA_ADAPTER_MODEL_NAME_LEN, "%s-%u", + BFA_MFG_NAME, ioc_attr->card_type); +} + +enum bfa_ioc_state +bfa_ioc_get_state(struct bfa_ioc *ioc) +{ + return bfa_sm_to_state(ioc_sm_table, ioc->fsm); +} + +void +bfa_ioc_get_attr(struct bfa_ioc *ioc, struct bfa_ioc_attr *ioc_attr) +{ + memset((void *)ioc_attr, 0, sizeof(struct bfa_ioc_attr)); 
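+	/* snapshot IOC state, type, port id, adapter and PCI attributes */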
+ + ioc_attr->state = bfa_ioc_get_state(ioc); + ioc_attr->port_id = ioc->port_id; + + ioc_attr->ioc_type = bfa_ioc_get_type(ioc); + + bfa_ioc_get_adapter_attr(ioc, &ioc_attr->adapter_attr); + + ioc_attr->pci_attr.device_id = ioc->pcidev.device_id; + ioc_attr->pci_attr.pcifn = ioc->pcidev.pci_func; + bfa_ioc_get_pci_chip_rev(ioc, ioc_attr->pci_attr.chip_rev); +} + +/** + * WWN public + */ +u64 +bfa_ioc_get_pwwn(struct bfa_ioc *ioc) +{ + return ioc->attr->pwwn; +} + +u64 +bfa_ioc_get_nwwn(struct bfa_ioc *ioc) +{ + return ioc->attr->nwwn; +} + +u64 +bfa_ioc_get_adid(struct bfa_ioc *ioc) +{ + return ioc->attr->mfg_pwwn; +} + +mac_t +bfa_ioc_get_mac(struct bfa_ioc *ioc) +{ + /* + * Currently mfg mac is used as FCoE enode mac (not configured by PBC) + */ + if (bfa_ioc_get_type(ioc) == BFA_IOC_TYPE_FCoE) + return bfa_ioc_get_mfg_mac(ioc); + else + return ioc->attr->mac; +} + +u64 +bfa_ioc_get_mfg_pwwn(struct bfa_ioc *ioc) +{ + return ioc->attr->mfg_pwwn; +} + +u64 +bfa_ioc_get_mfg_nwwn(struct bfa_ioc *ioc) +{ + return ioc->attr->mfg_nwwn; +} + +mac_t +bfa_ioc_get_mfg_mac(struct bfa_ioc *ioc) +{ + mac_t m; + + m = ioc->attr->mfg_mac; + if (bfa_mfg_is_old_wwn_mac_model(ioc->attr->card_type)) + m.mac[MAC_ADDRLEN - 1] += bfa_ioc_pcifn(ioc); + else + bfa_mfg_increment_wwn_mac(&(m.mac[MAC_ADDRLEN-3]), + bfa_ioc_pcifn(ioc)); + + return m; +} + +bool +bfa_ioc_get_fcmode(struct bfa_ioc *ioc) +{ + return ioc->fcmode || !bfa_asic_id_ct(ioc->pcidev.device_id); +} + +/** + * Firmware failure detected. Start recovery actions. + */ +static void +bfa_ioc_recover(struct bfa_ioc *ioc) +{ + bfa_ioc_stats(ioc, ioc_hbfails); + bfa_fsm_send_event(ioc, IOC_E_HBFAIL); +} + +static void +bfa_ioc_check_attr_wwns(struct bfa_ioc *ioc) +{ + if (bfa_ioc_get_type(ioc) == BFA_IOC_TYPE_LL) + return; + +} diff --git a/drivers/net/bna/bfa_ioc.h b/drivers/net/bna/bfa_ioc.h new file mode 100644 index 000000000000..2e5c0adef899 --- /dev/null +++ b/drivers/net/bna/bfa_ioc.h @@ -0,0 +1,343 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#ifndef __BFA_IOC_H__ +#define __BFA_IOC_H__ + +#include "bfa_sm.h" +#include "bfi.h" +#include "cna.h" + +#define BFA_IOC_TOV 3000 /* msecs */ +#define BFA_IOC_HWSEM_TOV 500 /* msecs */ +#define BFA_IOC_HB_TOV 500 /* msecs */ +#define BFA_IOC_HWINIT_MAX 2 +#define BFA_IOC_TOV_RECOVER BFA_IOC_HB_TOV + +/** + * Generic Scatter Gather Element used by driver + */ +struct bfa_sge { + u32 sg_len; + void *sg_addr; +}; + +/** + * PCI device information required by IOC + */ +struct bfa_pcidev { + int pci_slot; + u8 pci_func; + u16 device_id; + void __iomem *pci_bar_kva; +}; + +/** + * Structure used to remember the DMA-able memory block's KVA and Physical + * Address + */ +struct bfa_dma { + void *kva; /* ! Kernel virtual address */ + u64 pa; /* ! Physical address */ +}; + +#define BFA_DMA_ALIGN_SZ 256 + +/** + * smem size for Crossbow and Catapult + */ +#define BFI_SMEM_CB_SIZE 0x200000U /* ! 
2MB for crossbow */ +#define BFI_SMEM_CT_SIZE 0x280000U /* ! 2.5MB for catapult */ + +/** + * @brief BFA dma address assignment macro + */ +#define bfa_dma_addr_set(dma_addr, pa) \ + __bfa_dma_addr_set(&dma_addr, (u64)pa) + +static inline void +__bfa_dma_addr_set(union bfi_addr_u *dma_addr, u64 pa) +{ + dma_addr->a32.addr_lo = (u32) pa; + dma_addr->a32.addr_hi = (u32) (upper_32_bits(pa)); +} + +/** + * @brief BFA dma address assignment macro. (big endian format) + */ +#define bfa_dma_be_addr_set(dma_addr, pa) \ + __bfa_dma_be_addr_set(&dma_addr, (u64)pa) +static inline void +__bfa_dma_be_addr_set(union bfi_addr_u *dma_addr, u64 pa) +{ + dma_addr->a32.addr_lo = (u32) htonl(pa); + dma_addr->a32.addr_hi = (u32) htonl(upper_32_bits(pa)); +} + +struct bfa_ioc_regs { + void __iomem *hfn_mbox_cmd; + void __iomem *hfn_mbox; + void __iomem *lpu_mbox_cmd; + void __iomem *lpu_mbox; + void __iomem *pss_ctl_reg; + void __iomem *pss_err_status_reg; + void __iomem *app_pll_fast_ctl_reg; + void __iomem *app_pll_slow_ctl_reg; + void __iomem *ioc_sem_reg; + void __iomem *ioc_usage_sem_reg; + void __iomem *ioc_init_sem_reg; + void __iomem *ioc_usage_reg; + void __iomem *host_page_num_fn; + void __iomem *heartbeat; + void __iomem *ioc_fwstate; + void __iomem *ll_halt; + void __iomem *err_set; + void __iomem *shirq_isr_next; + void __iomem *shirq_msk_next; + void __iomem *smem_page_start; + u32 smem_pg0; +}; + +/** + * IOC Mailbox structures + */ +struct bfa_mbox_cmd { + struct list_head qe; + u32 msg[BFI_IOC_MSGSZ]; +}; + +/** + * IOC mailbox module + */ +typedef void (*bfa_ioc_mbox_mcfunc_t)(void *cbarg, struct bfi_mbmsg *m); +struct bfa_ioc_mbox_mod { + struct list_head cmd_q; /*!< pending mbox queue */ + int nmclass; /*!< number of handlers */ + struct { + bfa_ioc_mbox_mcfunc_t cbfn; /*!< message handlers */ + void *cbarg; + } mbhdlr[BFI_MC_MAX]; +}; + +/** + * IOC callback function interfaces + */ +typedef void (*bfa_ioc_enable_cbfn_t)(void *bfa, enum bfa_status status); +typedef void (*bfa_ioc_disable_cbfn_t)(void *bfa); +typedef void (*bfa_ioc_hbfail_cbfn_t)(void *bfa); +typedef void (*bfa_ioc_reset_cbfn_t)(void *bfa); +struct bfa_ioc_cbfn { + bfa_ioc_enable_cbfn_t enable_cbfn; + bfa_ioc_disable_cbfn_t disable_cbfn; + bfa_ioc_hbfail_cbfn_t hbfail_cbfn; + bfa_ioc_reset_cbfn_t reset_cbfn; +}; + +/** + * Heartbeat failure notification queue element. 
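+ * (Queued on ioc->hb_notify_q by bfa_ioc_hbfail_register(); the
+ * cbfn/cbarg pair names the callback to run on heartbeat failure.)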
+ */ +struct bfa_ioc_hbfail_notify { + struct list_head qe; + bfa_ioc_hbfail_cbfn_t cbfn; + void *cbarg; +}; + +/** + * Initialize a heartbeat failure notification structure + */ +#define bfa_ioc_hbfail_init(__notify, __cbfn, __cbarg) do { \ + (__notify)->cbfn = (__cbfn); \ + (__notify)->cbarg = (__cbarg); \ +} while (0) + +struct bfa_ioc { + bfa_fsm_t fsm; + struct bfa *bfa; + struct bfa_pcidev pcidev; + struct bfa_timer_mod *timer_mod; + struct timer_list ioc_timer; + struct timer_list sem_timer; + struct timer_list hb_timer; + u32 hb_count; + u32 retry_count; + struct list_head hb_notify_q; + void *dbg_fwsave; + int dbg_fwsave_len; + bool dbg_fwsave_once; + enum bfi_mclass ioc_mc; + struct bfa_ioc_regs ioc_regs; + struct bfa_ioc_drv_stats stats; + bool auto_recover; + bool fcmode; + bool ctdev; + bool cna; + bool pllinit; + bool stats_busy; /*!< outstanding stats */ + u8 port_id; + + struct bfa_dma attr_dma; + struct bfi_ioc_attr *attr; + struct bfa_ioc_cbfn *cbfn; + struct bfa_ioc_mbox_mod mbox_mod; + struct bfa_ioc_hwif *ioc_hwif; +}; + +struct bfa_ioc_hwif { + enum bfa_status (*ioc_pll_init) (void __iomem *rb, bool fcmode); + bool (*ioc_firmware_lock) (struct bfa_ioc *ioc); + void (*ioc_firmware_unlock) (struct bfa_ioc *ioc); + void (*ioc_reg_init) (struct bfa_ioc *ioc); + void (*ioc_map_port) (struct bfa_ioc *ioc); + void (*ioc_isr_mode_set) (struct bfa_ioc *ioc, + bool msix); + void (*ioc_notify_hbfail) (struct bfa_ioc *ioc); + void (*ioc_ownership_reset) (struct bfa_ioc *ioc); +}; + +#define bfa_ioc_pcifn(__ioc) ((__ioc)->pcidev.pci_func) +#define bfa_ioc_devid(__ioc) ((__ioc)->pcidev.device_id) +#define bfa_ioc_bar0(__ioc) ((__ioc)->pcidev.pci_bar_kva) +#define bfa_ioc_portid(__ioc) ((__ioc)->port_id) +#define bfa_ioc_fetch_stats(__ioc, __stats) \ + (((__stats)->drv_stats) = (__ioc)->stats) +#define bfa_ioc_clr_stats(__ioc) \ + memset(&(__ioc)->stats, 0, sizeof((__ioc)->stats)) +#define bfa_ioc_maxfrsize(__ioc) ((__ioc)->attr->maxfrsize) +#define bfa_ioc_rx_bbcredit(__ioc) ((__ioc)->attr->rx_bbcredit) +#define bfa_ioc_speed_sup(__ioc) \ + BFI_ADAPTER_GETP(SPEED, (__ioc)->attr->adapter_prop) +#define bfa_ioc_get_nports(__ioc) \ + BFI_ADAPTER_GETP(NPORTS, (__ioc)->attr->adapter_prop) + +#define bfa_ioc_stats(_ioc, _stats) ((_ioc)->stats._stats++) +#define BFA_IOC_FWIMG_MINSZ (16 * 1024) +#define BFA_IOC_FWIMG_TYPE(__ioc) \ + (((__ioc)->ctdev) ? \ + (((__ioc)->fcmode) ? BFI_IMAGE_CT_FC : BFI_IMAGE_CT_CNA) : \ + BFI_IMAGE_CB_FC) +#define BFA_IOC_FW_SMEM_SIZE(__ioc) \ + (((__ioc)->ctdev) ? 
BFI_SMEM_CT_SIZE : BFI_SMEM_CB_SIZE) +#define BFA_IOC_FLASH_CHUNK_NO(off) (off / BFI_FLASH_CHUNK_SZ_WORDS) +#define BFA_IOC_FLASH_OFFSET_IN_CHUNK(off) (off % BFI_FLASH_CHUNK_SZ_WORDS) +#define BFA_IOC_FLASH_CHUNK_ADDR(chunkno) (chunkno * BFI_FLASH_CHUNK_SZ_WORDS) + +/** + * IOC mailbox interface + */ +void bfa_ioc_mbox_queue(struct bfa_ioc *ioc, struct bfa_mbox_cmd *cmd); +void bfa_ioc_mbox_register(struct bfa_ioc *ioc, + bfa_ioc_mbox_mcfunc_t *mcfuncs); +void bfa_ioc_mbox_isr(struct bfa_ioc *ioc); +void bfa_ioc_mbox_send(struct bfa_ioc *ioc, void *ioc_msg, int len); +void bfa_ioc_msgget(struct bfa_ioc *ioc, void *mbmsg); +void bfa_ioc_mbox_regisr(struct bfa_ioc *ioc, enum bfi_mclass mc, + bfa_ioc_mbox_mcfunc_t cbfn, void *cbarg); + +/** + * IOC interfaces + */ + +#define bfa_ioc_pll_init_asic(__ioc) \ + ((__ioc)->ioc_hwif->ioc_pll_init((__ioc)->pcidev.pci_bar_kva, \ + (__ioc)->fcmode)) + +enum bfa_status bfa_ioc_pll_init(struct bfa_ioc *ioc); +enum bfa_status bfa_ioc_cb_pll_init(void __iomem *rb, bool fcmode); +enum bfa_status bfa_ioc_ct_pll_init(void __iomem *rb, bool fcmode); + +#define bfa_ioc_isr_mode_set(__ioc, __msix) \ + ((__ioc)->ioc_hwif->ioc_isr_mode_set(__ioc, __msix)) +#define bfa_ioc_ownership_reset(__ioc) \ + ((__ioc)->ioc_hwif->ioc_ownership_reset(__ioc)) + +void bfa_ioc_set_ct_hwif(struct bfa_ioc *ioc); + +void bfa_ioc_attach(struct bfa_ioc *ioc, void *bfa, + struct bfa_ioc_cbfn *cbfn); +void bfa_ioc_auto_recover(bool auto_recover); +void bfa_ioc_detach(struct bfa_ioc *ioc); +void bfa_ioc_pci_init(struct bfa_ioc *ioc, struct bfa_pcidev *pcidev, + enum bfi_mclass mc); +u32 bfa_ioc_meminfo(void); +void bfa_ioc_mem_claim(struct bfa_ioc *ioc, u8 *dm_kva, u64 dm_pa); +void bfa_ioc_enable(struct bfa_ioc *ioc); +void bfa_ioc_disable(struct bfa_ioc *ioc); +bool bfa_ioc_intx_claim(struct bfa_ioc *ioc); + +void bfa_ioc_boot(struct bfa_ioc *ioc, u32 boot_type, + u32 boot_param); +void bfa_ioc_isr(struct bfa_ioc *ioc, struct bfi_mbmsg *msg); +void bfa_ioc_error_isr(struct bfa_ioc *ioc); +bool bfa_ioc_is_operational(struct bfa_ioc *ioc); +bool bfa_ioc_is_initialized(struct bfa_ioc *ioc); +bool bfa_ioc_is_disabled(struct bfa_ioc *ioc); +bool bfa_ioc_fw_mismatch(struct bfa_ioc *ioc); +bool bfa_ioc_adapter_is_disabled(struct bfa_ioc *ioc); +void bfa_ioc_cfg_complete(struct bfa_ioc *ioc); +enum bfa_ioc_type bfa_ioc_get_type(struct bfa_ioc *ioc); +void bfa_ioc_get_adapter_serial_num(struct bfa_ioc *ioc, char *serial_num); +void bfa_ioc_get_adapter_fw_ver(struct bfa_ioc *ioc, char *fw_ver); +void bfa_ioc_get_adapter_optrom_ver(struct bfa_ioc *ioc, char *optrom_ver); +void bfa_ioc_get_adapter_model(struct bfa_ioc *ioc, char *model); +void bfa_ioc_get_adapter_manufacturer(struct bfa_ioc *ioc, + char *manufacturer); +void bfa_ioc_get_pci_chip_rev(struct bfa_ioc *ioc, char *chip_rev); +enum bfa_ioc_state bfa_ioc_get_state(struct bfa_ioc *ioc); + +void bfa_ioc_get_attr(struct bfa_ioc *ioc, struct bfa_ioc_attr *ioc_attr); +void bfa_ioc_get_adapter_attr(struct bfa_ioc *ioc, + struct bfa_adapter_attr *ad_attr); +u32 bfa_ioc_smem_pgnum(struct bfa_ioc *ioc, u32 fmaddr); +u32 bfa_ioc_smem_pgoff(struct bfa_ioc *ioc, u32 fmaddr); +void bfa_ioc_set_fcmode(struct bfa_ioc *ioc); +bool bfa_ioc_get_fcmode(struct bfa_ioc *ioc); +void bfa_ioc_hbfail_register(struct bfa_ioc *ioc, + struct bfa_ioc_hbfail_notify *notify); +bool bfa_ioc_sem_get(void __iomem *sem_reg); +void bfa_ioc_sem_release(void __iomem *sem_reg); +void bfa_ioc_hw_sem_release(struct bfa_ioc *ioc); +void bfa_ioc_fwver_get(struct bfa_ioc *ioc, + 
struct bfi_ioc_image_hdr *fwhdr); +bool bfa_ioc_fwver_cmp(struct bfa_ioc *ioc, + struct bfi_ioc_image_hdr *fwhdr); + +/* + * Timeout APIs + */ +void bfa_ioc_timeout(void *ioc); +void bfa_ioc_hb_check(void *ioc); +void bfa_ioc_sem_timeout(void *ioc); + +/* + * bfa mfg wwn API functions + */ +u64 bfa_ioc_get_pwwn(struct bfa_ioc *ioc); +u64 bfa_ioc_get_nwwn(struct bfa_ioc *ioc); +mac_t bfa_ioc_get_mac(struct bfa_ioc *ioc); +u64 bfa_ioc_get_mfg_pwwn(struct bfa_ioc *ioc); +u64 bfa_ioc_get_mfg_nwwn(struct bfa_ioc *ioc); +mac_t bfa_ioc_get_mfg_mac(struct bfa_ioc *ioc); +u64 bfa_ioc_get_adid(struct bfa_ioc *ioc); + +/* + * F/W Image Size & Chunk + */ +u32 *bfa_cb_image_get_chunk(int type, u32 off); +u32 bfa_cb_image_get_size(int type); + +#endif /* __BFA_IOC_H__ */ diff --git a/drivers/net/bna/bfa_ioc_ct.c b/drivers/net/bna/bfa_ioc_ct.c new file mode 100644 index 000000000000..870046e32c8d --- /dev/null +++ b/drivers/net/bna/bfa_ioc_ct.c @@ -0,0 +1,391 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#include "bfa_ioc.h" +#include "cna.h" +#include "bfi.h" +#include "bfi_ctreg.h" +#include "bfa_defs.h" + +/* + * forward declarations + */ +static bool bfa_ioc_ct_firmware_lock(struct bfa_ioc *ioc); +static void bfa_ioc_ct_firmware_unlock(struct bfa_ioc *ioc); +static void bfa_ioc_ct_reg_init(struct bfa_ioc *ioc); +static void bfa_ioc_ct_map_port(struct bfa_ioc *ioc); +static void bfa_ioc_ct_isr_mode_set(struct bfa_ioc *ioc, bool msix); +static void bfa_ioc_ct_notify_hbfail(struct bfa_ioc *ioc); +static void bfa_ioc_ct_ownership_reset(struct bfa_ioc *ioc); + +struct bfa_ioc_hwif hwif_ct; + +/** + * Called from bfa_ioc_attach() to map asic specific calls. + */ +void +bfa_ioc_set_ct_hwif(struct bfa_ioc *ioc) +{ + hwif_ct.ioc_pll_init = bfa_ioc_ct_pll_init; + hwif_ct.ioc_firmware_lock = bfa_ioc_ct_firmware_lock; + hwif_ct.ioc_firmware_unlock = bfa_ioc_ct_firmware_unlock; + hwif_ct.ioc_reg_init = bfa_ioc_ct_reg_init; + hwif_ct.ioc_map_port = bfa_ioc_ct_map_port; + hwif_ct.ioc_isr_mode_set = bfa_ioc_ct_isr_mode_set; + hwif_ct.ioc_notify_hbfail = bfa_ioc_ct_notify_hbfail; + hwif_ct.ioc_ownership_reset = bfa_ioc_ct_ownership_reset; + + ioc->ioc_hwif = &hwif_ct; +} + +/** + * Return true if firmware of current driver matches the running firmware. + */ +static bool +bfa_ioc_ct_firmware_lock(struct bfa_ioc *ioc) +{ + enum bfi_ioc_state ioc_fwstate; + u32 usecnt; + struct bfi_ioc_image_hdr fwhdr; + + /** + * Firmware match check is relevant only for CNA. + */ + if (!ioc->cna) + return true; + + /** + * If bios boot (flash based) -- do not increment usage count + */ + if (bfa_cb_image_get_size(BFA_IOC_FWIMG_TYPE(ioc)) < + BFA_IOC_FWIMG_MINSZ) + return true; + + bfa_ioc_sem_get(ioc->ioc_regs.ioc_usage_sem_reg); + usecnt = readl(ioc->ioc_regs.ioc_usage_reg); + + /** + * If usage count is 0, always return TRUE. 
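+	 * (No other driver instance holds the firmware yet; claim it by
+	 * writing a use count of 1 under the usage semaphore.)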
+ */ + if (usecnt == 0) { + writel(1, ioc->ioc_regs.ioc_usage_reg); + bfa_ioc_sem_release(ioc->ioc_regs.ioc_usage_sem_reg); + return true; + } + + ioc_fwstate = readl(ioc->ioc_regs.ioc_fwstate); + + /** + * Use count cannot be non-zero and chip in uninitialized state. + */ + BUG_ON(!(ioc_fwstate != BFI_IOC_UNINIT)); + + /** + * Check if another driver with a different firmware is active + */ + bfa_ioc_fwver_get(ioc, &fwhdr); + if (!bfa_ioc_fwver_cmp(ioc, &fwhdr)) { + bfa_ioc_sem_release(ioc->ioc_regs.ioc_usage_sem_reg); + return false; + } + + /** + * Same firmware version. Increment the reference count. + */ + usecnt++; + writel(usecnt, ioc->ioc_regs.ioc_usage_reg); + bfa_ioc_sem_release(ioc->ioc_regs.ioc_usage_sem_reg); + return true; +} + +static void +bfa_ioc_ct_firmware_unlock(struct bfa_ioc *ioc) +{ + u32 usecnt; + + /** + * Firmware lock is relevant only for CNA. + */ + if (!ioc->cna) + return; + + /** + * If bios boot (flash based) -- do not decrement usage count + */ + if (bfa_cb_image_get_size(BFA_IOC_FWIMG_TYPE(ioc)) < + BFA_IOC_FWIMG_MINSZ) + return; + + /** + * decrement usage count + */ + bfa_ioc_sem_get(ioc->ioc_regs.ioc_usage_sem_reg); + usecnt = readl(ioc->ioc_regs.ioc_usage_reg); + BUG_ON(!(usecnt > 0)); + + usecnt--; + writel(usecnt, ioc->ioc_regs.ioc_usage_reg); + + bfa_ioc_sem_release(ioc->ioc_regs.ioc_usage_sem_reg); +} + +/** + * Notify other functions on HB failure. + */ +static void +bfa_ioc_ct_notify_hbfail(struct bfa_ioc *ioc) +{ + if (ioc->cna) { + writel(__FW_INIT_HALT_P, ioc->ioc_regs.ll_halt); + /* Wait for halt to take effect */ + readl(ioc->ioc_regs.ll_halt); + } else { + writel(__PSS_ERR_STATUS_SET, ioc->ioc_regs.err_set); + readl(ioc->ioc_regs.err_set); + } +} + +/** + * Host to LPU mailbox message addresses + */ +static struct { u32 hfn_mbox, lpu_mbox, hfn_pgn; } iocreg_fnreg[] = { + { HOSTFN0_LPU_MBOX0_0, LPU_HOSTFN0_MBOX0_0, HOST_PAGE_NUM_FN0 }, + { HOSTFN1_LPU_MBOX0_8, LPU_HOSTFN1_MBOX0_8, HOST_PAGE_NUM_FN1 }, + { HOSTFN2_LPU_MBOX0_0, LPU_HOSTFN2_MBOX0_0, HOST_PAGE_NUM_FN2 }, + { HOSTFN3_LPU_MBOX0_8, LPU_HOSTFN3_MBOX0_8, HOST_PAGE_NUM_FN3 } +}; + +/** + * Host <-> LPU mailbox command/status registers - port 0 + */ +static struct { u32 hfn, lpu; } iocreg_mbcmd_p0[] = { + { HOSTFN0_LPU0_MBOX0_CMD_STAT, LPU0_HOSTFN0_MBOX0_CMD_STAT }, + { HOSTFN1_LPU0_MBOX0_CMD_STAT, LPU0_HOSTFN1_MBOX0_CMD_STAT }, + { HOSTFN2_LPU0_MBOX0_CMD_STAT, LPU0_HOSTFN2_MBOX0_CMD_STAT }, + { HOSTFN3_LPU0_MBOX0_CMD_STAT, LPU0_HOSTFN3_MBOX0_CMD_STAT } +}; + +/** + * Host <-> LPU mailbox command/status registers - port 1 + */ +static struct { u32 hfn, lpu; } iocreg_mbcmd_p1[] = { + { HOSTFN0_LPU1_MBOX0_CMD_STAT, LPU1_HOSTFN0_MBOX0_CMD_STAT }, + { HOSTFN1_LPU1_MBOX0_CMD_STAT, LPU1_HOSTFN1_MBOX0_CMD_STAT }, + { HOSTFN2_LPU1_MBOX0_CMD_STAT, LPU1_HOSTFN2_MBOX0_CMD_STAT }, + { HOSTFN3_LPU1_MBOX0_CMD_STAT, LPU1_HOSTFN3_MBOX0_CMD_STAT } +}; + +static void +bfa_ioc_ct_reg_init(struct bfa_ioc *ioc) +{ + void __iomem *rb; + int pcifn = bfa_ioc_pcifn(ioc); + + rb = bfa_ioc_bar0(ioc); + + ioc->ioc_regs.hfn_mbox = rb + iocreg_fnreg[pcifn].hfn_mbox; + ioc->ioc_regs.lpu_mbox = rb + iocreg_fnreg[pcifn].lpu_mbox; + ioc->ioc_regs.host_page_num_fn = rb + iocreg_fnreg[pcifn].hfn_pgn; + + if (ioc->port_id == 0) { + ioc->ioc_regs.heartbeat = rb + BFA_IOC0_HBEAT_REG; + ioc->ioc_regs.ioc_fwstate = rb + BFA_IOC0_STATE_REG; + ioc->ioc_regs.hfn_mbox_cmd = rb + iocreg_mbcmd_p0[pcifn].hfn; + ioc->ioc_regs.lpu_mbox_cmd = rb + iocreg_mbcmd_p0[pcifn].lpu; + ioc->ioc_regs.ll_halt = rb + FW_INIT_HALT_P0; + } else { + 
ioc->ioc_regs.heartbeat = (rb + BFA_IOC1_HBEAT_REG); + ioc->ioc_regs.ioc_fwstate = (rb + BFA_IOC1_STATE_REG); + ioc->ioc_regs.hfn_mbox_cmd = rb + iocreg_mbcmd_p1[pcifn].hfn; + ioc->ioc_regs.lpu_mbox_cmd = rb + iocreg_mbcmd_p1[pcifn].lpu; + ioc->ioc_regs.ll_halt = rb + FW_INIT_HALT_P1; + } + + /* + * PSS control registers + */ + ioc->ioc_regs.pss_ctl_reg = (rb + PSS_CTL_REG); + ioc->ioc_regs.pss_err_status_reg = (rb + PSS_ERR_STATUS_REG); + ioc->ioc_regs.app_pll_fast_ctl_reg = (rb + APP_PLL_425_CTL_REG); + ioc->ioc_regs.app_pll_slow_ctl_reg = (rb + APP_PLL_312_CTL_REG); + + /* + * IOC semaphore registers and serialization + */ + ioc->ioc_regs.ioc_sem_reg = (rb + HOST_SEM0_REG); + ioc->ioc_regs.ioc_usage_sem_reg = (rb + HOST_SEM1_REG); + ioc->ioc_regs.ioc_init_sem_reg = (rb + HOST_SEM2_REG); + ioc->ioc_regs.ioc_usage_reg = (rb + BFA_FW_USE_COUNT); + + /** + * sram memory access + */ + ioc->ioc_regs.smem_page_start = (rb + PSS_SMEM_PAGE_START); + ioc->ioc_regs.smem_pg0 = BFI_IOC_SMEM_PG0_CT; + + /* + * err set reg : for notification of hb failure in fcmode + */ + ioc->ioc_regs.err_set = (rb + ERR_SET_REG); +} + +/** + * Initialize IOC to port mapping. + */ + +#define FNC_PERS_FN_SHIFT(__fn) ((__fn) * 8) +static void +bfa_ioc_ct_map_port(struct bfa_ioc *ioc) +{ + void __iomem *rb = ioc->pcidev.pci_bar_kva; + u32 r32; + + /** + * For catapult, base port id on personality register and IOC type + */ + r32 = readl(rb + FNC_PERS_REG); + r32 >>= FNC_PERS_FN_SHIFT(bfa_ioc_pcifn(ioc)); + ioc->port_id = (r32 & __F0_PORT_MAP_MK) >> __F0_PORT_MAP_SH; + +} + +/** + * Set interrupt mode for a function: INTX or MSIX + */ +static void +bfa_ioc_ct_isr_mode_set(struct bfa_ioc *ioc, bool msix) +{ + void __iomem *rb = ioc->pcidev.pci_bar_kva; + u32 r32, mode; + + r32 = readl(rb + FNC_PERS_REG); + + mode = (r32 >> FNC_PERS_FN_SHIFT(bfa_ioc_pcifn(ioc))) & + __F0_INTX_STATUS; + + /** + * If already in desired mode, do not change anything + */ + if (!msix && mode) + return; + + if (msix) + mode = __F0_INTX_STATUS_MSIX; + else + mode = __F0_INTX_STATUS_INTA; + + r32 &= ~(__F0_INTX_STATUS << FNC_PERS_FN_SHIFT(bfa_ioc_pcifn(ioc))); + r32 |= (mode << FNC_PERS_FN_SHIFT(bfa_ioc_pcifn(ioc))); + + writel(r32, rb + FNC_PERS_REG); +} + +/** + * Cleanup hw semaphore and usecnt registers + */ +static void +bfa_ioc_ct_ownership_reset(struct bfa_ioc *ioc) +{ + if (ioc->cna) { + bfa_ioc_sem_get(ioc->ioc_regs.ioc_usage_sem_reg); + writel(0, ioc->ioc_regs.ioc_usage_reg); + bfa_ioc_sem_release(ioc->ioc_regs.ioc_usage_sem_reg); + } + + /* + * Read the hw sem reg to make sure that it is locked + * before we clear it. If it is not locked, writing 1 + * will lock it instead of clearing it. 
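+	 * (A read of these hw semaphores acquires the lock when it happens
+	 * to be free, so the readl() below guarantees we own the semaphore
+	 * before the release that follows.)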
+ */ + readl(ioc->ioc_regs.ioc_sem_reg); + bfa_ioc_hw_sem_release(ioc); +} + +enum bfa_status +bfa_ioc_ct_pll_init(void __iomem *rb, bool fcmode) +{ + u32 pll_sclk, pll_fclk, r32; + + pll_sclk = __APP_PLL_312_LRESETN | __APP_PLL_312_ENARST | + __APP_PLL_312_RSEL200500 | __APP_PLL_312_P0_1(3U) | + __APP_PLL_312_JITLMT0_1(3U) | + __APP_PLL_312_CNTLMT0_1(1U); + pll_fclk = __APP_PLL_425_LRESETN | __APP_PLL_425_ENARST | + __APP_PLL_425_RSEL200500 | __APP_PLL_425_P0_1(3U) | + __APP_PLL_425_JITLMT0_1(3U) | + __APP_PLL_425_CNTLMT0_1(1U); + if (fcmode) { + writel(0, (rb + OP_MODE)); + writel(__APP_EMS_CMLCKSEL | + __APP_EMS_REFCKBUFEN2 | + __APP_EMS_CHANNEL_SEL, + (rb + ETH_MAC_SER_REG)); + } else { + writel(__GLOBAL_FCOE_MODE, (rb + OP_MODE)); + writel(__APP_EMS_REFCKBUFEN1, + (rb + ETH_MAC_SER_REG)); + } + writel(BFI_IOC_UNINIT, (rb + BFA_IOC0_STATE_REG)); + writel(BFI_IOC_UNINIT, (rb + BFA_IOC1_STATE_REG)); + writel(0xffffffffU, (rb + HOSTFN0_INT_MSK)); + writel(0xffffffffU, (rb + HOSTFN1_INT_MSK)); + writel(0xffffffffU, (rb + HOSTFN0_INT_STATUS)); + writel(0xffffffffU, (rb + HOSTFN1_INT_STATUS)); + writel(0xffffffffU, (rb + HOSTFN0_INT_MSK)); + writel(0xffffffffU, (rb + HOSTFN1_INT_MSK)); + writel(pll_sclk | + __APP_PLL_312_LOGIC_SOFT_RESET, + rb + APP_PLL_312_CTL_REG); + writel(pll_fclk | + __APP_PLL_425_LOGIC_SOFT_RESET, + rb + APP_PLL_425_CTL_REG); + writel(pll_sclk | + __APP_PLL_312_LOGIC_SOFT_RESET | __APP_PLL_312_ENABLE, + rb + APP_PLL_312_CTL_REG); + writel(pll_fclk | + __APP_PLL_425_LOGIC_SOFT_RESET | __APP_PLL_425_ENABLE, + rb + APP_PLL_425_CTL_REG); + readl(rb + HOSTFN0_INT_MSK); + udelay(2000); + writel(0xffffffffU, (rb + HOSTFN0_INT_STATUS)); + writel(0xffffffffU, (rb + HOSTFN1_INT_STATUS)); + writel(pll_sclk | + __APP_PLL_312_ENABLE, + rb + APP_PLL_312_CTL_REG); + writel(pll_fclk | + __APP_PLL_425_ENABLE, + rb + APP_PLL_425_CTL_REG); + if (!fcmode) { + writel(__PMM_1T_RESET_P, (rb + PMM_1T_RESET_REG_P0)); + writel(__PMM_1T_RESET_P, (rb + PMM_1T_RESET_REG_P1)); + } + r32 = readl((rb + PSS_CTL_REG)); + r32 &= ~__PSS_LMEM_RESET; + writel(r32, (rb + PSS_CTL_REG)); + udelay(1000); + if (!fcmode) { + writel(0, (rb + PMM_1T_RESET_REG_P0)); + writel(0, (rb + PMM_1T_RESET_REG_P1)); + } + + writel(__EDRAM_BISTR_START, (rb + MBIST_CTL_REG)); + udelay(1000); + r32 = readl((rb + MBIST_STAT_REG)); + writel(0, (rb + MBIST_CTL_REG)); + return BFA_STATUS_OK; +} diff --git a/drivers/net/bna/bfa_sm.h b/drivers/net/bna/bfa_sm.h new file mode 100644 index 000000000000..1d3d975d6f68 --- /dev/null +++ b/drivers/net/bna/bfa_sm.h @@ -0,0 +1,88 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +/** + * @file bfasm.h State machine defines + */ + +#ifndef __BFA_SM_H__ +#define __BFA_SM_H__ + +#include "cna.h" + +typedef void (*bfa_sm_t)(void *sm, int event); + +/** + * oc - object class eg. bfa_ioc + * st - state, eg. reset + * otype - object type, eg. struct bfa_ioc + * etype - object type, eg. 
enum ioc_event + */ +#define bfa_sm_state_decl(oc, st, otype, etype) \ + static void oc ## _sm_ ## st(otype * fsm, etype event) + +#define bfa_sm_set_state(_sm, _state) ((_sm)->sm = (bfa_sm_t)(_state)) +#define bfa_sm_send_event(_sm, _event) ((_sm)->sm((_sm), (_event))) +#define bfa_sm_get_state(_sm) ((_sm)->sm) +#define bfa_sm_cmp_state(_sm, _state) ((_sm)->sm == (bfa_sm_t)(_state)) + +/** + * For converting from state machine function to state encoding. + */ +struct bfa_sm_table { + bfa_sm_t sm; /*!< state machine function */ + int state; /*!< state machine encoding */ + char *name; /*!< state name for display */ +}; +#define BFA_SM(_sm) ((bfa_sm_t)(_sm)) + +/** + * State machine with entry actions. + */ +typedef void (*bfa_fsm_t)(void *fsm, int event); + +/** + * oc - object class eg. bfa_ioc + * st - state, eg. reset + * otype - object type, eg. struct bfa_ioc + * etype - object type, eg. enum ioc_event + */ +#define bfa_fsm_state_decl(oc, st, otype, etype) \ + static void oc ## _sm_ ## st(otype * fsm, etype event); \ + static void oc ## _sm_ ## st ## _entry(otype * fsm) + +#define bfa_fsm_set_state(_fsm, _state) do { \ + (_fsm)->fsm = (bfa_fsm_t)(_state); \ + _state ## _entry(_fsm); \ +} while (0) + +#define bfa_fsm_send_event(_fsm, _event) ((_fsm)->fsm((_fsm), (_event))) +#define bfa_fsm_get_state(_fsm) ((_fsm)->fsm) +#define bfa_fsm_cmp_state(_fsm, _state) \ + ((_fsm)->fsm == (bfa_fsm_t)(_state)) + +static inline int +bfa_sm_to_state(struct bfa_sm_table *smt, bfa_sm_t sm) +{ + int i = 0; + + while (smt[i].sm && smt[i].sm != sm) + i++; + return smt[i].state; +} +#endif diff --git a/drivers/net/bna/bfa_wc.h b/drivers/net/bna/bfa_wc.h new file mode 100644 index 000000000000..d0e4caee67b0 --- /dev/null +++ b/drivers/net/bna/bfa_wc.h @@ -0,0 +1,69 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +/** + * @file bfa_wc.h Generic wait counter. + */ + +#ifndef __BFA_WC_H__ +#define __BFA_WC_H__ + +typedef void (*bfa_wc_resume_t) (void *cbarg); + +struct bfa_wc { + bfa_wc_resume_t wc_resume; + void *wc_cbarg; + int wc_count; +}; + +static inline void +bfa_wc_up(struct bfa_wc *wc) +{ + wc->wc_count++; +} + +static inline void +bfa_wc_down(struct bfa_wc *wc) +{ + wc->wc_count--; + if (wc->wc_count == 0) + wc->wc_resume(wc->wc_cbarg); +} + +/** + * Initialize a waiting counter. + */ +static inline void +bfa_wc_init(struct bfa_wc *wc, bfa_wc_resume_t wc_resume, void *wc_cbarg) +{ + wc->wc_resume = wc_resume; + wc->wc_cbarg = wc_cbarg; + wc->wc_count = 0; + bfa_wc_up(wc); +} + +/** + * Wait for counter to reach zero + */ +static inline void +bfa_wc_wait(struct bfa_wc *wc) +{ + bfa_wc_down(wc); +} + +#endif diff --git a/drivers/net/bna/bfi.h b/drivers/net/bna/bfi.h new file mode 100644 index 000000000000..a97396811050 --- /dev/null +++ b/drivers/net/bna/bfi.h @@ -0,0 +1,392 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#ifndef __BFI_H__ +#define __BFI_H__ + +#include "bfa_defs.h" + +#pragma pack(1) + +/** + * BFI FW image type + */ +#define BFI_FLASH_CHUNK_SZ 256 /*!< Flash chunk size */ +#define BFI_FLASH_CHUNK_SZ_WORDS (BFI_FLASH_CHUNK_SZ/sizeof(u32)) +enum { + BFI_IMAGE_CB_FC, + BFI_IMAGE_CT_FC, + BFI_IMAGE_CT_CNA, + BFI_IMAGE_MAX, +}; + +/** + * Msg header common to all msgs + */ +struct bfi_mhdr { + u8 msg_class; /*!< @ref enum bfi_mclass */ + u8 msg_id; /*!< msg opcode with in the class */ + union { + struct { + u8 rsvd; + u8 lpu_id; /*!< msg destination */ + } h2i; + u16 i2htok; /*!< token in msgs to host */ + } mtag; +}; + +#define bfi_h2i_set(_mh, _mc, _op, _lpuid) do { \ + (_mh).msg_class = (_mc); \ + (_mh).msg_id = (_op); \ + (_mh).mtag.h2i.lpu_id = (_lpuid); \ +} while (0) + +#define bfi_i2h_set(_mh, _mc, _op, _i2htok) do { \ + (_mh).msg_class = (_mc); \ + (_mh).msg_id = (_op); \ + (_mh).mtag.i2htok = (_i2htok); \ +} while (0) + +/* + * Message opcodes: 0-127 to firmware, 128-255 to host + */ +#define BFI_I2H_OPCODE_BASE 128 +#define BFA_I2HM(_x) ((_x) + BFI_I2H_OPCODE_BASE) + +/** + **************************************************************************** + * + * Scatter Gather Element and Page definition + * + **************************************************************************** + */ + +#define BFI_SGE_INLINE 1 +#define BFI_SGE_INLINE_MAX (BFI_SGE_INLINE + 1) + +/** + * SG Flags + */ +enum { + BFI_SGE_DATA = 0, /*!< data address, not last */ + BFI_SGE_DATA_CPL = 1, /*!< data addr, last in current page */ + BFI_SGE_DATA_LAST = 3, /*!< data address, last */ + BFI_SGE_LINK = 2, /*!< link address */ + BFI_SGE_PGDLEN = 2, /*!< cumulative data length for page */ +}; + +/** + * DMA addresses + */ +union bfi_addr_u { + struct { + u32 addr_lo; + u32 addr_hi; + } a32; +}; + +/** + * Scatter Gather Element + */ +struct bfi_sge { +#ifdef __BIGENDIAN + u32 flags:2, + rsvd:2, + sg_len:28; +#else + u32 sg_len:28, + rsvd:2, + flags:2; +#endif + union bfi_addr_u sga; +}; + +/** + * Scatter Gather Page + */ +#define BFI_SGPG_DATA_SGES 7 +#define BFI_SGPG_SGES_MAX (BFI_SGPG_DATA_SGES + 1) +#define BFI_SGPG_RSVD_WD_LEN 8 +struct bfi_sgpg { + struct bfi_sge sges[BFI_SGPG_SGES_MAX]; + u32 rsvd[BFI_SGPG_RSVD_WD_LEN]; +}; + +/* + * Large Message structure - 128 Bytes size Msgs + */ +#define BFI_LMSG_SZ 128 +#define BFI_LMSG_PL_WSZ \ + ((BFI_LMSG_SZ - sizeof(struct bfi_mhdr)) / 4) + +struct bfi_msg { + struct bfi_mhdr mhdr; + u32 pl[BFI_LMSG_PL_WSZ]; +}; + +/** + * Mailbox message structure + */ +#define BFI_MBMSG_SZ 7 +struct bfi_mbmsg { + struct bfi_mhdr mh; + u32 pl[BFI_MBMSG_SZ]; +}; + +/** + * Message Classes + */ +enum bfi_mclass { + BFI_MC_IOC = 1, /*!< IO Controller (IOC) */ + BFI_MC_DIAG = 2, /*!< Diagnostic Msgs */ + BFI_MC_FLASH = 3, /*!< Flash message class */ + BFI_MC_CEE = 4, /*!< CEE */ + BFI_MC_FCPORT = 5, /*!< FC port */ + BFI_MC_IOCFC = 6, /*!< FC - IO Controller (IOC) */ + BFI_MC_LL = 7, /*!< Link Layer */ + BFI_MC_UF = 
8, /*!< Unsolicited frame receive */ + BFI_MC_FCXP = 9, /*!< FC Transport */ + BFI_MC_LPS = 10, /*!< lport fc login services */ + BFI_MC_RPORT = 11, /*!< Remote port */ + BFI_MC_ITNIM = 12, /*!< I-T nexus (Initiator mode) */ + BFI_MC_IOIM_READ = 13, /*!< read IO (Initiator mode) */ + BFI_MC_IOIM_WRITE = 14, /*!< write IO (Initiator mode) */ + BFI_MC_IOIM_IO = 15, /*!< IO (Initiator mode) */ + BFI_MC_IOIM = 16, /*!< IO (Initiator mode) */ + BFI_MC_IOIM_IOCOM = 17, /*!< good IO completion */ + BFI_MC_TSKIM = 18, /*!< Initiator Task management */ + BFI_MC_SBOOT = 19, /*!< SAN boot services */ + BFI_MC_IPFC = 20, /*!< IP over FC Msgs */ + BFI_MC_PORT = 21, /*!< Physical port */ + BFI_MC_SFP = 22, /*!< SFP module */ + BFI_MC_MSGQ = 23, /*!< MSGQ */ + BFI_MC_ENET = 24, /*!< ENET commands/responses */ + BFI_MC_MAX = 32 +}; + +#define BFI_IOC_MAX_CQS 4 +#define BFI_IOC_MAX_CQS_ASIC 8 +#define BFI_IOC_MSGLEN_MAX 32 /* 32 bytes */ + +#define BFI_BOOT_TYPE_OFF 8 +#define BFI_BOOT_PARAM_OFF 12 + +#define BFI_BOOT_TYPE_NORMAL 0 /* param is device id */ +#define BFI_BOOT_TYPE_FLASH 1 +#define BFI_BOOT_TYPE_MEMTEST 2 + +#define BFI_BOOT_MEMTEST_RES_ADDR 0x900 +#define BFI_BOOT_MEMTEST_RES_SIG 0xA0A1A2A3 + +/** + *---------------------------------------------------------------------- + * IOC + *---------------------------------------------------------------------- + */ + +enum bfi_ioc_h2i_msgs { + BFI_IOC_H2I_ENABLE_REQ = 1, + BFI_IOC_H2I_DISABLE_REQ = 2, + BFI_IOC_H2I_GETATTR_REQ = 3, + BFI_IOC_H2I_DBG_SYNC = 4, + BFI_IOC_H2I_DBG_DUMP = 5, +}; + +enum bfi_ioc_i2h_msgs { + BFI_IOC_I2H_ENABLE_REPLY = BFA_I2HM(1), + BFI_IOC_I2H_DISABLE_REPLY = BFA_I2HM(2), + BFI_IOC_I2H_GETATTR_REPLY = BFA_I2HM(3), + BFI_IOC_I2H_READY_EVENT = BFA_I2HM(4), + BFI_IOC_I2H_HBEAT = BFA_I2HM(5), +}; + +/** + * BFI_IOC_H2I_GETATTR_REQ message + */ +struct bfi_ioc_getattr_req { + struct bfi_mhdr mh; + union bfi_addr_u attr_addr; +}; + +struct bfi_ioc_attr { + u64 mfg_pwwn; /*!< Mfg port wwn */ + u64 mfg_nwwn; /*!< Mfg node wwn */ + mac_t mfg_mac; /*!< Mfg mac */ + u16 rsvd_a; + u64 pwwn; + u64 nwwn; + mac_t mac; /*!< PBC or Mfg mac */ + u16 rsvd_b; + mac_t fcoe_mac; + u16 rsvd_c; + char brcd_serialnum[STRSZ(BFA_MFG_SERIALNUM_SIZE)]; + u8 pcie_gen; + u8 pcie_lanes_orig; + u8 pcie_lanes; + u8 rx_bbcredit; /*!< receive buffer credits */ + u32 adapter_prop; /*!< adapter properties */ + u16 maxfrsize; /*!< max receive frame size */ + char asic_rev; + u8 rsvd_d; + char fw_version[BFA_VERSION_LEN]; + char optrom_version[BFA_VERSION_LEN]; + struct bfa_mfg_vpd vpd; + u32 card_type; /*!< card type */ +}; + +/** + * BFI_IOC_I2H_GETATTR_REPLY message + */ +struct bfi_ioc_getattr_reply { + struct bfi_mhdr mh; /*!< Common msg header */ + u8 status; /*!< cfg reply status */ + u8 rsvd[3]; +}; + +/** + * Firmware memory page offsets + */ +#define BFI_IOC_SMEM_PG0_CB (0x40) +#define BFI_IOC_SMEM_PG0_CT (0x180) + +/** + * Firmware statistic offset + */ +#define BFI_IOC_FWSTATS_OFF (0x6B40) +#define BFI_IOC_FWSTATS_SZ (4096) + +/** + * Firmware trace offset + */ +#define BFI_IOC_TRC_OFF (0x4b00) +#define BFI_IOC_TRC_ENTS 256 + +#define BFI_IOC_FW_SIGNATURE (0xbfadbfad) +#define BFI_IOC_MD5SUM_SZ 4 +struct bfi_ioc_image_hdr { + u32 signature; /*!< constant signature */ + u32 rsvd_a; + u32 exec; /*!< exec vector */ + u32 param; /*!< parameters */ + u32 rsvd_b[4]; + u32 md5sum[BFI_IOC_MD5SUM_SZ]; +}; + +/** + * BFI_IOC_I2H_READY_EVENT message + */ +struct bfi_ioc_rdy_event { + struct bfi_mhdr mh; /*!< common msg header */ + u8 init_status; /*!< init 
event status */
+	u8	rsvd[3];
+};
+
+struct bfi_ioc_hbeat {
+	struct bfi_mhdr mh;		/*!< common msg header		*/
+	u32	hb_count;		/*!< current heart beat count	*/
+};
+
+/**
+ * IOC hardware/firmware state
+ */
+enum bfi_ioc_state {
+	BFI_IOC_UNINIT		= 0,	/*!< not initialized		     */
+	BFI_IOC_INITING		= 1,	/*!< h/w is being initialized	     */
+	BFI_IOC_HWINIT		= 2,	/*!< h/w is initialized		     */
+	BFI_IOC_CFG		= 3,	/*!< IOC configuration in progress   */
+	BFI_IOC_OP		= 4,	/*!< IOC is operational		     */
+	BFI_IOC_DISABLING	= 5,	/*!< IOC is being disabled	     */
+	BFI_IOC_DISABLED	= 6,	/*!< IOC is disabled		     */
+	BFI_IOC_CFG_DISABLED	= 7,	/*!< IOC is being disabled; transient */
+	BFI_IOC_FAIL		= 8,	/*!< IOC heart-beat failure	     */
+	BFI_IOC_MEMTEST		= 9,	/*!< IOC is doing memtest	     */
+};
+
+#define BFI_IOC_ENDIAN_SIG  0x12345678
+
+enum {
+	BFI_ADAPTER_TYPE_FC	= 0x01,		/*!< FC adapters	   */
+	BFI_ADAPTER_TYPE_MK	= 0x0f0000,	/*!< adapter type mask     */
+	BFI_ADAPTER_TYPE_SH	= 16,		/*!< adapter type shift    */
+	BFI_ADAPTER_NPORTS_MK	= 0xff00,	/*!< number of ports mask  */
+	BFI_ADAPTER_NPORTS_SH	= 8,		/*!< number of ports shift */
+	BFI_ADAPTER_SPEED_MK	= 0xff,		/*!< adapter speed mask    */
+	BFI_ADAPTER_SPEED_SH	= 0,		/*!< adapter speed shift   */
+	BFI_ADAPTER_PROTO	= 0x100000,	/*!< prototype adapters    */
+	BFI_ADAPTER_TTV		= 0x200000,	/*!< TTV debug capable     */
+	BFI_ADAPTER_UNSUPP	= 0x400000,	/*!< unknown adapter type  */
+};
+
+#define BFI_ADAPTER_GETP(__prop, __adap_prop)			\
+	(((__adap_prop) & BFI_ADAPTER_ ## __prop ## _MK) >>	\
+		BFI_ADAPTER_ ## __prop ## _SH)
+#define BFI_ADAPTER_SETP(__prop, __val)				\
+	((__val) << BFI_ADAPTER_ ## __prop ## _SH)
+#define BFI_ADAPTER_IS_PROTO(__adap_type)			\
+	((__adap_type) & BFI_ADAPTER_PROTO)
+#define BFI_ADAPTER_IS_TTV(__adap_type)				\
+	((__adap_type) & BFI_ADAPTER_TTV)
+#define BFI_ADAPTER_IS_UNSUPP(__adap_type)			\
+	((__adap_type) & BFI_ADAPTER_UNSUPP)
+#define BFI_ADAPTER_IS_SPECIAL(__adap_type)			\
+	((__adap_type) & (BFI_ADAPTER_TTV | BFI_ADAPTER_PROTO |	\
+			BFI_ADAPTER_UNSUPP))
+
+/**
+ * BFI_IOC_H2I_ENABLE_REQ & BFI_IOC_H2I_DISABLE_REQ messages
+ */
+struct bfi_ioc_ctrl_req {
+	struct bfi_mhdr mh;
+	u8			ioc_class;
+	u8			rsvd[3];
+	u32		tv_sec;
+};
+
+/**
+ * BFI_IOC_I2H_ENABLE_REPLY & BFI_IOC_I2H_DISABLE_REPLY messages
+ */
+struct bfi_ioc_ctrl_reply {
+	struct bfi_mhdr mh;		/*!< Common msg header	   */
+	u8			status;		/*!< enable/disable status */
+	u8			rsvd[3];
+};
+
+#define BFI_IOC_MSGSZ   8
+/**
+ * H2I Messages
+ */
+union bfi_ioc_h2i_msg_u {
+	struct bfi_mhdr mh;
+	struct bfi_ioc_ctrl_req enable_req;
+	struct bfi_ioc_ctrl_req disable_req;
+	struct bfi_ioc_getattr_req getattr_req;
+	u32			mboxmsg[BFI_IOC_MSGSZ];
+};
+
+/**
+ * I2H Messages
+ */
+union bfi_ioc_i2h_msg_u {
+	struct bfi_mhdr mh;
+	struct bfi_ioc_rdy_event rdy_event;
+	u32			mboxmsg[BFI_IOC_MSGSZ];
+};
+
+#pragma pack()
+
+#endif /* __BFI_H__ */
diff --git a/drivers/net/bna/bfi_cna.h b/drivers/net/bna/bfi_cna.h
new file mode 100644
index 000000000000..4eecabea397b
--- /dev/null
+++ b/drivers/net/bna/bfi_cna.h
@@ -0,0 +1,199 @@
+/*
+ * Linux network driver for Brocade Converged Network Adapter.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License (GPL) Version 2 as
+ * published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#ifndef __BFI_CNA_H__ +#define __BFI_CNA_H__ + +#include "bfi.h" +#include "bfa_defs_cna.h" + +#pragma pack(1) + +enum bfi_port_h2i { + BFI_PORT_H2I_ENABLE_REQ = (1), + BFI_PORT_H2I_DISABLE_REQ = (2), + BFI_PORT_H2I_GET_STATS_REQ = (3), + BFI_PORT_H2I_CLEAR_STATS_REQ = (4), +}; + +enum bfi_port_i2h { + BFI_PORT_I2H_ENABLE_RSP = BFA_I2HM(1), + BFI_PORT_I2H_DISABLE_RSP = BFA_I2HM(2), + BFI_PORT_I2H_GET_STATS_RSP = BFA_I2HM(3), + BFI_PORT_I2H_CLEAR_STATS_RSP = BFA_I2HM(4), +}; + +/** + * Generic REQ type + */ +struct bfi_port_generic_req { + struct bfi_mhdr mh; /*!< msg header */ + u32 msgtag; /*!< msgtag for reply */ + u32 rsvd; +}; + +/** + * Generic RSP type + */ +struct bfi_port_generic_rsp { + struct bfi_mhdr mh; /*!< common msg header */ + u8 status; /*!< port enable status */ + u8 rsvd[3]; + u32 msgtag; /*!< msgtag for reply */ +}; + +/** + * @todo + * BFI_PORT_H2I_ENABLE_REQ + */ + +/** + * @todo + * BFI_PORT_I2H_ENABLE_RSP + */ + +/** + * BFI_PORT_H2I_DISABLE_REQ + */ + +/** + * BFI_PORT_I2H_DISABLE_RSP + */ + +/** + * BFI_PORT_H2I_GET_STATS_REQ + */ +struct bfi_port_get_stats_req { + struct bfi_mhdr mh; /*!< common msg header */ + union bfi_addr_u dma_addr; +}; + +/** + * BFI_PORT_I2H_GET_STATS_RSP + */ + +/** + * BFI_PORT_H2I_CLEAR_STATS_REQ + */ + +/** + * BFI_PORT_I2H_CLEAR_STATS_RSP + */ + +union bfi_port_h2i_msg_u { + struct bfi_mhdr mh; + struct bfi_port_generic_req enable_req; + struct bfi_port_generic_req disable_req; + struct bfi_port_get_stats_req getstats_req; + struct bfi_port_generic_req clearstats_req; +}; + +union bfi_port_i2h_msg_u { + struct bfi_mhdr mh; + struct bfi_port_generic_rsp enable_rsp; + struct bfi_port_generic_rsp disable_rsp; + struct bfi_port_generic_rsp getstats_rsp; + struct bfi_port_generic_rsp clearstats_rsp; +}; + +/* @brief Mailbox commands from host to (DCBX/LLDP) firmware */ +enum bfi_cee_h2i_msgs { + BFI_CEE_H2I_GET_CFG_REQ = 1, + BFI_CEE_H2I_RESET_STATS = 2, + BFI_CEE_H2I_GET_STATS_REQ = 3, +}; + +/* @brief Mailbox reply and AEN messages from DCBX/LLDP firmware to host */ +enum bfi_cee_i2h_msgs { + BFI_CEE_I2H_GET_CFG_RSP = BFA_I2HM(1), + BFI_CEE_I2H_RESET_STATS_RSP = BFA_I2HM(2), + BFI_CEE_I2H_GET_STATS_RSP = BFA_I2HM(3), +}; + +/* Data structures */ + +/* + * @brief H2I command structure for resetting the stats. + * BFI_CEE_H2I_RESET_STATS + */ +struct bfi_lldp_reset_stats { + struct bfi_mhdr mh; +}; + +/* + * @brief H2I command structure for resetting the stats. 
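+ * (layout-identical to bfi_lldp_reset_stats above; both carry only the
+ * common message header)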
+ * BFI_CEE_H2I_RESET_STATS + */ +struct bfi_cee_reset_stats { + struct bfi_mhdr mh; +}; + +/* + * @brief get configuration command from host + * BFI_CEE_H2I_GET_CFG_REQ + */ +struct bfi_cee_get_req { + struct bfi_mhdr mh; + union bfi_addr_u dma_addr; +}; + +/* + * @brief reply message from firmware + * BFI_CEE_I2H_GET_CFG_RSP + */ +struct bfi_cee_get_rsp { + struct bfi_mhdr mh; + u8 cmd_status; + u8 rsvd[3]; +}; + +/* + * @brief get configuration command from host + * BFI_CEE_H2I_GET_STATS_REQ + */ +struct bfi_cee_stats_req { + struct bfi_mhdr mh; + union bfi_addr_u dma_addr; +}; + +/* + * @brief reply message from firmware + * BFI_CEE_I2H_GET_STATS_RSP + */ +struct bfi_cee_stats_rsp { + struct bfi_mhdr mh; + u8 cmd_status; + u8 rsvd[3]; +}; + +/* @brief mailbox command structures from host to firmware */ +union bfi_cee_h2i_msg_u { + struct bfi_mhdr mh; + struct bfi_cee_get_req get_req; + struct bfi_cee_stats_req stats_req; +}; + +/* @brief mailbox message structures from firmware to host */ +union bfi_cee_i2h_msg_u { + struct bfi_mhdr mh; + struct bfi_cee_get_rsp get_rsp; + struct bfi_cee_stats_rsp stats_rsp; +}; + +#pragma pack() + +#endif /* __BFI_CNA_H__ */ diff --git a/drivers/net/bna/bfi_ctreg.h b/drivers/net/bna/bfi_ctreg.h new file mode 100644 index 000000000000..404ea351d4a1 --- /dev/null +++ b/drivers/net/bna/bfi_ctreg.h @@ -0,0 +1,637 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +/* + * bfi_ctreg.h catapult host block register definitions + * + * !!! Do not edit. Auto generated. !!! 
+ */ + +#ifndef __BFI_CTREG_H__ +#define __BFI_CTREG_H__ + +#define HOSTFN0_LPU_MBOX0_0 0x00019200 +#define HOSTFN1_LPU_MBOX0_8 0x00019260 +#define LPU_HOSTFN0_MBOX0_0 0x00019280 +#define LPU_HOSTFN1_MBOX0_8 0x000192e0 +#define HOSTFN2_LPU_MBOX0_0 0x00019400 +#define HOSTFN3_LPU_MBOX0_8 0x00019460 +#define LPU_HOSTFN2_MBOX0_0 0x00019480 +#define LPU_HOSTFN3_MBOX0_8 0x000194e0 +#define HOSTFN0_INT_STATUS 0x00014000 +#define __HOSTFN0_HALT_OCCURRED 0x01000000 +#define __HOSTFN0_INT_STATUS_LVL_MK 0x00f00000 +#define __HOSTFN0_INT_STATUS_LVL_SH 20 +#define __HOSTFN0_INT_STATUS_LVL(_v) ((_v) << __HOSTFN0_INT_STATUS_LVL_SH) +#define __HOSTFN0_INT_STATUS_P_MK 0x000f0000 +#define __HOSTFN0_INT_STATUS_P_SH 16 +#define __HOSTFN0_INT_STATUS_P(_v) ((_v) << __HOSTFN0_INT_STATUS_P_SH) +#define __HOSTFN0_INT_STATUS_F 0x0000ffff +#define HOSTFN0_INT_MSK 0x00014004 +#define HOST_PAGE_NUM_FN0 0x00014008 +#define __HOST_PAGE_NUM_FN 0x000001ff +#define HOST_MSIX_ERR_INDEX_FN0 0x0001400c +#define __MSIX_ERR_INDEX_FN 0x000001ff +#define HOSTFN1_INT_STATUS 0x00014100 +#define __HOSTFN1_HALT_OCCURRED 0x01000000 +#define __HOSTFN1_INT_STATUS_LVL_MK 0x00f00000 +#define __HOSTFN1_INT_STATUS_LVL_SH 20 +#define __HOSTFN1_INT_STATUS_LVL(_v) ((_v) << __HOSTFN1_INT_STATUS_LVL_SH) +#define __HOSTFN1_INT_STATUS_P_MK 0x000f0000 +#define __HOSTFN1_INT_STATUS_P_SH 16 +#define __HOSTFN1_INT_STATUS_P(_v) ((_v) << __HOSTFN1_INT_STATUS_P_SH) +#define __HOSTFN1_INT_STATUS_F 0x0000ffff +#define HOSTFN1_INT_MSK 0x00014104 +#define HOST_PAGE_NUM_FN1 0x00014108 +#define HOST_MSIX_ERR_INDEX_FN1 0x0001410c +#define APP_PLL_425_CTL_REG 0x00014204 +#define __P_425_PLL_LOCK 0x80000000 +#define __APP_PLL_425_SRAM_USE_100MHZ 0x00100000 +#define __APP_PLL_425_RESET_TIMER_MK 0x000e0000 +#define __APP_PLL_425_RESET_TIMER_SH 17 +#define __APP_PLL_425_RESET_TIMER(_v) ((_v) << __APP_PLL_425_RESET_TIMER_SH) +#define __APP_PLL_425_LOGIC_SOFT_RESET 0x00010000 +#define __APP_PLL_425_CNTLMT0_1_MK 0x0000c000 +#define __APP_PLL_425_CNTLMT0_1_SH 14 +#define __APP_PLL_425_CNTLMT0_1(_v) ((_v) << __APP_PLL_425_CNTLMT0_1_SH) +#define __APP_PLL_425_JITLMT0_1_MK 0x00003000 +#define __APP_PLL_425_JITLMT0_1_SH 12 +#define __APP_PLL_425_JITLMT0_1(_v) ((_v) << __APP_PLL_425_JITLMT0_1_SH) +#define __APP_PLL_425_HREF 0x00000800 +#define __APP_PLL_425_HDIV 0x00000400 +#define __APP_PLL_425_P0_1_MK 0x00000300 +#define __APP_PLL_425_P0_1_SH 8 +#define __APP_PLL_425_P0_1(_v) ((_v) << __APP_PLL_425_P0_1_SH) +#define __APP_PLL_425_Z0_2_MK 0x000000e0 +#define __APP_PLL_425_Z0_2_SH 5 +#define __APP_PLL_425_Z0_2(_v) ((_v) << __APP_PLL_425_Z0_2_SH) +#define __APP_PLL_425_RSEL200500 0x00000010 +#define __APP_PLL_425_ENARST 0x00000008 +#define __APP_PLL_425_BYPASS 0x00000004 +#define __APP_PLL_425_LRESETN 0x00000002 +#define __APP_PLL_425_ENABLE 0x00000001 +#define APP_PLL_312_CTL_REG 0x00014208 +#define __P_312_PLL_LOCK 0x80000000 +#define __ENABLE_MAC_AHB_1 0x00800000 +#define __ENABLE_MAC_AHB_0 0x00400000 +#define __ENABLE_MAC_1 0x00200000 +#define __ENABLE_MAC_0 0x00100000 +#define __APP_PLL_312_RESET_TIMER_MK 0x000e0000 +#define __APP_PLL_312_RESET_TIMER_SH 17 +#define __APP_PLL_312_RESET_TIMER(_v) ((_v) << __APP_PLL_312_RESET_TIMER_SH) +#define __APP_PLL_312_LOGIC_SOFT_RESET 0x00010000 +#define __APP_PLL_312_CNTLMT0_1_MK 0x0000c000 +#define __APP_PLL_312_CNTLMT0_1_SH 14 +#define __APP_PLL_312_CNTLMT0_1(_v) ((_v) << __APP_PLL_312_CNTLMT0_1_SH) +#define __APP_PLL_312_JITLMT0_1_MK 0x00003000 +#define __APP_PLL_312_JITLMT0_1_SH 12 +#define __APP_PLL_312_JITLMT0_1(_v) ((_v) << 
__APP_PLL_312_JITLMT0_1_SH) +#define __APP_PLL_312_HREF 0x00000800 +#define __APP_PLL_312_HDIV 0x00000400 +#define __APP_PLL_312_P0_1_MK 0x00000300 +#define __APP_PLL_312_P0_1_SH 8 +#define __APP_PLL_312_P0_1(_v) ((_v) << __APP_PLL_312_P0_1_SH) +#define __APP_PLL_312_Z0_2_MK 0x000000e0 +#define __APP_PLL_312_Z0_2_SH 5 +#define __APP_PLL_312_Z0_2(_v) ((_v) << __APP_PLL_312_Z0_2_SH) +#define __APP_PLL_312_RSEL200500 0x00000010 +#define __APP_PLL_312_ENARST 0x00000008 +#define __APP_PLL_312_BYPASS 0x00000004 +#define __APP_PLL_312_LRESETN 0x00000002 +#define __APP_PLL_312_ENABLE 0x00000001 +#define MBIST_CTL_REG 0x00014220 +#define __EDRAM_BISTR_START 0x00000004 +#define __MBIST_RESET 0x00000002 +#define __MBIST_START 0x00000001 +#define MBIST_STAT_REG 0x00014224 +#define __EDRAM_BISTR_STATUS 0x00000008 +#define __EDRAM_BISTR_DONE 0x00000004 +#define __MEM_BIT_STATUS 0x00000002 +#define __MBIST_DONE 0x00000001 +#define HOST_SEM0_REG 0x00014230 +#define __HOST_SEMAPHORE 0x00000001 +#define HOST_SEM1_REG 0x00014234 +#define HOST_SEM2_REG 0x00014238 +#define HOST_SEM3_REG 0x0001423c +#define HOST_SEM0_INFO_REG 0x00014240 +#define HOST_SEM1_INFO_REG 0x00014244 +#define HOST_SEM2_INFO_REG 0x00014248 +#define HOST_SEM3_INFO_REG 0x0001424c +#define ETH_MAC_SER_REG 0x00014288 +#define __APP_EMS_CKBUFAMPIN 0x00000020 +#define __APP_EMS_REFCLKSEL 0x00000010 +#define __APP_EMS_CMLCKSEL 0x00000008 +#define __APP_EMS_REFCKBUFEN2 0x00000004 +#define __APP_EMS_REFCKBUFEN1 0x00000002 +#define __APP_EMS_CHANNEL_SEL 0x00000001 +#define HOSTFN2_INT_STATUS 0x00014300 +#define __HOSTFN2_HALT_OCCURRED 0x01000000 +#define __HOSTFN2_INT_STATUS_LVL_MK 0x00f00000 +#define __HOSTFN2_INT_STATUS_LVL_SH 20 +#define __HOSTFN2_INT_STATUS_LVL(_v) ((_v) << __HOSTFN2_INT_STATUS_LVL_SH) +#define __HOSTFN2_INT_STATUS_P_MK 0x000f0000 +#define __HOSTFN2_INT_STATUS_P_SH 16 +#define __HOSTFN2_INT_STATUS_P(_v) ((_v) << __HOSTFN2_INT_STATUS_P_SH) +#define __HOSTFN2_INT_STATUS_F 0x0000ffff +#define HOSTFN2_INT_MSK 0x00014304 +#define HOST_PAGE_NUM_FN2 0x00014308 +#define HOST_MSIX_ERR_INDEX_FN2 0x0001430c +#define HOSTFN3_INT_STATUS 0x00014400 +#define __HALT_OCCURRED 0x01000000 +#define __HOSTFN3_INT_STATUS_LVL_MK 0x00f00000 +#define __HOSTFN3_INT_STATUS_LVL_SH 20 +#define __HOSTFN3_INT_STATUS_LVL(_v) ((_v) << __HOSTFN3_INT_STATUS_LVL_SH) +#define __HOSTFN3_INT_STATUS_P_MK 0x000f0000 +#define __HOSTFN3_INT_STATUS_P_SH 16 +#define __HOSTFN3_INT_STATUS_P(_v) ((_v) << __HOSTFN3_INT_STATUS_P_SH) +#define __HOSTFN3_INT_STATUS_F 0x0000ffff +#define HOSTFN3_INT_MSK 0x00014404 +#define HOST_PAGE_NUM_FN3 0x00014408 +#define HOST_MSIX_ERR_INDEX_FN3 0x0001440c +#define FNC_ID_REG 0x00014600 +#define __FUNCTION_NUMBER 0x00000007 +#define FNC_PERS_REG 0x00014604 +#define __F3_FUNCTION_ACTIVE 0x80000000 +#define __F3_FUNCTION_MODE 0x40000000 +#define __F3_PORT_MAP_MK 0x30000000 +#define __F3_PORT_MAP_SH 28 +#define __F3_PORT_MAP(_v) ((_v) << __F3_PORT_MAP_SH) +#define __F3_VM_MODE 0x08000000 +#define __F3_INTX_STATUS_MK 0x07000000 +#define __F3_INTX_STATUS_SH 24 +#define __F3_INTX_STATUS(_v) ((_v) << __F3_INTX_STATUS_SH) +#define __F2_FUNCTION_ACTIVE 0x00800000 +#define __F2_FUNCTION_MODE 0x00400000 +#define __F2_PORT_MAP_MK 0x00300000 +#define __F2_PORT_MAP_SH 20 +#define __F2_PORT_MAP(_v) ((_v) << __F2_PORT_MAP_SH) +#define __F2_VM_MODE 0x00080000 +#define __F2_INTX_STATUS_MK 0x00070000 +#define __F2_INTX_STATUS_SH 16 +#define __F2_INTX_STATUS(_v) ((_v) << __F2_INTX_STATUS_SH) +#define __F1_FUNCTION_ACTIVE 0x00008000 +#define __F1_FUNCTION_MODE 
0x00004000 +#define __F1_PORT_MAP_MK 0x00003000 +#define __F1_PORT_MAP_SH 12 +#define __F1_PORT_MAP(_v) ((_v) << __F1_PORT_MAP_SH) +#define __F1_VM_MODE 0x00000800 +#define __F1_INTX_STATUS_MK 0x00000700 +#define __F1_INTX_STATUS_SH 8 +#define __F1_INTX_STATUS(_v) ((_v) << __F1_INTX_STATUS_SH) +#define __F0_FUNCTION_ACTIVE 0x00000080 +#define __F0_FUNCTION_MODE 0x00000040 +#define __F0_PORT_MAP_MK 0x00000030 +#define __F0_PORT_MAP_SH 4 +#define __F0_PORT_MAP(_v) ((_v) << __F0_PORT_MAP_SH) +#define __F0_VM_MODE 0x00000008 +#define __F0_INTX_STATUS 0x00000007 +enum { + __F0_INTX_STATUS_MSIX = 0x0, + __F0_INTX_STATUS_INTA = 0x1, + __F0_INTX_STATUS_INTB = 0x2, + __F0_INTX_STATUS_INTC = 0x3, + __F0_INTX_STATUS_INTD = 0x4, +}; +#define OP_MODE 0x0001460c +#define __APP_ETH_CLK_LOWSPEED 0x00000004 +#define __GLOBAL_CORECLK_HALFSPEED 0x00000002 +#define __GLOBAL_FCOE_MODE 0x00000001 +#define HOST_SEM4_REG 0x00014610 +#define HOST_SEM5_REG 0x00014614 +#define HOST_SEM6_REG 0x00014618 +#define HOST_SEM7_REG 0x0001461c +#define HOST_SEM4_INFO_REG 0x00014620 +#define HOST_SEM5_INFO_REG 0x00014624 +#define HOST_SEM6_INFO_REG 0x00014628 +#define HOST_SEM7_INFO_REG 0x0001462c +#define HOSTFN0_LPU0_MBOX0_CMD_STAT 0x00019000 +#define __HOSTFN0_LPU0_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN0_LPU0_MBOX0_INFO_SH 1 +#define __HOSTFN0_LPU0_MBOX0_INFO(_v) ((_v) << __HOSTFN0_LPU0_MBOX0_INFO_SH) +#define __HOSTFN0_LPU0_MBOX0_CMD_STATUS 0x00000001 +#define HOSTFN0_LPU1_MBOX0_CMD_STAT 0x00019004 +#define __HOSTFN0_LPU1_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN0_LPU1_MBOX0_INFO_SH 1 +#define __HOSTFN0_LPU1_MBOX0_INFO(_v) ((_v) << __HOSTFN0_LPU1_MBOX0_INFO_SH) +#define __HOSTFN0_LPU1_MBOX0_CMD_STATUS 0x00000001 +#define LPU0_HOSTFN0_MBOX0_CMD_STAT 0x00019008 +#define __LPU0_HOSTFN0_MBOX0_INFO_MK 0xfffffffe +#define __LPU0_HOSTFN0_MBOX0_INFO_SH 1 +#define __LPU0_HOSTFN0_MBOX0_INFO(_v) ((_v) << __LPU0_HOSTFN0_MBOX0_INFO_SH) +#define __LPU0_HOSTFN0_MBOX0_CMD_STATUS 0x00000001 +#define LPU1_HOSTFN0_MBOX0_CMD_STAT 0x0001900c +#define __LPU1_HOSTFN0_MBOX0_INFO_MK 0xfffffffe +#define __LPU1_HOSTFN0_MBOX0_INFO_SH 1 +#define __LPU1_HOSTFN0_MBOX0_INFO(_v) ((_v) << __LPU1_HOSTFN0_MBOX0_INFO_SH) +#define __LPU1_HOSTFN0_MBOX0_CMD_STATUS 0x00000001 +#define HOSTFN1_LPU0_MBOX0_CMD_STAT 0x00019010 +#define __HOSTFN1_LPU0_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN1_LPU0_MBOX0_INFO_SH 1 +#define __HOSTFN1_LPU0_MBOX0_INFO(_v) ((_v) << __HOSTFN1_LPU0_MBOX0_INFO_SH) +#define __HOSTFN1_LPU0_MBOX0_CMD_STATUS 0x00000001 +#define HOSTFN1_LPU1_MBOX0_CMD_STAT 0x00019014 +#define __HOSTFN1_LPU1_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN1_LPU1_MBOX0_INFO_SH 1 +#define __HOSTFN1_LPU1_MBOX0_INFO(_v) ((_v) << __HOSTFN1_LPU1_MBOX0_INFO_SH) +#define __HOSTFN1_LPU1_MBOX0_CMD_STATUS 0x00000001 +#define LPU0_HOSTFN1_MBOX0_CMD_STAT 0x00019018 +#define __LPU0_HOSTFN1_MBOX0_INFO_MK 0xfffffffe +#define __LPU0_HOSTFN1_MBOX0_INFO_SH 1 +#define __LPU0_HOSTFN1_MBOX0_INFO(_v) ((_v) << __LPU0_HOSTFN1_MBOX0_INFO_SH) +#define __LPU0_HOSTFN1_MBOX0_CMD_STATUS 0x00000001 +#define LPU1_HOSTFN1_MBOX0_CMD_STAT 0x0001901c +#define __LPU1_HOSTFN1_MBOX0_INFO_MK 0xfffffffe +#define __LPU1_HOSTFN1_MBOX0_INFO_SH 1 +#define __LPU1_HOSTFN1_MBOX0_INFO(_v) ((_v) << __LPU1_HOSTFN1_MBOX0_INFO_SH) +#define __LPU1_HOSTFN1_MBOX0_CMD_STATUS 0x00000001 +#define HOSTFN2_LPU0_MBOX0_CMD_STAT 0x00019150 +#define __HOSTFN2_LPU0_MBOX0_INFO_MK 0xfffffffe +#define __HOSTFN2_LPU0_MBOX0_INFO_SH 1 +#define __HOSTFN2_LPU0_MBOX0_INFO(_v) ((_v) << __HOSTFN2_LPU0_MBOX0_INFO_SH) +#define 
__HOSTFN2_LPU0_MBOX0_CMD_STATUS 0x00000001
+#define HOSTFN2_LPU1_MBOX0_CMD_STAT 0x00019154
+#define __HOSTFN2_LPU1_MBOX0_INFO_MK 0xfffffffe
+#define __HOSTFN2_LPU1_MBOX0_INFO_SH 1
+#define __HOSTFN2_LPU1_MBOX0_INFO(_v) ((_v) << __HOSTFN2_LPU1_MBOX0_INFO_SH)
+#define __HOSTFN2_LPU1_MBOX0_CMD_STATUS 0x00000001
+#define LPU0_HOSTFN2_MBOX0_CMD_STAT 0x00019158
+#define __LPU0_HOSTFN2_MBOX0_INFO_MK 0xfffffffe
+#define __LPU0_HOSTFN2_MBOX0_INFO_SH 1
+#define __LPU0_HOSTFN2_MBOX0_INFO(_v) ((_v) << __LPU0_HOSTFN2_MBOX0_INFO_SH)
+#define __LPU0_HOSTFN2_MBOX0_CMD_STATUS 0x00000001
+#define LPU1_HOSTFN2_MBOX0_CMD_STAT 0x0001915c
+#define __LPU1_HOSTFN2_MBOX0_INFO_MK 0xfffffffe
+#define __LPU1_HOSTFN2_MBOX0_INFO_SH 1
+#define __LPU1_HOSTFN2_MBOX0_INFO(_v) ((_v) << __LPU1_HOSTFN2_MBOX0_INFO_SH)
+#define __LPU1_HOSTFN2_MBOX0_CMD_STATUS 0x00000001
+#define HOSTFN3_LPU0_MBOX0_CMD_STAT 0x00019160
+#define __HOSTFN3_LPU0_MBOX0_INFO_MK 0xfffffffe
+#define __HOSTFN3_LPU0_MBOX0_INFO_SH 1
+#define __HOSTFN3_LPU0_MBOX0_INFO(_v) ((_v) << __HOSTFN3_LPU0_MBOX0_INFO_SH)
+#define __HOSTFN3_LPU0_MBOX0_CMD_STATUS 0x00000001
+#define HOSTFN3_LPU1_MBOX0_CMD_STAT 0x00019164
+#define __HOSTFN3_LPU1_MBOX0_INFO_MK 0xfffffffe
+#define __HOSTFN3_LPU1_MBOX0_INFO_SH 1
+#define __HOSTFN3_LPU1_MBOX0_INFO(_v) ((_v) << __HOSTFN3_LPU1_MBOX0_INFO_SH)
+#define __HOSTFN3_LPU1_MBOX0_CMD_STATUS 0x00000001
+#define LPU0_HOSTFN3_MBOX0_CMD_STAT 0x00019168
+#define __LPU0_HOSTFN3_MBOX0_INFO_MK 0xfffffffe
+#define __LPU0_HOSTFN3_MBOX0_INFO_SH 1
+#define __LPU0_HOSTFN3_MBOX0_INFO(_v) ((_v) << __LPU0_HOSTFN3_MBOX0_INFO_SH)
+#define __LPU0_HOSTFN3_MBOX0_CMD_STATUS 0x00000001
+#define LPU1_HOSTFN3_MBOX0_CMD_STAT 0x0001916c
+#define __LPU1_HOSTFN3_MBOX0_INFO_MK 0xfffffffe
+#define __LPU1_HOSTFN3_MBOX0_INFO_SH 1
+#define __LPU1_HOSTFN3_MBOX0_INFO(_v) ((_v) << __LPU1_HOSTFN3_MBOX0_INFO_SH)
+#define __LPU1_HOSTFN3_MBOX0_CMD_STATUS 0x00000001
+#define FW_INIT_HALT_P0 0x000191ac
+#define __FW_INIT_HALT_P 0x00000001
+#define FW_INIT_HALT_P1 0x000191bc
+#define CPE_PI_PTR_Q0 0x00038000
+#define __CPE_PI_UNUSED_MK 0xffff0000
+#define __CPE_PI_UNUSED_SH 16
+#define __CPE_PI_UNUSED(_v) ((_v) << __CPE_PI_UNUSED_SH)
+#define __CPE_PI_PTR 0x0000ffff
+#define CPE_PI_PTR_Q1 0x00038040
+#define CPE_CI_PTR_Q0 0x00038004
+#define __CPE_CI_UNUSED_MK 0xffff0000
+#define __CPE_CI_UNUSED_SH 16
+#define __CPE_CI_UNUSED(_v) ((_v) << __CPE_CI_UNUSED_SH)
+#define __CPE_CI_PTR 0x0000ffff
+#define CPE_CI_PTR_Q1 0x00038044
+#define CPE_DEPTH_Q0 0x00038008
+#define __CPE_DEPTH_UNUSED_MK 0xf8000000
+#define __CPE_DEPTH_UNUSED_SH 27
+#define __CPE_DEPTH_UNUSED(_v) ((_v) << __CPE_DEPTH_UNUSED_SH)
+#define __CPE_MSIX_VEC_INDEX_MK 0x07ff0000
+#define __CPE_MSIX_VEC_INDEX_SH 16
+#define __CPE_MSIX_VEC_INDEX(_v) ((_v) << __CPE_MSIX_VEC_INDEX_SH)
+#define __CPE_DEPTH 0x0000ffff
+#define CPE_DEPTH_Q1 0x00038048
+#define CPE_QCTRL_Q0 0x0003800c
+#define __CPE_CTRL_UNUSED30_MK 0xfc000000
+#define __CPE_CTRL_UNUSED30_SH 26
+#define __CPE_CTRL_UNUSED30(_v) ((_v) << __CPE_CTRL_UNUSED30_SH)
+#define __CPE_FUNC_INT_CTRL_MK 0x03000000
+#define __CPE_FUNC_INT_CTRL_SH 24
+#define __CPE_FUNC_INT_CTRL(_v) ((_v) << __CPE_FUNC_INT_CTRL_SH)
+enum {
+ __CPE_FUNC_INT_CTRL_DISABLE = 0x0,
+ __CPE_FUNC_INT_CTRL_F2NF = 0x1,
+ __CPE_FUNC_INT_CTRL_3QUART = 0x2,
+ __CPE_FUNC_INT_CTRL_HALF = 0x3,
+};
+#define __CPE_CTRL_UNUSED20_MK 0x00f00000
+#define __CPE_CTRL_UNUSED20_SH 20
+#define __CPE_CTRL_UNUSED20(_v) ((_v) << __CPE_CTRL_UNUSED20_SH)
+#define __CPE_SCI_TH_MK 0x000f0000
+#define __CPE_SCI_TH_SH 16
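Each register field in this header follows the same _MK/_SH/(_v) pattern: the mask isolates the field's bits, the shift gives its bit position, and the function-like macro places a value into that position. As a minimal illustration of the read-modify-write idiom these macros enable (the helper below is hypothetical and not part of the patch; it only shows the macro arithmetic on a register value already read from hardware):

/*
 * Illustrative sketch only: extract and rewrite the LVL field of a
 * HOSTFN0_INT_STATUS value using the mask/shift macros defined above.
 */
static u32 bump_int_lvl(u32 reg)
{
	u32 lvl = (reg & __HOSTFN0_INT_STATUS_LVL_MK) >>
			__HOSTFN0_INT_STATUS_LVL_SH;	/* extract current level */

	reg &= ~__HOSTFN0_INT_STATUS_LVL_MK;		/* clear the old field */
	reg |= __HOSTFN0_INT_STATUS_LVL(lvl + 1);	/* insert the new value */
	return reg;
}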
+#define __CPE_SCI_TH(_v) ((_v) << __CPE_SCI_TH_SH) +#define __CPE_CTRL_UNUSED10_MK 0x0000c000 +#define __CPE_CTRL_UNUSED10_SH 14 +#define __CPE_CTRL_UNUSED10(_v) ((_v) << __CPE_CTRL_UNUSED10_SH) +#define __CPE_ACK_PENDING 0x00002000 +#define __CPE_CTRL_UNUSED40_MK 0x00001c00 +#define __CPE_CTRL_UNUSED40_SH 10 +#define __CPE_CTRL_UNUSED40(_v) ((_v) << __CPE_CTRL_UNUSED40_SH) +#define __CPE_PCIEID_MK 0x00000300 +#define __CPE_PCIEID_SH 8 +#define __CPE_PCIEID(_v) ((_v) << __CPE_PCIEID_SH) +#define __CPE_CTRL_UNUSED00_MK 0x000000fe +#define __CPE_CTRL_UNUSED00_SH 1 +#define __CPE_CTRL_UNUSED00(_v) ((_v) << __CPE_CTRL_UNUSED00_SH) +#define __CPE_ESIZE 0x00000001 +#define CPE_QCTRL_Q1 0x0003804c +#define __CPE_CTRL_UNUSED31_MK 0xfc000000 +#define __CPE_CTRL_UNUSED31_SH 26 +#define __CPE_CTRL_UNUSED31(_v) ((_v) << __CPE_CTRL_UNUSED31_SH) +#define __CPE_CTRL_UNUSED21_MK 0x00f00000 +#define __CPE_CTRL_UNUSED21_SH 20 +#define __CPE_CTRL_UNUSED21(_v) ((_v) << __CPE_CTRL_UNUSED21_SH) +#define __CPE_CTRL_UNUSED11_MK 0x0000c000 +#define __CPE_CTRL_UNUSED11_SH 14 +#define __CPE_CTRL_UNUSED11(_v) ((_v) << __CPE_CTRL_UNUSED11_SH) +#define __CPE_CTRL_UNUSED41_MK 0x00001c00 +#define __CPE_CTRL_UNUSED41_SH 10 +#define __CPE_CTRL_UNUSED41(_v) ((_v) << __CPE_CTRL_UNUSED41_SH) +#define __CPE_CTRL_UNUSED01_MK 0x000000fe +#define __CPE_CTRL_UNUSED01_SH 1 +#define __CPE_CTRL_UNUSED01(_v) ((_v) << __CPE_CTRL_UNUSED01_SH) +#define RME_PI_PTR_Q0 0x00038020 +#define __LATENCY_TIME_STAMP_MK 0xffff0000 +#define __LATENCY_TIME_STAMP_SH 16 +#define __LATENCY_TIME_STAMP(_v) ((_v) << __LATENCY_TIME_STAMP_SH) +#define __RME_PI_PTR 0x0000ffff +#define RME_PI_PTR_Q1 0x00038060 +#define RME_CI_PTR_Q0 0x00038024 +#define __DELAY_TIME_STAMP_MK 0xffff0000 +#define __DELAY_TIME_STAMP_SH 16 +#define __DELAY_TIME_STAMP(_v) ((_v) << __DELAY_TIME_STAMP_SH) +#define __RME_CI_PTR 0x0000ffff +#define RME_CI_PTR_Q1 0x00038064 +#define RME_DEPTH_Q0 0x00038028 +#define __RME_DEPTH_UNUSED_MK 0xf8000000 +#define __RME_DEPTH_UNUSED_SH 27 +#define __RME_DEPTH_UNUSED(_v) ((_v) << __RME_DEPTH_UNUSED_SH) +#define __RME_MSIX_VEC_INDEX_MK 0x07ff0000 +#define __RME_MSIX_VEC_INDEX_SH 16 +#define __RME_MSIX_VEC_INDEX(_v) ((_v) << __RME_MSIX_VEC_INDEX_SH) +#define __RME_DEPTH 0x0000ffff +#define RME_DEPTH_Q1 0x00038068 +#define RME_QCTRL_Q0 0x0003802c +#define __RME_INT_LATENCY_TIMER_MK 0xff000000 +#define __RME_INT_LATENCY_TIMER_SH 24 +#define __RME_INT_LATENCY_TIMER(_v) ((_v) << __RME_INT_LATENCY_TIMER_SH) +#define __RME_INT_DELAY_TIMER_MK 0x00ff0000 +#define __RME_INT_DELAY_TIMER_SH 16 +#define __RME_INT_DELAY_TIMER(_v) ((_v) << __RME_INT_DELAY_TIMER_SH) +#define __RME_INT_DELAY_DISABLE 0x00008000 +#define __RME_DLY_DELAY_DISABLE 0x00004000 +#define __RME_ACK_PENDING 0x00002000 +#define __RME_FULL_INTERRUPT_DISABLE 0x00001000 +#define __RME_CTRL_UNUSED10_MK 0x00000c00 +#define __RME_CTRL_UNUSED10_SH 10 +#define __RME_CTRL_UNUSED10(_v) ((_v) << __RME_CTRL_UNUSED10_SH) +#define __RME_PCIEID_MK 0x00000300 +#define __RME_PCIEID_SH 8 +#define __RME_PCIEID(_v) ((_v) << __RME_PCIEID_SH) +#define __RME_CTRL_UNUSED00_MK 0x000000fe +#define __RME_CTRL_UNUSED00_SH 1 +#define __RME_CTRL_UNUSED00(_v) ((_v) << __RME_CTRL_UNUSED00_SH) +#define __RME_ESIZE 0x00000001 +#define RME_QCTRL_Q1 0x0003806c +#define __RME_CTRL_UNUSED11_MK 0x00000c00 +#define __RME_CTRL_UNUSED11_SH 10 +#define __RME_CTRL_UNUSED11(_v) ((_v) << __RME_CTRL_UNUSED11_SH) +#define __RME_CTRL_UNUSED01_MK 0x000000fe +#define __RME_CTRL_UNUSED01_SH 1 +#define __RME_CTRL_UNUSED01(_v) ((_v) << 
__RME_CTRL_UNUSED01_SH) +#define PSS_CTL_REG 0x00018800 +#define __PSS_I2C_CLK_DIV_MK 0x007f0000 +#define __PSS_I2C_CLK_DIV_SH 16 +#define __PSS_I2C_CLK_DIV(_v) ((_v) << __PSS_I2C_CLK_DIV_SH) +#define __PSS_LMEM_INIT_DONE 0x00001000 +#define __PSS_LMEM_RESET 0x00000200 +#define __PSS_LMEM_INIT_EN 0x00000100 +#define __PSS_LPU1_RESET 0x00000002 +#define __PSS_LPU0_RESET 0x00000001 +#define PSS_ERR_STATUS_REG 0x00018810 +#define __PSS_LPU1_TCM_READ_ERR 0x00200000 +#define __PSS_LPU0_TCM_READ_ERR 0x00100000 +#define __PSS_LMEM5_CORR_ERR 0x00080000 +#define __PSS_LMEM4_CORR_ERR 0x00040000 +#define __PSS_LMEM3_CORR_ERR 0x00020000 +#define __PSS_LMEM2_CORR_ERR 0x00010000 +#define __PSS_LMEM1_CORR_ERR 0x00008000 +#define __PSS_LMEM0_CORR_ERR 0x00004000 +#define __PSS_LMEM5_UNCORR_ERR 0x00002000 +#define __PSS_LMEM4_UNCORR_ERR 0x00001000 +#define __PSS_LMEM3_UNCORR_ERR 0x00000800 +#define __PSS_LMEM2_UNCORR_ERR 0x00000400 +#define __PSS_LMEM1_UNCORR_ERR 0x00000200 +#define __PSS_LMEM0_UNCORR_ERR 0x00000100 +#define __PSS_BAL_PERR 0x00000080 +#define __PSS_DIP_IF_ERR 0x00000040 +#define __PSS_IOH_IF_ERR 0x00000020 +#define __PSS_TDS_IF_ERR 0x00000010 +#define __PSS_RDS_IF_ERR 0x00000008 +#define __PSS_SGM_IF_ERR 0x00000004 +#define __PSS_LPU1_RAM_ERR 0x00000002 +#define __PSS_LPU0_RAM_ERR 0x00000001 +#define ERR_SET_REG 0x00018818 +#define __PSS_ERR_STATUS_SET 0x003fffff +#define PMM_1T_RESET_REG_P0 0x0002381c +#define __PMM_1T_RESET_P 0x00000001 +#define PMM_1T_RESET_REG_P1 0x00023c1c +#define HQM_QSET0_RXQ_DRBL_P0 0x00038000 +#define __RXQ0_ADD_VECTORS_P 0x80000000 +#define __RXQ0_STOP_P 0x40000000 +#define __RXQ0_PRD_PTR_P 0x0000ffff +#define HQM_QSET1_RXQ_DRBL_P0 0x00038080 +#define __RXQ1_ADD_VECTORS_P 0x80000000 +#define __RXQ1_STOP_P 0x40000000 +#define __RXQ1_PRD_PTR_P 0x0000ffff +#define HQM_QSET0_RXQ_DRBL_P1 0x0003c000 +#define HQM_QSET1_RXQ_DRBL_P1 0x0003c080 +#define HQM_QSET0_TXQ_DRBL_P0 0x00038020 +#define __TXQ0_ADD_VECTORS_P 0x80000000 +#define __TXQ0_STOP_P 0x40000000 +#define __TXQ0_PRD_PTR_P 0x0000ffff +#define HQM_QSET1_TXQ_DRBL_P0 0x000380a0 +#define __TXQ1_ADD_VECTORS_P 0x80000000 +#define __TXQ1_STOP_P 0x40000000 +#define __TXQ1_PRD_PTR_P 0x0000ffff +#define HQM_QSET0_TXQ_DRBL_P1 0x0003c020 +#define HQM_QSET1_TXQ_DRBL_P1 0x0003c0a0 +#define HQM_QSET0_IB_DRBL_1_P0 0x00038040 +#define __IB1_0_ACK_P 0x80000000 +#define __IB1_0_DISABLE_P 0x40000000 +#define __IB1_0_COALESCING_CFG_P_MK 0x00ff0000 +#define __IB1_0_COALESCING_CFG_P_SH 16 +#define __IB1_0_COALESCING_CFG_P(_v) ((_v) << __IB1_0_COALESCING_CFG_P_SH) +#define __IB1_0_NUM_OF_ACKED_EVENTS_P 0x0000ffff +#define HQM_QSET1_IB_DRBL_1_P0 0x000380c0 +#define __IB1_1_ACK_P 0x80000000 +#define __IB1_1_DISABLE_P 0x40000000 +#define __IB1_1_COALESCING_CFG_P_MK 0x00ff0000 +#define __IB1_1_COALESCING_CFG_P_SH 16 +#define __IB1_1_COALESCING_CFG_P(_v) ((_v) << __IB1_1_COALESCING_CFG_P_SH) +#define __IB1_1_NUM_OF_ACKED_EVENTS_P 0x0000ffff +#define HQM_QSET0_IB_DRBL_1_P1 0x0003c040 +#define HQM_QSET1_IB_DRBL_1_P1 0x0003c0c0 +#define HQM_QSET0_IB_DRBL_2_P0 0x00038060 +#define __IB2_0_ACK_P 0x80000000 +#define __IB2_0_DISABLE_P 0x40000000 +#define __IB2_0_COALESCING_CFG_P_MK 0x00ff0000 +#define __IB2_0_COALESCING_CFG_P_SH 16 +#define __IB2_0_COALESCING_CFG_P(_v) ((_v) << __IB2_0_COALESCING_CFG_P_SH) +#define __IB2_0_NUM_OF_ACKED_EVENTS_P 0x0000ffff +#define HQM_QSET1_IB_DRBL_2_P0 0x000380e0 +#define __IB2_1_ACK_P 0x80000000 +#define __IB2_1_DISABLE_P 0x40000000 +#define __IB2_1_COALESCING_CFG_P_MK 0x00ff0000 +#define 
__IB2_1_COALESCING_CFG_P_SH 16
+#define __IB2_1_COALESCING_CFG_P(_v) ((_v) << __IB2_1_COALESCING_CFG_P_SH)
+#define __IB2_1_NUM_OF_ACKED_EVENTS_P 0x0000ffff
+#define HQM_QSET0_IB_DRBL_2_P1 0x0003c060
+#define HQM_QSET1_IB_DRBL_2_P1 0x0003c0e0
+
+/*
+ * These definitions are either incorrect or missing in the spec. They are
+ * auto-generated from hard-coded values in regparse.pl.
+ */
+#define __EMPHPOST_AT_4G_MK_FIX 0x0000001c
+#define __EMPHPOST_AT_4G_SH_FIX 0x00000002
+#define __EMPHPRE_AT_4G_FIX 0x00000003
+#define __SFP_TXRATE_EN_FIX 0x00000100
+#define __SFP_RXRATE_EN_FIX 0x00000080
+
+/*
+ * These register definitions are auto-generated from hard-coded values
+ * in regparse.pl.
+ */
+
+/*
+ * These register mapping definitions are auto-generated from mapping tables
+ * in regparse.pl.
+ */
+#define BFA_IOC0_HBEAT_REG HOST_SEM0_INFO_REG
+#define BFA_IOC0_STATE_REG HOST_SEM1_INFO_REG
+#define BFA_IOC1_HBEAT_REG HOST_SEM2_INFO_REG
+#define BFA_IOC1_STATE_REG HOST_SEM3_INFO_REG
+#define BFA_FW_USE_COUNT HOST_SEM4_INFO_REG
+
+#define CPE_DEPTH_Q(__n) \
+ (CPE_DEPTH_Q0 + (__n) * (CPE_DEPTH_Q1 - CPE_DEPTH_Q0))
+#define CPE_QCTRL_Q(__n) \
+ (CPE_QCTRL_Q0 + (__n) * (CPE_QCTRL_Q1 - CPE_QCTRL_Q0))
+#define CPE_PI_PTR_Q(__n) \
+ (CPE_PI_PTR_Q0 + (__n) * (CPE_PI_PTR_Q1 - CPE_PI_PTR_Q0))
+#define CPE_CI_PTR_Q(__n) \
+ (CPE_CI_PTR_Q0 + (__n) * (CPE_CI_PTR_Q1 - CPE_CI_PTR_Q0))
+#define RME_DEPTH_Q(__n) \
+ (RME_DEPTH_Q0 + (__n) * (RME_DEPTH_Q1 - RME_DEPTH_Q0))
+#define RME_QCTRL_Q(__n) \
+ (RME_QCTRL_Q0 + (__n) * (RME_QCTRL_Q1 - RME_QCTRL_Q0))
+#define RME_PI_PTR_Q(__n) \
+ (RME_PI_PTR_Q0 + (__n) * (RME_PI_PTR_Q1 - RME_PI_PTR_Q0))
+#define RME_CI_PTR_Q(__n) \
+ (RME_CI_PTR_Q0 + (__n) * (RME_CI_PTR_Q1 - RME_CI_PTR_Q0))
+#define HQM_QSET_RXQ_DRBL_P0(__n) (HQM_QSET0_RXQ_DRBL_P0 + (__n) \
+ * (HQM_QSET1_RXQ_DRBL_P0 - HQM_QSET0_RXQ_DRBL_P0))
+#define HQM_QSET_TXQ_DRBL_P0(__n) (HQM_QSET0_TXQ_DRBL_P0 + (__n) \
+ * (HQM_QSET1_TXQ_DRBL_P0 - HQM_QSET0_TXQ_DRBL_P0))
+#define HQM_QSET_IB_DRBL_1_P0(__n) (HQM_QSET0_IB_DRBL_1_P0 + (__n) \
+ * (HQM_QSET1_IB_DRBL_1_P0 - HQM_QSET0_IB_DRBL_1_P0))
+#define HQM_QSET_IB_DRBL_2_P0(__n) (HQM_QSET0_IB_DRBL_2_P0 + (__n) \
+ * (HQM_QSET1_IB_DRBL_2_P0 - HQM_QSET0_IB_DRBL_2_P0))
+#define HQM_QSET_RXQ_DRBL_P1(__n) (HQM_QSET0_RXQ_DRBL_P1 + (__n) \
+ * (HQM_QSET1_RXQ_DRBL_P1 - HQM_QSET0_RXQ_DRBL_P1))
+#define HQM_QSET_TXQ_DRBL_P1(__n) (HQM_QSET0_TXQ_DRBL_P1 + (__n) \
+ * (HQM_QSET1_TXQ_DRBL_P1 - HQM_QSET0_TXQ_DRBL_P1))
+#define HQM_QSET_IB_DRBL_1_P1(__n) (HQM_QSET0_IB_DRBL_1_P1 + (__n) \
+ * (HQM_QSET1_IB_DRBL_1_P1 - HQM_QSET0_IB_DRBL_1_P1))
+#define HQM_QSET_IB_DRBL_2_P1(__n) (HQM_QSET0_IB_DRBL_2_P1 + (__n) \
+ * (HQM_QSET1_IB_DRBL_2_P1 - HQM_QSET0_IB_DRBL_2_P1))
+
+#define CPE_Q_NUM(__fn, __q) (((__fn) << 2) + (__q))
+#define RME_Q_NUM(__fn, __q) (((__fn) << 2) + (__q))
+#define CPE_Q_MASK(__q) ((__q) & 0x3)
+#define RME_Q_MASK(__q) ((__q) & 0x3)
+
+/*
+ * PCI MSI-X vector defines
+ */
+enum {
+ BFA_MSIX_CPE_Q0 = 0,
+ BFA_MSIX_CPE_Q1 = 1,
+ BFA_MSIX_CPE_Q2 = 2,
+ BFA_MSIX_CPE_Q3 = 3,
+ BFA_MSIX_RME_Q0 = 4,
+ BFA_MSIX_RME_Q1 = 5,
+ BFA_MSIX_RME_Q2 = 6,
+ BFA_MSIX_RME_Q3 = 7,
+ BFA_MSIX_LPU_ERR = 8,
+ BFA_MSIX_CT_MAX = 9,
+};
+
+/*
+ * And corresponding host interrupt status bit field defines
+ */
+#define __HFN_INT_CPE_Q0 0x00000001U
+#define __HFN_INT_CPE_Q1 0x00000002U
+#define __HFN_INT_CPE_Q2 0x00000004U
+#define __HFN_INT_CPE_Q3 0x00000008U
+#define __HFN_INT_CPE_Q4 0x00000010U
+#define __HFN_INT_CPE_Q5 0x00000020U
+#define __HFN_INT_CPE_Q6 0x00000040U
+#define __HFN_INT_CPE_Q7 0x00000080U
+#define __HFN_INT_RME_Q0 0x00000100U
+#define __HFN_INT_RME_Q1 0x00000200U
+#define __HFN_INT_RME_Q2 0x00000400U
+#define __HFN_INT_RME_Q3 0x00000800U
+#define __HFN_INT_RME_Q4 0x00001000U
+#define __HFN_INT_RME_Q5 0x00002000U
+#define __HFN_INT_RME_Q6 0x00004000U
+#define __HFN_INT_RME_Q7 0x00008000U
+#define __HFN_INT_ERR_EMC 0x00010000U
+#define __HFN_INT_ERR_LPU0 0x00020000U
+#define __HFN_INT_ERR_LPU1 0x00040000U
+#define __HFN_INT_ERR_PSS 0x00080000U
+#define __HFN_INT_MBOX_LPU0 0x00100000U
+#define __HFN_INT_MBOX_LPU1 0x00200000U
+#define __HFN_INT_MBOX1_LPU0 0x00400000U
+#define __HFN_INT_MBOX1_LPU1 0x00800000U
+#define __HFN_INT_LL_HALT 0x01000000U
+#define __HFN_INT_CPE_MASK 0x000000ffU
+#define __HFN_INT_RME_MASK 0x0000ff00U
+
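The _Q(__n) macros above compute a queue's register address from the Q0 base plus n times the Q1-Q0 stride, and the BFA_MSIX_* vectors and __HFN_INT_* status bits follow the same queue numbering. A sketch of how the three line up (the demux helper is hypothetical; only the address and vector arithmetic is shown, with no hardware access):

/*
 * Hypothetical demux sketch: for each pending CPE completion queue,
 * derive its depth-register offset and its MSI-X vector.
 */
static void demo_cpe_demux(u32 int_status)
{
	u32 pending = int_status & __HFN_INT_CPE_MASK;
	u32 depth_off;
	int vector;
	int q;

	for (q = 0; q < 4; q++) {
		if (!(pending & (1U << q)))
			continue;
		depth_off = CPE_DEPTH_Q(q);	/* stride CPE_DEPTH_Q1 - CPE_DEPTH_Q0 == 0x40 */
		vector = BFA_MSIX_CPE_Q0 + q;	/* CPE queue n -> MSI-X vector n */
		(void)depth_off;		/* a real handler would readl()/writel() here */
		(void)vector;
	}
}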
+/*
+ * catapult memory map.
+ */
+#define LL_PGN_HQM0 0x0096
+#define LL_PGN_HQM1 0x0097
+#define PSS_SMEM_PAGE_START 0x8000
+#define PSS_SMEM_PGNUM(_pg0, _ma) ((_pg0) + ((_ma) >> 15))
+#define PSS_SMEM_PGOFF(_ma) ((_ma) & 0x7fff)
+
+/*
+ * End of catapult memory map
+ */
+
+#endif /* __BFI_CTREG_H__ */
diff --git a/drivers/net/bna/bfi_ll.h b/drivers/net/bna/bfi_ll.h
new file mode 100644
index 000000000000..bee4d054066a
--- /dev/null
+++ b/drivers/net/bna/bfi_ll.h
@@ -0,0 +1,438 @@
+/*
+ * Linux network driver for Brocade Converged Network Adapter.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License (GPL) Version 2 as
+ * published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Copyright (c) 2005-2010 Brocade Communications Systems, Inc.
+ * All rights reserved
+ * www.brocade.com
+ */
+#ifndef __BFI_LL_H__
+#define __BFI_LL_H__
+
+#include "bfi.h"
+
+#pragma pack(1)
+
+/**
+ * @brief
+ * "enums" for all LL mailbox messages other than IOC
+ */
+enum {
+ BFI_LL_H2I_MAC_UCAST_SET_REQ = 1,
+ BFI_LL_H2I_MAC_UCAST_ADD_REQ = 2,
+ BFI_LL_H2I_MAC_UCAST_DEL_REQ = 3,
+
+ BFI_LL_H2I_MAC_MCAST_ADD_REQ = 4,
+ BFI_LL_H2I_MAC_MCAST_DEL_REQ = 5,
+ BFI_LL_H2I_MAC_MCAST_FILTER_REQ = 6,
+ BFI_LL_H2I_MAC_MCAST_DEL_ALL_REQ = 7,
+
+ BFI_LL_H2I_PORT_ADMIN_REQ = 8,
+ BFI_LL_H2I_STATS_GET_REQ = 9,
+ BFI_LL_H2I_STATS_CLEAR_REQ = 10,
+
+ BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ = 11,
+ BFI_LL_H2I_RXF_DEFAULT_SET_REQ = 12,
+
+ BFI_LL_H2I_TXQ_STOP_REQ = 13,
+ BFI_LL_H2I_RXQ_STOP_REQ = 14,
+
+ BFI_LL_H2I_DIAG_LOOPBACK_REQ = 15,
+
+ BFI_LL_H2I_SET_PAUSE_REQ = 16,
+ BFI_LL_H2I_MTU_INFO_REQ = 17,
+
+ BFI_LL_H2I_RX_REQ = 18,
+};
+
+enum {
+ BFI_LL_I2H_MAC_UCAST_SET_RSP = BFA_I2HM(1),
+ BFI_LL_I2H_MAC_UCAST_ADD_RSP = BFA_I2HM(2),
+ BFI_LL_I2H_MAC_UCAST_DEL_RSP = BFA_I2HM(3),
+
+ BFI_LL_I2H_MAC_MCAST_ADD_RSP = BFA_I2HM(4),
+ BFI_LL_I2H_MAC_MCAST_DEL_RSP = BFA_I2HM(5),
+ BFI_LL_I2H_MAC_MCAST_FILTER_RSP = BFA_I2HM(6),
+ BFI_LL_I2H_MAC_MCAST_DEL_ALL_RSP = BFA_I2HM(7),
+
+ BFI_LL_I2H_PORT_ADMIN_RSP = BFA_I2HM(8),
+ BFI_LL_I2H_STATS_GET_RSP = BFA_I2HM(9),
+ BFI_LL_I2H_STATS_CLEAR_RSP = BFA_I2HM(10),
+
+ BFI_LL_I2H_RXF_PROMISCUOUS_SET_RSP = BFA_I2HM(11),
+ BFI_LL_I2H_RXF_DEFAULT_SET_RSP = BFA_I2HM(12),
+
+ BFI_LL_I2H_TXQ_STOP_RSP = BFA_I2HM(13),
+ BFI_LL_I2H_RXQ_STOP_RSP = BFA_I2HM(14),
+
+ BFI_LL_I2H_DIAG_LOOPBACK_RSP = BFA_I2HM(15),
+
+ BFI_LL_I2H_SET_PAUSE_RSP = BFA_I2HM(16),
+
+ BFI_LL_I2H_MTU_INFO_RSP = BFA_I2HM(17),
+ BFI_LL_I2H_RX_RSP = BFA_I2HM(18),
+
+ BFI_LL_I2H_LINK_DOWN_AEN = BFA_I2HM(19),
+ BFI_LL_I2H_LINK_UP_AEN = BFA_I2HM(20),
+
+ BFI_LL_I2H_PORT_ENABLE_AEN = BFA_I2HM(21),
+ BFI_LL_I2H_PORT_DISABLE_AEN = BFA_I2HM(22),
+};
+
+/**
+ * @brief bfi_ll_mac_addr_req is used by:
+ * BFI_LL_H2I_MAC_UCAST_SET_REQ
+ * BFI_LL_H2I_MAC_UCAST_ADD_REQ
+ * BFI_LL_H2I_MAC_UCAST_DEL_REQ
+ * BFI_LL_H2I_MAC_MCAST_ADD_REQ
+ * BFI_LL_H2I_MAC_MCAST_DEL_REQ
+ */
+struct bfi_ll_mac_addr_req {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u8 rxf_id;
+ u8 rsvd1[3];
+ mac_t mac_addr;
+ u8 rsvd2[2];
+};
+
+/**
+ * @brief bfi_ll_mcast_filter_req is used by:
+ * BFI_LL_H2I_MAC_MCAST_FILTER_REQ
+ */
+struct bfi_ll_mcast_filter_req {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u8 rxf_id;
+ u8 enable;
+ u8 rsvd[2];
+};
+
+/**
+ * @brief bfi_ll_mcast_del_all_req is used by:
+ * BFI_LL_H2I_MAC_MCAST_DEL_ALL_REQ
+ */
+struct bfi_ll_mcast_del_all_req {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u8 rxf_id;
+ u8 rsvd[3];
+};
+
+/**
+ * @brief bfi_ll_q_stop_req is used by:
+ * BFI_LL_H2I_TXQ_STOP_REQ
+ * BFI_LL_H2I_RXQ_STOP_REQ
+ */
+struct bfi_ll_q_stop_req {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u32 q_id_mask[2]; /* !< bit-mask for queue ids */
+};
+
+/**
+ * @brief bfi_ll_stats_req is used by:
+ * BFI_LL_H2I_STATS_GET_REQ
+ * BFI_LL_H2I_STATS_CLEAR_REQ
+ */
+struct bfi_ll_stats_req {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u16 stats_mask; /* !< bit-mask for non-function statistics */
+ u8 rsvd[2];
+ u32 rxf_id_mask[2]; /* !< bit-mask for RxF Statistics */
+ u32 txf_id_mask[2]; /* !< bit-mask for TxF Statistics */
+ union bfi_addr_u host_buffer; /* !< where statistics are returned */
+};
+
+/**
+ * @brief defines for "stats_mask" above.
+ */
+#define BFI_LL_STATS_MAC (1 << 0) /* !< MAC Statistics */
+#define BFI_LL_STATS_BPC (1 << 1) /* !< Pause Stats from BPC */
+#define BFI_LL_STATS_RAD (1 << 2) /* !< Rx Admission Statistics */
+#define BFI_LL_STATS_RX_FC (1 << 3) /* !< Rx FC Stats from RxA */
+#define BFI_LL_STATS_TX_FC (1 << 4) /* !< Tx FC Stats from TxA */
+
+#define BFI_LL_STATS_ALL 0x1f
+
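The stats_mask bits select which of the non-function statistics blocks the firmware gathers into host_buffer. A sketch of filling such a request (the helper is hypothetical, the DMA address is assumed to be supplied by the caller, and any byte-order conversion the firmware interface may require is omitted here):

/*
 * Illustrative sketch only: request MAC and BPC pause statistics for
 * RxF 0. `buf_addr' is assumed to already hold the DMA address of the
 * host-side statistics buffer.
 */
static void demo_fill_stats_req(struct bfi_ll_stats_req *req,
				union bfi_addr_u buf_addr)
{
	memset(req, 0, sizeof(*req));
	req->mh.msg_class = BFI_MC_LL;
	req->mh.msg_id = BFI_LL_H2I_STATS_GET_REQ;
	req->stats_mask = BFI_LL_STATS_MAC | BFI_LL_STATS_BPC;
	req->rxf_id_mask[0] = 1 << 0;	/* RxF 0 only */
	req->host_buffer = buf_addr;
}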
+/**
+ * @brief bfi_ll_port_admin_req
+ */
+struct bfi_ll_port_admin_req {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u8 up;
+ u8 rsvd[3];
+};
+
+/**
+ * @brief bfi_ll_rxf_req is used by:
+ * BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ
+ * BFI_LL_H2I_RXF_DEFAULT_SET_REQ
+ */
+struct bfi_ll_rxf_req {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u8 rxf_id;
+ u8 enable;
+ u8 rsvd[2];
+};
+
+/**
+ * @brief bfi_ll_rxf_multi_req is used by:
+ * BFI_LL_H2I_RX_REQ
+ */
+struct bfi_ll_rxf_multi_req {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u32 rxf_id_mask[2];
+ u8 enable;
+ u8 rsvd[3];
+};
+
+/**
+ * @brief enum for Loopback opmodes
+ */
+enum {
+ BFI_LL_DIAG_LB_OPMODE_EXT = 0,
+ BFI_LL_DIAG_LB_OPMODE_CBL = 1,
+};
+
+/**
+ * @brief bfi_ll_set_pause_req is used by:
+ * BFI_LL_H2I_SET_PAUSE_REQ
+ */
+struct bfi_ll_set_pause_req {
+ struct bfi_mhdr mh;
+ u8 tx_pause; /* 1 = enable, 0 = disable */
+ u8 rx_pause; /* 1 = enable, 0 = disable */
+ u8 rsvd[2];
+};
+
+/**
+ * @brief bfi_ll_mtu_info_req is used by:
+ * BFI_LL_H2I_MTU_INFO_REQ
+ */
+struct bfi_ll_mtu_info_req {
+ struct bfi_mhdr mh;
+ u16 mtu;
+ u8 rsvd[2];
+};
+
+/**
+ * @brief
+ * Response header format used by all responses
+ * For both responses and asynchronous notifications
+ */
+struct bfi_ll_rsp {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u8 error;
+ u8 rsvd[3];
+};
+
+/**
+ * @brief bfi_ll_aen is used by:
+ * BFI_LL_I2H_LINK_DOWN_AEN
+ * BFI_LL_I2H_LINK_UP_AEN
+ */
+struct bfi_ll_aen {
+ struct bfi_mhdr mh; /*!< common msg header */
+ u32 reason;
+ u8 cee_linkup;
+ u8 prio_map; /*!< LL priority bit-map */
+ u8 rsvd[2];
+};
+
+/**
+ * @brief
+ * The following error codes can be returned
+ * by the mbox commands
+ */
+enum {
+ BFI_LL_CMD_OK = 0,
+ BFI_LL_CMD_FAIL = 1,
+ BFI_LL_CMD_DUP_ENTRY = 2, /* !< Duplicate entry in CAM */
+ BFI_LL_CMD_CAM_FULL = 3, /* !< CAM is full */
+ BFI_LL_CMD_NOT_OWNER = 4, /* !< Not permitted, because not owner */
+ BFI_LL_CMD_NOT_EXEC = 5, /* !< Was not sent to f/w at all */
+ BFI_LL_CMD_WAITING = 6, /* !< Waiting for completion (VMware) */
+ BFI_LL_CMD_PORT_DISABLED = 7, /* !< port in disabled state */
+};
+
+/* Statistics */
+#define BFI_LL_TXF_ID_MAX 64
+#define BFI_LL_RXF_ID_MAX 64
+
+/* TxF Frame Statistics */
+struct bfi_ll_stats_txf {
+ u64 ucast_octets;
+ u64 ucast;
+ u64 ucast_vlan;
+
+ u64 mcast_octets;
+ u64 mcast;
+ u64 mcast_vlan;
+
+ u64 bcast_octets;
+ u64 bcast;
+ u64 bcast_vlan;
+
+ u64 errors;
+ u64 filter_vlan; /* frames filtered due to VLAN */
+ u64 filter_mac_sa; /* frames filtered due to SA check */
+};
+
+/* RxF Frame Statistics */
+struct bfi_ll_stats_rxf {
+ u64 ucast_octets;
+ u64 ucast;
+ u64 ucast_vlan;
+
+ u64 mcast_octets;
+ u64 mcast;
+ u64 mcast_vlan;
+
+ u64 bcast_octets;
+ u64 bcast;
+ u64 bcast_vlan;
+ u64 frame_drops;
+};
+
+/* FC Tx Frame Statistics */
+struct bfi_ll_stats_fc_tx {
+ u64 txf_ucast_octets;
+ u64 txf_ucast;
+ u64 txf_ucast_vlan;
+
+ u64 txf_mcast_octets;
+ u64 txf_mcast;
+ u64 txf_mcast_vlan;
+
+ u64 txf_bcast_octets;
+ u64 txf_bcast;
+ u64 txf_bcast_vlan;
+
+ u64 txf_parity_errors;
+ u64 txf_timeout;
+ u64 txf_fid_parity_errors;
+};
+
+/* FC Rx Frame Statistics */
+struct 
bfi_ll_stats_fc_rx { + u64 rxf_ucast_octets; + u64 rxf_ucast; + u64 rxf_ucast_vlan; + + u64 rxf_mcast_octets; + u64 rxf_mcast; + u64 rxf_mcast_vlan; + + u64 rxf_bcast_octets; + u64 rxf_bcast; + u64 rxf_bcast_vlan; +}; + +/* RAD Frame Statistics */ +struct bfi_ll_stats_rad { + u64 rx_frames; + u64 rx_octets; + u64 rx_vlan_frames; + + u64 rx_ucast; + u64 rx_ucast_octets; + u64 rx_ucast_vlan; + + u64 rx_mcast; + u64 rx_mcast_octets; + u64 rx_mcast_vlan; + + u64 rx_bcast; + u64 rx_bcast_octets; + u64 rx_bcast_vlan; + + u64 rx_drops; +}; + +/* BPC Tx Registers */ +struct bfi_ll_stats_bpc { + /* transmit stats */ + u64 tx_pause[8]; + u64 tx_zero_pause[8]; /*!< Pause cancellation */ + /*!timer_mod)) + +/* MBOX API for PORT, TX, RX */ +#define bna_mbox_qe_fill(_qe, _cmd, _cmd_len, _cbfn, _cbarg) \ +do { \ + memcpy(&((_qe)->cmd.msg[0]), (_cmd), (_cmd_len)); \ + (_qe)->cbfn = (_cbfn); \ + (_qe)->cbarg = (_cbarg); \ +} while (0) + +#define bna_is_small_rxq(rcb) ((rcb)->id == 1) + +#define BNA_MAC_IS_EQUAL(_mac1, _mac2) \ + (!memcmp((_mac1), (_mac2), sizeof(mac_t))) + +#define BNA_POWER_OF_2(x) (((x) & ((x) - 1)) == 0) + +#define BNA_TO_POWER_OF_2(x) \ +do { \ + int _shift = 0; \ + while ((x) && (x) != 1) { \ + (x) >>= 1; \ + _shift++; \ + } \ + (x) <<= _shift; \ +} while (0) + +#define BNA_TO_POWER_OF_2_HIGH(x) \ +do { \ + int n = 1; \ + while (n < (x)) \ + n <<= 1; \ + (x) = n; \ +} while (0) + +/* + * input : _addr-> os dma addr in host endian format, + * output : _bna_dma_addr-> pointer to hw dma addr + */ +#define BNA_SET_DMA_ADDR(_addr, _bna_dma_addr) \ +do { \ + u64 tmp_addr = \ + cpu_to_be64((u64)(_addr)); \ + (_bna_dma_addr)->msb = ((struct bna_dma_addr *)&tmp_addr)->msb; \ + (_bna_dma_addr)->lsb = ((struct bna_dma_addr *)&tmp_addr)->lsb; \ +} while (0) + +/* + * input : _bna_dma_addr-> pointer to hw dma addr + * output : _addr-> os dma addr in host endian format + */ +#define BNA_GET_DMA_ADDR(_bna_dma_addr, _addr) \ +do { \ + (_addr) = ((((u64)ntohl((_bna_dma_addr)->msb))) << 32) \ + | ((ntohl((_bna_dma_addr)->lsb) & 0xffffffff)); \ +} while (0) + +#define containing_rec(addr, type, field) \ + ((type *)((unsigned char *)(addr) - \ + (unsigned char *)(&((type *)0)->field))) + +#define BNA_TXQ_WI_NEEDED(_vectors) (((_vectors) + 3) >> 2) + +/* TxQ element is 64 bytes */ +#define BNA_TXQ_PAGE_INDEX_MAX (PAGE_SIZE >> 6) +#define BNA_TXQ_PAGE_INDEX_MAX_SHIFT (PAGE_SHIFT - 6) + +#define BNA_TXQ_QPGE_PTR_GET(_qe_idx, _qpt_ptr, _qe_ptr, _qe_ptr_range) \ +{ \ + unsigned int page_index; /* index within a page */ \ + void *page_addr; \ + page_index = (_qe_idx) & (BNA_TXQ_PAGE_INDEX_MAX - 1); \ + (_qe_ptr_range) = (BNA_TXQ_PAGE_INDEX_MAX - page_index); \ + page_addr = (_qpt_ptr)[((_qe_idx) >> BNA_TXQ_PAGE_INDEX_MAX_SHIFT)];\ + (_qe_ptr) = &((struct bna_txq_entry *)(page_addr))[page_index]; \ +} + +/* RxQ element is 8 bytes */ +#define BNA_RXQ_PAGE_INDEX_MAX (PAGE_SIZE >> 3) +#define BNA_RXQ_PAGE_INDEX_MAX_SHIFT (PAGE_SHIFT - 3) + +#define BNA_RXQ_QPGE_PTR_GET(_qe_idx, _qpt_ptr, _qe_ptr, _qe_ptr_range) \ +{ \ + unsigned int page_index; /* index within a page */ \ + void *page_addr; \ + page_index = (_qe_idx) & (BNA_RXQ_PAGE_INDEX_MAX - 1); \ + (_qe_ptr_range) = (BNA_RXQ_PAGE_INDEX_MAX - page_index); \ + page_addr = (_qpt_ptr)[((_qe_idx) >> \ + BNA_RXQ_PAGE_INDEX_MAX_SHIFT)]; \ + (_qe_ptr) = &((struct bna_rxq_entry *)(page_addr))[page_index]; \ +} + +/* CQ element is 16 bytes */ +#define BNA_CQ_PAGE_INDEX_MAX (PAGE_SIZE >> 4) +#define BNA_CQ_PAGE_INDEX_MAX_SHIFT (PAGE_SHIFT - 4) + +#define 
BNA_CQ_QPGE_PTR_GET(_qe_idx, _qpt_ptr, _qe_ptr, _qe_ptr_range) \ +{ \ + unsigned int page_index; /* index within a page */ \ + void *page_addr; \ + \ + page_index = (_qe_idx) & (BNA_CQ_PAGE_INDEX_MAX - 1); \ + (_qe_ptr_range) = (BNA_CQ_PAGE_INDEX_MAX - page_index); \ + page_addr = (_qpt_ptr)[((_qe_idx) >> \ + BNA_CQ_PAGE_INDEX_MAX_SHIFT)]; \ + (_qe_ptr) = &((struct bna_cq_entry *)(page_addr))[page_index];\ +} + +#define BNA_QE_INDX_2_PTR(_cast, _qe_idx, _q_base) \ + (&((_cast *)(_q_base))[(_qe_idx)]) + +#define BNA_QE_INDX_RANGE(_qe_idx, _q_depth) ((_q_depth) - (_qe_idx)) + +#define BNA_QE_INDX_ADD(_qe_idx, _qe_num, _q_depth) \ + ((_qe_idx) = ((_qe_idx) + (_qe_num)) & ((_q_depth) - 1)) + +#define BNA_Q_INDEX_CHANGE(_old_idx, _updated_idx, _q_depth) \ + (((_updated_idx) - (_old_idx)) & ((_q_depth) - 1)) + +#define BNA_QE_FREE_CNT(_q_ptr, _q_depth) \ + (((_q_ptr)->consumer_index - (_q_ptr)->producer_index - 1) & \ + ((_q_depth) - 1)) + +#define BNA_QE_IN_USE_CNT(_q_ptr, _q_depth) \ + ((((_q_ptr)->producer_index - (_q_ptr)->consumer_index)) & \ + (_q_depth - 1)) + +#define BNA_Q_GET_CI(_q_ptr) ((_q_ptr)->q.consumer_index) + +#define BNA_Q_GET_PI(_q_ptr) ((_q_ptr)->q.producer_index) + +#define BNA_Q_PI_ADD(_q_ptr, _num) \ + (_q_ptr)->q.producer_index = \ + (((_q_ptr)->q.producer_index + (_num)) & \ + ((_q_ptr)->q.q_depth - 1)) + +#define BNA_Q_CI_ADD(_q_ptr, _num) \ + (_q_ptr)->q.consumer_index = \ + (((_q_ptr)->q.consumer_index + (_num)) \ + & ((_q_ptr)->q.q_depth - 1)) + +#define BNA_Q_FREE_COUNT(_q_ptr) \ + (BNA_QE_FREE_CNT(&((_q_ptr)->q), (_q_ptr)->q.q_depth)) + +#define BNA_Q_IN_USE_COUNT(_q_ptr) \ + (BNA_QE_IN_USE_CNT(&(_q_ptr)->q, (_q_ptr)->q.q_depth)) + +/* These macros build the data portion of the TxQ/RxQ doorbell */ +#define BNA_DOORBELL_Q_PRD_IDX(_pi) (0x80000000 | (_pi)) +#define BNA_DOORBELL_Q_STOP (0x40000000) + +/* These macros build the data portion of the IB doorbell */ +#define BNA_DOORBELL_IB_INT_ACK(_timeout, _events) \ + (0x80000000 | ((_timeout) << 16) | (_events)) +#define BNA_DOORBELL_IB_INT_DISABLE (0x40000000) + +/* Set the coalescing timer for the given ib */ +#define bna_ib_coalescing_timer_set(_i_dbell, _cls_timer) \ + ((_i_dbell)->doorbell_ack = BNA_DOORBELL_IB_INT_ACK((_cls_timer), 0)); + +/* Acks 'events' # of events for a given ib */ +#define bna_ib_ack(_i_dbell, _events) \ + (writel(((_i_dbell)->doorbell_ack | (_events)), \ + (_i_dbell)->doorbell_addr)); + +#define bna_txq_prod_indx_doorbell(_tcb) \ + (writel(BNA_DOORBELL_Q_PRD_IDX((_tcb)->producer_index), \ + (_tcb)->q_dbell)); + +#define bna_rxq_prod_indx_doorbell(_rcb) \ + (writel(BNA_DOORBELL_Q_PRD_IDX((_rcb)->producer_index), \ + (_rcb)->q_dbell)); + +#define BNA_LARGE_PKT_SIZE 1000 + +#define BNA_UPDATE_PKT_CNT(_pkt, _len) \ +do { \ + if ((_len) > BNA_LARGE_PKT_SIZE) { \ + (_pkt)->large_pkt_cnt++; \ + } else { \ + (_pkt)->small_pkt_cnt++; \ + } \ +} while (0) + +#define call_rxf_stop_cbfn(rxf, status) \ + if ((rxf)->stop_cbfn) { \ + (*(rxf)->stop_cbfn)((rxf)->stop_cbarg, (status)); \ + (rxf)->stop_cbfn = NULL; \ + (rxf)->stop_cbarg = NULL; \ + } + +#define call_rxf_start_cbfn(rxf, status) \ + if ((rxf)->start_cbfn) { \ + (*(rxf)->start_cbfn)((rxf)->start_cbarg, (status)); \ + (rxf)->start_cbfn = NULL; \ + (rxf)->start_cbarg = NULL; \ + } + +#define call_rxf_cam_fltr_cbfn(rxf, status) \ + if ((rxf)->cam_fltr_cbfn) { \ + (*(rxf)->cam_fltr_cbfn)((rxf)->cam_fltr_cbarg, rxf->rx, \ + (status)); \ + (rxf)->cam_fltr_cbfn = NULL; \ + (rxf)->cam_fltr_cbarg = NULL; \ + } + +#define call_rxf_pause_cbfn(rxf, status) 
\ + if ((rxf)->oper_state_cbfn) { \ + (*(rxf)->oper_state_cbfn)((rxf)->oper_state_cbarg, rxf->rx,\ + (status)); \ + (rxf)->rxf_flags &= ~BNA_RXF_FL_OPERSTATE_CHANGED; \ + (rxf)->oper_state_cbfn = NULL; \ + (rxf)->oper_state_cbarg = NULL; \ + } + +#define call_rxf_resume_cbfn(rxf, status) call_rxf_pause_cbfn(rxf, status) + +#define is_xxx_enable(mode, bitmask, xxx) ((bitmask & xxx) && (mode & xxx)) + +#define is_xxx_disable(mode, bitmask, xxx) ((bitmask & xxx) && !(mode & xxx)) + +#define xxx_enable(mode, bitmask, xxx) \ +do { \ + bitmask |= xxx; \ + mode |= xxx; \ +} while (0) + +#define xxx_disable(mode, bitmask, xxx) \ +do { \ + bitmask |= xxx; \ + mode &= ~xxx; \ +} while (0) + +#define xxx_inactive(mode, bitmask, xxx) \ +do { \ + bitmask &= ~xxx; \ + mode &= ~xxx; \ +} while (0) + +#define is_promisc_enable(mode, bitmask) \ + is_xxx_enable(mode, bitmask, BNA_RXMODE_PROMISC) + +#define is_promisc_disable(mode, bitmask) \ + is_xxx_disable(mode, bitmask, BNA_RXMODE_PROMISC) + +#define promisc_enable(mode, bitmask) \ + xxx_enable(mode, bitmask, BNA_RXMODE_PROMISC) + +#define promisc_disable(mode, bitmask) \ + xxx_disable(mode, bitmask, BNA_RXMODE_PROMISC) + +#define promisc_inactive(mode, bitmask) \ + xxx_inactive(mode, bitmask, BNA_RXMODE_PROMISC) + +#define is_default_enable(mode, bitmask) \ + is_xxx_enable(mode, bitmask, BNA_RXMODE_DEFAULT) + +#define is_default_disable(mode, bitmask) \ + is_xxx_disable(mode, bitmask, BNA_RXMODE_DEFAULT) + +#define default_enable(mode, bitmask) \ + xxx_enable(mode, bitmask, BNA_RXMODE_DEFAULT) + +#define default_disable(mode, bitmask) \ + xxx_disable(mode, bitmask, BNA_RXMODE_DEFAULT) + +#define default_inactive(mode, bitmask) \ + xxx_inactive(mode, bitmask, BNA_RXMODE_DEFAULT) + +#define is_allmulti_enable(mode, bitmask) \ + is_xxx_enable(mode, bitmask, BNA_RXMODE_ALLMULTI) + +#define is_allmulti_disable(mode, bitmask) \ + is_xxx_disable(mode, bitmask, BNA_RXMODE_ALLMULTI) + +#define allmulti_enable(mode, bitmask) \ + xxx_enable(mode, bitmask, BNA_RXMODE_ALLMULTI) + +#define allmulti_disable(mode, bitmask) \ + xxx_disable(mode, bitmask, BNA_RXMODE_ALLMULTI) + +#define allmulti_inactive(mode, bitmask) \ + xxx_inactive(mode, bitmask, BNA_RXMODE_ALLMULTI) + +#define GET_RXQS(rxp, q0, q1) do { \ + switch ((rxp)->type) { \ + case BNA_RXP_SINGLE: \ + (q0) = rxp->rxq.single.only; \ + (q1) = NULL; \ + break; \ + case BNA_RXP_SLR: \ + (q0) = rxp->rxq.slr.large; \ + (q1) = rxp->rxq.slr.small; \ + break; \ + case BNA_RXP_HDS: \ + (q0) = rxp->rxq.hds.data; \ + (q1) = rxp->rxq.hds.hdr; \ + break; \ + } \ +} while (0) + +/** + * + * Function prototypes + * + */ + +/** + * BNA + */ + +/* Internal APIs */ +void bna_adv_res_req(struct bna_res_info *res_info); + +/* APIs for BNAD */ +void bna_res_req(struct bna_res_info *res_info); +void bna_init(struct bna *bna, struct bnad *bnad, + struct bfa_pcidev *pcidev, + struct bna_res_info *res_info); +void bna_uninit(struct bna *bna); +void bna_stats_get(struct bna *bna); +void bna_stats_clr(struct bna *bna); +void bna_get_perm_mac(struct bna *bna, u8 *mac); + +/* APIs for Rx */ +int bna_rit_mod_can_satisfy(struct bna_rit_mod *rit_mod, int seg_size); + +/* APIs for RxF */ +struct bna_mac *bna_ucam_mod_mac_get(struct bna_ucam_mod *ucam_mod); +void bna_ucam_mod_mac_put(struct bna_ucam_mod *ucam_mod, + struct bna_mac *mac); +struct bna_mac *bna_mcam_mod_mac_get(struct bna_mcam_mod *mcam_mod); +void bna_mcam_mod_mac_put(struct bna_mcam_mod *mcam_mod, + struct bna_mac *mac); +struct bna_rit_segment * +bna_rit_mod_seg_get(struct 
bna_rit_mod *rit_mod, int seg_size);
+void bna_rit_mod_seg_put(struct bna_rit_mod *rit_mod,
+ struct bna_rit_segment *seg);
+
+/**
+ * DEVICE
+ */
+
+/* Internal APIs */
+void bna_adv_device_init(struct bna_device *device, struct bna *bna,
+ struct bna_res_info *res_info);
+
+/* APIs for BNA */
+void bna_device_init(struct bna_device *device, struct bna *bna,
+ struct bna_res_info *res_info);
+void bna_device_uninit(struct bna_device *device);
+void bna_device_cb_port_stopped(void *arg, enum bna_cb_status status);
+int bna_device_status_get(struct bna_device *device);
+int bna_device_state_get(struct bna_device *device);
+
+/* APIs for BNAD */
+void bna_device_enable(struct bna_device *device);
+void bna_device_disable(struct bna_device *device,
+ enum bna_cleanup_type type);
+
+/**
+ * MBOX
+ */
+
+/* APIs for DEVICE */
+void bna_mbox_mod_init(struct bna_mbox_mod *mbox_mod, struct bna *bna);
+void bna_mbox_mod_uninit(struct bna_mbox_mod *mbox_mod);
+void bna_mbox_mod_start(struct bna_mbox_mod *mbox_mod);
+void bna_mbox_mod_stop(struct bna_mbox_mod *mbox_mod);
+
+/* APIs for PORT, TX, RX */
+void bna_mbox_handler(struct bna *bna, u32 intr_status);
+void bna_mbox_send(struct bna *bna, struct bna_mbox_qe *mbox_qe);
+
+/**
+ * PORT
+ */
+
+/* APIs for BNA */
+void bna_port_init(struct bna_port *port, struct bna *bna);
+void bna_port_uninit(struct bna_port *port);
+int bna_port_state_get(struct bna_port *port);
+int bna_llport_state_get(struct bna_llport *llport);
+
+/* APIs for DEVICE */
+void bna_port_start(struct bna_port *port);
+void bna_port_stop(struct bna_port *port);
+void bna_port_fail(struct bna_port *port);
+
+/* API for RX */
+int bna_port_mtu_get(struct bna_port *port);
+void bna_llport_admin_up(struct bna_llport *llport);
+void bna_llport_admin_down(struct bna_llport *llport);
+
+/* API for BNAD */
+void bna_port_enable(struct bna_port *port);
+void bna_port_disable(struct bna_port *port, enum bna_cleanup_type type,
+ void (*cbfn)(void *, enum bna_cb_status));
+void bna_port_pause_config(struct bna_port *port,
+ struct bna_pause_config *pause_config,
+ void (*cbfn)(struct bnad *, enum bna_cb_status));
+void bna_port_mtu_set(struct bna_port *port, int mtu,
+ void (*cbfn)(struct bnad *, enum bna_cb_status));
+void bna_port_mac_get(struct bna_port *port, mac_t *mac);
+void bna_port_type_set(struct bna_port *port, enum bna_port_type type);
+void bna_port_linkcbfn_set(struct bna_port *port,
+ void (*linkcbfn)(struct bnad *,
+ enum bna_link_status));
+void bna_port_admin_up(struct bna_port *port);
+void bna_port_admin_down(struct bna_port *port);
+
+/* Callbacks for TX, RX */
+void bna_port_cb_tx_stopped(struct bna_port *port,
+ enum bna_cb_status status);
+void bna_port_cb_rx_stopped(struct bna_port *port,
+ enum bna_cb_status status);
+
+/* Callbacks for MBOX */
+void bna_port_cb_link_up(struct bna_port *port, struct bfi_ll_aen *aen,
+ int status);
+void bna_port_cb_link_down(struct bna_port *port, int status);
+
+/**
+ * IB
+ */
+
+/* APIs for BNA */
+void bna_ib_mod_init(struct bna_ib_mod *ib_mod, struct bna *bna,
+ struct bna_res_info *res_info);
+void bna_ib_mod_uninit(struct bna_ib_mod *ib_mod);
+
+/* APIs for TX, RX */
+struct bna_ib *bna_ib_get(struct bna_ib_mod *ib_mod,
+ enum bna_intr_type intr_type, int vector);
+void bna_ib_put(struct bna_ib_mod *ib_mod, struct bna_ib *ib);
+int bna_ib_reserve_idx(struct bna_ib *ib);
+void bna_ib_release_idx(struct bna_ib *ib, int idx);
+int bna_ib_config(struct bna_ib *ib, struct bna_ib_config *ib_config);
+void bna_ib_start(struct bna_ib *ib);
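The IB prototypes above and just below describe an acquire/configure/start lifecycle for an interrupt block. A plausible ordering, inferred from the signatures alone (the helper, the error-return conventions, and the BNA_INTR_T_MSIX enumerator are assumptions of this sketch, since bna_types.h is not part of this hunk):

/*
 * Hypothetical usage sketch of the IB API: acquire an IB for an MSI-X
 * vector, reserve an index, configure and start it.
 */
static int demo_ib_setup(struct bna_ib_mod *ib_mod,
			 struct bna_ib_config *cfg)
{
	struct bna_ib *ib;

	ib = bna_ib_get(ib_mod, BNA_INTR_T_MSIX, BFA_MSIX_RME_Q0);
	if (!ib)
		return -1;

	if (bna_ib_reserve_idx(ib) < 0) {	/* assumed to return < 0 on failure */
		bna_ib_put(ib_mod, ib);
		return -1;
	}

	bna_ib_config(ib, cfg);	/* cfg contents are driver-specific */
	bna_ib_start(ib);	/* later torn down with bna_ib_stop()/bna_ib_put() */
	return 0;
}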
+void bna_ib_stop(struct bna_ib *ib);
+void bna_ib_fail(struct bna_ib *ib);
+void bna_ib_coalescing_timeo_set(struct bna_ib *ib, u8 coalescing_timeo);
+
+/**
+ * TX MODULE AND TX
+ */
+
+/* Internal APIs */
+void bna_tx_prio_changed(struct bna_tx *tx, int prio);
+
+/* APIs for BNA */
+void bna_tx_mod_init(struct bna_tx_mod *tx_mod, struct bna *bna,
+ struct bna_res_info *res_info);
+void bna_tx_mod_uninit(struct bna_tx_mod *tx_mod);
+int bna_tx_state_get(struct bna_tx *tx);
+
+/* APIs for PORT */
+void bna_tx_mod_start(struct bna_tx_mod *tx_mod, enum bna_tx_type type);
+void bna_tx_mod_stop(struct bna_tx_mod *tx_mod, enum bna_tx_type type);
+void bna_tx_mod_fail(struct bna_tx_mod *tx_mod);
+void bna_tx_mod_prio_changed(struct bna_tx_mod *tx_mod, int prio);
+void bna_tx_mod_cee_link_status(struct bna_tx_mod *tx_mod, int cee_link);
+
+/* APIs for BNAD */
+void bna_tx_res_req(int num_txq, int txq_depth,
+ struct bna_res_info *res_info);
+struct bna_tx *bna_tx_create(struct bna *bna, struct bnad *bnad,
+ struct bna_tx_config *tx_cfg,
+ struct bna_tx_event_cbfn *tx_cbfn,
+ struct bna_res_info *res_info, void *priv);
+void bna_tx_destroy(struct bna_tx *tx);
+void bna_tx_enable(struct bna_tx *tx);
+void bna_tx_disable(struct bna_tx *tx, enum bna_cleanup_type type,
+ void (*cbfn)(void *, struct bna_tx *,
+ enum bna_cb_status));
+enum bna_cb_status
+bna_tx_prio_set(struct bna_tx *tx, int prio,
+ void (*cbfn)(struct bnad *, struct bna_tx *,
+ enum bna_cb_status));
+void bna_tx_coalescing_timeo_set(struct bna_tx *tx, int coalescing_timeo);
+
+/**
+ * RX MODULE, RX, RXF
+ */
+
+/* Internal APIs */
+void rxf_cb_cam_fltr_mbox_cmd(void *arg, int status);
+void rxf_cam_mbox_cmd(struct bna_rxf *rxf, u8 cmd,
+ const struct bna_mac *mac_addr);
+void __rxf_vlan_filter_set(struct bna_rxf *rxf, enum bna_status status);
+void bna_rxf_adv_init(struct bna_rxf *rxf,
+ struct bna_rx *rx,
+ struct bna_rx_config *q_config);
+int rxf_process_packet_filter_ucast(struct bna_rxf *rxf);
+int rxf_process_packet_filter_promisc(struct bna_rxf *rxf);
+int rxf_process_packet_filter_default(struct bna_rxf *rxf);
+int rxf_process_packet_filter_allmulti(struct bna_rxf *rxf);
+int rxf_clear_packet_filter_ucast(struct bna_rxf *rxf);
+int rxf_clear_packet_filter_promisc(struct bna_rxf *rxf);
+int rxf_clear_packet_filter_default(struct bna_rxf *rxf);
+int rxf_clear_packet_filter_allmulti(struct bna_rxf *rxf);
+void rxf_reset_packet_filter_ucast(struct bna_rxf *rxf);
+void rxf_reset_packet_filter_promisc(struct bna_rxf *rxf);
+void rxf_reset_packet_filter_default(struct bna_rxf *rxf);
+void rxf_reset_packet_filter_allmulti(struct bna_rxf *rxf);
+
+/* APIs for BNA */
+void bna_rx_mod_init(struct bna_rx_mod *rx_mod, struct bna *bna,
+ struct bna_res_info *res_info);
+void bna_rx_mod_uninit(struct bna_rx_mod *rx_mod);
+int bna_rx_state_get(struct bna_rx *rx);
+int bna_rxf_state_get(struct bna_rxf *rxf);
+
+/* APIs for PORT */
+void bna_rx_mod_start(struct bna_rx_mod *rx_mod, enum bna_rx_type type);
+void bna_rx_mod_stop(struct bna_rx_mod *rx_mod, enum bna_rx_type type);
+void bna_rx_mod_fail(struct bna_rx_mod *rx_mod);
+
+/* APIs for BNAD */
+void bna_rx_res_req(struct bna_rx_config *rx_config,
+ struct bna_res_info *res_info);
+struct bna_rx *bna_rx_create(struct bna *bna, struct bnad *bnad,
+ struct bna_rx_config *rx_cfg,
+ struct bna_rx_event_cbfn *rx_cbfn,
+ struct bna_res_info *res_info, void *priv);
+void bna_rx_destroy(struct bna_rx *rx);
+void bna_rx_enable(struct bna_rx *rx);
+void bna_rx_disable(struct bna_rx *rx, 
enum bna_cleanup_type type, + void (*cbfn)(void *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_coalescing_timeo_set(struct bna_rx *rx, int coalescing_timeo); +void bna_rx_dim_reconfig(struct bna *bna, u32 vector[][BNA_BIAS_T_MAX]); +void bna_rx_dim_update(struct bna_ccb *ccb); +enum bna_cb_status +bna_rx_ucast_set(struct bna_rx *rx, u8 *ucmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_ucast_add(struct bna_rx *rx, u8* ucmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_ucast_del(struct bna_rx *rx, u8 *ucmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_mcast_add(struct bna_rx *rx, u8 *mcmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_mcast_del(struct bna_rx *rx, u8 *mcmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_mcast_listset(struct bna_rx *rx, int count, u8 *mcmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_mcast_delall(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +enum bna_cb_status +bna_rx_mode_set(struct bna_rx *rx, enum bna_rxmode rxmode, + enum bna_rxmode bitmask, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_vlan_add(struct bna_rx *rx, int vlan_id); +void bna_rx_vlan_del(struct bna_rx *rx, int vlan_id); +void bna_rx_vlanfilter_enable(struct bna_rx *rx); +void bna_rx_vlanfilter_disable(struct bna_rx *rx); +void bna_rx_rss_enable(struct bna_rx *rx); +void bna_rx_rss_disable(struct bna_rx *rx); +void bna_rx_rss_reconfig(struct bna_rx *rx, struct bna_rxf_rss *rss_config); +void bna_rx_rss_rit_set(struct bna_rx *rx, unsigned int *vectors, + int nvectors); +void bna_rx_hds_enable(struct bna_rx *rx, struct bna_rxf_hds *hds_config, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_hds_disable(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_receive_pause(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); +void bna_rx_receive_resume(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)); + +/* RxF APIs for RX */ +void bna_rxf_start(struct bna_rxf *rxf); +void bna_rxf_stop(struct bna_rxf *rxf); +void bna_rxf_fail(struct bna_rxf *rxf); +void bna_rxf_init(struct bna_rxf *rxf, struct bna_rx *rx, + struct bna_rx_config *q_config); +void bna_rxf_uninit(struct bna_rxf *rxf); + +/* Callback from RXF to RX */ +void bna_rx_cb_rxf_stopped(struct bna_rx *rx, enum bna_cb_status); +void bna_rx_cb_rxf_started(struct bna_rx *rx, enum bna_cb_status); + +/** + * BNAD + */ + +/* Callbacks for BNA */ +void bnad_cb_stats_get(struct bnad *bnad, enum bna_cb_status status, + struct bna_stats *stats); +void bnad_cb_stats_clr(struct bnad *bnad); + +/* Callbacks for DEVICE */ +void bnad_cb_device_enabled(struct bnad *bnad, enum bna_cb_status status); +void bnad_cb_device_disabled(struct bnad *bnad, enum bna_cb_status status); +void bnad_cb_device_enable_mbox_intr(struct bnad *bnad); +void bnad_cb_device_disable_mbox_intr(struct bnad *bnad); + +/* Callbacks for port */ +void bnad_cb_port_link_status(struct bnad *bnad, + enum bna_link_status status); + +#endif /* __BNA_H__ */ diff --git a/drivers/net/bna/bna_ctrl.c b/drivers/net/bna/bna_ctrl.c new file mode 
100644 index 000000000000..9d41ebf41cf4 --- /dev/null +++ b/drivers/net/bna/bna_ctrl.c @@ -0,0 +1,3626 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#include "bna.h" +#include "bfa_sm.h" +#include "bfa_wc.h" + +/** + * MBOX + */ +static int +bna_is_aen(u8 msg_id) +{ + return (msg_id == BFI_LL_I2H_LINK_DOWN_AEN || + msg_id == BFI_LL_I2H_LINK_UP_AEN); +} + +static void +bna_mbox_aen_callback(struct bna *bna, struct bfi_mbmsg *msg) +{ + struct bfi_ll_aen *aen = (struct bfi_ll_aen *)(msg); + + switch (aen->mh.msg_id) { + case BFI_LL_I2H_LINK_UP_AEN: + bna_port_cb_link_up(&bna->port, aen, aen->reason); + break; + case BFI_LL_I2H_LINK_DOWN_AEN: + bna_port_cb_link_down(&bna->port, aen->reason); + break; + default: + break; + } +} + +static void +bna_ll_isr(void *llarg, struct bfi_mbmsg *msg) +{ + struct bna *bna = (struct bna *)(llarg); + struct bfi_ll_rsp *mb_rsp = (struct bfi_ll_rsp *)(msg); + struct bfi_mhdr *cmd_h, *rsp_h; + struct bna_mbox_qe *mb_qe = NULL; + int to_post = 0; + u8 aen = 0; + char message[BNA_MESSAGE_SIZE]; + + aen = bna_is_aen(mb_rsp->mh.msg_id); + + if (!aen) { + mb_qe = bfa_q_first(&bna->mbox_mod.posted_q); + cmd_h = (struct bfi_mhdr *)(&mb_qe->cmd.msg[0]); + rsp_h = (struct bfi_mhdr *)(&mb_rsp->mh); + + if ((BFA_I2HM(cmd_h->msg_id) == rsp_h->msg_id) && + (cmd_h->mtag.i2htok == rsp_h->mtag.i2htok)) { + /* Remove the request from posted_q, update state */ + list_del(&mb_qe->qe); + bna->mbox_mod.msg_pending--; + if (list_empty(&bna->mbox_mod.posted_q)) + bna->mbox_mod.state = BNA_MBOX_FREE; + else + to_post = 1; + + /* Dispatch the cbfn */ + if (mb_qe->cbfn) + mb_qe->cbfn(mb_qe->cbarg, mb_rsp->error); + + /* Post the next entry, if needed */ + if (to_post) { + mb_qe = bfa_q_first(&bna->mbox_mod.posted_q); + bfa_ioc_mbox_queue(&bna->device.ioc, + &mb_qe->cmd); + } + } else { + snprintf(message, BNA_MESSAGE_SIZE, + "No matching rsp for [%d:%d:%d]\n", + mb_rsp->mh.msg_class, mb_rsp->mh.msg_id, + mb_rsp->mh.mtag.i2htok); + pr_info("%s", message); + } + + } else + bna_mbox_aen_callback(bna, msg); +} + +void +bna_err_handler(struct bna *bna, u32 intr_status) +{ + u32 init_halt; + + if (intr_status & __HALT_STATUS_BITS) { + init_halt = readl(bna->device.ioc.ioc_regs.ll_halt); + init_halt &= ~__FW_INIT_HALT_P; + writel(init_halt, bna->device.ioc.ioc_regs.ll_halt); + } + + bfa_ioc_error_isr(&bna->device.ioc); +} + +void +bna_mbox_handler(struct bna *bna, u32 intr_status) +{ + if (BNA_IS_ERR_INTR(intr_status)) { + bna_err_handler(bna, intr_status); + return; + } + if (BNA_IS_MBOX_INTR(intr_status)) + bfa_ioc_mbox_isr(&bna->device.ioc); +} + +void +bna_mbox_send(struct bna *bna, struct bna_mbox_qe *mbox_qe) +{ + struct bfi_mhdr *mh; + + mh = (struct bfi_mhdr *)(&mbox_qe->cmd.msg[0]); + + mh->mtag.i2htok = htons(bna->mbox_mod.msg_ctr); + bna->mbox_mod.msg_ctr++; + bna->mbox_mod.msg_pending++; + if (bna->mbox_mod.state == BNA_MBOX_FREE) { + list_add_tail(&mbox_qe->qe, &bna->mbox_mod.posted_q); + 
bfa_ioc_mbox_queue(&bna->device.ioc, &mbox_qe->cmd); + bna->mbox_mod.state = BNA_MBOX_POSTED; + } else { + list_add_tail(&mbox_qe->qe, &bna->mbox_mod.posted_q); + } +} + +void +bna_mbox_flush_q(struct bna *bna, struct list_head *q) +{ + struct bna_mbox_qe *mb_qe = NULL; + struct bfi_mhdr *cmd_h; + struct list_head *mb_q; + void (*cbfn)(void *arg, int status); + void *cbarg; + + mb_q = &bna->mbox_mod.posted_q; + + while (!list_empty(mb_q)) { + bfa_q_deq(mb_q, &mb_qe); + cbfn = mb_qe->cbfn; + cbarg = mb_qe->cbarg; + bfa_q_qe_init(mb_qe); + bna->mbox_mod.msg_pending--; + + cmd_h = (struct bfi_mhdr *)(&mb_qe->cmd.msg[0]); + if (cbfn) + cbfn(cbarg, BNA_CB_NOT_EXEC); + } + + bna->mbox_mod.state = BNA_MBOX_FREE; +} + +void +bna_mbox_mod_start(struct bna_mbox_mod *mbox_mod) +{ +} + +void +bna_mbox_mod_stop(struct bna_mbox_mod *mbox_mod) +{ + bna_mbox_flush_q(mbox_mod->bna, &mbox_mod->posted_q); +} + +void +bna_mbox_mod_init(struct bna_mbox_mod *mbox_mod, struct bna *bna) +{ + bfa_ioc_mbox_regisr(&bna->device.ioc, BFI_MC_LL, bna_ll_isr, bna); + mbox_mod->state = BNA_MBOX_FREE; + mbox_mod->msg_ctr = mbox_mod->msg_pending = 0; + INIT_LIST_HEAD(&mbox_mod->posted_q); + mbox_mod->bna = bna; +} + +void +bna_mbox_mod_uninit(struct bna_mbox_mod *mbox_mod) +{ + mbox_mod->bna = NULL; +} + +/** + * LLPORT + */ +#define call_llport_stop_cbfn(llport, status)\ +do {\ + if ((llport)->stop_cbfn)\ + (llport)->stop_cbfn(&(llport)->bna->port, status);\ + (llport)->stop_cbfn = NULL;\ +} while (0) + +static void bna_fw_llport_up(struct bna_llport *llport); +static void bna_fw_cb_llport_up(void *arg, int status); +static void bna_fw_llport_down(struct bna_llport *llport); +static void bna_fw_cb_llport_down(void *arg, int status); +static void bna_llport_start(struct bna_llport *llport); +static void bna_llport_stop(struct bna_llport *llport); +static void bna_llport_fail(struct bna_llport *llport); + +enum bna_llport_event { + LLPORT_E_START = 1, + LLPORT_E_STOP = 2, + LLPORT_E_FAIL = 3, + LLPORT_E_UP = 4, + LLPORT_E_DOWN = 5, + LLPORT_E_FWRESP_UP = 6, + LLPORT_E_FWRESP_DOWN = 7 +}; + +enum bna_llport_state { + BNA_LLPORT_STOPPED = 1, + BNA_LLPORT_DOWN = 2, + BNA_LLPORT_UP_RESP_WAIT = 3, + BNA_LLPORT_DOWN_RESP_WAIT = 4, + BNA_LLPORT_UP = 5, + BNA_LLPORT_LAST_RESP_WAIT = 6 +}; + +bfa_fsm_state_decl(bna_llport, stopped, struct bna_llport, + enum bna_llport_event); +bfa_fsm_state_decl(bna_llport, down, struct bna_llport, + enum bna_llport_event); +bfa_fsm_state_decl(bna_llport, up_resp_wait, struct bna_llport, + enum bna_llport_event); +bfa_fsm_state_decl(bna_llport, down_resp_wait, struct bna_llport, + enum bna_llport_event); +bfa_fsm_state_decl(bna_llport, up, struct bna_llport, + enum bna_llport_event); +bfa_fsm_state_decl(bna_llport, last_resp_wait, struct bna_llport, + enum bna_llport_event); + +static struct bfa_sm_table llport_sm_table[] = { + {BFA_SM(bna_llport_sm_stopped), BNA_LLPORT_STOPPED}, + {BFA_SM(bna_llport_sm_down), BNA_LLPORT_DOWN}, + {BFA_SM(bna_llport_sm_up_resp_wait), BNA_LLPORT_UP_RESP_WAIT}, + {BFA_SM(bna_llport_sm_down_resp_wait), BNA_LLPORT_DOWN_RESP_WAIT}, + {BFA_SM(bna_llport_sm_up), BNA_LLPORT_UP}, + {BFA_SM(bna_llport_sm_last_resp_wait), BNA_LLPORT_LAST_RESP_WAIT} +}; + +static void +bna_llport_sm_stopped_entry(struct bna_llport *llport) +{ + llport->bna->port.link_cbfn((llport)->bna->bnad, BNA_LINK_DOWN); + call_llport_stop_cbfn(llport, BNA_CB_SUCCESS); +} + +static void +bna_llport_sm_stopped(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case 
LLPORT_E_START: + bfa_fsm_set_state(llport, bna_llport_sm_down); + break; + + case LLPORT_E_STOP: + call_llport_stop_cbfn(llport, BNA_CB_SUCCESS); + break; + + case LLPORT_E_FAIL: + break; + + case LLPORT_E_DOWN: + /* This event is received due to Rx objects failing */ + /* No-op */ + break; + + case LLPORT_E_FWRESP_UP: + case LLPORT_E_FWRESP_DOWN: + /** + * These events are received due to flushing of mbox when + * device fails + */ + /* No-op */ + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_llport_sm_down_entry(struct bna_llport *llport) +{ + bnad_cb_port_link_status((llport)->bna->bnad, BNA_LINK_DOWN); +} + +static void +bna_llport_sm_down(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case LLPORT_E_STOP: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_FAIL: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_UP: + bfa_fsm_set_state(llport, bna_llport_sm_up_resp_wait); + bna_fw_llport_up(llport); + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_llport_sm_up_resp_wait_entry(struct bna_llport *llport) +{ + /** + * NOTE: Do not call bna_fw_llport_up() here. That would overrun the + * mbox, since the down_resp_wait -> up_resp_wait transition on event + * LLPORT_E_UP happens while a response is still outstanding + */ +} + +static void +bna_llport_sm_up_resp_wait(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case LLPORT_E_STOP: + bfa_fsm_set_state(llport, bna_llport_sm_last_resp_wait); + break; + + case LLPORT_E_FAIL: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_DOWN: + bfa_fsm_set_state(llport, bna_llport_sm_down_resp_wait); + break; + + case LLPORT_E_FWRESP_UP: + bfa_fsm_set_state(llport, bna_llport_sm_up); + break; + + case LLPORT_E_FWRESP_DOWN: + /* down_resp_wait -> up_resp_wait transition on LLPORT_E_UP */ + bna_fw_llport_up(llport); + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_llport_sm_down_resp_wait_entry(struct bna_llport *llport) +{ + /** + * NOTE: Do not call bna_fw_llport_down() here. That would overrun the + * mbox, since the up_resp_wait -> down_resp_wait transition on event + * LLPORT_E_DOWN happens while a response is still outstanding + */ +}
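+ +/* + * While a firmware response is outstanding, LLPORT_E_UP / LLPORT_E_DOWN only + * flip between the two resp_wait states; the opposite admin command is sent + * from the FWRESP handler once the pending response arrives. + */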
+ +static void +bna_llport_sm_down_resp_wait(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case LLPORT_E_STOP: + bfa_fsm_set_state(llport, bna_llport_sm_last_resp_wait); + break; + + case LLPORT_E_FAIL: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_UP: + bfa_fsm_set_state(llport, bna_llport_sm_up_resp_wait); + break; + + case LLPORT_E_FWRESP_UP: + /* up_resp_wait->down_resp_wait transition on LLPORT_E_DOWN */ + bna_fw_llport_down(llport); + break; + + case LLPORT_E_FWRESP_DOWN: + bfa_fsm_set_state(llport, bna_llport_sm_down); + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_llport_sm_up_entry(struct bna_llport *llport) +{ +} + +static void +bna_llport_sm_up(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case LLPORT_E_STOP: + bfa_fsm_set_state(llport, bna_llport_sm_last_resp_wait); + bna_fw_llport_down(llport); + break; + + case LLPORT_E_FAIL: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_DOWN: + bfa_fsm_set_state(llport, bna_llport_sm_down_resp_wait); + bna_fw_llport_down(llport); + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_llport_sm_last_resp_wait_entry(struct bna_llport *llport) +{ +} + +static void +bna_llport_sm_last_resp_wait(struct bna_llport *llport, + enum bna_llport_event event) +{ + switch (event) { + case LLPORT_E_FAIL: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + case LLPORT_E_DOWN: + /** + * This event is received due to Rx objects stopping in + * parallel to llport + */ + /* No-op */ + break; + + case LLPORT_E_FWRESP_UP: + /* up_resp_wait->last_resp_wait transition on LLPORT_E_STOP */ + bna_fw_llport_down(llport); + break; + + case LLPORT_E_FWRESP_DOWN: + bfa_fsm_set_state(llport, bna_llport_sm_stopped); + break; + + default: + bfa_sm_fault(llport->bna, event); + } +} + +static void +bna_fw_llport_admin_up(struct bna_llport *llport) +{ + struct bfi_ll_port_admin_req ll_req; + + memset(&ll_req, 0, sizeof(ll_req)); + ll_req.mh.msg_class = BFI_MC_LL; + ll_req.mh.msg_id = BFI_LL_H2I_PORT_ADMIN_REQ; + ll_req.mh.mtag.h2i.lpu_id = 0; + + ll_req.up = BNA_STATUS_T_ENABLED; + + bna_mbox_qe_fill(&llport->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_llport_up, llport); + + bna_mbox_send(llport->bna, &llport->mbox_qe); +} + +static void +bna_fw_llport_up(struct bna_llport *llport) +{ + if (llport->type == BNA_PORT_T_REGULAR) + bna_fw_llport_admin_up(llport); +} + +static void +bna_fw_cb_llport_up(void *arg, int status) +{ + struct bna_llport *llport = (struct bna_llport *)arg; + + bfa_q_qe_init(&llport->mbox_qe.qe); + bfa_fsm_send_event(llport, LLPORT_E_FWRESP_UP); +} + +static void +bna_fw_llport_admin_down(struct bna_llport *llport) +{ + struct bfi_ll_port_admin_req ll_req; + + memset(&ll_req, 0, sizeof(ll_req)); + ll_req.mh.msg_class = BFI_MC_LL; + ll_req.mh.msg_id = BFI_LL_H2I_PORT_ADMIN_REQ; + ll_req.mh.mtag.h2i.lpu_id = 0; + + ll_req.up = BNA_STATUS_T_DISABLED; + + bna_mbox_qe_fill(&llport->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_llport_down, llport); + + bna_mbox_send(llport->bna, &llport->mbox_qe); +} + +static void +bna_fw_llport_down(struct bna_llport *llport) +{ + if (llport->type == BNA_PORT_T_REGULAR) + bna_fw_llport_admin_down(llport); +} + +static void +bna_fw_cb_llport_down(void *arg, int status) +{ + struct bna_llport 
*llport = (struct bna_llport *)arg; + + bfa_q_qe_init(&llport->mbox_qe.qe); + bfa_fsm_send_event(llport, LLPORT_E_FWRESP_DOWN); +} + +void +bna_port_cb_llport_stopped(struct bna_port *port, + enum bna_cb_status status) +{ + bfa_wc_down(&port->chld_stop_wc); +} + +static void +bna_llport_init(struct bna_llport *llport, struct bna *bna) +{ + llport->flags |= BNA_LLPORT_F_ENABLED; + llport->type = BNA_PORT_T_REGULAR; + llport->bna = bna; + + llport->link_status = BNA_LINK_DOWN; + + llport->admin_up_count = 0; + + llport->stop_cbfn = NULL; + + bfa_q_qe_init(&llport->mbox_qe.qe); + + bfa_fsm_set_state(llport, bna_llport_sm_stopped); +} + +static void +bna_llport_uninit(struct bna_llport *llport) +{ + llport->flags &= ~BNA_LLPORT_F_ENABLED; + + llport->bna = NULL; +} + +static void +bna_llport_start(struct bna_llport *llport) +{ + bfa_fsm_send_event(llport, LLPORT_E_START); +} + +static void +bna_llport_stop(struct bna_llport *llport) +{ + llport->stop_cbfn = bna_port_cb_llport_stopped; + + bfa_fsm_send_event(llport, LLPORT_E_STOP); +} + +static void +bna_llport_fail(struct bna_llport *llport) +{ + bfa_fsm_send_event(llport, LLPORT_E_FAIL); +} + +int +bna_llport_state_get(struct bna_llport *llport) +{ + return bfa_sm_to_state(llport_sm_table, llport->fsm); +} + +void +bna_llport_admin_up(struct bna_llport *llport) +{ + llport->admin_up_count++; + + if (llport->admin_up_count == 1) { + llport->flags |= BNA_LLPORT_F_RX_ENABLED; + if (llport->flags & BNA_LLPORT_F_ENABLED) + bfa_fsm_send_event(llport, LLPORT_E_UP); + } +} + +void +bna_llport_admin_down(struct bna_llport *llport) +{ + llport->admin_up_count--; + + if (llport->admin_up_count == 0) { + llport->flags &= ~BNA_LLPORT_F_RX_ENABLED; + if (llport->flags & BNA_LLPORT_F_ENABLED) + bfa_fsm_send_event(llport, LLPORT_E_DOWN); + } +} + +/** + * PORT + */ +#define bna_port_chld_start(port)\ +do {\ + enum bna_tx_type tx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_TX_T_REGULAR : BNA_TX_T_LOOPBACK;\ + enum bna_rx_type rx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_RX_T_REGULAR : BNA_RX_T_LOOPBACK;\ + bna_llport_start(&(port)->llport);\ + bna_tx_mod_start(&(port)->bna->tx_mod, tx_type);\ + bna_rx_mod_start(&(port)->bna->rx_mod, rx_type);\ +} while (0) + +#define bna_port_chld_stop(port)\ +do {\ + enum bna_tx_type tx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_TX_T_REGULAR : BNA_TX_T_LOOPBACK;\ + enum bna_rx_type rx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_RX_T_REGULAR : BNA_RX_T_LOOPBACK;\ + bfa_wc_up(&(port)->chld_stop_wc);\ + bfa_wc_up(&(port)->chld_stop_wc);\ + bfa_wc_up(&(port)->chld_stop_wc);\ + bna_llport_stop(&(port)->llport);\ + bna_tx_mod_stop(&(port)->bna->tx_mod, tx_type);\ + bna_rx_mod_stop(&(port)->bna->rx_mod, rx_type);\ +} while (0) + +#define bna_port_chld_fail(port)\ +do {\ + bna_llport_fail(&(port)->llport);\ + bna_tx_mod_fail(&(port)->bna->tx_mod);\ + bna_rx_mod_fail(&(port)->bna->rx_mod);\ +} while (0) + +#define bna_port_rx_start(port)\ +do {\ + enum bna_rx_type rx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_RX_T_REGULAR : BNA_RX_T_LOOPBACK;\ + bna_rx_mod_start(&(port)->bna->rx_mod, rx_type);\ +} while (0) + +#define bna_port_rx_stop(port)\ +do {\ + enum bna_rx_type rx_type = ((port)->type == BNA_PORT_T_REGULAR) ?\ + BNA_RX_T_REGULAR : BNA_RX_T_LOOPBACK;\ + bfa_wc_up(&(port)->chld_stop_wc);\ + bna_rx_mod_stop(&(port)->bna->rx_mod, rx_type);\ +} while (0) + +#define call_port_stop_cbfn(port, status)\ +do {\ + if ((port)->stop_cbfn)\ + (port)->stop_cbfn((port)->stop_cbarg, status);\ + 
(port)->stop_cbfn = NULL;\ + (port)->stop_cbarg = NULL;\ +} while (0) + +#define call_port_pause_cbfn(port, status)\ +do {\ + if ((port)->pause_cbfn)\ + (port)->pause_cbfn((port)->bna->bnad, status);\ + (port)->pause_cbfn = NULL;\ +} while (0) + +#define call_port_mtu_cbfn(port, status)\ +do {\ + if ((port)->mtu_cbfn)\ + (port)->mtu_cbfn((port)->bna->bnad, status);\ + (port)->mtu_cbfn = NULL;\ +} while (0) + +static void bna_fw_pause_set(struct bna_port *port); +static void bna_fw_cb_pause_set(void *arg, int status); +static void bna_fw_mtu_set(struct bna_port *port); +static void bna_fw_cb_mtu_set(void *arg, int status); + +enum bna_port_event { + PORT_E_START = 1, + PORT_E_STOP = 2, + PORT_E_FAIL = 3, + PORT_E_PAUSE_CFG = 4, + PORT_E_MTU_CFG = 5, + PORT_E_CHLD_STOPPED = 6, + PORT_E_FWRESP_PAUSE = 7, + PORT_E_FWRESP_MTU = 8 +}; + +enum bna_port_state { + BNA_PORT_STOPPED = 1, + BNA_PORT_MTU_INIT_WAIT = 2, + BNA_PORT_PAUSE_INIT_WAIT = 3, + BNA_PORT_LAST_RESP_WAIT = 4, + BNA_PORT_STARTED = 5, + BNA_PORT_PAUSE_CFG_WAIT = 6, + BNA_PORT_RX_STOP_WAIT = 7, + BNA_PORT_MTU_CFG_WAIT = 8, + BNA_PORT_CHLD_STOP_WAIT = 9 +}; + +bfa_fsm_state_decl(bna_port, stopped, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, mtu_init_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, pause_init_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, last_resp_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, started, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, pause_cfg_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, rx_stop_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, mtu_cfg_wait, struct bna_port, + enum bna_port_event); +bfa_fsm_state_decl(bna_port, chld_stop_wait, struct bna_port, + enum bna_port_event); + +static struct bfa_sm_table port_sm_table[] = { + {BFA_SM(bna_port_sm_stopped), BNA_PORT_STOPPED}, + {BFA_SM(bna_port_sm_mtu_init_wait), BNA_PORT_MTU_INIT_WAIT}, + {BFA_SM(bna_port_sm_pause_init_wait), BNA_PORT_PAUSE_INIT_WAIT}, + {BFA_SM(bna_port_sm_last_resp_wait), BNA_PORT_LAST_RESP_WAIT}, + {BFA_SM(bna_port_sm_started), BNA_PORT_STARTED}, + {BFA_SM(bna_port_sm_pause_cfg_wait), BNA_PORT_PAUSE_CFG_WAIT}, + {BFA_SM(bna_port_sm_rx_stop_wait), BNA_PORT_RX_STOP_WAIT}, + {BFA_SM(bna_port_sm_mtu_cfg_wait), BNA_PORT_MTU_CFG_WAIT}, + {BFA_SM(bna_port_sm_chld_stop_wait), BNA_PORT_CHLD_STOP_WAIT} +}; + +static void +bna_port_sm_stopped_entry(struct bna_port *port) +{ + call_port_pause_cbfn(port, BNA_CB_SUCCESS); + call_port_mtu_cbfn(port, BNA_CB_SUCCESS); + call_port_stop_cbfn(port, BNA_CB_SUCCESS); +} + +static void +bna_port_sm_stopped(struct bna_port *port, enum bna_port_event event) +{ + switch (event) { + case PORT_E_START: + bfa_fsm_set_state(port, bna_port_sm_mtu_init_wait); + break; + + case PORT_E_STOP: + call_port_stop_cbfn(port, BNA_CB_SUCCESS); + break; + + case PORT_E_FAIL: + /* No-op */ + break; + + case PORT_E_PAUSE_CFG: + call_port_pause_cbfn(port, BNA_CB_SUCCESS); + break; + + case PORT_E_MTU_CFG: + call_port_mtu_cbfn(port, BNA_CB_SUCCESS); + break; + + case PORT_E_CHLD_STOPPED: + /** + * This event is received due to LLPort, Tx and Rx objects + * failing + */ + /* No-op */ + break; + + case PORT_E_FWRESP_PAUSE: + case PORT_E_FWRESP_MTU: + /** + * These events are received due to flushing of mbox when + * device fails + */ + /* No-op */ + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void 
+bna_port_sm_mtu_init_wait_entry(struct bna_port *port) +{ + bna_fw_mtu_set(port); +} + +static void +bna_port_sm_mtu_init_wait(struct bna_port *port, enum bna_port_event event) +{ + switch (event) { + case PORT_E_STOP: + bfa_fsm_set_state(port, bna_port_sm_last_resp_wait); + break; + + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + break; + + case PORT_E_PAUSE_CFG: + /* No-op */ + break; + + case PORT_E_MTU_CFG: + port->flags |= BNA_PORT_F_MTU_CHANGED; + break; + + case PORT_E_FWRESP_MTU: + if (port->flags & BNA_PORT_F_MTU_CHANGED) { + port->flags &= ~BNA_PORT_F_MTU_CHANGED; + bna_fw_mtu_set(port); + } else { + bfa_fsm_set_state(port, bna_port_sm_pause_init_wait); + } + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_pause_init_wait_entry(struct bna_port *port) +{ + bna_fw_pause_set(port); +} + +static void +bna_port_sm_pause_init_wait(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_STOP: + bfa_fsm_set_state(port, bna_port_sm_last_resp_wait); + break; + + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + break; + + case PORT_E_PAUSE_CFG: + port->flags |= BNA_PORT_F_PAUSE_CHANGED; + break; + + case PORT_E_MTU_CFG: + port->flags |= BNA_PORT_F_MTU_CHANGED; + break; + + case PORT_E_FWRESP_PAUSE: + if (port->flags & BNA_PORT_F_PAUSE_CHANGED) { + port->flags &= ~BNA_PORT_F_PAUSE_CHANGED; + bna_fw_pause_set(port); + } else if (port->flags & BNA_PORT_F_MTU_CHANGED) { + port->flags &= ~BNA_PORT_F_MTU_CHANGED; + bfa_fsm_set_state(port, bna_port_sm_mtu_init_wait); + } else { + bfa_fsm_set_state(port, bna_port_sm_started); + bna_port_chld_start(port); + } + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_last_resp_wait_entry(struct bna_port *port) +{ +} + +static void +bna_port_sm_last_resp_wait(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_FAIL: + case PORT_E_FWRESP_PAUSE: + case PORT_E_FWRESP_MTU: + bfa_fsm_set_state(port, bna_port_sm_stopped); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_started_entry(struct bna_port *port) +{ + /** + * NOTE: Do not call bna_port_chld_start() here, since it will be + * inadvertently called during pause_cfg_wait->started transition + * as well + */ + call_port_pause_cbfn(port, BNA_CB_SUCCESS); + call_port_mtu_cbfn(port, BNA_CB_SUCCESS); +} + +static void +bna_port_sm_started(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_STOP: + bfa_fsm_set_state(port, bna_port_sm_chld_stop_wait); + break; + + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + bna_port_chld_fail(port); + break; + + case PORT_E_PAUSE_CFG: + bfa_fsm_set_state(port, bna_port_sm_pause_cfg_wait); + break; + + case PORT_E_MTU_CFG: + bfa_fsm_set_state(port, bna_port_sm_rx_stop_wait); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_pause_cfg_wait_entry(struct bna_port *port) +{ + bna_fw_pause_set(port); +} + +static void +bna_port_sm_pause_cfg_wait(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + bna_port_chld_fail(port); + break; + + case PORT_E_FWRESP_PAUSE: + bfa_fsm_set_state(port, bna_port_sm_started); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_rx_stop_wait_entry(struct bna_port *port) +{ + bna_port_rx_stop(port); +} 
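+ +/* + * MTU change on a started port: Rx is stopped first (rx_stop_wait), the new + * MTU is then programmed into firmware (mtu_cfg_wait), and Rx is restarted + * once the firmware response arrives. + */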
+ +static void +bna_port_sm_rx_stop_wait(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + bna_port_chld_fail(port); + break; + + case PORT_E_CHLD_STOPPED: + bfa_fsm_set_state(port, bna_port_sm_mtu_cfg_wait); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_mtu_cfg_wait_entry(struct bna_port *port) +{ + bna_fw_mtu_set(port); +} + +static void +bna_port_sm_mtu_cfg_wait(struct bna_port *port, enum bna_port_event event) +{ + switch (event) { + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + bna_port_chld_fail(port); + break; + + case PORT_E_FWRESP_MTU: + bfa_fsm_set_state(port, bna_port_sm_started); + bna_port_rx_start(port); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_port_sm_chld_stop_wait_entry(struct bna_port *port) +{ + bna_port_chld_stop(port); +} + +static void +bna_port_sm_chld_stop_wait(struct bna_port *port, + enum bna_port_event event) +{ + switch (event) { + case PORT_E_FAIL: + bfa_fsm_set_state(port, bna_port_sm_stopped); + bna_port_chld_fail(port); + break; + + case PORT_E_CHLD_STOPPED: + bfa_fsm_set_state(port, bna_port_sm_stopped); + break; + + default: + bfa_sm_fault(port->bna, event); + } +} + +static void +bna_fw_pause_set(struct bna_port *port) +{ + struct bfi_ll_set_pause_req ll_req; + + memset(&ll_req, 0, sizeof(ll_req)); + ll_req.mh.msg_class = BFI_MC_LL; + ll_req.mh.msg_id = BFI_LL_H2I_SET_PAUSE_REQ; + ll_req.mh.mtag.h2i.lpu_id = 0; + + ll_req.tx_pause = port->pause_config.tx_pause; + ll_req.rx_pause = port->pause_config.rx_pause; + + bna_mbox_qe_fill(&port->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_pause_set, port); + + bna_mbox_send(port->bna, &port->mbox_qe); +} + +static void +bna_fw_cb_pause_set(void *arg, int status) +{ + struct bna_port *port = (struct bna_port *)arg; + + bfa_q_qe_init(&port->mbox_qe.qe); + bfa_fsm_send_event(port, PORT_E_FWRESP_PAUSE); +} + +static void +bna_fw_mtu_set(struct bna_port *port) +{ + struct bfi_ll_mtu_info_req ll_req; + + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_MTU_INFO_REQ, 0); + ll_req.mtu = htons((u16)port->mtu); + + bna_mbox_qe_fill(&port->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_mtu_set, port); + bna_mbox_send(port->bna, &port->mbox_qe); +} + +static void +bna_fw_cb_mtu_set(void *arg, int status) +{ + struct bna_port *port = (struct bna_port *)arg; + + bfa_q_qe_init(&port->mbox_qe.qe); + bfa_fsm_send_event(port, PORT_E_FWRESP_MTU); +} + +static void +bna_port_cb_chld_stopped(void *arg) +{ + struct bna_port *port = (struct bna_port *)arg; + + bfa_fsm_send_event(port, PORT_E_CHLD_STOPPED); +} + +void +bna_port_init(struct bna_port *port, struct bna *bna) +{ + port->bna = bna; + port->flags = 0; + port->mtu = 0; + port->type = BNA_PORT_T_REGULAR; + + port->link_cbfn = bnad_cb_port_link_status; + + port->chld_stop_wc.wc_resume = bna_port_cb_chld_stopped; + port->chld_stop_wc.wc_cbarg = port; + port->chld_stop_wc.wc_count = 0; + + port->stop_cbfn = NULL; + port->stop_cbarg = NULL; + + port->pause_cbfn = NULL; + + port->mtu_cbfn = NULL; + + bfa_q_qe_init(&port->mbox_qe.qe); + + bfa_fsm_set_state(port, bna_port_sm_stopped); + + bna_llport_init(&port->llport, bna); +} + +void +bna_port_uninit(struct bna_port *port) +{ + bna_llport_uninit(&port->llport); + + port->flags = 0; + + port->bna = NULL; +} + +int +bna_port_state_get(struct bna_port *port) +{ + return bfa_sm_to_state(port_sm_table, port->fsm); +} + +void +bna_port_start(struct bna_port 
*port) +{ + port->flags |= BNA_PORT_F_DEVICE_READY; + if (port->flags & BNA_PORT_F_ENABLED) + bfa_fsm_send_event(port, PORT_E_START); +} + +void +bna_port_stop(struct bna_port *port) +{ + port->stop_cbfn = bna_device_cb_port_stopped; + port->stop_cbarg = &port->bna->device; + + port->flags &= ~BNA_PORT_F_DEVICE_READY; + bfa_fsm_send_event(port, PORT_E_STOP); +} + +void +bna_port_fail(struct bna_port *port) +{ + port->flags &= ~BNA_PORT_F_DEVICE_READY; + bfa_fsm_send_event(port, PORT_E_FAIL); +} + +void +bna_port_cb_tx_stopped(struct bna_port *port, enum bna_cb_status status) +{ + bfa_wc_down(&port->chld_stop_wc); +} + +void +bna_port_cb_rx_stopped(struct bna_port *port, enum bna_cb_status status) +{ + bfa_wc_down(&port->chld_stop_wc); +} + +void +bna_port_cb_link_up(struct bna_port *port, struct bfi_ll_aen *aen, + int status) +{ + int i; + u8 prio_map; + + port->llport.link_status = BNA_LINK_UP; + if (aen->cee_linkup) + port->llport.link_status = BNA_CEE_UP; + + /* Compute the priority */ + prio_map = aen->prio_map; + if (prio_map) { + for (i = 0; i < 8; i++) { + if ((prio_map >> i) & 0x1) + break; + } + port->priority = i; + } else + port->priority = 0; + + /* Dispatch events */ + bna_tx_mod_cee_link_status(&port->bna->tx_mod, aen->cee_linkup); + bna_tx_mod_prio_changed(&port->bna->tx_mod, port->priority); + port->link_cbfn(port->bna->bnad, port->llport.link_status); +} + +void +bna_port_cb_link_down(struct bna_port *port, int status) +{ + port->llport.link_status = BNA_LINK_DOWN; + + /* Dispatch events */ + bna_tx_mod_cee_link_status(&port->bna->tx_mod, BNA_LINK_DOWN); + port->link_cbfn(port->bna->bnad, BNA_LINK_DOWN); +} + +int +bna_port_mtu_get(struct bna_port *port) +{ + return port->mtu; +} + +void +bna_port_enable(struct bna_port *port) +{ + if (port->fsm != (bfa_sm_t)bna_port_sm_stopped) + return; + + port->flags |= BNA_PORT_F_ENABLED; + + if (port->flags & BNA_PORT_F_DEVICE_READY) + bfa_fsm_send_event(port, PORT_E_START); +} + +void +bna_port_disable(struct bna_port *port, enum bna_cleanup_type type, + void (*cbfn)(void *, enum bna_cb_status)) +{ + if (type == BNA_SOFT_CLEANUP) { + (*cbfn)(port->bna->bnad, BNA_CB_SUCCESS); + return; + } + + port->stop_cbfn = cbfn; + port->stop_cbarg = port->bna->bnad; + + port->flags &= ~BNA_PORT_F_ENABLED; + + bfa_fsm_send_event(port, PORT_E_STOP); +} + +void +bna_port_pause_config(struct bna_port *port, + struct bna_pause_config *pause_config, + void (*cbfn)(struct bnad *, enum bna_cb_status)) +{ + port->pause_config = *pause_config; + + port->pause_cbfn = cbfn; + + bfa_fsm_send_event(port, PORT_E_PAUSE_CFG); +} + +void +bna_port_mtu_set(struct bna_port *port, int mtu, + void (*cbfn)(struct bnad *, enum bna_cb_status)) +{ + port->mtu = mtu; + + port->mtu_cbfn = cbfn; + + bfa_fsm_send_event(port, PORT_E_MTU_CFG); +} + +void +bna_port_mac_get(struct bna_port *port, mac_t *mac) +{ + *mac = bfa_ioc_get_mac(&port->bna->device.ioc); +} + +/** + * Should be called only when port is disabled + */ +void +bna_port_type_set(struct bna_port *port, enum bna_port_type type) +{ + port->type = type; + port->llport.type = type; +} + +/** + * Should be called only when port is disabled + */ +void +bna_port_linkcbfn_set(struct bna_port *port, + void (*linkcbfn)(struct bnad *, enum bna_link_status)) +{ + port->link_cbfn = linkcbfn; +} + +void +bna_port_admin_up(struct bna_port *port) +{ + struct bna_llport *llport = &port->llport; + + if (llport->flags & BNA_LLPORT_F_ENABLED) + return; + + llport->flags |= BNA_LLPORT_F_ENABLED; + + if (llport->flags & 
BNA_LLPORT_F_RX_ENABLED) + bfa_fsm_send_event(llport, LLPORT_E_UP); +} + +void +bna_port_admin_down(struct bna_port *port) +{ + struct bna_llport *llport = &port->llport; + + if (!(llport->flags & BNA_LLPORT_F_ENABLED)) + return; + + llport->flags &= ~BNA_LLPORT_F_ENABLED; + + if (llport->flags & BNA_LLPORT_F_RX_ENABLED) + bfa_fsm_send_event(llport, LLPORT_E_DOWN); +} + +/** + * DEVICE + */ +#define enable_mbox_intr(_device)\ +do {\ + u32 intr_status;\ + bna_intr_status_get((_device)->bna, intr_status);\ + bnad_cb_device_enable_mbox_intr((_device)->bna->bnad);\ + bna_mbox_intr_enable((_device)->bna);\ +} while (0) + +#define disable_mbox_intr(_device)\ +do {\ + bna_mbox_intr_disable((_device)->bna);\ + bnad_cb_device_disable_mbox_intr((_device)->bna->bnad);\ +} while (0) + +const struct bna_chip_regs_offset reg_offset[] = +{{HOST_PAGE_NUM_FN0, HOSTFN0_INT_STATUS, + HOSTFN0_INT_MASK, HOST_MSIX_ERR_INDEX_FN0}, +{HOST_PAGE_NUM_FN1, HOSTFN1_INT_STATUS, + HOSTFN1_INT_MASK, HOST_MSIX_ERR_INDEX_FN1}, +{HOST_PAGE_NUM_FN2, HOSTFN2_INT_STATUS, + HOSTFN2_INT_MASK, HOST_MSIX_ERR_INDEX_FN2}, +{HOST_PAGE_NUM_FN3, HOSTFN3_INT_STATUS, + HOSTFN3_INT_MASK, HOST_MSIX_ERR_INDEX_FN3}, +}; + +enum bna_device_event { + DEVICE_E_ENABLE = 1, + DEVICE_E_DISABLE = 2, + DEVICE_E_IOC_READY = 3, + DEVICE_E_IOC_FAILED = 4, + DEVICE_E_IOC_DISABLED = 5, + DEVICE_E_IOC_RESET = 6, + DEVICE_E_PORT_STOPPED = 7, +}; + +enum bna_device_state { + BNA_DEVICE_STOPPED = 1, + BNA_DEVICE_IOC_READY_WAIT = 2, + BNA_DEVICE_READY = 3, + BNA_DEVICE_PORT_STOP_WAIT = 4, + BNA_DEVICE_IOC_DISABLE_WAIT = 5, + BNA_DEVICE_FAILED = 6 +}; + +bfa_fsm_state_decl(bna_device, stopped, struct bna_device, + enum bna_device_event); +bfa_fsm_state_decl(bna_device, ioc_ready_wait, struct bna_device, + enum bna_device_event); +bfa_fsm_state_decl(bna_device, ready, struct bna_device, + enum bna_device_event); +bfa_fsm_state_decl(bna_device, port_stop_wait, struct bna_device, + enum bna_device_event); +bfa_fsm_state_decl(bna_device, ioc_disable_wait, struct bna_device, + enum bna_device_event); +bfa_fsm_state_decl(bna_device, failed, struct bna_device, + enum bna_device_event); + +static struct bfa_sm_table device_sm_table[] = { + {BFA_SM(bna_device_sm_stopped), BNA_DEVICE_STOPPED}, + {BFA_SM(bna_device_sm_ioc_ready_wait), BNA_DEVICE_IOC_READY_WAIT}, + {BFA_SM(bna_device_sm_ready), BNA_DEVICE_READY}, + {BFA_SM(bna_device_sm_port_stop_wait), BNA_DEVICE_PORT_STOP_WAIT}, + {BFA_SM(bna_device_sm_ioc_disable_wait), BNA_DEVICE_IOC_DISABLE_WAIT}, + {BFA_SM(bna_device_sm_failed), BNA_DEVICE_FAILED}, +}; + +static void +bna_device_sm_stopped_entry(struct bna_device *device) +{ + if (device->stop_cbfn) + device->stop_cbfn(device->stop_cbarg, BNA_CB_SUCCESS); + + device->stop_cbfn = NULL; + device->stop_cbarg = NULL; +} + +static void +bna_device_sm_stopped(struct bna_device *device, + enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_ENABLE: + if (device->intr_type == BNA_INTR_T_MSIX) + bna_mbox_msix_idx_set(device); + bfa_ioc_enable(&device->ioc); + bfa_fsm_set_state(device, bna_device_sm_ioc_ready_wait); + break; + + case DEVICE_E_DISABLE: + bfa_fsm_set_state(device, bna_device_sm_stopped); + break; + + case DEVICE_E_IOC_RESET: + enable_mbox_intr(device); + break; + + case DEVICE_E_IOC_FAILED: + bfa_fsm_set_state(device, bna_device_sm_failed); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +static void +bna_device_sm_ioc_ready_wait_entry(struct bna_device *device) +{ + /** + * Do not call bfa_ioc_enable() here. 
It must be called in the + * previous state due to failed -> ioc_ready_wait transition. + */ +} + +static void +bna_device_sm_ioc_ready_wait(struct bna_device *device, + enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_DISABLE: + if (device->ready_cbfn) + device->ready_cbfn(device->ready_cbarg, + BNA_CB_INTERRUPT); + device->ready_cbfn = NULL; + device->ready_cbarg = NULL; + bfa_fsm_set_state(device, bna_device_sm_ioc_disable_wait); + break; + + case DEVICE_E_IOC_READY: + bfa_fsm_set_state(device, bna_device_sm_ready); + break; + + case DEVICE_E_IOC_FAILED: + bfa_fsm_set_state(device, bna_device_sm_failed); + break; + + case DEVICE_E_IOC_RESET: + enable_mbox_intr(device); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +static void +bna_device_sm_ready_entry(struct bna_device *device) +{ + bna_mbox_mod_start(&device->bna->mbox_mod); + bna_port_start(&device->bna->port); + + if (device->ready_cbfn) + device->ready_cbfn(device->ready_cbarg, + BNA_CB_SUCCESS); + device->ready_cbfn = NULL; + device->ready_cbarg = NULL; +} + +static void +bna_device_sm_ready(struct bna_device *device, enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_DISABLE: + bfa_fsm_set_state(device, bna_device_sm_port_stop_wait); + break; + + case DEVICE_E_IOC_FAILED: + bfa_fsm_set_state(device, bna_device_sm_failed); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +static void +bna_device_sm_port_stop_wait_entry(struct bna_device *device) +{ + bna_port_stop(&device->bna->port); +} + +static void +bna_device_sm_port_stop_wait(struct bna_device *device, + enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_PORT_STOPPED: + bna_mbox_mod_stop(&device->bna->mbox_mod); + bfa_fsm_set_state(device, bna_device_sm_ioc_disable_wait); + break; + + case DEVICE_E_IOC_FAILED: + disable_mbox_intr(device); + bna_port_fail(&device->bna->port); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +static void +bna_device_sm_ioc_disable_wait_entry(struct bna_device *device) +{ + bfa_ioc_disable(&device->ioc); +} + +static void +bna_device_sm_ioc_disable_wait(struct bna_device *device, + enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_IOC_DISABLED: + disable_mbox_intr(device); + bfa_fsm_set_state(device, bna_device_sm_stopped); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +static void +bna_device_sm_failed_entry(struct bna_device *device) +{ + disable_mbox_intr(device); + bna_port_fail(&device->bna->port); + bna_mbox_mod_stop(&device->bna->mbox_mod); + + if (device->ready_cbfn) + device->ready_cbfn(device->ready_cbarg, + BNA_CB_FAIL); + device->ready_cbfn = NULL; + device->ready_cbarg = NULL; +} + +static void +bna_device_sm_failed(struct bna_device *device, + enum bna_device_event event) +{ + switch (event) { + case DEVICE_E_DISABLE: + bfa_fsm_set_state(device, bna_device_sm_ioc_disable_wait); + break; + + case DEVICE_E_IOC_RESET: + enable_mbox_intr(device); + bfa_fsm_set_state(device, bna_device_sm_ioc_ready_wait); + break; + + default: + bfa_sm_fault(device->bna, event); + } +} + +/* IOC callback functions */ + +static void +bna_device_cb_iocll_ready(void *dev, enum bfa_status error) +{ + struct bna_device *device = (struct bna_device *)dev; + + if (error) + bfa_fsm_send_event(device, DEVICE_E_IOC_FAILED); + else + bfa_fsm_send_event(device, DEVICE_E_IOC_READY); +} + +static void +bna_device_cb_iocll_disabled(void *dev) +{ + struct bna_device *device = (struct bna_device *)dev; + + 
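/* IOC disable has completed: take the device FSM out of ioc_disable_wait */ +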
bfa_fsm_send_event(device, DEVICE_E_IOC_DISABLED); +} + +static void +bna_device_cb_iocll_failed(void *dev) +{ + struct bna_device *device = (struct bna_device *)dev; + + bfa_fsm_send_event(device, DEVICE_E_IOC_FAILED); +} + +static void +bna_device_cb_iocll_reset(void *dev) +{ + struct bna_device *device = (struct bna_device *)dev; + + bfa_fsm_send_event(device, DEVICE_E_IOC_RESET); +} + +static struct bfa_ioc_cbfn bfa_iocll_cbfn = { + bna_device_cb_iocll_ready, + bna_device_cb_iocll_disabled, + bna_device_cb_iocll_failed, + bna_device_cb_iocll_reset +}; + +void +bna_device_init(struct bna_device *device, struct bna *bna, + struct bna_res_info *res_info) +{ + u64 dma; + + device->bna = bna; + + /** + * Attach IOC and claim: + * 1. DMA memory for IOC attributes + * 2. Kernel memory for FW trace + */ + bfa_ioc_attach(&device->ioc, device, &bfa_iocll_cbfn); + bfa_ioc_pci_init(&device->ioc, &bna->pcidev, BFI_MC_LL); + + BNA_GET_DMA_ADDR( + &res_info[BNA_RES_MEM_T_ATTR].res_u.mem_info.mdl[0].dma, dma); + bfa_ioc_mem_claim(&device->ioc, + res_info[BNA_RES_MEM_T_ATTR].res_u.mem_info.mdl[0].kva, + dma); + + bna_adv_device_init(device, bna, res_info); + /* + * Initialize mbox_mod only after IOC, so that mbox handler + * registration goes through + */ + device->intr_type = + res_info[BNA_RES_INTR_T_MBOX].res_u.intr_info.intr_type; + device->vector = + res_info[BNA_RES_INTR_T_MBOX].res_u.intr_info.idl[0].vector; + bna_mbox_mod_init(&bna->mbox_mod, bna); + + device->ready_cbfn = device->stop_cbfn = NULL; + device->ready_cbarg = device->stop_cbarg = NULL; + + bfa_fsm_set_state(device, bna_device_sm_stopped); +} + +void +bna_device_uninit(struct bna_device *device) +{ + bna_mbox_mod_uninit(&device->bna->mbox_mod); + + bfa_cee_detach(&device->bna->cee); + + bfa_ioc_detach(&device->ioc); + + device->bna = NULL; +} + +void +bna_device_cb_port_stopped(void *arg, enum bna_cb_status status) +{ + struct bna_device *device = (struct bna_device *)arg; + + bfa_fsm_send_event(device, DEVICE_E_PORT_STOPPED); +} + +int +bna_device_status_get(struct bna_device *device) +{ + return (device->fsm == (bfa_fsm_t)bna_device_sm_ready); +} + +void +bna_device_enable(struct bna_device *device) +{ + if (device->fsm != (bfa_fsm_t)bna_device_sm_stopped) { + bnad_cb_device_enabled(device->bna->bnad, BNA_CB_BUSY); + return; + } + + device->ready_cbfn = bnad_cb_device_enabled; + device->ready_cbarg = device->bna->bnad; + + bfa_fsm_send_event(device, DEVICE_E_ENABLE); +} + +void +bna_device_disable(struct bna_device *device, enum bna_cleanup_type type) +{ + if (type == BNA_SOFT_CLEANUP) { + bnad_cb_device_disabled(device->bna->bnad, BNA_CB_SUCCESS); + return; + } + + device->stop_cbfn = bnad_cb_device_disabled; + device->stop_cbarg = device->bna->bnad; + + bfa_fsm_send_event(device, DEVICE_E_DISABLE); +} + +int +bna_device_state_get(struct bna_device *device) +{ + return bfa_sm_to_state(device_sm_table, device->fsm); +} + +u32 bna_dim_vector[BNA_LOAD_T_MAX][BNA_BIAS_T_MAX] = { + {12, 20}, + {10, 18}, + {8, 16}, + {6, 12}, + {4, 8}, + {3, 6}, + {2, 4}, + {1, 2}, +}; + +u32 bna_napi_dim_vector[BNA_LOAD_T_MAX][BNA_BIAS_T_MAX] = { + {12, 12}, + {6, 10}, + {5, 10}, + {4, 8}, + {3, 6}, + {3, 6}, + {2, 4}, + {1, 2}, +}; + +/* device */ +void +bna_adv_device_init(struct bna_device *device, struct bna *bna, + struct bna_res_info *res_info) +{ + u8 *kva; + u64 dma; + + device->bna = bna; + + kva = res_info[BNA_RES_MEM_T_FWTRC].res_u.mem_info.mdl[0].kva; + + /** + * Attach common modules (Diag, SFP, CEE, Port) and claim respective + * DMA 
memory. + */ + BNA_GET_DMA_ADDR( + &res_info[BNA_RES_MEM_T_COM].res_u.mem_info.mdl[0].dma, dma); + kva = res_info[BNA_RES_MEM_T_COM].res_u.mem_info.mdl[0].kva; + + bfa_cee_attach(&bna->cee, &device->ioc, bna); + bfa_cee_mem_claim(&bna->cee, kva, dma); + kva += bfa_cee_meminfo(); + dma += bfa_cee_meminfo(); + +} + +/* utils */ + +void +bna_adv_res_req(struct bna_res_info *res_info) +{ + /* DMA memory for COMMON_MODULE */ + res_info[BNA_RES_MEM_T_COM].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_COM].res_u.mem_info.mem_type = BNA_MEM_T_DMA; + res_info[BNA_RES_MEM_T_COM].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_COM].res_u.mem_info.len = ALIGN( + bfa_cee_meminfo(), PAGE_SIZE); + + /* Virtual memory for retreiving fw_trc */ + res_info[BNA_RES_MEM_T_FWTRC].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_FWTRC].res_u.mem_info.mem_type = BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_FWTRC].res_u.mem_info.num = 0; + res_info[BNA_RES_MEM_T_FWTRC].res_u.mem_info.len = 0; + + /* DMA memory for retreiving stats */ + res_info[BNA_RES_MEM_T_STATS].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.mem_type = BNA_MEM_T_DMA; + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.len = + ALIGN(BFI_HW_STATS_SIZE, PAGE_SIZE); + + /* Virtual memory for soft stats */ + res_info[BNA_RES_MEM_T_SWSTATS].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_SWSTATS].res_u.mem_info.mem_type = BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_SWSTATS].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_SWSTATS].res_u.mem_info.len = + sizeof(struct bna_sw_stats); +} + +static void +bna_sw_stats_get(struct bna *bna, struct bna_sw_stats *sw_stats) +{ + struct bna_tx *tx; + struct bna_txq *txq; + struct bna_rx *rx; + struct bna_rxp *rxp; + struct list_head *qe; + struct list_head *txq_qe; + struct list_head *rxp_qe; + struct list_head *mac_qe; + int i; + + sw_stats->device_state = bna_device_state_get(&bna->device); + sw_stats->port_state = bna_port_state_get(&bna->port); + sw_stats->port_flags = bna->port.flags; + sw_stats->llport_state = bna_llport_state_get(&bna->port.llport); + sw_stats->priority = bna->port.priority; + + i = 0; + list_for_each(qe, &bna->tx_mod.tx_active_q) { + tx = (struct bna_tx *)qe; + sw_stats->tx_stats[i].tx_state = bna_tx_state_get(tx); + sw_stats->tx_stats[i].tx_flags = tx->flags; + + sw_stats->tx_stats[i].num_txqs = 0; + sw_stats->tx_stats[i].txq_bmap[0] = 0; + sw_stats->tx_stats[i].txq_bmap[1] = 0; + list_for_each(txq_qe, &tx->txq_q) { + txq = (struct bna_txq *)txq_qe; + if (txq->txq_id < 32) + sw_stats->tx_stats[i].txq_bmap[0] |= + ((u32)1 << txq->txq_id); + else + sw_stats->tx_stats[i].txq_bmap[1] |= + ((u32) + 1 << (txq->txq_id - 32)); + sw_stats->tx_stats[i].num_txqs++; + } + + sw_stats->tx_stats[i].txf_id = tx->txf.txf_id; + + i++; + } + sw_stats->num_active_tx = i; + + i = 0; + list_for_each(qe, &bna->rx_mod.rx_active_q) { + rx = (struct bna_rx *)qe; + sw_stats->rx_stats[i].rx_state = bna_rx_state_get(rx); + sw_stats->rx_stats[i].rx_flags = rx->rx_flags; + + sw_stats->rx_stats[i].num_rxps = 0; + sw_stats->rx_stats[i].num_rxqs = 0; + sw_stats->rx_stats[i].rxq_bmap[0] = 0; + sw_stats->rx_stats[i].rxq_bmap[1] = 0; + sw_stats->rx_stats[i].cq_bmap[0] = 0; + sw_stats->rx_stats[i].cq_bmap[1] = 0; + list_for_each(rxp_qe, &rx->rxp_q) { + rxp = (struct bna_rxp *)rxp_qe; + + sw_stats->rx_stats[i].num_rxqs += 1; + + if (rxp->type == BNA_RXP_SINGLE) { + if (rxp->rxq.single.only->rxq_id < 32) { + sw_stats->rx_stats[i].rxq_bmap[0] |= + 
((u32)1 << + rxp->rxq.single.only->rxq_id); + } else { + sw_stats->rx_stats[i].rxq_bmap[1] |= + ((u32)1 << + (rxp->rxq.single.only->rxq_id - 32)); + } + } else { + if (rxp->rxq.slr.large->rxq_id < 32) { + sw_stats->rx_stats[i].rxq_bmap[0] |= + ((u32)1 << + rxp->rxq.slr.large->rxq_id); + } else { + sw_stats->rx_stats[i].rxq_bmap[1] |= + ((u32)1 << + (rxp->rxq.slr.large->rxq_id - 32)); + } + + if (rxp->rxq.slr.small->rxq_id < 32) { + sw_stats->rx_stats[i].rxq_bmap[0] |= + ((u32)1 << + rxp->rxq.slr.small->rxq_id); + } else { + sw_stats->rx_stats[i].rxq_bmap[1] |= + ((u32)1 << + (rxp->rxq.slr.small->rxq_id - 32)); + } + sw_stats->rx_stats[i].num_rxqs += 1; + } + + if (rxp->cq.cq_id < 32) + sw_stats->rx_stats[i].cq_bmap[0] |= + (1 << rxp->cq.cq_id); + else + sw_stats->rx_stats[i].cq_bmap[1] |= + (1 << (rxp->cq.cq_id - 32)); + + sw_stats->rx_stats[i].num_rxps++; + } + + sw_stats->rx_stats[i].rxf_id = rx->rxf.rxf_id; + sw_stats->rx_stats[i].rxf_state = bna_rxf_state_get(&rx->rxf); + sw_stats->rx_stats[i].rxf_oper_state = rx->rxf.rxf_oper_state; + + sw_stats->rx_stats[i].num_active_ucast = 0; + if (rx->rxf.ucast_active_mac) + sw_stats->rx_stats[i].num_active_ucast++; + list_for_each(mac_qe, &rx->rxf.ucast_active_q) + sw_stats->rx_stats[i].num_active_ucast++; + + sw_stats->rx_stats[i].num_active_mcast = 0; + list_for_each(mac_qe, &rx->rxf.mcast_active_q) + sw_stats->rx_stats[i].num_active_mcast++; + + sw_stats->rx_stats[i].rxmode_active = rx->rxf.rxmode_active; + sw_stats->rx_stats[i].vlan_filter_status = + rx->rxf.vlan_filter_status; + memcpy(sw_stats->rx_stats[i].vlan_filter_table, + rx->rxf.vlan_filter_table, + sizeof(u32) * ((BFI_MAX_VLAN + 1) / 32)); + + sw_stats->rx_stats[i].rss_status = rx->rxf.rss_status; + sw_stats->rx_stats[i].hds_status = rx->rxf.hds_status; + + i++; + } + sw_stats->num_active_rx = i; +} + +static void +bna_fw_cb_stats_get(void *arg, int status) +{ + struct bna *bna = (struct bna *)arg; + u64 *p_stats; + int i, count; + int rxf_count, txf_count; + u64 rxf_bmap, txf_bmap; + + bfa_q_qe_init(&bna->mbox_qe.qe); + + if (status == 0) { + p_stats = (u64 *)bna->stats.hw_stats; + count = sizeof(struct bfi_ll_stats) / sizeof(u64); + for (i = 0; i < count; i++) + p_stats[i] = cpu_to_be64(p_stats[i]); + + rxf_count = 0; + rxf_bmap = (u64)bna->stats.rxf_bmap[0] | + ((u64)bna->stats.rxf_bmap[1] << 32); + for (i = 0; i < BFI_LL_RXF_ID_MAX; i++) + if (rxf_bmap & ((u64)1 << i)) + rxf_count++; + + txf_count = 0; + txf_bmap = (u64)bna->stats.txf_bmap[0] | + ((u64)bna->stats.txf_bmap[1] << 32); + for (i = 0; i < BFI_LL_TXF_ID_MAX; i++) + if (txf_bmap & ((u64)1 << i)) + txf_count++; + + p_stats = (u64 *)&bna->stats.hw_stats->rxf_stats[0] + + ((rxf_count * sizeof(struct bfi_ll_stats_rxf) + + txf_count * sizeof(struct bfi_ll_stats_txf))/ + sizeof(u64)); + + /* Populate the TXF stats from the firmware DMAed copy */ + for (i = (BFI_LL_TXF_ID_MAX - 1); i >= 0; i--) + if (txf_bmap & ((u64)1 << i)) { + p_stats -= sizeof(struct bfi_ll_stats_txf)/ + sizeof(u64); + memcpy(&bna->stats.hw_stats->txf_stats[i], + p_stats, + sizeof(struct bfi_ll_stats_txf)); + } + + /* Populate the RXF stats from the firmware DMAed copy */ + for (i = (BFI_LL_RXF_ID_MAX - 1); i >= 0; i--) + if (rxf_bmap & ((u64)1 << i)) { + p_stats -= sizeof(struct bfi_ll_stats_rxf)/ + sizeof(u64); + memcpy(&bna->stats.hw_stats->rxf_stats[i], + p_stats, + sizeof(struct bfi_ll_stats_rxf)); + } + + bna_sw_stats_get(bna, bna->stats.sw_stats); + bnad_cb_stats_get(bna->bnad, BNA_CB_SUCCESS, &bna->stats); + } else + bnad_cb_stats_get(bna->bnad, 
BNA_CB_FAIL, &bna->stats); +} + +static void +bna_fw_stats_get(struct bna *bna) +{ + struct bfi_ll_stats_req ll_req; + + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_STATS_GET_REQ, 0); + ll_req.stats_mask = htons(BFI_LL_STATS_ALL); + + ll_req.rxf_id_mask[0] = htonl(bna->rx_mod.rxf_bmap[0]); + ll_req.rxf_id_mask[1] = htonl(bna->rx_mod.rxf_bmap[1]); + ll_req.txf_id_mask[0] = htonl(bna->tx_mod.txf_bmap[0]); + ll_req.txf_id_mask[1] = htonl(bna->tx_mod.txf_bmap[1]); + + ll_req.host_buffer.a32.addr_hi = bna->hw_stats_dma.msb; + ll_req.host_buffer.a32.addr_lo = bna->hw_stats_dma.lsb; + + bna_mbox_qe_fill(&bna->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_stats_get, bna); + bna_mbox_send(bna, &bna->mbox_qe); + + bna->stats.rxf_bmap[0] = bna->rx_mod.rxf_bmap[0]; + bna->stats.rxf_bmap[1] = bna->rx_mod.rxf_bmap[1]; + bna->stats.txf_bmap[0] = bna->tx_mod.txf_bmap[0]; + bna->stats.txf_bmap[1] = bna->tx_mod.txf_bmap[1]; +} + +static void +bna_fw_cb_stats_clr(void *arg, int status) +{ + struct bna *bna = (struct bna *)arg; + + bfa_q_qe_init(&bna->mbox_qe.qe); + + memset(bna->stats.sw_stats, 0, sizeof(struct bna_sw_stats)); + memset(bna->stats.hw_stats, 0, sizeof(struct bfi_ll_stats)); + + bnad_cb_stats_clr(bna->bnad); +} + +static void +bna_fw_stats_clr(struct bna *bna) +{ + struct bfi_ll_stats_req ll_req; + + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_STATS_CLEAR_REQ, 0); + ll_req.stats_mask = htons(BFI_LL_STATS_ALL); + ll_req.rxf_id_mask[0] = htonl(0xffffffff); + ll_req.rxf_id_mask[1] = htonl(0xffffffff); + ll_req.txf_id_mask[0] = htonl(0xffffffff); + ll_req.txf_id_mask[1] = htonl(0xffffffff); + + bna_mbox_qe_fill(&bna->mbox_qe, &ll_req, sizeof(ll_req), + bna_fw_cb_stats_clr, bna); + bna_mbox_send(bna, &bna->mbox_qe); +} + +void +bna_stats_get(struct bna *bna) +{ + if (bna_device_status_get(&bna->device)) + bna_fw_stats_get(bna); + else + bnad_cb_stats_get(bna->bnad, BNA_CB_FAIL, &bna->stats); +} + +void +bna_stats_clr(struct bna *bna) +{ + if (bna_device_status_get(&bna->device)) + bna_fw_stats_clr(bna); + else { + memset(bna->stats.sw_stats, 0, + sizeof(struct bna_sw_stats)); + memset(bna->stats.hw_stats, 0, + sizeof(struct bfi_ll_stats)); + bnad_cb_stats_clr(bna->bnad); + } +} + +/* IB */ +void +bna_ib_coalescing_timeo_set(struct bna_ib *ib, u8 coalescing_timeo) +{ + ib->ib_config.coalescing_timeo = coalescing_timeo; + + if (ib->start_count) + ib->door_bell.doorbell_ack = BNA_DOORBELL_IB_INT_ACK( + (u32)ib->ib_config.coalescing_timeo, 0); +} + +/* RxF */ +void +bna_rxf_adv_init(struct bna_rxf *rxf, + struct bna_rx *rx, + struct bna_rx_config *q_config) +{ + switch (q_config->rxp_type) { + case BNA_RXP_SINGLE: + /* No-op */ + break; + case BNA_RXP_SLR: + rxf->ctrl_flags |= BNA_RXF_CF_SM_LG_RXQ; + break; + case BNA_RXP_HDS: + rxf->hds_cfg.hdr_type = q_config->hds_config.hdr_type; + rxf->hds_cfg.header_size = + q_config->hds_config.header_size; + rxf->forced_offset = 0; + break; + default: + break; + } + + if (q_config->rss_status == BNA_STATUS_T_ENABLED) { + rxf->ctrl_flags |= BNA_RXF_CF_RSS_ENABLE; + rxf->rss_cfg.hash_type = q_config->rss_config.hash_type; + rxf->rss_cfg.hash_mask = q_config->rss_config.hash_mask; + memcpy(&rxf->rss_cfg.toeplitz_hash_key[0], + &q_config->rss_config.toeplitz_hash_key[0], + sizeof(rxf->rss_cfg.toeplitz_hash_key)); + } +} + +static void +rxf_fltr_mbox_cmd(struct bna_rxf *rxf, u8 cmd, enum bna_status status) +{ + struct bfi_ll_rxf_req req; + + bfi_h2i_set(req.mh, BFI_MC_LL, cmd, 0); + + req.rxf_id = rxf->rxf_id; + req.enable = status; + + 
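/* Hand the request to the mbox module, which serializes commands to the IOC */ +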
bna_mbox_qe_fill(&rxf->mbox_qe, &req, sizeof(req), + rxf_cb_cam_fltr_mbox_cmd, rxf); + + bna_mbox_send(rxf->rx->bna, &rxf->mbox_qe); +} + +void +__rxf_default_function_config(struct bna_rxf *rxf, enum bna_status status) +{ + struct bna_rx_fndb_ram *rx_fndb_ram; + u32 ctrl_flags; + int i; + + rx_fndb_ram = (struct bna_rx_fndb_ram *) + BNA_GET_MEM_BASE_ADDR(rxf->rx->bna->pcidev.pci_bar_kva, + RX_FNDB_RAM_BASE_OFFSET); + + for (i = 0; i < BFI_MAX_RXF; i++) { + if (status == BNA_STATUS_T_ENABLED) { + if (i == rxf->rxf_id) + continue; + + ctrl_flags = + readl(&rx_fndb_ram[i].control_flags); + ctrl_flags |= BNA_RXF_CF_DEFAULT_FUNCTION_ENABLE; + writel(ctrl_flags, + &rx_fndb_ram[i].control_flags); + } else { + ctrl_flags = + readl(&rx_fndb_ram[i].control_flags); + ctrl_flags &= ~BNA_RXF_CF_DEFAULT_FUNCTION_ENABLE; + writel(ctrl_flags, + &rx_fndb_ram[i].control_flags); + } + } +} + +int +rxf_process_packet_filter_ucast(struct bna_rxf *rxf) +{ + struct bna_mac *mac = NULL; + struct list_head *qe; + + /* Add additional MAC entries */ + if (!list_empty(&rxf->ucast_pending_add_q)) { + bfa_q_deq(&rxf->ucast_pending_add_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_UCAST_ADD_REQ, mac); + list_add_tail(&mac->qe, &rxf->ucast_active_q); + return 1; + } + + /* Delete MAC addresses previously added */ + if (!list_empty(&rxf->ucast_pending_del_q)) { + bfa_q_deq(&rxf->ucast_pending_del_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_UCAST_DEL_REQ, mac); + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, mac); + return 1; + } + + return 0; +} + +int +rxf_process_packet_filter_promisc(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* Enable/disable promiscuous mode */ + if (is_promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move promisc configuration from pending -> active */ + promisc_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active |= BNA_RXMODE_PROMISC; + + /* Disable VLAN filter to allow all VLANs */ + __rxf_vlan_filter_set(rxf, BNA_STATUS_T_DISABLED); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ, + BNA_STATUS_T_ENABLED); + return 1; + } else if (is_promisc_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move promisc configuration from pending -> active */ + promisc_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_PROMISC; + bna->rxf_promisc_id = BFI_MAX_RXF; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +int +rxf_process_packet_filter_default(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* Enable/disable default mode */ + if (is_default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move default configuration from pending -> active */ + default_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active |= BNA_RXMODE_DEFAULT; + + /* Disable VLAN filter to allow all VLANs */ + __rxf_vlan_filter_set(rxf, BNA_STATUS_T_DISABLED); + /* Redirect all other RxF vlan filtering to this one */ + __rxf_default_function_config(rxf, BNA_STATUS_T_ENABLED); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_DEFAULT_SET_REQ, + BNA_STATUS_T_ENABLED); + return 1; + } else if (is_default_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move 
default configuration from pending -> active */ + default_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_DEFAULT; + bna->rxf_default_id = BFI_MAX_RXF; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + /* Stop RxF vlan filter table redirection */ + __rxf_default_function_config(rxf, BNA_STATUS_T_DISABLED); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_DEFAULT_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +int +rxf_process_packet_filter_allmulti(struct bna_rxf *rxf) +{ + /* Enable/disable allmulti mode */ + if (is_allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move allmulti configuration from pending -> active */ + allmulti_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active |= BNA_RXMODE_ALLMULTI; + + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_FILTER_REQ, + BNA_STATUS_T_ENABLED); + return 1; + } else if (is_allmulti_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move allmulti configuration from pending -> active */ + allmulti_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_ALLMULTI; + + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_FILTER_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +int +rxf_clear_packet_filter_ucast(struct bna_rxf *rxf) +{ + struct bna_mac *mac = NULL; + struct list_head *qe; + + /* 1. delete pending ucast entries */ + if (!list_empty(&rxf->ucast_pending_del_q)) { + bfa_q_deq(&rxf->ucast_pending_del_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_UCAST_DEL_REQ, mac); + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, mac); + return 1; + } + + /* 2. clear active ucast entries; move them to pending_add_q */ + if (!list_empty(&rxf->ucast_active_q)) { + bfa_q_deq(&rxf->ucast_active_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_UCAST_DEL_REQ, mac); + list_add_tail(&mac->qe, &rxf->ucast_pending_add_q); + return 1; + } + + return 0; +} + +int +rxf_clear_packet_filter_promisc(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* 6. Execute pending promisc mode disable command */ + if (is_promisc_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move promisc configuration from pending -> active */ + promisc_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_PROMISC; + bna->rxf_promisc_id = BFI_MAX_RXF; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + /* 7. Clear active promisc mode; move it to pending enable */ + if (rxf->rxmode_active & BNA_RXMODE_PROMISC) { + /* move promisc configuration from active -> pending */ + promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_PROMISC; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_PROMISCUOUS_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +int +rxf_clear_packet_filter_default(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* 8. 
Execute pending default mode disable command */ + if (is_default_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move default configuration from pending -> active */ + default_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_DEFAULT; + bna->rxf_default_id = BFI_MAX_RXF; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + /* Stop RxF vlan filter table redirection */ + __rxf_default_function_config(rxf, BNA_STATUS_T_DISABLED); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_DEFAULT_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + /* 9. Clear active default mode; move it to pending enable */ + if (rxf->rxmode_active & BNA_RXMODE_DEFAULT) { + /* move default configuration from active -> pending */ + default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_DEFAULT; + + /* Revert VLAN filter */ + __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status); + /* Stop RxF vlan filter table redirection */ + __rxf_default_function_config(rxf, BNA_STATUS_T_DISABLED); + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_RXF_DEFAULT_SET_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +int +rxf_clear_packet_filter_allmulti(struct bna_rxf *rxf) +{ + /* 10. Execute pending allmulti mode disable command */ + if (is_allmulti_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* move allmulti configuration from pending -> active */ + allmulti_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_ALLMULTI; + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_FILTER_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + /* 11. Clear active allmulti mode; move it to pending enable */ + if (rxf->rxmode_active & BNA_RXMODE_ALLMULTI) { + /* move allmulti configuration from active -> pending */ + allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_ALLMULTI; + rxf_fltr_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_FILTER_REQ, + BNA_STATUS_T_DISABLED); + return 1; + } + + return 0; +} + +void +rxf_reset_packet_filter_ucast(struct bna_rxf *rxf) +{ + struct list_head *qe; + struct bna_mac *mac; + + /* 1. Move active ucast entries to pending_add_q */ + while (!list_empty(&rxf->ucast_active_q)) { + bfa_q_deq(&rxf->ucast_active_q, &qe); + bfa_q_qe_init(qe); + list_add_tail(qe, &rxf->ucast_pending_add_q); + } + + /* 2. Throw away delete pending ucast entries */ + while (!list_empty(&rxf->ucast_pending_del_q)) { + bfa_q_deq(&rxf->ucast_pending_del_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, mac); + } +} + +void +rxf_reset_packet_filter_promisc(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* 6. Clear pending promisc mode disable */ + if (is_promisc_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + promisc_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_PROMISC; + bna->rxf_promisc_id = BFI_MAX_RXF; + } + + /* 7. Move promisc mode config from active -> pending */ + if (rxf->rxmode_active & BNA_RXMODE_PROMISC) { + promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_PROMISC; + } + +} + +void +rxf_reset_packet_filter_default(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + + /* 8. 
Clear pending default mode disable */ + if (is_default_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + default_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_DEFAULT; + bna->rxf_default_id = BFI_MAX_RXF; + } + + /* 9. Move default mode config from active -> pending */ + if (rxf->rxmode_active & BNA_RXMODE_DEFAULT) { + default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_DEFAULT; + } +} + +void +rxf_reset_packet_filter_allmulti(struct bna_rxf *rxf) +{ + /* 10. Clear pending allmulti mode disable */ + if (is_allmulti_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + allmulti_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_ALLMULTI; + } + + /* 11. Move allmulti mode config from active -> pending */ + if (rxf->rxmode_active & BNA_RXMODE_ALLMULTI) { + allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + rxf->rxmode_active &= ~BNA_RXMODE_ALLMULTI; + } +} + +/** + * Should only be called by bna_rxf_mode_set. + * Helps decide whether a h/w configuration change is needed. + * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_promisc_enable(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + int ret = 0; + + /* There cannot be any pending disable command */ + + /* Do nothing if pending enable or already enabled */ + if (is_promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask) || + (rxf->rxmode_active & BNA_RXMODE_PROMISC)) { + /* Schedule enable */ + } else { + /* Promisc mode should not be active in the system */ + promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + bna->rxf_promisc_id = rxf->rxf_id; + ret = 1; + } + + return ret; +} + +/** + * Should only be called by bna_rxf_mode_set. + * Helps decide whether a h/w configuration change is needed. + * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_promisc_disable(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + int ret = 0; + + /* There cannot be any pending disable */ + + /* Turn off the pending enable command, if any */ + if (is_promisc_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* Promisc mode should not be active */ + /* system promisc state should be pending */ + promisc_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + /* Remove the promisc state from the system */ + bna->rxf_promisc_id = BFI_MAX_RXF; + + /* Schedule disable */ + } else if (rxf->rxmode_active & BNA_RXMODE_PROMISC) { + /* Promisc mode should be active in the system */ + promisc_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + ret = 1; + + /* Do nothing if already disabled */ + } else { + } + + return ret; +}
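+ +/* + * While default mode is active, this RxF's VLAN filter is disabled and every + * other RxF is redirected to it (see __rxf_default_function_config()). + */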
+ * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_default_enable(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + int ret = 0; + + /* There can not be any pending disable command */ + + /* Do nothing if pending enable or already enabled */ + if (is_default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask) || + (rxf->rxmode_active & BNA_RXMODE_DEFAULT)) { + /* Schedule enable */ + } else { + /* Default mode should not be active in the system */ + default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + bna->rxf_default_id = rxf->rxf_id; + ret = 1; + } + + return ret; +} + +/** + * Should only be called by bna_rxf_mode_set. + * Helps deciding if h/w configuration is needed or not. + * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_default_disable(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + int ret = 0; + + /* There can not be any pending disable */ + + /* Turn off pending enable command , if any */ + if (is_default_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* Promisc mode should not be active */ + /* system default state should be pending */ + default_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + /* Remove the default state from the system */ + bna->rxf_default_id = BFI_MAX_RXF; + + /* Schedule disable */ + } else if (rxf->rxmode_active & BNA_RXMODE_DEFAULT) { + /* Default mode should be active in the system */ + default_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + ret = 1; + + /* Do nothing if already disabled */ + } else { + } + + return ret; +} + +/** + * Should only be called by bna_rxf_mode_set. + * Helps deciding if h/w configuration is needed or not. + * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_allmulti_enable(struct bna_rxf *rxf) +{ + int ret = 0; + + /* There can not be any pending disable command */ + + /* Do nothing if pending enable or already enabled */ + if (is_allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask) || + (rxf->rxmode_active & BNA_RXMODE_ALLMULTI)) { + /* Schedule enable */ + } else { + allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + ret = 1; + } + + return ret; +} + +/** + * Should only be called by bna_rxf_mode_set. + * Helps deciding if h/w configuration is needed or not. 
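+ * Unlike promisc/default mode, allmulti needs no system-wide ownership
+ * tracking, so bna->rxf_*_id is not touched here.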
+ * Returns: + * 0 = no h/w change + * 1 = need h/w change + */ +int +rxf_allmulti_disable(struct bna_rxf *rxf) +{ + int ret = 0; + + /* There can not be any pending disable */ + + /* Turn off pending enable command , if any */ + if (is_allmulti_enable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask)) { + /* Allmulti mode should not be active */ + allmulti_inactive(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + + /* Schedule disable */ + } else if (rxf->rxmode_active & BNA_RXMODE_ALLMULTI) { + allmulti_disable(rxf->rxmode_pending, + rxf->rxmode_pending_bitmask); + ret = 1; + } + + return ret; +} + +/* RxF <- bnad */ +void +bna_rx_mcast_delall(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head *qe; + struct bna_mac *mac; + int need_hw_config = 0; + + /* Purge all entries from pending_add_q */ + while (!list_empty(&rxf->mcast_pending_add_q)) { + bfa_q_deq(&rxf->mcast_pending_add_q, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + } + + /* Schedule all entries in active_q for deletion */ + while (!list_empty(&rxf->mcast_active_q)) { + bfa_q_deq(&rxf->mcast_active_q, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + list_add_tail(&mac->qe, &rxf->mcast_pending_del_q); + need_hw_config = 1; + } + + if (need_hw_config) { + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + return; + } + + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); +} + +/* RxF <- Rx */ +void +bna_rx_receive_resume(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + + if (rxf->rxf_oper_state == BNA_RXF_OPER_STATE_PAUSED) { + rxf->oper_state_cbfn = cbfn; + rxf->oper_state_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_RESUME); + } else if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); +} + +void +bna_rx_receive_pause(struct bna_rx *rx, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + + if (rxf->rxf_oper_state == BNA_RXF_OPER_STATE_RUNNING) { + rxf->oper_state_cbfn = cbfn; + rxf->oper_state_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_PAUSE); + } else if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); +} + +/* RxF <- bnad */ +enum bna_cb_status +bna_rx_ucast_add(struct bna_rx *rx, u8 *addr, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head *qe; + struct bna_mac *mac; + + /* Check if already added */ + list_for_each(qe, &rxf->ucast_active_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + /* Check if pending addition */ + list_for_each(qe, &rxf->ucast_pending_add_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + mac = bna_ucam_mod_mac_get(&rxf->rx->bna->ucam_mod); + if (mac == NULL) + return BNA_CB_UCAST_CAM_FULL; + bfa_q_qe_init(&mac->qe); + memcpy(mac->addr, addr, ETH_ALEN); + list_add_tail(&mac->qe, &rxf->ucast_pending_add_q); + + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + + return BNA_CB_SUCCESS; +} + +/* RxF <- bnad 
*/ +enum bna_cb_status +bna_rx_ucast_del(struct bna_rx *rx, u8 *addr, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head *qe; + struct bna_mac *mac; + + list_for_each(qe, &rxf->ucast_pending_add_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + list_del(qe); + bfa_q_qe_init(qe); + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, mac); + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + list_for_each(qe, &rxf->ucast_active_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + list_del(qe); + bfa_q_qe_init(qe); + list_add_tail(qe, &rxf->ucast_pending_del_q); + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + return BNA_CB_SUCCESS; + } + } + + return BNA_CB_INVALID_MAC; +} + +/* RxF <- bnad */ +enum bna_cb_status +bna_rx_mode_set(struct bna_rx *rx, enum bna_rxmode new_mode, + enum bna_rxmode bitmask, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + int need_hw_config = 0; + + /* Error checks */ + + if (is_promisc_enable(new_mode, bitmask)) { + /* If promisc mode is already enabled elsewhere in the system */ + if ((rx->bna->rxf_promisc_id != BFI_MAX_RXF) && + (rx->bna->rxf_promisc_id != rxf->rxf_id)) + goto err_return; + + /* If default mode is already enabled in the system */ + if (rx->bna->rxf_default_id != BFI_MAX_RXF) + goto err_return; + + /* Trying to enable promiscuous and default mode together */ + if (is_default_enable(new_mode, bitmask)) + goto err_return; + } + + if (is_default_enable(new_mode, bitmask)) { + /* If default mode is already enabled elsewhere in the system */ + if ((rx->bna->rxf_default_id != BFI_MAX_RXF) && + (rx->bna->rxf_default_id != rxf->rxf_id)) { + goto err_return; + } + + /* If promiscuous mode is already enabled in the system */ + if (rx->bna->rxf_promisc_id != BFI_MAX_RXF) + goto err_return; + } + + /* Process the commands */ + + if (is_promisc_enable(new_mode, bitmask)) { + if (rxf_promisc_enable(rxf)) + need_hw_config = 1; + } else if (is_promisc_disable(new_mode, bitmask)) { + if (rxf_promisc_disable(rxf)) + need_hw_config = 1; + } + + if (is_default_enable(new_mode, bitmask)) { + if (rxf_default_enable(rxf)) + need_hw_config = 1; + } else if (is_default_disable(new_mode, bitmask)) { + if (rxf_default_disable(rxf)) + need_hw_config = 1; + } + + if (is_allmulti_enable(new_mode, bitmask)) { + if (rxf_allmulti_enable(rxf)) + need_hw_config = 1; + } else if (is_allmulti_disable(new_mode, bitmask)) { + if (rxf_allmulti_disable(rxf)) + need_hw_config = 1; + } + + /* Trigger h/w if needed */ + + if (need_hw_config) { + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + } else if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + + return BNA_CB_SUCCESS; + +err_return: + return BNA_CB_FAIL; +} + +/* RxF <- bnad */ +void +bna_rx_rss_enable(struct bna_rx *rx) +{ + struct bna_rxf *rxf = &rx->rxf; + + rxf->rxf_flags |= BNA_RXF_FL_RSS_CONFIG_PENDING; + rxf->rss_status = BNA_STATUS_T_ENABLED; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); +} + +/* RxF <- bnad */ +void +bna_rx_rss_disable(struct bna_rx *rx) +{ + struct bna_rxf *rxf = &rx->rxf; + + rxf->rxf_flags |= BNA_RXF_FL_RSS_CONFIG_PENDING; + rxf->rss_status = BNA_STATUS_T_DISABLED; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); +} + +/* RxF <- bnad */ +void 
+bna_rx_rss_reconfig(struct bna_rx *rx, struct bna_rxf_rss *rss_config)
+{
+	struct bna_rxf *rxf = &rx->rxf;
+
+	rxf->rxf_flags |= BNA_RXF_FL_RSS_CONFIG_PENDING;
+	rxf->rss_status = BNA_STATUS_T_ENABLED;
+	rxf->rss_cfg = *rss_config;
+	bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD);
+}
+
+/* RxF <- bnad */
+void
+bna_rx_vlanfilter_enable(struct bna_rx *rx)
+{
+	struct bna_rxf *rxf = &rx->rxf;
+
+	if (rxf->vlan_filter_status == BNA_STATUS_T_DISABLED) {
+		rxf->rxf_flags |= BNA_RXF_FL_VLAN_CONFIG_PENDING;
+		rxf->vlan_filter_status = BNA_STATUS_T_ENABLED;
+		bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD);
+	}
+}
+
+/* RxF <- bnad */
+void
+bna_rx_vlanfilter_disable(struct bna_rx *rx)
+{
+	struct bna_rxf *rxf = &rx->rxf;
+
+	if (rxf->vlan_filter_status == BNA_STATUS_T_ENABLED) {
+		rxf->rxf_flags |= BNA_RXF_FL_VLAN_CONFIG_PENDING;
+		rxf->vlan_filter_status = BNA_STATUS_T_DISABLED;
+		bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD);
+	}
+}
+
+/* Rx */
+
+struct bna_rxp *
+bna_rx_get_rxp(struct bna_rx *rx, int vector)
+{
+	struct bna_rxp *rxp;
+	struct list_head *qe;
+
+	list_for_each(qe, &rx->rxp_q) {
+		rxp = (struct bna_rxp *)qe;
+		if (rxp->vector == vector)
+			return rxp;
+	}
+	return NULL;
+}
+
+/*
+ * bna_rx_rss_rit_set()
+ * Sets the Q ids for the specified msi-x vectors in the RIT.
+ * Maximum rit size supported is 64, which should be the max size of the
+ * vectors array.
+ */
+
+void
+bna_rx_rss_rit_set(struct bna_rx *rx, unsigned int *vectors, int nvectors)
+{
+	int i;
+	struct bna_rxp *rxp;
+	struct bna_rxq *q0 = NULL, *q1 = NULL;
+	struct bna *bna;
+	struct bna_rxf *rxf;
+
+	/* Build the RIT contents for this RX */
+	bna = rx->bna;
+
+	rxf = &rx->rxf;
+	for (i = 0; i < nvectors; i++) {
+		rxp = bna_rx_get_rxp(rx, vectors[i]);
+
+		GET_RXQS(rxp, q0, q1);
+		rxf->rit_segment->rit[i].large_rxq_id = q0->rxq_id;
+		rxf->rit_segment->rit[i].small_rxq_id = (q1 ?
q1->rxq_id : 0); + } + + rxf->rit_segment->rit_size = nvectors; + + /* Subsequent call to enable/reconfig RSS will update the RIT in h/w */ +} + +/* Rx <- bnad */ +void +bna_rx_coalescing_timeo_set(struct bna_rx *rx, int coalescing_timeo) +{ + struct bna_rxp *rxp; + struct list_head *qe; + + list_for_each(qe, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe; + rxp->cq.ccb->rx_coalescing_timeo = coalescing_timeo; + bna_ib_coalescing_timeo_set(rxp->cq.ib, coalescing_timeo); + } +} + +/* Rx <- bnad */ +void +bna_rx_dim_reconfig(struct bna *bna, u32 vector[][BNA_BIAS_T_MAX]) +{ + int i, j; + + for (i = 0; i < BNA_LOAD_T_MAX; i++) + for (j = 0; j < BNA_BIAS_T_MAX; j++) + bna->rx_mod.dim_vector[i][j] = vector[i][j]; +} + +/* Rx <- bnad */ +void +bna_rx_dim_update(struct bna_ccb *ccb) +{ + struct bna *bna = ccb->cq->rx->bna; + u32 load, bias; + u32 pkt_rt, small_rt, large_rt; + u8 coalescing_timeo; + + if ((ccb->pkt_rate.small_pkt_cnt == 0) && + (ccb->pkt_rate.large_pkt_cnt == 0)) + return; + + /* Arrive at preconfigured coalescing timeo value based on pkt rate */ + + small_rt = ccb->pkt_rate.small_pkt_cnt; + large_rt = ccb->pkt_rate.large_pkt_cnt; + + pkt_rt = small_rt + large_rt; + + if (pkt_rt < BNA_PKT_RATE_10K) + load = BNA_LOAD_T_LOW_4; + else if (pkt_rt < BNA_PKT_RATE_20K) + load = BNA_LOAD_T_LOW_3; + else if (pkt_rt < BNA_PKT_RATE_30K) + load = BNA_LOAD_T_LOW_2; + else if (pkt_rt < BNA_PKT_RATE_40K) + load = BNA_LOAD_T_LOW_1; + else if (pkt_rt < BNA_PKT_RATE_50K) + load = BNA_LOAD_T_HIGH_1; + else if (pkt_rt < BNA_PKT_RATE_60K) + load = BNA_LOAD_T_HIGH_2; + else if (pkt_rt < BNA_PKT_RATE_80K) + load = BNA_LOAD_T_HIGH_3; + else + load = BNA_LOAD_T_HIGH_4; + + if (small_rt > (large_rt << 1)) + bias = 0; + else + bias = 1; + + ccb->pkt_rate.small_pkt_cnt = 0; + ccb->pkt_rate.large_pkt_cnt = 0; + + coalescing_timeo = bna->rx_mod.dim_vector[load][bias]; + ccb->rx_coalescing_timeo = coalescing_timeo; + + /* Set it to IB */ + bna_ib_coalescing_timeo_set(ccb->cq->ib, coalescing_timeo); +} + +/* Tx */ +/* TX <- bnad */ +enum bna_cb_status +bna_tx_prio_set(struct bna_tx *tx, int prio, + void (*cbfn)(struct bnad *, struct bna_tx *, + enum bna_cb_status)) +{ + if (tx->flags & BNA_TX_F_PRIO_LOCK) + return BNA_CB_FAIL; + else { + tx->prio_change_cbfn = cbfn; + bna_tx_prio_changed(tx, prio); + } + + return BNA_CB_SUCCESS; +} + +/* TX <- bnad */ +void +bna_tx_coalescing_timeo_set(struct bna_tx *tx, int coalescing_timeo) +{ + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_coalescing_timeo_set(txq->ib, coalescing_timeo); + } +} + +/* + * Private data + */ + +struct bna_ritseg_pool_cfg { + u32 pool_size; + u32 pool_entry_size; +}; +init_ritseg_pool(ritseg_pool_cfg); + +/* + * Private functions + */ +static void +bna_ucam_mod_init(struct bna_ucam_mod *ucam_mod, struct bna *bna, + struct bna_res_info *res_info) +{ + int i; + + ucam_mod->ucmac = (struct bna_mac *) + res_info[BNA_RES_MEM_T_UCMAC_ARRAY].res_u.mem_info.mdl[0].kva; + + INIT_LIST_HEAD(&ucam_mod->free_q); + for (i = 0; i < BFI_MAX_UCMAC; i++) { + bfa_q_qe_init(&ucam_mod->ucmac[i].qe); + list_add_tail(&ucam_mod->ucmac[i].qe, &ucam_mod->free_q); + } + + ucam_mod->bna = bna; +} + +static void +bna_ucam_mod_uninit(struct bna_ucam_mod *ucam_mod) +{ + struct list_head *qe; + int i = 0; + + list_for_each(qe, &ucam_mod->free_q) + i++; + + ucam_mod->bna = NULL; +} + +static void +bna_mcam_mod_init(struct bna_mcam_mod *mcam_mod, struct bna *bna, + struct bna_res_info *res_info) +{ + int i; + + 
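/* Carve the kva block reserved via BNA_RES_MEM_T_MCMAC_ARRAY into a
+	 * free list; mirrors bna_ucam_mod_init() above.
+	 */
+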
mcam_mod->mcmac = (struct bna_mac *) + res_info[BNA_RES_MEM_T_MCMAC_ARRAY].res_u.mem_info.mdl[0].kva; + + INIT_LIST_HEAD(&mcam_mod->free_q); + for (i = 0; i < BFI_MAX_MCMAC; i++) { + bfa_q_qe_init(&mcam_mod->mcmac[i].qe); + list_add_tail(&mcam_mod->mcmac[i].qe, &mcam_mod->free_q); + } + + mcam_mod->bna = bna; +} + +static void +bna_mcam_mod_uninit(struct bna_mcam_mod *mcam_mod) +{ + struct list_head *qe; + int i = 0; + + list_for_each(qe, &mcam_mod->free_q) + i++; + + mcam_mod->bna = NULL; +} + +static void +bna_rit_mod_init(struct bna_rit_mod *rit_mod, + struct bna_res_info *res_info) +{ + int i; + int j; + int count; + int offset; + + rit_mod->rit = (struct bna_rit_entry *) + res_info[BNA_RES_MEM_T_RIT_ENTRY].res_u.mem_info.mdl[0].kva; + rit_mod->rit_segment = (struct bna_rit_segment *) + res_info[BNA_RES_MEM_T_RIT_SEGMENT].res_u.mem_info.mdl[0].kva; + + count = 0; + offset = 0; + for (i = 0; i < BFI_RIT_SEG_TOTAL_POOLS; i++) { + INIT_LIST_HEAD(&rit_mod->rit_seg_pool[i]); + for (j = 0; j < ritseg_pool_cfg[i].pool_size; j++) { + bfa_q_qe_init(&rit_mod->rit_segment[count].qe); + rit_mod->rit_segment[count].max_rit_size = + ritseg_pool_cfg[i].pool_entry_size; + rit_mod->rit_segment[count].rit_offset = offset; + rit_mod->rit_segment[count].rit = + &rit_mod->rit[offset]; + list_add_tail(&rit_mod->rit_segment[count].qe, + &rit_mod->rit_seg_pool[i]); + count++; + offset += ritseg_pool_cfg[i].pool_entry_size; + } + } +} + +static void +bna_rit_mod_uninit(struct bna_rit_mod *rit_mod) +{ + struct bna_rit_segment *rit_segment; + struct list_head *qe; + int i; + int j; + + for (i = 0; i < BFI_RIT_SEG_TOTAL_POOLS; i++) { + j = 0; + list_for_each(qe, &rit_mod->rit_seg_pool[i]) { + rit_segment = (struct bna_rit_segment *)qe; + j++; + } + } +} + +/* + * Public functions + */ + +/* Called during probe(), before calling bna_init() */ +void +bna_res_req(struct bna_res_info *res_info) +{ + bna_adv_res_req(res_info); + + /* DMA memory for retrieving IOC attributes */ + res_info[BNA_RES_MEM_T_ATTR].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_ATTR].res_u.mem_info.mem_type = BNA_MEM_T_DMA; + res_info[BNA_RES_MEM_T_ATTR].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_ATTR].res_u.mem_info.len = + ALIGN(bfa_ioc_meminfo(), PAGE_SIZE); + + /* DMA memory for index segment of an IB */ + res_info[BNA_RES_MEM_T_IBIDX].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.mem_type = BNA_MEM_T_DMA; + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.len = + BFI_IBIDX_SIZE * BFI_IBIDX_MAX_SEGSIZE; + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.num = BFI_MAX_IB; + + /* Virtual memory for IB objects - stored by IB module */ + res_info[BNA_RES_MEM_T_IB_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_IB_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_IB_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_IB_ARRAY].res_u.mem_info.len = + BFI_MAX_IB * sizeof(struct bna_ib); + + /* Virtual memory for intr objects - stored by IB module */ + res_info[BNA_RES_MEM_T_INTR_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_INTR_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_INTR_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_INTR_ARRAY].res_u.mem_info.len = + BFI_MAX_IB * sizeof(struct bna_intr); + + /* Virtual memory for idx_seg objects - stored by IB module */ + res_info[BNA_RES_MEM_T_IDXSEG_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_IDXSEG_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + 
res_info[BNA_RES_MEM_T_IDXSEG_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_IDXSEG_ARRAY].res_u.mem_info.len = + BFI_IBIDX_TOTAL_SEGS * sizeof(struct bna_ibidx_seg); + + /* Virtual memory for Tx objects - stored by Tx module */ + res_info[BNA_RES_MEM_T_TX_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_TX_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_TX_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_TX_ARRAY].res_u.mem_info.len = + BFI_MAX_TXQ * sizeof(struct bna_tx); + + /* Virtual memory for TxQ - stored by Tx module */ + res_info[BNA_RES_MEM_T_TXQ_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_TXQ_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_TXQ_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_TXQ_ARRAY].res_u.mem_info.len = + BFI_MAX_TXQ * sizeof(struct bna_txq); + + /* Virtual memory for Rx objects - stored by Rx module */ + res_info[BNA_RES_MEM_T_RX_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_RX_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_RX_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_RX_ARRAY].res_u.mem_info.len = + BFI_MAX_RXQ * sizeof(struct bna_rx); + + /* Virtual memory for RxPath - stored by Rx module */ + res_info[BNA_RES_MEM_T_RXP_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_RXP_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_RXP_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_RXP_ARRAY].res_u.mem_info.len = + BFI_MAX_RXQ * sizeof(struct bna_rxp); + + /* Virtual memory for RxQ - stored by Rx module */ + res_info[BNA_RES_MEM_T_RXQ_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_RXQ_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_RXQ_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_RXQ_ARRAY].res_u.mem_info.len = + BFI_MAX_RXQ * sizeof(struct bna_rxq); + + /* Virtual memory for Unicast MAC address - stored by ucam module */ + res_info[BNA_RES_MEM_T_UCMAC_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_UCMAC_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_UCMAC_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_UCMAC_ARRAY].res_u.mem_info.len = + BFI_MAX_UCMAC * sizeof(struct bna_mac); + + /* Virtual memory for Multicast MAC address - stored by mcam module */ + res_info[BNA_RES_MEM_T_MCMAC_ARRAY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_MCMAC_ARRAY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_MCMAC_ARRAY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_MCMAC_ARRAY].res_u.mem_info.len = + BFI_MAX_MCMAC * sizeof(struct bna_mac); + + /* Virtual memory for RIT entries */ + res_info[BNA_RES_MEM_T_RIT_ENTRY].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_RIT_ENTRY].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_RIT_ENTRY].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_RIT_ENTRY].res_u.mem_info.len = + BFI_MAX_RIT_SIZE * sizeof(struct bna_rit_entry); + + /* Virtual memory for RIT segment table */ + res_info[BNA_RES_MEM_T_RIT_SEGMENT].res_type = BNA_RES_T_MEM; + res_info[BNA_RES_MEM_T_RIT_SEGMENT].res_u.mem_info.mem_type = + BNA_MEM_T_KVA; + res_info[BNA_RES_MEM_T_RIT_SEGMENT].res_u.mem_info.num = 1; + res_info[BNA_RES_MEM_T_RIT_SEGMENT].res_u.mem_info.len = + BFI_RIT_TOTAL_SEGS * sizeof(struct bna_rit_segment); + + /* Interrupt resource for mailbox interrupt */ + res_info[BNA_RES_INTR_T_MBOX].res_type = BNA_RES_T_INTR; + 
res_info[BNA_RES_INTR_T_MBOX].res_u.intr_info.intr_type = + BNA_INTR_T_MSIX; + res_info[BNA_RES_INTR_T_MBOX].res_u.intr_info.num = 1; +} + +/* Called during probe() */ +void +bna_init(struct bna *bna, struct bnad *bnad, struct bfa_pcidev *pcidev, + struct bna_res_info *res_info) +{ + bna->bnad = bnad; + bna->pcidev = *pcidev; + + bna->stats.hw_stats = (struct bfi_ll_stats *) + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.mdl[0].kva; + bna->hw_stats_dma.msb = + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.mdl[0].dma.msb; + bna->hw_stats_dma.lsb = + res_info[BNA_RES_MEM_T_STATS].res_u.mem_info.mdl[0].dma.lsb; + bna->stats.sw_stats = (struct bna_sw_stats *) + res_info[BNA_RES_MEM_T_SWSTATS].res_u.mem_info.mdl[0].kva; + + bna->regs.page_addr = bna->pcidev.pci_bar_kva + + reg_offset[bna->pcidev.pci_func].page_addr; + bna->regs.fn_int_status = bna->pcidev.pci_bar_kva + + reg_offset[bna->pcidev.pci_func].fn_int_status; + bna->regs.fn_int_mask = bna->pcidev.pci_bar_kva + + reg_offset[bna->pcidev.pci_func].fn_int_mask; + + if (bna->pcidev.pci_func < 3) + bna->port_num = 0; + else + bna->port_num = 1; + + /* Also initializes diag, cee, sfp, phy_port and mbox_mod */ + bna_device_init(&bna->device, bna, res_info); + + bna_port_init(&bna->port, bna); + + bna_tx_mod_init(&bna->tx_mod, bna, res_info); + + bna_rx_mod_init(&bna->rx_mod, bna, res_info); + + bna_ib_mod_init(&bna->ib_mod, bna, res_info); + + bna_rit_mod_init(&bna->rit_mod, res_info); + + bna_ucam_mod_init(&bna->ucam_mod, bna, res_info); + + bna_mcam_mod_init(&bna->mcam_mod, bna, res_info); + + bna->rxf_default_id = BFI_MAX_RXF; + bna->rxf_promisc_id = BFI_MAX_RXF; + + /* Mbox q element for posting stat request to f/w */ + bfa_q_qe_init(&bna->mbox_qe.qe); +} + +void +bna_uninit(struct bna *bna) +{ + bna_mcam_mod_uninit(&bna->mcam_mod); + + bna_ucam_mod_uninit(&bna->ucam_mod); + + bna_rit_mod_uninit(&bna->rit_mod); + + bna_ib_mod_uninit(&bna->ib_mod); + + bna_rx_mod_uninit(&bna->rx_mod); + + bna_tx_mod_uninit(&bna->tx_mod); + + bna_port_uninit(&bna->port); + + bna_device_uninit(&bna->device); + + bna->bnad = NULL; +} + +struct bna_mac * +bna_ucam_mod_mac_get(struct bna_ucam_mod *ucam_mod) +{ + struct list_head *qe; + + if (list_empty(&ucam_mod->free_q)) + return NULL; + + bfa_q_deq(&ucam_mod->free_q, &qe); + + return (struct bna_mac *)qe; +} + +void +bna_ucam_mod_mac_put(struct bna_ucam_mod *ucam_mod, struct bna_mac *mac) +{ + list_add_tail(&mac->qe, &ucam_mod->free_q); +} + +struct bna_mac * +bna_mcam_mod_mac_get(struct bna_mcam_mod *mcam_mod) +{ + struct list_head *qe; + + if (list_empty(&mcam_mod->free_q)) + return NULL; + + bfa_q_deq(&mcam_mod->free_q, &qe); + + return (struct bna_mac *)qe; +} + +void +bna_mcam_mod_mac_put(struct bna_mcam_mod *mcam_mod, struct bna_mac *mac) +{ + list_add_tail(&mac->qe, &mcam_mod->free_q); +} + +/** + * Note: This should be called in the same locking context as the call to + * bna_rit_mod_seg_get() + */ +int +bna_rit_mod_can_satisfy(struct bna_rit_mod *rit_mod, int seg_size) +{ + int i; + + /* Select the pool for seg_size */ + for (i = 0; i < BFI_RIT_SEG_TOTAL_POOLS; i++) { + if (seg_size <= ritseg_pool_cfg[i].pool_entry_size) + break; + } + + if (i == BFI_RIT_SEG_TOTAL_POOLS) + return 0; + + if (list_empty(&rit_mod->rit_seg_pool[i])) + return 0; + + return 1; +} + +struct bna_rit_segment * +bna_rit_mod_seg_get(struct bna_rit_mod *rit_mod, int seg_size) +{ + struct bna_rit_segment *seg; + struct list_head *qe; + int i; + + /* Select the pool for seg_size */ + for (i = 0; i < BFI_RIT_SEG_TOTAL_POOLS; i++) { + 
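/* Pools are ordered by increasing entry size, so the first
+		 * pool that fits is the smallest suitable one.
+		 */
+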
if (seg_size <= ritseg_pool_cfg[i].pool_entry_size)
+			break;
+	}
+
+	if (i == BFI_RIT_SEG_TOTAL_POOLS)
+		return NULL;
+
+	if (list_empty(&rit_mod->rit_seg_pool[i]))
+		return NULL;
+
+	bfa_q_deq(&rit_mod->rit_seg_pool[i], &qe);
+	seg = (struct bna_rit_segment *)qe;
+	bfa_q_qe_init(&seg->qe);
+	seg->rit_size = seg_size;
+
+	return seg;
+}
+
+void
+bna_rit_mod_seg_put(struct bna_rit_mod *rit_mod,
+			struct bna_rit_segment *seg)
+{
+	int i;
+
+	/* Select the pool for seg->max_rit_size */
+	for (i = 0; i < BFI_RIT_SEG_TOTAL_POOLS; i++) {
+		if (seg->max_rit_size == ritseg_pool_cfg[i].pool_entry_size)
+			break;
+	}
+
+	seg->rit_size = 0;
+	list_add_tail(&seg->qe, &rit_mod->rit_seg_pool[i]);
+}
diff --git a/drivers/net/bna/bna_hw.h b/drivers/net/bna/bna_hw.h
new file mode 100644
index 000000000000..67eb376c5c7e
--- /dev/null
+++ b/drivers/net/bna/bna_hw.h
@@ -0,0 +1,1491 @@
+/*
+ * Linux network driver for Brocade Converged Network Adapter.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License (GPL) Version 2 as
+ * published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Copyright (c) 2005-2010 Brocade Communications Systems, Inc.
+ * All rights reserved
+ * www.brocade.com
+ *
+ * File for interrupt macros and functions
+ */
+
+#ifndef __BNA_HW_H__
+#define __BNA_HW_H__
+
+#include "bfi_ctreg.h"
+
+/**
+ *
+ * SW imposed limits
+ *
+ */
+
+#ifndef BNA_BIOS_BUILD
+
+#define BFI_MAX_TXQ			64
+#define BFI_MAX_RXQ			64
+#define BFI_MAX_RXF			64
+#define BFI_MAX_IB			128
+#define BFI_MAX_RIT_SIZE		256
+#define BFI_RSS_RIT_SIZE		64
+#define BFI_NONRSS_RIT_SIZE		1
+#define BFI_MAX_UCMAC			256
+#define BFI_MAX_MCMAC			512
+#define BFI_IBIDX_SIZE			4
+#define BFI_MAX_VLAN			4095
+
+/**
+ * There are 3 free IB index pools:
+ *	pool1: 116 segments of 1 index each
+ *	pool2: 2 segments of 2 indexes each
+ *	pool8: 1 segment of 8 indexes
+ */
+#define BFI_IBIDX_POOL1_SIZE		116
+#define BFI_IBIDX_POOL1_ENTRY_SIZE	1
+#define BFI_IBIDX_POOL2_SIZE		2
+#define BFI_IBIDX_POOL2_ENTRY_SIZE	2
+#define BFI_IBIDX_POOL8_SIZE		1
+#define BFI_IBIDX_POOL8_ENTRY_SIZE	8
+#define BFI_IBIDX_TOTAL_POOLS		3
+#define BFI_IBIDX_TOTAL_SEGS		119 /* (POOL1 + POOL2 + POOL8)_SIZE */
+#define BFI_IBIDX_MAX_SEGSIZE		8
+#define init_ibidx_pool(name)						\
+static struct bna_ibidx_pool name[BFI_IBIDX_TOTAL_POOLS] =		\
+{									\
+	{ BFI_IBIDX_POOL1_SIZE, BFI_IBIDX_POOL1_ENTRY_SIZE },		\
+	{ BFI_IBIDX_POOL2_SIZE, BFI_IBIDX_POOL2_ENTRY_SIZE },		\
+	{ BFI_IBIDX_POOL8_SIZE, BFI_IBIDX_POOL8_ENTRY_SIZE }		\
+}
+
+/**
+ * There are 2 free RIT segment pools:
+ *	Pool1: 192 segments of 1 RIT entry each
+ *	Pool2: 1 segment of 64 RIT entries
+ */
+#define BFI_RIT_SEG_POOL1_SIZE		192
+#define BFI_RIT_SEG_POOL1_ENTRY_SIZE	1
+#define BFI_RIT_SEG_POOLRSS_SIZE	1
+#define BFI_RIT_SEG_POOLRSS_ENTRY_SIZE	64
+#define BFI_RIT_SEG_TOTAL_POOLS		2
+#define BFI_RIT_TOTAL_SEGS		193 /* POOL1_SIZE + POOLRSS_SIZE */
+#define init_ritseg_pool(name)						\
+static struct bna_ritseg_pool_cfg name[BFI_RIT_SEG_TOTAL_POOLS] =	\
+{									\
+	{ BFI_RIT_SEG_POOL1_SIZE, BFI_RIT_SEG_POOL1_ENTRY_SIZE },	\
+	{ BFI_RIT_SEG_POOLRSS_SIZE, BFI_RIT_SEG_POOLRSS_ENTRY_SIZE }	\
+}
+
+#else /* BNA_BIOS_BUILD */
+
+#define BFI_MAX_TXQ			1
+#define BFI_MAX_RXQ			1
+#define BFI_MAX_RXF			1
+#define BFI_MAX_IB			2
+#define BFI_MAX_RIT_SIZE		2
+#define BFI_RSS_RIT_SIZE		64
+#define BFI_NONRSS_RIT_SIZE 1 +#define BFI_MAX_UCMAC 1 +#define BFI_MAX_MCMAC 8 +#define BFI_IBIDX_SIZE 4 +#define BFI_MAX_VLAN 4095 +/* There is one free pool: 2 segments of 1 index each */ +#define BFI_IBIDX_POOL1_SIZE 2 +#define BFI_IBIDX_POOL1_ENTRY_SIZE 1 +#define BFI_IBIDX_TOTAL_POOLS 1 +#define BFI_IBIDX_TOTAL_SEGS 2 /* POOL1_SIZE */ +#define BFI_IBIDX_MAX_SEGSIZE 1 +#define init_ibidx_pool(name) \ +static struct bna_ibidx_pool name[BFI_IBIDX_TOTAL_POOLS] = \ +{ \ + { BFI_IBIDX_POOL1_SIZE, BFI_IBIDX_POOL1_ENTRY_SIZE } \ +} + +#define BFI_RIT_SEG_POOL1_SIZE 1 +#define BFI_RIT_SEG_POOL1_ENTRY_SIZE 1 +#define BFI_RIT_SEG_TOTAL_POOLS 1 +#define BFI_RIT_TOTAL_SEGS 1 /* POOL1_SIZE */ +#define init_ritseg_pool(name) \ +static struct bna_ritseg_pool_cfg name[BFI_RIT_SEG_TOTAL_POOLS] = \ +{ \ + { BFI_RIT_SEG_POOL1_SIZE, BFI_RIT_SEG_POOL1_ENTRY_SIZE } \ +} + +#endif /* BNA_BIOS_BUILD */ + +#define BFI_RSS_HASH_KEY_LEN 10 + +#define BFI_COALESCING_TIMER_UNIT 5 /* 5us */ +#define BFI_MAX_COALESCING_TIMEO 0xFF /* in 5us units */ +#define BFI_MAX_INTERPKT_COUNT 0xFF +#define BFI_MAX_INTERPKT_TIMEO 0xF /* in 0.5us units */ +#define BFI_TX_COALESCING_TIMEO 20 /* 20 * 5 = 100us */ +#define BFI_TX_INTERPKT_COUNT 32 +#define BFI_RX_COALESCING_TIMEO 12 /* 12 * 5 = 60us */ +#define BFI_RX_INTERPKT_COUNT 6 /* Pkt Cnt = 6 */ +#define BFI_RX_INTERPKT_TIMEO 3 /* 3 * 0.5 = 1.5us */ + +#define BFI_TXQ_WI_SIZE 64 /* bytes */ +#define BFI_RXQ_WI_SIZE 8 /* bytes */ +#define BFI_CQ_WI_SIZE 16 /* bytes */ +#define BFI_TX_MAX_WRR_QUOTA 0xFFF + +#define BFI_TX_MAX_VECTORS_PER_WI 4 +#define BFI_TX_MAX_VECTORS_PER_PKT 0xFF +#define BFI_TX_MAX_DATA_PER_VECTOR 0xFFFF +#define BFI_TX_MAX_DATA_PER_PKT 0xFFFFFF + +/* Small Q buffer size */ +#define BFI_SMALL_RXBUF_SIZE 128 + +/* Defined separately since BFA_FLASH_DMA_BUF_SZ is in bfa_flash.c */ +#define BFI_FLASH_DMA_BUF_SZ 0x010000 /* 64K DMA */ +#define BFI_HW_STATS_SIZE 0x4000 /* 16K DMA */ + +/** + * + * HW register offsets, macros + * + */ + +/* DMA Block Register Host Window Start Address */ +#define DMA_BLK_REG_ADDR 0x00013000 + +/* DMA Block Internal Registers */ +#define DMA_CTRL_REG0 (DMA_BLK_REG_ADDR + 0x000) +#define DMA_CTRL_REG1 (DMA_BLK_REG_ADDR + 0x004) +#define DMA_ERR_INT_STATUS (DMA_BLK_REG_ADDR + 0x008) +#define DMA_ERR_INT_ENABLE (DMA_BLK_REG_ADDR + 0x00c) +#define DMA_ERR_INT_STATUS_SET (DMA_BLK_REG_ADDR + 0x010) + +/* APP Block Register Address Offset from BAR0 */ +#define APP_BLK_REG_ADDR 0x00014000 + +/* Host Function Interrupt Mask Registers */ +#define HOSTFN0_INT_MASK (APP_BLK_REG_ADDR + 0x004) +#define HOSTFN1_INT_MASK (APP_BLK_REG_ADDR + 0x104) +#define HOSTFN2_INT_MASK (APP_BLK_REG_ADDR + 0x304) +#define HOSTFN3_INT_MASK (APP_BLK_REG_ADDR + 0x404) + +/** + * Host Function PCIe Error Registers + * Duplicates "Correctable" & "Uncorrectable" + * registers in PCIe Config space. 
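+ * One register per PCIe function, FN0..FN3 below.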
+ */ +#define FN0_PCIE_ERR_REG (APP_BLK_REG_ADDR + 0x014) +#define FN1_PCIE_ERR_REG (APP_BLK_REG_ADDR + 0x114) +#define FN2_PCIE_ERR_REG (APP_BLK_REG_ADDR + 0x314) +#define FN3_PCIE_ERR_REG (APP_BLK_REG_ADDR + 0x414) + +/* Host Function Error Type Status Registers */ +#define FN0_ERR_TYPE_STATUS_REG (APP_BLK_REG_ADDR + 0x018) +#define FN1_ERR_TYPE_STATUS_REG (APP_BLK_REG_ADDR + 0x118) +#define FN2_ERR_TYPE_STATUS_REG (APP_BLK_REG_ADDR + 0x318) +#define FN3_ERR_TYPE_STATUS_REG (APP_BLK_REG_ADDR + 0x418) + +/* Host Function Error Type Mask Registers */ +#define FN0_ERR_TYPE_MSK_STATUS_REG (APP_BLK_REG_ADDR + 0x01c) +#define FN1_ERR_TYPE_MSK_STATUS_REG (APP_BLK_REG_ADDR + 0x11c) +#define FN2_ERR_TYPE_MSK_STATUS_REG (APP_BLK_REG_ADDR + 0x31c) +#define FN3_ERR_TYPE_MSK_STATUS_REG (APP_BLK_REG_ADDR + 0x41c) + +/* Catapult Host Semaphore Status Registers (App block) */ +#define HOST_SEM_STS0_REG (APP_BLK_REG_ADDR + 0x630) +#define HOST_SEM_STS1_REG (APP_BLK_REG_ADDR + 0x634) +#define HOST_SEM_STS2_REG (APP_BLK_REG_ADDR + 0x638) +#define HOST_SEM_STS3_REG (APP_BLK_REG_ADDR + 0x63c) +#define HOST_SEM_STS4_REG (APP_BLK_REG_ADDR + 0x640) +#define HOST_SEM_STS5_REG (APP_BLK_REG_ADDR + 0x644) +#define HOST_SEM_STS6_REG (APP_BLK_REG_ADDR + 0x648) +#define HOST_SEM_STS7_REG (APP_BLK_REG_ADDR + 0x64c) + +/* PCIe Misc Register */ +#define PCIE_MISC_REG (APP_BLK_REG_ADDR + 0x200) + +/* Temp Sensor Control Registers */ +#define TEMPSENSE_CNTL_REG (APP_BLK_REG_ADDR + 0x250) +#define TEMPSENSE_STAT_REG (APP_BLK_REG_ADDR + 0x254) + +/* APP Block local error registers */ +#define APP_LOCAL_ERR_STAT (APP_BLK_REG_ADDR + 0x258) +#define APP_LOCAL_ERR_MSK (APP_BLK_REG_ADDR + 0x25c) + +/* PCIe Link Error registers */ +#define PCIE_LNK_ERR_STAT (APP_BLK_REG_ADDR + 0x260) +#define PCIE_LNK_ERR_MSK (APP_BLK_REG_ADDR + 0x264) + +/** + * FCoE/FIP Ethertype Register + * 31:16 -- Chip wide value for FIP type + * 15:0 -- Chip wide value for FCoE type + */ +#define FCOE_FIP_ETH_TYPE (APP_BLK_REG_ADDR + 0x280) + +/** + * Reserved Ethertype Register + * 31:16 -- Reserved + * 15:0 -- Other ethertype + */ +#define RESV_ETH_TYPE (APP_BLK_REG_ADDR + 0x284) + +/** + * Host Command Status Registers + * Each set consists of 3 registers : + * clear, set, cmd + * 16 such register sets in all + * See catapult_spec.pdf for detailed functionality + * Put each type in a single macro accessed by _num ? 
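+ * Each set is 0x10 bytes apart, so such a macro (hypothetical, not
+ * used in this driver) could be:
+ *   #define HOST_CMDSTS_CLR_REG(_num) \
+ *	(APP_BLK_REG_ADDR + 0x500 + ((_num) << 4))
+ * with the set register at +0x4 and the cmd register at +0x8.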
+ */ +#define HOST_CMDSTS0_CLR_REG (APP_BLK_REG_ADDR + 0x500) +#define HOST_CMDSTS0_SET_REG (APP_BLK_REG_ADDR + 0x504) +#define HOST_CMDSTS0_REG (APP_BLK_REG_ADDR + 0x508) +#define HOST_CMDSTS1_CLR_REG (APP_BLK_REG_ADDR + 0x510) +#define HOST_CMDSTS1_SET_REG (APP_BLK_REG_ADDR + 0x514) +#define HOST_CMDSTS1_REG (APP_BLK_REG_ADDR + 0x518) +#define HOST_CMDSTS2_CLR_REG (APP_BLK_REG_ADDR + 0x520) +#define HOST_CMDSTS2_SET_REG (APP_BLK_REG_ADDR + 0x524) +#define HOST_CMDSTS2_REG (APP_BLK_REG_ADDR + 0x528) +#define HOST_CMDSTS3_CLR_REG (APP_BLK_REG_ADDR + 0x530) +#define HOST_CMDSTS3_SET_REG (APP_BLK_REG_ADDR + 0x534) +#define HOST_CMDSTS3_REG (APP_BLK_REG_ADDR + 0x538) +#define HOST_CMDSTS4_CLR_REG (APP_BLK_REG_ADDR + 0x540) +#define HOST_CMDSTS4_SET_REG (APP_BLK_REG_ADDR + 0x544) +#define HOST_CMDSTS4_REG (APP_BLK_REG_ADDR + 0x548) +#define HOST_CMDSTS5_CLR_REG (APP_BLK_REG_ADDR + 0x550) +#define HOST_CMDSTS5_SET_REG (APP_BLK_REG_ADDR + 0x554) +#define HOST_CMDSTS5_REG (APP_BLK_REG_ADDR + 0x558) +#define HOST_CMDSTS6_CLR_REG (APP_BLK_REG_ADDR + 0x560) +#define HOST_CMDSTS6_SET_REG (APP_BLK_REG_ADDR + 0x564) +#define HOST_CMDSTS6_REG (APP_BLK_REG_ADDR + 0x568) +#define HOST_CMDSTS7_CLR_REG (APP_BLK_REG_ADDR + 0x570) +#define HOST_CMDSTS7_SET_REG (APP_BLK_REG_ADDR + 0x574) +#define HOST_CMDSTS7_REG (APP_BLK_REG_ADDR + 0x578) +#define HOST_CMDSTS8_CLR_REG (APP_BLK_REG_ADDR + 0x580) +#define HOST_CMDSTS8_SET_REG (APP_BLK_REG_ADDR + 0x584) +#define HOST_CMDSTS8_REG (APP_BLK_REG_ADDR + 0x588) +#define HOST_CMDSTS9_CLR_REG (APP_BLK_REG_ADDR + 0x590) +#define HOST_CMDSTS9_SET_REG (APP_BLK_REG_ADDR + 0x594) +#define HOST_CMDSTS9_REG (APP_BLK_REG_ADDR + 0x598) +#define HOST_CMDSTS10_CLR_REG (APP_BLK_REG_ADDR + 0x5A0) +#define HOST_CMDSTS10_SET_REG (APP_BLK_REG_ADDR + 0x5A4) +#define HOST_CMDSTS10_REG (APP_BLK_REG_ADDR + 0x5A8) +#define HOST_CMDSTS11_CLR_REG (APP_BLK_REG_ADDR + 0x5B0) +#define HOST_CMDSTS11_SET_REG (APP_BLK_REG_ADDR + 0x5B4) +#define HOST_CMDSTS11_REG (APP_BLK_REG_ADDR + 0x5B8) +#define HOST_CMDSTS12_CLR_REG (APP_BLK_REG_ADDR + 0x5C0) +#define HOST_CMDSTS12_SET_REG (APP_BLK_REG_ADDR + 0x5C4) +#define HOST_CMDSTS12_REG (APP_BLK_REG_ADDR + 0x5C8) +#define HOST_CMDSTS13_CLR_REG (APP_BLK_REG_ADDR + 0x5D0) +#define HOST_CMDSTS13_SET_REG (APP_BLK_REG_ADDR + 0x5D4) +#define HOST_CMDSTS13_REG (APP_BLK_REG_ADDR + 0x5D8) +#define HOST_CMDSTS14_CLR_REG (APP_BLK_REG_ADDR + 0x5E0) +#define HOST_CMDSTS14_SET_REG (APP_BLK_REG_ADDR + 0x5E4) +#define HOST_CMDSTS14_REG (APP_BLK_REG_ADDR + 0x5E8) +#define HOST_CMDSTS15_CLR_REG (APP_BLK_REG_ADDR + 0x5F0) +#define HOST_CMDSTS15_SET_REG (APP_BLK_REG_ADDR + 0x5F4) +#define HOST_CMDSTS15_REG (APP_BLK_REG_ADDR + 0x5F8) + +/** + * LPU0 Block Register Address Offset from BAR0 + * Range 0x18000 - 0x18033 + */ +#define LPU0_BLK_REG_ADDR 0x00018000 + +/** + * LPU0 Registers + * Should they be directly used from host, + * except for diagnostics ? + * CTL_REG : Control register + * CMD_REG : Triggers exec. of cmd. 
in
+ * Mailbox memory
+ */
+#define LPU0_MBOX_CTL_REG	(LPU0_BLK_REG_ADDR + 0x000)
+#define LPU0_MBOX_CMD_REG	(LPU0_BLK_REG_ADDR + 0x004)
+#define LPU0_MBOX_LINK_0REG	(LPU0_BLK_REG_ADDR + 0x008)
+#define LPU1_MBOX_LINK_0REG	(LPU0_BLK_REG_ADDR + 0x00c)
+#define LPU0_MBOX_STATUS_0REG	(LPU0_BLK_REG_ADDR + 0x010)
+#define LPU1_MBOX_STATUS_0REG	(LPU0_BLK_REG_ADDR + 0x014)
+#define LPU0_ERR_STATUS_REG	(LPU0_BLK_REG_ADDR + 0x018)
+#define LPU0_ERR_SET_REG	(LPU0_BLK_REG_ADDR + 0x020)
+
+/**
+ * LPU1 Block Register Address Offset from BAR0
+ * Range 0x18400 - 0x18433
+ */
+#define LPU1_BLK_REG_ADDR	0x00018400
+
+/**
+ * LPU1 Registers
+ * Same as LPU0 registers above
+ */
+#define LPU1_MBOX_CTL_REG	(LPU1_BLK_REG_ADDR + 0x000)
+#define LPU1_MBOX_CMD_REG	(LPU1_BLK_REG_ADDR + 0x004)
+#define LPU0_MBOX_LINK_1REG	(LPU1_BLK_REG_ADDR + 0x008)
+#define LPU1_MBOX_LINK_1REG	(LPU1_BLK_REG_ADDR + 0x00c)
+#define LPU0_MBOX_STATUS_1REG	(LPU1_BLK_REG_ADDR + 0x010)
+#define LPU1_MBOX_STATUS_1REG	(LPU1_BLK_REG_ADDR + 0x014)
+#define LPU1_ERR_STATUS_REG	(LPU1_BLK_REG_ADDR + 0x018)
+#define LPU1_ERR_SET_REG	(LPU1_BLK_REG_ADDR + 0x020)
+
+/**
+ * PSS Block Register Address Offset from BAR0
+ * Range 0x18800 - 0x188DB
+ */
+#define PSS_BLK_REG_ADDR	0x00018800
+
+/**
+ * PSS Registers
+ * For details, see catapult_spec.pdf
+ * ERR_STATUS_REG : Indicates error in PSS module
+ * RAM_ERR_STATUS_REG : Indicates RAM module that detected error
+ */
+#define ERR_STATUS_SET		(PSS_BLK_REG_ADDR + 0x018)
+#define PSS_RAM_ERR_STATUS_REG	(PSS_BLK_REG_ADDR + 0x01C)
+
+/**
+ * PSS Semaphore Lock Registers, total 16
+ * First read when unlocked returns 0,
+ * and is set to 1, atomically.
+ * Subsequent reads return 1.
+ * To clear, set the value to 0.
+ * Range : 0x20 to 0x5c
+ */
+#define PSS_SEM_LOCK_REG(_num) \
+	(PSS_BLK_REG_ADDR + 0x020 + ((_num) << 2))
+
+/**
+ * PSS Semaphore Status Registers,
+ * corresponding to the lock registers above
+ */
+#define PSS_SEM_STATUS_REG(_num) \
+	(PSS_BLK_REG_ADDR + 0x060 + ((_num) << 2))
+
+/**
+ * Catapult CPQ Registers
+ * Defines for Mailbox Registers
+ * Used to send mailbox commands to firmware from
+ * host. The data part is written to the MBox
+ * memory, registers are used to indicate that
+ * a command is resident in memory.
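+ * Each host function has one CMD_STAT register per LPU and per
+ * direction; see the HOSTFNx_LPUy / LPUy_HOSTFNx pairs below.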
+ * + * Note : LPU0<->LPU1 mailboxes are not listed here + */ +#define CPQ_BLK_REG_ADDR 0x00019000 + +#define HOSTFN0_LPU0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x130) +#define HOSTFN0_LPU1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x134) +#define LPU0_HOSTFN0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x138) +#define LPU1_HOSTFN0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x13C) + +#define HOSTFN1_LPU0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x140) +#define HOSTFN1_LPU1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x144) +#define LPU0_HOSTFN1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x148) +#define LPU1_HOSTFN1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x14C) + +#define HOSTFN2_LPU0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x170) +#define HOSTFN2_LPU1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x174) +#define LPU0_HOSTFN2_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x178) +#define LPU1_HOSTFN2_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x17C) + +#define HOSTFN3_LPU0_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x180) +#define HOSTFN3_LPU1_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x184) +#define LPU0_HOSTFN3_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x188) +#define LPU1_HOSTFN3_MBOX1_CMD_STAT (CPQ_BLK_REG_ADDR + 0x18C) + +/* Host Function Force Parity Error Registers */ +#define HOSTFN0_LPU_FORCE_PERR (CPQ_BLK_REG_ADDR + 0x120) +#define HOSTFN1_LPU_FORCE_PERR (CPQ_BLK_REG_ADDR + 0x124) +#define HOSTFN2_LPU_FORCE_PERR (CPQ_BLK_REG_ADDR + 0x128) +#define HOSTFN3_LPU_FORCE_PERR (CPQ_BLK_REG_ADDR + 0x12C) + +/* LL Port[0|1] Halt Mask Registers */ +#define LL_HALT_MSK_P0 (CPQ_BLK_REG_ADDR + 0x1A0) +#define LL_HALT_MSK_P1 (CPQ_BLK_REG_ADDR + 0x1B0) + +/* LL Port[0|1] Error Mask Registers */ +#define LL_ERR_MSK_P0 (CPQ_BLK_REG_ADDR + 0x1D0) +#define LL_ERR_MSK_P1 (CPQ_BLK_REG_ADDR + 0x1D4) + +/* EMC FLI (Flash Controller) Block Register Address Offset from BAR0 */ +#define FLI_BLK_REG_ADDR 0x0001D000 + +/* EMC FLI Registers */ +#define FLI_CMD_REG (FLI_BLK_REG_ADDR + 0x000) +#define FLI_ADDR_REG (FLI_BLK_REG_ADDR + 0x004) +#define FLI_CTL_REG (FLI_BLK_REG_ADDR + 0x008) +#define FLI_WRDATA_REG (FLI_BLK_REG_ADDR + 0x00C) +#define FLI_RDDATA_REG (FLI_BLK_REG_ADDR + 0x010) +#define FLI_DEV_STATUS_REG (FLI_BLK_REG_ADDR + 0x014) +#define FLI_SIG_WD_REG (FLI_BLK_REG_ADDR + 0x018) + +/** + * RO register + * 31:16 -- Vendor Id + * 15:0 -- Device Id + */ +#define FLI_DEV_VENDOR_REG (FLI_BLK_REG_ADDR + 0x01C) +#define FLI_ERR_STATUS_REG (FLI_BLK_REG_ADDR + 0x020) + +/** + * RAD (RxAdm) Block Register Address Offset from BAR0 + * RAD0 Range : 0x20000 - 0x203FF + * RAD1 Range : 0x20400 - 0x207FF + */ +#define RAD0_BLK_REG_ADDR 0x00020000 +#define RAD1_BLK_REG_ADDR 0x00020400 + +/* RAD0 Registers */ +#define RAD0_CTL_REG (RAD0_BLK_REG_ADDR + 0x000) +#define RAD0_PE_PARM_REG (RAD0_BLK_REG_ADDR + 0x004) +#define RAD0_BCN_REG (RAD0_BLK_REG_ADDR + 0x008) + +/* Default function ID register */ +#define RAD0_DEFAULT_REG (RAD0_BLK_REG_ADDR + 0x00C) + +/* Default promiscuous ID register */ +#define RAD0_PROMISC_REG (RAD0_BLK_REG_ADDR + 0x010) + +#define RAD0_BCNQ_REG (RAD0_BLK_REG_ADDR + 0x014) + +/* + * This register selects 1 of 8 PM Q's using + * VLAN pri, for non-BCN packets without a VLAN tag + */ +#define RAD0_DEFAULTQ_REG (RAD0_BLK_REG_ADDR + 0x018) + +#define RAD0_ERR_STS (RAD0_BLK_REG_ADDR + 0x01C) +#define RAD0_SET_ERR_STS (RAD0_BLK_REG_ADDR + 0x020) +#define RAD0_ERR_INT_EN (RAD0_BLK_REG_ADDR + 0x024) +#define RAD0_FIRST_ERR (RAD0_BLK_REG_ADDR + 0x028) +#define RAD0_FORCE_ERR (RAD0_BLK_REG_ADDR + 0x02C) + +#define RAD0_IF_RCVD (RAD0_BLK_REG_ADDR + 0x030) +#define RAD0_IF_RCVD_OCTETS_HIGH (RAD0_BLK_REG_ADDR + 
0x034) +#define RAD0_IF_RCVD_OCTETS_LOW (RAD0_BLK_REG_ADDR + 0x038) +#define RAD0_IF_RCVD_VLAN (RAD0_BLK_REG_ADDR + 0x03C) +#define RAD0_IF_RCVD_UCAST (RAD0_BLK_REG_ADDR + 0x040) +#define RAD0_IF_RCVD_UCAST_OCTETS_HIGH (RAD0_BLK_REG_ADDR + 0x044) +#define RAD0_IF_RCVD_UCAST_OCTETS_LOW (RAD0_BLK_REG_ADDR + 0x048) +#define RAD0_IF_RCVD_UCAST_VLAN (RAD0_BLK_REG_ADDR + 0x04C) +#define RAD0_IF_RCVD_MCAST (RAD0_BLK_REG_ADDR + 0x050) +#define RAD0_IF_RCVD_MCAST_OCTETS_HIGH (RAD0_BLK_REG_ADDR + 0x054) +#define RAD0_IF_RCVD_MCAST_OCTETS_LOW (RAD0_BLK_REG_ADDR + 0x058) +#define RAD0_IF_RCVD_MCAST_VLAN (RAD0_BLK_REG_ADDR + 0x05C) +#define RAD0_IF_RCVD_BCAST (RAD0_BLK_REG_ADDR + 0x060) +#define RAD0_IF_RCVD_BCAST_OCTETS_HIGH (RAD0_BLK_REG_ADDR + 0x064) +#define RAD0_IF_RCVD_BCAST_OCTETS_LOW (RAD0_BLK_REG_ADDR + 0x068) +#define RAD0_IF_RCVD_BCAST_VLAN (RAD0_BLK_REG_ADDR + 0x06C) +#define RAD0_DROPPED_FRAMES (RAD0_BLK_REG_ADDR + 0x070) + +#define RAD0_MAC_MAN_1H (RAD0_BLK_REG_ADDR + 0x080) +#define RAD0_MAC_MAN_1L (RAD0_BLK_REG_ADDR + 0x084) +#define RAD0_MAC_MAN_2H (RAD0_BLK_REG_ADDR + 0x088) +#define RAD0_MAC_MAN_2L (RAD0_BLK_REG_ADDR + 0x08C) +#define RAD0_MAC_MAN_3H (RAD0_BLK_REG_ADDR + 0x090) +#define RAD0_MAC_MAN_3L (RAD0_BLK_REG_ADDR + 0x094) +#define RAD0_MAC_MAN_4H (RAD0_BLK_REG_ADDR + 0x098) +#define RAD0_MAC_MAN_4L (RAD0_BLK_REG_ADDR + 0x09C) + +#define RAD0_LAST4_IP (RAD0_BLK_REG_ADDR + 0x100) + +/* RAD1 Registers */ +#define RAD1_CTL_REG (RAD1_BLK_REG_ADDR + 0x000) +#define RAD1_PE_PARM_REG (RAD1_BLK_REG_ADDR + 0x004) +#define RAD1_BCN_REG (RAD1_BLK_REG_ADDR + 0x008) + +/* Default function ID register */ +#define RAD1_DEFAULT_REG (RAD1_BLK_REG_ADDR + 0x00C) + +/* Promiscuous function ID register */ +#define RAD1_PROMISC_REG (RAD1_BLK_REG_ADDR + 0x010) + +#define RAD1_BCNQ_REG (RAD1_BLK_REG_ADDR + 0x014) + +/* + * This register selects 1 of 8 PM Q's using + * VLAN pri, for non-BCN packets without a VLAN tag + */ +#define RAD1_DEFAULTQ_REG (RAD1_BLK_REG_ADDR + 0x018) + +#define RAD1_ERR_STS (RAD1_BLK_REG_ADDR + 0x01C) +#define RAD1_SET_ERR_STS (RAD1_BLK_REG_ADDR + 0x020) +#define RAD1_ERR_INT_EN (RAD1_BLK_REG_ADDR + 0x024) + +/** + * TXA Block Register Address Offset from BAR0 + * TXA0 Range : 0x21000 - 0x213FF + * TXA1 Range : 0x21400 - 0x217FF + */ +#define TXA0_BLK_REG_ADDR 0x00021000 +#define TXA1_BLK_REG_ADDR 0x00021400 + +/* TXA Registers */ +#define TXA0_CTRL_REG (TXA0_BLK_REG_ADDR + 0x000) +#define TXA1_CTRL_REG (TXA1_BLK_REG_ADDR + 0x000) + +/** + * TSO Sequence # Registers (RO) + * Total 8 (for 8 queues) + * Holds the last seq.# for TSO frames + * See catapult_spec.pdf for more details + */ +#define TXA0_TSO_TCP_SEQ_REG(_num) \ + (TXA0_BLK_REG_ADDR + 0x020 + ((_num) << 2)) + +#define TXA1_TSO_TCP_SEQ_REG(_num) \ + (TXA1_BLK_REG_ADDR + 0x020 + ((_num) << 2)) + +/** + * TSO IP ID # Registers (RO) + * Total 8 (for 8 queues) + * Holds the last IP ID for TSO frames + * See catapult_spec.pdf for more details + */ +#define TXA0_TSO_IP_INFO_REG(_num) \ + (TXA0_BLK_REG_ADDR + 0x040 + ((_num) << 2)) + +#define TXA1_TSO_IP_INFO_REG(_num) \ + (TXA1_BLK_REG_ADDR + 0x040 + ((_num) << 2)) + +/** + * RXA Block Register Address Offset from BAR0 + * RXA0 Range : 0x21800 - 0x21BFF + * RXA1 Range : 0x21C00 - 0x21FFF + */ +#define RXA0_BLK_REG_ADDR 0x00021800 +#define RXA1_BLK_REG_ADDR 0x00021C00 + +/* RXA Registers */ +#define RXA0_CTL_REG (RXA0_BLK_REG_ADDR + 0x040) +#define RXA1_CTL_REG (RXA1_BLK_REG_ADDR + 0x040) + +/** + * PPLB Block Register Address Offset from BAR0 + * PPLB0 Range : 0x22000 - 
0x223FF + * PPLB1 Range : 0x22400 - 0x227FF + */ +#define PLB0_BLK_REG_ADDR 0x00022000 +#define PLB1_BLK_REG_ADDR 0x00022400 + +/** + * PLB Registers + * Holds RL timer used time stamps in RLT tagged frames + */ +#define PLB0_ECM_TIMER_REG (PLB0_BLK_REG_ADDR + 0x05C) +#define PLB1_ECM_TIMER_REG (PLB1_BLK_REG_ADDR + 0x05C) + +/* Controls the rate-limiter on each of the priority class */ +#define PLB0_RL_CTL (PLB0_BLK_REG_ADDR + 0x060) +#define PLB1_RL_CTL (PLB1_BLK_REG_ADDR + 0x060) + +/** + * Max byte register, total 8, 0-7 + * see catapult_spec.pdf for details + */ +#define PLB0_RL_MAX_BC(_num) \ + (PLB0_BLK_REG_ADDR + 0x064 + ((_num) << 2)) +#define PLB1_RL_MAX_BC(_num) \ + (PLB1_BLK_REG_ADDR + 0x064 + ((_num) << 2)) + +/** + * RL Time Unit Register for priority 0-7 + * 4 bits per priority + * (2^rl_unit)*1us is the actual time period + */ +#define PLB0_RL_TU_PRIO (PLB0_BLK_REG_ADDR + 0x084) +#define PLB1_RL_TU_PRIO (PLB1_BLK_REG_ADDR + 0x084) + +/** + * RL byte count register, + * bytes transmitted in (rl_unit*1)us time period + * 1 per priority, 8 in all, 0-7. + */ +#define PLB0_RL_BYTE_CNT(_num) \ + (PLB0_BLK_REG_ADDR + 0x088 + ((_num) << 2)) +#define PLB1_RL_BYTE_CNT(_num) \ + (PLB1_BLK_REG_ADDR + 0x088 + ((_num) << 2)) + +/** + * RL Min factor register + * 2 bits per priority, + * 4 factors possible: 1, 0.5, 0.25, 0 + * 2'b00 - 0; 2'b01 - 0.25; 2'b10 - 0.5; 2'b11 - 1 + */ +#define PLB0_RL_MIN_REG (PLB0_BLK_REG_ADDR + 0x0A8) +#define PLB1_RL_MIN_REG (PLB1_BLK_REG_ADDR + 0x0A8) + +/** + * RL Max factor register + * 2 bits per priority, + * 4 factors possible: 1, 0.5, 0.25, 0 + * 2'b00 - 0; 2'b01 - 0.25; 2'b10 - 0.5; 2'b11 - 1 + */ +#define PLB0_RL_MAX_REG (PLB0_BLK_REG_ADDR + 0x0AC) +#define PLB1_RL_MAX_REG (PLB1_BLK_REG_ADDR + 0x0AC) + +/* MAC SERDES Address Paging register */ +#define PLB0_EMS_ADD_REG (PLB0_BLK_REG_ADDR + 0xD0) +#define PLB1_EMS_ADD_REG (PLB1_BLK_REG_ADDR + 0xD0) + +/* LL EMS Registers */ +#define LL_EMS0_BLK_REG_ADDR 0x00026800 +#define LL_EMS1_BLK_REG_ADDR 0x00026C00 + +/** + * BPC Block Register Address Offset from BAR0 + * BPC0 Range : 0x23000 - 0x233FF + * BPC1 Range : 0x23400 - 0x237FF + */ +#define BPC0_BLK_REG_ADDR 0x00023000 +#define BPC1_BLK_REG_ADDR 0x00023400 + +/** + * PMM Block Register Address Offset from BAR0 + * PMM0 Range : 0x23800 - 0x23BFF + * PMM1 Range : 0x23C00 - 0x23FFF + */ +#define PMM0_BLK_REG_ADDR 0x00023800 +#define PMM1_BLK_REG_ADDR 0x00023C00 + +/** + * HQM Block Register Address Offset from BAR0 + * HQM0 Range : 0x24000 - 0x243FF + * HQM1 Range : 0x24400 - 0x247FF + */ +#define HQM0_BLK_REG_ADDR 0x00024000 +#define HQM1_BLK_REG_ADDR 0x00024400 + +/** + * HQM Control Register + * Controls some aspects of IB + * See catapult_spec.pdf for details + */ +#define HQM0_CTL_REG (HQM0_BLK_REG_ADDR + 0x000) +#define HQM1_CTL_REG (HQM1_BLK_REG_ADDR + 0x000) + +/** + * HQM Stop Q Semaphore Registers. + * Only one Queue resource can be stopped at + * any given time. This register controls access + * to the single stop Q resource. 
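+ * Software must therefore serialize queue-stop operations through
+ * these semaphores, one RxQ/TxQ register pair per HQM instance.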
+ * See catapult_spec.pdf for details
+ */
+#define HQM0_RXQ_STOP_SEM	(HQM0_BLK_REG_ADDR + 0x028)
+#define HQM0_TXQ_STOP_SEM	(HQM0_BLK_REG_ADDR + 0x02C)
+#define HQM1_RXQ_STOP_SEM	(HQM1_BLK_REG_ADDR + 0x028)
+#define HQM1_TXQ_STOP_SEM	(HQM1_BLK_REG_ADDR + 0x02C)
+
+/**
+ * LUT Block Register Address Offset from BAR0
+ * LUT0 Range : 0x25800 - 0x25BFF
+ * LUT1 Range : 0x25C00 - 0x25FFF
+ */
+#define LUT0_BLK_REG_ADDR	0x00025800
+#define LUT1_BLK_REG_ADDR	0x00025C00
+
+/**
+ * LUT Registers
+ * See catapult_spec.pdf for details
+ */
+#define LUT0_ERR_STS		(LUT0_BLK_REG_ADDR + 0x000)
+#define LUT1_ERR_STS		(LUT1_BLK_REG_ADDR + 0x000)
+#define LUT0_SET_ERR_STS	(LUT0_BLK_REG_ADDR + 0x004)
+#define LUT1_SET_ERR_STS	(LUT1_BLK_REG_ADDR + 0x004)
+
+/**
+ * TRC (Debug/Trace) Register Offset from BAR0
+ * Range : 0x26000 -- 0x263FFF
+ */
+#define TRC_BLK_REG_ADDR	0x00026000
+
+/**
+ * TRC Registers
+ * See catapult_spec.pdf for details of each
+ */
+#define TRC_CTL_REG		(TRC_BLK_REG_ADDR + 0x000)
+#define TRC_MODS_REG		(TRC_BLK_REG_ADDR + 0x004)
+#define TRC_TRGC_REG		(TRC_BLK_REG_ADDR + 0x008)
+#define TRC_CNT1_REG		(TRC_BLK_REG_ADDR + 0x010)
+#define TRC_CNT2_REG		(TRC_BLK_REG_ADDR + 0x014)
+#define TRC_NXTS_REG		(TRC_BLK_REG_ADDR + 0x018)
+#define TRC_DIRR_REG		(TRC_BLK_REG_ADDR + 0x01C)
+
+/**
+ * TRC Trigger match filters, total 10
+ * Determines the trigger condition
+ */
+#define TRC_TRGM_REG(_num) \
+	(TRC_BLK_REG_ADDR + 0x040 + ((_num) << 2))
+
+/**
+ * TRC Next State filters, total 10
+ * Determines the next state conditions
+ */
+#define TRC_NXTM_REG(_num) \
+	(TRC_BLK_REG_ADDR + 0x080 + ((_num) << 2))
+
+/**
+ * TRC Store Match filters, total 10
+ * Determines the store conditions
+ */
+#define TRC_STRM_REG(_num) \
+	(TRC_BLK_REG_ADDR + 0x0C0 + ((_num) << 2))
+
+/* DOORBELLS ACCESS */
+
+/**
+ * Catapult doorbells
+ * Each doorbell-queue set has
+ * 1 RxQ, 1 TxQ, 2 IBs in that order
+ * Size of each entry is 32 bytes, even though only 1 word
+ * is used. For the non-VM case each doorbell-q set is
+ * separated by 128 bytes, for the VM case it is separated
+ * by 4K bytes
+ * Non VM case Range : 0x38000 - 0x39FFF
+ * VM case Range : 0x100000 - 0x11FFFF
+ * The range applies to both HQMs
+ */
+#define HQM_DOORBELL_BLK_BASE_ADDR	0x00038000
+#define HQM_DOORBELL_VM_BLK_BASE_ADDR	0x00100000
+
+/* MEMORY ACCESS */
+
+/**
+ * Catapult H/W Block Memory Access Address
+ * To the host a memory space of 32K (page) is visible
+ * at a time. The address range is from 0x08000 to 0x0FFFF
+ */
+#define HW_BLK_HOST_MEM_ADDR	0x08000
+
+/**
+ * Catapult LUT Memory Access Page Numbers
+ * Range : LUT0 0xa0-0xa1
+ *	LUT1 0xa2-0xa3
+ */
+#define LUT0_MEM_BLK_BASE_PG_NUM	0x000000A0
+#define LUT1_MEM_BLK_BASE_PG_NUM	0x000000A2
+
+/**
+ * Catapult RxFn Database Memory Block Base Offset
+ *
+ * The Rx function database exists in LUT block.
+ * In PCIe space this is accessible as a 256x32
+ * bit block. Each entry in this database is 4
+ * (4 byte) words. Max. entries is 64.
+ * Address of an entry corresponding to a function
+ * = base_addr + (function_no. * 16)
+ */
+#define RX_FNDB_RAM_BASE_OFFSET	0x0000B400
+
+/**
+ * Catapult TxFn Database Memory Block Base Offset Address
+ *
+ * The Tx function database exists in LUT block.
+ * In PCIe space this is accessible as a 64x32
+ * bit block. Each entry in this database is 1
+ * (4 byte) word. Max. entries is 64.
+ * Address of an entry corresponding to a function
+ * = base_addr + (function_no. * 4)
+ */
+#define TX_FNDB_RAM_BASE_OFFSET	0x0000B800
+
+/**
+ * Catapult Unicast CAM Base Offset Address
+ *
+ * Exists in LUT memory space.
+ * Shared by both the LL & FCoE driver.
+ * Size is 256x48 bits; mapped to PCIe space
+ * 512x32 bit blocks. For each address, bits
+ * are written in the order : [47:32] and then
+ * [31:0].
+ */
+#define UCAST_CAM_BASE_OFFSET	0x0000A800
+
+/**
+ * Catapult Unicast RAM Base Offset Address
+ *
+ * Exists in LUT memory space.
+ * Shared by both the LL & FCoE driver.
+ * Size is 256x9 bits.
+ */
+#define UCAST_RAM_BASE_OFFSET	0x0000B000
+
+/**
+ * Catapult Multicast CAM Base Offset Address
+ *
+ * Exists in LUT memory space.
+ * Shared by both the LL & FCoE driver.
+ * Size is 256x48 bits; mapped to PCIe space
+ * 512x32 bit blocks. For each address, bits
+ * are written in the order : [47:32] and then
+ * [31:0].
+ */
+#define MCAST_CAM_BASE_OFFSET	0x0000A000
+
+/**
+ * Catapult VLAN RAM Base Offset Address
+ *
+ * Exists in LUT memory space.
+ * Size is 4096x66 bits; mapped to PCIe space as
+ * 8192x32 bit blocks.
+ * All the 4K entries are within the address range
+ * 0x0000 to 0x8000, so in the first LUT page.
+ */
+#define VLAN_RAM_BASE_OFFSET	0x00000000
+
+/**
+ * Catapult Tx Stats RAM Base Offset Address
+ *
+ * Exists in LUT memory space.
+ * Size is 1024x33 bits;
+ * Each Tx function has 64 bytes of space
+ */
+#define TX_STATS_RAM_BASE_OFFSET	0x00009000
+
+/**
+ * Catapult Rx Stats RAM Base Offset Address
+ *
+ * Exists in LUT memory space.
+ * Size is 1024x33 bits;
+ * Each Rx function has 64 bytes of space
+ */
+#define RX_STATS_RAM_BASE_OFFSET	0x00008000
+
+/* Catapult RXA Memory Access Page Numbers */
+#define RXA0_MEM_BLK_BASE_PG_NUM	0x0000008C
+#define RXA1_MEM_BLK_BASE_PG_NUM	0x0000008D
+
+/**
+ * Catapult Multicast Vector Table Base Offset Address
+ *
+ * Exists in RxA memory space.
+ * Organized as 512x65 bit block.
+ * However, 16 bytes (a power of 2) are allocated for each entry.
+ * Total size 512*16 bytes.
+ * There are two logical divisions, 256 entries each :
+ * a) Entries 0x00 to 0xff (256) -- Approx. MVT
+ *	Offset 0x000 to 0xFFF
+ * b) Entries 0x100 to 0x1ff (256) -- Exact MVT
+ *	Offsets 0x1000 to 0x1FFF
+ */
+#define MCAST_APPROX_MVT_BASE_OFFSET	0x00000000
+#define MCAST_EXACT_MVT_BASE_OFFSET	0x00001000
+
+/**
+ * Catapult RxQ Translate Table (RIT) Base Offset Address
+ *
+ * Exists in RxA memory space
+ * Total no. of entries 64
+ * Each entry is 1 (4 byte) word.
+ * 31:12 -- Reserved
+ * 11:0 -- Two 6 bit RxQ Ids
+ */
+#define FUNCTION_TO_RXQ_TRANSLATE	0x00002000
+
+/* Catapult RxAdm (RAD) Memory Access Page Numbers */
+#define RAD0_MEM_BLK_BASE_PG_NUM	0x00000086
+#define RAD1_MEM_BLK_BASE_PG_NUM	0x00000087
+
+/**
+ * Catapult RSS Table Base Offset Address
+ *
+ * Exists in RAD memory space.
+ * Each entry is 352 bits, but aligned on
+ * 64 byte (512 bit) boundary. Accessed as
+ * 4 byte words, the whole entry can be
+ * broken into 11 word accesses.
+ */
+#define RSS_TABLE_BASE_OFFSET	0x00000800
+
+/**
+ * Catapult CPQ Block Page Number
+ * This value is written to the page number registers
+ * to access the memory associated with the mailboxes.
+ */
+#define CPQ_BLK_PG_NUM	0x00000005
+
+/**
+ * Clarification :
+ * LL functions are 2 & 3; can HostFn0/HostFn1
+ * <-> LPU0/LPU1 memories be used ?
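+ * (For reference, bna_init() in bna.c maps pci_func < 3 to port 0
+ * and pci_func 3 to port 1.)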
+ */
+/**
+ * Catapult HostFn0/HostFn1 to LPU0/LPU1 Mbox memory
+ * Per catapult_spec.pdf, the offset of the mbox
+ * memory is in the register space at an offset of 0x200
+ */
+#define CPQ_BLK_REG_MBOX_ADDR	(CPQ_BLK_REG_ADDR + 0x200)
+
+#define HOSTFN_LPU_MBOX	(CPQ_BLK_REG_MBOX_ADDR + 0x000)
+
+/* Catapult LPU0/LPU1 to HostFn0/HostFn1 Mbox memory */
+#define LPU_HOSTFN_MBOX	(CPQ_BLK_REG_MBOX_ADDR + 0x080)
+
+/**
+ * Catapult HQM Block Page Number
+ * This is written to the page number register for
+ * the appropriate function to access the memory
+ * associated with HQM
+ */
+#define HQM0_BLK_PG_NUM	0x00000096
+#define HQM1_BLK_PG_NUM	0x00000097
+
+/**
+ * Note that TxQ and RxQ entries are interlaced in
+ * the HQM memory, i.e. RXQ0, TXQ0, RXQ1, TXQ1, etc.
+ */
+
+#define HQM_RXTX_Q_RAM_BASE_OFFSET	0x00004000
+
+/**
+ * CQ Memory
+ * Exists in HQM Memory space
+ * Each entry is 16 (4 byte) words of which
+ * only 12 words are used for configuration
+ * Total 64 entries per HQM memory space
+ */
+#define HQM_CQ_RAM_BASE_OFFSET	0x00006000
+
+/**
+ * Interrupt Block (IB) Memory
+ * Exists in HQM Memory space
+ * Each entry is 8 (4 byte) words of which
+ * only 5 words are used for configuration
+ * Total 128 entries per HQM memory space
+ */
+#define HQM_IB_RAM_BASE_OFFSET	0x00001000
+
+/**
+ * Index Table (IT) Memory
+ * Exists in HQM Memory space
+ * Each entry is 1 (4 byte) word which
+ * is used for configuration
+ * Total 128 entries per HQM memory space
+ */
+#define HQM_INDX_TBL_RAM_BASE_OFFSET	0x00002000
+
+/**
+ * PSS Block Memory Page Number
+ * This is written to the appropriate page number
+ * register to access the CPU memory.
+ * Also known as the PSS secondary memory (SMEM).
+ * Range : 0x180 to 0x1CF
+ * See catapult_spec.pdf for details
+ */
+#define PSS_BLK_PG_NUM	0x00000180
+
+/**
+ * Offsets of different instances of PSS SMEM
+ * 2.5M of continuous 1T memory space : 2 blocks
+ * of 1M each (32 pages each, page=32KB) and 4 smaller
+ * blocks of 128K each (4 pages each, page=32KB)
+ * PSS_LMEM_INST0 is used for firmware download
+ */
+#define PSS_LMEM_INST0	0x00000000
+#define PSS_LMEM_INST1	0x00100000
+#define PSS_LMEM_INST2	0x00200000
+#define PSS_LMEM_INST3	0x00220000
+#define PSS_LMEM_INST4	0x00240000
+#define PSS_LMEM_INST5	0x00260000
+
+#define BNA_PCI_REG_CT_ADDRSZ	(0x40000)
+
+#define BNA_GET_PAGE_NUM(_base_page, _offset) \
+	((_base_page) + ((_offset) >> 15))
+
+#define BNA_GET_PAGE_OFFSET(_offset) \
+	((_offset) & 0x7fff)
+
+#define BNA_GET_MEM_BASE_ADDR(_bar0, _base_offset) \
+	((_bar0) + HW_BLK_HOST_MEM_ADDR		\
+	+ BNA_GET_PAGE_OFFSET((_base_offset)))
+
+#define BNA_GET_VLAN_MEM_ENTRY_ADDR(_bar0, _fn_id, _vlan_id)\
+	(_bar0 + (HW_BLK_HOST_MEM_ADDR)	\
+	+ (BNA_GET_PAGE_OFFSET(VLAN_RAM_BASE_OFFSET))	\
+	+ (((_fn_id) & 0x3f) << 9)	\
+	+ (((_vlan_id) & 0xfe0) >> 3))
+
+/**
+ *
+ * Interrupt related bits, flags and macros
+ *
+ */
+
+#define __LPU02HOST_MBOX0_STATUS_BITS	0x00100000
+#define __LPU12HOST_MBOX0_STATUS_BITS	0x00200000
+#define __LPU02HOST_MBOX1_STATUS_BITS	0x00400000
+#define __LPU12HOST_MBOX1_STATUS_BITS	0x00800000
+
+#define __LPU02HOST_MBOX0_MASK_BITS	0x00100000
+#define __LPU12HOST_MBOX0_MASK_BITS	0x00200000
+#define __LPU02HOST_MBOX1_MASK_BITS	0x00400000
+#define __LPU12HOST_MBOX1_MASK_BITS	0x00800000
+
+#define __LPU2HOST_MBOX_MASK_BITS \
+	(__LPU02HOST_MBOX0_MASK_BITS | __LPU02HOST_MBOX1_MASK_BITS | \
+	__LPU12HOST_MBOX0_MASK_BITS | __LPU12HOST_MBOX1_MASK_BITS)
+
+#define __LPU2HOST_IB_STATUS_BITS	0x0000ffff
+
+#define
+#define BNA_IS_LPU0_MBOX_INTR(_intr_status) \
+ ((_intr_status) & (__LPU02HOST_MBOX0_STATUS_BITS | \
+ __LPU02HOST_MBOX1_STATUS_BITS))
+
+#define BNA_IS_LPU1_MBOX_INTR(_intr_status) \
+ ((_intr_status) & (__LPU12HOST_MBOX0_STATUS_BITS | \
+ __LPU12HOST_MBOX1_STATUS_BITS))
+
+#define BNA_IS_MBOX_INTR(_intr_status) \
+ ((_intr_status) & \
+ (__LPU02HOST_MBOX0_STATUS_BITS | \
+ __LPU02HOST_MBOX1_STATUS_BITS | \
+ __LPU12HOST_MBOX0_STATUS_BITS | \
+ __LPU12HOST_MBOX1_STATUS_BITS))
+
+#define __EMC_ERROR_STATUS_BITS 0x00010000
+#define __LPU0_ERROR_STATUS_BITS 0x00020000
+#define __LPU1_ERROR_STATUS_BITS 0x00040000
+#define __PSS_ERROR_STATUS_BITS 0x00080000
+
+#define __HALT_STATUS_BITS 0x01000000
+
+#define __EMC_ERROR_MASK_BITS 0x00010000
+#define __LPU0_ERROR_MASK_BITS 0x00020000
+#define __LPU1_ERROR_MASK_BITS 0x00040000
+#define __PSS_ERROR_MASK_BITS 0x00080000
+
+#define __HALT_MASK_BITS 0x01000000
+
+#define __ERROR_MASK_BITS \
+ (__EMC_ERROR_MASK_BITS | __LPU0_ERROR_MASK_BITS | \
+ __LPU1_ERROR_MASK_BITS | __PSS_ERROR_MASK_BITS | \
+ __HALT_MASK_BITS)
+
+#define BNA_IS_ERR_INTR(_intr_status) \
+ ((_intr_status) & \
+ (__EMC_ERROR_STATUS_BITS | \
+ __LPU0_ERROR_STATUS_BITS | \
+ __LPU1_ERROR_STATUS_BITS | \
+ __PSS_ERROR_STATUS_BITS | \
+ __HALT_STATUS_BITS))
+
+#define BNA_IS_MBOX_ERR_INTR(_intr_status) \
+ (BNA_IS_MBOX_INTR((_intr_status)) | \
+ BNA_IS_ERR_INTR((_intr_status)))
+
+#define BNA_IS_INTX_DATA_INTR(_intr_status) \
+ ((_intr_status) & __LPU2HOST_IB_STATUS_BITS)
+
+#define BNA_INTR_STATUS_MBOX_CLR(_intr_status) \
+do { \
+ (_intr_status) &= ~(__LPU02HOST_MBOX0_STATUS_BITS | \
+ __LPU02HOST_MBOX1_STATUS_BITS | \
+ __LPU12HOST_MBOX0_STATUS_BITS | \
+ __LPU12HOST_MBOX1_STATUS_BITS); \
+} while (0)
+
+#define BNA_INTR_STATUS_ERR_CLR(_intr_status) \
+do { \
+ (_intr_status) &= ~(__EMC_ERROR_STATUS_BITS | \
+ __LPU0_ERROR_STATUS_BITS | \
+ __LPU1_ERROR_STATUS_BITS | \
+ __PSS_ERROR_STATUS_BITS | \
+ __HALT_STATUS_BITS); \
+} while (0)
+
+#define bna_intx_disable(_bna, _cur_mask) \
+{ \
+ (_cur_mask) = readl((_bna)->regs.fn_int_mask);\
+ writel(0xffffffff, (_bna)->regs.fn_int_mask);\
+}
+
+#define bna_intx_enable(bna, new_mask) \
+ writel((new_mask), (bna)->regs.fn_int_mask)
+
+#define bna_mbox_intr_disable(bna) \
+ writel((readl((bna)->regs.fn_int_mask) | \
+ (__LPU2HOST_MBOX_MASK_BITS | __ERROR_MASK_BITS)), \
+ (bna)->regs.fn_int_mask)
+
+#define bna_mbox_intr_enable(bna) \
+ writel((readl((bna)->regs.fn_int_mask) & \
+ ~(__LPU2HOST_MBOX_MASK_BITS | __ERROR_MASK_BITS)), \
+ (bna)->regs.fn_int_mask)
+
+#define bna_intr_status_get(_bna, _status) \
+{ \
+ (_status) = readl((_bna)->regs.fn_int_status); \
+ if ((_status)) { \
+ writel((_status) & ~(__LPU02HOST_MBOX0_STATUS_BITS |\
+ __LPU02HOST_MBOX1_STATUS_BITS |\
+ __LPU12HOST_MBOX0_STATUS_BITS |\
+ __LPU12HOST_MBOX1_STATUS_BITS), \
+ (_bna)->regs.fn_int_status);\
+ } \
+}
+
+#define bna_intr_status_get_no_clr(_bna, _status) \
+ (_status) = readl((_bna)->regs.fn_int_status)
+
+#define bna_intr_mask_get(bna, mask) \
+ (*mask) = readl((bna)->regs.fn_int_mask)
+
+#define bna_intr_ack(bna, intr_bmap) \
+ writel((intr_bmap), (bna)->regs.fn_int_status)
+
+#define bna_ib_intx_disable(bna, ib_id) \
+ writel(readl((bna)->regs.fn_int_mask) | \
+ (1 << (ib_id)), \
+ (bna)->regs.fn_int_mask)
+
+#define bna_ib_intx_enable(bna, ib_id) \
+ writel(readl((bna)->regs.fn_int_mask) & \
+ ~(1 << (ib_id)), \
+ (bna)->regs.fn_int_mask)
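/*
 * Illustrative sketch (editorial annotation, not part of this patch) of
 * how an INTx interrupt handler might use the helpers above; the
 * handler name and per-device pointer are hypothetical, and
 * irqreturn_t assumes <linux/interrupt.h>.  Note that
 * bna_intr_status_get() reads fn_int_status and acks every source
 * except the mailbox bits, which are cleared only after the mailbox
 * has been serviced.
 */
static irqreturn_t example_isr(int irq, void *data)
{
	struct bna *bna = data;		/* hypothetical device context */
	u32 status;

	bna_intr_status_get(bna, status);
	if (!status)
		return IRQ_NONE;	/* shared line, not our interrupt */

	if (BNA_IS_MBOX_ERR_INTR(status)) {
		/* mask mailbox/error sources while they are serviced */
		bna_mbox_intr_disable(bna);
		BNA_INTR_STATUS_MBOX_CLR(status);
	}

	if (BNA_IS_INTX_DATA_INTR(status)) {
		/* IB (data path) events, already acked by the macro
		 * above: typically schedule NAPI polling here.
		 */
	}

	return IRQ_HANDLED;
}
+
+#define bna_mbox_msix_idx_set(_device) \
+do {\
+ writel(((_device)->vector & 0x000001FF), \
+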
(_device)->bna->pcidev.pci_bar_kva + \ + reg_offset[(_device)->bna->pcidev.pci_func].msix_idx);\ +} while (0) + +/** + * + * TxQ, RxQ, CQ related bits, offsets, macros + * + */ + +#define BNA_Q_IDLE_STATE 0x00008001 + +#define BNA_GET_DOORBELL_BASE_ADDR(_bar0) \ + ((_bar0) + HQM_DOORBELL_BLK_BASE_ADDR) + +#define BNA_GET_DOORBELL_ENTRY_OFFSET(_entry) \ + ((HQM_DOORBELL_BLK_BASE_ADDR) \ + + (_entry << 7)) + +#define BNA_DOORBELL_IB_INT_ACK(_timeout, _events) \ + (0x80000000 | ((_timeout) << 16) | (_events)) + +#define BNA_DOORBELL_IB_INT_DISABLE (0x40000000) + +/* TxQ Entry Opcodes */ +#define BNA_TXQ_WI_SEND (0x402) /* Single Frame Transmission */ +#define BNA_TXQ_WI_SEND_LSO (0x403) /* Multi-Frame Transmission */ +#define BNA_TXQ_WI_EXTENSION (0x104) /* Extension WI */ + +/* TxQ Entry Control Flags */ +#define BNA_TXQ_WI_CF_FCOE_CRC (1 << 8) +#define BNA_TXQ_WI_CF_IPID_MODE (1 << 5) +#define BNA_TXQ_WI_CF_INS_PRIO (1 << 4) +#define BNA_TXQ_WI_CF_INS_VLAN (1 << 3) +#define BNA_TXQ_WI_CF_UDP_CKSUM (1 << 2) +#define BNA_TXQ_WI_CF_TCP_CKSUM (1 << 1) +#define BNA_TXQ_WI_CF_IP_CKSUM (1 << 0) + +#define BNA_TXQ_WI_L4_HDR_N_OFFSET(_hdr_size, _offset) \ + (((_hdr_size) << 10) | ((_offset) & 0x3FF)) + +/* + * Completion Q defines + */ +/* CQ Entry Flags */ +#define BNA_CQ_EF_MAC_ERROR (1 << 0) +#define BNA_CQ_EF_FCS_ERROR (1 << 1) +#define BNA_CQ_EF_TOO_LONG (1 << 2) +#define BNA_CQ_EF_FC_CRC_OK (1 << 3) + +#define BNA_CQ_EF_RSVD1 (1 << 4) +#define BNA_CQ_EF_L4_CKSUM_OK (1 << 5) +#define BNA_CQ_EF_L3_CKSUM_OK (1 << 6) +#define BNA_CQ_EF_HDS_HEADER (1 << 7) + +#define BNA_CQ_EF_UDP (1 << 8) +#define BNA_CQ_EF_TCP (1 << 9) +#define BNA_CQ_EF_IP_OPTIONS (1 << 10) +#define BNA_CQ_EF_IPV6 (1 << 11) + +#define BNA_CQ_EF_IPV4 (1 << 12) +#define BNA_CQ_EF_VLAN (1 << 13) +#define BNA_CQ_EF_RSS (1 << 14) +#define BNA_CQ_EF_RSVD2 (1 << 15) + +#define BNA_CQ_EF_MCAST_MATCH (1 << 16) +#define BNA_CQ_EF_MCAST (1 << 17) +#define BNA_CQ_EF_BCAST (1 << 18) +#define BNA_CQ_EF_REMOTE (1 << 19) + +#define BNA_CQ_EF_LOCAL (1 << 20) + +/** + * + * Data structures + * + */ + +enum txf_flags { + BFI_TXF_CF_ENABLE = 1 << 0, + BFI_TXF_CF_VLAN_FILTER = 1 << 8, + BFI_TXF_CF_VLAN_ADMIT = 1 << 9, + BFI_TXF_CF_VLAN_INSERT = 1 << 10, + BFI_TXF_CF_RSVD1 = 1 << 11, + BFI_TXF_CF_MAC_SA_CHECK = 1 << 12, + BFI_TXF_CF_VLAN_WI_BASED = 1 << 13, + BFI_TXF_CF_VSWITCH_MCAST = 1 << 14, + BFI_TXF_CF_VSWITCH_UCAST = 1 << 15, + BFI_TXF_CF_RSVD2 = 0x7F << 1 +}; + +enum ib_flags { + BFI_IB_CF_MASTER_ENABLE = (1 << 0), + BFI_IB_CF_MSIX_MODE = (1 << 1), + BFI_IB_CF_COALESCING_MODE = (1 << 2), + BFI_IB_CF_INTER_PKT_ENABLE = (1 << 3), + BFI_IB_CF_INT_ENABLE = (1 << 4), + BFI_IB_CF_INTER_PKT_DMA = (1 << 5), + BFI_IB_CF_ACK_PENDING = (1 << 6), + BFI_IB_CF_RESERVED1 = (1 << 7) +}; + +enum rss_hash_type { + BFI_RSS_T_V4_TCP = (1 << 11), + BFI_RSS_T_V4_IP = (1 << 10), + BFI_RSS_T_V6_TCP = (1 << 9), + BFI_RSS_T_V6_IP = (1 << 8) +}; +enum hds_header_type { + BNA_HDS_T_V4_TCP = (1 << 11), + BNA_HDS_T_V4_UDP = (1 << 10), + BNA_HDS_T_V6_TCP = (1 << 9), + BNA_HDS_T_V6_UDP = (1 << 8), + BNA_HDS_FORCED = (1 << 7), +}; +enum rxf_flags { + BNA_RXF_CF_SM_LG_RXQ = (1 << 15), + BNA_RXF_CF_DEFAULT_VLAN = (1 << 14), + BNA_RXF_CF_DEFAULT_FUNCTION_ENABLE = (1 << 13), + BNA_RXF_CF_VLAN_STRIP = (1 << 12), + BNA_RXF_CF_RSS_ENABLE = (1 << 8) +}; +struct bna_chip_regs_offset { + u32 page_addr; + u32 fn_int_status; + u32 fn_int_mask; + u32 msix_idx; +}; +extern const struct bna_chip_regs_offset reg_offset[]; + +struct bna_chip_regs { + void __iomem *page_addr; + void __iomem 
*fn_int_status; + void __iomem *fn_int_mask; +}; + +struct bna_txq_mem { + u32 pg_tbl_addr_lo; + u32 pg_tbl_addr_hi; + u32 cur_q_entry_lo; + u32 cur_q_entry_hi; + u32 reserved1; + u32 reserved2; + u32 pg_cnt_n_prd_ptr; /* 31:16->total page count */ + /* 15:0 ->producer pointer (index?) */ + u32 entry_n_pg_size; /* 31:16->entry size */ + /* 15:0 ->page size */ + u32 int_blk_n_cns_ptr; /* 31:24->Int Blk Id; */ + /* 23:16->Int Blk Offset */ + /* 15:0 ->consumer pointer(index?) */ + u32 cns_ptr2_n_q_state; /* 31:16->cons. ptr 2; 15:0-> Q state */ + u32 nxt_qid_n_fid_n_pri; /* 17:10->next */ + /* QId;9:3->FID;2:0->Priority */ + u32 wvc_n_cquota_n_rquota; /* 31:24->WI Vector Count; */ + /* 23:12->Cfg Quota; */ + /* 11:0 ->Run Quota */ + u32 reserved3[4]; +}; + +struct bna_rxq_mem { + u32 pg_tbl_addr_lo; + u32 pg_tbl_addr_hi; + u32 cur_q_entry_lo; + u32 cur_q_entry_hi; + u32 reserved1; + u32 reserved2; + u32 pg_cnt_n_prd_ptr; /* 31:16->total page count */ + /* 15:0 ->producer pointer (index?) */ + u32 entry_n_pg_size; /* 31:16->entry size */ + /* 15:0 ->page size */ + u32 sg_n_cq_n_cns_ptr; /* 31:28->reserved; 27:24->sg count */ + /* 23:16->CQ; */ + /* 15:0->consumer pointer(index?) */ + u32 buf_sz_n_q_state; /* 31:16->buffer size; 15:0-> Q state */ + u32 next_qid; /* 17:10->next QId */ + u32 reserved3; + u32 reserved4[4]; +}; + +struct bna_rxtx_q_mem { + struct bna_rxq_mem rxq; + struct bna_txq_mem txq; +}; + +struct bna_cq_mem { + u32 pg_tbl_addr_lo; + u32 pg_tbl_addr_hi; + u32 cur_q_entry_lo; + u32 cur_q_entry_hi; + + u32 reserved1; + u32 reserved2; + u32 pg_cnt_n_prd_ptr; /* 31:16->total page count */ + /* 15:0 ->producer pointer (index?) */ + u32 entry_n_pg_size; /* 31:16->entry size */ + /* 15:0 ->page size */ + u32 int_blk_n_cns_ptr; /* 31:24->Int Blk Id; */ + /* 23:16->Int Blk Offset */ + /* 15:0 ->consumer pointer(index?) */ + u32 q_state; /* 31:16->reserved; 15:0-> Q state */ + u32 reserved3[2]; + u32 reserved4[4]; +}; + +struct bna_ib_blk_mem { + u32 host_addr_lo; + u32 host_addr_hi; + u32 clsc_n_ctrl_n_msix; /* 31:24->coalescing; */ + /* 23:16->coalescing cfg; */ + /* 15:8 ->control; */ + /* 7:0 ->msix; */ + u32 ipkt_n_ent_n_idxof; + u32 ipkt_cnt_cfg_n_unacked; + + u32 reserved[3]; +}; + +struct bna_idx_tbl_mem { + u32 idx; /* !< 31:16->res;15:0->idx; */ +}; + +struct bna_doorbell_qset { + u32 rxq[0x20 >> 2]; + u32 txq[0x20 >> 2]; + u32 ib0[0x20 >> 2]; + u32 ib1[0x20 >> 2]; +}; + +struct bna_rx_fndb_ram { + u32 rss_prop; + u32 size_routing_props; + u32 rit_hds_mcastq; + u32 control_flags; +}; + +struct bna_tx_fndb_ram { + u32 vlan_n_ctrl_flags; +}; + +/** + * @brief + * Structure which maps to RxFn Indirection Table (RIT) + * Size : 1 word + * See catapult_spec.pdf, RxA for details + */ +struct bna_rit_mem { + u32 rxq_ids; /* !< 31:12->res;11:0->two 6 bit RxQ Ids */ +}; + +/** + * @brief + * Structure which maps to RSS Table entry + * Size : 16 words + * See catapult_spec.pdf, RAD for details + */ +struct bna_rss_mem { + /* + * 31:12-> res + * 11:8 -> protocol type + * 7:0 -> hash index + */ + u32 type_n_hash; + u32 hash_key[10]; /* !< 40 byte Toeplitz hash key */ + u32 reserved[5]; +}; + +/* TxQ Vector (a.k.a. 
Tx-Buffer Descriptor) */
+struct bna_dma_addr {
+ u32 msb;
+ u32 lsb;
+};
+
+struct bna_txq_wi_vector {
+ u16 reserved;
+ u16 length; /* Only 14 LSB are valid */
+ struct bna_dma_addr host_addr; /* Tx-Buf DMA addr */
+};
+
+typedef u16 bna_txq_wi_opcode_t;
+
+typedef u16 bna_txq_wi_ctrl_flag_t;
+
+/**
+ * TxQ Entry Structure
+ *
+ * BEWARE: Load values into this structure with correct endianness.
+ */
+struct bna_txq_entry {
+ union {
+ struct {
+ u8 reserved;
+ u8 num_vectors; /* number of vectors present */
+ bna_txq_wi_opcode_t opcode; /* Either */
+ /* BNA_TXQ_WI_SEND or */
+ /* BNA_TXQ_WI_SEND_LSO */
+ bna_txq_wi_ctrl_flag_t flags; /* OR of all the flags */
+ u16 l4_hdr_size_n_offset;
+ u16 vlan_tag;
+ u16 lso_mss; /* Only 14 LSB are valid */
+ u32 frame_length; /* Only 24 LSB are valid */
+ } wi;
+
+ struct {
+ u16 reserved;
+ bna_txq_wi_opcode_t opcode; /* Must be */
+ /* BNA_TXQ_WI_EXTENSION */
+ u32 reserved2[3]; /* Place holder for */
+ /* removed vector (12 bytes) */
+ } wi_ext;
+ } hdr;
+ struct bna_txq_wi_vector vector[4];
+};
+#define wi_hdr hdr.wi
+#define wi_ext_hdr hdr.wi_ext
+
+/* RxQ Entry Structure */
+struct bna_rxq_entry { /* Rx-Buffer */
+ struct bna_dma_addr host_addr; /* Rx-Buffer DMA address */
+};
+
+typedef u32 bna_cq_e_flag_t;
+
+/* CQ Entry Structure */
+struct bna_cq_entry {
+ bna_cq_e_flag_t flags;
+ u16 vlan_tag;
+ u16 length;
+ u32 rss_hash;
+ u8 valid;
+ u8 reserved1;
+ u8 reserved2;
+ u8 rxq_id;
+};
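/*
 * Illustrative sketch (editorial annotation, not part of this patch):
 * filling a TxQ work item for a single-frame send, honouring the
 * BEWARE note above by loading multi-byte fields in big-endian form.
 * The DMA address, lengths and L4 header placement are hypothetical
 * example values.
 */
static inline void example_fill_send_wi(struct bna_txq_entry *wi,
					u64 dma, u16 len, u32 frame_len)
{
	wi->wi_hdr.reserved = 0;
	wi->wi_hdr.num_vectors = 1;		/* one buffer fragment */
	wi->wi_hdr.opcode = htons(BNA_TXQ_WI_SEND);
	wi->wi_hdr.flags = htons(BNA_TXQ_WI_CF_IP_CKSUM |
				 BNA_TXQ_WI_CF_TCP_CKSUM);
	/* e.g. a 5-word L4 header starting at byte offset 34 */
	wi->wi_hdr.l4_hdr_size_n_offset =
		htons(BNA_TXQ_WI_L4_HDR_N_OFFSET(5, 34));
	wi->wi_hdr.vlan_tag = 0;
	wi->wi_hdr.lso_mss = 0;			/* not an LSO send */
	wi->wi_hdr.frame_length = htonl(frame_len); /* 24 LSB valid */

	wi->vector[0].reserved = 0;
	wi->vector[0].length = htons(len);	/* 14 LSB valid */
	wi->vector[0].host_addr.msb = htonl(upper_32_bits(dma));
	wi->vector[0].host_addr.lsb = htonl(lower_32_bits(dma));
}
+
+#endif /* __BNA_HW_H__ */
diff --git a/drivers/net/bna/bna_txrx.c b/drivers/net/bna/bna_txrx.c
new file mode 100644
index 000000000000..890846d55502
--- /dev/null
+++ b/drivers/net/bna/bna_txrx.c
@@ -0,0 +1,4209 @@
+/*
+ * Linux network driver for Brocade Converged Network Adapter.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License (GPL) Version 2 as
+ * published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Copyright (c) 2005-2010 Brocade Communications Systems, Inc.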
+ * All rights reserved + * www.brocade.com + */ +#include "bna.h" +#include "bfa_sm.h" +#include "bfi.h" + +/** + * IB + */ +#define bna_ib_find_free_ibidx(_mask, _pos)\ +do {\ + (_pos) = 0;\ + while (((_pos) < (BFI_IBIDX_MAX_SEGSIZE)) &&\ + ((1 << (_pos)) & (_mask)))\ + (_pos)++;\ +} while (0) + +#define bna_ib_count_ibidx(_mask, _count)\ +do {\ + int pos = 0;\ + (_count) = 0;\ + while (pos < (BFI_IBIDX_MAX_SEGSIZE)) {\ + if ((1 << pos) & (_mask))\ + (_count) = pos + 1;\ + pos++;\ + } \ +} while (0) + +#define bna_ib_select_segpool(_count, _q_idx)\ +do {\ + int i;\ + (_q_idx) = -1;\ + for (i = 0; i < BFI_IBIDX_TOTAL_POOLS; i++) {\ + if ((_count <= ibidx_pool[i].pool_entry_size)) {\ + (_q_idx) = i;\ + break;\ + } \ + } \ +} while (0) + +struct bna_ibidx_pool { + int pool_size; + int pool_entry_size; +}; +init_ibidx_pool(ibidx_pool); + +static struct bna_intr * +bna_intr_get(struct bna_ib_mod *ib_mod, enum bna_intr_type intr_type, + int vector) +{ + struct bna_intr *intr; + struct list_head *qe; + + list_for_each(qe, &ib_mod->intr_active_q) { + intr = (struct bna_intr *)qe; + + if ((intr->intr_type == intr_type) && + (intr->vector == vector)) { + intr->ref_count++; + return intr; + } + } + + if (list_empty(&ib_mod->intr_free_q)) + return NULL; + + bfa_q_deq(&ib_mod->intr_free_q, &intr); + bfa_q_qe_init(&intr->qe); + + intr->ref_count = 1; + intr->intr_type = intr_type; + intr->vector = vector; + + list_add_tail(&intr->qe, &ib_mod->intr_active_q); + + return intr; +} + +static void +bna_intr_put(struct bna_ib_mod *ib_mod, + struct bna_intr *intr) +{ + intr->ref_count--; + + if (intr->ref_count == 0) { + intr->ib = NULL; + list_del(&intr->qe); + bfa_q_qe_init(&intr->qe); + list_add_tail(&intr->qe, &ib_mod->intr_free_q); + } +} + +void +bna_ib_mod_init(struct bna_ib_mod *ib_mod, struct bna *bna, + struct bna_res_info *res_info) +{ + int i; + int j; + int count; + u8 offset; + struct bna_doorbell_qset *qset; + unsigned long off; + + ib_mod->bna = bna; + + ib_mod->ib = (struct bna_ib *) + res_info[BNA_RES_MEM_T_IB_ARRAY].res_u.mem_info.mdl[0].kva; + ib_mod->intr = (struct bna_intr *) + res_info[BNA_RES_MEM_T_INTR_ARRAY].res_u.mem_info.mdl[0].kva; + ib_mod->idx_seg = (struct bna_ibidx_seg *) + res_info[BNA_RES_MEM_T_IDXSEG_ARRAY].res_u.mem_info.mdl[0].kva; + + INIT_LIST_HEAD(&ib_mod->ib_free_q); + INIT_LIST_HEAD(&ib_mod->intr_free_q); + INIT_LIST_HEAD(&ib_mod->intr_active_q); + + for (i = 0; i < BFI_IBIDX_TOTAL_POOLS; i++) + INIT_LIST_HEAD(&ib_mod->ibidx_seg_pool[i]); + + for (i = 0; i < BFI_MAX_IB; i++) { + ib_mod->ib[i].ib_id = i; + + ib_mod->ib[i].ib_seg_host_addr_kva = + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.mdl[i].kva; + ib_mod->ib[i].ib_seg_host_addr.lsb = + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.mdl[i].dma.lsb; + ib_mod->ib[i].ib_seg_host_addr.msb = + res_info[BNA_RES_MEM_T_IBIDX].res_u.mem_info.mdl[i].dma.msb; + + qset = (struct bna_doorbell_qset *)0; + off = (unsigned long)(&qset[i >> 1].ib0[(i & 0x1) + * (0x20 >> 2)]); + ib_mod->ib[i].door_bell.doorbell_addr = off + + BNA_GET_DOORBELL_BASE_ADDR(bna->pcidev.pci_bar_kva); + + bfa_q_qe_init(&ib_mod->ib[i].qe); + list_add_tail(&ib_mod->ib[i].qe, &ib_mod->ib_free_q); + + bfa_q_qe_init(&ib_mod->intr[i].qe); + list_add_tail(&ib_mod->intr[i].qe, &ib_mod->intr_free_q); + } + + count = 0; + offset = 0; + for (i = 0; i < BFI_IBIDX_TOTAL_POOLS; i++) { + for (j = 0; j < ibidx_pool[i].pool_size; j++) { + bfa_q_qe_init(&ib_mod->idx_seg[count]); + ib_mod->idx_seg[count].ib_seg_size = + ibidx_pool[i].pool_entry_size; + 
ib_mod->idx_seg[count].ib_idx_tbl_offset = offset;
+ list_add_tail(&ib_mod->idx_seg[count].qe,
+ &ib_mod->ibidx_seg_pool[i]);
+ count++;
+ offset += ibidx_pool[i].pool_entry_size;
+ }
+ }
+}
+
+void
+bna_ib_mod_uninit(struct bna_ib_mod *ib_mod)
+{
+ int i;
+ int j;
+ struct list_head *qe;
+
+ i = 0;
+ list_for_each(qe, &ib_mod->ib_free_q)
+ i++;
+
+ i = 0;
+ list_for_each(qe, &ib_mod->intr_free_q)
+ i++;
+
+ for (i = 0; i < BFI_IBIDX_TOTAL_POOLS; i++) {
+ j = 0;
+ list_for_each(qe, &ib_mod->ibidx_seg_pool[i])
+ j++;
+ }
+
+ ib_mod->bna = NULL;
+}
+
+struct bna_ib *
+bna_ib_get(struct bna_ib_mod *ib_mod,
+ enum bna_intr_type intr_type,
+ int vector)
+{
+ struct bna_ib *ib;
+ struct bna_intr *intr;
+
+ if (intr_type == BNA_INTR_T_INTX)
+ vector = (1 << vector);
+
+ intr = bna_intr_get(ib_mod, intr_type, vector);
+ if (intr == NULL)
+ return NULL;
+
+ if (intr->ib) {
+ if (intr->ib->ref_count == BFI_IBIDX_MAX_SEGSIZE) {
+ bna_intr_put(ib_mod, intr);
+ return NULL;
+ }
+ intr->ib->ref_count++;
+ return intr->ib;
+ }
+
+ if (list_empty(&ib_mod->ib_free_q)) {
+ bna_intr_put(ib_mod, intr);
+ return NULL;
+ }
+
+ bfa_q_deq(&ib_mod->ib_free_q, &ib);
+ bfa_q_qe_init(&ib->qe);
+
+ ib->ref_count = 1;
+ ib->start_count = 0;
+ ib->idx_mask = 0;
+
+ ib->intr = intr;
+ ib->idx_seg = NULL;
+ intr->ib = ib;
+
+ ib->bna = ib_mod->bna;
+
+ return ib;
+}
+
+void
+bna_ib_put(struct bna_ib_mod *ib_mod, struct bna_ib *ib)
+{
+ bna_intr_put(ib_mod, ib->intr);
+
+ ib->ref_count--;
+
+ if (ib->ref_count == 0) {
+ ib->intr = NULL;
+ ib->bna = NULL;
+ list_add_tail(&ib->qe, &ib_mod->ib_free_q);
+ }
+}
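/*
 * Illustrative sketch (editorial annotation, not part of this patch):
 * what the ibidx bitmask helpers defined near the top of this file
 * compute.  With idx_mask = 0x16 (bits 1, 2 and 4 in use),
 * bna_ib_find_free_ibidx() yields 0 (the first clear bit) and
 * bna_ib_count_ibidx() yields 5 (highest set bit + 1), i.e. the
 * segment size needed to cover every index currently held.
 */
static void example_ibidx_mask_usage(void)
{
	u32 mask = 0x16;	/* index slots 1, 2 and 4 in use */
	int free_pos, count;

	bna_ib_find_free_ibidx(mask, free_pos);	/* free_pos == 0 */
	bna_ib_count_ibidx(mask, count);	/* count == 5 */
}
+
+/* Returns index offset - starting from 0 */
+int
+bna_ib_reserve_idx(struct bna_ib *ib)
+{
+ struct bna_ib_mod *ib_mod = &ib->bna->ib_mod;
+ struct bna_ibidx_seg *idx_seg;
+ int idx;
+ int num_idx;
+ int q_idx;
+
+ /* Find the first free index position */
+ bna_ib_find_free_ibidx(ib->idx_mask, idx);
+ if (idx == BFI_IBIDX_MAX_SEGSIZE)
+ return -1;
+
+ /*
+ * Calculate the total number of indexes held by this IB,
+ * including the index newly reserved above.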
+ */ + bna_ib_count_ibidx((ib->idx_mask | (1 << idx)), num_idx); + + /* See if there is a free space in the index segment held by this IB */ + if (ib->idx_seg && (num_idx <= ib->idx_seg->ib_seg_size)) { + ib->idx_mask |= (1 << idx); + return idx; + } + + if (ib->start_count) + return -1; + + /* Allocate a new segment */ + bna_ib_select_segpool(num_idx, q_idx); + while (1) { + if (q_idx == BFI_IBIDX_TOTAL_POOLS) + return -1; + if (!list_empty(&ib_mod->ibidx_seg_pool[q_idx])) + break; + q_idx++; + } + bfa_q_deq(&ib_mod->ibidx_seg_pool[q_idx], &idx_seg); + bfa_q_qe_init(&idx_seg->qe); + + /* Free the old segment */ + if (ib->idx_seg) { + bna_ib_select_segpool(ib->idx_seg->ib_seg_size, q_idx); + list_add_tail(&ib->idx_seg->qe, &ib_mod->ibidx_seg_pool[q_idx]); + } + + ib->idx_seg = idx_seg; + + ib->idx_mask |= (1 << idx); + + return idx; +} + +void +bna_ib_release_idx(struct bna_ib *ib, int idx) +{ + struct bna_ib_mod *ib_mod = &ib->bna->ib_mod; + struct bna_ibidx_seg *idx_seg; + int num_idx; + int cur_q_idx; + int new_q_idx; + + ib->idx_mask &= ~(1 << idx); + + if (ib->start_count) + return; + + bna_ib_count_ibidx(ib->idx_mask, num_idx); + + /* + * Free the segment, if there are no more indexes in the segment + * held by this IB + */ + if (!num_idx) { + bna_ib_select_segpool(ib->idx_seg->ib_seg_size, cur_q_idx); + list_add_tail(&ib->idx_seg->qe, + &ib_mod->ibidx_seg_pool[cur_q_idx]); + ib->idx_seg = NULL; + return; + } + + /* See if we can move to a smaller segment */ + bna_ib_select_segpool(num_idx, new_q_idx); + bna_ib_select_segpool(ib->idx_seg->ib_seg_size, cur_q_idx); + while (new_q_idx < cur_q_idx) { + if (!list_empty(&ib_mod->ibidx_seg_pool[new_q_idx])) + break; + new_q_idx++; + } + if (new_q_idx < cur_q_idx) { + /* Select the new smaller segment */ + bfa_q_deq(&ib_mod->ibidx_seg_pool[new_q_idx], &idx_seg); + bfa_q_qe_init(&idx_seg->qe); + /* Free the old segment */ + list_add_tail(&ib->idx_seg->qe, + &ib_mod->ibidx_seg_pool[cur_q_idx]); + ib->idx_seg = idx_seg; + } +} + +int +bna_ib_config(struct bna_ib *ib, struct bna_ib_config *ib_config) +{ + if (ib->start_count) + return -1; + + ib->ib_config.coalescing_timeo = ib_config->coalescing_timeo; + ib->ib_config.interpkt_timeo = ib_config->interpkt_timeo; + ib->ib_config.interpkt_count = ib_config->interpkt_count; + ib->ib_config.ctrl_flags = ib_config->ctrl_flags; + + ib->ib_config.ctrl_flags |= BFI_IB_CF_MASTER_ENABLE; + if (ib->intr->intr_type == BNA_INTR_T_MSIX) + ib->ib_config.ctrl_flags |= BFI_IB_CF_MSIX_MODE; + + return 0; +} + +void +bna_ib_start(struct bna_ib *ib) +{ + struct bna_ib_blk_mem ib_cfg; + struct bna_ib_blk_mem *ib_mem; + u32 pg_num; + u32 intx_mask; + int i; + void __iomem *base_addr; + unsigned long off; + + ib->start_count++; + + if (ib->start_count > 1) + return; + + ib_cfg.host_addr_lo = (u32)(ib->ib_seg_host_addr.lsb); + ib_cfg.host_addr_hi = (u32)(ib->ib_seg_host_addr.msb); + + ib_cfg.clsc_n_ctrl_n_msix = (((u32) + ib->ib_config.coalescing_timeo << 16) | + ((u32)ib->ib_config.ctrl_flags << 8) | + (ib->intr->vector)); + ib_cfg.ipkt_n_ent_n_idxof = + ((u32) + (ib->ib_config.interpkt_timeo & 0xf) << 16) | + ((u32)ib->idx_seg->ib_seg_size << 8) | + (ib->idx_seg->ib_idx_tbl_offset); + ib_cfg.ipkt_cnt_cfg_n_unacked = ((u32) + ib->ib_config.interpkt_count << 24); + + pg_num = BNA_GET_PAGE_NUM(HQM0_BLK_PG_NUM + ib->bna->port_num, + HQM_IB_RAM_BASE_OFFSET); + writel(pg_num, ib->bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(ib->bna->pcidev.pci_bar_kva, + HQM_IB_RAM_BASE_OFFSET); + + ib_mem = (struct 
bna_ib_blk_mem *)0; + off = (unsigned long)&ib_mem[ib->ib_id].host_addr_lo; + writel(htonl(ib_cfg.host_addr_lo), base_addr + off); + + off = (unsigned long)&ib_mem[ib->ib_id].host_addr_hi; + writel(htonl(ib_cfg.host_addr_hi), base_addr + off); + + off = (unsigned long)&ib_mem[ib->ib_id].clsc_n_ctrl_n_msix; + writel(ib_cfg.clsc_n_ctrl_n_msix, base_addr + off); + + off = (unsigned long)&ib_mem[ib->ib_id].ipkt_n_ent_n_idxof; + writel(ib_cfg.ipkt_n_ent_n_idxof, base_addr + off); + + off = (unsigned long)&ib_mem[ib->ib_id].ipkt_cnt_cfg_n_unacked; + writel(ib_cfg.ipkt_cnt_cfg_n_unacked, base_addr + off); + + ib->door_bell.doorbell_ack = BNA_DOORBELL_IB_INT_ACK( + (u32)ib->ib_config.coalescing_timeo, 0); + + pg_num = BNA_GET_PAGE_NUM(HQM0_BLK_PG_NUM + ib->bna->port_num, + HQM_INDX_TBL_RAM_BASE_OFFSET); + writel(pg_num, ib->bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(ib->bna->pcidev.pci_bar_kva, + HQM_INDX_TBL_RAM_BASE_OFFSET); + for (i = 0; i < ib->idx_seg->ib_seg_size; i++) { + off = (unsigned long) + ((ib->idx_seg->ib_idx_tbl_offset + i) * BFI_IBIDX_SIZE); + writel(0, base_addr + off); + } + + if (ib->intr->intr_type == BNA_INTR_T_INTX) { + bna_intx_disable(ib->bna, intx_mask); + intx_mask &= ~(ib->intr->vector); + bna_intx_enable(ib->bna, intx_mask); + } +} + +void +bna_ib_stop(struct bna_ib *ib) +{ + u32 intx_mask; + + ib->start_count--; + + if (ib->start_count == 0) { + writel(BNA_DOORBELL_IB_INT_DISABLE, + ib->door_bell.doorbell_addr); + if (ib->intr->intr_type == BNA_INTR_T_INTX) { + bna_intx_disable(ib->bna, intx_mask); + intx_mask |= (ib->intr->vector); + bna_intx_enable(ib->bna, intx_mask); + } + } +} + +void +bna_ib_fail(struct bna_ib *ib) +{ + ib->start_count = 0; +} + +/** + * RXF + */ +static void rxf_enable(struct bna_rxf *rxf); +static void rxf_disable(struct bna_rxf *rxf); +static void __rxf_config_set(struct bna_rxf *rxf); +static void __rxf_rit_set(struct bna_rxf *rxf); +static void __bna_rxf_stat_clr(struct bna_rxf *rxf); +static int rxf_process_packet_filter(struct bna_rxf *rxf); +static int rxf_clear_packet_filter(struct bna_rxf *rxf); +static void rxf_reset_packet_filter(struct bna_rxf *rxf); +static void rxf_cb_enabled(void *arg, int status); +static void rxf_cb_disabled(void *arg, int status); +static void bna_rxf_cb_stats_cleared(void *arg, int status); +static void __rxf_enable(struct bna_rxf *rxf); +static void __rxf_disable(struct bna_rxf *rxf); + +bfa_fsm_state_decl(bna_rxf, stopped, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, start_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, cam_fltr_mod_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, started, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, cam_fltr_clr_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, stop_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, pause_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, resume_wait, struct bna_rxf, + enum bna_rxf_event); +bfa_fsm_state_decl(bna_rxf, stat_clr_wait, struct bna_rxf, + enum bna_rxf_event); + +static struct bfa_sm_table rxf_sm_table[] = { + {BFA_SM(bna_rxf_sm_stopped), BNA_RXF_STOPPED}, + {BFA_SM(bna_rxf_sm_start_wait), BNA_RXF_START_WAIT}, + {BFA_SM(bna_rxf_sm_cam_fltr_mod_wait), BNA_RXF_CAM_FLTR_MOD_WAIT}, + {BFA_SM(bna_rxf_sm_started), BNA_RXF_STARTED}, + {BFA_SM(bna_rxf_sm_cam_fltr_clr_wait), BNA_RXF_CAM_FLTR_CLR_WAIT}, + {BFA_SM(bna_rxf_sm_stop_wait), BNA_RXF_STOP_WAIT}, + 
{BFA_SM(bna_rxf_sm_pause_wait), BNA_RXF_PAUSE_WAIT}, + {BFA_SM(bna_rxf_sm_resume_wait), BNA_RXF_RESUME_WAIT}, + {BFA_SM(bna_rxf_sm_stat_clr_wait), BNA_RXF_STAT_CLR_WAIT} +}; + +static void +bna_rxf_sm_stopped_entry(struct bna_rxf *rxf) +{ + call_rxf_stop_cbfn(rxf, BNA_CB_SUCCESS); +} + +static void +bna_rxf_sm_stopped(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_START: + bfa_fsm_set_state(rxf, bna_rxf_sm_start_wait); + break; + + case RXF_E_STOP: + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_FAIL: + /* No-op */ + break; + + case RXF_E_CAM_FLTR_MOD: + call_rxf_cam_fltr_cbfn(rxf, BNA_CB_SUCCESS); + break; + + case RXF_E_STARTED: + case RXF_E_STOPPED: + case RXF_E_CAM_FLTR_RESP: + /** + * These events are received due to flushing of mbox + * when device fails + */ + /* No-op */ + break; + + case RXF_E_PAUSE: + rxf->rxf_oper_state = BNA_RXF_OPER_STATE_PAUSED; + call_rxf_pause_cbfn(rxf, BNA_CB_SUCCESS); + break; + + case RXF_E_RESUME: + rxf->rxf_oper_state = BNA_RXF_OPER_STATE_RUNNING; + call_rxf_resume_cbfn(rxf, BNA_CB_SUCCESS); + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_start_wait_entry(struct bna_rxf *rxf) +{ + __rxf_config_set(rxf); + __rxf_rit_set(rxf); + rxf_enable(rxf); +} + +static void +bna_rxf_sm_start_wait(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_STOP: + /** + * STOP is originated from bnad. When this happens, + * it can not be waiting for filter update + */ + call_rxf_start_cbfn(rxf, BNA_CB_INTERRUPT); + bfa_fsm_set_state(rxf, bna_rxf_sm_stop_wait); + break; + + case RXF_E_FAIL: + call_rxf_cam_fltr_cbfn(rxf, BNA_CB_SUCCESS); + call_rxf_start_cbfn(rxf, BNA_CB_FAIL); + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_CAM_FLTR_MOD: + /* No-op */ + break; + + case RXF_E_STARTED: + /** + * Force rxf_process_filter() to go through initial + * config + */ + if ((rxf->ucast_active_mac != NULL) && + (rxf->ucast_pending_set == 0)) + rxf->ucast_pending_set = 1; + + if (rxf->rss_status == BNA_STATUS_T_ENABLED) + rxf->rxf_flags |= BNA_RXF_FL_RSS_CONFIG_PENDING; + + rxf->rxf_flags |= BNA_RXF_FL_VLAN_CONFIG_PENDING; + + bfa_fsm_set_state(rxf, bna_rxf_sm_cam_fltr_mod_wait); + break; + + case RXF_E_PAUSE: + case RXF_E_RESUME: + rxf->rxf_flags |= BNA_RXF_FL_OPERSTATE_CHANGED; + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_cam_fltr_mod_wait_entry(struct bna_rxf *rxf) +{ + if (!rxf_process_packet_filter(rxf)) { + /* No more pending CAM entries to update */ + bfa_fsm_set_state(rxf, bna_rxf_sm_started); + } +} + +static void +bna_rxf_sm_cam_fltr_mod_wait(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_STOP: + /** + * STOP is originated from bnad. 
When this happens, + * it can not be waiting for filter update + */ + call_rxf_start_cbfn(rxf, BNA_CB_INTERRUPT); + bfa_fsm_set_state(rxf, bna_rxf_sm_cam_fltr_clr_wait); + break; + + case RXF_E_FAIL: + rxf_reset_packet_filter(rxf); + call_rxf_cam_fltr_cbfn(rxf, BNA_CB_SUCCESS); + call_rxf_start_cbfn(rxf, BNA_CB_FAIL); + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_CAM_FLTR_MOD: + /* No-op */ + break; + + case RXF_E_CAM_FLTR_RESP: + if (!rxf_process_packet_filter(rxf)) { + /* No more pending CAM entries to update */ + call_rxf_cam_fltr_cbfn(rxf, BNA_CB_SUCCESS); + bfa_fsm_set_state(rxf, bna_rxf_sm_started); + } + break; + + case RXF_E_PAUSE: + case RXF_E_RESUME: + rxf->rxf_flags |= BNA_RXF_FL_OPERSTATE_CHANGED; + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_started_entry(struct bna_rxf *rxf) +{ + call_rxf_start_cbfn(rxf, BNA_CB_SUCCESS); + + if (rxf->rxf_flags & BNA_RXF_FL_OPERSTATE_CHANGED) { + if (rxf->rxf_oper_state == BNA_RXF_OPER_STATE_PAUSED) + bfa_fsm_send_event(rxf, RXF_E_PAUSE); + else + bfa_fsm_send_event(rxf, RXF_E_RESUME); + } + +} + +static void +bna_rxf_sm_started(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_STOP: + bfa_fsm_set_state(rxf, bna_rxf_sm_cam_fltr_clr_wait); + /* Hack to get FSM start clearing CAM entries */ + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_RESP); + break; + + case RXF_E_FAIL: + rxf_reset_packet_filter(rxf); + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_CAM_FLTR_MOD: + bfa_fsm_set_state(rxf, bna_rxf_sm_cam_fltr_mod_wait); + break; + + case RXF_E_PAUSE: + bfa_fsm_set_state(rxf, bna_rxf_sm_pause_wait); + break; + + case RXF_E_RESUME: + bfa_fsm_set_state(rxf, bna_rxf_sm_resume_wait); + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_cam_fltr_clr_wait_entry(struct bna_rxf *rxf) +{ + /** + * Note: Do not add rxf_clear_packet_filter here. + * It will overstep mbox when this transition happens: + * cam_fltr_mod_wait -> cam_fltr_clr_wait on RXF_E_STOP event + */ +} + +static void +bna_rxf_sm_cam_fltr_clr_wait(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_FAIL: + /** + * FSM was in the process of stopping, initiated by + * bnad. When this happens, no one can be waiting for + * start or filter update + */ + rxf_reset_packet_filter(rxf); + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_CAM_FLTR_RESP: + if (!rxf_clear_packet_filter(rxf)) { + /* No more pending CAM entries to clear */ + bfa_fsm_set_state(rxf, bna_rxf_sm_stop_wait); + rxf_disable(rxf); + } + break; + + default: + bfa_sm_fault(rxf->rx->bna, event); + } +} + +static void +bna_rxf_sm_stop_wait_entry(struct bna_rxf *rxf) +{ + /** + * NOTE: Do not add rxf_disable here. + * It will overstep mbox when this transition happens: + * start_wait -> stop_wait on RXF_E_STOP event + */ +} + +static void +bna_rxf_sm_stop_wait(struct bna_rxf *rxf, enum bna_rxf_event event) +{ + switch (event) { + case RXF_E_FAIL: + /** + * FSM was in the process of stopping, initiated by + * bnad. When this happens, no one can be waiting for + * start or filter update + */ + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); + break; + + case RXF_E_STARTED: + /** + * This event is received due to abrupt transition from + * bna_rxf_sm_start_wait state on receiving + * RXF_E_STOP event + */ + rxf_disable(rxf); + break; + + case RXF_E_STOPPED: + /** + * FSM was in the process of stopping, initiated by + * bnad. 
When this happens, no one can be waiting for
+ * start or filter update
+ */
+ bfa_fsm_set_state(rxf, bna_rxf_sm_stat_clr_wait);
+ break;
+
+ case RXF_E_PAUSE:
+ rxf->rxf_oper_state = BNA_RXF_OPER_STATE_PAUSED;
+ break;
+
+ case RXF_E_RESUME:
+ rxf->rxf_oper_state = BNA_RXF_OPER_STATE_RUNNING;
+ break;
+
+ default:
+ bfa_sm_fault(rxf->rx->bna, event);
+ }
+}
+
+static void
+bna_rxf_sm_pause_wait_entry(struct bna_rxf *rxf)
+{
+ rxf->rxf_flags &=
+ ~(BNA_RXF_FL_OPERSTATE_CHANGED | BNA_RXF_FL_RXF_ENABLED);
+ __rxf_disable(rxf);
+}
+
+static void
+bna_rxf_sm_pause_wait(struct bna_rxf *rxf, enum bna_rxf_event event)
+{
+ switch (event) {
+ case RXF_E_FAIL:
+ /**
+ * FSM was in the process of disabling rxf, initiated by
+ * bnad.
+ */
+ call_rxf_pause_cbfn(rxf, BNA_CB_FAIL);
+ bfa_fsm_set_state(rxf, bna_rxf_sm_stopped);
+ break;
+
+ case RXF_E_STOPPED:
+ rxf->rxf_oper_state = BNA_RXF_OPER_STATE_PAUSED;
+ call_rxf_pause_cbfn(rxf, BNA_CB_SUCCESS);
+ bfa_fsm_set_state(rxf, bna_rxf_sm_started);
+ break;
+
+ /*
+ * Since PAUSE/RESUME can only be sent by bnad, we don't expect
+ * any other event during these states
+ */
+ default:
+ bfa_sm_fault(rxf->rx->bna, event);
+ }
+}
+
+static void
+bna_rxf_sm_resume_wait_entry(struct bna_rxf *rxf)
+{
+ rxf->rxf_flags &= ~(BNA_RXF_FL_OPERSTATE_CHANGED);
+ rxf->rxf_flags |= BNA_RXF_FL_RXF_ENABLED;
+ __rxf_enable(rxf);
+}
+
+static void
+bna_rxf_sm_resume_wait(struct bna_rxf *rxf, enum bna_rxf_event event)
+{
+ switch (event) {
+ case RXF_E_FAIL:
+ /**
+ * FSM was in the process of disabling rxf, initiated by
+ * bnad.
+ */
+ call_rxf_resume_cbfn(rxf, BNA_CB_FAIL);
+ bfa_fsm_set_state(rxf, bna_rxf_sm_stopped);
+ break;
+
+ case RXF_E_STARTED:
+ rxf->rxf_oper_state = BNA_RXF_OPER_STATE_RUNNING;
+ call_rxf_resume_cbfn(rxf, BNA_CB_SUCCESS);
+ bfa_fsm_set_state(rxf, bna_rxf_sm_started);
+ break;
+
+ /*
+ * Since PAUSE/RESUME can only be sent by bnad, we don't expect
+ * any other event during these states
+ */
+ default:
+ bfa_sm_fault(rxf->rx->bna, event);
+ }
+}
+
+static void
+bna_rxf_sm_stat_clr_wait_entry(struct bna_rxf *rxf)
+{
+ __bna_rxf_stat_clr(rxf);
+}
+
+static void
+bna_rxf_sm_stat_clr_wait(struct bna_rxf *rxf, enum bna_rxf_event event)
+{
+ switch (event) {
+ case RXF_E_FAIL:
+ case RXF_E_STAT_CLEARED:
+ bfa_fsm_set_state(rxf, bna_rxf_sm_stopped);
+ break;
+
+ default:
+ bfa_sm_fault(rxf->rx->bna, event);
+ }
+}
+
+static void
+__rxf_enable(struct bna_rxf *rxf)
+{
+ struct bfi_ll_rxf_multi_req ll_req;
+ u32 bm[2] = {0, 0};
+
+ if (rxf->rxf_id < 32)
+ bm[0] = 1 << rxf->rxf_id;
+ else
+ bm[1] = 1 << (rxf->rxf_id - 32);
+
+ bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_RX_REQ, 0);
+ ll_req.rxf_id_mask[0] = htonl(bm[0]);
+ ll_req.rxf_id_mask[1] = htonl(bm[1]);
+ ll_req.enable = 1;
+
+ bna_mbox_qe_fill(&rxf->mbox_qe, &ll_req, sizeof(ll_req),
+ rxf_cb_enabled, rxf);
+
+ bna_mbox_send(rxf->rx->bna, &rxf->mbox_qe);
+}
+
+static void
+__rxf_disable(struct bna_rxf *rxf)
+{
+ struct bfi_ll_rxf_multi_req ll_req;
+ u32 bm[2] = {0, 0};
+
+ if (rxf->rxf_id < 32)
+ bm[0] = 1 << rxf->rxf_id;
+ else
+ bm[1] = 1 << (rxf->rxf_id - 32);
+
+ bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_RX_REQ, 0);
+ ll_req.rxf_id_mask[0] = htonl(bm[0]);
+ ll_req.rxf_id_mask[1] = htonl(bm[1]);
+ ll_req.enable = 0;
+
+ bna_mbox_qe_fill(&rxf->mbox_qe, &ll_req, sizeof(ll_req),
+ rxf_cb_disabled, rxf);
+
+ bna_mbox_send(rxf->rx->bna, &rxf->mbox_qe);
+}
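/*
 * Illustrative sketch (editorial annotation, not part of this patch):
 * __rxf_enable()/__rxf_disable() above address up to 64 RxFs through a
 * pair of 32-bit mask words; a hypothetical helper showing the split:
 */
static void example_rxf_id_to_mask(int rxf_id, u32 bm[2])
{
	bm[0] = bm[1] = 0;
	if (rxf_id < 32)
		bm[0] = 1 << rxf_id;		/* ids 0..31 -> word 0 */
	else
		bm[1] = 1 << (rxf_id - 32);	/* ids 32..63 -> word 1 */
	/* the words go to the LPU in network order: htonl(bm[0]), ... */
}
+
+static void
+__rxf_config_set(struct bna_rxf *rxf)
+{
+ u32 i;
+ struct bna_rss_mem *rss_mem;
+ struct bna_rx_fndb_ram *rx_fndb_ram;
+ struct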
bna *bna = rxf->rx->bna; + void __iomem *base_addr; + unsigned long off; + + base_addr = BNA_GET_MEM_BASE_ADDR(bna->pcidev.pci_bar_kva, + RSS_TABLE_BASE_OFFSET); + + rss_mem = (struct bna_rss_mem *)0; + + /* Configure RSS if required */ + if (rxf->ctrl_flags & BNA_RXF_CF_RSS_ENABLE) { + /* configure RSS Table */ + writel(BNA_GET_PAGE_NUM(RAD0_MEM_BLK_BASE_PG_NUM + + bna->port_num, RSS_TABLE_BASE_OFFSET), + bna->regs.page_addr); + + /* temporarily disable RSS, while hash value is written */ + off = (unsigned long)&rss_mem[0].type_n_hash; + writel(0, base_addr + off); + + for (i = 0; i < BFI_RSS_HASH_KEY_LEN; i++) { + off = (unsigned long) + &rss_mem[0].hash_key[(BFI_RSS_HASH_KEY_LEN - 1) - i]; + writel(htonl(rxf->rss_cfg.toeplitz_hash_key[i]), + base_addr + off); + } + + off = (unsigned long)&rss_mem[0].type_n_hash; + writel(rxf->rss_cfg.hash_type | rxf->rss_cfg.hash_mask, + base_addr + off); + } + + /* Configure RxF */ + writel(BNA_GET_PAGE_NUM( + LUT0_MEM_BLK_BASE_PG_NUM + (bna->port_num * 2), + RX_FNDB_RAM_BASE_OFFSET), + bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(bna->pcidev.pci_bar_kva, + RX_FNDB_RAM_BASE_OFFSET); + + rx_fndb_ram = (struct bna_rx_fndb_ram *)0; + + /* We always use RSS table 0 */ + off = (unsigned long)&rx_fndb_ram[rxf->rxf_id].rss_prop; + writel(rxf->ctrl_flags & BNA_RXF_CF_RSS_ENABLE, + base_addr + off); + + /* small large buffer enable/disable */ + off = (unsigned long)&rx_fndb_ram[rxf->rxf_id].size_routing_props; + writel((rxf->ctrl_flags & BNA_RXF_CF_SM_LG_RXQ) | 0x80, + base_addr + off); + + /* RIT offset, HDS forced offset, multicast RxQ Id */ + off = (unsigned long)&rx_fndb_ram[rxf->rxf_id].rit_hds_mcastq; + writel((rxf->rit_segment->rit_offset << 16) | + (rxf->forced_offset << 8) | + (rxf->hds_cfg.hdr_type & BNA_HDS_FORCED) | rxf->mcast_rxq_id, + base_addr + off); + + /* + * default vlan tag, default function enable, strip vlan bytes, + * HDS type, header size + */ + + off = (unsigned long)&rx_fndb_ram[rxf->rxf_id].control_flags; + writel(((u32)rxf->default_vlan_tag << 16) | + (rxf->ctrl_flags & + (BNA_RXF_CF_DEFAULT_VLAN | + BNA_RXF_CF_DEFAULT_FUNCTION_ENABLE | + BNA_RXF_CF_VLAN_STRIP)) | + (rxf->hds_cfg.hdr_type & ~BNA_HDS_FORCED) | + rxf->hds_cfg.header_size, + base_addr + off); +} + +void +__rxf_vlan_filter_set(struct bna_rxf *rxf, enum bna_status status) +{ + struct bna *bna = rxf->rx->bna; + int i; + + writel(BNA_GET_PAGE_NUM(LUT0_MEM_BLK_BASE_PG_NUM + + (bna->port_num * 2), VLAN_RAM_BASE_OFFSET), + bna->regs.page_addr); + + if (status == BNA_STATUS_T_ENABLED) { + /* enable VLAN filtering on this function */ + for (i = 0; i <= BFI_MAX_VLAN / 32; i++) { + writel(rxf->vlan_filter_table[i], + BNA_GET_VLAN_MEM_ENTRY_ADDR + (bna->pcidev.pci_bar_kva, rxf->rxf_id, + i * 32)); + } + } else { + /* disable VLAN filtering on this function */ + for (i = 0; i <= BFI_MAX_VLAN / 32; i++) { + writel(0xffffffff, + BNA_GET_VLAN_MEM_ENTRY_ADDR + (bna->pcidev.pci_bar_kva, rxf->rxf_id, + i * 32)); + } + } +} + +static void +__rxf_rit_set(struct bna_rxf *rxf) +{ + struct bna *bna = rxf->rx->bna; + struct bna_rit_mem *rit_mem; + int i; + void __iomem *base_addr; + unsigned long off; + + base_addr = BNA_GET_MEM_BASE_ADDR(bna->pcidev.pci_bar_kva, + FUNCTION_TO_RXQ_TRANSLATE); + + rit_mem = (struct bna_rit_mem *)0; + + writel(BNA_GET_PAGE_NUM(RXA0_MEM_BLK_BASE_PG_NUM + bna->port_num, + FUNCTION_TO_RXQ_TRANSLATE), + bna->regs.page_addr); + + for (i = 0; i < rxf->rit_segment->rit_size; i++) { + off = (unsigned long)&rit_mem[i + rxf->rit_segment->rit_offset]; + 
writel(rxf->rit_segment->rit[i].large_rxq_id << 6 |
+ rxf->rit_segment->rit[i].small_rxq_id,
+ base_addr + off);
+ }
+}
+
+static void
+__bna_rxf_stat_clr(struct bna_rxf *rxf)
+{
+ struct bfi_ll_stats_req ll_req;
+ u32 bm[2] = {0, 0};
+
+ if (rxf->rxf_id < 32)
+ bm[0] = 1 << rxf->rxf_id;
+ else
+ bm[1] = 1 << (rxf->rxf_id - 32);
+
+ bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_STATS_CLEAR_REQ, 0);
+ ll_req.stats_mask = 0;
+ ll_req.txf_id_mask[0] = 0;
+ ll_req.txf_id_mask[1] = 0;
+
+ ll_req.rxf_id_mask[0] = htonl(bm[0]);
+ ll_req.rxf_id_mask[1] = htonl(bm[1]);
+
+ bna_mbox_qe_fill(&rxf->mbox_qe, &ll_req, sizeof(ll_req),
+ bna_rxf_cb_stats_cleared, rxf);
+ bna_mbox_send(rxf->rx->bna, &rxf->mbox_qe);
+}
+
+static void
+rxf_enable(struct bna_rxf *rxf)
+{
+ if (rxf->rxf_oper_state == BNA_RXF_OPER_STATE_PAUSED)
+ bfa_fsm_send_event(rxf, RXF_E_STARTED);
+ else {
+ rxf->rxf_flags |= BNA_RXF_FL_RXF_ENABLED;
+ __rxf_enable(rxf);
+ }
+}
+
+static void
+rxf_cb_enabled(void *arg, int status)
+{
+ struct bna_rxf *rxf = (struct bna_rxf *)arg;
+
+ bfa_q_qe_init(&rxf->mbox_qe.qe);
+ bfa_fsm_send_event(rxf, RXF_E_STARTED);
+}
+
+static void
+rxf_disable(struct bna_rxf *rxf)
+{
+ if (rxf->rxf_oper_state == BNA_RXF_OPER_STATE_PAUSED)
+ bfa_fsm_send_event(rxf, RXF_E_STOPPED);
+ else
+ rxf->rxf_flags &= ~BNA_RXF_FL_RXF_ENABLED;
+ __rxf_disable(rxf);
+}
+
+static void
+rxf_cb_disabled(void *arg, int status)
+{
+ struct bna_rxf *rxf = (struct bna_rxf *)arg;
+
+ bfa_q_qe_init(&rxf->mbox_qe.qe);
+ bfa_fsm_send_event(rxf, RXF_E_STOPPED);
+}
+
+void
+rxf_cb_cam_fltr_mbox_cmd(void *arg, int status)
+{
+ struct bna_rxf *rxf = (struct bna_rxf *)arg;
+
+ bfa_q_qe_init(&rxf->mbox_qe.qe);
+
+ bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_RESP);
+}
+
+static void
+bna_rxf_cb_stats_cleared(void *arg, int status)
+{
+ struct bna_rxf *rxf = (struct bna_rxf *)arg;
+
+ bfa_q_qe_init(&rxf->mbox_qe.qe);
+ bfa_fsm_send_event(rxf, RXF_E_STAT_CLEARED);
+}
+
+void
+rxf_cam_mbox_cmd(struct bna_rxf *rxf, u8 cmd,
+ const struct bna_mac *mac_addr)
+{
+ struct bfi_ll_mac_addr_req req;
+
+ bfi_h2i_set(req.mh, BFI_MC_LL, cmd, 0);
+
+ req.rxf_id = rxf->rxf_id;
+ memcpy(&req.mac_addr, (void *)&mac_addr->addr, ETH_ALEN);
+
+ bna_mbox_qe_fill(&rxf->mbox_qe, &req, sizeof(req),
+ rxf_cb_cam_fltr_mbox_cmd, rxf);
+
+ bna_mbox_send(rxf->rx->bna, &rxf->mbox_qe);
+}
+
+static int
+rxf_process_packet_filter_mcast(struct bna_rxf *rxf)
+{
+ struct bna_mac *mac = NULL;
+ struct list_head *qe;
+
+ /* Add multicast entries */
+ if (!list_empty(&rxf->mcast_pending_add_q)) {
+ bfa_q_deq(&rxf->mcast_pending_add_q, &qe);
+ bfa_q_qe_init(qe);
+ mac = (struct bna_mac *)qe;
+ rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_ADD_REQ, mac);
+ list_add_tail(&mac->qe, &rxf->mcast_active_q);
+ return 1;
+ }
+
+ /* Delete multicast entries previously added */
+ if (!list_empty(&rxf->mcast_pending_del_q)) {
+ bfa_q_deq(&rxf->mcast_pending_del_q, &qe);
+ bfa_q_qe_init(qe);
+ mac = (struct bna_mac *)qe;
+ rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_DEL_REQ, mac);
+ bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+rxf_process_packet_filter_vlan(struct bna_rxf *rxf)
+{
+ /* Apply the VLAN filter */
+ if (rxf->rxf_flags & BNA_RXF_FL_VLAN_CONFIG_PENDING) {
+ rxf->rxf_flags &= ~BNA_RXF_FL_VLAN_CONFIG_PENDING;
+ if (!(rxf->rxmode_active & BNA_RXMODE_PROMISC) &&
+ !(rxf->rxmode_active & BNA_RXMODE_DEFAULT))
+ __rxf_vlan_filter_set(rxf, rxf->vlan_filter_status);
+ }
+
+ /* Apply RSS configuration */
+ if (rxf->rxf_flags & BNA_RXF_FL_RSS_CONFIG_PENDING) {
+ rxf->rxf_flags &= ~BNA_RXF_FL_RSS_CONFIG_PENDING;
+ if (rxf->rss_status == BNA_STATUS_T_DISABLED) {
+ /* RSS is being disabled */
+ rxf->ctrl_flags &= ~BNA_RXF_CF_RSS_ENABLE;
+ __rxf_rit_set(rxf);
+ __rxf_config_set(rxf);
+ } else {
+ /* RSS is being enabled or reconfigured */
+ rxf->ctrl_flags |= BNA_RXF_CF_RSS_ENABLE;
+ __rxf_rit_set(rxf);
+ __rxf_config_set(rxf);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Processes pending ucast, mcast entry addition/deletion and issues mailbox
+ * command. Also processes pending filter configuration - promiscuous mode,
+ * default mode, allmulti mode and issues mailbox command or directly applies
+ * to h/w
+ */
+static int
+rxf_process_packet_filter(struct bna_rxf *rxf)
+{
+ /* Set the default MAC first */
+ if (rxf->ucast_pending_set > 0) {
+ rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_UCAST_SET_REQ,
+ rxf->ucast_active_mac);
+ rxf->ucast_pending_set--;
+ return 1;
+ }
+
+ if (rxf_process_packet_filter_ucast(rxf))
+ return 1;
+
+ if (rxf_process_packet_filter_mcast(rxf))
+ return 1;
+
+ if (rxf_process_packet_filter_promisc(rxf))
+ return 1;
+
+ if (rxf_process_packet_filter_default(rxf))
+ return 1;
+
+ if (rxf_process_packet_filter_allmulti(rxf))
+ return 1;
+
+ if (rxf_process_packet_filter_vlan(rxf))
+ return 1;
+
+ return 0;
+}
+
+static int
+rxf_clear_packet_filter_mcast(struct bna_rxf *rxf)
+{
+ struct bna_mac *mac = NULL;
+ struct list_head *qe;
+
+ /* 3. delete pending mcast entries */
+ if (!list_empty(&rxf->mcast_pending_del_q)) {
+ bfa_q_deq(&rxf->mcast_pending_del_q, &qe);
+ bfa_q_qe_init(qe);
+ mac = (struct bna_mac *)qe;
+ rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_DEL_REQ, mac);
+ bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac);
+ return 1;
+ }
+
+ /* 4. clear active mcast entries; move them to pending_add_q */
+ if (!list_empty(&rxf->mcast_active_q)) {
+ bfa_q_deq(&rxf->mcast_active_q, &qe);
+ bfa_q_qe_init(qe);
+ mac = (struct bna_mac *)qe;
+ rxf_cam_mbox_cmd(rxf, BFI_LL_H2I_MAC_MCAST_DEL_REQ, mac);
+ list_add_tail(&mac->qe, &rxf->mcast_pending_add_q);
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * In the rxf stop path, processes pending ucast/mcast delete queue and issues
+ * the mailbox command. Moves the active ucast/mcast entries to pending add q,
+ * so that they are added to CAM again in the rxf start path. Moves the current
+ * filter settings - promiscuous, default, allmulti - to pending filter
+ * configuration
+ */
+static int
+rxf_clear_packet_filter(struct bna_rxf *rxf)
+{
+ if (rxf_clear_packet_filter_ucast(rxf))
+ return 1;
+
+ if (rxf_clear_packet_filter_mcast(rxf))
+ return 1;
+
+ /* 5. clear active default MAC in the CAM */
+ if (rxf->ucast_pending_set > 0)
+ rxf->ucast_pending_set = 0;
+
+ if (rxf_clear_packet_filter_promisc(rxf))
+ return 1;
+
+ if (rxf_clear_packet_filter_default(rxf))
+ return 1;
+
+ if (rxf_clear_packet_filter_allmulti(rxf))
+ return 1;
+
+ return 0;
+}
+
+static void
+rxf_reset_packet_filter_mcast(struct bna_rxf *rxf)
+{
+ struct list_head *qe;
+ struct bna_mac *mac;
+
+ /* 3. Move active mcast entries to pending_add_q */
+ while (!list_empty(&rxf->mcast_active_q)) {
+ bfa_q_deq(&rxf->mcast_active_q, &qe);
+ bfa_q_qe_init(qe);
+ list_add_tail(qe, &rxf->mcast_pending_add_q);
+ }
+
+ /* 4.
Throw away delete pending mcast entries */ + while (!list_empty(&rxf->mcast_pending_del_q)) { + bfa_q_deq(&rxf->mcast_pending_del_q, &qe); + bfa_q_qe_init(qe); + mac = (struct bna_mac *)qe; + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + } +} + +/** + * In the rxf fail path, throws away the ucast/mcast entries pending for + * deletion, moves all active ucast/mcast entries to pending queue so that + * they are added back to CAM in the rxf start path. Also moves the current + * filter configuration to pending filter configuration. + */ +static void +rxf_reset_packet_filter(struct bna_rxf *rxf) +{ + rxf_reset_packet_filter_ucast(rxf); + + rxf_reset_packet_filter_mcast(rxf); + + /* 5. Turn off ucast set flag */ + rxf->ucast_pending_set = 0; + + rxf_reset_packet_filter_promisc(rxf); + + rxf_reset_packet_filter_default(rxf); + + rxf_reset_packet_filter_allmulti(rxf); +} + +void +bna_rxf_init(struct bna_rxf *rxf, + struct bna_rx *rx, + struct bna_rx_config *q_config) +{ + struct list_head *qe; + struct bna_rxp *rxp; + + /* rxf_id is initialized during rx_mod init */ + rxf->rx = rx; + + INIT_LIST_HEAD(&rxf->ucast_pending_add_q); + INIT_LIST_HEAD(&rxf->ucast_pending_del_q); + rxf->ucast_pending_set = 0; + INIT_LIST_HEAD(&rxf->ucast_active_q); + rxf->ucast_active_mac = NULL; + + INIT_LIST_HEAD(&rxf->mcast_pending_add_q); + INIT_LIST_HEAD(&rxf->mcast_pending_del_q); + INIT_LIST_HEAD(&rxf->mcast_active_q); + + bfa_q_qe_init(&rxf->mbox_qe.qe); + + if (q_config->vlan_strip_status == BNA_STATUS_T_ENABLED) + rxf->ctrl_flags |= BNA_RXF_CF_VLAN_STRIP; + + rxf->rxf_oper_state = (q_config->paused) ? + BNA_RXF_OPER_STATE_PAUSED : BNA_RXF_OPER_STATE_RUNNING; + + bna_rxf_adv_init(rxf, rx, q_config); + + rxf->rit_segment = bna_rit_mod_seg_get(&rxf->rx->bna->rit_mod, + q_config->num_paths); + + list_for_each(qe, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe; + if (q_config->rxp_type == BNA_RXP_SINGLE) + rxf->mcast_rxq_id = rxp->rxq.single.only->rxq_id; + else + rxf->mcast_rxq_id = rxp->rxq.slr.large->rxq_id; + break; + } + + rxf->vlan_filter_status = BNA_STATUS_T_DISABLED; + memset(rxf->vlan_filter_table, 0, + (sizeof(u32) * ((BFI_MAX_VLAN + 1) / 32))); + + bfa_fsm_set_state(rxf, bna_rxf_sm_stopped); +} + +void +bna_rxf_uninit(struct bna_rxf *rxf) +{ + struct bna_mac *mac; + + bna_rit_mod_seg_put(&rxf->rx->bna->rit_mod, rxf->rit_segment); + rxf->rit_segment = NULL; + + rxf->ucast_pending_set = 0; + + while (!list_empty(&rxf->ucast_pending_add_q)) { + bfa_q_deq(&rxf->ucast_pending_add_q, &mac); + bfa_q_qe_init(&mac->qe); + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, mac); + } + + if (rxf->ucast_active_mac) { + bfa_q_qe_init(&rxf->ucast_active_mac->qe); + bna_ucam_mod_mac_put(&rxf->rx->bna->ucam_mod, + rxf->ucast_active_mac); + rxf->ucast_active_mac = NULL; + } + + while (!list_empty(&rxf->mcast_pending_add_q)) { + bfa_q_deq(&rxf->mcast_pending_add_q, &mac); + bfa_q_qe_init(&mac->qe); + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + } + + rxf->rx = NULL; +} + +void +bna_rxf_start(struct bna_rxf *rxf) +{ + rxf->start_cbfn = bna_rx_cb_rxf_started; + rxf->start_cbarg = rxf->rx; + rxf->rxf_flags &= ~BNA_RXF_FL_FAILED; + bfa_fsm_send_event(rxf, RXF_E_START); +} + +void +bna_rxf_stop(struct bna_rxf *rxf) +{ + rxf->stop_cbfn = bna_rx_cb_rxf_stopped; + rxf->stop_cbarg = rxf->rx; + bfa_fsm_send_event(rxf, RXF_E_STOP); +} + +void +bna_rxf_fail(struct bna_rxf *rxf) +{ + rxf->rxf_flags |= BNA_RXF_FL_FAILED; + bfa_fsm_send_event(rxf, RXF_E_FAIL); +} + +int +bna_rxf_state_get(struct bna_rxf *rxf) +{ + return 
bfa_sm_to_state(rxf_sm_table, rxf->fsm); +} + +enum bna_cb_status +bna_rx_ucast_set(struct bna_rx *rx, u8 *ucmac, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + + if (rxf->ucast_active_mac == NULL) { + rxf->ucast_active_mac = + bna_ucam_mod_mac_get(&rxf->rx->bna->ucam_mod); + if (rxf->ucast_active_mac == NULL) + return BNA_CB_UCAST_CAM_FULL; + bfa_q_qe_init(&rxf->ucast_active_mac->qe); + } + + memcpy(rxf->ucast_active_mac->addr, ucmac, ETH_ALEN); + rxf->ucast_pending_set++; + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + + return BNA_CB_SUCCESS; +} + +enum bna_cb_status +bna_rx_mcast_add(struct bna_rx *rx, u8 *addr, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head *qe; + struct bna_mac *mac; + + /* Check if already added */ + list_for_each(qe, &rxf->mcast_active_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + /* Check if pending addition */ + list_for_each(qe, &rxf->mcast_pending_add_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + mac = bna_mcam_mod_mac_get(&rxf->rx->bna->mcam_mod); + if (mac == NULL) + return BNA_CB_MCAST_LIST_FULL; + bfa_q_qe_init(&mac->qe); + memcpy(mac->addr, addr, ETH_ALEN); + list_add_tail(&mac->qe, &rxf->mcast_pending_add_q); + + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + + return BNA_CB_SUCCESS; +} + +enum bna_cb_status +bna_rx_mcast_del(struct bna_rx *rx, u8 *addr, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head *qe; + struct bna_mac *mac; + + list_for_each(qe, &rxf->mcast_pending_add_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + list_del(qe); + bfa_q_qe_init(qe); + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + return BNA_CB_SUCCESS; + } + } + + list_for_each(qe, &rxf->mcast_active_q) { + mac = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac->addr, addr)) { + list_del(qe); + bfa_q_qe_init(qe); + list_add_tail(qe, &rxf->mcast_pending_del_q); + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + return BNA_CB_SUCCESS; + } + } + + return BNA_CB_INVALID_MAC; +} + +enum bna_cb_status +bna_rx_mcast_listset(struct bna_rx *rx, int count, u8 *mclist, + void (*cbfn)(struct bnad *, struct bna_rx *, + enum bna_cb_status)) +{ + struct bna_rxf *rxf = &rx->rxf; + struct list_head list_head; + struct list_head *qe; + u8 *mcaddr; + struct bna_mac *mac; + struct bna_mac *mac1; + int skip; + int delete; + int need_hw_config = 0; + int i; + + /* Allocate nodes */ + INIT_LIST_HEAD(&list_head); + for (i = 0, mcaddr = mclist; i < count; i++) { + mac = bna_mcam_mod_mac_get(&rxf->rx->bna->mcam_mod); + if (mac == NULL) + goto err_return; + bfa_q_qe_init(&mac->qe); + memcpy(mac->addr, mcaddr, ETH_ALEN); + list_add_tail(&mac->qe, &list_head); + + mcaddr += ETH_ALEN; + } + + /* Schedule for addition */ + while (!list_empty(&list_head)) { + bfa_q_deq(&list_head, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + + skip 
= 0; + + /* Skip if already added */ + list_for_each(qe, &rxf->mcast_active_q) { + mac1 = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac1->addr, mac->addr)) { + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, + mac); + skip = 1; + break; + } + } + + if (skip) + continue; + + /* Skip if pending addition */ + list_for_each(qe, &rxf->mcast_pending_add_q) { + mac1 = (struct bna_mac *)qe; + if (BNA_MAC_IS_EQUAL(mac1->addr, mac->addr)) { + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, + mac); + skip = 1; + break; + } + } + + if (skip) + continue; + + need_hw_config = 1; + list_add_tail(&mac->qe, &rxf->mcast_pending_add_q); + } + + /** + * Delete the entries that are in the pending_add_q but not + * in the new list + */ + while (!list_empty(&rxf->mcast_pending_add_q)) { + bfa_q_deq(&rxf->mcast_pending_add_q, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + for (i = 0, mcaddr = mclist, delete = 1; i < count; i++) { + if (BNA_MAC_IS_EQUAL(mcaddr, mac->addr)) { + delete = 0; + break; + } + mcaddr += ETH_ALEN; + } + if (delete) + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + else + list_add_tail(&mac->qe, &list_head); + } + while (!list_empty(&list_head)) { + bfa_q_deq(&list_head, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + list_add_tail(&mac->qe, &rxf->mcast_pending_add_q); + } + + /** + * Schedule entries for deletion that are in the active_q but not + * in the new list + */ + while (!list_empty(&rxf->mcast_active_q)) { + bfa_q_deq(&rxf->mcast_active_q, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + for (i = 0, mcaddr = mclist, delete = 1; i < count; i++) { + if (BNA_MAC_IS_EQUAL(mcaddr, mac->addr)) { + delete = 0; + break; + } + mcaddr += ETH_ALEN; + } + if (delete) { + list_add_tail(&mac->qe, &rxf->mcast_pending_del_q); + need_hw_config = 1; + } else { + list_add_tail(&mac->qe, &list_head); + } + } + while (!list_empty(&list_head)) { + bfa_q_deq(&list_head, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + list_add_tail(&mac->qe, &rxf->mcast_active_q); + } + + if (need_hw_config) { + rxf->cam_fltr_cbfn = cbfn; + rxf->cam_fltr_cbarg = rx->bna->bnad; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + } else if (cbfn) + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + + return BNA_CB_SUCCESS; + +err_return: + while (!list_empty(&list_head)) { + bfa_q_deq(&list_head, &qe); + mac = (struct bna_mac *)qe; + bfa_q_qe_init(&mac->qe); + bna_mcam_mod_mac_put(&rxf->rx->bna->mcam_mod, mac); + } + + return BNA_CB_MCAST_LIST_FULL; +} + +void +bna_rx_vlan_add(struct bna_rx *rx, int vlan_id) +{ + struct bna_rxf *rxf = &rx->rxf; + int index = (vlan_id >> 5); + int bit = (1 << (vlan_id & 0x1F)); + + rxf->vlan_filter_table[index] |= bit; + if (rxf->vlan_filter_status == BNA_STATUS_T_ENABLED) { + rxf->rxf_flags |= BNA_RXF_FL_VLAN_CONFIG_PENDING; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + } +} + +void +bna_rx_vlan_del(struct bna_rx *rx, int vlan_id) +{ + struct bna_rxf *rxf = &rx->rxf; + int index = (vlan_id >> 5); + int bit = (1 << (vlan_id & 0x1F)); + + rxf->vlan_filter_table[index] &= ~bit; + if (rxf->vlan_filter_status == BNA_STATUS_T_ENABLED) { + rxf->rxf_flags |= BNA_RXF_FL_VLAN_CONFIG_PENDING; + bfa_fsm_send_event(rxf, RXF_E_CAM_FLTR_MOD); + } +} + +/** + * RX + */ +#define RXQ_RCB_INIT(q, rxp, qdepth, bna, _id, unmapq_mem) do { \ + struct bna_doorbell_qset *_qset; \ + unsigned long off; \ + (q)->rcb->producer_index = (q)->rcb->consumer_index = 0; \ + (q)->rcb->q_depth = (qdepth); \ + (q)->rcb->unmap_q = unmapq_mem; \ + (q)->rcb->rxq 
= (q); \ + (q)->rcb->cq = &(rxp)->cq; \ + (q)->rcb->bnad = (bna)->bnad; \ + _qset = (struct bna_doorbell_qset *)0; \ + off = (unsigned long)&_qset[(q)->rxq_id].rxq[0]; \ + (q)->rcb->q_dbell = off + \ + BNA_GET_DOORBELL_BASE_ADDR((bna)->pcidev.pci_bar_kva); \ + (q)->rcb->id = _id; \ +} while (0) + +#define BNA_GET_RXQS(qcfg) (((qcfg)->rxp_type == BNA_RXP_SINGLE) ? \ + (qcfg)->num_paths : ((qcfg)->num_paths * 2)) + +#define SIZE_TO_PAGES(size) (((size) >> PAGE_SHIFT) + ((((size) &\ + (PAGE_SIZE - 1)) + (PAGE_SIZE - 1)) >> PAGE_SHIFT)) + +#define call_rx_stop_callback(rx, status) \ + if ((rx)->stop_cbfn) { \ + (*(rx)->stop_cbfn)((rx)->stop_cbarg, rx, (status)); \ + (rx)->stop_cbfn = NULL; \ + (rx)->stop_cbarg = NULL; \ + } + +/* + * Since rx_enable is synchronous callback, there is no start_cbfn required. + * Instead, we'll call bnad_rx_post(rxp) so that bnad can post the buffers + * for each rxpath. + */ + +#define call_rx_disable_cbfn(rx, status) \ + if ((rx)->disable_cbfn) { \ + (*(rx)->disable_cbfn)((rx)->disable_cbarg, \ + status); \ + (rx)->disable_cbfn = NULL; \ + (rx)->disable_cbarg = NULL; \ + } \ + +#define rxqs_reqd(type, num_rxqs) \ + (((type) == BNA_RXP_SINGLE) ? (num_rxqs) : ((num_rxqs) * 2)) + +#define rx_ib_fail(rx) \ +do { \ + struct bna_rxp *rxp; \ + struct list_head *qe; \ + list_for_each(qe, &(rx)->rxp_q) { \ + rxp = (struct bna_rxp *)qe; \ + bna_ib_fail(rxp->cq.ib); \ + } \ +} while (0) + +static void __bna_multi_rxq_stop(struct bna_rxp *, u32 *); +static void __bna_rxq_start(struct bna_rxq *rxq); +static void __bna_cq_start(struct bna_cq *cq); +static void bna_rit_create(struct bna_rx *rx); +static void bna_rx_cb_multi_rxq_stopped(void *arg, int status); +static void bna_rx_cb_rxq_stopped_all(void *arg); + +bfa_fsm_state_decl(bna_rx, stopped, + struct bna_rx, enum bna_rx_event); +bfa_fsm_state_decl(bna_rx, rxf_start_wait, + struct bna_rx, enum bna_rx_event); +bfa_fsm_state_decl(bna_rx, started, + struct bna_rx, enum bna_rx_event); +bfa_fsm_state_decl(bna_rx, rxf_stop_wait, + struct bna_rx, enum bna_rx_event); +bfa_fsm_state_decl(bna_rx, rxq_stop_wait, + struct bna_rx, enum bna_rx_event); + +static struct bfa_sm_table rx_sm_table[] = { + {BFA_SM(bna_rx_sm_stopped), BNA_RX_STOPPED}, + {BFA_SM(bna_rx_sm_rxf_start_wait), BNA_RX_RXF_START_WAIT}, + {BFA_SM(bna_rx_sm_started), BNA_RX_STARTED}, + {BFA_SM(bna_rx_sm_rxf_stop_wait), BNA_RX_RXF_STOP_WAIT}, + {BFA_SM(bna_rx_sm_rxq_stop_wait), BNA_RX_RXQ_STOP_WAIT}, +}; + +static void bna_rx_sm_stopped_entry(struct bna_rx *rx) +{ + struct bna_rxp *rxp; + struct list_head *qe_rxp; + + list_for_each(qe_rxp, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe_rxp; + rx->rx_cleanup_cbfn(rx->bna->bnad, rxp->cq.ccb); + } + + call_rx_stop_callback(rx, BNA_CB_SUCCESS); +} + +static void bna_rx_sm_stopped(struct bna_rx *rx, + enum bna_rx_event event) +{ + switch (event) { + case RX_E_START: + bfa_fsm_set_state(rx, bna_rx_sm_rxf_start_wait); + break; + case RX_E_STOP: + call_rx_stop_callback(rx, BNA_CB_SUCCESS); + break; + case RX_E_FAIL: + /* no-op */ + break; + default: + bfa_sm_fault(rx->bna, event); + break; + } + +} + +static void bna_rx_sm_rxf_start_wait_entry(struct bna_rx *rx) +{ + struct bna_rxp *rxp; + struct list_head *qe_rxp; + struct bna_rxq *q0 = NULL, *q1 = NULL; + + /* Setup the RIT */ + bna_rit_create(rx); + + list_for_each(qe_rxp, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe_rxp; + bna_ib_start(rxp->cq.ib); + GET_RXQS(rxp, q0, q1); + q0->buffer_size = bna_port_mtu_get(&rx->bna->port); + __bna_rxq_start(q0); + 
rx->rx_post_cbfn(rx->bna->bnad, q0->rcb); + if (q1) { + __bna_rxq_start(q1); + rx->rx_post_cbfn(rx->bna->bnad, q1->rcb); + } + __bna_cq_start(&rxp->cq); + } + + bna_rxf_start(&rx->rxf); +} + +static void bna_rx_sm_rxf_start_wait(struct bna_rx *rx, + enum bna_rx_event event) +{ + switch (event) { + case RX_E_STOP: + bfa_fsm_set_state(rx, bna_rx_sm_rxf_stop_wait); + break; + case RX_E_FAIL: + bfa_fsm_set_state(rx, bna_rx_sm_stopped); + rx_ib_fail(rx); + bna_rxf_fail(&rx->rxf); + break; + case RX_E_RXF_STARTED: + bfa_fsm_set_state(rx, bna_rx_sm_started); + break; + default: + bfa_sm_fault(rx->bna, event); + break; + } +} + +void +bna_rx_sm_started_entry(struct bna_rx *rx) +{ + struct bna_rxp *rxp; + struct list_head *qe_rxp; + + /* Start IB */ + list_for_each(qe_rxp, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe_rxp; + bna_ib_ack(&rxp->cq.ib->door_bell, 0); + } + + bna_llport_admin_up(&rx->bna->port.llport); +} + +void +bna_rx_sm_started(struct bna_rx *rx, enum bna_rx_event event) +{ + switch (event) { + case RX_E_FAIL: + bna_llport_admin_down(&rx->bna->port.llport); + bfa_fsm_set_state(rx, bna_rx_sm_stopped); + rx_ib_fail(rx); + bna_rxf_fail(&rx->rxf); + break; + case RX_E_STOP: + bna_llport_admin_down(&rx->bna->port.llport); + bfa_fsm_set_state(rx, bna_rx_sm_rxf_stop_wait); + break; + default: + bfa_sm_fault(rx->bna, event); + break; + } +} + +void +bna_rx_sm_rxf_stop_wait_entry(struct bna_rx *rx) +{ + bna_rxf_stop(&rx->rxf); +} + +void +bna_rx_sm_rxf_stop_wait(struct bna_rx *rx, enum bna_rx_event event) +{ + switch (event) { + case RX_E_RXF_STOPPED: + bfa_fsm_set_state(rx, bna_rx_sm_rxq_stop_wait); + break; + case RX_E_RXF_STARTED: + /** + * RxF was in the process of starting up when + * RXF_E_STOP was issued. Ignore this event + */ + break; + case RX_E_FAIL: + bfa_fsm_set_state(rx, bna_rx_sm_stopped); + rx_ib_fail(rx); + bna_rxf_fail(&rx->rxf); + break; + default: + bfa_sm_fault(rx->bna, event); + break; + } + +} + +void +bna_rx_sm_rxq_stop_wait_entry(struct bna_rx *rx) +{ + struct bna_rxp *rxp = NULL; + struct bna_rxq *q0 = NULL; + struct bna_rxq *q1 = NULL; + struct list_head *qe; + u32 rxq_mask[2] = {0, 0}; + + /* Only one call to multi-rxq-stop for all RXPs in this RX */ + bfa_wc_up(&rx->rxq_stop_wc); + list_for_each(qe, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe; + GET_RXQS(rxp, q0, q1); + if (q0->rxq_id < 32) + rxq_mask[0] |= ((u32)1 << q0->rxq_id); + else + rxq_mask[1] |= ((u32)1 << (q0->rxq_id - 32)); + if (q1) { + if (q1->rxq_id < 32) + rxq_mask[0] |= ((u32)1 << q1->rxq_id); + else + rxq_mask[1] |= ((u32) + 1 << (q1->rxq_id - 32)); + } + } + + __bna_multi_rxq_stop(rxp, rxq_mask); +} + +void +bna_rx_sm_rxq_stop_wait(struct bna_rx *rx, enum bna_rx_event event) +{ + struct bna_rxp *rxp = NULL; + struct list_head *qe; + + switch (event) { + case RX_E_RXQ_STOPPED: + list_for_each(qe, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe; + bna_ib_stop(rxp->cq.ib); + } + /* Fall through */ + case RX_E_FAIL: + bfa_fsm_set_state(rx, bna_rx_sm_stopped); + break; + default: + bfa_sm_fault(rx->bna, event); + break; + } +} + +void +__bna_multi_rxq_stop(struct bna_rxp *rxp, u32 * rxq_id_mask) +{ + struct bfi_ll_q_stop_req ll_req; + + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_RXQ_STOP_REQ, 0); + ll_req.q_id_mask[0] = htonl(rxq_id_mask[0]); + ll_req.q_id_mask[1] = htonl(rxq_id_mask[1]); + bna_mbox_qe_fill(&rxp->mbox_qe, &ll_req, sizeof(ll_req), + bna_rx_cb_multi_rxq_stopped, rxp); + bna_mbox_send(rxp->rx->bna, &rxp->mbox_qe); +} + +void +__bna_rxq_start(struct bna_rxq *rxq) +{ + struct bna_rxtx_q_mem 
*q_mem; + struct bna_rxq_mem rxq_cfg, *rxq_mem; + struct bna_dma_addr cur_q_addr; + /* struct bna_doorbell_qset *qset; */ + struct bna_qpt *qpt; + u32 pg_num; + struct bna *bna = rxq->rx->bna; + void __iomem *base_addr; + unsigned long off; + + qpt = &rxq->qpt; + cur_q_addr = *((struct bna_dma_addr *)(qpt->kv_qpt_ptr)); + + rxq_cfg.pg_tbl_addr_lo = qpt->hw_qpt_ptr.lsb; + rxq_cfg.pg_tbl_addr_hi = qpt->hw_qpt_ptr.msb; + rxq_cfg.cur_q_entry_lo = cur_q_addr.lsb; + rxq_cfg.cur_q_entry_hi = cur_q_addr.msb; + + rxq_cfg.pg_cnt_n_prd_ptr = ((u32)qpt->page_count << 16) | 0x0; + rxq_cfg.entry_n_pg_size = ((u32)(BFI_RXQ_WI_SIZE >> 2) << 16) | + (qpt->page_size >> 2); + rxq_cfg.sg_n_cq_n_cns_ptr = + ((u32)(rxq->rxp->cq.cq_id & 0xff) << 16) | 0x0; + rxq_cfg.buf_sz_n_q_state = ((u32)rxq->buffer_size << 16) | + BNA_Q_IDLE_STATE; + rxq_cfg.next_qid = 0x0 | (0x3 << 8); + + /* Write the page number register */ + pg_num = BNA_GET_PAGE_NUM(HQM0_BLK_PG_NUM + bna->port_num, + HQM_RXTX_Q_RAM_BASE_OFFSET); + writel(pg_num, bna->regs.page_addr); + + /* Write to h/w */ + base_addr = BNA_GET_MEM_BASE_ADDR(bna->pcidev.pci_bar_kva, + HQM_RXTX_Q_RAM_BASE_OFFSET); + + q_mem = (struct bna_rxtx_q_mem *)0; + rxq_mem = &q_mem[rxq->rxq_id].rxq; + + off = (unsigned long)&rxq_mem->pg_tbl_addr_lo; + writel(htonl(rxq_cfg.pg_tbl_addr_lo), base_addr + off); + + off = (unsigned long)&rxq_mem->pg_tbl_addr_hi; + writel(htonl(rxq_cfg.pg_tbl_addr_hi), base_addr + off); + + off = (unsigned long)&rxq_mem->cur_q_entry_lo; + writel(htonl(rxq_cfg.cur_q_entry_lo), base_addr + off); + + off = (unsigned long)&rxq_mem->cur_q_entry_hi; + writel(htonl(rxq_cfg.cur_q_entry_hi), base_addr + off); + + off = (unsigned long)&rxq_mem->pg_cnt_n_prd_ptr; + writel(rxq_cfg.pg_cnt_n_prd_ptr, base_addr + off); + + off = (unsigned long)&rxq_mem->entry_n_pg_size; + writel(rxq_cfg.entry_n_pg_size, base_addr + off); + + off = (unsigned long)&rxq_mem->sg_n_cq_n_cns_ptr; + writel(rxq_cfg.sg_n_cq_n_cns_ptr, base_addr + off); + + off = (unsigned long)&rxq_mem->buf_sz_n_q_state; + writel(rxq_cfg.buf_sz_n_q_state, base_addr + off); + + off = (unsigned long)&rxq_mem->next_qid; + writel(rxq_cfg.next_qid, base_addr + off); + + rxq->rcb->producer_index = 0; + rxq->rcb->consumer_index = 0; +} + +void +__bna_cq_start(struct bna_cq *cq) +{ + struct bna_cq_mem cq_cfg, *cq_mem; + const struct bna_qpt *qpt; + struct bna_dma_addr cur_q_addr; + u32 pg_num; + struct bna *bna = cq->rx->bna; + void __iomem *base_addr; + unsigned long off; + + qpt = &cq->qpt; + cur_q_addr = *((struct bna_dma_addr *)(qpt->kv_qpt_ptr)); + + /* + * Fill out structure, to be subsequently written + * to hardware + */ + cq_cfg.pg_tbl_addr_lo = qpt->hw_qpt_ptr.lsb; + cq_cfg.pg_tbl_addr_hi = qpt->hw_qpt_ptr.msb; + cq_cfg.cur_q_entry_lo = cur_q_addr.lsb; + cq_cfg.cur_q_entry_hi = cur_q_addr.msb; + + cq_cfg.pg_cnt_n_prd_ptr = (qpt->page_count << 16) | 0x0; + cq_cfg.entry_n_pg_size = + ((u32)(BFI_CQ_WI_SIZE >> 2) << 16) | (qpt->page_size >> 2); + cq_cfg.int_blk_n_cns_ptr = ((((u32)cq->ib_seg_offset) << 24) | + ((u32)(cq->ib->ib_id & 0xff) << 16) | 0x0); + cq_cfg.q_state = BNA_Q_IDLE_STATE; + + /* Write the page number register */ + pg_num = BNA_GET_PAGE_NUM(HQM0_BLK_PG_NUM + bna->port_num, + HQM_CQ_RAM_BASE_OFFSET); + + writel(pg_num, bna->regs.page_addr); + + /* H/W write */ + base_addr = BNA_GET_MEM_BASE_ADDR(bna->pcidev.pci_bar_kva, + HQM_CQ_RAM_BASE_OFFSET); + + cq_mem = (struct bna_cq_mem *)0; + + off = (unsigned long)&cq_mem[cq->cq_id].pg_tbl_addr_lo; + writel(htonl(cq_cfg.pg_tbl_addr_lo), base_addr + off); 
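+ /* + * The (struct bna_cq_mem *)0 base above is the usual offsetof + * trick: indexing a NULL base turns each field reference into the + * byte offset of that register inside the CQ RAM window. Each + * write below is therefore roughly equivalent to: + * + * off = cq->cq_id * sizeof(struct bna_cq_mem) + + * offsetof(struct bna_cq_mem, pg_tbl_addr_hi); + * writel(htonl(cq_cfg.pg_tbl_addr_hi), base_addr + off); + */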
+ + off = (unsigned long)&cq_mem[cq->cq_id].pg_tbl_addr_hi; + writel(htonl(cq_cfg.pg_tbl_addr_hi), base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].cur_q_entry_lo; + writel(htonl(cq_cfg.cur_q_entry_lo), base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].cur_q_entry_hi; + writel(htonl(cq_cfg.cur_q_entry_hi), base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].pg_cnt_n_prd_ptr; + writel(cq_cfg.pg_cnt_n_prd_ptr, base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].entry_n_pg_size; + writel(cq_cfg.entry_n_pg_size, base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].int_blk_n_cns_ptr; + writel(cq_cfg.int_blk_n_cns_ptr, base_addr + off); + + off = (unsigned long)&cq_mem[cq->cq_id].q_state; + writel(cq_cfg.q_state, base_addr + off); + + cq->ccb->producer_index = 0; + *(cq->ccb->hw_producer_index) = 0; +} + +void +bna_rit_create(struct bna_rx *rx) +{ + struct list_head *qe_rxp; + struct bna *bna; + struct bna_rxp *rxp; + struct bna_rxq *q0 = NULL; + struct bna_rxq *q1 = NULL; + int offset; + + bna = rx->bna; + + offset = 0; + list_for_each(qe_rxp, &rx->rxp_q) { + rxp = (struct bna_rxp *)qe_rxp; + GET_RXQS(rxp, q0, q1); + rx->rxf.rit_segment->rit[offset].large_rxq_id = q0->rxq_id; + rx->rxf.rit_segment->rit[offset].small_rxq_id = + (q1 ? q1->rxq_id : 0); + offset++; + } +} + +int +_rx_can_satisfy(struct bna_rx_mod *rx_mod, + struct bna_rx_config *rx_cfg) +{ + if ((rx_mod->rx_free_count == 0) || + (rx_mod->rxp_free_count == 0) || + (rx_mod->rxq_free_count == 0)) + return 0; + + if (rx_cfg->rxp_type == BNA_RXP_SINGLE) { + if ((rx_mod->rxp_free_count < rx_cfg->num_paths) || + (rx_mod->rxq_free_count < rx_cfg->num_paths)) + return 0; + } else { + if ((rx_mod->rxp_free_count < rx_cfg->num_paths) || + (rx_mod->rxq_free_count < (2 * rx_cfg->num_paths))) + return 0; + } + + if (!bna_rit_mod_can_satisfy(&rx_mod->bna->rit_mod, rx_cfg->num_paths)) + return 0; + + return 1; +} + +struct bna_rxq * +_get_free_rxq(struct bna_rx_mod *rx_mod) +{ + struct bna_rxq *rxq = NULL; + struct list_head *qe = NULL; + + bfa_q_deq(&rx_mod->rxq_free_q, &qe); + if (qe) { + rx_mod->rxq_free_count--; + rxq = (struct bna_rxq *)qe; + } + return rxq; +} + +void +_put_free_rxq(struct bna_rx_mod *rx_mod, struct bna_rxq *rxq) +{ + bfa_q_qe_init(&rxq->qe); + list_add_tail(&rxq->qe, &rx_mod->rxq_free_q); + rx_mod->rxq_free_count++; +} + +struct bna_rxp * +_get_free_rxp(struct bna_rx_mod *rx_mod) +{ + struct list_head *qe = NULL; + struct bna_rxp *rxp = NULL; + + bfa_q_deq(&rx_mod->rxp_free_q, &qe); + if (qe) { + rx_mod->rxp_free_count--; + + rxp = (struct bna_rxp *)qe; + } + + return rxp; +} + +void +_put_free_rxp(struct bna_rx_mod *rx_mod, struct bna_rxp *rxp) +{ + bfa_q_qe_init(&rxp->qe); + list_add_tail(&rxp->qe, &rx_mod->rxp_free_q); + rx_mod->rxp_free_count++; +} + +struct bna_rx * +_get_free_rx(struct bna_rx_mod *rx_mod) +{ + struct list_head *qe = NULL; + struct bna_rx *rx = NULL; + + bfa_q_deq(&rx_mod->rx_free_q, &qe); + if (qe) { + rx_mod->rx_free_count--; + + rx = (struct bna_rx *)qe; + bfa_q_qe_init(qe); + list_add_tail(&rx->qe, &rx_mod->rx_active_q); + } + + return rx; +} + +void +_put_free_rx(struct bna_rx_mod *rx_mod, struct bna_rx *rx) +{ + bfa_q_qe_init(&rx->qe); + list_add_tail(&rx->qe, &rx_mod->rx_free_q); + rx_mod->rx_free_count++; +} + +void +_rx_init(struct bna_rx *rx, struct bna *bna) +{ + rx->bna = bna; + rx->rx_flags = 0; + + INIT_LIST_HEAD(&rx->rxp_q); + + rx->rxq_stop_wc.wc_resume = bna_rx_cb_rxq_stopped_all; + rx->rxq_stop_wc.wc_cbarg = rx; + 
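+ /* + * rxq_stop_wc is a wait counter: bfa_wc_up() adds one expected + * ack, bfa_wc_down() retires one, and wc_resume() runs when the + * count drops back to zero. In sketch form, for the RxQ stop + * path: + * + * bfa_wc_up(&rx->rxq_stop_wc); - rxq_stop_wait entry + * bfa_wc_down(&rx->rxq_stop_wc); - each f/w stop ack + * zero -> bna_rx_cb_rxq_stopped_all() -> RX_E_RXQ_STOPPED + */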
rx->rxq_stop_wc.wc_count = 0; + + rx->stop_cbfn = NULL; + rx->stop_cbarg = NULL; +} + +void +_rxp_add_rxqs(struct bna_rxp *rxp, + struct bna_rxq *q0, + struct bna_rxq *q1) +{ + switch (rxp->type) { + case BNA_RXP_SINGLE: + rxp->rxq.single.only = q0; + rxp->rxq.single.reserved = NULL; + break; + case BNA_RXP_SLR: + rxp->rxq.slr.large = q0; + rxp->rxq.slr.small = q1; + break; + case BNA_RXP_HDS: + rxp->rxq.hds.data = q0; + rxp->rxq.hds.hdr = q1; + break; + default: + break; + } +} + +void +_rxq_qpt_init(struct bna_rxq *rxq, + struct bna_rxp *rxp, + u32 page_count, + u32 page_size, + struct bna_mem_descr *qpt_mem, + struct bna_mem_descr *swqpt_mem, + struct bna_mem_descr *page_mem) +{ + int i; + + rxq->qpt.hw_qpt_ptr.lsb = qpt_mem->dma.lsb; + rxq->qpt.hw_qpt_ptr.msb = qpt_mem->dma.msb; + rxq->qpt.kv_qpt_ptr = qpt_mem->kva; + rxq->qpt.page_count = page_count; + rxq->qpt.page_size = page_size; + + rxq->rcb->sw_qpt = (void **) swqpt_mem->kva; + + for (i = 0; i < rxq->qpt.page_count; i++) { + rxq->rcb->sw_qpt[i] = page_mem[i].kva; + ((struct bna_dma_addr *)rxq->qpt.kv_qpt_ptr)[i].lsb = + page_mem[i].dma.lsb; + ((struct bna_dma_addr *)rxq->qpt.kv_qpt_ptr)[i].msb = + page_mem[i].dma.msb; + + } +} + +void +_rxp_cqpt_setup(struct bna_rxp *rxp, + u32 page_count, + u32 page_size, + struct bna_mem_descr *qpt_mem, + struct bna_mem_descr *swqpt_mem, + struct bna_mem_descr *page_mem) +{ + int i; + + rxp->cq.qpt.hw_qpt_ptr.lsb = qpt_mem->dma.lsb; + rxp->cq.qpt.hw_qpt_ptr.msb = qpt_mem->dma.msb; + rxp->cq.qpt.kv_qpt_ptr = qpt_mem->kva; + rxp->cq.qpt.page_count = page_count; + rxp->cq.qpt.page_size = page_size; + + rxp->cq.ccb->sw_qpt = (void **) swqpt_mem->kva; + + for (i = 0; i < rxp->cq.qpt.page_count; i++) { + rxp->cq.ccb->sw_qpt[i] = page_mem[i].kva; + + ((struct bna_dma_addr *)rxp->cq.qpt.kv_qpt_ptr)[i].lsb = + page_mem[i].dma.lsb; + ((struct bna_dma_addr *)rxp->cq.qpt.kv_qpt_ptr)[i].msb = + page_mem[i].dma.msb; + + } +} + +void +_rx_add_rxp(struct bna_rx *rx, struct bna_rxp *rxp) +{ + list_add_tail(&rxp->qe, &rx->rxp_q); +} + +void +_init_rxmod_queues(struct bna_rx_mod *rx_mod) +{ + INIT_LIST_HEAD(&rx_mod->rx_free_q); + INIT_LIST_HEAD(&rx_mod->rxq_free_q); + INIT_LIST_HEAD(&rx_mod->rxp_free_q); + INIT_LIST_HEAD(&rx_mod->rx_active_q); + + rx_mod->rx_free_count = 0; + rx_mod->rxq_free_count = 0; + rx_mod->rxp_free_count = 0; +} + +void +_rx_ctor(struct bna_rx *rx, int id) +{ + bfa_q_qe_init(&rx->qe); + INIT_LIST_HEAD(&rx->rxp_q); + rx->bna = NULL; + + rx->rxf.rxf_id = id; + + /* FIXME: mbox_qe ctor()?? 
*/ + bfa_q_qe_init(&rx->mbox_qe.qe); + + rx->stop_cbfn = NULL; + rx->stop_cbarg = NULL; +} + +void +bna_rx_cb_multi_rxq_stopped(void *arg, int status) +{ + struct bna_rxp *rxp = (struct bna_rxp *)arg; + + bfa_wc_down(&rxp->rx->rxq_stop_wc); +} + +void +bna_rx_cb_rxq_stopped_all(void *arg) +{ + struct bna_rx *rx = (struct bna_rx *)arg; + + bfa_fsm_send_event(rx, RX_E_RXQ_STOPPED); +} + +void +bna_rx_mod_cb_rx_stopped(void *arg, struct bna_rx *rx, + enum bna_cb_status status) +{ + struct bna_rx_mod *rx_mod = (struct bna_rx_mod *)arg; + + bfa_wc_down(&rx_mod->rx_stop_wc); +} + +void +bna_rx_mod_cb_rx_stopped_all(void *arg) +{ + struct bna_rx_mod *rx_mod = (struct bna_rx_mod *)arg; + + if (rx_mod->stop_cbfn) + rx_mod->stop_cbfn(&rx_mod->bna->port, BNA_CB_SUCCESS); + rx_mod->stop_cbfn = NULL; +} + +void +bna_rx_start(struct bna_rx *rx) +{ + rx->rx_flags |= BNA_RX_F_PORT_ENABLED; + if (rx->rx_flags & BNA_RX_F_ENABLE) + bfa_fsm_send_event(rx, RX_E_START); +} + +void +bna_rx_stop(struct bna_rx *rx) +{ + rx->rx_flags &= ~BNA_RX_F_PORT_ENABLED; + if (rx->fsm == (bfa_fsm_t) bna_rx_sm_stopped) + bna_rx_mod_cb_rx_stopped(&rx->bna->rx_mod, rx, BNA_CB_SUCCESS); + else { + rx->stop_cbfn = bna_rx_mod_cb_rx_stopped; + rx->stop_cbarg = &rx->bna->rx_mod; + bfa_fsm_send_event(rx, RX_E_STOP); + } +} + +void +bna_rx_fail(struct bna_rx *rx) +{ + /* Indicate port is not enabled, and failed */ + rx->rx_flags &= ~BNA_RX_F_PORT_ENABLED; + rx->rx_flags |= BNA_RX_F_PORT_FAILED; + bfa_fsm_send_event(rx, RX_E_FAIL); +} + +void +bna_rx_cb_rxf_started(struct bna_rx *rx, enum bna_cb_status status) +{ + bfa_fsm_send_event(rx, RX_E_RXF_STARTED); + if (rx->rxf.rxf_id < 32) + rx->bna->rx_mod.rxf_bmap[0] |= ((u32)1 << rx->rxf.rxf_id); + else + rx->bna->rx_mod.rxf_bmap[1] |= ((u32) + 1 << (rx->rxf.rxf_id - 32)); +} + +void +bna_rx_cb_rxf_stopped(struct bna_rx *rx, enum bna_cb_status status) +{ + bfa_fsm_send_event(rx, RX_E_RXF_STOPPED); + if (rx->rxf.rxf_id < 32) + rx->bna->rx_mod.rxf_bmap[0] &= ~((u32)1 << rx->rxf.rxf_id); + else + rx->bna->rx_mod.rxf_bmap[1] &= ~((u32) + 1 << (rx->rxf.rxf_id - 32)); +} + +void +bna_rx_mod_start(struct bna_rx_mod *rx_mod, enum bna_rx_type type) +{ + struct bna_rx *rx; + struct list_head *qe; + + rx_mod->flags |= BNA_RX_MOD_F_PORT_STARTED; + if (type == BNA_RX_T_LOOPBACK) + rx_mod->flags |= BNA_RX_MOD_F_PORT_LOOPBACK; + + list_for_each(qe, &rx_mod->rx_active_q) { + rx = (struct bna_rx *)qe; + if (rx->type == type) + bna_rx_start(rx); + } +} + +void +bna_rx_mod_stop(struct bna_rx_mod *rx_mod, enum bna_rx_type type) +{ + struct bna_rx *rx; + struct list_head *qe; + + rx_mod->flags &= ~BNA_RX_MOD_F_PORT_STARTED; + rx_mod->flags &= ~BNA_RX_MOD_F_PORT_LOOPBACK; + + rx_mod->stop_cbfn = bna_port_cb_rx_stopped; + + /** + * Before calling bna_rx_stop(), increment rx_stop_wc as many times + * as we are going to call bna_rx_stop + */ + list_for_each(qe, &rx_mod->rx_active_q) { + rx = (struct bna_rx *)qe; + if (rx->type == type) + bfa_wc_up(&rx_mod->rx_stop_wc); + } + + if (rx_mod->rx_stop_wc.wc_count == 0) { + rx_mod->stop_cbfn(&rx_mod->bna->port, BNA_CB_SUCCESS); + rx_mod->stop_cbfn = NULL; + return; + } + + list_for_each(qe, &rx_mod->rx_active_q) { + rx = (struct bna_rx *)qe; + if (rx->type == type) + bna_rx_stop(rx); + } +} + +void +bna_rx_mod_fail(struct bna_rx_mod *rx_mod) +{ + struct bna_rx *rx; + struct list_head *qe; + + rx_mod->flags &= ~BNA_RX_MOD_F_PORT_STARTED; + rx_mod->flags &= ~BNA_RX_MOD_F_PORT_LOOPBACK; + + list_for_each(qe, &rx_mod->rx_active_q) { + rx = (struct bna_rx *)qe; + bna_rx_fail(rx);
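+ /* + * Unlike RX_E_STOP, RX_E_FAIL must not touch the h/w: the RX + * FSM jumps straight to the stopped state, e.g. from started: + * + * bna_llport_admin_down(&rx->bna->port.llport); + * bfa_fsm_set_state(rx, bna_rx_sm_stopped); + * rx_ib_fail(rx); - no mailbox traffic + * bna_rxf_fail(&rx->rxf); + * + * so no stop acks are awaited on the failure path. + */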
+ } +} + +void bna_rx_mod_init(struct bna_rx_mod *rx_mod, struct bna *bna, + struct bna_res_info *res_info) +{ + int index; + struct bna_rx *rx_ptr; + struct bna_rxp *rxp_ptr; + struct bna_rxq *rxq_ptr; + + rx_mod->bna = bna; + rx_mod->flags = 0; + + rx_mod->rx = (struct bna_rx *) + res_info[BNA_RES_MEM_T_RX_ARRAY].res_u.mem_info.mdl[0].kva; + rx_mod->rxp = (struct bna_rxp *) + res_info[BNA_RES_MEM_T_RXP_ARRAY].res_u.mem_info.mdl[0].kva; + rx_mod->rxq = (struct bna_rxq *) + res_info[BNA_RES_MEM_T_RXQ_ARRAY].res_u.mem_info.mdl[0].kva; + + /* Initialize the queues */ + _init_rxmod_queues(rx_mod); + + /* Build RX queues */ + for (index = 0; index < BFI_MAX_RXQ; index++) { + rx_ptr = &rx_mod->rx[index]; + _rx_ctor(rx_ptr, index); + list_add_tail(&rx_ptr->qe, &rx_mod->rx_free_q); + rx_mod->rx_free_count++; + } + + /* build RX-path queue */ + for (index = 0; index < BFI_MAX_RXQ; index++) { + rxp_ptr = &rx_mod->rxp[index]; + rxp_ptr->cq.cq_id = index; + bfa_q_qe_init(&rxp_ptr->qe); + list_add_tail(&rxp_ptr->qe, &rx_mod->rxp_free_q); + rx_mod->rxp_free_count++; + } + + /* build RXQ queue */ + for (index = 0; index < BFI_MAX_RXQ; index++) { + rxq_ptr = &rx_mod->rxq[index]; + rxq_ptr->rxq_id = index; + + bfa_q_qe_init(&rxq_ptr->qe); + list_add_tail(&rxq_ptr->qe, &rx_mod->rxq_free_q); + rx_mod->rxq_free_count++; + } + + rx_mod->rx_stop_wc.wc_resume = bna_rx_mod_cb_rx_stopped_all; + rx_mod->rx_stop_wc.wc_cbarg = rx_mod; + rx_mod->rx_stop_wc.wc_count = 0; +} + +void +bna_rx_mod_uninit(struct bna_rx_mod *rx_mod) +{ + struct list_head *qe; + int i; + + i = 0; + list_for_each(qe, &rx_mod->rx_free_q) + i++; + + i = 0; + list_for_each(qe, &rx_mod->rxp_free_q) + i++; + + i = 0; + list_for_each(qe, &rx_mod->rxq_free_q) + i++; + + rx_mod->bna = NULL; +} + +int +bna_rx_state_get(struct bna_rx *rx) +{ + return bfa_sm_to_state(rx_sm_table, rx->fsm); +} + +void +bna_rx_res_req(struct bna_rx_config *q_cfg, struct bna_res_info *res_info) +{ + u32 cq_size, hq_size, dq_size; + u32 cpage_count, hpage_count, dpage_count; + struct bna_mem_info *mem_info; + u32 cq_depth; + u32 hq_depth; + u32 dq_depth; + + dq_depth = q_cfg->q_depth; + hq_depth = ((q_cfg->rxp_type == BNA_RXP_SINGLE) ? 
0 : q_cfg->q_depth); + cq_depth = dq_depth + hq_depth; + + BNA_TO_POWER_OF_2_HIGH(cq_depth); + cq_size = cq_depth * BFI_CQ_WI_SIZE; + cq_size = ALIGN(cq_size, PAGE_SIZE); + cpage_count = SIZE_TO_PAGES(cq_size); + + BNA_TO_POWER_OF_2_HIGH(dq_depth); + dq_size = dq_depth * BFI_RXQ_WI_SIZE; + dq_size = ALIGN(dq_size, PAGE_SIZE); + dpage_count = SIZE_TO_PAGES(dq_size); + + if (BNA_RXP_SINGLE != q_cfg->rxp_type) { + BNA_TO_POWER_OF_2_HIGH(hq_depth); + hq_size = hq_depth * BFI_RXQ_WI_SIZE; + hq_size = ALIGN(hq_size, PAGE_SIZE); + hpage_count = SIZE_TO_PAGES(hq_size); + } else { + hpage_count = 0; + } + + /* CCB structures */ + res_info[BNA_RX_RES_MEM_T_CCB].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_CCB].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = sizeof(struct bna_ccb); + mem_info->num = q_cfg->num_paths; + + /* RCB structures */ + res_info[BNA_RX_RES_MEM_T_RCB].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_RCB].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = sizeof(struct bna_rcb); + mem_info->num = BNA_GET_RXQS(q_cfg); + + /* Completion QPT */ + res_info[BNA_RX_RES_MEM_T_CQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_CQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = cpage_count * sizeof(struct bna_dma_addr); + mem_info->num = q_cfg->num_paths; + + /* Completion s/w QPT */ + res_info[BNA_RX_RES_MEM_T_CSWQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_CSWQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = cpage_count * sizeof(void *); + mem_info->num = q_cfg->num_paths; + + /* Completion QPT pages */ + res_info[BNA_RX_RES_MEM_T_CQPT_PAGE].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_CQPT_PAGE].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = PAGE_SIZE; + mem_info->num = cpage_count * q_cfg->num_paths; + + /* Data QPTs */ + res_info[BNA_RX_RES_MEM_T_DQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_DQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = dpage_count * sizeof(struct bna_dma_addr); + mem_info->num = q_cfg->num_paths; + + /* Data s/w QPTs */ + res_info[BNA_RX_RES_MEM_T_DSWQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_DSWQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = dpage_count * sizeof(void *); + mem_info->num = q_cfg->num_paths; + + /* Data QPT pages */ + res_info[BNA_RX_RES_MEM_T_DPAGE].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_DPAGE].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = PAGE_SIZE; + mem_info->num = dpage_count * q_cfg->num_paths; + + /* Hdr QPTs */ + res_info[BNA_RX_RES_MEM_T_HQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_HQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = hpage_count * sizeof(struct bna_dma_addr); + mem_info->num = (hpage_count ? q_cfg->num_paths : 0); + + /* Hdr s/w QPTs */ + res_info[BNA_RX_RES_MEM_T_HSWQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_HSWQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = hpage_count * sizeof(void *); + mem_info->num = (hpage_count ? 
q_cfg->num_paths : 0); + + /* Hdr QPT pages */ + res_info[BNA_RX_RES_MEM_T_HPAGE].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_RX_RES_MEM_T_HPAGE].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = (hpage_count ? PAGE_SIZE : 0); + mem_info->num = (hpage_count ? (hpage_count * q_cfg->num_paths) : 0); + + /* RX Interrupts */ + res_info[BNA_RX_RES_T_INTR].res_type = BNA_RES_T_INTR; + res_info[BNA_RX_RES_T_INTR].res_u.intr_info.intr_type = BNA_INTR_T_MSIX; + res_info[BNA_RX_RES_T_INTR].res_u.intr_info.num = q_cfg->num_paths; +} + +struct bna_rx * +bna_rx_create(struct bna *bna, struct bnad *bnad, + struct bna_rx_config *rx_cfg, + struct bna_rx_event_cbfn *rx_cbfn, + struct bna_res_info *res_info, + void *priv) +{ + struct bna_rx_mod *rx_mod = &bna->rx_mod; + struct bna_rx *rx; + struct bna_rxp *rxp; + struct bna_rxq *q0; + struct bna_rxq *q1; + struct bna_intr_info *intr_info; + u32 page_count; + struct bna_mem_descr *ccb_mem; + struct bna_mem_descr *rcb_mem; + struct bna_mem_descr *unmapq_mem; + struct bna_mem_descr *cqpt_mem; + struct bna_mem_descr *cswqpt_mem; + struct bna_mem_descr *cpage_mem; + struct bna_mem_descr *hqpt_mem; /* Header/Small Q qpt */ + struct bna_mem_descr *dqpt_mem; /* Data/Large Q qpt */ + struct bna_mem_descr *hsqpt_mem; /* s/w qpt for hdr */ + struct bna_mem_descr *dsqpt_mem; /* s/w qpt for data */ + struct bna_mem_descr *hpage_mem; /* hdr page mem */ + struct bna_mem_descr *dpage_mem; /* data page mem */ + int i, cpage_idx = 0, dpage_idx = 0, hpage_idx = 0, ret; + int dpage_count, hpage_count, rcb_idx; + struct bna_ib_config ibcfg; + /* Fail if we don't have enough RXPs, RXQs */ + if (!_rx_can_satisfy(rx_mod, rx_cfg)) + return NULL; + + /* Initialize resource pointers */ + intr_info = &res_info[BNA_RX_RES_T_INTR].res_u.intr_info; + ccb_mem = &res_info[BNA_RX_RES_MEM_T_CCB].res_u.mem_info.mdl[0]; + rcb_mem = &res_info[BNA_RX_RES_MEM_T_RCB].res_u.mem_info.mdl[0]; + unmapq_mem = &res_info[BNA_RX_RES_MEM_T_UNMAPQ].res_u.mem_info.mdl[0]; + cqpt_mem = &res_info[BNA_RX_RES_MEM_T_CQPT].res_u.mem_info.mdl[0]; + cswqpt_mem = &res_info[BNA_RX_RES_MEM_T_CSWQPT].res_u.mem_info.mdl[0]; + cpage_mem = &res_info[BNA_RX_RES_MEM_T_CQPT_PAGE].res_u.mem_info.mdl[0]; + hqpt_mem = &res_info[BNA_RX_RES_MEM_T_HQPT].res_u.mem_info.mdl[0]; + dqpt_mem = &res_info[BNA_RX_RES_MEM_T_DQPT].res_u.mem_info.mdl[0]; + hsqpt_mem = &res_info[BNA_RX_RES_MEM_T_HSWQPT].res_u.mem_info.mdl[0]; + dsqpt_mem = &res_info[BNA_RX_RES_MEM_T_DSWQPT].res_u.mem_info.mdl[0]; + hpage_mem = &res_info[BNA_RX_RES_MEM_T_HPAGE].res_u.mem_info.mdl[0]; + dpage_mem = &res_info[BNA_RX_RES_MEM_T_DPAGE].res_u.mem_info.mdl[0]; + + /* Compute q depth & page count */ + page_count = res_info[BNA_RX_RES_MEM_T_CQPT_PAGE].res_u.mem_info.num / + rx_cfg->num_paths; + + dpage_count = res_info[BNA_RX_RES_MEM_T_DPAGE].res_u.mem_info.num / + rx_cfg->num_paths; + + hpage_count = res_info[BNA_RX_RES_MEM_T_HPAGE].res_u.mem_info.num / + rx_cfg->num_paths; + /* Get RX pointer */ + rx = _get_free_rx(rx_mod); + _rx_init(rx, bna); + rx->priv = priv; + rx->type = rx_cfg->rx_type; + + rx->rcb_setup_cbfn = rx_cbfn->rcb_setup_cbfn; + rx->rcb_destroy_cbfn = rx_cbfn->rcb_destroy_cbfn; + rx->ccb_setup_cbfn = rx_cbfn->ccb_setup_cbfn; + rx->ccb_destroy_cbfn = rx_cbfn->ccb_destroy_cbfn; + /* Following callbacks are mandatory */ + rx->rx_cleanup_cbfn = rx_cbfn->rx_cleanup_cbfn; + rx->rx_post_cbfn = rx_cbfn->rx_post_cbfn; + + if (rx->bna->rx_mod.flags & BNA_RX_MOD_F_PORT_STARTED) { + switch (rx->type) { + case BNA_RX_T_REGULAR: + if 
(!(rx->bna->rx_mod.flags & + BNA_RX_MOD_F_PORT_LOOPBACK)) + rx->rx_flags |= BNA_RX_F_PORT_ENABLED; + break; + case BNA_RX_T_LOOPBACK: + if (rx->bna->rx_mod.flags & BNA_RX_MOD_F_PORT_LOOPBACK) + rx->rx_flags |= BNA_RX_F_PORT_ENABLED; + break; + } + } + + for (i = 0, rcb_idx = 0; i < rx_cfg->num_paths; i++) { + rxp = _get_free_rxp(rx_mod); + rxp->type = rx_cfg->rxp_type; + rxp->rx = rx; + rxp->cq.rx = rx; + + /* Get required RXQs, and queue them to rx-path */ + q0 = _get_free_rxq(rx_mod); + if (BNA_RXP_SINGLE == rx_cfg->rxp_type) + q1 = NULL; + else + q1 = _get_free_rxq(rx_mod); + + /* Initialize IB */ + if (1 == intr_info->num) { + rxp->cq.ib = bna_ib_get(&bna->ib_mod, + intr_info->intr_type, + intr_info->idl[0].vector); + rxp->vector = intr_info->idl[0].vector; + } else { + rxp->cq.ib = bna_ib_get(&bna->ib_mod, + intr_info->intr_type, + intr_info->idl[i].vector); + + /* Map the MSI-x vector used for this RXP */ + rxp->vector = intr_info->idl[i].vector; + } + + rxp->cq.ib_seg_offset = bna_ib_reserve_idx(rxp->cq.ib); + + ibcfg.coalescing_timeo = BFI_RX_COALESCING_TIMEO; + ibcfg.interpkt_count = BFI_RX_INTERPKT_COUNT; + ibcfg.interpkt_timeo = BFI_RX_INTERPKT_TIMEO; + ibcfg.ctrl_flags = BFI_IB_CF_INT_ENABLE; + + ret = bna_ib_config(rxp->cq.ib, &ibcfg); + + /* Link rxqs to rxp */ + _rxp_add_rxqs(rxp, q0, q1); + + /* Link rxp to rx */ + _rx_add_rxp(rx, rxp); + + q0->rx = rx; + q0->rxp = rxp; + + /* Initialize RCB for the large / data q */ + q0->rcb = (struct bna_rcb *) rcb_mem[rcb_idx].kva; + RXQ_RCB_INIT(q0, rxp, rx_cfg->q_depth, bna, 0, + (void *)unmapq_mem[rcb_idx].kva); + rcb_idx++; + (q0)->rx_packets = (q0)->rx_bytes = 0; + (q0)->rx_packets_with_error = (q0)->rxbuf_alloc_failed = 0; + + /* Initialize RXQs */ + _rxq_qpt_init(q0, rxp, dpage_count, PAGE_SIZE, + &dqpt_mem[i], &dsqpt_mem[i], &dpage_mem[dpage_idx]); + q0->rcb->page_idx = dpage_idx; + q0->rcb->page_count = dpage_count; + dpage_idx += dpage_count; + + /* Call bnad to complete rcb setup */ + if (rx->rcb_setup_cbfn) + rx->rcb_setup_cbfn(bnad, q0->rcb); + + if (q1) { + q1->rx = rx; + q1->rxp = rxp; + + q1->rcb = (struct bna_rcb *) rcb_mem[rcb_idx].kva; + RXQ_RCB_INIT(q1, rxp, rx_cfg->q_depth, bna, 1, + (void *)unmapq_mem[rcb_idx].kva); + rcb_idx++; + (q1)->buffer_size = (rx_cfg)->small_buff_size; + (q1)->rx_packets = (q1)->rx_bytes = 0; + (q1)->rx_packets_with_error = + (q1)->rxbuf_alloc_failed = 0; + + _rxq_qpt_init(q1, rxp, hpage_count, PAGE_SIZE, + &hqpt_mem[i], &hsqpt_mem[i], + &hpage_mem[hpage_idx]); + q1->rcb->page_idx = hpage_idx; + q1->rcb->page_count = hpage_count; + hpage_idx += hpage_count; + + /* Call bnad to complete rcb setup */ + if (rx->rcb_setup_cbfn) + rx->rcb_setup_cbfn(bnad, q1->rcb); + } + /* Setup RXP::CQ */ + rxp->cq.ccb = (struct bna_ccb *) ccb_mem[i].kva; + _rxp_cqpt_setup(rxp, page_count, PAGE_SIZE, + &cqpt_mem[i], &cswqpt_mem[i], &cpage_mem[cpage_idx]); + rxp->cq.ccb->page_idx = cpage_idx; + rxp->cq.ccb->page_count = page_count; + cpage_idx += page_count; + + rxp->cq.ccb->pkt_rate.small_pkt_cnt = 0; + rxp->cq.ccb->pkt_rate.large_pkt_cnt = 0; + + rxp->cq.ccb->producer_index = 0; + rxp->cq.ccb->q_depth = rx_cfg->q_depth + + ((rx_cfg->rxp_type == BNA_RXP_SINGLE) ? 
+ 0 : rx_cfg->q_depth); + rxp->cq.ccb->i_dbell = &rxp->cq.ib->door_bell; + rxp->cq.ccb->rcb[0] = q0->rcb; + if (q1) + rxp->cq.ccb->rcb[1] = q1->rcb; + rxp->cq.ccb->cq = &rxp->cq; + rxp->cq.ccb->bnad = bna->bnad; + rxp->cq.ccb->hw_producer_index = + ((volatile u32 *)rxp->cq.ib->ib_seg_host_addr_kva + + (rxp->cq.ib_seg_offset * BFI_IBIDX_SIZE)); + *(rxp->cq.ccb->hw_producer_index) = 0; + rxp->cq.ccb->intr_type = intr_info->intr_type; + rxp->cq.ccb->intr_vector = (intr_info->num == 1) ? + intr_info->idl[0].vector : + intr_info->idl[i].vector; + rxp->cq.ccb->rx_coalescing_timeo = + rxp->cq.ib->ib_config.coalescing_timeo; + rxp->cq.ccb->id = i; + + /* Call bnad to complete CCB setup */ + if (rx->ccb_setup_cbfn) + rx->ccb_setup_cbfn(bnad, rxp->cq.ccb); + + } /* for each rx-path */ + + bna_rxf_init(&rx->rxf, rx, rx_cfg); + + bfa_fsm_set_state(rx, bna_rx_sm_stopped); + + return rx; +} + +void +bna_rx_destroy(struct bna_rx *rx) +{ + struct bna_rx_mod *rx_mod = &rx->bna->rx_mod; + struct bna_ib_mod *ib_mod = &rx->bna->ib_mod; + struct bna_rxq *q0 = NULL; + struct bna_rxq *q1 = NULL; + struct bna_rxp *rxp; + struct list_head *qe; + + bna_rxf_uninit(&rx->rxf); + + while (!list_empty(&rx->rxp_q)) { + bfa_q_deq(&rx->rxp_q, &rxp); + GET_RXQS(rxp, q0, q1); + /* Callback to bnad for destroying RCB */ + if (rx->rcb_destroy_cbfn) + rx->rcb_destroy_cbfn(rx->bna->bnad, q0->rcb); + q0->rcb = NULL; + q0->rxp = NULL; + q0->rx = NULL; + _put_free_rxq(rx_mod, q0); + if (q1) { + /* Callback to bnad for destroying RCB */ + if (rx->rcb_destroy_cbfn) + rx->rcb_destroy_cbfn(rx->bna->bnad, q1->rcb); + q1->rcb = NULL; + q1->rxp = NULL; + q1->rx = NULL; + _put_free_rxq(rx_mod, q1); + } + rxp->rxq.slr.large = NULL; + rxp->rxq.slr.small = NULL; + if (rxp->cq.ib) { + if (rxp->cq.ib_seg_offset != 0xff) + bna_ib_release_idx(rxp->cq.ib, + rxp->cq.ib_seg_offset); + bna_ib_put(ib_mod, rxp->cq.ib); + rxp->cq.ib = NULL; + } + /* Callback to bnad for destroying CCB */ + if (rx->ccb_destroy_cbfn) + rx->ccb_destroy_cbfn(rx->bna->bnad, rxp->cq.ccb); + rxp->cq.ccb = NULL; + rxp->rx = NULL; + _put_free_rxp(rx_mod, rxp); + } + + list_for_each(qe, &rx_mod->rx_active_q) { + if (qe == &rx->qe) { + list_del(&rx->qe); + bfa_q_qe_init(&rx->qe); + break; + } + } + + rx->bna = NULL; + rx->priv = NULL; + _put_free_rx(rx_mod, rx); +} + +void +bna_rx_enable(struct bna_rx *rx) +{ + if (rx->fsm != (bfa_sm_t)bna_rx_sm_stopped) + return; + + rx->rx_flags |= BNA_RX_F_ENABLE; + if (rx->rx_flags & BNA_RX_F_PORT_ENABLED) + bfa_fsm_send_event(rx, RX_E_START); +} + +void +bna_rx_disable(struct bna_rx *rx, enum bna_cleanup_type type, + void (*cbfn)(void *, struct bna_rx *, + enum bna_cb_status)) +{ + if (type == BNA_SOFT_CLEANUP) { + /* h/w should not be accessed. 
Treat it as stopped */ + (*cbfn)(rx->bna->bnad, rx, BNA_CB_SUCCESS); + } else { + rx->stop_cbfn = cbfn; + rx->stop_cbarg = rx->bna->bnad; + + rx->rx_flags &= ~BNA_RX_F_ENABLE; + + bfa_fsm_send_event(rx, RX_E_STOP); + } +} + +/** + * TX + */ +#define call_tx_stop_cbfn(tx, status)\ +do {\ + if ((tx)->stop_cbfn)\ + (tx)->stop_cbfn((tx)->stop_cbarg, (tx), status);\ + (tx)->stop_cbfn = NULL;\ + (tx)->stop_cbarg = NULL;\ +} while (0) + +#define call_tx_prio_change_cbfn(tx, status)\ +do {\ + if ((tx)->prio_change_cbfn)\ + (tx)->prio_change_cbfn((tx)->bna->bnad, (tx), status);\ + (tx)->prio_change_cbfn = NULL;\ +} while (0) + +static void bna_tx_mod_cb_tx_stopped(void *tx_mod, struct bna_tx *tx, + enum bna_cb_status status); +static void bna_tx_cb_txq_stopped(void *arg, int status); +static void bna_tx_cb_stats_cleared(void *arg, int status); +static void __bna_tx_stop(struct bna_tx *tx); +static void __bna_tx_start(struct bna_tx *tx); +static void __bna_txf_stat_clr(struct bna_tx *tx); + +enum bna_tx_event { + TX_E_START = 1, + TX_E_STOP = 2, + TX_E_FAIL = 3, + TX_E_TXQ_STOPPED = 4, + TX_E_PRIO_CHANGE = 5, + TX_E_STAT_CLEARED = 6, +}; + +enum bna_tx_state { + BNA_TX_STOPPED = 1, + BNA_TX_STARTED = 2, + BNA_TX_TXQ_STOP_WAIT = 3, + BNA_TX_PRIO_STOP_WAIT = 4, + BNA_TX_STAT_CLR_WAIT = 5, +}; + +bfa_fsm_state_decl(bna_tx, stopped, struct bna_tx, + enum bna_tx_event); +bfa_fsm_state_decl(bna_tx, started, struct bna_tx, + enum bna_tx_event); +bfa_fsm_state_decl(bna_tx, txq_stop_wait, struct bna_tx, + enum bna_tx_event); +bfa_fsm_state_decl(bna_tx, prio_stop_wait, struct bna_tx, + enum bna_tx_event); +bfa_fsm_state_decl(bna_tx, stat_clr_wait, struct bna_tx, + enum bna_tx_event); + +static struct bfa_sm_table tx_sm_table[] = { + {BFA_SM(bna_tx_sm_stopped), BNA_TX_STOPPED}, + {BFA_SM(bna_tx_sm_started), BNA_TX_STARTED}, + {BFA_SM(bna_tx_sm_txq_stop_wait), BNA_TX_TXQ_STOP_WAIT}, + {BFA_SM(bna_tx_sm_prio_stop_wait), BNA_TX_PRIO_STOP_WAIT}, + {BFA_SM(bna_tx_sm_stat_clr_wait), BNA_TX_STAT_CLR_WAIT}, +}; + +static void +bna_tx_sm_stopped_entry(struct bna_tx *tx) +{ + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + (tx->tx_cleanup_cbfn)(tx->bna->bnad, txq->tcb); + } + + call_tx_stop_cbfn(tx, BNA_CB_SUCCESS); +} + +static void +bna_tx_sm_stopped(struct bna_tx *tx, enum bna_tx_event event) +{ + switch (event) { + case TX_E_START: + bfa_fsm_set_state(tx, bna_tx_sm_started); + break; + + case TX_E_STOP: + bfa_fsm_set_state(tx, bna_tx_sm_stopped); + break; + + case TX_E_FAIL: + /* No-op */ + break; + + case TX_E_PRIO_CHANGE: + call_tx_prio_change_cbfn(tx, BNA_CB_SUCCESS); + break; + + case TX_E_TXQ_STOPPED: + /** + * This event is received due to flushing of the mbox + * when the device fails + */ + /* No-op */ + break; + + default: + bfa_sm_fault(tx->bna, event); + } +} + +static void +bna_tx_sm_started_entry(struct bna_tx *tx) +{ + struct bna_txq *txq; + struct list_head *qe; + + __bna_tx_start(tx); + + /* Start IB */ + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_ack(&txq->ib->door_bell, 0); + } +} + +static void +bna_tx_sm_started(struct bna_tx *tx, enum bna_tx_event event) +{ + struct bna_txq *txq; + struct list_head *qe; + + switch (event) { + case TX_E_STOP: + bfa_fsm_set_state(tx, bna_tx_sm_txq_stop_wait); + __bna_tx_stop(tx); + break; + + case TX_E_FAIL: + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_fail(txq->ib); + (tx->tx_stall_cbfn)(tx->bna->bnad, txq->tcb); + } + bfa_fsm_set_state(tx,
bna_tx_sm_stopped); + break; + + case TX_E_PRIO_CHANGE: + bfa_fsm_set_state(tx, bna_tx_sm_prio_stop_wait); + break; + + default: + bfa_sm_fault(tx->bna, event); + } +} + +static void +bna_tx_sm_txq_stop_wait_entry(struct bna_tx *tx) +{ +} + +static void +bna_tx_sm_txq_stop_wait(struct bna_tx *tx, enum bna_tx_event event) +{ + struct bna_txq *txq; + struct list_head *qe; + + switch (event) { + case TX_E_FAIL: + bfa_fsm_set_state(tx, bna_tx_sm_stopped); + break; + + case TX_E_TXQ_STOPPED: + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_stop(txq->ib); + } + bfa_fsm_set_state(tx, bna_tx_sm_stat_clr_wait); + break; + + case TX_E_PRIO_CHANGE: + /* No-op */ + break; + + default: + bfa_sm_fault(tx->bna, event); + } +} + +static void +bna_tx_sm_prio_stop_wait_entry(struct bna_tx *tx) +{ + __bna_tx_stop(tx); +} + +static void +bna_tx_sm_prio_stop_wait(struct bna_tx *tx, enum bna_tx_event event) +{ + struct bna_txq *txq; + struct list_head *qe; + + switch (event) { + case TX_E_STOP: + bfa_fsm_set_state(tx, bna_tx_sm_txq_stop_wait); + break; + + case TX_E_FAIL: + call_tx_prio_change_cbfn(tx, BNA_CB_FAIL); + bfa_fsm_set_state(tx, bna_tx_sm_stopped); + break; + + case TX_E_TXQ_STOPPED: + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_stop(txq->ib); + (tx->tx_cleanup_cbfn)(tx->bna->bnad, txq->tcb); + } + call_tx_prio_change_cbfn(tx, BNA_CB_SUCCESS); + bfa_fsm_set_state(tx, bna_tx_sm_started); + break; + + case TX_E_PRIO_CHANGE: + /* No-op */ + break; + + default: + bfa_sm_fault(tx->bna, event); + } +} + +static void +bna_tx_sm_stat_clr_wait_entry(struct bna_tx *tx) +{ + __bna_txf_stat_clr(tx); +} + +static void +bna_tx_sm_stat_clr_wait(struct bna_tx *tx, enum bna_tx_event event) +{ + switch (event) { + case TX_E_FAIL: + case TX_E_STAT_CLEARED: + bfa_fsm_set_state(tx, bna_tx_sm_stopped); + break; + + default: + bfa_sm_fault(tx->bna, event); + } +} + +static void +__bna_txq_start(struct bna_tx *tx, struct bna_txq *txq) +{ + struct bna_rxtx_q_mem *q_mem; + struct bna_txq_mem txq_cfg; + struct bna_txq_mem *txq_mem; + struct bna_dma_addr cur_q_addr; + u32 pg_num; + void __iomem *base_addr; + unsigned long off; + + /* Fill out structure, to be subsequently written to hardware */ + txq_cfg.pg_tbl_addr_lo = txq->qpt.hw_qpt_ptr.lsb; + txq_cfg.pg_tbl_addr_hi = txq->qpt.hw_qpt_ptr.msb; + cur_q_addr = *((struct bna_dma_addr *)(txq->qpt.kv_qpt_ptr)); + txq_cfg.cur_q_entry_lo = cur_q_addr.lsb; + txq_cfg.cur_q_entry_hi = cur_q_addr.msb; + + txq_cfg.pg_cnt_n_prd_ptr = (txq->qpt.page_count << 16) | 0x0; + + txq_cfg.entry_n_pg_size = ((u32)(BFI_TXQ_WI_SIZE >> 2) << 16) | + (txq->qpt.page_size >> 2); + txq_cfg.int_blk_n_cns_ptr = ((((u32)txq->ib_seg_offset) << 24) | + ((u32)(txq->ib->ib_id & 0xff) << 16) | 0x0); + + txq_cfg.cns_ptr2_n_q_state = BNA_Q_IDLE_STATE; + txq_cfg.nxt_qid_n_fid_n_pri = (((tx->txf.txf_id & 0x3f) << 3) | + (txq->priority & 0x3)); + txq_cfg.wvc_n_cquota_n_rquota = + ((((u32)BFI_TX_MAX_WRR_QUOTA & 0xfff) << 12) | + (BFI_TX_MAX_WRR_QUOTA & 0xfff)); + + /* Setup the page and write to H/W */ + + pg_num = BNA_GET_PAGE_NUM(HQM0_BLK_PG_NUM + tx->bna->port_num, + HQM_RXTX_Q_RAM_BASE_OFFSET); + writel(pg_num, tx->bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(tx->bna->pcidev.pci_bar_kva, + HQM_RXTX_Q_RAM_BASE_OFFSET); + q_mem = (struct bna_rxtx_q_mem *)0; + txq_mem = &q_mem[txq->txq_id].txq; + + /* + * The following 4 lines, is a hack b'cos the H/W needs to read + * these DMA addresses as little endian + */ + + off = (unsigned 
long)&txq_mem->pg_tbl_addr_lo; + writel(htonl(txq_cfg.pg_tbl_addr_lo), base_addr + off); + + off = (unsigned long)&txq_mem->pg_tbl_addr_hi; + writel(htonl(txq_cfg.pg_tbl_addr_hi), base_addr + off); + + off = (unsigned long)&txq_mem->cur_q_entry_lo; + writel(htonl(txq_cfg.cur_q_entry_lo), base_addr + off); + + off = (unsigned long)&txq_mem->cur_q_entry_hi; + writel(htonl(txq_cfg.cur_q_entry_hi), base_addr + off); + + off = (unsigned long)&txq_mem->pg_cnt_n_prd_ptr; + writel(txq_cfg.pg_cnt_n_prd_ptr, base_addr + off); + + off = (unsigned long)&txq_mem->entry_n_pg_size; + writel(txq_cfg.entry_n_pg_size, base_addr + off); + + off = (unsigned long)&txq_mem->int_blk_n_cns_ptr; + writel(txq_cfg.int_blk_n_cns_ptr, base_addr + off); + + off = (unsigned long)&txq_mem->cns_ptr2_n_q_state; + writel(txq_cfg.cns_ptr2_n_q_state, base_addr + off); + + off = (unsigned long)&txq_mem->nxt_qid_n_fid_n_pri; + writel(txq_cfg.nxt_qid_n_fid_n_pri, base_addr + off); + + off = (unsigned long)&txq_mem->wvc_n_cquota_n_rquota; + writel(txq_cfg.wvc_n_cquota_n_rquota, base_addr + off); + + txq->tcb->producer_index = 0; + txq->tcb->consumer_index = 0; + *(txq->tcb->hw_consumer_index) = 0; + +} + +static void +__bna_txq_stop(struct bna_tx *tx, struct bna_txq *txq) +{ + struct bfi_ll_q_stop_req ll_req; + u32 bit_mask[2] = {0, 0}; + if (txq->txq_id < 32) + bit_mask[0] = (u32)1 << txq->txq_id; + else + bit_mask[1] = (u32)1 << (txq->txq_id - 32); + + memset(&ll_req, 0, sizeof(ll_req)); + ll_req.mh.msg_class = BFI_MC_LL; + ll_req.mh.msg_id = BFI_LL_H2I_TXQ_STOP_REQ; + ll_req.mh.mtag.h2i.lpu_id = 0; + ll_req.q_id_mask[0] = htonl(bit_mask[0]); + ll_req.q_id_mask[1] = htonl(bit_mask[1]); + + bna_mbox_qe_fill(&tx->mbox_qe, &ll_req, sizeof(ll_req), + bna_tx_cb_txq_stopped, tx); + + bna_mbox_send(tx->bna, &tx->mbox_qe); +} + +static void +__bna_txf_start(struct bna_tx *tx) +{ + struct bna_tx_fndb_ram *tx_fndb; + struct bna_txf *txf = &tx->txf; + void __iomem *base_addr; + unsigned long off; + + writel(BNA_GET_PAGE_NUM(LUT0_MEM_BLK_BASE_PG_NUM + + (tx->bna->port_num * 2), TX_FNDB_RAM_BASE_OFFSET), + tx->bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(tx->bna->pcidev.pci_bar_kva, + TX_FNDB_RAM_BASE_OFFSET); + + tx_fndb = (struct bna_tx_fndb_ram *)0; + off = (unsigned long)&tx_fndb[txf->txf_id].vlan_n_ctrl_flags; + + writel(((u32)txf->vlan << 16) | txf->ctrl_flags, + base_addr + off); + + if (tx->txf.txf_id < 32) + tx->bna->tx_mod.txf_bmap[0] |= ((u32)1 << tx->txf.txf_id); + else + tx->bna->tx_mod.txf_bmap[1] |= ((u32) + 1 << (tx->txf.txf_id - 32)); +} + +static void +__bna_txf_stop(struct bna_tx *tx) +{ + struct bna_tx_fndb_ram *tx_fndb; + u32 page_num; + u32 ctl_flags; + struct bna_txf *txf = &tx->txf; + void __iomem *base_addr; + unsigned long off; + + /* retrieve the running txf_flags & turn off enable bit */ + page_num = BNA_GET_PAGE_NUM(LUT0_MEM_BLK_BASE_PG_NUM + + (tx->bna->port_num * 2), TX_FNDB_RAM_BASE_OFFSET); + writel(page_num, tx->bna->regs.page_addr); + + base_addr = BNA_GET_MEM_BASE_ADDR(tx->bna->pcidev.pci_bar_kva, + TX_FNDB_RAM_BASE_OFFSET); + tx_fndb = (struct bna_tx_fndb_ram *)0; + off = (unsigned long)&tx_fndb[txf->txf_id].vlan_n_ctrl_flags; + + ctl_flags = readl(base_addr + off); + ctl_flags &= ~BFI_TXF_CF_ENABLE; + + writel(ctl_flags, base_addr + off); + + if (tx->txf.txf_id < 32) + tx->bna->tx_mod.txf_bmap[0] &= ~((u32)1 << tx->txf.txf_id); + else + tx->bna->tx_mod.txf_bmap[1] &= ~((u32) + 1 << (tx->txf.txf_id - 32)); +} + +static void +__bna_txf_stat_clr(struct bna_tx *tx) +{ + struct
bfi_ll_stats_req ll_req; + u32 txf_bmap[2] = {0, 0}; + if (tx->txf.txf_id < 32) + txf_bmap[0] = ((u32)1 << tx->txf.txf_id); + else + txf_bmap[1] = ((u32)1 << (tx->txf.txf_id - 32)); + bfi_h2i_set(ll_req.mh, BFI_MC_LL, BFI_LL_H2I_STATS_CLEAR_REQ, 0); + ll_req.stats_mask = 0; + ll_req.rxf_id_mask[0] = 0; + ll_req.rxf_id_mask[1] = 0; + ll_req.txf_id_mask[0] = htonl(txf_bmap[0]); + ll_req.txf_id_mask[1] = htonl(txf_bmap[1]); + + bna_mbox_qe_fill(&tx->mbox_qe, &ll_req, sizeof(ll_req), + bna_tx_cb_stats_cleared, tx); + bna_mbox_send(tx->bna, &tx->mbox_qe); +} + +static void +__bna_tx_start(struct bna_tx *tx) +{ + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bna_ib_start(txq->ib); + __bna_txq_start(tx, txq); + } + + __bna_txf_start(tx); + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + txq->tcb->priority = txq->priority; + (tx->tx_resume_cbfn)(tx->bna->bnad, txq->tcb); + } +} + +static void +__bna_tx_stop(struct bna_tx *tx) +{ + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + (tx->tx_stall_cbfn)(tx->bna->bnad, txq->tcb); + } + + __bna_txf_stop(tx); + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + bfa_wc_up(&tx->txq_stop_wc); + } + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + __bna_txq_stop(tx, txq); + } +} + +static void +bna_txq_qpt_setup(struct bna_txq *txq, int page_count, int page_size, + struct bna_mem_descr *qpt_mem, + struct bna_mem_descr *swqpt_mem, + struct bna_mem_descr *page_mem) +{ + int i; + + txq->qpt.hw_qpt_ptr.lsb = qpt_mem->dma.lsb; + txq->qpt.hw_qpt_ptr.msb = qpt_mem->dma.msb; + txq->qpt.kv_qpt_ptr = qpt_mem->kva; + txq->qpt.page_count = page_count; + txq->qpt.page_size = page_size; + + txq->tcb->sw_qpt = (void **) swqpt_mem->kva; + + for (i = 0; i < page_count; i++) { + txq->tcb->sw_qpt[i] = page_mem[i].kva; + + ((struct bna_dma_addr *)txq->qpt.kv_qpt_ptr)[i].lsb = + page_mem[i].dma.lsb; + ((struct bna_dma_addr *)txq->qpt.kv_qpt_ptr)[i].msb = + page_mem[i].dma.msb; + + } +} + +static void +bna_tx_free(struct bna_tx *tx) +{ + struct bna_tx_mod *tx_mod = &tx->bna->tx_mod; + struct bna_txq *txq; + struct bna_ib_mod *ib_mod = &tx->bna->ib_mod; + struct list_head *qe; + + while (!list_empty(&tx->txq_q)) { + bfa_q_deq(&tx->txq_q, &txq); + bfa_q_qe_init(&txq->qe); + if (txq->ib) { + if (txq->ib_seg_offset != -1) + bna_ib_release_idx(txq->ib, + txq->ib_seg_offset); + bna_ib_put(ib_mod, txq->ib); + txq->ib = NULL; + } + txq->tcb = NULL; + txq->tx = NULL; + list_add_tail(&txq->qe, &tx_mod->txq_free_q); + } + + list_for_each(qe, &tx_mod->tx_active_q) { + if (qe == &tx->qe) { + list_del(&tx->qe); + bfa_q_qe_init(&tx->qe); + break; + } + } + + tx->bna = NULL; + tx->priv = NULL; + list_add_tail(&tx->qe, &tx_mod->tx_free_q); +} + +static void +bna_tx_cb_txq_stopped(void *arg, int status) +{ + struct bna_tx *tx = (struct bna_tx *)arg; + + bfa_q_qe_init(&tx->mbox_qe.qe); + bfa_wc_down(&tx->txq_stop_wc); +} + +static void +bna_tx_cb_txq_stopped_all(void *arg) +{ + struct bna_tx *tx = (struct bna_tx *)arg; + + bfa_fsm_send_event(tx, TX_E_TXQ_STOPPED); +} + +static void +bna_tx_cb_stats_cleared(void *arg, int status) +{ + struct bna_tx *tx = (struct bna_tx *)arg; + + bfa_q_qe_init(&tx->mbox_qe.qe); + + bfa_fsm_send_event(tx, TX_E_STAT_CLEARED); +} + +static void +bna_tx_start(struct bna_tx *tx) +{ + tx->flags |= BNA_TX_F_PORT_STARTED; + if (tx->flags & BNA_TX_F_ENABLED) + bfa_fsm_send_event(tx, TX_E_START); +} 
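+/* + * A Tx actually starts only when both gates are open: the port side + * sets BNA_TX_F_PORT_STARTED (bna_tx_start() above) and bnad sets + * BNA_TX_F_ENABLED (bna_tx_enable() below); whichever flag arrives + * last raises TX_E_START. In sketch form, for the enable-then-start + * order: + * + * bna_tx_enable(tx); - sets BNA_TX_F_ENABLED + * bna_tx_mod_start(tx_mod, BNA_TX_T_REGULAR); + * - bna_tx_start(tx) -> TX_E_START + */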
+ +static void +bna_tx_stop(struct bna_tx *tx) +{ + tx->stop_cbfn = bna_tx_mod_cb_tx_stopped; + tx->stop_cbarg = &tx->bna->tx_mod; + + tx->flags &= ~BNA_TX_F_PORT_STARTED; + bfa_fsm_send_event(tx, TX_E_STOP); +} + +static void +bna_tx_fail(struct bna_tx *tx) +{ + tx->flags &= ~BNA_TX_F_PORT_STARTED; + bfa_fsm_send_event(tx, TX_E_FAIL); +} + +void +bna_tx_prio_changed(struct bna_tx *tx, int prio) +{ + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + txq->priority = prio; + } + + bfa_fsm_send_event(tx, TX_E_PRIO_CHANGE); +} + +static void +bna_tx_cee_link_status(struct bna_tx *tx, int cee_link) +{ + if (cee_link) + tx->flags |= BNA_TX_F_PRIO_LOCK; + else + tx->flags &= ~BNA_TX_F_PRIO_LOCK; +} + +static void +bna_tx_mod_cb_tx_stopped(void *arg, struct bna_tx *tx, + enum bna_cb_status status) +{ + struct bna_tx_mod *tx_mod = (struct bna_tx_mod *)arg; + + bfa_wc_down(&tx_mod->tx_stop_wc); +} + +static void +bna_tx_mod_cb_tx_stopped_all(void *arg) +{ + struct bna_tx_mod *tx_mod = (struct bna_tx_mod *)arg; + + if (tx_mod->stop_cbfn) + tx_mod->stop_cbfn(&tx_mod->bna->port, BNA_CB_SUCCESS); + tx_mod->stop_cbfn = NULL; +} + +void +bna_tx_res_req(int num_txq, int txq_depth, struct bna_res_info *res_info) +{ + u32 q_size; + u32 page_count; + struct bna_mem_info *mem_info; + + res_info[BNA_TX_RES_MEM_T_TCB].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_TX_RES_MEM_T_TCB].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = sizeof(struct bna_tcb); + mem_info->num = num_txq; + + q_size = txq_depth * BFI_TXQ_WI_SIZE; + q_size = ALIGN(q_size, PAGE_SIZE); + page_count = q_size >> PAGE_SHIFT; + + res_info[BNA_TX_RES_MEM_T_QPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_TX_RES_MEM_T_QPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = page_count * sizeof(struct bna_dma_addr); + mem_info->num = num_txq; + + res_info[BNA_TX_RES_MEM_T_SWQPT].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_TX_RES_MEM_T_SWQPT].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_KVA; + mem_info->len = page_count * sizeof(void *); + mem_info->num = num_txq; + + res_info[BNA_TX_RES_MEM_T_PAGE].res_type = BNA_RES_T_MEM; + mem_info = &res_info[BNA_TX_RES_MEM_T_PAGE].res_u.mem_info; + mem_info->mem_type = BNA_MEM_T_DMA; + mem_info->len = PAGE_SIZE; + mem_info->num = num_txq * page_count; + + res_info[BNA_TX_RES_INTR_T_TXCMPL].res_type = BNA_RES_T_INTR; + res_info[BNA_TX_RES_INTR_T_TXCMPL].res_u.intr_info.intr_type = + BNA_INTR_T_MSIX; + res_info[BNA_TX_RES_INTR_T_TXCMPL].res_u.intr_info.num = num_txq; +} + +struct bna_tx * +bna_tx_create(struct bna *bna, struct bnad *bnad, + struct bna_tx_config *tx_cfg, + struct bna_tx_event_cbfn *tx_cbfn, + struct bna_res_info *res_info, void *priv) +{ + struct bna_intr_info *intr_info; + struct bna_tx_mod *tx_mod = &bna->tx_mod; + struct bna_tx *tx; + struct bna_txq *txq; + struct list_head *qe; + struct bna_ib_mod *ib_mod = &bna->ib_mod; + struct bna_doorbell_qset *qset; + struct bna_ib_config ib_config; + int page_count; + int page_size; + int page_idx; + int i; + unsigned long off; + + intr_info = &res_info[BNA_TX_RES_INTR_T_TXCMPL].res_u.intr_info; + page_count = (res_info[BNA_TX_RES_MEM_T_PAGE].res_u.mem_info.num) / + tx_cfg->num_txq; + page_size = res_info[BNA_TX_RES_MEM_T_PAGE].res_u.mem_info.len; + + /** + * Get resources + */ + + if ((intr_info->num != 1) && (intr_info->num != tx_cfg->num_txq)) + return NULL; + + /* Tx */ + + if (list_empty(&tx_mod->tx_free_q)) + 
return NULL; + bfa_q_deq(&tx_mod->tx_free_q, &tx); + bfa_q_qe_init(&tx->qe); + + /* TxQs */ + + INIT_LIST_HEAD(&tx->txq_q); + for (i = 0; i < tx_cfg->num_txq; i++) { + if (list_empty(&tx_mod->txq_free_q)) + goto err_return; + + bfa_q_deq(&tx_mod->txq_free_q, &txq); + bfa_q_qe_init(&txq->qe); + list_add_tail(&txq->qe, &tx->txq_q); + txq->ib = NULL; + txq->ib_seg_offset = -1; + txq->tx = tx; + } + + /* IBs */ + i = 0; + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + + if (intr_info->num == 1) + txq->ib = bna_ib_get(ib_mod, intr_info->intr_type, + intr_info->idl[0].vector); + else + txq->ib = bna_ib_get(ib_mod, intr_info->intr_type, + intr_info->idl[i].vector); + + if (txq->ib == NULL) + goto err_return; + + txq->ib_seg_offset = bna_ib_reserve_idx(txq->ib); + if (txq->ib_seg_offset == -1) + goto err_return; + + i++; + } + + /* + * Initialize + */ + + /* Tx */ + + tx->tcb_setup_cbfn = tx_cbfn->tcb_setup_cbfn; + tx->tcb_destroy_cbfn = tx_cbfn->tcb_destroy_cbfn; + /* Following callbacks are mandatory */ + tx->tx_stall_cbfn = tx_cbfn->tx_stall_cbfn; + tx->tx_resume_cbfn = tx_cbfn->tx_resume_cbfn; + tx->tx_cleanup_cbfn = tx_cbfn->tx_cleanup_cbfn; + + list_add_tail(&tx->qe, &tx_mod->tx_active_q); + tx->bna = bna; + tx->priv = priv; + tx->txq_stop_wc.wc_resume = bna_tx_cb_txq_stopped_all; + tx->txq_stop_wc.wc_cbarg = tx; + tx->txq_stop_wc.wc_count = 0; + + tx->type = tx_cfg->tx_type; + + tx->flags = 0; + if (tx->bna->tx_mod.flags & BNA_TX_MOD_F_PORT_STARTED) { + switch (tx->type) { + case BNA_TX_T_REGULAR: + if (!(tx->bna->tx_mod.flags & + BNA_TX_MOD_F_PORT_LOOPBACK)) + tx->flags |= BNA_TX_F_PORT_STARTED; + break; + case BNA_TX_T_LOOPBACK: + if (tx->bna->tx_mod.flags & BNA_TX_MOD_F_PORT_LOOPBACK) + tx->flags |= BNA_TX_F_PORT_STARTED; + break; + } + } + if (tx->bna->tx_mod.cee_link) + tx->flags |= BNA_TX_F_PRIO_LOCK; + + /* TxQ */ + + i = 0; + page_idx = 0; + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + txq->priority = tx_mod->priority; + txq->tcb = (struct bna_tcb *) + res_info[BNA_TX_RES_MEM_T_TCB].res_u.mem_info.mdl[i].kva; + txq->tx_packets = 0; + txq->tx_bytes = 0; + + /* IB */ + + ib_config.coalescing_timeo = BFI_TX_COALESCING_TIMEO; + ib_config.interpkt_timeo = 0; /* Not used */ + ib_config.interpkt_count = BFI_TX_INTERPKT_COUNT; + ib_config.ctrl_flags = (BFI_IB_CF_INTER_PKT_DMA | + BFI_IB_CF_INT_ENABLE | + BFI_IB_CF_COALESCING_MODE); + bna_ib_config(txq->ib, &ib_config); + + /* TCB */ + + txq->tcb->producer_index = 0; + txq->tcb->consumer_index = 0; + txq->tcb->hw_consumer_index = (volatile u32 *) + ((volatile u8 *)txq->ib->ib_seg_host_addr_kva + + (txq->ib_seg_offset * BFI_IBIDX_SIZE)); + *(txq->tcb->hw_consumer_index) = 0; + txq->tcb->q_depth = tx_cfg->txq_depth; + txq->tcb->unmap_q = (void *) + res_info[BNA_TX_RES_MEM_T_UNMAPQ].res_u.mem_info.mdl[i].kva; + qset = (struct bna_doorbell_qset *)0; + off = (unsigned long)&qset[txq->txq_id].txq[0]; + txq->tcb->q_dbell = off + + BNA_GET_DOORBELL_BASE_ADDR(bna->pcidev.pci_bar_kva); + txq->tcb->i_dbell = &txq->ib->door_bell; + txq->tcb->intr_type = intr_info->intr_type; + txq->tcb->intr_vector = (intr_info->num == 1) ? + intr_info->idl[0].vector : + intr_info->idl[i].vector; + txq->tcb->txq = txq; + txq->tcb->bnad = bnad; + txq->tcb->id = i; + + /* QPT, SWQPT, Pages */ + bna_txq_qpt_setup(txq, page_count, page_size, + &res_info[BNA_TX_RES_MEM_T_QPT].res_u.mem_info.mdl[i], + &res_info[BNA_TX_RES_MEM_T_SWQPT].res_u.mem_info.mdl[i], + &res_info[BNA_TX_RES_MEM_T_PAGE]. 
+ res_u.mem_info.mdl[page_idx]); + txq->tcb->page_idx = page_idx; + txq->tcb->page_count = page_count; + page_idx += page_count; + + /* Callback to bnad for setting up TCB */ + if (tx->tcb_setup_cbfn) + (tx->tcb_setup_cbfn)(bna->bnad, txq->tcb); + + i++; + } + + /* TxF */ + + tx->txf.ctrl_flags = BFI_TXF_CF_ENABLE | BFI_TXF_CF_VLAN_WI_BASED; + tx->txf.vlan = 0; + + /* Mbox element */ + bfa_q_qe_init(&tx->mbox_qe.qe); + + bfa_fsm_set_state(tx, bna_tx_sm_stopped); + + return tx; + +err_return: + bna_tx_free(tx); + return NULL; +} + +void +bna_tx_destroy(struct bna_tx *tx) +{ + /* Callback to bnad for destroying TCB */ + if (tx->tcb_destroy_cbfn) { + struct bna_txq *txq; + struct list_head *qe; + + list_for_each(qe, &tx->txq_q) { + txq = (struct bna_txq *)qe; + (tx->tcb_destroy_cbfn)(tx->bna->bnad, txq->tcb); + } + } + + bna_tx_free(tx); +} + +void +bna_tx_enable(struct bna_tx *tx) +{ + if (tx->fsm != (bfa_sm_t)bna_tx_sm_stopped) + return; + + tx->flags |= BNA_TX_F_ENABLED; + + if (tx->flags & BNA_TX_F_PORT_STARTED) + bfa_fsm_send_event(tx, TX_E_START); +} + +void +bna_tx_disable(struct bna_tx *tx, enum bna_cleanup_type type, + void (*cbfn)(void *, struct bna_tx *, enum bna_cb_status)) +{ + if (type == BNA_SOFT_CLEANUP) { + (*cbfn)(tx->bna->bnad, tx, BNA_CB_SUCCESS); + return; + } + + tx->stop_cbfn = cbfn; + tx->stop_cbarg = tx->bna->bnad; + + tx->flags &= ~BNA_TX_F_ENABLED; + + bfa_fsm_send_event(tx, TX_E_STOP); +} + +int +bna_tx_state_get(struct bna_tx *tx) +{ + return bfa_sm_to_state(tx_sm_table, tx->fsm); +} + +void +bna_tx_mod_init(struct bna_tx_mod *tx_mod, struct bna *bna, + struct bna_res_info *res_info) +{ + int i; + + tx_mod->bna = bna; + tx_mod->flags = 0; + + tx_mod->tx = (struct bna_tx *) + res_info[BNA_RES_MEM_T_TX_ARRAY].res_u.mem_info.mdl[0].kva; + tx_mod->txq = (struct bna_txq *) + res_info[BNA_RES_MEM_T_TXQ_ARRAY].res_u.mem_info.mdl[0].kva; + + INIT_LIST_HEAD(&tx_mod->tx_free_q); + INIT_LIST_HEAD(&tx_mod->tx_active_q); + + INIT_LIST_HEAD(&tx_mod->txq_free_q); + + for (i = 0; i < BFI_MAX_TXQ; i++) { + tx_mod->tx[i].txf.txf_id = i; + bfa_q_qe_init(&tx_mod->tx[i].qe); + list_add_tail(&tx_mod->tx[i].qe, &tx_mod->tx_free_q); + + tx_mod->txq[i].txq_id = i; + bfa_q_qe_init(&tx_mod->txq[i].qe); + list_add_tail(&tx_mod->txq[i].qe, &tx_mod->txq_free_q); + } + + tx_mod->tx_stop_wc.wc_resume = bna_tx_mod_cb_tx_stopped_all; + tx_mod->tx_stop_wc.wc_cbarg = tx_mod; + tx_mod->tx_stop_wc.wc_count = 0; +} + +void +bna_tx_mod_uninit(struct bna_tx_mod *tx_mod) +{ + struct list_head *qe; + int i; + + i = 0; + list_for_each(qe, &tx_mod->tx_free_q) + i++; + + i = 0; + list_for_each(qe, &tx_mod->txq_free_q) + i++; + + tx_mod->bna = NULL; +} + +void +bna_tx_mod_start(struct bna_tx_mod *tx_mod, enum bna_tx_type type) +{ + struct bna_tx *tx; + struct list_head *qe; + + tx_mod->flags |= BNA_TX_MOD_F_PORT_STARTED; + if (type == BNA_TX_T_LOOPBACK) + tx_mod->flags |= BNA_TX_MOD_F_PORT_LOOPBACK; + + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + if (tx->type == type) + bna_tx_start(tx); + } +} + +void +bna_tx_mod_stop(struct bna_tx_mod *tx_mod, enum bna_tx_type type) +{ + struct bna_tx *tx; + struct list_head *qe; + + tx_mod->flags &= ~BNA_TX_MOD_F_PORT_STARTED; + tx_mod->flags &= ~BNA_TX_MOD_F_PORT_LOOPBACK; + + tx_mod->stop_cbfn = bna_port_cb_tx_stopped; + + /** + * Before calling bna_tx_stop(), increment tx_stop_wc as many times + * as we are going to call bna_tx_stop + */ + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + if (tx->type == type) + 
bfa_wc_up(&tx_mod->tx_stop_wc); + } + + if (tx_mod->tx_stop_wc.wc_count == 0) { + tx_mod->stop_cbfn(&tx_mod->bna->port, BNA_CB_SUCCESS); + tx_mod->stop_cbfn = NULL; + return; + } + + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + if (tx->type == type) + bna_tx_stop(tx); + } +} + +void +bna_tx_mod_fail(struct bna_tx_mod *tx_mod) +{ + struct bna_tx *tx; + struct list_head *qe; + + tx_mod->flags &= ~BNA_TX_MOD_F_PORT_STARTED; + tx_mod->flags &= ~BNA_TX_MOD_F_PORT_LOOPBACK; + + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + bna_tx_fail(tx); + } +} + +void +bna_tx_mod_prio_changed(struct bna_tx_mod *tx_mod, int prio) +{ + struct bna_tx *tx; + struct list_head *qe; + + if (prio != tx_mod->priority) { + tx_mod->priority = prio; + + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + bna_tx_prio_changed(tx, prio); + } + } +} + +void +bna_tx_mod_cee_link_status(struct bna_tx_mod *tx_mod, int cee_link) +{ + struct bna_tx *tx; + struct list_head *qe; + + tx_mod->cee_link = cee_link; + + list_for_each(qe, &tx_mod->tx_active_q) { + tx = (struct bna_tx *)qe; + bna_tx_cee_link_status(tx, cee_link); + } +} diff --git a/drivers/net/bna/bna_types.h b/drivers/net/bna/bna_types.h new file mode 100644 index 000000000000..6877310f6ef4 --- /dev/null +++ b/drivers/net/bna/bna_types.h @@ -0,0 +1,1128 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. 
+ * All rights reserved + * www.brocade.com + */ +#ifndef __BNA_TYPES_H__ +#define __BNA_TYPES_H__ + +#include "cna.h" +#include "bna_hw.h" +#include "bfa_cee.h" + +/** + * + * Forward declarations + * + */ + +struct bna_txq; +struct bna_tx; +struct bna_rxq; +struct bna_cq; +struct bna_rx; +struct bna_rxf; +struct bna_port; +struct bna; +struct bnad; + +/** + * + * Enums, primitive data types + * + */ + +enum bna_status { + BNA_STATUS_T_DISABLED = 0, + BNA_STATUS_T_ENABLED = 1 +}; + +enum bna_cleanup_type { + BNA_HARD_CLEANUP = 0, + BNA_SOFT_CLEANUP = 1 +}; + +enum bna_cb_status { + BNA_CB_SUCCESS = 0, + BNA_CB_FAIL = 1, + BNA_CB_INTERRUPT = 2, + BNA_CB_BUSY = 3, + BNA_CB_INVALID_MAC = 4, + BNA_CB_MCAST_LIST_FULL = 5, + BNA_CB_UCAST_CAM_FULL = 6, + BNA_CB_WAITING = 7, + BNA_CB_NOT_EXEC = 8 +}; + +enum bna_res_type { + BNA_RES_T_MEM = 1, + BNA_RES_T_INTR = 2 +}; + +enum bna_mem_type { + BNA_MEM_T_KVA = 1, + BNA_MEM_T_DMA = 2 +}; + +enum bna_intr_type { + BNA_INTR_T_INTX = 1, + BNA_INTR_T_MSIX = 2 +}; + +enum bna_res_req_type { + BNA_RES_MEM_T_COM = 0, + BNA_RES_MEM_T_ATTR = 1, + BNA_RES_MEM_T_FWTRC = 2, + BNA_RES_MEM_T_STATS = 3, + BNA_RES_MEM_T_SWSTATS = 4, + BNA_RES_MEM_T_IBIDX = 5, + BNA_RES_MEM_T_IB_ARRAY = 6, + BNA_RES_MEM_T_INTR_ARRAY = 7, + BNA_RES_MEM_T_IDXSEG_ARRAY = 8, + BNA_RES_MEM_T_TX_ARRAY = 9, + BNA_RES_MEM_T_TXQ_ARRAY = 10, + BNA_RES_MEM_T_RX_ARRAY = 11, + BNA_RES_MEM_T_RXP_ARRAY = 12, + BNA_RES_MEM_T_RXQ_ARRAY = 13, + BNA_RES_MEM_T_UCMAC_ARRAY = 14, + BNA_RES_MEM_T_MCMAC_ARRAY = 15, + BNA_RES_MEM_T_RIT_ENTRY = 16, + BNA_RES_MEM_T_RIT_SEGMENT = 17, + BNA_RES_INTR_T_MBOX = 18, + BNA_RES_T_MAX +}; + +enum bna_tx_res_req_type { + BNA_TX_RES_MEM_T_TCB = 0, + BNA_TX_RES_MEM_T_UNMAPQ = 1, + BNA_TX_RES_MEM_T_QPT = 2, + BNA_TX_RES_MEM_T_SWQPT = 3, + BNA_TX_RES_MEM_T_PAGE = 4, + BNA_TX_RES_INTR_T_TXCMPL = 5, + BNA_TX_RES_T_MAX, +}; + +enum bna_rx_mem_type { + BNA_RX_RES_MEM_T_CCB = 0, /* CQ context */ + BNA_RX_RES_MEM_T_RCB = 1, /* CQ context */ + BNA_RX_RES_MEM_T_UNMAPQ = 2, /* UnmapQ for RxQs */ + BNA_RX_RES_MEM_T_CQPT = 3, /* CQ QPT */ + BNA_RX_RES_MEM_T_CSWQPT = 4, /* S/W QPT */ + BNA_RX_RES_MEM_T_CQPT_PAGE = 5, /* CQPT page */ + BNA_RX_RES_MEM_T_HQPT = 6, /* RX QPT */ + BNA_RX_RES_MEM_T_DQPT = 7, /* RX QPT */ + BNA_RX_RES_MEM_T_HSWQPT = 8, /* RX s/w QPT */ + BNA_RX_RES_MEM_T_DSWQPT = 9, /* RX s/w QPT */ + BNA_RX_RES_MEM_T_DPAGE = 10, /* RX s/w QPT */ + BNA_RX_RES_MEM_T_HPAGE = 11, /* RX s/w QPT */ + BNA_RX_RES_T_INTR = 12, /* Rx interrupts */ + BNA_RX_RES_T_MAX = 13 +}; + +enum bna_mbox_state { + BNA_MBOX_FREE = 0, + BNA_MBOX_POSTED = 1 +}; + +enum bna_tx_type { + BNA_TX_T_REGULAR = 0, + BNA_TX_T_LOOPBACK = 1, +}; + +enum bna_tx_flags { + BNA_TX_F_PORT_STARTED = 1, + BNA_TX_F_ENABLED = 2, + BNA_TX_F_PRIO_LOCK = 4, +}; + +enum bna_tx_mod_flags { + BNA_TX_MOD_F_PORT_STARTED = 1, + BNA_TX_MOD_F_PORT_LOOPBACK = 2, +}; + +enum bna_rx_type { + BNA_RX_T_REGULAR = 0, + BNA_RX_T_LOOPBACK = 1, +}; + +enum bna_rxp_type { + BNA_RXP_SINGLE = 1, + BNA_RXP_SLR = 2, + BNA_RXP_HDS = 3 +}; + +enum bna_rxmode { + BNA_RXMODE_PROMISC = 1, + BNA_RXMODE_DEFAULT = 2, + BNA_RXMODE_ALLMULTI = 4 +}; + +enum bna_rx_event { + RX_E_START = 1, + RX_E_STOP = 2, + RX_E_FAIL = 3, + RX_E_RXF_STARTED = 4, + RX_E_RXF_STOPPED = 5, + RX_E_RXQ_STOPPED = 6, +}; + +enum bna_rx_state { + BNA_RX_STOPPED = 1, + BNA_RX_RXF_START_WAIT = 2, + BNA_RX_STARTED = 3, + BNA_RX_RXF_STOP_WAIT = 4, + BNA_RX_RXQ_STOP_WAIT = 5, +}; + +enum bna_rx_flags { + BNA_RX_F_ENABLE = 0x01, /* bnad enabled rxf */ + BNA_RX_F_PORT_ENABLED = 0x02, /* 
Port object is enabled */ + BNA_RX_F_PORT_FAILED = 0x04, /* Port in failed state */ +}; + +enum bna_rx_mod_flags { + BNA_RX_MOD_F_PORT_STARTED = 1, + BNA_RX_MOD_F_PORT_LOOPBACK = 2, +}; + +enum bna_rxf_oper_state { + BNA_RXF_OPER_STATE_RUNNING = 0x01, /* rxf operational */ + BNA_RXF_OPER_STATE_PAUSED = 0x02, /* rxf in PAUSED state */ +}; + +enum bna_rxf_flags { + BNA_RXF_FL_STOP_PENDING = 0x01, + BNA_RXF_FL_FAILED = 0x02, + BNA_RXF_FL_RSS_CONFIG_PENDING = 0x04, + BNA_RXF_FL_OPERSTATE_CHANGED = 0x08, + BNA_RXF_FL_RXF_ENABLED = 0x10, + BNA_RXF_FL_VLAN_CONFIG_PENDING = 0x20, +}; + +enum bna_rxf_event { + RXF_E_START = 1, + RXF_E_STOP = 2, + RXF_E_FAIL = 3, + RXF_E_CAM_FLTR_MOD = 4, + RXF_E_STARTED = 5, + RXF_E_STOPPED = 6, + RXF_E_CAM_FLTR_RESP = 7, + RXF_E_PAUSE = 8, + RXF_E_RESUME = 9, + RXF_E_STAT_CLEARED = 10, +}; + +enum bna_rxf_state { + BNA_RXF_STOPPED = 1, + BNA_RXF_START_WAIT = 2, + BNA_RXF_CAM_FLTR_MOD_WAIT = 3, + BNA_RXF_STARTED = 4, + BNA_RXF_CAM_FLTR_CLR_WAIT = 5, + BNA_RXF_STOP_WAIT = 6, + BNA_RXF_PAUSE_WAIT = 7, + BNA_RXF_RESUME_WAIT = 8, + BNA_RXF_STAT_CLR_WAIT = 9, +}; + +enum bna_port_type { + BNA_PORT_T_REGULAR = 0, + BNA_PORT_T_LOOPBACK_INTERNAL = 1, + BNA_PORT_T_LOOPBACK_EXTERNAL = 2, +}; + +enum bna_link_status { + BNA_LINK_DOWN = 0, + BNA_LINK_UP = 1, + BNA_CEE_UP = 2 +}; + +enum bna_llport_flags { + BNA_LLPORT_F_ENABLED = 1, + BNA_LLPORT_F_RX_ENABLED = 2 +}; + +enum bna_port_flags { + BNA_PORT_F_DEVICE_READY = 1, + BNA_PORT_F_ENABLED = 2, + BNA_PORT_F_PAUSE_CHANGED = 4, + BNA_PORT_F_MTU_CHANGED = 8 +}; + +enum bna_pkt_rates { + BNA_PKT_RATE_10K = 10000, + BNA_PKT_RATE_20K = 20000, + BNA_PKT_RATE_30K = 30000, + BNA_PKT_RATE_40K = 40000, + BNA_PKT_RATE_50K = 50000, + BNA_PKT_RATE_60K = 60000, + BNA_PKT_RATE_70K = 70000, + BNA_PKT_RATE_80K = 80000, +}; + +enum bna_dim_load_types { + BNA_LOAD_T_HIGH_4 = 0, /* 80K <= r */ + BNA_LOAD_T_HIGH_3 = 1, /* 60K <= r < 80K */ + BNA_LOAD_T_HIGH_2 = 2, /* 50K <= r < 60K */ + BNA_LOAD_T_HIGH_1 = 3, /* 40K <= r < 50K */ + BNA_LOAD_T_LOW_1 = 4, /* 30K <= r < 40K */ + BNA_LOAD_T_LOW_2 = 5, /* 20K <= r < 30K */ + BNA_LOAD_T_LOW_3 = 6, /* 10K <= r < 20K */ + BNA_LOAD_T_LOW_4 = 7, /* r < 10K */ + BNA_LOAD_T_MAX = 8 +}; + +enum bna_dim_bias_types { + BNA_BIAS_T_SMALL = 0, /* small pkts > (large pkts * 2) */ + BNA_BIAS_T_LARGE = 1, /* Not BNA_BIAS_T_SMALL */ + BNA_BIAS_T_MAX = 2 +}; + +struct bna_mac { + /* This should be the first one */ + struct list_head qe; + u8 addr[ETH_ALEN]; +}; + +struct bna_mem_descr { + u32 len; + void *kva; + struct bna_dma_addr dma; +}; + +struct bna_mem_info { + enum bna_mem_type mem_type; + u32 len; + u32 num; + u32 align_sz; /* 0/1 = no alignment */ + struct bna_mem_descr *mdl; + void *cookie; /* For bnad to unmap dma later */ +}; + +struct bna_intr_descr { + int vector; +}; + +struct bna_intr_info { + enum bna_intr_type intr_type; + int num; + struct bna_intr_descr *idl; +}; + +union bna_res_u { + struct bna_mem_info mem_info; + struct bna_intr_info intr_info; +}; + +struct bna_res_info { + enum bna_res_type res_type; + union bna_res_u res_u; +}; + +/* HW QPT */ +struct bna_qpt { + struct bna_dma_addr hw_qpt_ptr; + void *kv_qpt_ptr; + u32 page_count; + u32 page_size; +}; + +/** + * + * Device + * + */ + +struct bna_device { + bfa_fsm_t fsm; + struct bfa_ioc ioc; + + enum bna_intr_type intr_type; + int vector; + + void (*ready_cbfn)(struct bnad *bnad, enum bna_cb_status status); + struct bnad *ready_cbarg; + + void (*stop_cbfn)(struct bnad *bnad, enum bna_cb_status status); + struct bnad *stop_cbarg; + + struct 
bna *bna; +}; + +/** + * + * Mail box + * + */ + +struct bna_mbox_qe { + /* This should be the first one */ + struct list_head qe; + + struct bfa_mbox_cmd cmd; + u32 cmd_len; + /* Callback for port, tx, rx, rxf */ + void (*cbfn)(void *arg, int status); + void *cbarg; +}; + +struct bna_mbox_mod { + enum bna_mbox_state state; + struct list_head posted_q; + u32 msg_pending; + u32 msg_ctr; + struct bna *bna; +}; + +/** + * + * Port + * + */ + +/* Pause configuration */ +struct bna_pause_config { + enum bna_status tx_pause; + enum bna_status rx_pause; +}; + +struct bna_llport { + bfa_fsm_t fsm; + enum bna_llport_flags flags; + + enum bna_port_type type; + + enum bna_link_status link_status; + + int admin_up_count; + + void (*stop_cbfn)(struct bna_port *, enum bna_cb_status); + + struct bna_mbox_qe mbox_qe; + + struct bna *bna; +}; + +struct bna_port { + bfa_fsm_t fsm; + enum bna_port_flags flags; + + enum bna_port_type type; + + struct bna_llport llport; + + struct bna_pause_config pause_config; + u8 priority; + int mtu; + + /* Callback for bna_port_disable(), port_stop() */ + void (*stop_cbfn)(void *, enum bna_cb_status); + void *stop_cbarg; + + /* Callback for bna_port_pause_config() */ + void (*pause_cbfn)(struct bnad *, enum bna_cb_status); + + /* Callback for bna_port_mtu_set() */ + void (*mtu_cbfn)(struct bnad *, enum bna_cb_status); + + void (*link_cbfn)(struct bnad *, enum bna_link_status); + + struct bfa_wc chld_stop_wc; + + struct bna_mbox_qe mbox_qe; + + struct bna *bna; +}; + +/** + * + * Interrupt Block + * + */ + +/* IB index segment structure */ +struct bna_ibidx_seg { + /* This should be the first one */ + struct list_head qe; + + u8 ib_seg_size; + u8 ib_idx_tbl_offset; +}; + +/* Interrupt structure */ +struct bna_intr { + /* This should be the first one */ + struct list_head qe; + int ref_count; + + enum bna_intr_type intr_type; + int vector; + + struct bna_ib *ib; +}; + +/* Doorbell structure */ +struct bna_ib_dbell { + void *__iomem doorbell_addr; + u32 doorbell_ack; +}; + +/* Interrupt timer configuration */ +struct bna_ib_config { + u8 coalescing_timeo; /* Unit is 5usec. 
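+				 * e.g. a value of 8 gives a 40 usec interval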
*/ + + int interpkt_count; + int interpkt_timeo; + + enum ib_flags ctrl_flags; +}; + +/* IB structure */ +struct bna_ib { + /* This should be the first one */ + struct list_head qe; + + int ib_id; + + int ref_count; + int start_count; + + struct bna_dma_addr ib_seg_host_addr; + void *ib_seg_host_addr_kva; + u32 idx_mask; /* Size >= BNA_IBIDX_MAX_SEGSIZE */ + + struct bna_ibidx_seg *idx_seg; + + struct bna_ib_dbell door_bell; + + struct bna_intr *intr; + + struct bna_ib_config ib_config; + + struct bna *bna; +}; + +/* IB module - keeps track of IBs and interrupts */ +struct bna_ib_mod { + struct bna_ib *ib; /* BFI_MAX_IB entries */ + struct bna_intr *intr; /* BFI_MAX_IB entries */ + struct bna_ibidx_seg *idx_seg; /* BNA_IBIDX_TOTAL_SEGS */ + + struct list_head ib_free_q; + + struct list_head ibidx_seg_pool[BFI_IBIDX_TOTAL_POOLS]; + + struct list_head intr_free_q; + struct list_head intr_active_q; + + struct bna *bna; +}; + +/** + * + * Tx object + * + */ + +/* Tx datapath control structure */ +#define BNA_Q_NAME_SIZE 16 +struct bna_tcb { + /* Fast path */ + void **sw_qpt; + void *unmap_q; + u32 producer_index; + u32 consumer_index; + volatile u32 *hw_consumer_index; + u32 q_depth; + void *__iomem q_dbell; + struct bna_ib_dbell *i_dbell; + int page_idx; + int page_count; + /* Control path */ + struct bna_txq *txq; + struct bnad *bnad; + enum bna_intr_type intr_type; + int intr_vector; + u8 priority; /* Current priority */ + unsigned long flags; /* Used by bnad as required */ + int id; + char name[BNA_Q_NAME_SIZE]; +}; + +/* TxQ QPT and configuration */ +struct bna_txq { + /* This should be the first one */ + struct list_head qe; + + int txq_id; + + u8 priority; + + struct bna_qpt qpt; + struct bna_tcb *tcb; + struct bna_ib *ib; + int ib_seg_offset; + + struct bna_tx *tx; + + u64 tx_packets; + u64 tx_bytes; +}; + +/* TxF structure (hardware Tx Function) */ +struct bna_txf { + int txf_id; + enum txf_flags ctrl_flags; + u16 vlan; +}; + +/* Tx object */ +struct bna_tx { + /* This should be the first one */ + struct list_head qe; + + bfa_fsm_t fsm; + enum bna_tx_flags flags; + + enum bna_tx_type type; + + struct list_head txq_q; + struct bna_txf txf; + + /* Tx event handlers */ + void (*tcb_setup_cbfn)(struct bnad *, struct bna_tcb *); + void (*tcb_destroy_cbfn)(struct bnad *, struct bna_tcb *); + void (*tx_stall_cbfn)(struct bnad *, struct bna_tcb *); + void (*tx_resume_cbfn)(struct bnad *, struct bna_tcb *); + void (*tx_cleanup_cbfn)(struct bnad *, struct bna_tcb *); + + /* callback for bna_tx_disable(), bna_tx_stop() */ + void (*stop_cbfn)(void *arg, struct bna_tx *tx, + enum bna_cb_status status); + void *stop_cbarg; + + /* callback for bna_tx_prio_set() */ + void (*prio_change_cbfn)(struct bnad *bnad, struct bna_tx *tx, + enum bna_cb_status status); + + struct bfa_wc txq_stop_wc; + + struct bna_mbox_qe mbox_qe; + + struct bna *bna; + void *priv; /* bnad's cookie */ +}; + +struct bna_tx_config { + int num_txq; + int txq_depth; + enum bna_tx_type tx_type; +}; + +struct bna_tx_event_cbfn { + /* Optional */ + void (*tcb_setup_cbfn)(struct bnad *, struct bna_tcb *); + void (*tcb_destroy_cbfn)(struct bnad *, struct bna_tcb *); + /* Mandatory */ + void (*tx_stall_cbfn)(struct bnad *, struct bna_tcb *); + void (*tx_resume_cbfn)(struct bnad *, struct bna_tcb *); + void (*tx_cleanup_cbfn)(struct bnad *, struct bna_tcb *); +}; + +/* Tx module - keeps track of free, active tx objects */ +struct bna_tx_mod { + struct bna_tx *tx; /* BFI_MAX_TXQ entries */ + struct bna_txq *txq; /* BFI_MAX_TXQ entries */ + 
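+	/* Free/active lists chain objects through their leading qe member */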
+ struct list_head tx_free_q; + struct list_head tx_active_q; + + struct list_head txq_free_q; + + /* callback for bna_tx_mod_stop() */ + void (*stop_cbfn)(struct bna_port *port, + enum bna_cb_status status); + + struct bfa_wc tx_stop_wc; + + enum bna_tx_mod_flags flags; + + int priority; + int cee_link; + + u32 txf_bmap[2]; + + struct bna *bna; +}; + +/** + * + * Receive Indirection Table + * + */ + +/* One row of RIT table */ +struct bna_rit_entry { + u8 large_rxq_id; /* used for either large or data buffers */ + u8 small_rxq_id; /* used for either small or header buffers */ +}; + +/* RIT segment */ +struct bna_rit_segment { + struct list_head qe; + + u32 rit_offset; + u32 rit_size; + /** + * max_rit_size: Varies per RIT segment depending on how RIT is + * partitioned + */ + u32 max_rit_size; + + struct bna_rit_entry *rit; +}; + +struct bna_rit_mod { + struct bna_rit_entry *rit; + struct bna_rit_segment *rit_segment; + + struct list_head rit_seg_pool[BFI_RIT_SEG_TOTAL_POOLS]; +}; + +/** + * + * Rx object + * + */ + +/* Rx datapath control structure */ +struct bna_rcb { + /* Fast path */ + void **sw_qpt; + void *unmap_q; + u32 producer_index; + u32 consumer_index; + u32 q_depth; + void *__iomem q_dbell; + int page_idx; + int page_count; + /* Control path */ + struct bna_rxq *rxq; + struct bna_cq *cq; + struct bnad *bnad; + unsigned long flags; + int id; +}; + +/* RxQ structure - QPT, configuration */ +struct bna_rxq { + struct list_head qe; + int rxq_id; + + int buffer_size; + int q_depth; + + struct bna_qpt qpt; + struct bna_rcb *rcb; + + struct bna_rxp *rxp; + struct bna_rx *rx; + + u64 rx_packets; + u64 rx_bytes; + u64 rx_packets_with_error; + u64 rxbuf_alloc_failed; +}; + +/* RxQ pair */ +union bna_rxq_u { + struct { + struct bna_rxq *hdr; + struct bna_rxq *data; + } hds; + struct { + struct bna_rxq *small; + struct bna_rxq *large; + } slr; + struct { + struct bna_rxq *only; + struct bna_rxq *reserved; + } single; +}; + +/* Packet rate for Dynamic Interrupt Moderation */ +struct bna_pkt_rate { + u32 small_pkt_cnt; + u32 large_pkt_cnt; +}; + +/* Completion control structure */ +struct bna_ccb { + /* Fast path */ + void **sw_qpt; + u32 producer_index; + volatile u32 *hw_producer_index; + u32 q_depth; + struct bna_ib_dbell *i_dbell; + struct bna_rcb *rcb[2]; + void *ctrl; /* For bnad */ + struct bna_pkt_rate pkt_rate; + int page_idx; + int page_count; + + /* Control path */ + struct bna_cq *cq; + struct bnad *bnad; + enum bna_intr_type intr_type; + int intr_vector; + u8 rx_coalescing_timeo; /* For NAPI */ + int id; + char name[BNA_Q_NAME_SIZE]; +}; + +/* CQ QPT, configuration */ +struct bna_cq { + int cq_id; + + struct bna_qpt qpt; + struct bna_ccb *ccb; + + struct bna_ib *ib; + u8 ib_seg_offset; + + struct bna_rx *rx; +}; + +struct bna_rss_config { + enum rss_hash_type hash_type; + u8 hash_mask; + u32 toeplitz_hash_key[BFI_RSS_HASH_KEY_LEN]; +}; + +struct bna_hds_config { + enum hds_header_type hdr_type; + int header_size; +}; + +/* This structure is used during RX creation */ +struct bna_rx_config { + enum bna_rx_type rx_type; + int num_paths; + enum bna_rxp_type rxp_type; + int paused; + int q_depth; + /* + * Small/Large (or Header/Data) buffer size to be configured + * for SLR and HDS queue type. Large buffer size comes from + * port->mtu. 
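+	 * The small/header size is supplied by the driver; bnad passes
+	 * BFI_SMALL_RXBUF_SIZE here.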
+ */ + int small_buff_size; + + enum bna_status rss_status; + struct bna_rss_config rss_config; + + enum bna_status hds_status; + struct bna_hds_config hds_config; + + enum bna_status vlan_strip_status; +}; + +/* Rx Path structure - one per MSIX vector/CPU */ +struct bna_rxp { + /* This should be the first one */ + struct list_head qe; + + enum bna_rxp_type type; + union bna_rxq_u rxq; + struct bna_cq cq; + + struct bna_rx *rx; + + /* MSI-x vector number for configuring RSS */ + int vector; + + struct bna_mbox_qe mbox_qe; +}; + +/* HDS configuration structure */ +struct bna_rxf_hds { + enum hds_header_type hdr_type; + int header_size; +}; + +/* RSS configuration structure */ +struct bna_rxf_rss { + enum rss_hash_type hash_type; + u8 hash_mask; + u32 toeplitz_hash_key[BFI_RSS_HASH_KEY_LEN]; +}; + +/* RxF structure (hardware Rx Function) */ +struct bna_rxf { + bfa_fsm_t fsm; + int rxf_id; + enum rxf_flags ctrl_flags; + u16 default_vlan_tag; + enum bna_rxf_oper_state rxf_oper_state; + enum bna_status hds_status; + struct bna_rxf_hds hds_cfg; + enum bna_status rss_status; + struct bna_rxf_rss rss_cfg; + struct bna_rit_segment *rit_segment; + struct bna_rx *rx; + u32 forced_offset; + struct bna_mbox_qe mbox_qe; + int mcast_rxq_id; + + /* callback for bna_rxf_start() */ + void (*start_cbfn) (struct bna_rx *rx, enum bna_cb_status status); + struct bna_rx *start_cbarg; + + /* callback for bna_rxf_stop() */ + void (*stop_cbfn) (struct bna_rx *rx, enum bna_cb_status status); + struct bna_rx *stop_cbarg; + + /* callback for bna_rxf_receive_enable() / bna_rxf_receive_disable() */ + void (*oper_state_cbfn) (struct bnad *bnad, struct bna_rx *rx, + enum bna_cb_status status); + struct bnad *oper_state_cbarg; + + /** + * callback for: + * bna_rxf_ucast_set() + * bna_rxf_{ucast/mcast}_add(), + * bna_rxf_{ucast/mcast}_del(), + * bna_rxf_mode_set() + */ + void (*cam_fltr_cbfn)(struct bnad *bnad, struct bna_rx *rx, + enum bna_cb_status status); + struct bnad *cam_fltr_cbarg; + + enum bna_rxf_flags rxf_flags; + + /* List of unicast addresses yet to be applied to h/w */ + struct list_head ucast_pending_add_q; + struct list_head ucast_pending_del_q; + int ucast_pending_set; + /* ucast addresses applied to the h/w */ + struct list_head ucast_active_q; + struct bna_mac *ucast_active_mac; + + /* List of multicast addresses yet to be applied to h/w */ + struct list_head mcast_pending_add_q; + struct list_head mcast_pending_del_q; + /* multicast addresses applied to the h/w */ + struct list_head mcast_active_q; + + /* Rx modes yet to be applied to h/w */ + enum bna_rxmode rxmode_pending; + enum bna_rxmode rxmode_pending_bitmask; + /* Rx modes applied to h/w */ + enum bna_rxmode rxmode_active; + + enum bna_status vlan_filter_status; + u32 vlan_filter_table[(BFI_MAX_VLAN + 1) / 32]; +}; + +/* Rx object */ +struct bna_rx { + /* This should be the first one */ + struct list_head qe; + + bfa_fsm_t fsm; + + enum bna_rx_type type; + + /* list-head for RX path objects */ + struct list_head rxp_q; + + struct bna_rxf rxf; + + enum bna_rx_flags rx_flags; + + struct bna_mbox_qe mbox_qe; + + struct bfa_wc rxq_stop_wc; + + /* Rx event handlers */ + void (*rcb_setup_cbfn)(struct bnad *, struct bna_rcb *); + void (*rcb_destroy_cbfn)(struct bnad *, struct bna_rcb *); + void (*ccb_setup_cbfn)(struct bnad *, struct bna_ccb *); + void (*ccb_destroy_cbfn)(struct bnad *, struct bna_ccb *); + void (*rx_cleanup_cbfn)(struct bnad *, struct bna_ccb *); + void (*rx_post_cbfn)(struct bnad *, struct bna_rcb *); + + /* callback for 
bna_rx_disable(), bna_rx_stop() */ + void (*stop_cbfn)(void *arg, struct bna_rx *rx, + enum bna_cb_status status); + void *stop_cbarg; + + struct bna *bna; + void *priv; /* bnad's cookie */ +}; + +struct bna_rx_event_cbfn { + /* Optional */ + void (*rcb_setup_cbfn)(struct bnad *, struct bna_rcb *); + void (*rcb_destroy_cbfn)(struct bnad *, struct bna_rcb *); + void (*ccb_setup_cbfn)(struct bnad *, struct bna_ccb *); + void (*ccb_destroy_cbfn)(struct bnad *, struct bna_ccb *); + /* Mandatory */ + void (*rx_cleanup_cbfn)(struct bnad *, struct bna_ccb *); + void (*rx_post_cbfn)(struct bnad *, struct bna_rcb *); +}; + +/* Rx module - keeps track of free, active rx objects */ +struct bna_rx_mod { + struct bna *bna; /* back pointer to parent */ + struct bna_rx *rx; /* BFI_MAX_RXQ entries */ + struct bna_rxp *rxp; /* BFI_MAX_RXQ entries */ + struct bna_rxq *rxq; /* BFI_MAX_RXQ entries */ + + struct list_head rx_free_q; + struct list_head rx_active_q; + int rx_free_count; + + struct list_head rxp_free_q; + int rxp_free_count; + + struct list_head rxq_free_q; + int rxq_free_count; + + enum bna_rx_mod_flags flags; + + /* callback for bna_rx_mod_stop() */ + void (*stop_cbfn)(struct bna_port *port, + enum bna_cb_status status); + + struct bfa_wc rx_stop_wc; + u32 dim_vector[BNA_LOAD_T_MAX][BNA_BIAS_T_MAX]; + u32 rxf_bmap[2]; +}; + +/** + * + * CAM + * + */ + +struct bna_ucam_mod { + struct bna_mac *ucmac; /* BFI_MAX_UCMAC entries */ + struct list_head free_q; + + struct bna *bna; +}; + +struct bna_mcam_mod { + struct bna_mac *mcmac; /* BFI_MAX_MCMAC entries */ + struct list_head free_q; + + struct bna *bna; +}; + +/** + * + * Statistics + * + */ + +struct bna_tx_stats { + int tx_state; + int tx_flags; + int num_txqs; + u32 txq_bmap[2]; + int txf_id; +}; + +struct bna_rx_stats { + int rx_state; + int rx_flags; + int num_rxps; + int num_rxqs; + u32 rxq_bmap[2]; + u32 cq_bmap[2]; + int rxf_id; + int rxf_state; + int rxf_oper_state; + int num_active_ucast; + int num_active_mcast; + int rxmode_active; + int vlan_filter_status; + u32 vlan_filter_table[(BFI_MAX_VLAN + 1) / 32]; + int rss_status; + int hds_status; +}; + +struct bna_sw_stats { + int device_state; + int port_state; + int port_flags; + int llport_state; + int priority; + int num_active_tx; + int num_active_rx; + struct bna_tx_stats tx_stats[BFI_MAX_TXQ]; + struct bna_rx_stats rx_stats[BFI_MAX_RXQ]; +}; + +struct bna_stats { + u32 txf_bmap[2]; + u32 rxf_bmap[2]; + struct bfi_ll_stats *hw_stats; + struct bna_sw_stats *sw_stats; +}; + +/** + * + * BNA + * + */ + +struct bna { + struct bfa_pcidev pcidev; + + int port_num; + + struct bna_chip_regs regs; + + struct bna_dma_addr hw_stats_dma; + struct bna_stats stats; + + struct bna_device device; + struct bfa_cee cee; + + struct bna_mbox_mod mbox_mod; + + struct bna_port port; + + struct bna_tx_mod tx_mod; + + struct bna_rx_mod rx_mod; + + struct bna_ib_mod ib_mod; + + struct bna_ucam_mod ucam_mod; + struct bna_mcam_mod mcam_mod; + + struct bna_rit_mod rit_mod; + + int rxf_default_id; + int rxf_promisc_id; + + struct bna_mbox_qe mbox_qe; + + struct bnad *bnad; +}; + +#endif /* __BNA_TYPES_H__ */ diff --git a/drivers/net/bna/bnad.c b/drivers/net/bna/bnad.c new file mode 100644 index 000000000000..491d148f88ae --- /dev/null +++ b/drivers/net/bna/bnad.c @@ -0,0 +1,3270 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License (GPL) Version 2 as
+ * published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Copyright (c) 2005-2010 Brocade Communications Systems, Inc.
+ * All rights reserved
+ * www.brocade.com
+ */
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/in.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+
+#include "bnad.h"
+#include "bna.h"
+#include "cna.h"
+
+DEFINE_MUTEX(bnad_fwimg_mutex);
+
+/*
+ * Module params
+ */
+static uint bnad_msix_disable;
+module_param(bnad_msix_disable, uint, 0444);
+MODULE_PARM_DESC(bnad_msix_disable, "Disable MSIX mode");
+
+static uint bnad_ioc_auto_recover = 1;
+module_param(bnad_ioc_auto_recover, uint, 0444);
+MODULE_PARM_DESC(bnad_ioc_auto_recover, "Enable / Disable auto recovery");
+
+/*
+ * Global variables
+ */
+u32 bnad_rxqs_per_cq = 2;
+
+const u8 bnad_bcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+
+/*
+ * Local MACROS
+ */
+#define BNAD_TX_UNMAPQ_DEPTH	(bnad->txq_depth * 2)
+
+#define BNAD_RX_UNMAPQ_DEPTH	(bnad->rxq_depth)
+
+#define BNAD_GET_MBOX_IRQ(_bnad)				\
+	(((_bnad)->cfg_flags & BNAD_CF_MSIX) ?			\
+	 ((_bnad)->msix_table[(_bnad)->msix_num - 1].vector) :	\
+	 ((_bnad)->pcidev->irq))
+
+#define BNAD_FILL_UNMAPQ_MEM_REQ(_res_info, _num, _depth)	\
+do {								\
+	(_res_info)->res_type = BNA_RES_T_MEM;			\
+	(_res_info)->res_u.mem_info.mem_type = BNA_MEM_T_KVA;	\
+	(_res_info)->res_u.mem_info.num = (_num);		\
+	(_res_info)->res_u.mem_info.len =			\
+	sizeof(struct bnad_unmap_q) +				\
+	(sizeof(struct bnad_skb_unmap) * ((_depth) - 1));	\
+} while (0)
+
+/*
+ * Reinitialize completions in CQ, once Rx is taken down
+ */
+static void
+bnad_cq_cmpl_init(struct bnad *bnad, struct bna_ccb *ccb)
+{
+	struct bna_cq_entry *cmpl, *next_cmpl;
+	unsigned int wi_range, wis = 0, ccb_prod = 0;
+	int i;
+
+	BNA_CQ_QPGE_PTR_GET(ccb_prod, ccb->sw_qpt, cmpl,
+			    wi_range);
+
+	for (i = 0; i < ccb->q_depth; i++) {
+		wis++;
+		if (likely(--wi_range))
+			next_cmpl = cmpl + 1;
+		else {
+			BNA_QE_INDX_ADD(ccb_prod, wis, ccb->q_depth);
+			wis = 0;
+			BNA_CQ_QPGE_PTR_GET(ccb_prod, ccb->sw_qpt,
+						next_cmpl, wi_range);
+		}
+		cmpl->valid = 0;
+		cmpl = next_cmpl;
+	}
+}
+
+/*
+ * Frees all pending Tx Bufs
+ * At this point no activity is expected on the Q,
+ * so DMA unmap & freeing is fine.
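+ * Only the first unmap slot of each frame stores the skb pointer;
+ * the following slots hold just the DMA addresses of its fragments.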
+ */ +static void +bnad_free_all_txbufs(struct bnad *bnad, + struct bna_tcb *tcb) +{ + u16 unmap_cons; + struct bnad_unmap_q *unmap_q = tcb->unmap_q; + struct bnad_skb_unmap *unmap_array; + struct sk_buff *skb = NULL; + int i; + + unmap_array = unmap_q->unmap_array; + + unmap_cons = 0; + while (unmap_cons < unmap_q->q_depth) { + skb = unmap_array[unmap_cons].skb; + if (!skb) { + unmap_cons++; + continue; + } + unmap_array[unmap_cons].skb = NULL; + + pci_unmap_single(bnad->pcidev, + pci_unmap_addr(&unmap_array[unmap_cons], + dma_addr), skb_headlen(skb), + PCI_DMA_TODEVICE); + + pci_unmap_addr_set(&unmap_array[unmap_cons], dma_addr, 0); + unmap_cons++; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + pci_unmap_page(bnad->pcidev, + pci_unmap_addr(&unmap_array[unmap_cons], + dma_addr), + skb_shinfo(skb)->frags[i].size, + PCI_DMA_TODEVICE); + pci_unmap_addr_set(&unmap_array[unmap_cons], dma_addr, + 0); + unmap_cons++; + } + dev_kfree_skb_any(skb); + } +} + +/* Data Path Handlers */ + +/* + * bnad_free_txbufs : Frees the Tx bufs on Tx completion + * Can be called in a) Interrupt context + * b) Sending context + * c) Tasklet context + */ +static u32 +bnad_free_txbufs(struct bnad *bnad, + struct bna_tcb *tcb) +{ + u32 sent_packets = 0, sent_bytes = 0; + u16 wis, unmap_cons, updated_hw_cons; + struct bnad_unmap_q *unmap_q = tcb->unmap_q; + struct bnad_skb_unmap *unmap_array; + struct sk_buff *skb; + int i; + + /* + * Just return if TX is stopped. This check is useful + * when bnad_free_txbufs() runs out of a tasklet scheduled + * before bnad_cb_tx_cleanup() cleared BNAD_RF_TX_STARTED bit + * but this routine runs actually after the cleanup has been + * executed. + */ + if (!test_bit(BNAD_RF_TX_STARTED, &bnad->run_flags)) + return 0; + + updated_hw_cons = *(tcb->hw_consumer_index); + + wis = BNA_Q_INDEX_CHANGE(tcb->consumer_index, + updated_hw_cons, tcb->q_depth); + + BUG_ON(!(wis <= BNA_QE_IN_USE_CNT(tcb, tcb->q_depth))); + + unmap_array = unmap_q->unmap_array; + unmap_cons = unmap_q->consumer_index; + + prefetch(&unmap_array[unmap_cons + 1]); + while (wis) { + skb = unmap_array[unmap_cons].skb; + + unmap_array[unmap_cons].skb = NULL; + + sent_packets++; + sent_bytes += skb->len; + wis -= BNA_TXQ_WI_NEEDED(1 + skb_shinfo(skb)->nr_frags); + + pci_unmap_single(bnad->pcidev, + pci_unmap_addr(&unmap_array[unmap_cons], + dma_addr), skb_headlen(skb), + PCI_DMA_TODEVICE); + pci_unmap_addr_set(&unmap_array[unmap_cons], dma_addr, 0); + BNA_QE_INDX_ADD(unmap_cons, 1, unmap_q->q_depth); + + prefetch(&unmap_array[unmap_cons + 1]); + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + prefetch(&unmap_array[unmap_cons + 1]); + + pci_unmap_page(bnad->pcidev, + pci_unmap_addr(&unmap_array[unmap_cons], + dma_addr), + skb_shinfo(skb)->frags[i].size, + PCI_DMA_TODEVICE); + pci_unmap_addr_set(&unmap_array[unmap_cons], dma_addr, + 0); + BNA_QE_INDX_ADD(unmap_cons, 1, unmap_q->q_depth); + } + dev_kfree_skb_any(skb); + } + + /* Update consumer pointers. */ + tcb->consumer_index = updated_hw_cons; + unmap_q->consumer_index = unmap_cons; + + tcb->txq->tx_packets += sent_packets; + tcb->txq->tx_bytes += sent_bytes; + + return sent_packets; +} + +/* Tx Free Tasklet function */ +/* Frees for all the tcb's in all the Tx's */ +/* + * Scheduled from sending context, so that + * the fat Tx lock is not held for too long + * in the sending context. 
+ */ +static void +bnad_tx_free_tasklet(unsigned long bnad_ptr) +{ + struct bnad *bnad = (struct bnad *)bnad_ptr; + struct bna_tcb *tcb; + u32 acked; + int i, j; + + for (i = 0; i < bnad->num_tx; i++) { + for (j = 0; j < bnad->num_txq_per_tx; j++) { + tcb = bnad->tx_info[i].tcb[j]; + if (!tcb) + continue; + if (((u16) (*tcb->hw_consumer_index) != + tcb->consumer_index) && + (!test_and_set_bit(BNAD_TXQ_FREE_SENT, + &tcb->flags))) { + acked = bnad_free_txbufs(bnad, tcb); + bna_ib_ack(tcb->i_dbell, acked); + smp_mb__before_clear_bit(); + clear_bit(BNAD_TXQ_FREE_SENT, &tcb->flags); + } + } + } +} + +static u32 +bnad_tx(struct bnad *bnad, struct bna_tcb *tcb) +{ + struct net_device *netdev = bnad->netdev; + u32 sent; + + if (test_and_set_bit(BNAD_TXQ_FREE_SENT, &tcb->flags)) + return 0; + + sent = bnad_free_txbufs(bnad, tcb); + if (sent) { + if (netif_queue_stopped(netdev) && + netif_carrier_ok(netdev) && + BNA_QE_FREE_CNT(tcb, tcb->q_depth) >= + BNAD_NETIF_WAKE_THRESHOLD) { + netif_wake_queue(netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_wakeup); + } + bna_ib_ack(tcb->i_dbell, sent); + } else + bna_ib_ack(tcb->i_dbell, 0); + + smp_mb__before_clear_bit(); + clear_bit(BNAD_TXQ_FREE_SENT, &tcb->flags); + + return sent; +} + +/* MSIX Tx Completion Handler */ +static irqreturn_t +bnad_msix_tx(int irq, void *data) +{ + struct bna_tcb *tcb = (struct bna_tcb *)data; + struct bnad *bnad = tcb->bnad; + + bnad_tx(bnad, tcb); + + return IRQ_HANDLED; +} + +static void +bnad_reset_rcb(struct bnad *bnad, struct bna_rcb *rcb) +{ + struct bnad_unmap_q *unmap_q = rcb->unmap_q; + + rcb->producer_index = 0; + rcb->consumer_index = 0; + + unmap_q->producer_index = 0; + unmap_q->consumer_index = 0; +} + +static void +bnad_free_rxbufs(struct bnad *bnad, struct bna_rcb *rcb) +{ + struct bnad_unmap_q *unmap_q; + struct sk_buff *skb; + + unmap_q = rcb->unmap_q; + while (BNA_QE_IN_USE_CNT(unmap_q, unmap_q->q_depth)) { + skb = unmap_q->unmap_array[unmap_q->consumer_index].skb; + BUG_ON(!(skb)); + unmap_q->unmap_array[unmap_q->consumer_index].skb = NULL; + pci_unmap_single(bnad->pcidev, pci_unmap_addr(&unmap_q-> + unmap_array[unmap_q->consumer_index], + dma_addr), rcb->rxq->buffer_size + + NET_IP_ALIGN, PCI_DMA_FROMDEVICE); + dev_kfree_skb(skb); + BNA_QE_INDX_ADD(unmap_q->consumer_index, 1, unmap_q->q_depth); + BNA_QE_INDX_ADD(rcb->consumer_index, 1, rcb->q_depth); + } + + bnad_reset_rcb(bnad, rcb); +} + +static void +bnad_alloc_n_post_rxbufs(struct bnad *bnad, struct bna_rcb *rcb) +{ + u16 to_alloc, alloced, unmap_prod, wi_range; + struct bnad_unmap_q *unmap_q = rcb->unmap_q; + struct bnad_skb_unmap *unmap_array; + struct bna_rxq_entry *rxent; + struct sk_buff *skb; + dma_addr_t dma_addr; + + alloced = 0; + to_alloc = + BNA_QE_FREE_CNT(unmap_q, unmap_q->q_depth); + + unmap_array = unmap_q->unmap_array; + unmap_prod = unmap_q->producer_index; + + BNA_RXQ_QPGE_PTR_GET(unmap_prod, rcb->sw_qpt, rxent, wi_range); + + while (to_alloc--) { + if (!wi_range) { + BNA_RXQ_QPGE_PTR_GET(unmap_prod, rcb->sw_qpt, rxent, + wi_range); + } + skb = alloc_skb(rcb->rxq->buffer_size + NET_IP_ALIGN, + GFP_ATOMIC); + if (unlikely(!skb)) { + BNAD_UPDATE_CTR(bnad, rxbuf_alloc_failed); + goto finishing; + } + skb->dev = bnad->netdev; + skb_reserve(skb, NET_IP_ALIGN); + unmap_array[unmap_prod].skb = skb; + dma_addr = pci_map_single(bnad->pcidev, skb->data, + rcb->rxq->buffer_size, PCI_DMA_FROMDEVICE); + pci_unmap_addr_set(&unmap_array[unmap_prod], dma_addr, + dma_addr); + BNA_SET_DMA_ADDR(dma_addr, &rxent->host_addr); + BNA_QE_INDX_ADD(unmap_prod, 
1, unmap_q->q_depth); + + rxent++; + wi_range--; + alloced++; + } + +finishing: + if (likely(alloced)) { + unmap_q->producer_index = unmap_prod; + rcb->producer_index = unmap_prod; + smp_mb(); + bna_rxq_prod_indx_doorbell(rcb); + } +} + +/* + * Locking is required in the enable path + * because it is called from a napi poll + * context, where the bna_lock is not held + * unlike the IRQ context. + */ +static void +bnad_enable_txrx_irqs(struct bnad *bnad) +{ + struct bna_tcb *tcb; + struct bna_ccb *ccb; + int i, j; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + for (i = 0; i < bnad->num_tx; i++) { + for (j = 0; j < bnad->num_txq_per_tx; j++) { + tcb = bnad->tx_info[i].tcb[j]; + bna_ib_coalescing_timer_set(tcb->i_dbell, + tcb->txq->ib->ib_config.coalescing_timeo); + bna_ib_ack(tcb->i_dbell, 0); + } + } + + for (i = 0; i < bnad->num_rx; i++) { + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + ccb = bnad->rx_info[i].rx_ctrl[j].ccb; + bnad_enable_rx_irq_unsafe(ccb); + } + } + spin_unlock_irqrestore(&bnad->bna_lock, flags); +} + +static inline void +bnad_refill_rxq(struct bnad *bnad, struct bna_rcb *rcb) +{ + struct bnad_unmap_q *unmap_q = rcb->unmap_q; + + if (!test_and_set_bit(BNAD_RXQ_REFILL, &rcb->flags)) { + if (BNA_QE_FREE_CNT(unmap_q, unmap_q->q_depth) + >> BNAD_RXQ_REFILL_THRESHOLD_SHIFT) + bnad_alloc_n_post_rxbufs(bnad, rcb); + smp_mb__before_clear_bit(); + clear_bit(BNAD_RXQ_REFILL, &rcb->flags); + } +} + +static u32 +bnad_poll_cq(struct bnad *bnad, struct bna_ccb *ccb, int budget) +{ + struct bna_cq_entry *cmpl, *next_cmpl; + struct bna_rcb *rcb = NULL; + unsigned int wi_range, packets = 0, wis = 0; + struct bnad_unmap_q *unmap_q; + struct sk_buff *skb; + u32 flags; + u32 qid0 = ccb->rcb[0]->rxq->rxq_id; + struct bna_pkt_rate *pkt_rt = &ccb->pkt_rate; + + prefetch(bnad->netdev); + BNA_CQ_QPGE_PTR_GET(ccb->producer_index, ccb->sw_qpt, cmpl, + wi_range); + BUG_ON(!(wi_range <= ccb->q_depth)); + while (cmpl->valid && packets < budget) { + packets++; + BNA_UPDATE_PKT_CNT(pkt_rt, ntohs(cmpl->length)); + + if (qid0 == cmpl->rxq_id) + rcb = ccb->rcb[0]; + else + rcb = ccb->rcb[1]; + + unmap_q = rcb->unmap_q; + + skb = unmap_q->unmap_array[unmap_q->consumer_index].skb; + BUG_ON(!(skb)); + unmap_q->unmap_array[unmap_q->consumer_index].skb = NULL; + pci_unmap_single(bnad->pcidev, + pci_unmap_addr(&unmap_q-> + unmap_array[unmap_q-> + consumer_index], + dma_addr), + rcb->rxq->buffer_size, + PCI_DMA_FROMDEVICE); + BNA_QE_INDX_ADD(unmap_q->consumer_index, 1, unmap_q->q_depth); + + /* Should be more efficient ? Performance ? 
*/ + BNA_QE_INDX_ADD(rcb->consumer_index, 1, rcb->q_depth); + + wis++; + if (likely(--wi_range)) + next_cmpl = cmpl + 1; + else { + BNA_QE_INDX_ADD(ccb->producer_index, wis, ccb->q_depth); + wis = 0; + BNA_CQ_QPGE_PTR_GET(ccb->producer_index, ccb->sw_qpt, + next_cmpl, wi_range); + BUG_ON(!(wi_range <= ccb->q_depth)); + } + prefetch(next_cmpl); + + flags = ntohl(cmpl->flags); + if (unlikely + (flags & + (BNA_CQ_EF_MAC_ERROR | BNA_CQ_EF_FCS_ERROR | + BNA_CQ_EF_TOO_LONG))) { + dev_kfree_skb_any(skb); + rcb->rxq->rx_packets_with_error++; + goto next; + } + + skb_put(skb, ntohs(cmpl->length)); + if (likely + (bnad->rx_csum && + (((flags & BNA_CQ_EF_IPV4) && + (flags & BNA_CQ_EF_L3_CKSUM_OK)) || + (flags & BNA_CQ_EF_IPV6)) && + (flags & (BNA_CQ_EF_TCP | BNA_CQ_EF_UDP)) && + (flags & BNA_CQ_EF_L4_CKSUM_OK))) + skb->ip_summed = CHECKSUM_UNNECESSARY; + else + skb->ip_summed = CHECKSUM_NONE; + + rcb->rxq->rx_packets++; + rcb->rxq->rx_bytes += skb->len; + skb->protocol = eth_type_trans(skb, bnad->netdev); + + if (bnad->vlan_grp && (flags & BNA_CQ_EF_VLAN)) { + struct bnad_rx_ctrl *rx_ctrl = + (struct bnad_rx_ctrl *)ccb->ctrl; + if (skb->ip_summed == CHECKSUM_UNNECESSARY) + vlan_gro_receive(&rx_ctrl->napi, bnad->vlan_grp, + ntohs(cmpl->vlan_tag), skb); + else + vlan_hwaccel_receive_skb(skb, + bnad->vlan_grp, + ntohs(cmpl->vlan_tag)); + + } else { /* Not VLAN tagged/stripped */ + struct bnad_rx_ctrl *rx_ctrl = + (struct bnad_rx_ctrl *)ccb->ctrl; + if (skb->ip_summed == CHECKSUM_UNNECESSARY) + napi_gro_receive(&rx_ctrl->napi, skb); + else + netif_receive_skb(skb); + } + +next: + cmpl->valid = 0; + cmpl = next_cmpl; + } + + BNA_QE_INDX_ADD(ccb->producer_index, wis, ccb->q_depth); + + if (likely(ccb)) { + bna_ib_ack(ccb->i_dbell, packets); + bnad_refill_rxq(bnad, ccb->rcb[0]); + if (ccb->rcb[1]) + bnad_refill_rxq(bnad, ccb->rcb[1]); + } else + bna_ib_ack(ccb->i_dbell, 0); + + return packets; +} + +static void +bnad_disable_rx_irq(struct bnad *bnad, struct bna_ccb *ccb) +{ + bna_ib_coalescing_timer_set(ccb->i_dbell, 0); + bna_ib_ack(ccb->i_dbell, 0); +} + +static void +bnad_enable_rx_irq(struct bnad *bnad, struct bna_ccb *ccb) +{ + spin_lock_irq(&bnad->bna_lock); /* Because of polling context */ + bnad_enable_rx_irq_unsafe(ccb); + spin_unlock_irq(&bnad->bna_lock); +} + +static void +bnad_netif_rx_schedule_poll(struct bnad *bnad, struct bna_ccb *ccb) +{ + struct bnad_rx_ctrl *rx_ctrl = (struct bnad_rx_ctrl *)(ccb->ctrl); + if (likely(napi_schedule_prep((&rx_ctrl->napi)))) { + bnad_disable_rx_irq(bnad, ccb); + __napi_schedule((&rx_ctrl->napi)); + } + BNAD_UPDATE_CTR(bnad, netif_rx_schedule); +} + +/* MSIX Rx Path Handler */ +static irqreturn_t +bnad_msix_rx(int irq, void *data) +{ + struct bna_ccb *ccb = (struct bna_ccb *)data; + struct bnad *bnad = ccb->bnad; + + bnad_netif_rx_schedule_poll(bnad, ccb); + + return IRQ_HANDLED; +} + +/* Interrupt handlers */ + +/* Mbox Interrupt Handlers */ +static irqreturn_t +bnad_msix_mbox_handler(int irq, void *data) +{ + u32 intr_status; + unsigned long flags; + struct net_device *netdev = data; + struct bnad *bnad; + + bnad = netdev_priv(netdev); + + /* BNA_ISR_GET(bnad); Inc Ref count */ + spin_lock_irqsave(&bnad->bna_lock, flags); + + bna_intr_status_get(&bnad->bna, intr_status); + + if (BNA_IS_MBOX_ERR_INTR(intr_status)) + bna_mbox_handler(&bnad->bna, intr_status); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* BNAD_ISR_PUT(bnad); Dec Ref count */ + return IRQ_HANDLED; +} + +static irqreturn_t +bnad_isr(int irq, void *data) +{ + int i, j; + u32 
intr_status; + unsigned long flags; + struct net_device *netdev = data; + struct bnad *bnad = netdev_priv(netdev); + struct bnad_rx_info *rx_info; + struct bnad_rx_ctrl *rx_ctrl; + + spin_lock_irqsave(&bnad->bna_lock, flags); + + bna_intr_status_get(&bnad->bna, intr_status); + if (!intr_status) { + spin_unlock_irqrestore(&bnad->bna_lock, flags); + return IRQ_NONE; + } + + if (BNA_IS_MBOX_ERR_INTR(intr_status)) { + bna_mbox_handler(&bnad->bna, intr_status); + if (!BNA_IS_INTX_DATA_INTR(intr_status)) { + spin_unlock_irqrestore(&bnad->bna_lock, flags); + goto done; + } + } + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Process data interrupts */ + for (i = 0; i < bnad->num_rx; i++) { + rx_info = &bnad->rx_info[i]; + if (!rx_info->rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + rx_ctrl = &rx_info->rx_ctrl[j]; + if (rx_ctrl->ccb) + bnad_netif_rx_schedule_poll(bnad, + rx_ctrl->ccb); + } + } +done: + return IRQ_HANDLED; +} + +/* + * Called in interrupt / callback context + * with bna_lock held, so cfg_flags access is OK + */ +static void +bnad_enable_mbox_irq(struct bnad *bnad) +{ + int irq = BNAD_GET_MBOX_IRQ(bnad); + + if (!(bnad->cfg_flags & BNAD_CF_MSIX)) + return; + + if (test_and_clear_bit(BNAD_RF_MBOX_IRQ_DISABLED, &bnad->run_flags)) + enable_irq(irq); + BNAD_UPDATE_CTR(bnad, mbox_intr_enabled); +} + +/* + * Called with bnad->bna_lock held b'cos of + * bnad->cfg_flags access. + */ +void +bnad_disable_mbox_irq(struct bnad *bnad) +{ + int irq = BNAD_GET_MBOX_IRQ(bnad); + + if (!(bnad->cfg_flags & BNAD_CF_MSIX)) + return; + + if (!test_and_set_bit(BNAD_RF_MBOX_IRQ_DISABLED, &bnad->run_flags)) + disable_irq_nosync(irq); + BNAD_UPDATE_CTR(bnad, mbox_intr_disabled); +} + +/* Control Path Handlers */ + +/* Callbacks */ +void +bnad_cb_device_enable_mbox_intr(struct bnad *bnad) +{ + bnad_enable_mbox_irq(bnad); +} + +void +bnad_cb_device_disable_mbox_intr(struct bnad *bnad) +{ + bnad_disable_mbox_irq(bnad); +} + +void +bnad_cb_device_enabled(struct bnad *bnad, enum bna_cb_status status) +{ + complete(&bnad->bnad_completions.ioc_comp); + bnad->bnad_completions.ioc_comp_status = status; +} + +void +bnad_cb_device_disabled(struct bnad *bnad, enum bna_cb_status status) +{ + complete(&bnad->bnad_completions.ioc_comp); + bnad->bnad_completions.ioc_comp_status = status; +} + +static void +bnad_cb_port_disabled(void *arg, enum bna_cb_status status) +{ + struct bnad *bnad = (struct bnad *)arg; + + complete(&bnad->bnad_completions.port_comp); + + netif_carrier_off(bnad->netdev); +} + +void +bnad_cb_port_link_status(struct bnad *bnad, + enum bna_link_status link_status) +{ + bool link_up = 0; + + link_up = (link_status == BNA_LINK_UP) || (link_status == BNA_CEE_UP); + + if (link_status == BNA_CEE_UP) { + set_bit(BNAD_RF_CEE_RUNNING, &bnad->run_flags); + BNAD_UPDATE_CTR(bnad, cee_up); + } else + clear_bit(BNAD_RF_CEE_RUNNING, &bnad->run_flags); + + if (link_up) { + if (!netif_carrier_ok(bnad->netdev)) { + pr_warn("bna: %s link up\n", + bnad->netdev->name); + netif_carrier_on(bnad->netdev); + BNAD_UPDATE_CTR(bnad, link_toggle); + if (test_bit(BNAD_RF_TX_STARTED, &bnad->run_flags)) { + /* Force an immediate Transmit Schedule */ + pr_info("bna: %s TX_STARTED\n", + bnad->netdev->name); + netif_wake_queue(bnad->netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_wakeup); + } else { + netif_stop_queue(bnad->netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_stop); + } + } + } else { + if (netif_carrier_ok(bnad->netdev)) { + pr_warn("bna: %s link down\n", + bnad->netdev->name); + 
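+			/* Mirror link-down to the stack */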
netif_carrier_off(bnad->netdev); + BNAD_UPDATE_CTR(bnad, link_toggle); + } + } +} + +static void +bnad_cb_tx_disabled(void *arg, struct bna_tx *tx, + enum bna_cb_status status) +{ + struct bnad *bnad = (struct bnad *)arg; + + complete(&bnad->bnad_completions.tx_comp); +} + +static void +bnad_cb_tcb_setup(struct bnad *bnad, struct bna_tcb *tcb) +{ + struct bnad_tx_info *tx_info = + (struct bnad_tx_info *)tcb->txq->tx->priv; + struct bnad_unmap_q *unmap_q = tcb->unmap_q; + + tx_info->tcb[tcb->id] = tcb; + unmap_q->producer_index = 0; + unmap_q->consumer_index = 0; + unmap_q->q_depth = BNAD_TX_UNMAPQ_DEPTH; +} + +static void +bnad_cb_tcb_destroy(struct bnad *bnad, struct bna_tcb *tcb) +{ + struct bnad_tx_info *tx_info = + (struct bnad_tx_info *)tcb->txq->tx->priv; + + tx_info->tcb[tcb->id] = NULL; +} + +static void +bnad_cb_rcb_setup(struct bnad *bnad, struct bna_rcb *rcb) +{ + struct bnad_unmap_q *unmap_q = rcb->unmap_q; + + unmap_q->producer_index = 0; + unmap_q->consumer_index = 0; + unmap_q->q_depth = BNAD_RX_UNMAPQ_DEPTH; +} + +static void +bnad_cb_ccb_setup(struct bnad *bnad, struct bna_ccb *ccb) +{ + struct bnad_rx_info *rx_info = + (struct bnad_rx_info *)ccb->cq->rx->priv; + + rx_info->rx_ctrl[ccb->id].ccb = ccb; + ccb->ctrl = &rx_info->rx_ctrl[ccb->id]; +} + +static void +bnad_cb_ccb_destroy(struct bnad *bnad, struct bna_ccb *ccb) +{ + struct bnad_rx_info *rx_info = + (struct bnad_rx_info *)ccb->cq->rx->priv; + + rx_info->rx_ctrl[ccb->id].ccb = NULL; +} + +static void +bnad_cb_tx_stall(struct bnad *bnad, struct bna_tcb *tcb) +{ + struct bnad_tx_info *tx_info = + (struct bnad_tx_info *)tcb->txq->tx->priv; + + if (tx_info != &bnad->tx_info[0]) + return; + + clear_bit(BNAD_RF_TX_STARTED, &bnad->run_flags); + netif_stop_queue(bnad->netdev); + pr_info("bna: %s TX_STOPPED\n", bnad->netdev->name); +} + +static void +bnad_cb_tx_resume(struct bnad *bnad, struct bna_tcb *tcb) +{ + if (test_and_set_bit(BNAD_RF_TX_STARTED, &bnad->run_flags)) + return; + + if (netif_carrier_ok(bnad->netdev)) { + pr_info("bna: %s TX_STARTED\n", bnad->netdev->name); + netif_wake_queue(bnad->netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_wakeup); + } +} + +static void +bnad_cb_tx_cleanup(struct bnad *bnad, struct bna_tcb *tcb) +{ + struct bnad_unmap_q *unmap_q = tcb->unmap_q; + + if (!tcb || (!tcb->unmap_q)) + return; + + if (!unmap_q->unmap_array) + return; + + if (test_and_set_bit(BNAD_TXQ_FREE_SENT, &tcb->flags)) + return; + + bnad_free_all_txbufs(bnad, tcb); + + unmap_q->producer_index = 0; + unmap_q->consumer_index = 0; + + smp_mb__before_clear_bit(); + clear_bit(BNAD_TXQ_FREE_SENT, &tcb->flags); +} + +static void +bnad_cb_rx_cleanup(struct bnad *bnad, + struct bna_ccb *ccb) +{ + bnad_cq_cmpl_init(bnad, ccb); + + bnad_free_rxbufs(bnad, ccb->rcb[0]); + clear_bit(BNAD_RXQ_STARTED, &ccb->rcb[0]->flags); + + if (ccb->rcb[1]) { + bnad_free_rxbufs(bnad, ccb->rcb[1]); + clear_bit(BNAD_RXQ_STARTED, &ccb->rcb[1]->flags); + } +} + +static void +bnad_cb_rx_post(struct bnad *bnad, struct bna_rcb *rcb) +{ + struct bnad_unmap_q *unmap_q = rcb->unmap_q; + + set_bit(BNAD_RXQ_STARTED, &rcb->flags); + + /* Now allocate & post buffers for this RCB */ + /* !!Allocation in callback context */ + if (!test_and_set_bit(BNAD_RXQ_REFILL, &rcb->flags)) { + if (BNA_QE_FREE_CNT(unmap_q, unmap_q->q_depth) + >> BNAD_RXQ_REFILL_THRESHOLD_SHIFT) + bnad_alloc_n_post_rxbufs(bnad, rcb); + smp_mb__before_clear_bit(); + clear_bit(BNAD_RXQ_REFILL, &rcb->flags); + } +} + +static void +bnad_cb_rx_disabled(void *arg, struct bna_rx *rx, + enum 
bna_cb_status status) +{ + struct bnad *bnad = (struct bnad *)arg; + + complete(&bnad->bnad_completions.rx_comp); +} + +static void +bnad_cb_rx_mcast_add(struct bnad *bnad, struct bna_rx *rx, + enum bna_cb_status status) +{ + bnad->bnad_completions.mcast_comp_status = status; + complete(&bnad->bnad_completions.mcast_comp); +} + +void +bnad_cb_stats_get(struct bnad *bnad, enum bna_cb_status status, + struct bna_stats *stats) +{ + if (status == BNA_CB_SUCCESS) + BNAD_UPDATE_CTR(bnad, hw_stats_updates); + + if (!netif_running(bnad->netdev) || + !test_bit(BNAD_RF_STATS_TIMER_RUNNING, &bnad->run_flags)) + return; + + mod_timer(&bnad->stats_timer, + jiffies + msecs_to_jiffies(BNAD_STATS_TIMER_FREQ)); +} + +void +bnad_cb_stats_clr(struct bnad *bnad) +{ +} + +/* Resource allocation, free functions */ + +static void +bnad_mem_free(struct bnad *bnad, + struct bna_mem_info *mem_info) +{ + int i; + dma_addr_t dma_pa; + + if (mem_info->mdl == NULL) + return; + + for (i = 0; i < mem_info->num; i++) { + if (mem_info->mdl[i].kva != NULL) { + if (mem_info->mem_type == BNA_MEM_T_DMA) { + BNA_GET_DMA_ADDR(&(mem_info->mdl[i].dma), + dma_pa); + pci_free_consistent(bnad->pcidev, + mem_info->mdl[i].len, + mem_info->mdl[i].kva, dma_pa); + } else + kfree(mem_info->mdl[i].kva); + } + } + kfree(mem_info->mdl); + mem_info->mdl = NULL; +} + +static int +bnad_mem_alloc(struct bnad *bnad, + struct bna_mem_info *mem_info) +{ + int i; + dma_addr_t dma_pa; + + if ((mem_info->num == 0) || (mem_info->len == 0)) { + mem_info->mdl = NULL; + return 0; + } + + mem_info->mdl = kcalloc(mem_info->num, sizeof(struct bna_mem_descr), + GFP_KERNEL); + if (mem_info->mdl == NULL) + return -ENOMEM; + + if (mem_info->mem_type == BNA_MEM_T_DMA) { + for (i = 0; i < mem_info->num; i++) { + mem_info->mdl[i].len = mem_info->len; + mem_info->mdl[i].kva = + pci_alloc_consistent(bnad->pcidev, + mem_info->len, &dma_pa); + + if (mem_info->mdl[i].kva == NULL) + goto err_return; + + BNA_SET_DMA_ADDR(dma_pa, + &(mem_info->mdl[i].dma)); + } + } else { + for (i = 0; i < mem_info->num; i++) { + mem_info->mdl[i].len = mem_info->len; + mem_info->mdl[i].kva = kzalloc(mem_info->len, + GFP_KERNEL); + if (mem_info->mdl[i].kva == NULL) + goto err_return; + } + } + + return 0; + +err_return: + bnad_mem_free(bnad, mem_info); + return -ENOMEM; +} + +/* Free IRQ for Mailbox */ +static void +bnad_mbox_irq_free(struct bnad *bnad, + struct bna_intr_info *intr_info) +{ + int irq; + unsigned long flags; + + if (intr_info->idl == NULL) + return; + + spin_lock_irqsave(&bnad->bna_lock, flags); + + bnad_disable_mbox_irq(bnad); + + irq = BNAD_GET_MBOX_IRQ(bnad); + free_irq(irq, bnad->netdev); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + kfree(intr_info->idl); +} + +/* + * Allocates IRQ for Mailbox, but keep it disabled + * This will be enabled once we get the mbox enable callback + * from bna + */ +static int +bnad_mbox_irq_alloc(struct bnad *bnad, + struct bna_intr_info *intr_info) +{ + int err; + unsigned long flags; + u32 irq; + irq_handler_t irq_handler; + + /* Mbox should use only 1 vector */ + + intr_info->idl = kzalloc(sizeof(*(intr_info->idl)), GFP_KERNEL); + if (!intr_info->idl) + return -ENOMEM; + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (bnad->cfg_flags & BNAD_CF_MSIX) { + irq_handler = (irq_handler_t)bnad_msix_mbox_handler; + irq = bnad->msix_table[bnad->msix_num - 1].vector; + flags = 0; + intr_info->intr_type = BNA_INTR_T_MSIX; + intr_info->idl[0].vector = bnad->msix_num - 1; + } else { + irq_handler = (irq_handler_t)bnad_isr; + irq = 
bnad->pcidev->irq; + flags = IRQF_SHARED; + intr_info->intr_type = BNA_INTR_T_INTX; + /* intr_info->idl.vector = 0 ? */ + } + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + sprintf(bnad->mbox_irq_name, "%s", BNAD_NAME); + + err = request_irq(irq, irq_handler, flags, + bnad->mbox_irq_name, bnad->netdev); + if (err) { + kfree(intr_info->idl); + intr_info->idl = NULL; + return err; + } + + spin_lock_irqsave(&bnad->bna_lock, flags); + bnad_disable_mbox_irq(bnad); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + return 0; +} + +static void +bnad_txrx_irq_free(struct bnad *bnad, struct bna_intr_info *intr_info) +{ + kfree(intr_info->idl); + intr_info->idl = NULL; +} + +/* Allocates Interrupt Descriptor List for MSIX/INT-X vectors */ +static int +bnad_txrx_irq_alloc(struct bnad *bnad, enum bnad_intr_source src, + uint txrx_id, struct bna_intr_info *intr_info) +{ + int i, vector_start = 0; + u32 cfg_flags; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + cfg_flags = bnad->cfg_flags; + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + if (cfg_flags & BNAD_CF_MSIX) { + intr_info->intr_type = BNA_INTR_T_MSIX; + intr_info->idl = kcalloc(intr_info->num, + sizeof(struct bna_intr_descr), + GFP_KERNEL); + if (!intr_info->idl) + return -ENOMEM; + + switch (src) { + case BNAD_INTR_TX: + vector_start = txrx_id; + break; + + case BNAD_INTR_RX: + vector_start = bnad->num_tx * bnad->num_txq_per_tx + + txrx_id; + break; + + default: + BUG(); + } + + for (i = 0; i < intr_info->num; i++) + intr_info->idl[i].vector = vector_start + i; + } else { + intr_info->intr_type = BNA_INTR_T_INTX; + intr_info->num = 1; + intr_info->idl = kcalloc(intr_info->num, + sizeof(struct bna_intr_descr), + GFP_KERNEL); + if (!intr_info->idl) + return -ENOMEM; + + switch (src) { + case BNAD_INTR_TX: + intr_info->idl[0].vector = 0x1; /* Bit mask : Tx IB */ + break; + + case BNAD_INTR_RX: + intr_info->idl[0].vector = 0x2; /* Bit mask : Rx IB */ + break; + } + } + return 0; +} + +/** + * NOTE: Should be called for MSIX only + * Unregisters Tx MSIX vector(s) from the kernel + */ +static void +bnad_tx_msix_unregister(struct bnad *bnad, struct bnad_tx_info *tx_info, + int num_txqs) +{ + int i; + int vector_num; + + for (i = 0; i < num_txqs; i++) { + if (tx_info->tcb[i] == NULL) + continue; + + vector_num = tx_info->tcb[i]->intr_vector; + free_irq(bnad->msix_table[vector_num].vector, tx_info->tcb[i]); + } +} + +/** + * NOTE: Should be called for MSIX only + * Registers Tx MSIX vector(s) and ISR(s), cookie with the kernel + */ +static int +bnad_tx_msix_register(struct bnad *bnad, struct bnad_tx_info *tx_info, + uint tx_id, int num_txqs) +{ + int i; + int err; + int vector_num; + + for (i = 0; i < num_txqs; i++) { + vector_num = tx_info->tcb[i]->intr_vector; + sprintf(tx_info->tcb[i]->name, "%s TXQ %d", bnad->netdev->name, + tx_id + tx_info->tcb[i]->id); + err = request_irq(bnad->msix_table[vector_num].vector, + (irq_handler_t)bnad_msix_tx, 0, + tx_info->tcb[i]->name, + tx_info->tcb[i]); + if (err) + goto err_return; + } + + return 0; + +err_return: + if (i > 0) + bnad_tx_msix_unregister(bnad, tx_info, (i - 1)); + return -1; +} + +/** + * NOTE: Should be called for MSIX only + * Unregisters Rx MSIX vector(s) from the kernel + */ +static void +bnad_rx_msix_unregister(struct bnad *bnad, struct bnad_rx_info *rx_info, + int num_rxps) +{ + int i; + int vector_num; + + for (i = 0; i < num_rxps; i++) { + if (rx_info->rx_ctrl[i].ccb == NULL) + continue; + + vector_num = rx_info->rx_ctrl[i].ccb->intr_vector; + 
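+		/* intr_vector indexes the msix_table entry registered at setup */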
free_irq(bnad->msix_table[vector_num].vector,
+			rx_info->rx_ctrl[i].ccb);
+	}
+}
+
+/**
+ * NOTE: Should be called for MSIX only
+ * Registers Rx MSIX vector(s) and ISR(s), cookie with the kernel
+ */
+static int
+bnad_rx_msix_register(struct bnad *bnad, struct bnad_rx_info *rx_info,
+			uint rx_id, int num_rxps)
+{
+	int i;
+	int err;
+	int vector_num;
+
+	for (i = 0; i < num_rxps; i++) {
+		vector_num = rx_info->rx_ctrl[i].ccb->intr_vector;
+		sprintf(rx_info->rx_ctrl[i].ccb->name, "%s CQ %d",
+			bnad->netdev->name,
+			rx_id + rx_info->rx_ctrl[i].ccb->id);
+		err = request_irq(bnad->msix_table[vector_num].vector,
+				  (irq_handler_t)bnad_msix_rx, 0,
+				  rx_info->rx_ctrl[i].ccb->name,
+				  rx_info->rx_ctrl[i].ccb);
+		if (err)
+			goto err_return;
+	}
+
+	return 0;
+
+err_return:
+	if (i > 0)
+		bnad_rx_msix_unregister(bnad, rx_info, (i - 1));
+	return -1;
+}
+
+/* Free Tx object Resources */
+static void
+bnad_tx_res_free(struct bnad *bnad, struct bna_res_info *res_info)
+{
+	int i;
+
+	for (i = 0; i < BNA_TX_RES_T_MAX; i++) {
+		if (res_info[i].res_type == BNA_RES_T_MEM)
+			bnad_mem_free(bnad, &res_info[i].res_u.mem_info);
+		else if (res_info[i].res_type == BNA_RES_T_INTR)
+			bnad_txrx_irq_free(bnad, &res_info[i].res_u.intr_info);
+	}
+}
+
+/* Allocates memory and interrupt resources for Tx object */
+static int
+bnad_tx_res_alloc(struct bnad *bnad, struct bna_res_info *res_info,
+		  uint tx_id)
+{
+	int i, err = 0;
+
+	for (i = 0; i < BNA_TX_RES_T_MAX; i++) {
+		if (res_info[i].res_type == BNA_RES_T_MEM)
+			err = bnad_mem_alloc(bnad,
+					&res_info[i].res_u.mem_info);
+		else if (res_info[i].res_type == BNA_RES_T_INTR)
+			err = bnad_txrx_irq_alloc(bnad, BNAD_INTR_TX, tx_id,
+					&res_info[i].res_u.intr_info);
+		if (err)
+			goto err_return;
+	}
+	return 0;
+
+err_return:
+	bnad_tx_res_free(bnad, res_info);
+	return err;
+}
+
+/* Free Rx object Resources */
+static void
+bnad_rx_res_free(struct bnad *bnad, struct bna_res_info *res_info)
+{
+	int i;
+
+	for (i = 0; i < BNA_RX_RES_T_MAX; i++) {
+		if (res_info[i].res_type == BNA_RES_T_MEM)
+			bnad_mem_free(bnad, &res_info[i].res_u.mem_info);
+		else if (res_info[i].res_type == BNA_RES_T_INTR)
+			bnad_txrx_irq_free(bnad, &res_info[i].res_u.intr_info);
+	}
+}
+
+/* Allocates memory and interrupt resources for Rx object */
+static int
+bnad_rx_res_alloc(struct bnad *bnad, struct bna_res_info *res_info,
+		  uint rx_id)
+{
+	int i, err = 0;
+
+	/* All memory needs to be allocated before setup_ccbs */
+	for (i = 0; i < BNA_RX_RES_T_MAX; i++) {
+		if (res_info[i].res_type == BNA_RES_T_MEM)
+			err = bnad_mem_alloc(bnad,
+					&res_info[i].res_u.mem_info);
+		else if (res_info[i].res_type == BNA_RES_T_INTR)
+			err = bnad_txrx_irq_alloc(bnad, BNAD_INTR_RX, rx_id,
+					&res_info[i].res_u.intr_info);
+		if (err)
+			goto err_return;
+	}
+	return 0;
+
+err_return:
+	bnad_rx_res_free(bnad, res_info);
+	return err;
+}
+
+/* Timer callbacks */
+/* a) IOC timer */
+static void
+bnad_ioc_timeout(unsigned long data)
+{
+	struct bnad *bnad = (struct bnad *)data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&bnad->bna_lock, flags);
+	bfa_ioc_timeout((void *) &bnad->bna.device.ioc);
+	spin_unlock_irqrestore(&bnad->bna_lock, flags);
+}
+
+static void
+bnad_ioc_hb_check(unsigned long data)
+{
+	struct bnad *bnad = (struct bnad *)data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&bnad->bna_lock, flags);
+	bfa_ioc_hb_check((void *) &bnad->bna.device.ioc);
+	spin_unlock_irqrestore(&bnad->bna_lock, flags);
+}
+
+static void
+bnad_ioc_sem_timeout(unsigned long data)
+{
+	struct bnad *bnad = (struct bnad *)data;
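+	/* Forwards to the bfa handler under bna_lock, like the timers above */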
+	unsigned long flags;
+
+	spin_lock_irqsave(&bnad->bna_lock, flags);
+	bfa_ioc_sem_timeout((void *) &bnad->bna.device.ioc);
+	spin_unlock_irqrestore(&bnad->bna_lock, flags);
+}
+
+/*
+ * All timer routines use bnad->bna_lock to protect against
+ * the following race, which may occur in case of no locking:
+ *	Time	CPU m		CPU n
+ *	0	1 = test_bit
+ *	1			clear_bit
+ *	2			del_timer_sync
+ *	3	mod_timer
+ */
+
+/* b) Dynamic Interrupt Moderation Timer */
+static void
+bnad_dim_timeout(unsigned long data)
+{
+	struct bnad *bnad = (struct bnad *)data;
+	struct bnad_rx_info *rx_info;
+	struct bnad_rx_ctrl *rx_ctrl;
+	int i, j;
+	unsigned long flags;
+
+	if (!netif_carrier_ok(bnad->netdev))
+		return;
+
+	spin_lock_irqsave(&bnad->bna_lock, flags);
+	for (i = 0; i < bnad->num_rx; i++) {
+		rx_info = &bnad->rx_info[i];
+		if (!rx_info->rx)
+			continue;
+		for (j = 0; j < bnad->num_rxp_per_rx; j++) {
+			rx_ctrl = &rx_info->rx_ctrl[j];
+			if (!rx_ctrl->ccb)
+				continue;
+			bna_rx_dim_update(rx_ctrl->ccb);
+		}
+	}
+
+	/* Check for BNAD_CF_DIM_ENABLED, does not eliminate a race */
+	if (test_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags))
+		mod_timer(&bnad->dim_timer,
+			jiffies + msecs_to_jiffies(BNAD_DIM_TIMER_FREQ));
+	spin_unlock_irqrestore(&bnad->bna_lock, flags);
+}
+
+/* c) Statistics Timer */
+static void
+bnad_stats_timeout(unsigned long data)
+{
+	struct bnad *bnad = (struct bnad *)data;
+	unsigned long flags;
+
+	if (!netif_running(bnad->netdev) ||
+		!test_bit(BNAD_RF_STATS_TIMER_RUNNING, &bnad->run_flags))
+		return;
+
+	spin_lock_irqsave(&bnad->bna_lock, flags);
+	bna_stats_get(&bnad->bna);
+	spin_unlock_irqrestore(&bnad->bna_lock, flags);
+}
+
+/*
+ * Set up timer for DIM
+ * Called with bnad->bna_lock held
+ */
+void
+bnad_dim_timer_start(struct bnad *bnad)
+{
+	if (bnad->cfg_flags & BNAD_CF_DIM_ENABLED &&
+		!test_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags)) {
+		setup_timer(&bnad->dim_timer, bnad_dim_timeout,
+			(unsigned long)bnad);
+		set_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags);
+		mod_timer(&bnad->dim_timer,
+			jiffies + msecs_to_jiffies(BNAD_DIM_TIMER_FREQ));
+	}
+}
+
+/*
+ * Set up timer for statistics
+ * Called with mutex_lock(&bnad->conf_mutex) held
+ */
+static void
+bnad_stats_timer_start(struct bnad *bnad)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&bnad->bna_lock, flags);
+	if (!test_and_set_bit(BNAD_RF_STATS_TIMER_RUNNING, &bnad->run_flags)) {
+		setup_timer(&bnad->stats_timer, bnad_stats_timeout,
+			(unsigned long)bnad);
+		mod_timer(&bnad->stats_timer,
+			jiffies + msecs_to_jiffies(BNAD_STATS_TIMER_FREQ));
+	}
+	spin_unlock_irqrestore(&bnad->bna_lock, flags);
+
+}
+
+/*
+ * Stops the stats timer
+ * Called with mutex_lock(&bnad->conf_mutex) held
+ */
+static void
+bnad_stats_timer_stop(struct bnad *bnad)
+{
+	int to_del = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&bnad->bna_lock, flags);
+	if (test_and_clear_bit(BNAD_RF_STATS_TIMER_RUNNING, &bnad->run_flags))
+		to_del = 1;
+	spin_unlock_irqrestore(&bnad->bna_lock, flags);
+	if (to_del)
+		del_timer_sync(&bnad->stats_timer);
+}
+
+/* Utilities */
+
+static void
+bnad_netdev_mc_list_get(struct net_device *netdev, u8 *mc_list)
+{
+	int i = 1; /* Index 0 has broadcast address */
+	struct netdev_hw_addr *mc_addr;
+
+	netdev_for_each_mc_addr(mc_addr, netdev) {
+		memcpy(&mc_list[i * ETH_ALEN], &mc_addr->addr[0],
+			ETH_ALEN);
+		i++;
+	}
+}
+
+static int
+bnad_napi_poll_rx(struct napi_struct *napi, int budget)
+{
+	struct bnad_rx_ctrl *rx_ctrl =
+		container_of(napi, struct bnad_rx_ctrl, napi);
+	struct bna_ccb *ccb;
+	struct bnad *bnad;
*bnad; + int rcvd = 0; + + ccb = rx_ctrl->ccb; + + bnad = ccb->bnad; + + if (!netif_carrier_ok(bnad->netdev)) + goto poll_exit; + + rcvd = bnad_poll_cq(bnad, ccb, budget); + if (rcvd == budget) + return rcvd; + +poll_exit: + napi_complete(napi); + + BNAD_UPDATE_CTR(bnad, netif_rx_complete); + + bnad_enable_rx_irq(bnad, ccb); + return rcvd; +} + +static int +bnad_napi_poll_txrx(struct napi_struct *napi, int budget) +{ + struct bnad_rx_ctrl *rx_ctrl = + container_of(napi, struct bnad_rx_ctrl, napi); + struct bna_ccb *ccb; + struct bnad *bnad; + int rcvd = 0; + int i, j; + + ccb = rx_ctrl->ccb; + + bnad = ccb->bnad; + + if (!netif_carrier_ok(bnad->netdev)) + goto poll_exit; + + /* Handle Tx Completions, if any */ + for (i = 0; i < bnad->num_tx; i++) { + for (j = 0; j < bnad->num_txq_per_tx; j++) + bnad_tx(bnad, bnad->tx_info[i].tcb[j]); + } + + /* Handle Rx Completions */ + rcvd = bnad_poll_cq(bnad, ccb, budget); + if (rcvd == budget) + return rcvd; +poll_exit: + napi_complete(napi); + + BNAD_UPDATE_CTR(bnad, netif_rx_complete); + + bnad_enable_txrx_irqs(bnad); + return rcvd; +} + +static void +bnad_napi_enable(struct bnad *bnad, u32 rx_id) +{ + int (*napi_poll) (struct napi_struct *, int); + struct bnad_rx_ctrl *rx_ctrl; + int i; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (bnad->cfg_flags & BNAD_CF_MSIX) + napi_poll = bnad_napi_poll_rx; + else + napi_poll = bnad_napi_poll_txrx; + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Initialize & enable NAPI */ + for (i = 0; i < bnad->num_rxp_per_rx; i++) { + rx_ctrl = &bnad->rx_info[rx_id].rx_ctrl[i]; + netif_napi_add(bnad->netdev, &rx_ctrl->napi, + napi_poll, 64); + napi_enable(&rx_ctrl->napi); + } +} + +static void +bnad_napi_disable(struct bnad *bnad, u32 rx_id) +{ + int i; + + /* First disable and then clean up */ + for (i = 0; i < bnad->num_rxp_per_rx; i++) { + napi_disable(&bnad->rx_info[rx_id].rx_ctrl[i].napi); + netif_napi_del(&bnad->rx_info[rx_id].rx_ctrl[i].napi); + } +} + +/* Should be called with conf_lock held */ +void +bnad_cleanup_tx(struct bnad *bnad, uint tx_id) +{ + struct bnad_tx_info *tx_info = &bnad->tx_info[tx_id]; + struct bna_res_info *res_info = &bnad->tx_res_info[tx_id].res_info[0]; + unsigned long flags; + + if (!tx_info->tx) + return; + + init_completion(&bnad->bnad_completions.tx_comp); + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_tx_disable(tx_info->tx, BNA_HARD_CLEANUP, bnad_cb_tx_disabled); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + wait_for_completion(&bnad->bnad_completions.tx_comp); + + if (tx_info->tcb[0]->intr_type == BNA_INTR_T_MSIX) + bnad_tx_msix_unregister(bnad, tx_info, + bnad->num_txq_per_tx); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_tx_destroy(tx_info->tx); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + tx_info->tx = NULL; + + if (0 == tx_id) + tasklet_kill(&bnad->tx_free_tasklet); + + bnad_tx_res_free(bnad, res_info); +} + +/* Should be called with conf_lock held */ +int +bnad_setup_tx(struct bnad *bnad, uint tx_id) +{ + int err; + struct bnad_tx_info *tx_info = &bnad->tx_info[tx_id]; + struct bna_res_info *res_info = &bnad->tx_res_info[tx_id].res_info[0]; + struct bna_intr_info *intr_info = + &res_info[BNA_TX_RES_INTR_T_TXCMPL].res_u.intr_info; + struct bna_tx_config *tx_config = &bnad->tx_config[tx_id]; + struct bna_tx_event_cbfn tx_cbfn; + struct bna_tx *tx; + unsigned long flags; + + /* Initialize the Tx object configuration */ + tx_config->num_txq = bnad->num_txq_per_tx; + tx_config->txq_depth = bnad->txq_depth; +
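/* Queue count and depth come from the defaults set in bnad_q_num_init() and bnad_init() (BNAD_TXQ_NUM, BNAD_TXQ_DEPTH) */ +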
tx_config->tx_type = BNA_TX_T_REGULAR; + + /* Initialize the tx event handlers */ + tx_cbfn.tcb_setup_cbfn = bnad_cb_tcb_setup; + tx_cbfn.tcb_destroy_cbfn = bnad_cb_tcb_destroy; + tx_cbfn.tx_stall_cbfn = bnad_cb_tx_stall; + tx_cbfn.tx_resume_cbfn = bnad_cb_tx_resume; + tx_cbfn.tx_cleanup_cbfn = bnad_cb_tx_cleanup; + + /* Get BNA's resource requirement for one tx object */ + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_tx_res_req(bnad->num_txq_per_tx, + bnad->txq_depth, res_info); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Fill Unmap Q memory requirements */ + BNAD_FILL_UNMAPQ_MEM_REQ( + &res_info[BNA_TX_RES_MEM_T_UNMAPQ], + bnad->num_txq_per_tx, + BNAD_TX_UNMAPQ_DEPTH); + + /* Allocate resources */ + err = bnad_tx_res_alloc(bnad, res_info, tx_id); + if (err) + return err; + + /* Ask BNA to create one Tx object, supplying required resources */ + spin_lock_irqsave(&bnad->bna_lock, flags); + tx = bna_tx_create(&bnad->bna, bnad, tx_config, &tx_cbfn, res_info, + tx_info); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + if (!tx) + goto err_return; + tx_info->tx = tx; + + /* Register ISR for the Tx object */ + if (intr_info->intr_type == BNA_INTR_T_MSIX) { + err = bnad_tx_msix_register(bnad, tx_info, + tx_id, bnad->num_txq_per_tx); + if (err) + goto err_return; + } + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_tx_enable(tx); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + return 0; + +err_return: + bnad_tx_res_free(bnad, res_info); + return err; +} + +/* Setup the rx config for bna_rx_create */ +/* bnad decides the configuration */ +static void +bnad_init_rx_config(struct bnad *bnad, struct bna_rx_config *rx_config) +{ + rx_config->rx_type = BNA_RX_T_REGULAR; + rx_config->num_paths = bnad->num_rxp_per_rx; + + if (bnad->num_rxp_per_rx > 1) { + rx_config->rss_status = BNA_STATUS_T_ENABLED; + rx_config->rss_config.hash_type = + (BFI_RSS_T_V4_TCP | + BFI_RSS_T_V6_TCP | + BFI_RSS_T_V4_IP | + BFI_RSS_T_V6_IP); + rx_config->rss_config.hash_mask = + bnad->num_rxp_per_rx - 1; + get_random_bytes(rx_config->rss_config.toeplitz_hash_key, + sizeof(rx_config->rss_config.toeplitz_hash_key)); + } else { + rx_config->rss_status = BNA_STATUS_T_DISABLED; + memset(&rx_config->rss_config, 0, + sizeof(rx_config->rss_config)); + } + rx_config->rxp_type = BNA_RXP_SLR; + rx_config->q_depth = bnad->rxq_depth; + + rx_config->small_buff_size = BFI_SMALL_RXBUF_SIZE; + + rx_config->vlan_strip_status = BNA_STATUS_T_ENABLED; +} + +/* Called with mutex_lock(&bnad->conf_mutex) held */ +void +bnad_cleanup_rx(struct bnad *bnad, uint rx_id) +{ + struct bnad_rx_info *rx_info = &bnad->rx_info[rx_id]; + struct bna_rx_config *rx_config = &bnad->rx_config[rx_id]; + struct bna_res_info *res_info = &bnad->rx_res_info[rx_id].res_info[0]; + unsigned long flags; + int dim_timer_del = 0; + + if (!rx_info->rx) + return; + + if (0 == rx_id) { + spin_lock_irqsave(&bnad->bna_lock, flags); + dim_timer_del = bnad_dim_timer_running(bnad); + if (dim_timer_del) + clear_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + if (dim_timer_del) + del_timer_sync(&bnad->dim_timer); + } + + bnad_napi_disable(bnad, rx_id); + + init_completion(&bnad->bnad_completions.rx_comp); + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_rx_disable(rx_info->rx, BNA_HARD_CLEANUP, bnad_cb_rx_disabled); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + wait_for_completion(&bnad->bnad_completions.rx_comp); + + if (rx_info->rx_ctrl[0].ccb->intr_type == BNA_INTR_T_MSIX) + 
bnad_rx_msix_unregister(bnad, rx_info, rx_config->num_paths); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_rx_destroy(rx_info->rx); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + rx_info->rx = NULL; + + bnad_rx_res_free(bnad, res_info); +} + +/* Called with mutex_lock(&bnad->conf_mutex) held */ +int +bnad_setup_rx(struct bnad *bnad, uint rx_id) +{ + int err; + struct bnad_rx_info *rx_info = &bnad->rx_info[rx_id]; + struct bna_res_info *res_info = &bnad->rx_res_info[rx_id].res_info[0]; + struct bna_intr_info *intr_info = + &res_info[BNA_RX_RES_T_INTR].res_u.intr_info; + struct bna_rx_config *rx_config = &bnad->rx_config[rx_id]; + struct bna_rx_event_cbfn rx_cbfn; + struct bna_rx *rx; + unsigned long flags; + + /* Initialize the Rx object configuration */ + bnad_init_rx_config(bnad, rx_config); + + /* Initialize the Rx event handlers */ + rx_cbfn.rcb_setup_cbfn = bnad_cb_rcb_setup; + rx_cbfn.rcb_destroy_cbfn = NULL; + rx_cbfn.ccb_setup_cbfn = bnad_cb_ccb_setup; + rx_cbfn.ccb_destroy_cbfn = bnad_cb_ccb_destroy; + rx_cbfn.rx_cleanup_cbfn = bnad_cb_rx_cleanup; + rx_cbfn.rx_post_cbfn = bnad_cb_rx_post; + + /* Get BNA's resource requirement for one Rx object */ + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_rx_res_req(rx_config, res_info); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Fill Unmap Q memory requirements */ + BNAD_FILL_UNMAPQ_MEM_REQ( + &res_info[BNA_RX_RES_MEM_T_UNMAPQ], + rx_config->num_paths + + ((rx_config->rxp_type == BNA_RXP_SINGLE) ? 0 : + rx_config->num_paths), BNAD_RX_UNMAPQ_DEPTH); + + /* Allocate resource */ + err = bnad_rx_res_alloc(bnad, res_info, rx_id); + if (err) + return err; + + /* Ask BNA to create one Rx object, supplying required resources */ + spin_lock_irqsave(&bnad->bna_lock, flags); + rx = bna_rx_create(&bnad->bna, bnad, rx_config, &rx_cbfn, res_info, + rx_info); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + if (!rx) + goto err_return; + rx_info->rx = rx; + + /* Register ISR for the Rx object */ + if (intr_info->intr_type == BNA_INTR_T_MSIX) { + err = bnad_rx_msix_register(bnad, rx_info, rx_id, + rx_config->num_paths); + if (err) + goto err_return; + } + + /* Enable NAPI */ + bnad_napi_enable(bnad, rx_id); + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (0 == rx_id) { + /* Set up Dynamic Interrupt Moderation Vector */ + if (bnad->cfg_flags & BNAD_CF_DIM_ENABLED) + bna_rx_dim_reconfig(&bnad->bna, bna_napi_dim_vector); + + /* Enable VLAN filtering only on the default Rx */ + bna_rx_vlanfilter_enable(rx); + + /* Start the DIM timer */ + bnad_dim_timer_start(bnad); + } + + bna_rx_enable(rx); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + return 0; + +err_return: + bnad_cleanup_rx(bnad, rx_id); + return err; +} + +/* Called with conf_lock & bnad->bna_lock held */ +void +bnad_tx_coalescing_timeo_set(struct bnad *bnad) +{ + struct bnad_tx_info *tx_info; + + tx_info = &bnad->tx_info[0]; + if (!tx_info->tx) + return; + + bna_tx_coalescing_timeo_set(tx_info->tx, bnad->tx_coalescing_timeo); +} + +/* Called with conf_lock & bnad->bna_lock held */ +void +bnad_rx_coalescing_timeo_set(struct bnad *bnad) +{ + struct bnad_rx_info *rx_info; + int i; + + for (i = 0; i < bnad->num_rx; i++) { + rx_info = &bnad->rx_info[i]; + if (!rx_info->rx) + continue; + bna_rx_coalescing_timeo_set(rx_info->rx, + bnad->rx_coalescing_timeo); + } +} + +/* + * Called with bnad->bna_lock held + */ +static int +bnad_mac_addr_set_locked(struct bnad *bnad, u8 *mac_addr) +{ + int ret; + + if (!is_valid_ether_addr(mac_addr)) + return -EADDRNOTAVAIL; + + /* 
If datapath is down, pretend everything went through */ + if (!bnad->rx_info[0].rx) + return 0; + + ret = bna_rx_ucast_set(bnad->rx_info[0].rx, mac_addr, NULL); + if (ret != BNA_CB_SUCCESS) + return -EADDRNOTAVAIL; + + return 0; +} + +/* Should be called with conf_lock held */ +static int +bnad_enable_default_bcast(struct bnad *bnad) +{ + struct bnad_rx_info *rx_info = &bnad->rx_info[0]; + int ret; + unsigned long flags; + + init_completion(&bnad->bnad_completions.mcast_comp); + + spin_lock_irqsave(&bnad->bna_lock, flags); + ret = bna_rx_mcast_add(rx_info->rx, (u8 *)bnad_bcast_addr, + bnad_cb_rx_mcast_add); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + if (ret == BNA_CB_SUCCESS) + wait_for_completion(&bnad->bnad_completions.mcast_comp); + else + return -ENODEV; + + if (bnad->bnad_completions.mcast_comp_status != BNA_CB_SUCCESS) + return -ENODEV; + + return 0; +} + +/* Statistics utilities */ +void +bnad_netdev_qstats_fill(struct bnad *bnad) +{ + struct net_device_stats *net_stats = &bnad->net_stats; + int i, j; + + for (i = 0; i < bnad->num_rx; i++) { + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + if (bnad->rx_info[i].rx_ctrl[j].ccb) { + net_stats->rx_packets += bnad->rx_info[i]. + rx_ctrl[j].ccb->rcb[0]->rxq->rx_packets; + net_stats->rx_bytes += bnad->rx_info[i]. + rx_ctrl[j].ccb->rcb[0]->rxq->rx_bytes; + if (bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1] && + bnad->rx_info[i].rx_ctrl[j].ccb-> + rcb[1]->rxq) { + net_stats->rx_packets += + bnad->rx_info[i].rx_ctrl[j]. + ccb->rcb[1]->rxq->rx_packets; + net_stats->rx_bytes += + bnad->rx_info[i].rx_ctrl[j]. + ccb->rcb[1]->rxq->rx_bytes; + } + } + } + } + for (i = 0; i < bnad->num_tx; i++) { + for (j = 0; j < bnad->num_txq_per_tx; j++) { + if (bnad->tx_info[i].tcb[j]) { + net_stats->tx_packets += + bnad->tx_info[i].tcb[j]->txq->tx_packets; + net_stats->tx_bytes += + bnad->tx_info[i].tcb[j]->txq->tx_bytes; + } + } + } +} + +/* + * Must be called with the bna_lock held. + */ +void +bnad_netdev_hwstats_fill(struct bnad *bnad) +{ + struct bfi_ll_stats_mac *mac_stats; + struct net_device_stats *net_stats = &bnad->net_stats; + u64 bmap; + int i; + + mac_stats = &bnad->stats.bna_stats->hw_stats->mac_stats; + net_stats->rx_errors = + mac_stats->rx_fcs_error + mac_stats->rx_alignment_error + + mac_stats->rx_frame_length_error + mac_stats->rx_code_error + + mac_stats->rx_undersize; + net_stats->tx_errors = mac_stats->tx_fcs_error + + mac_stats->tx_undersize; + net_stats->rx_dropped = mac_stats->rx_drop; + net_stats->tx_dropped = mac_stats->tx_drop; + net_stats->multicast = mac_stats->rx_multicast; + net_stats->collisions = mac_stats->tx_total_collision; + + net_stats->rx_length_errors = mac_stats->rx_frame_length_error; + + /* receive ring buffer overflow ?? 
*/ + + net_stats->rx_crc_errors = mac_stats->rx_fcs_error; + net_stats->rx_frame_errors = mac_stats->rx_alignment_error; + /* recv'r fifo overrun */ + bmap = (u64)bnad->stats.bna_stats->rxf_bmap[0] | + ((u64)bnad->stats.bna_stats->rxf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_RXF_ID_MAX); i++) { + if (bmap & 1) { + net_stats->rx_fifo_errors = + bnad->stats.bna_stats-> + hw_stats->rxf_stats[i].frame_drops; + break; + } + bmap >>= 1; + } +} + +static void +bnad_mbox_irq_sync(struct bnad *bnad) +{ + u32 irq; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (bnad->cfg_flags & BNAD_CF_MSIX) + irq = bnad->msix_table[bnad->msix_num - 1].vector; + else + irq = bnad->pcidev->irq; + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + synchronize_irq(irq); +} + +/* Utility used by bnad_start_xmit, for doing TSO */ +static int +bnad_tso_prepare(struct bnad *bnad, struct sk_buff *skb) +{ + int err; + + /* SKB_GSO_TCPV4 and SKB_GSO_TCPV6 are defined since 2.6.18. */ + BUG_ON(!(skb_shinfo(skb)->gso_type == SKB_GSO_TCPV4 || + skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6)); + if (skb_header_cloned(skb)) { + err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (err) { + BNAD_UPDATE_CTR(bnad, tso_err); + return err; + } + } + + /* + * For TSO, the TCP checksum field is seeded with pseudo-header sum + * excluding the length field. + */ + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *iph = ip_hdr(skb); + + /* Do we really need these? */ + iph->tot_len = 0; + iph->check = 0; + + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0, + IPPROTO_TCP, 0); + BNAD_UPDATE_CTR(bnad, tso4); + } else { + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + + BUG_ON(!(skb->protocol == htons(ETH_P_IPV6))); + ipv6h->payload_len = 0; + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, 0, + IPPROTO_TCP, 0); + BNAD_UPDATE_CTR(bnad, tso6); + } + + return 0; +} + +/* + * Initialize Q numbers depending on Rx Paths + * Called with bnad->bna_lock held, because of cfg_flags + * access.
+ */ +static void +bnad_q_num_init(struct bnad *bnad) +{ + int rxps; + + rxps = min((uint)num_online_cpus(), + (uint)(BNAD_MAX_RXS * BNAD_MAX_RXPS_PER_RX)); + + if (!(bnad->cfg_flags & BNAD_CF_MSIX)) + rxps = 1; /* INTx */ + + bnad->num_rx = 1; + bnad->num_tx = 1; + bnad->num_rxp_per_rx = rxps; + bnad->num_txq_per_tx = BNAD_TXQ_NUM; +} + +/* + * Adjusts the Q numbers, given a number of MSI-X vectors. + * Gives preference to RSS over Tx priority queues; in that case, + * just use 1 Tx Q. + * Called with bnad->bna_lock held because of cfg_flags access. + */ +static void +bnad_q_num_adjust(struct bnad *bnad, int msix_vectors) +{ + bnad->num_txq_per_tx = 1; + if ((msix_vectors >= (bnad->num_tx * bnad->num_txq_per_tx) + + bnad_rxqs_per_cq + BNAD_MAILBOX_MSIX_VECTORS) && + (bnad->cfg_flags & BNAD_CF_MSIX)) { + bnad->num_rxp_per_rx = msix_vectors - + (bnad->num_tx * bnad->num_txq_per_tx) - + BNAD_MAILBOX_MSIX_VECTORS; + } else + bnad->num_rxp_per_rx = 1; +} + +static void +bnad_set_netdev_perm_addr(struct bnad *bnad) +{ + struct net_device *netdev = bnad->netdev; + + memcpy(netdev->perm_addr, &bnad->perm_addr, netdev->addr_len); + if (is_zero_ether_addr(netdev->dev_addr)) + memcpy(netdev->dev_addr, &bnad->perm_addr, netdev->addr_len); +} + +/* Enable / disable device */ +static void +bnad_device_disable(struct bnad *bnad) +{ + unsigned long flags; + + init_completion(&bnad->bnad_completions.ioc_comp); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_device_disable(&bnad->bna.device, BNA_HARD_CLEANUP); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + wait_for_completion(&bnad->bnad_completions.ioc_comp); +} + +static int +bnad_device_enable(struct bnad *bnad) +{ + int err = 0; + unsigned long flags; + + init_completion(&bnad->bnad_completions.ioc_comp); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_device_enable(&bnad->bna.device); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + wait_for_completion(&bnad->bnad_completions.ioc_comp); + + if (bnad->bnad_completions.ioc_comp_status) + err = bnad->bnad_completions.ioc_comp_status; + + return err; +} + +/* Free BNA resources */ +static void +bnad_res_free(struct bnad *bnad) +{ + int i; + struct bna_res_info *res_info = &bnad->res_info[0]; + + for (i = 0; i < BNA_RES_T_MAX; i++) { + if (res_info[i].res_type == BNA_RES_T_MEM) + bnad_mem_free(bnad, &res_info[i].res_u.mem_info); + else + bnad_mbox_irq_free(bnad, &res_info[i].res_u.intr_info); + } +} + +/* Allocates memory and interrupt resources for BNA */ +static int +bnad_res_alloc(struct bnad *bnad) +{ + int i, err; + struct bna_res_info *res_info = &bnad->res_info[0]; + + for (i = 0; i < BNA_RES_T_MAX; i++) { + if (res_info[i].res_type == BNA_RES_T_MEM) + err = bnad_mem_alloc(bnad, &res_info[i].res_u.mem_info); + else + err = bnad_mbox_irq_alloc(bnad, + &res_info[i].res_u.intr_info); + if (err) + goto err_return; + } + return 0; + +err_return: + bnad_res_free(bnad); + return err; +} + +/* Interrupt enable / disable */ +static void +bnad_enable_msix(struct bnad *bnad) +{ + int i, ret; + u32 tot_msix_num; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (!(bnad->cfg_flags & BNAD_CF_MSIX)) { + spin_unlock_irqrestore(&bnad->bna_lock, flags); + return; + } + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + if (bnad->msix_table) + return; + + tot_msix_num = bnad->msix_num + bnad->msix_diag_num; + + bnad->msix_table = + kcalloc(tot_msix_num, sizeof(struct msix_entry), GFP_KERNEL); + + if (!bnad->msix_table) + goto intx_mode; + + for (i = 0; i <
tot_msix_num; i++) + bnad->msix_table[i].entry = i; + + ret = pci_enable_msix(bnad->pcidev, bnad->msix_table, tot_msix_num); + if (ret > 0) { + /* Not enough MSI-X vectors. */ + + spin_lock_irqsave(&bnad->bna_lock, flags); + /* ret = #of vectors that we got */ + bnad_q_num_adjust(bnad, ret); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + bnad->msix_num = (bnad->num_tx * bnad->num_txq_per_tx) + + (bnad->num_rx + * bnad->num_rxp_per_rx) + + BNAD_MAILBOX_MSIX_VECTORS; + tot_msix_num = bnad->msix_num + bnad->msix_diag_num; + + /* Try once more with adjusted numbers */ + /* If this fails, fall back to INTx */ + ret = pci_enable_msix(bnad->pcidev, bnad->msix_table, + tot_msix_num); + if (ret) + goto intx_mode; + + } else if (ret < 0) + goto intx_mode; + return; + +intx_mode: + + kfree(bnad->msix_table); + bnad->msix_table = NULL; + bnad->msix_num = 0; + bnad->msix_diag_num = 0; + spin_lock_irqsave(&bnad->bna_lock, flags); + bnad->cfg_flags &= ~BNAD_CF_MSIX; + bnad_q_num_init(bnad); + spin_unlock_irqrestore(&bnad->bna_lock, flags); +} + +static void +bnad_disable_msix(struct bnad *bnad) +{ + u32 cfg_flags; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + cfg_flags = bnad->cfg_flags; + if (bnad->cfg_flags & BNAD_CF_MSIX) + bnad->cfg_flags &= ~BNAD_CF_MSIX; + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + if (cfg_flags & BNAD_CF_MSIX) { + pci_disable_msix(bnad->pcidev); + kfree(bnad->msix_table); + bnad->msix_table = NULL; + } +} + +/* Netdev entry points */ +static int +bnad_open(struct net_device *netdev) +{ + int err; + struct bnad *bnad = netdev_priv(netdev); + struct bna_pause_config pause_config; + int mtu; + unsigned long flags; + + mutex_lock(&bnad->conf_mutex); + + /* Tx */ + err = bnad_setup_tx(bnad, 0); + if (err) + goto err_return; + + /* Rx */ + err = bnad_setup_rx(bnad, 0); + if (err) + goto cleanup_tx; + + /* Port */ + pause_config.tx_pause = 0; + pause_config.rx_pause = 0; + + mtu = ETH_HLEN + bnad->netdev->mtu + ETH_FCS_LEN; + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_port_mtu_set(&bnad->bna.port, mtu, NULL); + bna_port_pause_config(&bnad->bna.port, &pause_config, NULL); + bna_port_enable(&bnad->bna.port); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Enable broadcast */ + bnad_enable_default_bcast(bnad); + + /* Set the UCAST address */ + spin_lock_irqsave(&bnad->bna_lock, flags); + bnad_mac_addr_set_locked(bnad, netdev->dev_addr); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + /* Start the stats timer */ + bnad_stats_timer_start(bnad); + + mutex_unlock(&bnad->conf_mutex); + + return 0; + +cleanup_tx: + bnad_cleanup_tx(bnad, 0); + +err_return: + mutex_unlock(&bnad->conf_mutex); + return err; +} + +static int +bnad_stop(struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + + mutex_lock(&bnad->conf_mutex); + + /* Stop the stats timer */ + bnad_stats_timer_stop(bnad); + + init_completion(&bnad->bnad_completions.port_comp); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_port_disable(&bnad->bna.port, BNA_HARD_CLEANUP, + bnad_cb_port_disabled); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + wait_for_completion(&bnad->bnad_completions.port_comp); + + bnad_cleanup_tx(bnad, 0); + bnad_cleanup_rx(bnad, 0); + + /* Synchronize mailbox IRQ */ + bnad_mbox_irq_sync(bnad); + + mutex_unlock(&bnad->conf_mutex); + + return 0; +} + +/* TX */ +/* + * bnad_start_xmit : Netdev entry point for Transmit + * Called under lock held by net_device + */ +static netdev_tx_t 
+bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + + u16 txq_prod, vlan_tag = 0; + u32 unmap_prod, wis, wis_used, wi_range; + u32 vectors, vect_id, i, acked; + u32 tx_id; + int err; + + struct bnad_tx_info *tx_info; + struct bna_tcb *tcb; + struct bnad_unmap_q *unmap_q; + dma_addr_t dma_addr; + struct bna_txq_entry *txqent; + bna_txq_wi_ctrl_flag_t flags; + + if (unlikely + (skb->len <= ETH_HLEN || skb->len > BFI_TX_MAX_DATA_PER_PKT)) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + /* + * Takes care of the Tx that is scheduled between clearing the flag + * and the netif_stop_queue() call. + */ + if (unlikely(!test_bit(BNAD_RF_TX_STARTED, &bnad->run_flags))) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + tx_id = 0; + + tx_info = &bnad->tx_info[tx_id]; + tcb = tx_info->tcb[tx_id]; + unmap_q = tcb->unmap_q; + + vectors = 1 + skb_shinfo(skb)->nr_frags; + if (vectors > BFI_TX_MAX_VECTORS_PER_PKT) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + wis = BNA_TXQ_WI_NEEDED(vectors); /* 4 vectors per work item */ + acked = 0; + if (unlikely + (wis > BNA_QE_FREE_CNT(tcb, tcb->q_depth) || + vectors > BNA_QE_FREE_CNT(unmap_q, unmap_q->q_depth))) { + if ((u16) (*tcb->hw_consumer_index) != + tcb->consumer_index && + !test_and_set_bit(BNAD_TXQ_FREE_SENT, &tcb->flags)) { + acked = bnad_free_txbufs(bnad, tcb); + bna_ib_ack(tcb->i_dbell, acked); + smp_mb__before_clear_bit(); + clear_bit(BNAD_TXQ_FREE_SENT, &tcb->flags); + } else { + netif_stop_queue(netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_stop); + } + + smp_mb(); + /* + * Check again to deal with race condition between + * netif_stop_queue here, and netif_wake_queue in + * interrupt handler which is not inside netif tx lock. + */ + if (likely + (wis > BNA_QE_FREE_CNT(tcb, tcb->q_depth) || + vectors > BNA_QE_FREE_CNT(unmap_q, unmap_q->q_depth))) { + BNAD_UPDATE_CTR(bnad, netif_queue_stop); + return NETDEV_TX_BUSY; + } else { + netif_wake_queue(netdev); + BNAD_UPDATE_CTR(bnad, netif_queue_wakeup); + } + } + + unmap_prod = unmap_q->producer_index; + wis_used = 1; + vect_id = 0; + flags = 0; + + txq_prod = tcb->producer_index; + BNA_TXQ_QPGE_PTR_GET(txq_prod, tcb->sw_qpt, txqent, wi_range); + BUG_ON(!(wi_range <= tcb->q_depth)); + txqent->hdr.wi.reserved = 0; + txqent->hdr.wi.num_vectors = vectors; + txqent->hdr.wi.opcode = + htons((skb_is_gso(skb) ? BNA_TXQ_WI_SEND_LSO : + BNA_TXQ_WI_SEND)); + + if (bnad->vlan_grp && vlan_tx_tag_present(skb)) { + vlan_tag = (u16) vlan_tx_tag_get(skb); + flags |= (BNA_TXQ_WI_CF_INS_PRIO | BNA_TXQ_WI_CF_INS_VLAN); + } + if (test_bit(BNAD_RF_CEE_RUNNING, &bnad->run_flags)) { + vlan_tag = + (tcb->priority & 0x7) << 13 | (vlan_tag & 0x1fff); + flags |= (BNA_TXQ_WI_CF_INS_PRIO | BNA_TXQ_WI_CF_INS_VLAN); + } + + txqent->hdr.wi.vlan_tag = htons(vlan_tag); + + if (skb_is_gso(skb)) { + err = bnad_tso_prepare(bnad, skb); + if (err) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + txqent->hdr.wi.lso_mss = htons(skb_is_gso(skb)); + flags |= (BNA_TXQ_WI_CF_IP_CKSUM | BNA_TXQ_WI_CF_TCP_CKSUM); + txqent->hdr.wi.l4_hdr_size_n_offset = + htons(BNA_TXQ_WI_L4_HDR_N_OFFSET + (tcp_hdrlen(skb) >> 2, + skb_transport_offset(skb))); + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + u8 proto = 0; + + txqent->hdr.wi.lso_mss = 0; + + if (skb->protocol == htons(ETH_P_IP)) + proto = ip_hdr(skb)->protocol; + else if (skb->protocol == htons(ETH_P_IPV6)) { + /* nexthdr may not be TCP immediately. 
*/ + proto = ipv6_hdr(skb)->nexthdr; + } + if (proto == IPPROTO_TCP) { + flags |= BNA_TXQ_WI_CF_TCP_CKSUM; + txqent->hdr.wi.l4_hdr_size_n_offset = + htons(BNA_TXQ_WI_L4_HDR_N_OFFSET + (0, skb_transport_offset(skb))); + + BNAD_UPDATE_CTR(bnad, tcpcsum_offload); + + BUG_ON(!(skb_headlen(skb) >= + skb_transport_offset(skb) + tcp_hdrlen(skb))); + + } else if (proto == IPPROTO_UDP) { + flags |= BNA_TXQ_WI_CF_UDP_CKSUM; + txqent->hdr.wi.l4_hdr_size_n_offset = + htons(BNA_TXQ_WI_L4_HDR_N_OFFSET + (0, skb_transport_offset(skb))); + + BNAD_UPDATE_CTR(bnad, udpcsum_offload); + + BUG_ON(!(skb_headlen(skb) >= + skb_transport_offset(skb) + + sizeof(struct udphdr))); + } else { + err = skb_checksum_help(skb); + BNAD_UPDATE_CTR(bnad, csum_help); + if (err) { + dev_kfree_skb(skb); + BNAD_UPDATE_CTR(bnad, csum_help_err); + return NETDEV_TX_OK; + } + } + } else { + txqent->hdr.wi.lso_mss = 0; + txqent->hdr.wi.l4_hdr_size_n_offset = 0; + } + + txqent->hdr.wi.flags = htons(flags); + + txqent->hdr.wi.frame_length = htonl(skb->len); + + unmap_q->unmap_array[unmap_prod].skb = skb; + BUG_ON(!(skb_headlen(skb) <= BFI_TX_MAX_DATA_PER_VECTOR)); + txqent->vector[vect_id].length = htons(skb_headlen(skb)); + dma_addr = pci_map_single(bnad->pcidev, skb->data, skb_headlen(skb), + PCI_DMA_TODEVICE); + pci_unmap_addr_set(&unmap_q->unmap_array[unmap_prod], dma_addr, + dma_addr); + + BNA_SET_DMA_ADDR(dma_addr, &txqent->vector[vect_id].host_addr); + BNA_QE_INDX_ADD(unmap_prod, 1, unmap_q->q_depth); + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + u32 size = frag->size; + + if (++vect_id == BFI_TX_MAX_VECTORS_PER_WI) { + vect_id = 0; + if (--wi_range) + txqent++; + else { + BNA_QE_INDX_ADD(txq_prod, wis_used, + tcb->q_depth); + wis_used = 0; + BNA_TXQ_QPGE_PTR_GET(txq_prod, tcb->sw_qpt, + txqent, wi_range); + BUG_ON(!(wi_range <= tcb->q_depth)); + } + wis_used++; + txqent->hdr.wi_ext.opcode = htons(BNA_TXQ_WI_EXTENSION); + } + + BUG_ON(!(size <= BFI_TX_MAX_DATA_PER_VECTOR)); + txqent->vector[vect_id].length = htons(size); + dma_addr = + pci_map_page(bnad->pcidev, frag->page, + frag->page_offset, size, + PCI_DMA_TODEVICE); + pci_unmap_addr_set(&unmap_q->unmap_array[unmap_prod], dma_addr, + dma_addr); + BNA_SET_DMA_ADDR(dma_addr, &txqent->vector[vect_id].host_addr); + BNA_QE_INDX_ADD(unmap_prod, 1, unmap_q->q_depth); + } + + unmap_q->producer_index = unmap_prod; + BNA_QE_INDX_ADD(txq_prod, wis_used, tcb->q_depth); + tcb->producer_index = txq_prod; + + smp_mb(); + bna_txq_prod_indx_doorbell(tcb); + + if ((u16) (*tcb->hw_consumer_index) != tcb->consumer_index) + tasklet_schedule(&bnad->tx_free_tasklet); + + return NETDEV_TX_OK; +} + +/* + * bna_lock is used to synchronize reading of the stats structures, which + * are written by BNA under the same lock.
+ */ +static struct net_device_stats * +bnad_get_netdev_stats(struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + + memset(&bnad->net_stats, 0, sizeof(struct net_device_stats)); + + bnad_netdev_qstats_fill(bnad); + bnad_netdev_hwstats_fill(bnad); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + return &bnad->net_stats; +} + +static void +bnad_set_rx_mode(struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + u32 new_mask, valid_mask; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + + new_mask = valid_mask = 0; + + if (netdev->flags & IFF_PROMISC) { + if (!(bnad->cfg_flags & BNAD_CF_PROMISC)) { + new_mask = BNAD_RXMODE_PROMISC_DEFAULT; + valid_mask = BNAD_RXMODE_PROMISC_DEFAULT; + bnad->cfg_flags |= BNAD_CF_PROMISC; + } + } else { + if (bnad->cfg_flags & BNAD_CF_PROMISC) { + new_mask = ~BNAD_RXMODE_PROMISC_DEFAULT; + valid_mask = BNAD_RXMODE_PROMISC_DEFAULT; + bnad->cfg_flags &= ~BNAD_CF_PROMISC; + } + } + + if (netdev->flags & IFF_ALLMULTI) { + if (!(bnad->cfg_flags & BNAD_CF_ALLMULTI)) { + new_mask |= BNA_RXMODE_ALLMULTI; + valid_mask |= BNA_RXMODE_ALLMULTI; + bnad->cfg_flags |= BNAD_CF_ALLMULTI; + } + } else { + if (bnad->cfg_flags & BNAD_CF_ALLMULTI) { + new_mask &= ~BNA_RXMODE_ALLMULTI; + valid_mask |= BNA_RXMODE_ALLMULTI; + bnad->cfg_flags &= ~BNAD_CF_ALLMULTI; + } + } + + bna_rx_mode_set(bnad->rx_info[0].rx, new_mask, valid_mask, NULL); + + if (!netdev_mc_empty(netdev)) { + u8 *mcaddr_list; + int mc_count = netdev_mc_count(netdev); + + /* Index 0 holds the broadcast address */ + mcaddr_list = + kzalloc((mc_count + 1) * ETH_ALEN, + GFP_ATOMIC); + if (!mcaddr_list) + goto unlock; /* don't leak bna_lock on allocation failure */ + + memcpy(&mcaddr_list[0], &bnad_bcast_addr[0], ETH_ALEN); + + /* Copy rest of the MC addresses */ + bnad_netdev_mc_list_get(netdev, mcaddr_list); + + bna_rx_mcast_listset(bnad->rx_info[0].rx, mc_count + 1, + mcaddr_list, NULL); + + /* Should we enable BNAD_CF_ALLMULTI for err != 0 ? */ + kfree(mcaddr_list); + } +unlock: + spin_unlock_irqrestore(&bnad->bna_lock, flags); +} + +/* + * bna_lock is used to sync writes to netdev->addr + * conf_lock cannot be used since this call may be made + * in a non-blocking context.
+ */ +static int +bnad_set_mac_address(struct net_device *netdev, void *mac_addr) +{ + int err; + struct bnad *bnad = netdev_priv(netdev); + struct sockaddr *sa = (struct sockaddr *)mac_addr; + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + + err = bnad_mac_addr_set_locked(bnad, sa->sa_data); + + if (!err) + memcpy(netdev->dev_addr, sa->sa_data, netdev->addr_len); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + return err; +} + +static int +bnad_change_mtu(struct net_device *netdev, int new_mtu) +{ + int mtu, err = 0; + unsigned long flags; + + struct bnad *bnad = netdev_priv(netdev); + + if (new_mtu + ETH_HLEN < ETH_ZLEN || new_mtu > BNAD_JUMBO_MTU) + return -EINVAL; + + mutex_lock(&bnad->conf_mutex); + + netdev->mtu = new_mtu; + + mtu = ETH_HLEN + new_mtu + ETH_FCS_LEN; + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_port_mtu_set(&bnad->bna.port, mtu, NULL); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); + return err; +} + +static void +bnad_vlan_rx_register(struct net_device *netdev, + struct vlan_group *vlan_grp) +{ + struct bnad *bnad = netdev_priv(netdev); + + mutex_lock(&bnad->conf_mutex); + bnad->vlan_grp = vlan_grp; + mutex_unlock(&bnad->conf_mutex); +} + +static void +bnad_vlan_rx_add_vid(struct net_device *netdev, + unsigned short vid) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + + if (!bnad->rx_info[0].rx) + return; + + mutex_lock(&bnad->conf_mutex); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_rx_vlan_add(bnad->rx_info[0].rx, vid); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); +} + +static void +bnad_vlan_rx_kill_vid(struct net_device *netdev, + unsigned short vid) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + + if (!bnad->rx_info[0].rx) + return; + + mutex_lock(&bnad->conf_mutex); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_rx_vlan_del(bnad->rx_info[0].rx, vid); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void +bnad_netpoll(struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + struct bnad_rx_info *rx_info; + struct bnad_rx_ctrl *rx_ctrl; + u32 curr_mask; + int i, j; + + if (!(bnad->cfg_flags & BNAD_CF_MSIX)) { + bna_intx_disable(&bnad->bna, curr_mask); + bnad_isr(bnad->pcidev->irq, netdev); + bna_intx_enable(&bnad->bna, curr_mask); + } else { + for (i = 0; i < bnad->num_rx; i++) { + rx_info = &bnad->rx_info[i]; + if (!rx_info->rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + rx_ctrl = &rx_info->rx_ctrl[j]; + if (rx_ctrl->ccb) { + bnad_disable_rx_irq(bnad, + rx_ctrl->ccb); + bnad_netif_rx_schedule_poll(bnad, + rx_ctrl->ccb); + } + } + } + } +} +#endif + +static const struct net_device_ops bnad_netdev_ops = { + .ndo_open = bnad_open, + .ndo_stop = bnad_stop, + .ndo_start_xmit = bnad_start_xmit, + .ndo_get_stats = bnad_get_netdev_stats, + .ndo_set_rx_mode = bnad_set_rx_mode, + .ndo_set_multicast_list = bnad_set_rx_mode, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = bnad_set_mac_address, + .ndo_change_mtu = bnad_change_mtu, + .ndo_vlan_rx_register = bnad_vlan_rx_register, + .ndo_vlan_rx_add_vid = bnad_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = bnad_vlan_rx_kill_vid, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = bnad_netpoll +#endif +}; + +static void +bnad_netdev_init(struct bnad *bnad, bool using_dac) +{ + struct net_device *netdev = 
bnad->netdev; + + netdev->features |= NETIF_F_IPV6_CSUM; + netdev->features |= NETIF_F_TSO; + netdev->features |= NETIF_F_TSO6; + + netdev->features |= NETIF_F_GRO; + pr_warn("bna: GRO enabled, using kernel stack GRO\n"); + + netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; + + if (using_dac) + netdev->features |= NETIF_F_HIGHDMA; + + netdev->features |= + NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | + NETIF_F_HW_VLAN_FILTER; + + netdev->vlan_features = netdev->features; + netdev->mem_start = bnad->mmio_start; + netdev->mem_end = bnad->mmio_start + bnad->mmio_len - 1; + + netdev->netdev_ops = &bnad_netdev_ops; + bnad_set_ethtool_ops(netdev); +} + +/* + * 1. Initialize the bnad structure + * 2. Setup netdev pointer in pci_dev + * 3. Initialize Tx free tasklet + * 4. Initialize no. of TxQ & CQs & MSIX vectors + */ +static int +bnad_init(struct bnad *bnad, + struct pci_dev *pdev, struct net_device *netdev) +{ + unsigned long flags; + + SET_NETDEV_DEV(netdev, &pdev->dev); + pci_set_drvdata(pdev, netdev); + + bnad->netdev = netdev; + bnad->pcidev = pdev; + bnad->mmio_start = pci_resource_start(pdev, 0); + bnad->mmio_len = pci_resource_len(pdev, 0); + bnad->bar0 = ioremap_nocache(bnad->mmio_start, bnad->mmio_len); + if (!bnad->bar0) { + dev_err(&pdev->dev, "ioremap for bar0 failed\n"); + pci_set_drvdata(pdev, NULL); + return -ENOMEM; + } + pr_info("bar0 mapped to %p, len %llu\n", bnad->bar0, + (unsigned long long) bnad->mmio_len); + + spin_lock_irqsave(&bnad->bna_lock, flags); + if (!bnad_msix_disable) + bnad->cfg_flags = BNAD_CF_MSIX; + + bnad->cfg_flags |= BNAD_CF_DIM_ENABLED; + + bnad_q_num_init(bnad); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + bnad->msix_num = (bnad->num_tx * bnad->num_txq_per_tx) + + (bnad->num_rx * bnad->num_rxp_per_rx) + + BNAD_MAILBOX_MSIX_VECTORS; + bnad->msix_diag_num = 2; /* 1 for Tx, 1 for Rx */ + + bnad->txq_depth = BNAD_TXQ_DEPTH; + bnad->rxq_depth = BNAD_RXQ_DEPTH; + bnad->rx_csum = true; + + bnad->tx_coalescing_timeo = BFI_TX_COALESCING_TIMEO; + bnad->rx_coalescing_timeo = BFI_RX_COALESCING_TIMEO; + + tasklet_init(&bnad->tx_free_tasklet, bnad_tx_free_tasklet, + (unsigned long)bnad); + + return 0; +} + +/* + * Must be called after bnad_pci_uninit() + * so that iounmap() and pci_set_drvdata(NULL) + * happen only after PCI uninitialization.
+ */ +static void +bnad_uninit(struct bnad *bnad) +{ + if (bnad->bar0) + iounmap(bnad->bar0); + pci_set_drvdata(bnad->pcidev, NULL); +} + +/* + * Initialize locks + a) Per-device mutex used for serializing configuration + changes from OS interface + b) spin lock used to protect bna state machine + */ +static void +bnad_lock_init(struct bnad *bnad) +{ + spin_lock_init(&bnad->bna_lock); + mutex_init(&bnad->conf_mutex); +} + +static void +bnad_lock_uninit(struct bnad *bnad) +{ + mutex_destroy(&bnad->conf_mutex); +} + +/* PCI Initialization */ +static int +bnad_pci_init(struct bnad *bnad, + struct pci_dev *pdev, bool *using_dac) +{ + int err; + + err = pci_enable_device(pdev); + if (err) + return err; + err = pci_request_regions(pdev, BNAD_NAME); + if (err) + goto disable_device; + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) && + !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) { + *using_dac = 1; + } else { + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + err = pci_set_consistent_dma_mask(pdev, + DMA_BIT_MASK(32)); + if (err) + goto release_regions; + } + *using_dac = 0; + } + pci_set_master(pdev); + return 0; + +release_regions: + pci_release_regions(pdev); +disable_device: + pci_disable_device(pdev); + + return err; +} + +static void +bnad_pci_uninit(struct pci_dev *pdev) +{ + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static int __devinit +bnad_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *pcidev_id) +{ + bool using_dac; + int err; + struct bnad *bnad; + struct bna *bna; + struct net_device *netdev; + struct bfa_pcidev pcidev_info; + unsigned long flags; + + pr_info("bnad_pci_probe : (0x%p, 0x%p) PCI Func : (%d)\n", + pdev, pcidev_id, PCI_FUNC(pdev->devfn)); + + mutex_lock(&bnad_fwimg_mutex); + if (!cna_get_firmware_buf(pdev)) { + mutex_unlock(&bnad_fwimg_mutex); + pr_warn("Failed to load Firmware Image!\n"); + return -ENODEV; + } + mutex_unlock(&bnad_fwimg_mutex); + + /* + * Allocates sizeof(struct net_device + struct bnad) + * bnad = netdev->priv + */ + netdev = alloc_etherdev(sizeof(struct bnad)); + if (!netdev) { + dev_err(&pdev->dev, "alloc_etherdev failed\n"); + err = -ENOMEM; + return err; + } + bnad = netdev_priv(netdev); + + /* + * PCI initialization + * Output : using_dac = 1 for 64 bit DMA + * = 0 for 32 bit DMA + */ + err = bnad_pci_init(bnad, pdev, &using_dac); + if (err) + goto free_netdev; + + bnad_lock_init(bnad); + /* + * Initialize bnad structure + * Setup relation between pci_dev & netdev + * Init Tx free tasklet + */ + err = bnad_init(bnad, pdev, netdev); + if (err) + goto pci_uninit; + /* Initialize netdev structure, set up ethtool ops */ + bnad_netdev_init(bnad, using_dac); + + bnad_enable_msix(bnad); + + /* Get resource requirement from bna */ + bna_res_req(&bnad->res_info[0]); + + /* Allocate resources from bna */ + err = bnad_res_alloc(bnad); + if (err) + goto free_netdev; + + bna = &bnad->bna; + + /* Setup pcidev_info for bna_init() */ + pcidev_info.pci_slot = PCI_SLOT(bnad->pcidev->devfn); + pcidev_info.pci_func = PCI_FUNC(bnad->pcidev->devfn); + pcidev_info.device_id = bnad->pcidev->device; + pcidev_info.pci_bar_kva = bnad->bar0; + + mutex_lock(&bnad->conf_mutex); + + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_init(bna, bnad, &pcidev_info, &bnad->res_info[0]); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + bnad->stats.bna_stats = &bna->stats; + + /* Set up timers */ + setup_timer(&bnad->bna.device.ioc.ioc_timer, bnad_ioc_timeout, + ((unsigned long)bnad)); +
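/* The IOC runs off three timers: ioc_timer drives the IOC state machine, hb_timer (below) checks the f/w heartbeat, and sem_timer retries the h/w semaphore (see bnad_ioc_sem_timeout() above) */ +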
setup_timer(&bnad->bna.device.ioc.hb_timer, bnad_ioc_hb_check, + ((unsigned long)bnad)); + setup_timer(&bnad->bna.device.ioc.sem_timer, bnad_ioc_sem_timeout, + ((unsigned long)bnad)); + + /* Now start the timer before calling IOC */ + mod_timer(&bnad->bna.device.ioc.ioc_timer, + jiffies + msecs_to_jiffies(BNA_IOC_TIMER_FREQ)); + + /* + * Start the chip + * Don't care even if err != 0, bna state machine will + * deal with it + */ + err = bnad_device_enable(bnad); + + /* Get the burnt-in mac */ + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_port_mac_get(&bna->port, &bnad->perm_addr); + bnad_set_netdev_perm_addr(bnad); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); + + /* + * Make sure the link appears down to the stack + */ + netif_carrier_off(netdev); + + /* Finally, register with net_device layer */ + err = register_netdev(netdev); + if (err) { + pr_err("BNA : Registering with netdev failed\n"); + goto disable_device; + } + + return 0; + +disable_device: + mutex_lock(&bnad->conf_mutex); + bnad_device_disable(bnad); + del_timer_sync(&bnad->bna.device.ioc.ioc_timer); + del_timer_sync(&bnad->bna.device.ioc.sem_timer); + del_timer_sync(&bnad->bna.device.ioc.hb_timer); + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_uninit(bna); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + mutex_unlock(&bnad->conf_mutex); + + bnad_res_free(bnad); + bnad_disable_msix(bnad); +pci_uninit: + bnad_pci_uninit(pdev); + bnad_lock_uninit(bnad); + bnad_uninit(bnad); +free_netdev: + free_netdev(netdev); + return err; +} + +static void __devexit +bnad_pci_remove(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct bnad *bnad; + struct bna *bna; + unsigned long flags; + + if (!netdev) + return; + + pr_info("%s bnad_pci_remove\n", netdev->name); + bnad = netdev_priv(netdev); + bna = &bnad->bna; + + unregister_netdev(netdev); + + mutex_lock(&bnad->conf_mutex); + bnad_device_disable(bnad); + del_timer_sync(&bnad->bna.device.ioc.ioc_timer); + del_timer_sync(&bnad->bna.device.ioc.sem_timer); + del_timer_sync(&bnad->bna.device.ioc.hb_timer); + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_uninit(bna); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + mutex_unlock(&bnad->conf_mutex); + + bnad_res_free(bnad); + bnad_disable_msix(bnad); + bnad_pci_uninit(pdev); + bnad_lock_uninit(bnad); + bnad_uninit(bnad); + free_netdev(netdev); +} + +const struct pci_device_id bnad_pci_id_table[] = { + { + PCI_DEVICE(PCI_VENDOR_ID_BROCADE, + PCI_DEVICE_ID_BROCADE_CT), + .class = PCI_CLASS_NETWORK_ETHERNET << 8, + .class_mask = 0xffff00 + }, {0, } +}; + +MODULE_DEVICE_TABLE(pci, bnad_pci_id_table); + +static struct pci_driver bnad_pci_driver = { + .name = BNAD_NAME, + .id_table = bnad_pci_id_table, + .probe = bnad_pci_probe, + .remove = __devexit_p(bnad_pci_remove), +}; + +static int __init +bnad_module_init(void) +{ + int err; + + pr_info("Brocade 10G Ethernet driver\n"); + + bfa_ioc_auto_recover(bnad_ioc_auto_recover); + + err = pci_register_driver(&bnad_pci_driver); + if (err < 0) { + pr_err("bna : PCI registration failed in module init " + "(%d)\n", err); + return err; + } + + return 0; +} + +static void __exit +bnad_module_exit(void) +{ + pci_unregister_driver(&bnad_pci_driver); + + if (bfi_fw) + release_firmware(bfi_fw); +} + +module_init(bnad_module_init); +module_exit(bnad_module_exit); + +MODULE_AUTHOR("Brocade"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Brocade 10G PCIe Ethernet driver"); +MODULE_VERSION(BNAD_VERSION);
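+/* Advertise the firmware image in module info so userspace tooling can locate and bundle it */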
+MODULE_FIRMWARE(CNA_FW_FILE_CT); diff --git a/drivers/net/bna/bnad.h b/drivers/net/bna/bnad.h new file mode 100644 index 000000000000..3261401e35cb --- /dev/null +++ b/drivers/net/bna/bnad.h @@ -0,0 +1,334 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ +#ifndef __BNAD_H__ +#define __BNAD_H__ + +#include <linux/rtnetlink.h> +#include <linux/workqueue.h> +#include <linux/ipv6.h> +#include <linux/etherdevice.h> +#include <linux/mutex.h> +#include <linux/firmware.h> + +/* Fix for IA64 */ +#include <asm/checksum.h> +#include <net/ip6_checksum.h> + +#include <net/ip.h> +#include <net/tcp.h> + +#include "bna.h" + +#define BNAD_TXQ_DEPTH 2048 +#define BNAD_RXQ_DEPTH 2048 + +#define BNAD_MAX_TXS 1 +#define BNAD_MAX_TXQ_PER_TX 8 /* 8 priority queues */ +#define BNAD_TXQ_NUM 1 + +#define BNAD_MAX_RXS 1 +#define BNAD_MAX_RXPS_PER_RX 16 + +/* + * Control structure pointed to by ccb->ctrl, which + * determines the NAPI / LRO behavior of the CCB + * There is a 1:1 correspondence between ccb & ctrl + */ +struct bnad_rx_ctrl { + struct bna_ccb *ccb; + struct napi_struct napi; +}; + +#define BNAD_RXMODE_PROMISC_DEFAULT BNA_RXMODE_PROMISC + +#define BNAD_GET_TX_ID(_skb) (0) + +/* + * GLOBAL #defines (CONSTANTS) + */ +#define BNAD_NAME "bna" +#define BNAD_NAME_LEN 64 + +#define BNAD_VERSION "2.3.2.0" + +#define BNAD_MAILBOX_MSIX_VECTORS 1 + +#define BNAD_STATS_TIMER_FREQ 1000 /* in msecs */ +#define BNAD_DIM_TIMER_FREQ 1000 /* in msecs */ + +#define BNAD_MAX_Q_DEPTH 0x10000 +#define BNAD_MIN_Q_DEPTH 0x200 + +#define BNAD_JUMBO_MTU 9000 + +#define BNAD_NETIF_WAKE_THRESHOLD 8 + +#define BNAD_RXQ_REFILL_THRESHOLD_SHIFT 3 + +/* Bit positions for tcb->flags */ +#define BNAD_TXQ_FREE_SENT 0 + +/* Bit positions for rcb->flags */ +#define BNAD_RXQ_REFILL 0 +#define BNAD_RXQ_STARTED 1 + +/* + * DATA STRUCTURES + */ + +/* enums */ +enum bnad_intr_source { + BNAD_INTR_TX = 1, + BNAD_INTR_RX = 2 +}; + +enum bnad_link_state { + BNAD_LS_DOWN = 0, + BNAD_LS_UP = 1 +}; + +struct bnad_completion { + struct completion ioc_comp; + struct completion ucast_comp; + struct completion mcast_comp; + struct completion tx_comp; + struct completion rx_comp; + struct completion stats_comp; + struct completion port_comp; + + u8 ioc_comp_status; + u8 ucast_comp_status; + u8 mcast_comp_status; + u8 tx_comp_status; + u8 rx_comp_status; + u8 stats_comp_status; + u8 port_comp_status; +}; + +/* Tx Rx Control Stats */ +struct bnad_drv_stats { + u64 netif_queue_stop; + u64 netif_queue_wakeup; + u64 tso4; + u64 tso6; + u64 tso_err; + u64 tcpcsum_offload; + u64 udpcsum_offload; + u64 csum_help; + u64 csum_help_err; + + u64 hw_stats_updates; + u64 netif_rx_schedule; + u64 netif_rx_complete; + u64 netif_rx_dropped; + + u64 link_toggle; + u64 cee_up; + + u64 rxp_info_alloc_failed; + u64 mbox_intr_disabled; + u64 mbox_intr_enabled; + u64 tx_unmap_q_alloc_failed; + u64 rx_unmap_q_alloc_failed; + + u64 rxbuf_alloc_failed; +}; + +/* Complete driver stats */ +struct bnad_stats { + struct bnad_drv_stats drv_stats; + struct bna_stats *bna_stats; +}; + +/* Tx / Rx Resources */ +struct bnad_tx_res_info { + struct bna_res_info res_info[BNA_TX_RES_T_MAX]; +}; +
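+/* res_info[] entries in these per-object holders are indexed by the BNA_TX_RES_* / BNA_RX_RES_* enums used in bnad_setup_tx() / bnad_setup_rx() */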
+struct bnad_rx_res_info { + struct bna_res_info res_info[BNA_RX_RES_T_MAX]; +}; + +struct bnad_tx_info { + struct bna_tx *tx; /* 1:1 between tx_info & tx */ + struct bna_tcb *tcb[BNAD_MAX_TXQ_PER_TX]; +} ____cacheline_aligned; + +struct bnad_rx_info { + struct bna_rx *rx; /* 1:1 between rx_info & rx */ + + struct bnad_rx_ctrl rx_ctrl[BNAD_MAX_RXPS_PER_RX]; +} ____cacheline_aligned; + +/* Unmap queues for Tx / Rx cleanup */ +struct bnad_skb_unmap { + struct sk_buff *skb; + DECLARE_PCI_UNMAP_ADDR(dma_addr) +}; + +struct bnad_unmap_q { + u32 producer_index; + u32 consumer_index; + u32 q_depth; + /* This should be the last one */ + struct bnad_skb_unmap unmap_array[1]; +}; + +/* Bit mask values for bnad->cfg_flags */ +#define BNAD_CF_DIM_ENABLED 0x01 /* DIM */ +#define BNAD_CF_PROMISC 0x02 +#define BNAD_CF_ALLMULTI 0x04 +#define BNAD_CF_MSIX 0x08 /* If in MSIx mode */ + +/* Defines for run_flags bit-mask */ +/* Set, tested & cleared using xxx_bit() functions */ +/* Values indicate bit positions */ +#define BNAD_RF_CEE_RUNNING 1 +#define BNAD_RF_HW_ERROR 2 +#define BNAD_RF_MBOX_IRQ_DISABLED 3 +#define BNAD_RF_TX_STARTED 4 +#define BNAD_RF_RX_STARTED 5 +#define BNAD_RF_DIM_TIMER_RUNNING 6 +#define BNAD_RF_STATS_TIMER_RUNNING 7 + +struct bnad { + struct net_device *netdev; + + /* Data path */ + struct bnad_tx_info tx_info[BNAD_MAX_TXS]; + struct bnad_rx_info rx_info[BNAD_MAX_RXS]; + + struct vlan_group *vlan_grp; + /* + * These q numbers are global only because + * they are used to calculate MSI-X vectors. + * Actually the exact # of queues is per Tx/Rx + * object. + */ + u32 num_tx; + u32 num_rx; + u32 num_txq_per_tx; + u32 num_rxp_per_rx; + + u32 txq_depth; + u32 rxq_depth; + + u8 tx_coalescing_timeo; + u8 rx_coalescing_timeo; + + struct bna_rx_config rx_config[BNAD_MAX_RXS]; + struct bna_tx_config tx_config[BNAD_MAX_TXS]; + + u32 rx_csum; + + void __iomem *bar0; /* BAR0 address */ + + struct bna bna; + + u32 cfg_flags; + unsigned long run_flags; + + struct pci_dev *pcidev; + u64 mmio_start; + u64 mmio_len; + + u32 msix_num; + u32 msix_diag_num; + struct msix_entry *msix_table; + + struct mutex conf_mutex; + spinlock_t bna_lock ____cacheline_aligned; + + /* Timers */ + struct timer_list ioc_timer; + struct timer_list dim_timer; + struct timer_list stats_timer; + + /* Control path resources, memory & irq */ + struct bna_res_info res_info[BNA_RES_T_MAX]; + struct bnad_tx_res_info tx_res_info[BNAD_MAX_TXS]; + struct bnad_rx_res_info rx_res_info[BNAD_MAX_RXS]; + + struct bnad_completion bnad_completions; + + /* Burnt in MAC address */ + mac_t perm_addr; + + struct tasklet_struct tx_free_tasklet; + + /* Statistics */ + struct bnad_stats stats; + struct net_device_stats net_stats; + + struct bnad_diag *diag; + + char adapter_name[BNAD_NAME_LEN]; + char port_name[BNAD_NAME_LEN]; + char mbox_irq_name[BNAD_NAME_LEN]; +}; + +/* + * EXTERN VARIABLES + */ +extern struct firmware *bfi_fw; +extern u32 bnad_rxqs_per_cq; + +/* + * EXTERN PROTOTYPES + */ +extern u32 *cna_get_firmware_buf(struct pci_dev *pdev); +/* Netdev entry point prototypes */ +extern void bnad_set_ethtool_ops(struct net_device *netdev); + +/* Configuration & setup */ +extern void bnad_tx_coalescing_timeo_set(struct bnad *bnad); +extern void bnad_rx_coalescing_timeo_set(struct bnad *bnad); + +extern int bnad_setup_rx(struct bnad *bnad, uint rx_id); +extern int bnad_setup_tx(struct bnad *bnad, uint tx_id); +extern void bnad_cleanup_tx(struct bnad *bnad, uint tx_id); +extern void bnad_cleanup_rx(struct bnad *bnad, uint rx_id); + +/* Timer
start/stop protos */ +extern void bnad_dim_timer_start(struct bnad *bnad); + +/* Statistics */ +extern void bnad_netdev_qstats_fill(struct bnad *bnad); +extern void bnad_netdev_hwstats_fill(struct bnad *bnad); + +/* + * MACROS + */ +/* To set & get the stats counters */ +#define BNAD_UPDATE_CTR(_bnad, _ctr) \ + (((_bnad)->stats.drv_stats._ctr)++) + +#define BNAD_GET_CTR(_bnad, _ctr) ((_bnad)->stats.drv_stats._ctr) + +#define bnad_enable_rx_irq_unsafe(_ccb) \ +{ \ + bna_ib_coalescing_timer_set((_ccb)->i_dbell, \ + (_ccb)->rx_coalescing_timeo); \ + bna_ib_ack((_ccb)->i_dbell, 0); \ +} + +#define bnad_dim_timer_running(_bnad) \ + (((_bnad)->cfg_flags & BNAD_CF_DIM_ENABLED) && \ + (test_bit(BNAD_RF_DIM_TIMER_RUNNING, &((_bnad)->run_flags)))) + +#endif /* __BNAD_H__ */ diff --git a/drivers/net/bna/bnad_ethtool.c b/drivers/net/bna/bnad_ethtool.c new file mode 100644 index 000000000000..e982785b6b25 --- /dev/null +++ b/drivers/net/bna/bnad_ethtool.c @@ -0,0 +1,1282 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. + * All rights reserved + * www.brocade.com + */ + +#include "cna.h" + +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/ethtool.h> +#include <linux/rtnetlink.h> + +#include "bna.h" + +#include "bnad.h" + +#define BNAD_NUM_TXF_COUNTERS 12 +#define BNAD_NUM_RXF_COUNTERS 10 +#define BNAD_NUM_CQ_COUNTERS 3 +#define BNAD_NUM_RXQ_COUNTERS 6 +#define BNAD_NUM_TXQ_COUNTERS 5 + +#define BNAD_ETHTOOL_STATS_NUM \ + (sizeof(struct net_device_stats) / sizeof(unsigned long) + \ + sizeof(struct bnad_drv_stats) / sizeof(u64) + \ + offsetof(struct bfi_ll_stats, rxf_stats[0]) / sizeof(u64)) + +static char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { + "rx_packets", + "tx_packets", + "rx_bytes", + "tx_bytes", + "rx_errors", + "tx_errors", + "rx_dropped", + "tx_dropped", + "multicast", + "collisions", + + "rx_length_errors", + "rx_over_errors", + "rx_crc_errors", + "rx_frame_errors", + "rx_fifo_errors", + "rx_missed_errors", + + "tx_aborted_errors", + "tx_carrier_errors", + "tx_fifo_errors", + "tx_heartbeat_errors", + "tx_window_errors", + + "rx_compressed", + "tx_compressed", + + "netif_queue_stop", + "netif_queue_wakeup", + "tso4", + "tso6", + "tso_err", + "tcpcsum_offload", + "udpcsum_offload", + "csum_help", + "csum_help_err", + "hw_stats_updates", + "netif_rx_schedule", + "netif_rx_complete", + "netif_rx_dropped", + + "link_toggle", + "cee_up", + + "rxp_info_alloc_failed", + "mbox_intr_disabled", + "mbox_intr_enabled", + "tx_unmap_q_alloc_failed", + "rx_unmap_q_alloc_failed", + "rxbuf_alloc_failed", + + "mac_frame_64", + "mac_frame_65_127", + "mac_frame_128_255", + "mac_frame_256_511", + "mac_frame_512_1023", + "mac_frame_1024_1518", + "mac_frame_1518_1522", + "mac_rx_bytes", + "mac_rx_packets", + "mac_rx_fcs_error", + "mac_rx_multicast", + "mac_rx_broadcast", + "mac_rx_control_frames", + "mac_rx_pause", + "mac_rx_unknown_opcode", + "mac_rx_alignment_error", + "mac_rx_frame_length_error", + "mac_rx_code_error", + "mac_rx_carrier_sense_error", + "mac_rx_undersize", + "mac_rx_oversize", + "mac_rx_fragments",
+ "mac_rx_jabber", + "mac_rx_drop", + + "mac_tx_bytes", + "mac_tx_packets", + "mac_tx_multicast", + "mac_tx_broadcast", + "mac_tx_pause", + "mac_tx_deferral", + "mac_tx_excessive_deferral", + "mac_tx_single_collision", + "mac_tx_muliple_collision", + "mac_tx_late_collision", + "mac_tx_excessive_collision", + "mac_tx_total_collision", + "mac_tx_pause_honored", + "mac_tx_drop", + "mac_tx_jabber", + "mac_tx_fcs_error", + "mac_tx_control_frame", + "mac_tx_oversize", + "mac_tx_undersize", + "mac_tx_fragments", + + "bpc_tx_pause_0", + "bpc_tx_pause_1", + "bpc_tx_pause_2", + "bpc_tx_pause_3", + "bpc_tx_pause_4", + "bpc_tx_pause_5", + "bpc_tx_pause_6", + "bpc_tx_pause_7", + "bpc_tx_zero_pause_0", + "bpc_tx_zero_pause_1", + "bpc_tx_zero_pause_2", + "bpc_tx_zero_pause_3", + "bpc_tx_zero_pause_4", + "bpc_tx_zero_pause_5", + "bpc_tx_zero_pause_6", + "bpc_tx_zero_pause_7", + "bpc_tx_first_pause_0", + "bpc_tx_first_pause_1", + "bpc_tx_first_pause_2", + "bpc_tx_first_pause_3", + "bpc_tx_first_pause_4", + "bpc_tx_first_pause_5", + "bpc_tx_first_pause_6", + "bpc_tx_first_pause_7", + + "bpc_rx_pause_0", + "bpc_rx_pause_1", + "bpc_rx_pause_2", + "bpc_rx_pause_3", + "bpc_rx_pause_4", + "bpc_rx_pause_5", + "bpc_rx_pause_6", + "bpc_rx_pause_7", + "bpc_rx_zero_pause_0", + "bpc_rx_zero_pause_1", + "bpc_rx_zero_pause_2", + "bpc_rx_zero_pause_3", + "bpc_rx_zero_pause_4", + "bpc_rx_zero_pause_5", + "bpc_rx_zero_pause_6", + "bpc_rx_zero_pause_7", + "bpc_rx_first_pause_0", + "bpc_rx_first_pause_1", + "bpc_rx_first_pause_2", + "bpc_rx_first_pause_3", + "bpc_rx_first_pause_4", + "bpc_rx_first_pause_5", + "bpc_rx_first_pause_6", + "bpc_rx_first_pause_7", + + "rad_rx_frames", + "rad_rx_octets", + "rad_rx_vlan_frames", + "rad_rx_ucast", + "rad_rx_ucast_octets", + "rad_rx_ucast_vlan", + "rad_rx_mcast", + "rad_rx_mcast_octets", + "rad_rx_mcast_vlan", + "rad_rx_bcast", + "rad_rx_bcast_octets", + "rad_rx_bcast_vlan", + "rad_rx_drops", + + "fc_rx_ucast_octets", + "fc_rx_ucast", + "fc_rx_ucast_vlan", + "fc_rx_mcast_octets", + "fc_rx_mcast", + "fc_rx_mcast_vlan", + "fc_rx_bcast_octets", + "fc_rx_bcast", + "fc_rx_bcast_vlan", + + "fc_tx_ucast_octets", + "fc_tx_ucast", + "fc_tx_ucast_vlan", + "fc_tx_mcast_octets", + "fc_tx_mcast", + "fc_tx_mcast_vlan", + "fc_tx_bcast_octets", + "fc_tx_bcast", + "fc_tx_bcast_vlan", + "fc_tx_parity_errors", + "fc_tx_timeout", + "fc_tx_fid_parity_errors", +}; + +static int +bnad_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd) +{ + cmd->supported = SUPPORTED_10000baseT_Full; + cmd->advertising = ADVERTISED_10000baseT_Full; + cmd->autoneg = AUTONEG_DISABLE; + cmd->supported |= SUPPORTED_FIBRE; + cmd->advertising |= ADVERTISED_FIBRE; + cmd->port = PORT_FIBRE; + cmd->phy_address = 0; + + if (netif_carrier_ok(netdev)) { + cmd->speed = SPEED_10000; + cmd->duplex = DUPLEX_FULL; + } else { + cmd->speed = -1; + cmd->duplex = -1; + } + cmd->transceiver = XCVR_EXTERNAL; + cmd->maxtxpkt = 0; + cmd->maxrxpkt = 0; + + return 0; +} + +static int +bnad_set_settings(struct net_device *netdev, struct ethtool_cmd *cmd) +{ + /* 10G full duplex setting supported only */ + if (cmd->autoneg == AUTONEG_ENABLE) + return -EOPNOTSUPP; else { + if ((cmd->speed == SPEED_10000) && (cmd->duplex == DUPLEX_FULL)) + return 0; + } + + return -EOPNOTSUPP; +} + +static void +bnad_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) +{ + struct bnad *bnad = netdev_priv(netdev); + struct bfa_ioc_attr *ioc_attr; + unsigned long flags; + + strcpy(drvinfo->driver, BNAD_NAME); + strcpy(drvinfo->version, 
BNAD_VERSION); + + ioc_attr = kzalloc(sizeof(*ioc_attr), GFP_KERNEL); + if (ioc_attr) { + memset(ioc_attr, 0, sizeof(*ioc_attr)); + spin_lock_irqsave(&bnad->bna_lock, flags); + bfa_ioc_get_attr(&bnad->bna.device.ioc, ioc_attr); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + strncpy(drvinfo->fw_version, ioc_attr->adapter_attr.fw_ver, + sizeof(drvinfo->fw_version) - 1); + kfree(ioc_attr); + } + + strncpy(drvinfo->bus_info, pci_name(bnad->pcidev), ETHTOOL_BUSINFO_LEN); +} + +static int +get_regs(struct bnad *bnad, u32 * regs) +{ + int num = 0, i; + u32 reg_addr; + unsigned long flags; + +#define BNAD_GET_REG(addr) \ +do { \ + if (regs) \ + regs[num++] = readl(bnad->bar0 + (addr)); \ + else \ + num++; \ +} while (0) + + spin_lock_irqsave(&bnad->bna_lock, flags); + + /* DMA Block Internal Registers */ + BNAD_GET_REG(DMA_CTRL_REG0); + BNAD_GET_REG(DMA_CTRL_REG1); + BNAD_GET_REG(DMA_ERR_INT_STATUS); + BNAD_GET_REG(DMA_ERR_INT_ENABLE); + BNAD_GET_REG(DMA_ERR_INT_STATUS_SET); + + /* APP Block Register Address Offset from BAR0 */ + BNAD_GET_REG(HOSTFN0_INT_STATUS); + BNAD_GET_REG(HOSTFN0_INT_MASK); + BNAD_GET_REG(HOST_PAGE_NUM_FN0); + BNAD_GET_REG(HOST_MSIX_ERR_INDEX_FN0); + BNAD_GET_REG(FN0_PCIE_ERR_REG); + BNAD_GET_REG(FN0_ERR_TYPE_STATUS_REG); + BNAD_GET_REG(FN0_ERR_TYPE_MSK_STATUS_REG); + + BNAD_GET_REG(HOSTFN1_INT_STATUS); + BNAD_GET_REG(HOSTFN1_INT_MASK); + BNAD_GET_REG(HOST_PAGE_NUM_FN1); + BNAD_GET_REG(HOST_MSIX_ERR_INDEX_FN1); + BNAD_GET_REG(FN1_PCIE_ERR_REG); + BNAD_GET_REG(FN1_ERR_TYPE_STATUS_REG); + BNAD_GET_REG(FN1_ERR_TYPE_MSK_STATUS_REG); + + BNAD_GET_REG(PCIE_MISC_REG); + + BNAD_GET_REG(HOST_SEM0_REG); + BNAD_GET_REG(HOST_SEM1_REG); + BNAD_GET_REG(HOST_SEM2_REG); + BNAD_GET_REG(HOST_SEM3_REG); + BNAD_GET_REG(HOST_SEM0_INFO_REG); + BNAD_GET_REG(HOST_SEM1_INFO_REG); + BNAD_GET_REG(HOST_SEM2_INFO_REG); + BNAD_GET_REG(HOST_SEM3_INFO_REG); + + BNAD_GET_REG(TEMPSENSE_CNTL_REG); + BNAD_GET_REG(TEMPSENSE_STAT_REG); + + BNAD_GET_REG(APP_LOCAL_ERR_STAT); + BNAD_GET_REG(APP_LOCAL_ERR_MSK); + + BNAD_GET_REG(PCIE_LNK_ERR_STAT); + BNAD_GET_REG(PCIE_LNK_ERR_MSK); + + BNAD_GET_REG(FCOE_FIP_ETH_TYPE); + BNAD_GET_REG(RESV_ETH_TYPE); + + BNAD_GET_REG(HOSTFN2_INT_STATUS); + BNAD_GET_REG(HOSTFN2_INT_MASK); + BNAD_GET_REG(HOST_PAGE_NUM_FN2); + BNAD_GET_REG(HOST_MSIX_ERR_INDEX_FN2); + BNAD_GET_REG(FN2_PCIE_ERR_REG); + BNAD_GET_REG(FN2_ERR_TYPE_STATUS_REG); + BNAD_GET_REG(FN2_ERR_TYPE_MSK_STATUS_REG); + + BNAD_GET_REG(HOSTFN3_INT_STATUS); + BNAD_GET_REG(HOSTFN3_INT_MASK); + BNAD_GET_REG(HOST_PAGE_NUM_FN3); + BNAD_GET_REG(HOST_MSIX_ERR_INDEX_FN3); + BNAD_GET_REG(FN3_PCIE_ERR_REG); + BNAD_GET_REG(FN3_ERR_TYPE_STATUS_REG); + BNAD_GET_REG(FN3_ERR_TYPE_MSK_STATUS_REG); + + /* Host Command Status Registers */ + reg_addr = HOST_CMDSTS0_CLR_REG; + for (i = 0; i < 16; i++) { + BNAD_GET_REG(reg_addr); + BNAD_GET_REG(reg_addr + 4); + BNAD_GET_REG(reg_addr + 8); + reg_addr += 0x10; + } + + /* Function ID register */ + BNAD_GET_REG(FNC_ID_REG); + + /* Function personality register */ + BNAD_GET_REG(FNC_PERS_REG); + + /* Operation mode register */ + BNAD_GET_REG(OP_MODE); + + /* LPU0 Registers */ + BNAD_GET_REG(LPU0_MBOX_CTL_REG); + BNAD_GET_REG(LPU0_MBOX_CMD_REG); + BNAD_GET_REG(LPU0_MBOX_LINK_0REG); + BNAD_GET_REG(LPU1_MBOX_LINK_0REG); + BNAD_GET_REG(LPU0_MBOX_STATUS_0REG); + BNAD_GET_REG(LPU1_MBOX_STATUS_0REG); + BNAD_GET_REG(LPU0_ERR_STATUS_REG); + BNAD_GET_REG(LPU0_ERR_SET_REG); + + /* LPU1 Registers */ + BNAD_GET_REG(LPU1_MBOX_CTL_REG); + BNAD_GET_REG(LPU1_MBOX_CMD_REG); + BNAD_GET_REG(LPU0_MBOX_LINK_1REG); 
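/* Editor's note, not part of the patch: BNAD_GET_REG above is a
 * count-or-fill macro -- when get_regs() is called with regs == NULL it
 * only advances num, so bnad_get_regs_len() further below can size the
 * snapshot buffer through the same code path that bnad_get_regs() later
 * uses to fill it. Expanded, each invocation is roughly:
 *
 *	if (regs)
 *		regs[num++] = readl(bnad->bar0 + DMA_CTRL_REG0);
 *	else
 *		num++;
 */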
+ BNAD_GET_REG(LPU1_MBOX_LINK_1REG); + BNAD_GET_REG(LPU0_MBOX_STATUS_1REG); + BNAD_GET_REG(LPU1_MBOX_STATUS_1REG); + BNAD_GET_REG(LPU1_ERR_STATUS_REG); + BNAD_GET_REG(LPU1_ERR_SET_REG); + + /* PSS Registers */ + BNAD_GET_REG(PSS_CTL_REG); + BNAD_GET_REG(PSS_ERR_STATUS_REG); + BNAD_GET_REG(ERR_STATUS_SET); + BNAD_GET_REG(PSS_RAM_ERR_STATUS_REG); + + /* Catapult CPQ Registers */ + BNAD_GET_REG(HOSTFN0_LPU0_MBOX0_CMD_STAT); + BNAD_GET_REG(HOSTFN0_LPU1_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN0_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN0_MBOX0_CMD_STAT); + + BNAD_GET_REG(HOSTFN0_LPU0_MBOX1_CMD_STAT); + BNAD_GET_REG(HOSTFN0_LPU1_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN0_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN0_MBOX1_CMD_STAT); + + BNAD_GET_REG(HOSTFN1_LPU0_MBOX0_CMD_STAT); + BNAD_GET_REG(HOSTFN1_LPU1_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN1_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN1_MBOX0_CMD_STAT); + + BNAD_GET_REG(HOSTFN1_LPU0_MBOX1_CMD_STAT); + BNAD_GET_REG(HOSTFN1_LPU1_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN1_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN1_MBOX1_CMD_STAT); + + BNAD_GET_REG(HOSTFN2_LPU0_MBOX0_CMD_STAT); + BNAD_GET_REG(HOSTFN2_LPU1_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN2_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN2_MBOX0_CMD_STAT); + + BNAD_GET_REG(HOSTFN2_LPU0_MBOX1_CMD_STAT); + BNAD_GET_REG(HOSTFN2_LPU1_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN2_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN2_MBOX1_CMD_STAT); + + BNAD_GET_REG(HOSTFN3_LPU0_MBOX0_CMD_STAT); + BNAD_GET_REG(HOSTFN3_LPU1_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN3_MBOX0_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN3_MBOX0_CMD_STAT); + + BNAD_GET_REG(HOSTFN3_LPU0_MBOX1_CMD_STAT); + BNAD_GET_REG(HOSTFN3_LPU1_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU0_HOSTFN3_MBOX1_CMD_STAT); + BNAD_GET_REG(LPU1_HOSTFN3_MBOX1_CMD_STAT); + + /* Host Function Force Parity Error Registers */ + BNAD_GET_REG(HOSTFN0_LPU_FORCE_PERR); + BNAD_GET_REG(HOSTFN1_LPU_FORCE_PERR); + BNAD_GET_REG(HOSTFN2_LPU_FORCE_PERR); + BNAD_GET_REG(HOSTFN3_LPU_FORCE_PERR); + + /* LL Port[0|1] Halt Mask Registers */ + BNAD_GET_REG(LL_HALT_MSK_P0); + BNAD_GET_REG(LL_HALT_MSK_P1); + + /* LL Port[0|1] Error Mask Registers */ + BNAD_GET_REG(LL_ERR_MSK_P0); + BNAD_GET_REG(LL_ERR_MSK_P1); + + /* EMC FLI Registers */ + BNAD_GET_REG(FLI_CMD_REG); + BNAD_GET_REG(FLI_ADDR_REG); + BNAD_GET_REG(FLI_CTL_REG); + BNAD_GET_REG(FLI_WRDATA_REG); + BNAD_GET_REG(FLI_RDDATA_REG); + BNAD_GET_REG(FLI_DEV_STATUS_REG); + BNAD_GET_REG(FLI_SIG_WD_REG); + + BNAD_GET_REG(FLI_DEV_VENDOR_REG); + BNAD_GET_REG(FLI_ERR_STATUS_REG); + + /* RxAdm 0 Registers */ + BNAD_GET_REG(RAD0_CTL_REG); + BNAD_GET_REG(RAD0_PE_PARM_REG); + BNAD_GET_REG(RAD0_BCN_REG); + BNAD_GET_REG(RAD0_DEFAULT_REG); + BNAD_GET_REG(RAD0_PROMISC_REG); + BNAD_GET_REG(RAD0_BCNQ_REG); + BNAD_GET_REG(RAD0_DEFAULTQ_REG); + + BNAD_GET_REG(RAD0_ERR_STS); + BNAD_GET_REG(RAD0_SET_ERR_STS); + BNAD_GET_REG(RAD0_ERR_INT_EN); + BNAD_GET_REG(RAD0_FIRST_ERR); + BNAD_GET_REG(RAD0_FORCE_ERR); + + BNAD_GET_REG(RAD0_MAC_MAN_1H); + BNAD_GET_REG(RAD0_MAC_MAN_1L); + BNAD_GET_REG(RAD0_MAC_MAN_2H); + BNAD_GET_REG(RAD0_MAC_MAN_2L); + BNAD_GET_REG(RAD0_MAC_MAN_3H); + BNAD_GET_REG(RAD0_MAC_MAN_3L); + BNAD_GET_REG(RAD0_MAC_MAN_4H); + BNAD_GET_REG(RAD0_MAC_MAN_4L); + + BNAD_GET_REG(RAD0_LAST4_IP); + + /* RxAdm 1 Registers */ + BNAD_GET_REG(RAD1_CTL_REG); + BNAD_GET_REG(RAD1_PE_PARM_REG); + BNAD_GET_REG(RAD1_BCN_REG); + BNAD_GET_REG(RAD1_DEFAULT_REG); + BNAD_GET_REG(RAD1_PROMISC_REG); + BNAD_GET_REG(RAD1_BCNQ_REG); + 
BNAD_GET_REG(RAD1_DEFAULTQ_REG); + + BNAD_GET_REG(RAD1_ERR_STS); + BNAD_GET_REG(RAD1_SET_ERR_STS); + BNAD_GET_REG(RAD1_ERR_INT_EN); + + /* TxA0 Registers */ + BNAD_GET_REG(TXA0_CTRL_REG); + /* TxA0 TSO Sequence # Registers (RO) */ + for (i = 0; i < 8; i++) { + BNAD_GET_REG(TXA0_TSO_TCP_SEQ_REG(i)); + BNAD_GET_REG(TXA0_TSO_IP_INFO_REG(i)); + } + + /* TxA1 Registers */ + BNAD_GET_REG(TXA1_CTRL_REG); + /* TxA1 TSO Sequence # Registers (RO) */ + for (i = 0; i < 8; i++) { + BNAD_GET_REG(TXA1_TSO_TCP_SEQ_REG(i)); + BNAD_GET_REG(TXA1_TSO_IP_INFO_REG(i)); + } + + /* RxA Registers */ + BNAD_GET_REG(RXA0_CTL_REG); + BNAD_GET_REG(RXA1_CTL_REG); + + /* PLB0 Registers */ + BNAD_GET_REG(PLB0_ECM_TIMER_REG); + BNAD_GET_REG(PLB0_RL_CTL); + for (i = 0; i < 8; i++) + BNAD_GET_REG(PLB0_RL_MAX_BC(i)); + BNAD_GET_REG(PLB0_RL_TU_PRIO); + for (i = 0; i < 8; i++) + BNAD_GET_REG(PLB0_RL_BYTE_CNT(i)); + BNAD_GET_REG(PLB0_RL_MIN_REG); + BNAD_GET_REG(PLB0_RL_MAX_REG); + BNAD_GET_REG(PLB0_EMS_ADD_REG); + + /* PLB1 Registers */ + BNAD_GET_REG(PLB1_ECM_TIMER_REG); + BNAD_GET_REG(PLB1_RL_CTL); + for (i = 0; i < 8; i++) + BNAD_GET_REG(PLB1_RL_MAX_BC(i)); + BNAD_GET_REG(PLB1_RL_TU_PRIO); + for (i = 0; i < 8; i++) + BNAD_GET_REG(PLB1_RL_BYTE_CNT(i)); + BNAD_GET_REG(PLB1_RL_MIN_REG); + BNAD_GET_REG(PLB1_RL_MAX_REG); + BNAD_GET_REG(PLB1_EMS_ADD_REG); + + /* HQM Control Register */ + BNAD_GET_REG(HQM0_CTL_REG); + BNAD_GET_REG(HQM0_RXQ_STOP_SEM); + BNAD_GET_REG(HQM0_TXQ_STOP_SEM); + BNAD_GET_REG(HQM1_CTL_REG); + BNAD_GET_REG(HQM1_RXQ_STOP_SEM); + BNAD_GET_REG(HQM1_TXQ_STOP_SEM); + + /* LUT Registers */ + BNAD_GET_REG(LUT0_ERR_STS); + BNAD_GET_REG(LUT0_SET_ERR_STS); + BNAD_GET_REG(LUT1_ERR_STS); + BNAD_GET_REG(LUT1_SET_ERR_STS); + + /* TRC Registers */ + BNAD_GET_REG(TRC_CTL_REG); + BNAD_GET_REG(TRC_MODS_REG); + BNAD_GET_REG(TRC_TRGC_REG); + BNAD_GET_REG(TRC_CNT1_REG); + BNAD_GET_REG(TRC_CNT2_REG); + BNAD_GET_REG(TRC_NXTS_REG); + BNAD_GET_REG(TRC_DIRR_REG); + for (i = 0; i < 10; i++) + BNAD_GET_REG(TRC_TRGM_REG(i)); + for (i = 0; i < 10; i++) + BNAD_GET_REG(TRC_NXTM_REG(i)); + for (i = 0; i < 10; i++) + BNAD_GET_REG(TRC_STRM_REG(i)); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); +#undef BNAD_GET_REG + return num; +} +static int +bnad_get_regs_len(struct net_device *netdev) +{ + int ret = get_regs(netdev_priv(netdev), NULL) * sizeof(u32); + return ret; +} + +static void +bnad_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *buf) +{ + memset(buf, 0, bnad_get_regs_len(netdev)); + get_regs(netdev_priv(netdev), buf); +} + +static void +bnad_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wolinfo) +{ + wolinfo->supported = 0; + wolinfo->wolopts = 0; +} + +static int +bnad_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coalesce) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + + /* Lock rqd. to access bnad->bna_lock */ + spin_lock_irqsave(&bnad->bna_lock, flags); + coalesce->use_adaptive_rx_coalesce = + (bnad->cfg_flags & BNAD_CF_DIM_ENABLED) ? 
true : false; + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + coalesce->rx_coalesce_usecs = bnad->rx_coalescing_timeo * + BFI_COALESCING_TIMER_UNIT; + coalesce->tx_coalesce_usecs = bnad->tx_coalescing_timeo * + BFI_COALESCING_TIMER_UNIT; + coalesce->tx_max_coalesced_frames = BFI_TX_INTERPKT_COUNT; + + return 0; +} + +static int +bnad_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *coalesce) +{ + struct bnad *bnad = netdev_priv(netdev); + unsigned long flags; + int dim_timer_del = 0; + + if (coalesce->rx_coalesce_usecs == 0 || + coalesce->rx_coalesce_usecs > + BFI_MAX_COALESCING_TIMEO * BFI_COALESCING_TIMER_UNIT) + return -EINVAL; + + if (coalesce->tx_coalesce_usecs == 0 || + coalesce->tx_coalesce_usecs > + BFI_MAX_COALESCING_TIMEO * BFI_COALESCING_TIMER_UNIT) + return -EINVAL; + + mutex_lock(&bnad->conf_mutex); + /* + * Do not need to store rx_coalesce_usecs here + * Every time DIM is disabled, we can get it from the + * stack. + */ + spin_lock_irqsave(&bnad->bna_lock, flags); + if (coalesce->use_adaptive_rx_coalesce) { + if (!(bnad->cfg_flags & BNAD_CF_DIM_ENABLED)) { + bnad->cfg_flags |= BNAD_CF_DIM_ENABLED; + bnad_dim_timer_start(bnad); + } + } else { + if (bnad->cfg_flags & BNAD_CF_DIM_ENABLED) { + bnad->cfg_flags &= ~BNAD_CF_DIM_ENABLED; + dim_timer_del = bnad_dim_timer_running(bnad); + if (dim_timer_del) { + clear_bit(BNAD_RF_DIM_TIMER_RUNNING, + &bnad->run_flags); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + del_timer_sync(&bnad->dim_timer); + spin_lock_irqsave(&bnad->bna_lock, flags); + } + bnad_rx_coalescing_timeo_set(bnad); + } + } + if (bnad->tx_coalescing_timeo != coalesce->tx_coalesce_usecs / + BFI_COALESCING_TIMER_UNIT) { + bnad->tx_coalescing_timeo = coalesce->tx_coalesce_usecs / + BFI_COALESCING_TIMER_UNIT; + bnad_tx_coalescing_timeo_set(bnad); + } + + if (bnad->rx_coalescing_timeo != coalesce->rx_coalesce_usecs / + BFI_COALESCING_TIMER_UNIT) { + bnad->rx_coalescing_timeo = coalesce->rx_coalesce_usecs / + BFI_COALESCING_TIMER_UNIT; + + if (!(bnad->cfg_flags & BNAD_CF_DIM_ENABLED)) + bnad_rx_coalescing_timeo_set(bnad); + + } + + /* Add Tx Inter-pkt DMA count? 
*/ + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); + return 0; +} + +static void +bnad_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ringparam) +{ + struct bnad *bnad = netdev_priv(netdev); + + ringparam->rx_max_pending = BNAD_MAX_Q_DEPTH / bnad_rxqs_per_cq; + ringparam->rx_mini_max_pending = 0; + ringparam->rx_jumbo_max_pending = 0; + ringparam->tx_max_pending = BNAD_MAX_Q_DEPTH; + + ringparam->rx_pending = bnad->rxq_depth; + ringparam->rx_mini_max_pending = 0; + ringparam->rx_jumbo_max_pending = 0; + ringparam->tx_pending = bnad->txq_depth; +} + +static int +bnad_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ringparam) +{ + int i, current_err, err = 0; + struct bnad *bnad = netdev_priv(netdev); + + mutex_lock(&bnad->conf_mutex); + if (ringparam->rx_pending == bnad->rxq_depth && + ringparam->tx_pending == bnad->txq_depth) { + mutex_unlock(&bnad->conf_mutex); + return 0; + } + + if (ringparam->rx_pending < BNAD_MIN_Q_DEPTH || + ringparam->rx_pending > BNAD_MAX_Q_DEPTH / bnad_rxqs_per_cq || + !BNA_POWER_OF_2(ringparam->rx_pending)) { + mutex_unlock(&bnad->conf_mutex); + return -EINVAL; + } + if (ringparam->tx_pending < BNAD_MIN_Q_DEPTH || + ringparam->tx_pending > BNAD_MAX_Q_DEPTH || + !BNA_POWER_OF_2(ringparam->tx_pending)) { + mutex_unlock(&bnad->conf_mutex); + return -EINVAL; + } + + if (ringparam->rx_pending != bnad->rxq_depth) { + bnad->rxq_depth = ringparam->rx_pending; + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + bnad_cleanup_rx(bnad, i); + current_err = bnad_setup_rx(bnad, i); + if (current_err && !err) + err = current_err; + } + } + if (ringparam->tx_pending != bnad->txq_depth) { + bnad->txq_depth = ringparam->tx_pending; + for (i = 0; i < bnad->num_tx; i++) { + if (!bnad->tx_info[i].tx) + continue; + bnad_cleanup_tx(bnad, i); + current_err = bnad_setup_tx(bnad, i); + if (current_err && !err) + err = current_err; + } + } + + mutex_unlock(&bnad->conf_mutex); + return err; +} + +static void +bnad_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct bnad *bnad = netdev_priv(netdev); + + pauseparam->autoneg = 0; + pauseparam->rx_pause = bnad->bna.port.pause_config.rx_pause; + pauseparam->tx_pause = bnad->bna.port.pause_config.tx_pause; +} + +static int +bnad_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct bnad *bnad = netdev_priv(netdev); + struct bna_pause_config pause_config; + unsigned long flags; + + if (pauseparam->autoneg == AUTONEG_ENABLE) + return -EINVAL; + + mutex_lock(&bnad->conf_mutex); + if (pauseparam->rx_pause != bnad->bna.port.pause_config.rx_pause || + pauseparam->tx_pause != bnad->bna.port.pause_config.tx_pause) { + pause_config.rx_pause = pauseparam->rx_pause; + pause_config.tx_pause = pauseparam->tx_pause; + spin_lock_irqsave(&bnad->bna_lock, flags); + bna_port_pause_config(&bnad->bna.port, &pause_config, NULL); + spin_unlock_irqrestore(&bnad->bna_lock, flags); + } + mutex_unlock(&bnad->conf_mutex); + return 0; +} + +static u32 +bnad_get_rx_csum(struct net_device *netdev) +{ + u32 rx_csum; + struct bnad *bnad = netdev_priv(netdev); + + rx_csum = bnad->rx_csum; + return rx_csum; +} + +static int +bnad_set_rx_csum(struct net_device *netdev, u32 rx_csum) +{ + struct bnad *bnad = netdev_priv(netdev); + + mutex_lock(&bnad->conf_mutex); + bnad->rx_csum = rx_csum; + mutex_unlock(&bnad->conf_mutex); + return 0; +} + +static int +bnad_set_tx_csum(struct net_device 
*netdev, u32 tx_csum) +{ + struct bnad *bnad = netdev_priv(netdev); + + mutex_lock(&bnad->conf_mutex); + if (tx_csum) { + netdev->features |= NETIF_F_IP_CSUM; + netdev->features |= NETIF_F_IPV6_CSUM; + } else { + netdev->features &= ~NETIF_F_IP_CSUM; + netdev->features &= ~NETIF_F_IPV6_CSUM; + } + mutex_unlock(&bnad->conf_mutex); + return 0; +} + +static int +bnad_set_tso(struct net_device *netdev, u32 tso) +{ + struct bnad *bnad = netdev_priv(netdev); + + mutex_lock(&bnad->conf_mutex); + if (tso) { + netdev->features |= NETIF_F_TSO; + netdev->features |= NETIF_F_TSO6; + } else { + netdev->features &= ~NETIF_F_TSO; + netdev->features &= ~NETIF_F_TSO6; + } + mutex_unlock(&bnad->conf_mutex); + return 0; +} + +static void +bnad_get_strings(struct net_device *netdev, u32 stringset, u8 * string) +{ + struct bnad *bnad = netdev_priv(netdev); + int i, j, q_num; + u64 bmap; + + mutex_lock(&bnad->conf_mutex); + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < BNAD_ETHTOOL_STATS_NUM; i++) { + BUG_ON(!(strlen(bnad_net_stats_strings[i]) < + ETH_GSTRING_LEN)); + memcpy(string, bnad_net_stats_strings[i], + ETH_GSTRING_LEN); + string += ETH_GSTRING_LEN; + } + bmap = (u64)bnad->bna.tx_mod.txf_bmap[0] | + ((u64)bnad->bna.tx_mod.txf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_TXF_ID_MAX); i++) { + if (bmap & 1) { + sprintf(string, "txf%d_ucast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_ucast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_ucast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_mcast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_mcast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_mcast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_bcast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_bcast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_bcast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_errors", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_filter_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "txf%d_filter_mac_sa", i); + string += ETH_GSTRING_LEN; + } + bmap >>= 1; + } + + bmap = (u64)bnad->bna.rx_mod.rxf_bmap[0] | + ((u64)bnad->bna.rx_mod.rxf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_RXF_ID_MAX); i++) { + if (bmap & 1) { + sprintf(string, "rxf%d_ucast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_ucast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_ucast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_mcast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_mcast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_mcast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_bcast_octets", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_bcast", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_bcast_vlan", i); + string += ETH_GSTRING_LEN; + sprintf(string, "rxf%d_frame_drops", i); + string += ETH_GSTRING_LEN; + } + bmap >>= 1; + } + + q_num = 0; + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + sprintf(string, "cq%d_producer_index", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "cq%d_consumer_index", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "cq%d_hw_producer_index", + q_num); + string += ETH_GSTRING_LEN; + q_num++; + } + } + + q_num = 0; + for (i = 0; i < bnad->num_rx; i++) { + if 
(!bnad->rx_info[i].rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + sprintf(string, "rxq%d_packets", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_bytes", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_packets_with_error", + q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_allocbuf_failed", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_producer_index", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_consumer_index", q_num); + string += ETH_GSTRING_LEN; + q_num++; + if (bnad->rx_info[i].rx_ctrl[j].ccb && + bnad->rx_info[i].rx_ctrl[j].ccb-> + rcb[1] && + bnad->rx_info[i].rx_ctrl[j].ccb-> + rcb[1]->rxq) { + sprintf(string, "rxq%d_packets", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_bytes", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, + "rxq%d_packets_with_error", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_allocbuf_failed", + q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_producer_index", + q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_consumer_index", + q_num); + string += ETH_GSTRING_LEN; + q_num++; + } + } + } + + q_num = 0; + for (i = 0; i < bnad->num_tx; i++) { + if (!bnad->tx_info[i].tx) + continue; + for (j = 0; j < bnad->num_txq_per_tx; j++) { + sprintf(string, "txq%d_packets", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "txq%d_bytes", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "txq%d_producer_index", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "txq%d_consumer_index", q_num); + string += ETH_GSTRING_LEN; + sprintf(string, "txq%d_hw_consumer_index", + q_num); + string += ETH_GSTRING_LEN; + q_num++; + } + } + + break; + + default: + break; + } + + mutex_unlock(&bnad->conf_mutex); +} + +static int +bnad_get_stats_count_locked(struct net_device *netdev) +{ + struct bnad *bnad = netdev_priv(netdev); + int i, j, count, rxf_active_num = 0, txf_active_num = 0; + u64 bmap; + + bmap = (u64)bnad->bna.tx_mod.txf_bmap[0] | + ((u64)bnad->bna.tx_mod.txf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_TXF_ID_MAX); i++) { + if (bmap & 1) + txf_active_num++; + bmap >>= 1; + } + bmap = (u64)bnad->bna.rx_mod.rxf_bmap[0] | + ((u64)bnad->bna.rx_mod.rxf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_RXF_ID_MAX); i++) { + if (bmap & 1) + rxf_active_num++; + bmap >>= 1; + } + count = BNAD_ETHTOOL_STATS_NUM + + txf_active_num * BNAD_NUM_TXF_COUNTERS + + rxf_active_num * BNAD_NUM_RXF_COUNTERS; + + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + count += bnad->num_rxp_per_rx * BNAD_NUM_CQ_COUNTERS; + count += bnad->num_rxp_per_rx * BNAD_NUM_RXQ_COUNTERS; + for (j = 0; j < bnad->num_rxp_per_rx; j++) + if (bnad->rx_info[i].rx_ctrl[j].ccb && + bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1] && + bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1]->rxq) + count += BNAD_NUM_RXQ_COUNTERS; + } + + for (i = 0; i < bnad->num_tx; i++) { + if (!bnad->tx_info[i].tx) + continue; + count += bnad->num_txq_per_tx * BNAD_NUM_TXQ_COUNTERS; + } + return count; +} + +static int +bnad_per_q_stats_fill(struct bnad *bnad, u64 *buf, int bi) +{ + int i, j; + struct bna_rcb *rcb = NULL; + struct bna_tcb *tcb = NULL; + + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) + if (bnad->rx_info[i].rx_ctrl[j].ccb && + bnad->rx_info[i].rx_ctrl[j].ccb->rcb[0] && + bnad->rx_info[i].rx_ctrl[j].ccb->rcb[0]->rxq) { + buf[bi++] = bnad->rx_info[i].rx_ctrl[j]. 
+ ccb->producer_index; + buf[bi++] = 0; /* ccb->consumer_index */ + buf[bi++] = *(bnad->rx_info[i].rx_ctrl[j]. + ccb->hw_producer_index); + } + } + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) + if (bnad->rx_info[i].rx_ctrl[j].ccb) { + if (bnad->rx_info[i].rx_ctrl[j].ccb->rcb[0] && + bnad->rx_info[i].rx_ctrl[j].ccb-> + rcb[0]->rxq) { + rcb = bnad->rx_info[i].rx_ctrl[j]. + ccb->rcb[0]; + buf[bi++] = rcb->rxq->rx_packets; + buf[bi++] = rcb->rxq->rx_bytes; + buf[bi++] = rcb->rxq-> + rx_packets_with_error; + buf[bi++] = rcb->rxq-> + rxbuf_alloc_failed; + buf[bi++] = rcb->producer_index; + buf[bi++] = rcb->consumer_index; + } + if (bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1] && + bnad->rx_info[i].rx_ctrl[j].ccb-> + rcb[1]->rxq) { + rcb = bnad->rx_info[i].rx_ctrl[j]. + ccb->rcb[1]; + buf[bi++] = rcb->rxq->rx_packets; + buf[bi++] = rcb->rxq->rx_bytes; + buf[bi++] = rcb->rxq-> + rx_packets_with_error; + buf[bi++] = rcb->rxq-> + rxbuf_alloc_failed; + buf[bi++] = rcb->producer_index; + buf[bi++] = rcb->consumer_index; + } + } + } + + for (i = 0; i < bnad->num_tx; i++) { + if (!bnad->tx_info[i].tx) + continue; + for (j = 0; j < bnad->num_txq_per_tx; j++) + if (bnad->tx_info[i].tcb[j] && + bnad->tx_info[i].tcb[j]->txq) { + tcb = bnad->tx_info[i].tcb[j]; + buf[bi++] = tcb->txq->tx_packets; + buf[bi++] = tcb->txq->tx_bytes; + buf[bi++] = tcb->producer_index; + buf[bi++] = tcb->consumer_index; + buf[bi++] = *(tcb->hw_consumer_index); + } + } + + return bi; +} + +static void +bnad_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, + u64 *buf) +{ + struct bnad *bnad = netdev_priv(netdev); + int i, j, bi; + unsigned long *net_stats, flags; + u64 *stats64; + u64 bmap; + + mutex_lock(&bnad->conf_mutex); + if (bnad_get_stats_count_locked(netdev) != stats->n_stats) { + mutex_unlock(&bnad->conf_mutex); + return; + } + + /* + * Used bna_lock to sync reads from bna_stats, which is written + * under the same lock + */ + spin_lock_irqsave(&bnad->bna_lock, flags); + bi = 0; + memset(buf, 0, stats->n_stats * sizeof(u64)); + memset(&bnad->net_stats, 0, sizeof(struct net_device_stats)); + + bnad_netdev_qstats_fill(bnad); + bnad_netdev_hwstats_fill(bnad); + + /* Fill net_stats into ethtool buffers */ + net_stats = (unsigned long *)&bnad->net_stats; + for (i = 0; i < sizeof(struct net_device_stats) / sizeof(unsigned long); + i++) + buf[bi++] = net_stats[i]; + + /* Fill driver stats into ethtool buffers */ + stats64 = (u64 *)&bnad->stats.drv_stats; + for (i = 0; i < sizeof(struct bnad_drv_stats) / sizeof(u64); i++) + buf[bi++] = stats64[i]; + + /* Fill hardware stats excluding the rxf/txf into ethtool bufs */ + stats64 = (u64 *) bnad->stats.bna_stats->hw_stats; + for (i = 0; + i < offsetof(struct bfi_ll_stats, rxf_stats[0]) / sizeof(u64); + i++) + buf[bi++] = stats64[i]; + + /* Fill txf stats into ethtool buffers */ + bmap = (u64)bnad->bna.tx_mod.txf_bmap[0] | + ((u64)bnad->bna.tx_mod.txf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_TXF_ID_MAX); i++) { + if (bmap & 1) { + stats64 = (u64 *)&bnad->stats.bna_stats-> + hw_stats->txf_stats[i]; + for (j = 0; j < sizeof(struct bfi_ll_stats_txf) / + sizeof(u64); j++) + buf[bi++] = stats64[j]; + } + bmap >>= 1; + } + + /* Fill rxf stats into ethtool buffers */ + bmap = (u64)bnad->bna.rx_mod.rxf_bmap[0] | + ((u64)bnad->bna.rx_mod.rxf_bmap[1] << 32); + for (i = 0; bmap && (i < BFI_LL_RXF_ID_MAX); i++) { + if (bmap & 1) { + stats64 = (u64 *)&bnad->stats.bna_stats-> + 
hw_stats->rxf_stats[i]; + for (j = 0; j < sizeof(struct bfi_ll_stats_rxf) / + sizeof(u64); j++) + buf[bi++] = stats64[j]; + } + bmap >>= 1; + } + + /* Fill per Q stats into ethtool buffers */ + bi = bnad_per_q_stats_fill(bnad, buf, bi); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + + mutex_unlock(&bnad->conf_mutex); +} + +static int +bnad_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return bnad_get_stats_count_locked(netdev); + default: + return -EOPNOTSUPP; + } +} + +static struct ethtool_ops bnad_ethtool_ops = { + .get_settings = bnad_get_settings, + .set_settings = bnad_set_settings, + .get_drvinfo = bnad_get_drvinfo, + .get_regs_len = bnad_get_regs_len, + .get_regs = bnad_get_regs, + .get_wol = bnad_get_wol, + .get_link = ethtool_op_get_link, + .get_coalesce = bnad_get_coalesce, + .set_coalesce = bnad_set_coalesce, + .get_ringparam = bnad_get_ringparam, + .set_ringparam = bnad_set_ringparam, + .get_pauseparam = bnad_get_pauseparam, + .set_pauseparam = bnad_set_pauseparam, + .get_rx_csum = bnad_get_rx_csum, + .set_rx_csum = bnad_set_rx_csum, + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = bnad_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, + .get_tso = ethtool_op_get_tso, + .set_tso = bnad_set_tso, + .get_flags = ethtool_op_get_flags, + .set_flags = ethtool_op_set_flags, + .get_strings = bnad_get_strings, + .get_ethtool_stats = bnad_get_ethtool_stats, + .get_sset_count = bnad_get_sset_count +}; + +void +bnad_set_ethtool_ops(struct net_device *netdev) +{ + SET_ETHTOOL_OPS(netdev, &bnad_ethtool_ops); +} diff --git a/drivers/net/bna/cna.h b/drivers/net/bna/cna.h new file mode 100644 index 000000000000..bbd39dc65972 --- /dev/null +++ b/drivers/net/bna/cna.h @@ -0,0 +1,81 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2006-2010 Brocade Communications Systems, Inc. 
+ * All rights reserved + * www.brocade.com + */ + +#ifndef __CNA_H__ +#define __CNA_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define bfa_sm_fault(__mod, __event) do { \ + pr_err("SM Assertion failure: %s: %d: event = %d", __FILE__, __LINE__, \ + __event); \ +} while (0) + +extern char bfa_version[]; + +#define CNA_FW_FILE_CT "ctfw_cna.bin" +#define FC_SYMNAME_MAX 256 /*!< max name server symbolic name size */ + +#pragma pack(1) + +#define MAC_ADDRLEN (6) +typedef struct mac { u8 mac[MAC_ADDRLEN]; } mac_t; + +#pragma pack() + +#define bfa_q_first(_q) ((void *)(((struct list_head *) (_q))->next)) +#define bfa_q_next(_qe) (((struct list_head *) (_qe))->next) +#define bfa_q_prev(_qe) (((struct list_head *) (_qe))->prev) + +/* + * bfa_q_qe_init - to initialize a queue element + */ +#define bfa_q_qe_init(_qe) { \ + bfa_q_next(_qe) = (struct list_head *) NULL; \ + bfa_q_prev(_qe) = (struct list_head *) NULL; \ +} + +/* + * bfa_q_deq - dequeue an element from head of the queue + */ +#define bfa_q_deq(_q, _qe) { \ + if (!list_empty(_q)) { \ + (*((struct list_head **) (_qe))) = bfa_q_next(_q); \ + bfa_q_prev(bfa_q_next(*((struct list_head **) _qe))) = \ + (struct list_head *) (_q); \ + bfa_q_next(_q) = bfa_q_next(*((struct list_head **) _qe)); \ + bfa_q_qe_init(*((struct list_head **) _qe)); \ + } else { \ + *((struct list_head **) (_qe)) = (struct list_head *) NULL; \ + } \ +} + +#endif /* __CNA_H__ */ diff --git a/drivers/net/bna/cna_fwimg.c b/drivers/net/bna/cna_fwimg.c new file mode 100644 index 000000000000..0bd1d3790a27 --- /dev/null +++ b/drivers/net/bna/cna_fwimg.c @@ -0,0 +1,64 @@ +/* + * Linux network driver for Brocade Converged Network Adapter. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License (GPL) Version 2 as + * published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Copyright (c) 2005-2010 Brocade Communications Systems, Inc. 
+ * All rights reserved + * www.brocade.com + */ +#include <linux/firmware.h> +#include "cna.h" + +const struct firmware *bfi_fw; +static u32 *bfi_image_ct_cna; +static u32 bfi_image_ct_cna_size; + +u32 * +cna_read_firmware(struct pci_dev *pdev, u32 **bfi_image, + u32 *bfi_image_size, char *fw_name) +{ + const struct firmware *fw; + + if (request_firmware(&fw, fw_name, &pdev->dev)) { + pr_alert("Can't locate firmware %s\n", fw_name); + goto error; + } + + *bfi_image = (u32 *)fw->data; + *bfi_image_size = fw->size/sizeof(u32); + bfi_fw = fw; + + return *bfi_image; +error: + return NULL; +} + +u32 * +cna_get_firmware_buf(struct pci_dev *pdev) +{ + if (bfi_image_ct_cna_size == 0) + cna_read_firmware(pdev, &bfi_image_ct_cna, + &bfi_image_ct_cna_size, CNA_FW_FILE_CT); + return bfi_image_ct_cna; +} + +u32 * +bfa_cb_image_get_chunk(int type, u32 off) +{ + return (u32 *)(bfi_image_ct_cna + off); +} + +u32 +bfa_cb_image_get_size(int type) +{ + return bfi_image_ct_cna_size; +} diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d36cad..1f730de0df06 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2189,6 +2189,9 @@ #define PCI_VENDOR_ID_ARIMA 0x161f #define PCI_VENDOR_ID_BROCADE 0x1657 +#define PCI_DEVICE_ID_BROCADE_CT 0x0014 +#define PCI_DEVICE_ID_BROCADE_FC_8G1P 0x0017 +#define PCI_DEVICE_ID_BROCADE_CT_FC 0x0021 #define PCI_VENDOR_ID_SIBYTE 0x166d #define PCI_DEVICE_ID_BCM1250_PCI 0x0001 -- cgit v1.2.3 From 1726442e115a9e58f40747d009a5b4f303e0840a Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 23 Aug 2010 16:26:41 +0000 Subject: net: increase the size of priv_flags and add IFF_OVS_DATAPATH IFF_OVS_DATAPATH is a place-holder for the Open vSwitch datapath which I am preparing to submit for merging. As all 16 bits of priv_flags are already assigned flags, also increase the size of priv_flags to 32 bits. Unfortunately, by my calculations this increases the size of struct net_device by 4 bytes on 32-bit architectures and 8 bytes on 64-bit architectures. I couldn't see an obvious way to avoid that. Cc: Jesse Gross Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/if.h | 2 ++ include/linux/netdevice.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if.h b/include/linux/if.h index 53558ec59e1b..6ed43c1f07ab 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -75,6 +75,8 @@ #define IFF_DISABLE_NETPOLL 0x2000 /* disable netpoll at run-time */ #define IFF_MACVLAN_PORT 0x4000 /* device used as macvlan port */ #define IFF_BRIDGE_PORT 0x8000 /* device used as bridge port */ +#define IFF_OVS_DATAPATH 0x10000 /* device used as Open vSwitch + * datapath port */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ce2de8b64083..59962dbc2758 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -901,7 +901,7 @@ struct net_device { unsigned int flags; /* interface flags (a la BSD) */ unsigned short gflags; - unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */ + unsigned int priv_flags; /* Like 'flags' but invisible to userspace.
*/ unsigned short padded; /* How much padding added by alloc_netdev() */ unsigned char operstate; /* RFC2863 operstate */ -- cgit v1.2.3 From d187abb9a83e6c6b6e9f2ca17962bdeafb4bc903 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 11 Aug 2010 12:07:13 -0700 Subject: USB: gadget: fix composite kernel-doc warnings Warning(include/linux/usb/composite.h:284): No description found for parameter 'disconnect' Warning(drivers/usb/gadget/composite.c:744): No description found for parameter 'c' Warning(drivers/usb/gadget/composite.c:744): Excess function parameter 'cdev' description in 'usb_string_ids_n' Signed-off-by: Randy Dunlap Cc: David Brownell Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 4 ++-- include/linux/usb/composite.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index e483f80822d2..1160c55de7f2 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -723,12 +723,12 @@ int usb_string_ids_tab(struct usb_composite_dev *cdev, struct usb_string *str) /** * usb_string_ids_n() - allocate unused string IDs in batch - * @cdev: the device whose string descriptor IDs are being allocated + * @c: the device whose string descriptor IDs are being allocated * @n: number of string IDs to allocate * Context: single threaded during gadget setup * * Returns the first requested ID. This ID and next @n-1 IDs are now - * valid IDs. At least providind that @n is non zore because if it + * valid IDs. At least provided that @n is non-zero because if it * is, returns last requested ID which is now very useful information. * * @usb_string_ids_n() is called from bind() callbacks to allocate diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 890bc1472190..617068134ae8 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -247,6 +247,7 @@ int usb_add_config(struct usb_composite_dev *, * value; it should return zero on successful initialization. * @unbind: Reverses @bind(); called as a side effect of unregistering * this driver. + * @disconnect: optional driver disconnect method * @suspend: Notifies when the host stops sending USB traffic, * after function notifications * @resume: Notifies configuration when the host restarts USB traffic, -- cgit v1.2.3 From e41e704bc4f49057fc68b643108366e6e6781aa3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Aug 2010 14:22:47 +0200 Subject: workqueue: improve destroy_workqueue() debuggability Now that the worklist is global, having works pending after wq destruction can easily lead to oops and destroy_workqueue() have several BUG_ON()s to catch these cases. Unfortunately, BUG_ON() doesn't tell much about how the work became pending after the final flush_workqueue(). This patch adds WQ_DYING which is set before the final flush begins. If a work is requested to be queued on a dying workqueue, WARN_ON_ONCE() is triggered and the request is ignored. This clearly indicates which caller is trying to queue a work on a dying workqueue and keeps the system working in most cases. Locking rule comment is updated such that the 'I' rule includes modifying the field from destruction path. 
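[Editor's note: the sign-off and the actual hunks follow below. As a quick illustration of the ordering this patch relies on, here is a minimal userspace model of the WQ_DYING guard. Every identifier in the sketch (sketch_wq, sketch_queue_work, sketch_destroy_workqueue) is invented for illustration; only the behaviour mirrors the patch -- the queue is marked dying *before* the final flush, so a late queueing attempt is warned about and ignored instead of oopsing on a destroyed workqueue.]

#include <stdbool.h>
#include <stdio.h>

enum { WQ_SKETCH_DYING = 1 << 0 };	/* stands in for WQ_DYING */

struct sketch_wq {
	unsigned int flags;
	int pending;			/* stands in for the real worklist */
};

static bool sketch_queue_work(struct sketch_wq *wq)
{
	if (wq->flags & WQ_SKETCH_DYING) {
		/* the kernel uses WARN_ON_ONCE() here and drops the request */
		fprintf(stderr, "queue_work on dying workqueue ignored\n");
		return false;
	}
	wq->pending++;
	return true;
}

static void sketch_destroy_workqueue(struct sketch_wq *wq)
{
	wq->flags |= WQ_SKETCH_DYING;	/* set before the final flush */
	wq->pending = 0;		/* stands in for flush_workqueue() */
}

int main(void)
{
	struct sketch_wq wq = { 0, 0 };

	sketch_queue_work(&wq);		/* accepted */
	sketch_destroy_workqueue(&wq);
	sketch_queue_work(&wq);		/* rejected with a warning */
	return 0;
}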
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 ++ kernel/workqueue.c | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4f9d277bcd9a..c959666eafca 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -241,6 +241,8 @@ enum { WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ + WQ_DYING = 1 << 6, /* internal: workqueue is dying */ + WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index cc3456f96c56..362b50d092e2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -87,7 +87,8 @@ enum { /* * Structure fields follow one of the following exclusion rules. * - * I: Set during initialization and read-only afterwards. + * I: Modifiable by initialization/destruction paths and read-only for + * everyone else. * * P: Preemption protected. Disabling preemption is enough and should * only be modified and accessed from the local cpu. @@ -944,6 +945,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, debug_work_activate(work); + if (WARN_ON_ONCE(wq->flags & WQ_DYING)) + return; + /* determine gcwq to use */ if (!(wq->flags & WQ_UNBOUND)) { struct global_cwq *last_gcwq; @@ -2828,6 +2832,7 @@ void destroy_workqueue(struct workqueue_struct *wq) { unsigned int cpu; + wq->flags |= WQ_DYING; flush_workqueue(wq); /* -- cgit v1.2.3 From 8ca3eb08097f6839b2206e2242db4179aee3cfb3 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Tue, 24 Aug 2010 11:44:18 -0700 Subject: guard page for stacks that grow upwards pa-risc and ia64 have stacks that grow upwards. Check that they do not run into other mappings. By making VM_GROWSUP 0x0 on architectures that do not ever use it, we can avoid some unpleasant #ifdefs in check_stack_guard_page(). Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 +++++++- mm/memory.c | 15 +++++++++++---- mm/mmap.c | 3 --- 3 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 709f6728fc90..831c693416b2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -78,7 +78,11 @@ extern unsigned int kobjsize(const void *objp); #define VM_MAYSHARE 0x00000080 #define VM_GROWSDOWN 0x00000100 /* general info on the segment */ +#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64) #define VM_GROWSUP 0x00000200 +#else +#define VM_GROWSUP 0x00000000 +#endif #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. 
*/ @@ -1330,8 +1334,10 @@ unsigned long ra_submit(struct file_ra_state *ra, /* Do stack extension */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); -#ifdef CONFIG_IA64 +#if VM_GROWSUP extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); +#else + #define expand_upwards(vma, address) do { } while (0) #endif extern int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address); diff --git a/mm/memory.c b/mm/memory.c index 2ed2267439df..6b2ab1051851 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2760,11 +2760,9 @@ out_release: } /* - * This is like a special single-page "expand_downwards()", - * except we must first make sure that 'address-PAGE_SIZE' + * This is like a special single-page "expand_{down|up}wards()", + * except we must first make sure that 'address{-|+}PAGE_SIZE' * doesn't hit another vma. - * - * The "find_vma()" will do the right thing even if we wrap */ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address) { @@ -2783,6 +2781,15 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo expand_stack(vma, address - PAGE_SIZE); } + if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) { + struct vm_area_struct *next = vma->vm_next; + + /* As VM_GROWSDOWN but s/below/above/ */ + if (next && next->vm_start == address + PAGE_SIZE) + return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM; + + expand_upwards(vma, address + PAGE_SIZE); + } return 0; } diff --git a/mm/mmap.c b/mm/mmap.c index 331e51af38c9..6128dc8e5ede 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1716,9 +1716,6 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns * PA-RISC uses this for its stack; IA64 for its Register Backing Store. * vma is the last one with address > vma->vm_end. Have to extend vma. */ -#ifndef CONFIG_IA64 -static -#endif int expand_upwards(struct vm_area_struct *vma, unsigned long address) { int error; -- cgit v1.2.3 From 633adf1ad1c92c02bd3f10bbd73737a969179378 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Aug 2010 14:49:58 +0200 Subject: cfg80211: mark ieee80211_hdrlen const This function analyses only its single, value-passed argument, and has no side effects. Thus it can be const, which makes mac80211 smaller, for example: text data bss dec hex filename 362518 16720 884 380122 5ccda mac80211.ko (before) 362358 16720 884 379962 5cc3a mac80211.ko (after) a 160 byte saving in text size, and an optimisation because the function won't be called as often. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/net/cfg80211.h | 2 +- net/wireless/util.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2fd06c60ffbb..2b403c7ee32e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1715,7 +1715,7 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); * ieee80211_hdrlen - get header length in bytes from frame control * @fc: frame control field in little-endian format */ -unsigned int ieee80211_hdrlen(__le16 fc); +unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 diff --git a/net/wireless/util.c b/net/wireless/util.c index 0c8a1e8b7690..1eb24162be61 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -221,7 +221,7 @@ const unsigned char bridge_tunnel_header[] __aligned(2) = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; EXPORT_SYMBOL(bridge_tunnel_header); -unsigned int ieee80211_hdrlen(__le16 fc) +unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc) { unsigned int hdrlen = 24; -- cgit v1.2.3 From 2e161f78e5f63a7f9fd25a766bb7f816a01eb14a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Aug 2010 15:38:38 +0200 Subject: cfg80211/mac80211: extensible frame processing Allow userspace to register for more than just action frames by giving the frame subtype, and make it possible to use this in various modes as well. With some tweaks and some added functionality this will, in the future, also be usable in AP mode and be able to replace the cooked monitor interface currently used in that case. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 93 +++++++++++++++++++++++------ include/net/cfg80211.h | 56 +++++++++++------- net/mac80211/cfg.c | 12 ++-- net/mac80211/ieee80211_i.h | 1 + net/mac80211/iface.c | 6 +- net/mac80211/main.c | 37 ++++++++++++ net/mac80211/rx.c | 137 +++++++++++++++++++++++++++++------------- net/mac80211/status.c | 2 +- net/mac80211/util.c | 6 +- net/wireless/core.c | 8 +-- net/wireless/core.h | 21 +++---- net/wireless/mlme.c | 144 +++++++++++++++++++++++++++++---------------- net/wireless/nl80211.c | 108 +++++++++++++++++++++++++--------- net/wireless/nl80211.h | 14 ++--- net/wireless/util.c | 2 +- 15 files changed, 452 insertions(+), 195 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 2c8701687336..8af1e66c3cf9 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -39,6 +39,43 @@ * TODO: need more info? */ +/** + * DOC: Frame transmission/registration support + * + * Frame transmission and registration support exists to allow userspace + * management entities such as wpa_supplicant react to management frames + * that are not being handled by the kernel. This includes, for example, + * certain classes of action frames that cannot be handled in the kernel + * for various reasons. + * + * Frame registration is done on a per-interface basis and registrations + * cannot be removed other than by closing the socket. It is possible to + * specify a registration filter to register, for example, only for a + * certain type of action frame. In particular with action frames, those + * that userspace registers for will not be returned as unhandled by the + * driver, so that the registered application has to take responsibility + * for doing that. 
+ * + * The type of frame that can be registered for is also dependent on the + * driver and interface type. The frame types are advertised in wiphy + * attributes so applications know what to expect. + * + * NOTE: When an interface changes type while registrations are active, + * these registrations are ignored until the interface type is + * changed again. This means that changing the interface type can + * lead to a situation that couldn't otherwise be produced, but + * any such registrations will be dormant in the sense that they + * will not be serviced, i.e. they will not receive any frames. + * + * Frame transmission allows userspace to send for example the required + * responses to action frames. It is subject to some sanity checking, + * but many frames can be transmitted. When a frame was transmitted, its + * status is indicated to the sending socket. + * + * For more technical details, see the corresponding command descriptions + * below. + */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -301,16 +338,18 @@ * rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface * and @NL80211_ATTR_TX_RATES the set of allowed rates. * - * @NL80211_CMD_REGISTER_ACTION: Register for receiving certain action frames - * (via @NL80211_CMD_ACTION) for processing in userspace. This command - * requires an interface index and a match attribute containing the first - * few bytes of the frame that should match, e.g. a single byte for only - * a category match or four bytes for vendor frames including the OUI. - * The registration cannot be dropped, but is removed automatically - * when the netlink socket is closed. Multiple registrations can be made. - * @NL80211_CMD_ACTION: Action frame TX request and RX notification. This - * command is used both as a request to transmit an Action frame and as an - * event indicating reception of an Action frame that was not processed in + * @NL80211_CMD_REGISTER_FRAME: Register for receiving certain mgmt frames + * (via @NL80211_CMD_FRAME) for processing in userspace. This command + * requires an interface index, a frame type attribute (optional for + * backward compatibility reasons, if not given assumes action frames) + * and a match attribute containing the first few bytes of the frame + * that should match, e.g. a single byte for only a category match or + * four bytes for vendor frames including the OUI. The registration + * cannot be dropped, but is removed automatically when the netlink + * socket is closed. Multiple registrations can be made. + * @NL80211_CMD_FRAME: Management frame TX request and RX notification. This + * command is used both as a request to transmit a management frame and + * as an event indicating reception of a frame that was not processed in * kernel code, but is for us (i.e., which may need to be processed in a * user space application). %NL80211_ATTR_FRAME is used to specify the * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ (and @@ -320,8 +359,8 @@ * operational channel). When called, this operation returns a cookie * (%NL80211_ATTR_COOKIE) that will be included with the TX status event * pertaining to the TX request. - * @NL80211_CMD_ACTION_TX_STATUS: Report TX status of an Action frame - * transmitted with %NL80211_CMD_ACTION. %NL80211_ATTR_COOKIE identifies + * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame + * transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies * the TX command and %NL80211_ATTR_FRAME includes the contents of the * frame. 
%NL80211_ATTR_ACK flag is included if the recipient acknowledged * the frame. @@ -429,9 +468,12 @@ enum nl80211_commands { NL80211_CMD_SET_TX_BITRATE_MASK, - NL80211_CMD_REGISTER_ACTION, - NL80211_CMD_ACTION, - NL80211_CMD_ACTION_TX_STATUS, + NL80211_CMD_REGISTER_FRAME, + NL80211_CMD_REGISTER_ACTION = NL80211_CMD_REGISTER_FRAME, + NL80211_CMD_FRAME, + NL80211_CMD_ACTION = NL80211_CMD_FRAME, + NL80211_CMD_FRAME_TX_STATUS, + NL80211_CMD_ACTION_TX_STATUS = NL80211_CMD_FRAME_TX_STATUS, NL80211_CMD_SET_POWER_SAVE, NL80211_CMD_GET_POWER_SAVE, @@ -708,7 +750,16 @@ enum nl80211_commands { * is used with %NL80211_CMD_SET_TX_BITRATE_MASK. * * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain - * at least one byte, currently used with @NL80211_CMD_REGISTER_ACTION. + * at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME. + * @NL80211_ATTR_FRAME_TYPE: A u16 indicating the frame type/subtype for the + * @NL80211_CMD_REGISTER_FRAME command. + * @NL80211_ATTR_TX_FRAME_TYPES: wiphy capability attribute, which is a + * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing + * information about which frame types can be transmitted with + * %NL80211_CMD_FRAME. + * @NL80211_ATTR_RX_FRAME_TYPES: wiphy capability attribute, which is a + * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing + * information about which frame types can be registered for RX. * * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was * acknowledged by the recipient. @@ -891,6 +942,10 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_TX_POWER_SETTING, NL80211_ATTR_WIPHY_TX_POWER_LEVEL, + NL80211_ATTR_TX_FRAME_TYPES, + NL80211_ATTR_RX_FRAME_TYPES, + NL80211_ATTR_FRAME_TYPE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -947,7 +1002,7 @@ enum nl80211_attrs { * @NL80211_IFTYPE_MONITOR: monitor interface receiving all frames * @NL80211_IFTYPE_MESH_POINT: mesh point * @NL80211_IFTYPE_MAX: highest interface type number currently defined - * @__NL80211_IFTYPE_AFTER_LAST: internal use + * @NUM_NL80211_IFTYPES: number of defined interface types * * These values are used with the %NL80211_ATTR_IFTYPE * to set the type of an interface. @@ -964,8 +1019,8 @@ enum nl80211_iftype { NL80211_IFTYPE_MESH_POINT, /* keep last */ - __NL80211_IFTYPE_AFTER_LAST, - NL80211_IFTYPE_MAX = __NL80211_IFTYPE_AFTER_LAST - 1 + NUM_NL80211_IFTYPES, + NL80211_IFTYPE_MAX = NUM_NL80211_IFTYPES - 1 }; /** diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2b403c7ee32e..6a98b1b3bfde 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1020,7 +1020,7 @@ struct cfg80211_pmksa { * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation. * This allows the operation to be terminated prior to timeout based on * the duration value. 
- * @action: Transmit an action frame + * @mgmt_tx: Transmit a management frame * * @testmode_cmd: run a test mode command * @@ -1172,7 +1172,7 @@ struct cfg80211_ops { struct net_device *dev, u64 cookie); - int (*action)(struct wiphy *wiphy, struct net_device *dev, + int (*mgmt_tx)(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, bool channel_type_valid, @@ -1236,6 +1236,10 @@ struct mac_address { u8 addr[ETH_ALEN]; }; +struct ieee80211_txrx_stypes { + u16 tx, rx; +}; + /** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback @@ -1286,6 +1290,10 @@ struct mac_address { * @privid: a pointer that drivers can use to identify if an arbitrary * wiphy is theirs, e.g. in global notifiers * @bands: information about bands/channels supported by this device + * + * @mgmt_stypes: bitmasks of frame subtypes that can be subscribed to or + * transmitted through nl80211, points to an array indexed by interface + * type */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -1294,9 +1302,12 @@ struct wiphy { u8 perm_addr[ETH_ALEN]; u8 addr_mask[ETH_ALEN]; - u16 n_addresses; struct mac_address *addresses; + const struct ieee80211_txrx_stypes *mgmt_stypes; + + u16 n_addresses; + /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ u16 interface_modes; @@ -1492,8 +1503,8 @@ struct cfg80211_cached_keys; * set by driver (if supported) on add_interface BEFORE registering the * netdev and may otherwise be used by driver read-only, will be update * by cfg80211 on change_interface - * @action_registrations: list of registrations for action frames - * @action_registrations_lock: lock for the list + * @mgmt_registrations: list of registrations for management frames + * @mgmt_registrations_lock: lock for the list * @mtx: mutex used to lock data in this struct * @cleanup_work: work struct used for cleanup that can't be done directly */ @@ -1505,8 +1516,8 @@ struct wireless_dev { struct list_head list; struct net_device *netdev; - struct list_head action_registrations; - spinlock_t action_registrations_lock; + struct list_head mgmt_registrations; + spinlock_t mgmt_registrations_lock; struct mutex mtx; @@ -2373,38 +2384,39 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); /** - * cfg80211_rx_action - notification of received, unprocessed Action frame + * cfg80211_rx_mgmt - notification of received, unprocessed management frame * @dev: network device * @freq: Frequency on which the frame was received in MHz - * @buf: Action frame (header + body) + * @buf: Management frame (header + body) * @len: length of the frame data * @gfp: context flags - * Returns %true if a user space application is responsible for rejecting the - * unrecognized Action frame; %false if no such application is registered - * (i.e., the driver is responsible for rejecting the unrecognized Action - * frame) + * + * Returns %true if a user space application has registered for this frame. + * For action frames, that makes it responsible for rejecting unrecognized + * action frames; %false otherwise, in which case for action frames the + * driver is responsible for rejecting the frame. * * This function is called whenever an Action frame is received for a station * mode interface, but is not processed in kernel. 
*/ -bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, - size_t len, gfp_t gfp); +bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf, + size_t len, gfp_t gfp); /** - * cfg80211_action_tx_status - notification of TX status for Action frame + * cfg80211_mgmt_tx_status - notification of TX status for management frame * @dev: network device - * @cookie: Cookie returned by cfg80211_ops::action() - * @buf: Action frame (header + body) + * @cookie: Cookie returned by cfg80211_ops::mgmt_tx() + * @buf: Management frame (header + body) * @len: length of the frame data * @ack: Whether frame was acknowledged * @gfp: context flags * - * This function is called whenever an Action frame was requested to be - * transmitted with cfg80211_ops::action() to report the TX status of the + * This function is called whenever a management frame was requested to be + * transmitted with cfg80211_ops::mgmt_tx() to report the TX status of the * transmission attempt. */ -void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp); +void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp); /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f9a317766136..94787d21282c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1521,11 +1521,11 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, return ieee80211_wk_cancel_remain_on_channel(sdata, cookie); } -static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, - const u8 *buf, size_t len, u64 *cookie) +static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + bool channel_type_valid, + const u8 *buf, size_t len, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -1625,6 +1625,6 @@ struct cfg80211_ops mac80211_config_ops = { .set_bitrate_mask = ieee80211_set_bitrate_mask, .remain_on_channel = ieee80211_remain_on_channel, .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, - .action = ieee80211_action, + .mgmt_tx = ieee80211_mgmt_tx, .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1bf05bfd149d..e73ae51dc036 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -170,6 +170,7 @@ typedef unsigned __bitwise__ ieee80211_rx_result; #define IEEE80211_RX_RA_MATCH BIT(1) #define IEEE80211_RX_AMSDU BIT(2) #define IEEE80211_RX_FRAGMENTED BIT(3) +#define IEEE80211_MALFORMED_ACTION_FRM BIT(4) /* only add flags here that do not change with subframes of an aMPDU */ struct ieee80211_rx_data { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 9459aeee0ddc..86f434f234ae 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -177,7 +177,7 @@ static int ieee80211_open(struct net_device *dev) /* no special treatment */ break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: /* cannot happen */ WARN_ON(1); break; @@ -634,7 +634,7 @@ static void ieee80211_teardown_sdata(struct net_device *dev) case NL80211_IFTYPE_MONITOR: break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: BUG(); break; 
} @@ -886,7 +886,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP_VLAN: break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: BUG(); break; } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 0afccda42a24..a53feac4618c 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -417,6 +417,41 @@ void ieee80211_napi_complete(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_napi_complete); +/* There isn't a lot of sense in it, but you can transmit anything you like */ +static const struct ieee80211_txrx_stypes +ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = { + [NL80211_IFTYPE_ADHOC] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ACTION >> 4), + }, + [NL80211_IFTYPE_STATION] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | + BIT(IEEE80211_STYPE_PROBE_REQ >> 4), + }, + [NL80211_IFTYPE_AP] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_PROBE_REQ >> 4) | + BIT(IEEE80211_STYPE_DISASSOC >> 4) | + BIT(IEEE80211_STYPE_AUTH >> 4) | + BIT(IEEE80211_STYPE_DEAUTH >> 4) | + BIT(IEEE80211_STYPE_ACTION >> 4), + }, + [NL80211_IFTYPE_AP_VLAN] = { + /* copy AP */ + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_PROBE_REQ >> 4) | + BIT(IEEE80211_STYPE_DISASSOC >> 4) | + BIT(IEEE80211_STYPE_AUTH >> 4) | + BIT(IEEE80211_STYPE_DEAUTH >> 4) | + BIT(IEEE80211_STYPE_ACTION >> 4), + }, +}; + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -446,6 +481,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, if (!wiphy) return NULL; + wiphy->mgmt_stypes = ieee80211_default_mgmt_stypes; + wiphy->flags |= WIPHY_FLAG_NETNS_OK | WIPHY_FLAG_4ADDR_AP | WIPHY_FLAG_4ADDR_STATION; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 4fdbed58ca2f..aa41e382bbb3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1939,14 +1939,37 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, skb); } +static ieee80211_rx_result debug_noinline +ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) +{ + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; + + /* + * From here on, look only at management frames. + * Data and control frames are already handled, + * and unknown (reserved) frames are useless. 
+ */ + if (rx->skb->len < 24) + return RX_DROP_MONITOR; + + if (!ieee80211_is_mgmt(mgmt->frame_control)) + return RX_DROP_MONITOR; + + if (!(rx->flags & IEEE80211_RX_RA_MATCH)) + return RX_DROP_MONITOR; + + if (ieee80211_drop_unencrypted_mgmt(rx)) + return RX_DROP_UNUSABLE; + + return RX_CONTINUE; +} + static ieee80211_rx_result debug_noinline ieee80211_rx_h_action(struct ieee80211_rx_data *rx) { struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; - struct sk_buff *nskb; - struct ieee80211_rx_status *status; int len = rx->skb->len; if (!ieee80211_is_action(mgmt->frame_control)) @@ -1962,9 +1985,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_UNUSABLE; - if (ieee80211_drop_unencrypted_mgmt(rx)) - return RX_DROP_UNUSABLE; - switch (mgmt->u.action.category) { case WLAN_CATEGORY_BACK: /* @@ -2055,17 +2075,36 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto queue; } + return RX_CONTINUE; + invalid: - /* - * For AP mode, hostapd is responsible for handling any action - * frames that we didn't handle, including returning unknown - * ones. For all other modes we will return them to the sender, - * setting the 0x80 bit in the action category, as required by - * 802.11-2007 7.3.1.11. - */ - if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - return RX_DROP_MONITOR; + rx->flags |= IEEE80211_MALFORMED_ACTION_FRM; + /* will return in the next handlers */ + return RX_CONTINUE; + + handled: + if (rx->sta) + rx->sta->rx_packets++; + dev_kfree_skb(rx->skb); + return RX_QUEUED; + + queue: + rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; + skb_queue_tail(&sdata->skb_queue, rx->skb); + ieee80211_queue_work(&local->hw, &sdata->work); + if (rx->sta) + rx->sta->rx_packets++; + return RX_QUEUED; +} + +static ieee80211_rx_result debug_noinline +ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) +{ + struct ieee80211_rx_status *status; + + /* skip known-bad action frames and return them in the next handler */ + if (rx->flags & IEEE80211_MALFORMED_ACTION_FRM) + return RX_CONTINUE; /* * Getting here means the kernel doesn't know how to handle @@ -2075,10 +2114,44 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) */ status = IEEE80211_SKB_RXCB(rx->skb); - if (cfg80211_rx_action(rx->sdata->dev, status->freq, - rx->skb->data, rx->skb->len, - GFP_ATOMIC)) - goto handled; + if (cfg80211_rx_mgmt(rx->sdata->dev, status->freq, + rx->skb->data, rx->skb->len, + GFP_ATOMIC)) { + if (rx->sta) + rx->sta->rx_packets++; + dev_kfree_skb(rx->skb); + return RX_QUEUED; + } + + + return RX_CONTINUE; +} + +static ieee80211_rx_result debug_noinline +ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) +{ + struct ieee80211_local *local = rx->local; + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; + struct sk_buff *nskb; + struct ieee80211_sub_if_data *sdata = rx->sdata; + + if (!ieee80211_is_action(mgmt->frame_control)) + return RX_CONTINUE; + + /* + * For AP mode, hostapd is responsible for handling any action + * frames that we didn't handle, including returning unknown + * ones. For all other modes we will return them to the sender, + * setting the 0x80 bit in the action category, as required by + * 802.11-2007 7.3.1.11. 
+ * Newer versions of hostapd shall also use the management frame + * registration mechanisms, but older ones still use cooked + * monitor interfaces so push all frames there. + */ + if (!(rx->flags & IEEE80211_MALFORMED_ACTION_FRM) && + (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) + return RX_DROP_MONITOR; /* do not return rejected action frames */ if (mgmt->u.action.category & 0x80) @@ -2097,20 +2170,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) ieee80211_tx_skb(rx->sdata, nskb); } - - handled: - if (rx->sta) - rx->sta->rx_packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; - - queue: - rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; - skb_queue_tail(&sdata->skb_queue, rx->skb); - ieee80211_queue_work(&local->hw, &sdata->work); - if (rx->sta) - rx->sta->rx_packets++; - return RX_QUEUED; } static ieee80211_rx_result debug_noinline @@ -2121,15 +2182,6 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; __le16 stype; - if (!(rx->flags & IEEE80211_RX_RA_MATCH)) - return RX_DROP_MONITOR; - - if (rx->skb->len < 24) - return RX_DROP_MONITOR; - - if (ieee80211_drop_unencrypted_mgmt(rx)) - return RX_DROP_UNUSABLE; - rxs = ieee80211_work_rx_mgmt(rx->sdata, rx->skb); if (rxs != RX_CONTINUE) return rxs; @@ -2374,7 +2426,10 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, if (res != RX_CONTINUE) goto rxh_next; + CALL_RXH(ieee80211_rx_h_mgmt_check) CALL_RXH(ieee80211_rx_h_action) + CALL_RXH(ieee80211_rx_h_userspace_mgmt) + CALL_RXH(ieee80211_rx_h_action_return) CALL_RXH(ieee80211_rx_h_mgmt) rxh_next: @@ -2527,7 +2582,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, break; case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: /* should never get here */ WARN_ON(1); break; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 10caec5ea8fa..67a35841bef0 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -296,7 +296,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) } if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) - cfg80211_action_tx_status( + cfg80211_mgmt_tx_status( skb->dev, (unsigned long) skb, skb->data, skb->len, !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 748387d45bc0..cd2b485fed4f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -471,7 +471,7 @@ void ieee80211_iterate_active_interfaces( list_for_each_entry(sdata, &local->interfaces, list) { switch (sdata->vif.type) { - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: @@ -505,7 +505,7 @@ void ieee80211_iterate_active_interfaces_atomic( list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: @@ -1189,7 +1189,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* ignore virtual */ break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: WARN_ON(1); break; } diff --git a/net/wireless/core.c b/net/wireless/core.c index c70909c3eae4..d52630bbab04 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -433,7 +433,7 @@ int wiphy_register(struct 
wiphy *wiphy) /* sanity check ifmodes */ WARN_ON(!ifmodes); - ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; + ifmodes &= ((1 << NUM_NL80211_IFTYPES) - 1) & ~1; if (WARN_ON(ifmodes != wiphy->interface_modes)) wiphy->interface_modes = ifmodes; @@ -685,8 +685,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work); INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); - INIT_LIST_HEAD(&wdev->action_registrations); - spin_lock_init(&wdev->action_registrations_lock); + INIT_LIST_HEAD(&wdev->mgmt_registrations); + spin_lock_init(&wdev->mgmt_registrations_lock); mutex_lock(&rdev->devlist_mtx); list_add_rcu(&wdev->list, &rdev->netdev_list); @@ -806,7 +806,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, sysfs_remove_link(&dev->dev.kobj, "phy80211"); list_del_rcu(&wdev->list); rdev->devlist_generation++; - cfg80211_mlme_purge_actions(wdev); + cfg80211_mlme_purge_registrations(wdev); #ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.keys); #endif diff --git a/net/wireless/core.h b/net/wireless/core.h index 63d57ae399c3..58ab2c791d28 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -331,16 +331,17 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *resp_ie, size_t resp_ie_len, u16 status, bool wextev, struct cfg80211_bss *bss); -int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, - const u8 *match_data, int match_len); -void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid); -void cfg80211_mlme_purge_actions(struct wireless_dev *wdev); -int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, - const u8 *buf, size_t len, u64 *cookie); +int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, + u16 frame_type, const u8 *match_data, + int match_len); +void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); +void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); +int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + bool channel_type_valid, + const u8 *buf, size_t len, u64 *cookie); /* SME */ int __cfg80211_connect(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index ee0af32ed59e..8515b1e5c578 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -748,31 +748,51 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, } EXPORT_SYMBOL(cfg80211_new_sta); -struct cfg80211_action_registration { +struct cfg80211_mgmt_registration { struct list_head list; u32 nlpid; int match_len; + __le16 frame_type; + u8 match[]; }; -int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, - const u8 *match_data, int match_len) +int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, + u16 frame_type, const u8 *match_data, + int match_len) { - struct cfg80211_action_registration *reg, *nreg; + struct cfg80211_mgmt_registration *reg, *nreg; int err = 0; + u16 mgmt_type; + + if (!wdev->wiphy->mgmt_stypes) + return -EOPNOTSUPP; + + if ((frame_type & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT) + return -EINVAL; + + if (frame_type & ~(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) + return -EINVAL; + + mgmt_type = (frame_type & 
IEEE80211_FCTL_STYPE) >> 4; + if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].rx & BIT(mgmt_type))) + return -EINVAL; nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL); if (!nreg) return -ENOMEM; - spin_lock_bh(&wdev->action_registrations_lock); + spin_lock_bh(&wdev->mgmt_registrations_lock); - list_for_each_entry(reg, &wdev->action_registrations, list) { + list_for_each_entry(reg, &wdev->mgmt_registrations, list) { int mlen = min(match_len, reg->match_len); + if (frame_type != le16_to_cpu(reg->frame_type)) + continue; + if (memcmp(reg->match, match_data, mlen) == 0) { err = -EALREADY; break; @@ -787,62 +807,75 @@ int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, memcpy(nreg->match, match_data, match_len); nreg->match_len = match_len; nreg->nlpid = snd_pid; - list_add(&nreg->list, &wdev->action_registrations); + nreg->frame_type = cpu_to_le16(frame_type); + list_add(&nreg->list, &wdev->mgmt_registrations); out: - spin_unlock_bh(&wdev->action_registrations_lock); + spin_unlock_bh(&wdev->mgmt_registrations_lock); return err; } -void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid) +void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) { - struct cfg80211_action_registration *reg, *tmp; + struct cfg80211_mgmt_registration *reg, *tmp; - spin_lock_bh(&wdev->action_registrations_lock); + spin_lock_bh(&wdev->mgmt_registrations_lock); - list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { + list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { if (reg->nlpid == nlpid) { list_del(®->list); kfree(reg); } } - spin_unlock_bh(&wdev->action_registrations_lock); + spin_unlock_bh(&wdev->mgmt_registrations_lock); } -void cfg80211_mlme_purge_actions(struct wireless_dev *wdev) +void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) { - struct cfg80211_action_registration *reg, *tmp; + struct cfg80211_mgmt_registration *reg, *tmp; - spin_lock_bh(&wdev->action_registrations_lock); + spin_lock_bh(&wdev->mgmt_registrations_lock); - list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { + list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { list_del(®->list); kfree(reg); } - spin_unlock_bh(&wdev->action_registrations_lock); + spin_unlock_bh(&wdev->mgmt_registrations_lock); } -int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, - const u8 *buf, size_t len, u64 *cookie) +int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + bool channel_type_valid, + const u8 *buf, size_t len, u64 *cookie) { struct wireless_dev *wdev = dev->ieee80211_ptr; const struct ieee80211_mgmt *mgmt; + u16 stype; + + if (!wdev->wiphy->mgmt_stypes) + return -EOPNOTSUPP; - if (rdev->ops->action == NULL) + if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; + if (len < 24 + 1) return -EINVAL; mgmt = (const struct ieee80211_mgmt *) buf; - if (!ieee80211_is_action(mgmt->frame_control)) + + if (!ieee80211_is_mgmt(mgmt->frame_control)) return -EINVAL; - if (mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) { + + stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE; + if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].tx & BIT(stype >> 4))) + return -EINVAL; + + if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category != 
WLAN_CATEGORY_PUBLIC) { /* Verify that we are associated with the destination AP */ wdev_lock(wdev); @@ -863,64 +896,75 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, return -EINVAL; /* Transmit the Action frame as requested by user space */ - return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type, - channel_type_valid, buf, len, cookie); + return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, channel_type, + channel_type_valid, buf, len, cookie); } -bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, - size_t len, gfp_t gfp) +bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf, + size_t len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - struct cfg80211_action_registration *reg; - const u8 *action_data; - int action_data_len; + struct cfg80211_mgmt_registration *reg; + const struct ieee80211_txrx_stypes *stypes = + &wiphy->mgmt_stypes[wdev->iftype]; + struct ieee80211_mgmt *mgmt = (void *)buf; + const u8 *data; + int data_len; bool result = false; + __le16 ftype = mgmt->frame_control & + cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE); + u16 stype; - /* frame length - min size excluding category */ - action_data_len = len - (IEEE80211_MIN_ACTION_SIZE - 1); + stype = (le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE) >> 4; - /* action data starts with category */ - action_data = buf + IEEE80211_MIN_ACTION_SIZE - 1; + if (!(stypes->rx & BIT(stype))) + return false; - spin_lock_bh(&wdev->action_registrations_lock); + data = buf + ieee80211_hdrlen(mgmt->frame_control); + data_len = len - ieee80211_hdrlen(mgmt->frame_control); + + spin_lock_bh(&wdev->mgmt_registrations_lock); + + list_for_each_entry(reg, &wdev->mgmt_registrations, list) { + if (reg->frame_type != ftype) + continue; - list_for_each_entry(reg, &wdev->action_registrations, list) { - if (reg->match_len > action_data_len) + if (reg->match_len > data_len) continue; - if (memcmp(reg->match, action_data, reg->match_len)) + if (memcmp(reg->match, data, reg->match_len)) continue; /* found match! 
*/ /* Indicate the received Action frame to user space */ - if (nl80211_send_action(rdev, dev, reg->nlpid, freq, - buf, len, gfp)) + if (nl80211_send_mgmt(rdev, dev, reg->nlpid, freq, + buf, len, gfp)) continue; result = true; break; } - spin_unlock_bh(&wdev->action_registrations_lock); + spin_unlock_bh(&wdev->mgmt_registrations_lock); return result; } -EXPORT_SYMBOL(cfg80211_rx_action); +EXPORT_SYMBOL(cfg80211_rx_mgmt); -void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp) +void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); /* Indicate TX status of the Action frame to user space */ - nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp); + nl80211_send_mgmt_tx_status(rdev, dev, cookie, buf, len, ack, gfp); } -EXPORT_SYMBOL(cfg80211_action_tx_status); +EXPORT_SYMBOL(cfg80211_mgmt_tx_status); void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index bb5b78eebeb2..927ffbd2aebc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -156,6 +156,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, + [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 }, }; /* policy for the attributes */ @@ -437,6 +438,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct ieee80211_rate *rate; int i; u16 ifmodes = dev->wiphy.interface_modes; + const struct ieee80211_txrx_stypes *mgmt_stypes = + dev->wiphy.mgmt_stypes; hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) @@ -587,7 +590,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(flush_pmksa, FLUSH_PMKSA); CMD(remain_on_channel, REMAIN_ON_CHANNEL); CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); - CMD(action, ACTION); + CMD(mgmt_tx, FRAME); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -608,6 +611,53 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_cmds); + if (mgmt_stypes) { + u16 stypes; + struct nlattr *nl_ftypes, *nl_ifs; + enum nl80211_iftype ift; + + nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES); + if (!nl_ifs) + goto nla_put_failure; + + for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { + nl_ftypes = nla_nest_start(msg, ift); + if (!nl_ftypes) + goto nla_put_failure; + i = 0; + stypes = mgmt_stypes[ift].tx; + while (stypes) { + if (stypes & 1) + NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE, + (i << 4) | IEEE80211_FTYPE_MGMT); + stypes >>= 1; + i++; + } + nla_nest_end(msg, nl_ftypes); + } + + nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES); + if (!nl_ifs) + goto nla_put_failure; + + for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { + nl_ftypes = nla_nest_start(msg, ift); + if (!nl_ftypes) + goto nla_put_failure; + i = 0; + stypes = mgmt_stypes[ift].rx; + while (stypes) { + if (stypes & 1) + NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE, + (i << 4) | IEEE80211_FTYPE_MGMT); + stypes >>= 1; + i++; + } + nla_nest_end(msg, nl_ftypes); + } + nla_nest_end(msg, nl_ifs); + } + return 
genlmsg_end(msg, hdr); nla_put_failure: @@ -4732,17 +4782,18 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, return err; } -static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) +static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; struct net_device *dev; + u16 frame_type = IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION; int err; if (!info->attrs[NL80211_ATTR_FRAME_MATCH]) return -EINVAL; - if (nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]) < 1) - return -EINVAL; + if (info->attrs[NL80211_ATTR_FRAME_TYPE]) + frame_type = nla_get_u16(info->attrs[NL80211_ATTR_FRAME_TYPE]); rtnl_lock(); @@ -4757,12 +4808,13 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) } /* not much point in registering if we can't reply */ - if (!rdev->ops->action) { + if (!rdev->ops->mgmt_tx) { err = -EOPNOTSUPP; goto out; } - err = cfg80211_mlme_register_action(dev->ieee80211_ptr, info->snd_pid, + err = cfg80211_mlme_register_mgmt(dev->ieee80211_ptr, info->snd_pid, + frame_type, nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); out: @@ -4773,7 +4825,7 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) return err; } -static int nl80211_action(struct sk_buff *skb, struct genl_info *info) +static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; struct net_device *dev; @@ -4796,7 +4848,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) if (err) goto unlock_rtnl; - if (!rdev->ops->action) { + if (!rdev->ops->mgmt_tx) { err = -EOPNOTSUPP; goto out; } @@ -4839,17 +4891,17 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) } hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, - NL80211_CMD_ACTION); + NL80211_CMD_FRAME); if (IS_ERR(hdr)) { err = PTR_ERR(hdr); goto free_msg; } - err = cfg80211_mlme_action(rdev, dev, chan, channel_type, - channel_type_valid, - nla_data(info->attrs[NL80211_ATTR_FRAME]), - nla_len(info->attrs[NL80211_ATTR_FRAME]), - &cookie); + err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, channel_type, + channel_type_valid, + nla_data(info->attrs[NL80211_ATTR_FRAME]), + nla_len(info->attrs[NL80211_ATTR_FRAME]), + &cookie); if (err) goto free_msg; @@ -5348,14 +5400,14 @@ static struct genl_ops nl80211_ops[] = { .flags = GENL_ADMIN_PERM, }, { - .cmd = NL80211_CMD_REGISTER_ACTION, - .doit = nl80211_register_action, + .cmd = NL80211_CMD_REGISTER_FRAME, + .doit = nl80211_register_mgmt, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, { - .cmd = NL80211_CMD_ACTION, - .doit = nl80211_action, + .cmd = NL80211_CMD_FRAME, + .doit = nl80211_tx_mgmt, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, @@ -6055,9 +6107,9 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, nl80211_mlme_mcgrp.id, gfp); } -int nl80211_send_action(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u32 nlpid, - int freq, const u8 *buf, size_t len, gfp_t gfp) +int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u32 nlpid, + int freq, const u8 *buf, size_t len, gfp_t gfp) { struct sk_buff *msg; void *hdr; @@ -6067,7 +6119,7 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev, if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION); + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME); if 
(!hdr) { nlmsg_free(msg); return -ENOMEM; @@ -6095,10 +6147,10 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev, return -ENOBUFS; } -void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u64 cookie, - const u8 *buf, size_t len, bool ack, - gfp_t gfp) +void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + const u8 *buf, size_t len, bool ack, + gfp_t gfp) { struct sk_buff *msg; void *hdr; @@ -6107,7 +6159,7 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, if (!msg) return; - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION_TX_STATUS); + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME_TX_STATUS); if (!hdr) { nlmsg_free(msg); return; @@ -6194,7 +6246,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb, list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) list_for_each_entry_rcu(wdev, &rdev->netdev_list, list) - cfg80211_mlme_unregister_actions(wdev, notify->pid); + cfg80211_mlme_unregister_socket(wdev, notify->pid); rcu_read_unlock(); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 2ad7fbc7d9f1..30d2f939150d 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -74,13 +74,13 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); -int nl80211_send_action(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u32 nlpid, int freq, - const u8 *buf, size_t len, gfp_t gfp); -void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u64 cookie, - const u8 *buf, size_t len, bool ack, - gfp_t gfp); +int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u32 nlpid, int freq, + const u8 *buf, size_t len, gfp_t gfp); +void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + const u8 *buf, size_t len, bool ack, + gfp_t gfp); void nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/util.c b/net/wireless/util.c index 1eb24162be61..8d961cc4ae98 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -823,7 +823,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, /* monitor can't bridge anyway */ break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: /* not happening */ break; } -- cgit v1.2.3 From d2730b2a6a019d14455556019d744ab051e6554b Mon Sep 17 00:00:00 2001 From: Gábor Stefanik Date: Mon, 16 Aug 2010 22:39:16 +0200 Subject: b43: N-PHY: Implement MAC PHY clock set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Gábor Stefanik Signed-off-by: John W. 
Linville --- drivers/net/wireless/b43/phy_n.c | 13 ++++++++++++- include/linux/ssb/ssb_regs.h | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c index f1b57070ada9..d212726d509b 100644 --- a/drivers/net/wireless/b43/phy_n.c +++ b/drivers/net/wireless/b43/phy_n.c @@ -3074,6 +3074,17 @@ static int b43_nphy_cal_rx_iq(struct b43_wldev *dev, return b43_nphy_rev2_cal_rx_iq(dev, target, type, debug); } +/* http://bcm-v4.sipsolutions.net/802.11/PHY/N/MacPhyClkSet */ +static void b43_nphy_mac_phy_clock_set(struct b43_wldev *dev, bool on) +{ + u32 tmslow = ssb_read32(dev->dev, SSB_TMSLOW); + if (on) + tmslow |= SSB_TMSLOW_PHYCLK; + else + tmslow &= ~SSB_TMSLOW_PHYCLK; + ssb_write32(dev->dev, SSB_TMSLOW, tmslow); +} + /* * Init N-PHY * http://bcm-v4.sipsolutions.net/802.11/PHY/Init/N @@ -3174,7 +3185,7 @@ int b43_phy_initn(struct b43_wldev *dev) b43_phy_write(dev, B43_NPHY_BBCFG, tmp & ~B43_NPHY_BBCFG_RSTCCA); b43_nphy_bmac_clock_fgc(dev, 0); - /* TODO N PHY MAC PHY Clock Set with argument 1 */ + b43_nphy_mac_phy_clock_set(dev, true); b43_nphy_pa_override(dev, false); b43_nphy_force_rf_sequence(dev, B43_RFSEQ_RX2TX); diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index a6d5225b9275..11daf9c140e7 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -97,6 +97,7 @@ #define SSB_TMSLOW_RESET 0x00000001 /* Reset */ #define SSB_TMSLOW_REJECT_22 0x00000002 /* Reject (Backplane rev 2.2) */ #define SSB_TMSLOW_REJECT_23 0x00000004 /* Reject (Backplane rev 2.3) */ +#define SSB_TMSLOW_PHYCLK 0x00000010 /* MAC PHY Clock Control Enable */ #define SSB_TMSLOW_CLOCK 0x00010000 /* Clock Enable */ #define SSB_TMSLOW_FGC 0x00020000 /* Force Gated Clocks On */ #define SSB_TMSLOW_PE 0x40000000 /* Power Management Enable */ -- cgit v1.2.3 From 633dd1ea683d907af944bcd9814092efe9869b05 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Aug 2010 15:01:23 +0200 Subject: mac80211: fix docbook Fix a small problem in the documentation for ieee80211_request_smps, and a now erroneous inclusion of enum ieee80211_key_alg, which no longer exists after the change to ciphers. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- Documentation/DocBook/80211.tmpl | 1 - include/net/mac80211.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl index 6f88e184d2e7..52310ac892ea 100644 --- a/Documentation/DocBook/80211.tmpl +++ b/Documentation/DocBook/80211.tmpl @@ -248,7 +248,6 @@ MISSING !Finclude/net/mac80211.h set_key_cmd !Finclude/net/mac80211.h ieee80211_key_conf -!Finclude/net/mac80211.h ieee80211_key_alg !Finclude/net/mac80211.h ieee80211_key_flags diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 914c747ac3a0..2a1811366076 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2537,7 +2537,7 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); /** * ieee80211_request_smps - request SM PS transition * @vif: &struct ieee80211_vif pointer from the add_interface callback. - * @mode: new SM PS mode + * @smps_mode: new SM PS mode * * This allows the driver to request an SM PS transition in managed * mode. 
This is useful when the driver has more information than -- cgit v1.2.3 From 2a5fb7b088f8418958775774dda9427d6c73c522 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Aug 2010 17:44:36 +0200 Subject: nl80211: some documentation fixes The nl80211 documentation is currently never generated, so problems have accumulated. Fix most of the trivial ones. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 57 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 8af1e66c3cf9..ec1690da7845 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -347,6 +347,8 @@ * four bytes for vendor frames including the OUI. The registration * cannot be dropped, but is removed automatically when the netlink * socket is closed. Multiple registrations can be made. + * @NL80211_CMD_REGISTER_ACTION: Alias for @NL80211_CMD_REGISTER_FRAME for + * backward compatibility * @NL80211_CMD_FRAME: Management frame TX request and RX notification. This * command is used both as a request to transmit a management frame and * as an event indicating reception of a frame that was not processed in @@ -359,11 +361,14 @@ * operational channel). When called, this operation returns a cookie * (%NL80211_ATTR_COOKIE) that will be included with the TX status event * pertaining to the TX request. + * @NL80211_CMD_ACTION: Alias for @NL80211_CMD_FRAME for backward compatibility. * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame * transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies * the TX command and %NL80211_ATTR_FRAME includes the contents of the * frame. %NL80211_ATTR_ACK flag is included if the recipient acknowledged * the frame. + * @NL80211_CMD_ACTION_TX_STATUS: Alias for @NL80211_CMD_FRAME_TX_STATUS for + * backward compatibility. * @NL80211_CMD_SET_CQM: Connection quality monitor configuration. This command * is used to configure connection quality monitoring notification trigger * levels. @@ -1029,11 +1034,14 @@ enum nl80211_iftype { * Station flags. When a station is added to an AP interface, it is * assumed to be already associated (and hence authenticated.) * + * @__NL80211_STA_FLAG_INVALID: attribute number 0 is reserved * @NL80211_STA_FLAG_AUTHORIZED: station is authorized (802.1X) * @NL80211_STA_FLAG_SHORT_PREAMBLE: station is capable of receiving frames * with short barker preamble * @NL80211_STA_FLAG_WME: station is WME/QoS capable * @NL80211_STA_FLAG_MFP: station uses management frame protection + * @NL80211_STA_FLAG_MAX: highest station flag number currently defined + * @__NL80211_STA_FLAG_AFTER_LAST: internal use */ enum nl80211_sta_flags { __NL80211_STA_FLAG_INVALID, @@ -1146,14 +1154,17 @@ enum nl80211_mpath_flags { * information about a mesh path. 
* * @__NL80211_MPATH_INFO_INVALID: attribute number 0 is reserved - * @NL80211_ATTR_MPATH_FRAME_QLEN: number of queued frames for this destination - * @NL80211_ATTR_MPATH_SN: destination sequence number - * @NL80211_ATTR_MPATH_METRIC: metric (cost) of this mesh path - * @NL80211_ATTR_MPATH_EXPTIME: expiration time for the path, in msec from now - * @NL80211_ATTR_MPATH_FLAGS: mesh path flags, enumerated in + * @NL80211_MPATH_INFO_FRAME_QLEN: number of queued frames for this destination + * @NL80211_MPATH_INFO_SN: destination sequence number + * @NL80211_MPATH_INFO_METRIC: metric (cost) of this mesh path + * @NL80211_MPATH_INFO_EXPTIME: expiration time for the path, in msec from now + * @NL80211_MPATH_INFO_FLAGS: mesh path flags, enumerated in * &enum nl80211_mpath_flags; - * @NL80211_ATTR_MPATH_DISCOVERY_TIMEOUT: total path discovery timeout, in msec - * @NL80211_ATTR_MPATH_DISCOVERY_RETRIES: mesh path discovery retries + * @NL80211_MPATH_INFO_DISCOVERY_TIMEOUT: total path discovery timeout, in msec + * @NL80211_MPATH_INFO_DISCOVERY_RETRIES: mesh path discovery retries + * @NL80211_MPATH_INFO_MAX: highest mesh path information attribute number + * currently defined + * @__NL80211_MPATH_INFO_AFTER_LAST: internal use */ enum nl80211_mpath_info { __NL80211_MPATH_INFO_INVALID, @@ -1182,6 +1193,8 @@ enum nl80211_mpath_info { * @NL80211_BAND_ATTR_HT_CAPA: HT capabilities, as in the HT information IE * @NL80211_BAND_ATTR_HT_AMPDU_FACTOR: A-MPDU factor, as in 11n * @NL80211_BAND_ATTR_HT_AMPDU_DENSITY: A-MPDU density, as in 11n + * @NL80211_BAND_ATTR_MAX: highest band attribute currently defined + * @__NL80211_BAND_ATTR_AFTER_LAST: internal use */ enum nl80211_band_attr { __NL80211_BAND_ATTR_INVALID, @@ -1202,6 +1215,7 @@ enum nl80211_band_attr { /** * enum nl80211_frequency_attr - frequency attributes + * @__NL80211_FREQUENCY_ATTR_INVALID: attribute number 0 is reserved * @NL80211_FREQUENCY_ATTR_FREQ: Frequency in MHz * @NL80211_FREQUENCY_ATTR_DISABLED: Channel is disabled in current * regulatory domain. @@ -1213,6 +1227,9 @@ enum nl80211_band_attr { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm * (100 * dBm). + * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number + * currently defined + * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use */ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, @@ -1232,9 +1249,13 @@ enum nl80211_frequency_attr { /** * enum nl80211_bitrate_attr - bitrate attributes + * @__NL80211_BITRATE_ATTR_INVALID: attribute number 0 is reserved * @NL80211_BITRATE_ATTR_RATE: Bitrate in units of 100 kbps * @NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE: Short preamble supported * in 2.4 GHz band. + * @NL80211_BITRATE_ATTR_MAX: highest bitrate attribute number + * currently defined + * @__NL80211_BITRATE_ATTR_AFTER_LAST: internal use */ enum nl80211_bitrate_attr { __NL80211_BITRATE_ATTR_INVALID, @@ -1290,6 +1311,7 @@ enum nl80211_reg_type { /** * enum nl80211_reg_rule_attr - regulatory rule attributes + * @__NL80211_REG_RULE_ATTR_INVALID: attribute number 0 is reserved * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional * considerations for a given frequency range. These are the * &enum nl80211_reg_rule_flags. @@ -1306,6 +1328,9 @@ enum nl80211_reg_type { * If you don't have one then don't send this. * @NL80211_ATTR_POWER_RULE_MAX_EIRP: the maximum allowed EIRP for * a given frequency range. The value is in mBm (100 * dBm). 
+ * @NL80211_REG_RULE_ATTR_MAX: highest regulatory rule attribute number + * currently defined + * @__NL80211_REG_RULE_ATTR_AFTER_LAST: internal use */ enum nl80211_reg_rule_attr { __NL80211_REG_RULE_ATTR_INVALID, @@ -1357,6 +1382,9 @@ enum nl80211_reg_rule_flags { * @__NL80211_SURVEY_INFO_INVALID: attribute number 0 is reserved * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm) + * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number + * currently defined + * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use */ enum nl80211_survey_info { __NL80211_SURVEY_INFO_INVALID, @@ -1521,6 +1549,7 @@ enum nl80211_channel_type { * enum nl80211_bss - netlink attributes for a BSS * * @__NL80211_BSS_INVALID: invalid + * @NL80211_BSS_BSSID: BSSID of the BSS (6 octets) * @NL80211_BSS_FREQUENCY: frequency in MHz (u32) * @NL80211_BSS_TSF: TSF of the received probe response/beacon (u64) * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16) @@ -1564,6 +1593,12 @@ enum nl80211_bss { /** * enum nl80211_bss_status - BSS "status" + * @NL80211_BSS_STATUS_AUTHENTICATED: Authenticated with this BSS. + * @NL80211_BSS_STATUS_ASSOCIATED: Associated with this BSS. + * @NL80211_BSS_STATUS_IBSS_JOINED: Joined to this IBSS. + * + * The BSS status is a BSS attribute in scan dumps, which + * indicates the status the interface has wrt. this BSS. */ enum nl80211_bss_status { NL80211_BSS_STATUS_AUTHENTICATED, @@ -1674,8 +1709,8 @@ enum nl80211_tx_rate_attributes { /** * enum nl80211_band - Frequency band - * @NL80211_BAND_2GHZ - 2.4 GHz ISM band - * @NL80211_BAND_5GHZ - around 5 GHz band (4.9 - 5.7 GHz) + * @NL80211_BAND_2GHZ: 2.4 GHz ISM band + * @NL80211_BAND_5GHZ: around 5 GHz band (4.9 - 5.7 GHz) */ enum nl80211_band { NL80211_BAND_2GHZ, @@ -1713,9 +1748,9 @@ enum nl80211_attr_cqm { /** * enum nl80211_cqm_rssi_threshold_event - RSSI threshold event - * @NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW - The RSSI level is lower than the + * @NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW: The RSSI level is lower than the * configured threshold - * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH - The RSSI is higher than the + * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH: The RSSI is higher than the * configured threshold */ enum nl80211_cqm_rssi_threshold_event { -- cgit v1.2.3 From d70e96932de55fb2c05b1c0af1dff178651a9b77 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Aug 2010 16:11:27 +0200 Subject: cfg80211: add some documentation Add some documentation for cfg80211. I'm hoping some of the regulatory documentation will be filled by somebody more familiar with it, hint hint! :) Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- Documentation/DocBook/80211.tmpl | 136 +++++++++++++++++++++++++++++++++++++-- include/net/cfg80211.h | 121 ++++++++++++++++++++++++++++++++-- 2 files changed, 246 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl index 52310ac892ea..b84c9282828f 100644 --- a/Documentation/DocBook/80211.tmpl +++ b/Documentation/DocBook/80211.tmpl @@ -65,18 +65,144 @@ +!Ainclude/net/cfg80211.h The cfg80211 subsystem - -MISSING - +!Pinclude/net/cfg80211.h Introduction - + + Device registration +!Pinclude/net/cfg80211.h Device registration !Finclude/net/cfg80211.h ieee80211_band - +!Finclude/net/cfg80211.h ieee80211_channel_flags +!Finclude/net/cfg80211.h ieee80211_channel +!Finclude/net/cfg80211.h ieee80211_rate_flags +!Finclude/net/cfg80211.h ieee80211_rate +!Finclude/net/cfg80211.h ieee80211_sta_ht_cap +!Finclude/net/cfg80211.h ieee80211_supported_band +!Finclude/net/cfg80211.h cfg80211_signal_type +!Finclude/net/cfg80211.h wiphy_params_flags +!Finclude/net/cfg80211.h wiphy_flags +!Finclude/net/cfg80211.h wiphy +!Finclude/net/cfg80211.h wireless_dev +!Finclude/net/cfg80211.h wiphy_new +!Finclude/net/cfg80211.h wiphy_register +!Finclude/net/cfg80211.h wiphy_unregister +!Finclude/net/cfg80211.h wiphy_free + +!Finclude/net/cfg80211.h wiphy_name +!Finclude/net/cfg80211.h wiphy_dev +!Finclude/net/cfg80211.h wiphy_priv +!Finclude/net/cfg80211.h priv_to_wiphy +!Finclude/net/cfg80211.h set_wiphy_dev +!Finclude/net/cfg80211.h wdev_priv + + + Actions and configuration +!Pinclude/net/cfg80211.h Actions and configuration +!Finclude/net/cfg80211.h cfg80211_ops +!Finclude/net/cfg80211.h vif_params +!Finclude/net/cfg80211.h key_params +!Finclude/net/cfg80211.h survey_info_flags +!Finclude/net/cfg80211.h survey_info +!Finclude/net/cfg80211.h beacon_parameters +!Finclude/net/cfg80211.h plink_actions +!Finclude/net/cfg80211.h station_parameters +!Finclude/net/cfg80211.h station_info_flags +!Finclude/net/cfg80211.h rate_info_flags +!Finclude/net/cfg80211.h rate_info +!Finclude/net/cfg80211.h station_info +!Finclude/net/cfg80211.h monitor_flags +!Finclude/net/cfg80211.h mpath_info_flags +!Finclude/net/cfg80211.h mpath_info +!Finclude/net/cfg80211.h bss_parameters +!Finclude/net/cfg80211.h ieee80211_txq_params +!Finclude/net/cfg80211.h cfg80211_crypto_settings +!Finclude/net/cfg80211.h cfg80211_auth_request +!Finclude/net/cfg80211.h cfg80211_assoc_request +!Finclude/net/cfg80211.h cfg80211_deauth_request +!Finclude/net/cfg80211.h cfg80211_disassoc_request +!Finclude/net/cfg80211.h cfg80211_ibss_params +!Finclude/net/cfg80211.h cfg80211_connect_params +!Finclude/net/cfg80211.h cfg80211_pmksa +!Finclude/net/cfg80211.h cfg80211_send_rx_auth +!Finclude/net/cfg80211.h cfg80211_send_auth_timeout +!Finclude/net/cfg80211.h __cfg80211_auth_canceled +!Finclude/net/cfg80211.h cfg80211_send_rx_assoc +!Finclude/net/cfg80211.h cfg80211_send_assoc_timeout +!Finclude/net/cfg80211.h cfg80211_send_deauth +!Finclude/net/cfg80211.h __cfg80211_send_deauth +!Finclude/net/cfg80211.h cfg80211_send_disassoc +!Finclude/net/cfg80211.h __cfg80211_send_disassoc +!Finclude/net/cfg80211.h cfg80211_ibss_joined +!Finclude/net/cfg80211.h cfg80211_connect_result +!Finclude/net/cfg80211.h cfg80211_roamed +!Finclude/net/cfg80211.h cfg80211_disconnected +!Finclude/net/cfg80211.h cfg80211_ready_on_channel +!Finclude/net/cfg80211.h cfg80211_remain_on_channel_expired +!Finclude/net/cfg80211.h cfg80211_new_sta +!Finclude/net/cfg80211.h cfg80211_rx_mgmt 
+!Finclude/net/cfg80211.h cfg80211_mgmt_tx_status +!Finclude/net/cfg80211.h cfg80211_cqm_rssi_notify +!Finclude/net/cfg80211.h cfg80211_michael_mic_failure + + + Scanning and BSS list handling +!Pinclude/net/cfg80211.h Scanning and BSS list handling +!Finclude/net/cfg80211.h cfg80211_ssid +!Finclude/net/cfg80211.h cfg80211_scan_request +!Finclude/net/cfg80211.h cfg80211_scan_done +!Finclude/net/cfg80211.h cfg80211_bss +!Finclude/net/cfg80211.h cfg80211_inform_bss_frame +!Finclude/net/cfg80211.h cfg80211_inform_bss +!Finclude/net/cfg80211.h cfg80211_unlink_bss +!Finclude/net/cfg80211.h cfg80211_find_ie +!Finclude/net/cfg80211.h ieee80211_bss_get_ie + + + Utility functions +!Pinclude/net/cfg80211.h Utility functions +!Finclude/net/cfg80211.h ieee80211_channel_to_frequency +!Finclude/net/cfg80211.h ieee80211_frequency_to_channel +!Finclude/net/cfg80211.h ieee80211_get_channel +!Finclude/net/cfg80211.h ieee80211_get_response_rate +!Finclude/net/cfg80211.h ieee80211_hdrlen +!Finclude/net/cfg80211.h ieee80211_get_hdrlen_from_skb +!Finclude/net/cfg80211.h ieee80211_radiotap_iterator + + + Data path helpers +!Pinclude/net/cfg80211.h Data path helpers +!Finclude/net/cfg80211.h ieee80211_data_to_8023 +!Finclude/net/cfg80211.h ieee80211_data_from_8023 +!Finclude/net/cfg80211.h ieee80211_amsdu_to_8023s +!Finclude/net/cfg80211.h cfg80211_classify8021d + + + Regulatory enforcement infrastructure +!Pinclude/net/cfg80211.h Regulatory enforcement infrastructure +!Finclude/net/cfg80211.h regulatory_hint +!Finclude/net/cfg80211.h wiphy_apply_custom_regulatory +!Finclude/net/cfg80211.h freq_reg_info + + + RFkill integration +!Pinclude/net/cfg80211.h RFkill integration +!Finclude/net/cfg80211.h wiphy_rfkill_set_hw_state +!Finclude/net/cfg80211.h wiphy_rfkill_start_polling +!Finclude/net/cfg80211.h wiphy_rfkill_stop_polling + + + Test mode +!Pinclude/net/cfg80211.h Test mode +!Finclude/net/cfg80211.h cfg80211_testmode_alloc_reply_skb +!Finclude/net/cfg80211.h cfg80211_testmode_reply +!Finclude/net/cfg80211.h cfg80211_testmode_alloc_event_skb +!Finclude/net/cfg80211.h cfg80211_testmode_event + diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6a98b1b3bfde..f2740537b5d6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -25,6 +25,43 @@ #include +/** + * DOC: Introduction + * + * cfg80211 is the configuration API for 802.11 devices in Linux. It bridges + * userspace and drivers, and offers some utility functionality associated + * with 802.11. cfg80211 must, directly or indirectly via mac80211, be used + * by all modern wireless drivers in Linux, so that they offer a consistent + * API through nl80211. For backward compatibility, cfg80211 also offers + * wireless extensions to userspace, but hides them from drivers completely. + * + * Additionally, cfg80211 contains code to help enforce regulatory spectrum + * use restrictions. + */ + + +/** + * DOC: Device registration + * + * In order for a driver to use cfg80211, it must register the hardware device + * with cfg80211. This happens through a number of hardware capability structs + * described below. + * + * The fundamental structure for each device is the 'wiphy', of which each + * instance describes a physical wireless device connected to the system. 
Each + * such wiphy can have zero, one, or many virtual interfaces associated with + * it, which need to be identified as such by pointing the network interface's + * @ieee80211_ptr pointer to a &struct wireless_dev which further describes + * the wireless part of the interface, normally this struct is embedded in the + * network interface's private data area. Drivers can optionally allow creating + * or destroying virtual interfaces on the fly, but without at least one or the + * ability to create some the wireless device isn't useful. + * + * Each wiphy structure contains device capability information, and also has + * a pointer to the various operations the driver offers. The definitions and + * structures here describe these capabilities in detail. + */ + /* * wireless hardware capability structures */ @@ -204,6 +241,21 @@ struct ieee80211_supported_band { * Wireless hardware/device configuration structures and methods */ +/** + * DOC: Actions and configuration + * + * Each wireless device and each virtual interface offer a set of configuration + * operations and other actions that are invoked by userspace. Each of these + * actions is described in the operations structure, and the parameters these + * operations use are described separately. + * + * Additionally, some operations are asynchronous and expect to get status + * information via some functions that drivers need to call. + * + * Scanning and BSS list handling with its associated functionality is described + * in a separate chapter. + */ + /** * struct vif_params - describes virtual interface parameters * @mesh_id: mesh ID to use @@ -570,8 +622,28 @@ struct ieee80211_txq_params { /* from net/wireless.h */ struct wiphy; -/* from net/ieee80211.h */ -struct ieee80211_channel; +/** + * DOC: Scanning and BSS list handling + * + * The scanning process itself is fairly simple, but cfg80211 offers quite + * a bit of helper functionality. To start a scan, the scan operation will + * be invoked with a scan definition. This scan definition contains the + * channels to scan, and the SSIDs to send probe requests for (including the + * wildcard, if desired). A passive scan is indicated by having no SSIDs to + * probe. Additionally, a scan request may contain extra information elements + * that should be added to the probe request. The IEs are guaranteed to be + * well-formed, and will not exceed the maximum length the driver advertised + * in the wiphy structure. + * + * When scanning finds a BSS, cfg80211 needs to be notified of that, because + * it is responsible for maintaining the BSS list; the driver should not + * maintain a list itself. For this notification, various functions exist. + * + * Since drivers do not maintain a BSS list, there are also a number of + * functions to search for a BSS and obtain information about it from the + * BSS structure cfg80211 maintains. The BSS list is also made available + * to userspace. + */ /** * struct cfg80211_ssid - SSID description @@ -1574,8 +1646,10 @@ static inline void *wdev_priv(struct wireless_dev *wdev) return wiphy_priv(wdev->wiphy); } -/* - * Utility functions +/** + * DOC: Utility functions + * + * cfg80211 offers a number of utility functions that can be useful. 
*/ /** @@ -1728,6 +1802,14 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); */ unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); +/** + * DOC: Data path helpers + * + * In addition to generic utilities, cfg80211 also offers + * functions that help implement the data path for devices + * that do not do the 802.11/802.3 conversion on the device. + */ + /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 * @skb: the 802.11 data frame @@ -1788,8 +1870,10 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb); */ const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len); -/* - * Regulatory helper functions for wiphys +/** + * DOC: Regulatory enforcement infrastructure + * + * TODO */ /** @@ -2191,6 +2275,20 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, */ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp); +/** + * DOC: RFkill integration + * + * RFkill integration in cfg80211 is almost invisible to drivers, + * as cfg80211 automatically registers an rfkill instance for each + * wireless device it knows about. Soft kill is also translated + * into disconnecting and turning all interfaces off, drivers are + * expected to turn off the device when all interfaces are down. + * + * However, devices may have a hard RFkill line, in which case they + * also need to interact with the rfkill subsystem, via cfg80211. + * They can do this with a few helper functions documented here. + */ + /** * wiphy_rfkill_set_hw_state - notify cfg80211 about hw block state * @wiphy: the wiphy @@ -2211,6 +2309,17 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy); void wiphy_rfkill_stop_polling(struct wiphy *wiphy); #ifdef CONFIG_NL80211_TESTMODE +/** + * DOC: Test mode + * + * Test mode is a set of utility functions to allow drivers to + * interact with driver-specific tools to aid, for instance, + * factory programming. + * + * This chapter describes how drivers interact with it, for more + * information see the nl80211 book's chapter on it. + */ + /** * cfg80211_testmode_alloc_reply_skb - allocate testmode reply * @wiphy: the wiphy -- cgit v1.2.3 From 2b8fd9186d9275b07aef43e5bb4e98cd571f9a7d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 23 Aug 2010 23:55:59 +0200 Subject: ACPI/PCI: Do not preserve _OSC control bits returned by a query There is the assumption in acpi_pci_osc_control_set() that it is always sufficient to compare the mask of _OSC control bits to be requested with the result of an _OSC query where all of the known control bits have been checked. However, in general, that need not be the case. For example, if an _OSC feature A depends on an _OSC feature B and control of A, B plus another _OSC feature C is requested simultaneously, the BIOS may return A, B, C, while it would only return C if A and C were requested without B. That may result in passing a wrong mask of _OSC control bits to an _OSC control request, in which case the BIOS may only grant control of a subset of the requested features. Moreover, acpi_pci_run_osc() will return an error code if that happens and the caller of acpi_pci_osc_control_set() will not know that it's been granted control of some _OSC features. Consequently, the system will generally not work as expected. 
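To make the failure mode concrete, here is a minimal self-contained sketch; the FEATURE_* bits and the program itself are invented for illustration and are not real _OSC definitions:

#include <stdbool.h>
#include <stdio.h>

#define FEATURE_A 0x1	/* hypothetical bit; only granted together with B */
#define FEATURE_B 0x2	/* hypothetical bit */
#define FEATURE_C 0x4	/* hypothetical bit */

int main(void)
{
	/* One-time query with all known bits set: the BIOS grants A, B
	 * and C, because B satisfies A's dependency. */
	unsigned int osc_control_qry = FEATURE_A | FEATURE_B | FEATURE_C;

	/* A later request for A and C only: the subset check against the
	 * cached result passes... */
	unsigned int control_req = FEATURE_A | FEATURE_C;
	bool looks_ok = (osc_control_qry & control_req) == control_req;

	/* ...but a fresh query for just A|C would return only C, since A
	 * depends on the unrequested B, so the final control request is
	 * made with a wrong mask. */
	printf("stale subset check passes: %s\n", looks_ok ? "yes" : "no");
	return 0;
}

A fresh query scoped to the actual request, as done by the patch below, avoids trusting the stale mask.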
Apart from this, acpi_pci_osc_control_set() always uses the mask of _OSC control bits returned by the very first invocation of acpi_pci_query_osc(), but that is done with the second argument equal to OSC_PCI_SEGMENT_GROUPS_SUPPORT, which generally happens to affect the returned _OSC control bits. For these reasons, make acpi_pci_osc_control_set() always check if control of the requested _OSC features will be granted before making the final control request. As a result, the osc_control_qry and osc_queried members of struct acpi_pci_root are not necessary any more, so drop them and remove the remaining code referring to them. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/acpi/pci_root.c | 20 +++++++------------- include/acpi/acpi_bus.h | 3 --- 2 files changed, 7 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index d2ae816df0f5..77cd19697b1e 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -249,12 +249,8 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, status = acpi_pci_run_osc(root->device->handle, capbuf, &result); if (ACPI_SUCCESS(status)) { root->osc_support_set = support; - if (control) { + if (control) *control = result; - } else { - root->osc_control_qry = result; - root->osc_queried = 1; - } } return status; } @@ -409,14 +405,12 @@ acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags) goto out; /* Need to query controls first before requesting them */ - if (!root->osc_queried) { - status = acpi_pci_query_osc(root, root->osc_support_set, NULL); - if (ACPI_FAILURE(status)) - goto out; - } - if ((root->osc_control_qry & control_req) != control_req) { - printk(KERN_DEBUG - "Firmware did not grant requested _OSC control\n"); + flags = control_req; + status = acpi_pci_query_osc(root, root->osc_support_set, &flags); + if (ACPI_FAILURE(status)) + goto out; + + if (flags != control_req) { status = AE_SUPPORT; goto out; } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index baacd98e7cc6..4de84ce3a927 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -377,9 +377,6 @@ struct acpi_pci_root { u32 osc_support_set; /* _OSC state of support bits */ u32 osc_control_set; /* _OSC state of control bits */ - u32 osc_control_qry; /* the latest _OSC query result */ - - u32 osc_queried:1; /* has _OSC control been queried? */ }; /* helper */ -- cgit v1.2.3 From 75fb60f26befb59dbfa05cb122972642b7bdd219 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 23 Aug 2010 23:53:11 +0200 Subject: ACPI/PCI: Negotiate _OSC control bits before requesting them It is possible that the BIOS will not grant control of all _OSC features requested via acpi_pci_osc_control_set(), so it is recommended to negotiate the final set of _OSC features with the query flag set before calling _OSC to request control of these features. To implement it, rework acpi_pci_osc_control_set() so that the caller can specify the mask of _OSC control bits to negotiate and the mask of _OSC control bits that are absolutely necessary to it. Then, acpi_pci_osc_control_set() will run _OSC queries in a loop until the mask of _OSC control bits returned by the BIOS is equal to the mask passed to it. Also, before running the _OSC request, acpi_pci_osc_control_set() will check if the caller's required control bits are present in the final mask. 
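As a usage illustration, here is a hedged sketch of the reworked calling convention, modeled on the callers updated in the hunks below; the function name and the error handling are invented for the example and are not part of the patch:

static int example_request_pcie_controls(acpi_handle handle)
{
	u32 flags = OSC_PCI_EXPRESS_PME_CONTROL |
		    OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL;
	acpi_status status;

	/* &flags is both input and output: on entry, the bits to
	 * negotiate; on success, the bits the BIOS actually granted.
	 * The second 'flags' argument is @req -- the bits the caller
	 * cannot do without. */
	status = acpi_pci_osc_control_set(handle, &flags, flags);
	if (ACPI_FAILURE(status))
		return -ENODEV;	/* control of a required bit was refused */

	/* 'flags' now also carries any other bits the BIOS granted. */
	return 0;
}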
Using this mechanism we will be able to avoid situations in which the BIOS doesn't grant control of certain _OSC features, because they depend on some other _OSC features that have not been requested. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/acpi/pci_root.c | 59 +++++++++++++++++++++++------------- drivers/pci/hotplug/acpi_pcihp.c | 2 +- drivers/pci/pcie/aer/aerdrv_acpi.c | 6 ++-- drivers/pci/pcie/pme/pcie_pme_acpi.c | 8 +++-- include/linux/acpi.h | 4 +-- 5 files changed, 49 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 77cd19697b1e..c34713112520 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -374,21 +374,32 @@ out: EXPORT_SYMBOL_GPL(acpi_get_pci_dev); /** - * acpi_pci_osc_control_set - commit requested control to Firmware - * @handle: acpi_handle for the target ACPI object - * @flags: driver's requested control bits + * acpi_pci_osc_control_set - Request control of PCI root _OSC features. + * @handle: ACPI handle of a PCI root bridge (or PCIe Root Complex). + * @mask: Mask of _OSC bits to request control of, place to store control mask. + * @req: Mask of _OSC bits the control of is essential to the caller. * - * Attempt to take control from Firmware on requested control bits. + * Run _OSC query for @mask and if that is successful, compare the returned + * mask of control bits with @req. If all of the @req bits are set in the + * returned mask, run _OSC request for it. + * + * The variable at the @mask address may be modified regardless of whether or + * not the function returns success. On success it will contain the mask of + * _OSC bits the BIOS has granted control of, but its contents are meaningless + * on failure. **/ -acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags) +acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 req) { + struct acpi_pci_root *root; acpi_status status; - u32 control_req, result, capbuf[3]; + u32 ctrl, capbuf[3]; acpi_handle tmp; - struct acpi_pci_root *root; - control_req = (flags & OSC_PCI_CONTROL_MASKS); - if (!control_req) + if (!mask) + return AE_BAD_PARAMETER; + + ctrl = *mask & OSC_PCI_CONTROL_MASKS; + if ((ctrl & req) != req) return AE_TYPE; root = acpi_pci_find_root(handle); @@ -400,27 +411,33 @@ acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags) return status; mutex_lock(&osc_lock); + + *mask = ctrl | root->osc_control_set; /* No need to evaluate _OSC if the control was already granted. */ - if ((root->osc_control_set & control_req) == control_req) + if ((root->osc_control_set & ctrl) == ctrl) goto out; - /* Need to query controls first before requesting them */ - flags = control_req; - status = acpi_pci_query_osc(root, root->osc_support_set, &flags); - if (ACPI_FAILURE(status)) - goto out; + /* Need to check the available controls bits before requesting them. 
*/ + while (*mask) { + status = acpi_pci_query_osc(root, root->osc_support_set, mask); + if (ACPI_FAILURE(status)) + goto out; + if (ctrl == *mask) + break; + ctrl = *mask; + } - if (flags != control_req) { + if ((ctrl & req) != req) { status = AE_SUPPORT; goto out; } capbuf[OSC_QUERY_TYPE] = 0; capbuf[OSC_SUPPORT_TYPE] = root->osc_support_set; - capbuf[OSC_CONTROL_TYPE] = root->osc_control_set | control_req; - status = acpi_pci_run_osc(handle, capbuf, &result); + capbuf[OSC_CONTROL_TYPE] = ctrl; + status = acpi_pci_run_osc(handle, capbuf, mask); if (ACPI_SUCCESS(status)) - root->osc_control_set = result; + root->osc_control_set = *mask; out: mutex_unlock(&osc_lock); return status; @@ -551,8 +568,8 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) if (flags != base_flags) acpi_pci_osc_support(root, flags); - status = acpi_pci_osc_control_set(root->device->handle, - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + flags = OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL; + status = acpi_pci_osc_control_set(root->device->handle, &flags, flags); if (ACPI_FAILURE(status)) { printk(KERN_INFO "Unable to assume PCIe control: Disabling ASPM\n"); diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index 45fcc1e96df9..3d93d529a7bd 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -360,7 +360,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags) acpi_get_name(handle, ACPI_FULL_PATHNAME, &string); dbg("Trying to get hotplug control for %s\n", (char *)string.pointer); - status = acpi_pci_osc_control_set(handle, flags); + status = acpi_pci_osc_control_set(handle, &flags, flags); if (ACPI_SUCCESS(status)) goto got_one; if (status == AE_SUPPORT) diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c index f278d7b0d95d..3a276a0cea93 100644 --- a/drivers/pci/pcie/aer/aerdrv_acpi.c +++ b/drivers/pci/pcie/aer/aerdrv_acpi.c @@ -39,9 +39,9 @@ int aer_osc_setup(struct pcie_device *pciedev) handle = acpi_find_root_bridge_handle(pdev); if (handle) { - status = acpi_pci_osc_control_set(handle, - OSC_PCI_EXPRESS_AER_CONTROL | - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + u32 flags = OSC_PCI_EXPRESS_AER_CONTROL | + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL; + status = acpi_pci_osc_control_set(handle, &flags, flags); } if (ACPI_FAILURE(status)) { diff --git a/drivers/pci/pcie/pme/pcie_pme_acpi.c b/drivers/pci/pcie/pme/pcie_pme_acpi.c index 83ab2287ae3f..be20222b12d3 100644 --- a/drivers/pci/pcie/pme/pcie_pme_acpi.c +++ b/drivers/pci/pcie/pme/pcie_pme_acpi.c @@ -28,6 +28,7 @@ int pcie_pme_acpi_setup(struct pcie_device *srv) acpi_status status = AE_NOT_FOUND; struct pci_dev *port = srv->port; acpi_handle handle; + u32 flags; int error = 0; if (acpi_pci_disabled) @@ -39,9 +40,10 @@ int pcie_pme_acpi_setup(struct pcie_device *srv) if (!handle) return -EINVAL; - status = acpi_pci_osc_control_set(handle, - OSC_PCI_EXPRESS_PME_CONTROL | - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + flags = OSC_PCI_EXPRESS_PME_CONTROL | + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL; + + status = acpi_pci_osc_control_set(handle, &flags, flags); if (ACPI_FAILURE(status)) { dev_info(&port->dev, "Failed to receive control of PCIe PME service: %s\n", diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ccf94dc5acdf..c227757feb06 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -304,8 +304,8 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); OSC_PCI_EXPRESS_PME_CONTROL | \ 
OSC_PCI_EXPRESS_AER_CONTROL | \ OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL) - -extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags); +extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, + u32 *mask, u32 req); extern void acpi_early_init(void); #else /* !CONFIG_ACPI */ -- cgit v1.2.3 From 5a46790ca4c40fdb6ed5336d7d6b593c96326b31 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 24 Aug 2010 14:46:53 -0700 Subject: include/linux/if_ether.h: Remove unused #define MAC_FMT Last use was removed, so remove the #define. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/if_ether.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index bed7a4682b90..f9c3df03db0f 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -137,8 +137,6 @@ extern struct ctl_table ether_table[]; extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); -#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x" - #endif #endif /* _LINUX_IF_ETHER_H */ -- cgit v1.2.3 From c2e3143e3c46ede22336316b3ff4746727c0d93a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 24 Aug 2010 14:48:10 -0700 Subject: tc: add meta match on receive hash Trivial extension to existing meta data match rules to allow matching on skb receive hash value. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/tc_ematch/tc_em_meta.h | 1 + net/sched/em_meta.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h index 0864206ec1a3..7138962664f8 100644 --- a/include/linux/tc_ematch/tc_em_meta.h +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -79,6 +79,7 @@ enum { TCF_META_ID_SK_SENDMSG_OFF, TCF_META_ID_SK_WRITE_PENDING, TCF_META_ID_VLAN_TAG, + TCF_META_ID_RXHASH, __TCF_META_ID_MAX }; #define TCF_META_ID_MAX (__TCF_META_ID_MAX - 1) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 3bcac8aa333c..34da5e29ea1a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -223,6 +223,11 @@ META_COLLECTOR(int_maclen) dst->value = skb->mac_len; } +META_COLLECTOR(int_rxhash) +{ + dst->value = skb_get_rxhash(skb); +} + /************************************************************************** * Netfilter **************************************************************************/ @@ -541,6 +546,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag), + [META_ID(RXHASH)] = META_FUNC(int_rxhash), } }; -- cgit v1.2.3 From e7c1c2c46201e46f8ce817196507d2ffd3dafd8e Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 24 Aug 2010 03:46:18 +0000 Subject: mlx4_en: Added self diagnostics test implementation The selftest includes 5 features: 1. Interrupt test: Executing commands and receiving command completion on all our interrupt vectors. 2. Link test: Verifying we are connected to valid link partner. 3. Speed test: Check that we negotiated link speed correctly. 4. Registers test: Activate HW health check command. 5. Loopback test: Send a packet on loopback interface and catch it on RX side. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. 
Miller --- drivers/net/mlx4/Makefile | 2 +- drivers/net/mlx4/en_ethtool.c | 79 ++++++++++++------ drivers/net/mlx4/en_netdev.c | 2 +- drivers/net/mlx4/en_port.c | 32 ++++++++ drivers/net/mlx4/en_port.h | 14 ++++ drivers/net/mlx4/en_rx.c | 20 +++++ drivers/net/mlx4/en_selftest.c | 179 +++++++++++++++++++++++++++++++++++++++++ drivers/net/mlx4/en_tx.c | 16 ++++ drivers/net/mlx4/eq.c | 44 ++++++++++ drivers/net/mlx4/fw.c | 3 + drivers/net/mlx4/fw.h | 1 + drivers/net/mlx4/main.c | 1 + drivers/net/mlx4/mlx4_en.h | 19 +++++ include/linux/mlx4/cmd.h | 1 + include/linux/mlx4/device.h | 2 + 15 files changed, 388 insertions(+), 27 deletions(-) create mode 100644 drivers/net/mlx4/en_selftest.c (limited to 'include') diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile index 1fd068e1d930..d1aa45a15854 100644 --- a/drivers/net/mlx4/Makefile +++ b/drivers/net/mlx4/Makefile @@ -6,4 +6,4 @@ mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \ obj-$(CONFIG_MLX4_EN) += mlx4_en.o mlx4_en-y := en_main.o en_tx.o en_rx.o en_ethtool.o en_port.o en_cq.o \ - en_resources.o en_netdev.o + en_resources.o en_netdev.o en_selftest.o diff --git a/drivers/net/mlx4/en_ethtool.c b/drivers/net/mlx4/en_ethtool.c index 398d54136967..f7d72d7a8704 100644 --- a/drivers/net/mlx4/en_ethtool.c +++ b/drivers/net/mlx4/en_ethtool.c @@ -125,6 +125,14 @@ static const char main_strings[][ETH_GSTRING_LEN] = { #define NUM_MAIN_STATS 21 #define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + NUM_PERF_STATS) +static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= { + "Interupt Test", + "Link Test", + "Speed Test", + "Register Test", + "Loopback Test", +}; + static u32 mlx4_en_get_msglevel(struct net_device *dev) { return ((struct mlx4_en_priv *) netdev_priv(dev))->msg_enable; @@ -146,10 +154,15 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset) { struct mlx4_en_priv *priv = netdev_priv(dev); - if (sset != ETH_SS_STATS) + switch (sset) { + case ETH_SS_STATS: + return NUM_ALL_STATS + + (priv->tx_ring_num + priv->rx_ring_num) * 2; + case ETH_SS_TEST: + return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.loopback_support) * 2; + default: return -EOPNOTSUPP; - - return NUM_ALL_STATS + (priv->tx_ring_num + priv->rx_ring_num) * 2; + } } static void mlx4_en_get_ethtool_stats(struct net_device *dev, @@ -181,6 +194,12 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, } +static void mlx4_en_self_test(struct net_device *dev, + struct ethtool_test *etest, u64 *buf) +{ + mlx4_en_ex_selftest(dev, &etest->flags, buf); +} + static void mlx4_en_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { @@ -188,30 +207,39 @@ static void mlx4_en_get_strings(struct net_device *dev, int index = 0; int i; - if (stringset != ETH_SS_STATS) - return; - - /* Add main counters */ - for (i = 0; i < NUM_MAIN_STATS; i++) - strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i]); - for (i = 0; i < NUM_PORT_STATS; i++) - strcpy(data + (index++) * ETH_GSTRING_LEN, + switch (stringset) { + case ETH_SS_TEST: + for (i = 0; i < MLX4_EN_NUM_SELF_TEST - 2; i++) + strcpy(data + i * ETH_GSTRING_LEN, mlx4_en_test_names[i]); + if (priv->mdev->dev->caps.loopback_support) + for (; i < MLX4_EN_NUM_SELF_TEST; i++) + strcpy(data + i * ETH_GSTRING_LEN, mlx4_en_test_names[i]); + break; + + case ETH_SS_STATS: + /* Add main counters */ + for (i = 0; i < NUM_MAIN_STATS; i++) + strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i]); + for (i = 0; i< NUM_PORT_STATS; i++) + 
strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i + NUM_MAIN_STATS]); - for (i = 0; i < priv->tx_ring_num; i++) { - sprintf(data + (index++) * ETH_GSTRING_LEN, - "tx%d_packets", i); - sprintf(data + (index++) * ETH_GSTRING_LEN, - "tx%d_bytes", i); - } - for (i = 0; i < priv->rx_ring_num; i++) { - sprintf(data + (index++) * ETH_GSTRING_LEN, - "rx%d_packets", i); - sprintf(data + (index++) * ETH_GSTRING_LEN, - "rx%d_bytes", i); - } - for (i = 0; i < NUM_PKT_STATS; i++) - strcpy(data + (index++) * ETH_GSTRING_LEN, + for (i = 0; i < priv->tx_ring_num; i++) { + sprintf(data + (index++) * ETH_GSTRING_LEN, + "tx%d_packets", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "tx%d_bytes", i); + } + for (i = 0; i < priv->rx_ring_num; i++) { + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_packets", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_bytes", i); + } + for (i = 0; i< NUM_PKT_STATS; i++) + strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i + NUM_MAIN_STATS + NUM_PORT_STATS]); + break; + } } static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) @@ -439,6 +467,7 @@ const struct ethtool_ops mlx4_en_ethtool_ops = { .get_strings = mlx4_en_get_strings, .get_sset_count = mlx4_en_get_sset_count, .get_ethtool_stats = mlx4_en_get_ethtool_stats, + .self_test = mlx4_en_self_test, .get_wol = mlx4_en_get_wol, .get_msglevel = mlx4_en_get_msglevel, .set_msglevel = mlx4_en_set_msglevel, diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c index 34cfa3cfbdae..968e75b7acf4 100644 --- a/drivers/net/mlx4/en_netdev.c +++ b/drivers/net/mlx4/en_netdev.c @@ -109,7 +109,7 @@ static void mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) mutex_unlock(&mdev->state_lock); } -static u64 mlx4_en_mac_to_u64(u8 *addr) +u64 mlx4_en_mac_to_u64(u8 *addr) { u64 mac = 0; int i; diff --git a/drivers/net/mlx4/en_port.c b/drivers/net/mlx4/en_port.c index a29abe845d2e..aa3ef2aee5bf 100644 --- a/drivers/net/mlx4/en_port.c +++ b/drivers/net/mlx4/en_port.c @@ -142,6 +142,38 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, return err; } +int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port) +{ + struct mlx4_en_query_port_context *qport_context; + struct mlx4_en_priv *priv = netdev_priv(mdev->pndev[port]); + struct mlx4_en_port_state *state = &priv->port_state; + struct mlx4_cmd_mailbox *mailbox; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + memset(mailbox->buf, 0, sizeof(*qport_context)); + err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0, + MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B); + if (err) + goto out; + qport_context = mailbox->buf; + + /* This command is always accessed from Ethtool context + * already synchronized, no need in locking */ + state->link_state = !!(qport_context->link_up & MLX4_EN_LINK_UP_MASK); + if ((qport_context->link_speed & MLX4_EN_SPEED_MASK) == + MLX4_EN_1G_SPEED) + state->link_speed = 1000; + else + state->link_speed = 10000; + state->transciver = qport_context->transceiver; + +out: + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return err; +} int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) { diff --git a/drivers/net/mlx4/en_port.h b/drivers/net/mlx4/en_port.h index e6477f12beb5..f6511aa2b7df 100644 --- a/drivers/net/mlx4/en_port.h +++ b/drivers/net/mlx4/en_port.h @@ -84,6 +84,20 @@ enum { MLX4_MCAST_ENABLE = 2, }; +struct mlx4_en_query_port_context { + u8 link_up; +#define 
MLX4_EN_LINK_UP_MASK 0x80 + u8 reserved; + __be16 mtu; + u8 reserved2; + u8 link_speed; +#define MLX4_EN_SPEED_MASK 0x3 +#define MLX4_EN_1G_SPEED 0x2 + u16 reserved3[5]; + __be64 mac; + u8 transceiver; +}; + struct mlx4_en_stat_out_mbox { /* Received frames with a length of 64 octets */ diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c index 7a5123c4a366..f421a3d42723 100644 --- a/drivers/net/mlx4/en_rx.c +++ b/drivers/net/mlx4/en_rx.c @@ -541,6 +541,21 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, return skb; } +static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb) +{ + int i; + int offset = ETH_HLEN; + + for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) { + if (*(skb->data + offset) != (unsigned char) (i & 0xff)) + goto out_loopback; + } + /* Loopback found */ + priv->loopback_ok = 1; + +out_loopback: + dev_kfree_skb_any(skb); +} int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { @@ -655,6 +670,11 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud goto next; } + if (unlikely(priv->validate_loopback)) { + validate_loopback(priv, skb); + goto next; + } + skb->ip_summed = ip_summed; skb->protocol = eth_type_trans(skb, dev); skb_record_rx_queue(skb, cq->ring); diff --git a/drivers/net/mlx4/en_selftest.c b/drivers/net/mlx4/en_selftest.c new file mode 100644 index 000000000000..43357d35616a --- /dev/null +++ b/drivers/net/mlx4/en_selftest.c @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include +#include +#include +#include +#include + +#include "mlx4_en.h" + + +static int mlx4_en_test_registers(struct mlx4_en_priv *priv) +{ + return mlx4_cmd(priv->mdev->dev, 0, 0, 0, MLX4_CMD_HW_HEALTH_CHECK, + MLX4_CMD_TIME_CLASS_A); +} + +static int mlx4_en_test_loopback_xmit(struct mlx4_en_priv *priv) +{ + struct sk_buff *skb; + struct ethhdr *ethh; + unsigned char *packet; + unsigned int packet_size = MLX4_LOOPBACK_TEST_PAYLOAD; + unsigned int i; + int err; + + + /* build the pkt before xmit */ + skb = netdev_alloc_skb(priv->dev, MLX4_LOOPBACK_TEST_PAYLOAD + ETH_HLEN + NET_IP_ALIGN); + if (!skb) { + en_err(priv, "-LOOPBACK_TEST_XMIT- failed to create skb for xmit\n"); + return -ENOMEM; + } + skb_reserve(skb, NET_IP_ALIGN); + + ethh = (struct ethhdr *)skb_put(skb, sizeof(struct ethhdr)); + packet = (unsigned char *)skb_put(skb, packet_size); + memcpy(ethh->h_dest, priv->dev->dev_addr, ETH_ALEN); + memset(ethh->h_source, 0, ETH_ALEN); + ethh->h_proto = htons(ETH_P_ARP); + skb_set_mac_header(skb, 0); + for (i = 0; i < packet_size; ++i) /* fill our packet */ + packet[i] = (unsigned char)(i & 0xff); + + /* xmit the pkt */ + err = mlx4_en_xmit(skb, priv->dev); + return err; +} + +static int mlx4_en_test_loopback(struct mlx4_en_priv *priv) +{ + u32 loopback_ok = 0; + int i; + + + priv->loopback_ok = 0; + priv->validate_loopback = 1; + + /* xmit */ + if (mlx4_en_test_loopback_xmit(priv)) { + en_err(priv, "Transmitting loopback packet failed\n"); + goto mlx4_en_test_loopback_exit; + } + + /* polling for result */ + for (i = 0; i < MLX4_EN_LOOPBACK_RETRIES; ++i) { + msleep(MLX4_EN_LOOPBACK_TIMEOUT); + if (priv->loopback_ok) { + loopback_ok = 1; + break; + } + } + if (!loopback_ok) + en_err(priv, "Loopback packet didn't arrive\n"); + +mlx4_en_test_loopback_exit: + + priv->validate_loopback = 0; + return (!loopback_ok); +} + + +static int mlx4_en_test_link(struct mlx4_en_priv *priv) +{ + if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) + return -ENOMEM; + if (priv->port_state.link_state == 1) + return 0; + else + return 1; +} + +static int mlx4_en_test_speed(struct mlx4_en_priv *priv) +{ + + if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) + return -ENOMEM; + + /* The device currently only supports 10G speed */ + if (priv->port_state.link_speed != SPEED_10000) + return priv->port_state.link_speed; + return 0; +} + + +void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_tx_ring *tx_ring; + int i, carrier_ok; + + memset(buf, 0, sizeof(u64) * MLX4_EN_NUM_SELF_TEST); + + if (*flags & ETH_TEST_FL_OFFLINE) { + /* disable the interface */ + carrier_ok = netif_carrier_ok(dev); + + netif_carrier_off(dev); +retry_tx: + /* Wait untill all tx queues are empty. 
+ * there should not be any additional incoming traffic + * since we turned the carrier off */ + msleep(200); + for (i = 0; i < priv->tx_ring_num && carrier_ok; i++) { + tx_ring = &priv->tx_ring[i]; + if (tx_ring->prod != (tx_ring->cons + tx_ring->last_nr_txbb)) + goto retry_tx; + } + + if (priv->mdev->dev->caps.loopback_support){ + buf[3] = mlx4_en_test_registers(priv); + buf[4] = mlx4_en_test_loopback(priv); + } + + if (carrier_ok) + netif_carrier_on(dev); + + } + buf[0] = mlx4_test_interrupts(mdev->dev); + buf[1] = mlx4_en_test_link(priv); + buf[2] = mlx4_en_test_speed(priv); + + for (i = 0; i < MLX4_EN_NUM_SELF_TEST; i++) { + if (buf[i]) + *flags |= ETH_TEST_FL_FAILED; + } +} diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c index 3deabd1d0357..b875f9c38848 100644 --- a/drivers/net/mlx4/en_tx.c +++ b/drivers/net/mlx4/en_tx.c @@ -600,6 +600,9 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) struct mlx4_wqe_data_seg *data; struct skb_frag_struct *frag; struct mlx4_en_tx_info *tx_info; + struct ethhdr *ethh; + u64 mac; + u32 mac_l, mac_h; int tx_ind = 0; int nr_txbb; int desc_size; @@ -679,6 +682,19 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) priv->port_stats.tx_chksum_offload++; } + if (unlikely(priv->validate_loopback)) { + /* Copy dst mac address to wqe */ + skb_reset_mac_header(skb); + ethh = eth_hdr(skb); + if (ethh && ethh->h_dest) { + mac = mlx4_en_mac_to_u64(ethh->h_dest); + mac_h = (u32) ((mac & 0xffff00000000ULL) >> 16); + mac_l = (u32) (mac & 0xffffffff); + tx_desc->ctrl.srcrb_flags |= cpu_to_be32(mac_h); + tx_desc->ctrl.imm = cpu_to_be32(mac_l); + } + } + /* Handle LSO (TSO) packets */ if (lso_header_size) { /* Mark opcode as LSO */ diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index 6d7b2bf210ce..552d0fce6f67 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -699,3 +699,47 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev) kfree(priv->eq_table.uar_map); } + +/* A test that verifies that we can accept interrupts on all + * the irq vectors of the device. + * Interrupts are checked using the NOP command. 
+ */ +int mlx4_test_interrupts(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int i; + int err; + + err = mlx4_NOP(dev); + /* When not in MSI_X, there is only one irq to check */ + if (!(dev->flags & MLX4_FLAG_MSI_X)) + return err; + + /* A loop over all completion vectors, for each vector we will check + * whether it works by mapping command completions to that vector + * and performing a NOP command + */ + for(i = 0; !err && (i < dev->caps.num_comp_vectors); ++i) { + /* Temporary use polling for command completions */ + mlx4_cmd_use_polling(dev); + + /* Map the new eq to handle all asyncronous events */ + err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0, + priv->eq_table.eq[i].eqn); + if (err) { + mlx4_warn(dev, "Failed mapping eq for interrupt test\n"); + mlx4_cmd_use_events(dev); + break; + } + + /* Go back to using events */ + mlx4_cmd_use_events(dev); + err = mlx4_NOP(dev); + } + + /* Return to default */ + mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0, + priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); + return err; +} +EXPORT_SYMBOL(mlx4_test_interrupts); diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index 04f42ae1eda0..a87bf3c97055 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -178,6 +178,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b #define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c #define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f +#define QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET 0x43 #define QUERY_DEV_CAP_FLAGS_OFFSET 0x44 #define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48 #define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49 @@ -268,6 +269,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_msg_sz = 1 << (field & 0x1f); MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); dev_cap->stat_rate_support = stat_rate; + MLX4_GET(field, outbox, QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET); + dev_cap->loopback_support = field & 0x1; MLX4_GET(dev_cap->flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET); dev_cap->reserved_uars = field >> 4; diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h index 526d7f30c041..2cc1ba5452e3 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -74,6 +74,7 @@ struct mlx4_dev_cap { u64 def_mac[MLX4_MAX_PORTS + 1]; u16 eth_mtu[MLX4_MAX_PORTS + 1]; u16 stat_rate_support; + int loopback_support; u32 flags; int reserved_uars; int uar_size; diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 5102ab1ac561..7390cdb19414 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -221,6 +221,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.bmme_flags = dev_cap->bmme_flags; dev->caps.reserved_lkey = dev_cap->reserved_lkey; dev->caps.stat_rate_support = dev_cap->stat_rate_support; + dev->caps.loopback_support = dev_cap->loopback_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.log_num_macs = log_num_mac; diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h index edf6523702c0..a09598b2b12e 100644 --- a/drivers/net/mlx4/mlx4_en.h +++ b/drivers/net/mlx4/mlx4_en.h @@ -45,6 +45,7 @@ #include #include #include +#include #include "en_port.h" @@ -139,10 +140,14 @@ enum { #define SMALL_PACKET_SIZE (256 - NET_IP_ALIGN) #define HEADER_COPY_SIZE (128 - NET_IP_ALIGN) +#define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN) #define MLX4_EN_MIN_MTU 46 #define ETH_BCAST 
0xffffffffffffULL +#define MLX4_EN_LOOPBACK_RETRIES 5 +#define MLX4_EN_LOOPBACK_TIMEOUT 100 + #ifdef MLX4_EN_PERF_STAT /* Number of samples to 'average' */ #define AVG_SIZE 128 @@ -356,6 +361,12 @@ struct mlx4_en_rss_context { __be32 rss_key[10]; }; +struct mlx4_en_port_state { + int link_state; + int link_speed; + int transciver; +}; + struct mlx4_en_pkt_stats { unsigned long broadcast; unsigned long rx_prio[8]; @@ -404,6 +415,7 @@ struct mlx4_en_priv { struct vlan_group *vlgrp; struct net_device_stats stats; struct net_device_stats ret_stats; + struct mlx4_en_port_state port_state; spinlock_t stats_lock; unsigned long last_moder_packets; @@ -422,6 +434,8 @@ struct mlx4_en_priv { u16 sample_interval; u16 adaptive_rx_coal; u32 msg_enable; + u32 loopback_ok; + u32 validate_loopback; struct mlx4_hwq_resources res; int link_state; @@ -530,6 +544,11 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset); +int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port); + +#define MLX4_EN_NUM_SELF_TEST 5 +void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf); +u64 mlx4_en_mac_to_u64(u8 *addr); /* * Globals diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 0f82293a82ed..78a1b9671752 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -56,6 +56,7 @@ enum { MLX4_CMD_QUERY_HCA = 0xb, MLX4_CMD_QUERY_PORT = 0x43, MLX4_CMD_SENSE_PORT = 0x4d, + MLX4_CMD_HW_HEALTH_CHECK = 0x50, MLX4_CMD_SET_PORT = 0xc, MLX4_CMD_ACCESS_DDR = 0x2e, MLX4_CMD_MAP_ICM = 0xffa, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7a7f9c1e679a..2cec58722738 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -229,6 +229,7 @@ struct mlx4_caps { u32 bmme_flags; u32 reserved_lkey; u16 stat_rate_support; + int loopback_support; u8 port_width_cap[MLX4_MAX_PORTS + 1]; int max_gso_sz; int reserved_qps_cnt[MLX4_NUM_QP_REGION]; @@ -480,5 +481,6 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u32 *lkey, u32 *rkey); int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); +int mlx4_test_interrupts(struct mlx4_dev *dev); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 7699517db435fd24143bd32dd644275e3eeb4c86 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 24 Aug 2010 03:46:23 +0000 Subject: mlx4_en: Fixing report in Ethtool get_settings The report is now based on a query from FW, giving the correct transceiver type and link speed. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S.
Miller --- drivers/net/mlx4/en_ethtool.c | 27 +++++++++++++++++++++++++-- drivers/net/mlx4/fw.c | 9 +++++++++ drivers/net/mlx4/fw.h | 4 ++++ drivers/net/mlx4/main.c | 4 ++++ include/linux/mlx4/device.h | 4 ++++ 5 files changed, 46 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/mlx4/en_ethtool.c b/drivers/net/mlx4/en_ethtool.c index f7d72d7a8704..fa2f2f43ab48 100644 --- a/drivers/net/mlx4/en_ethtool.c +++ b/drivers/net/mlx4/en_ethtool.c @@ -244,16 +244,39 @@ static void mlx4_en_get_strings(struct net_device *dev, static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { + struct mlx4_en_priv *priv = netdev_priv(dev); + int trans_type; + cmd->autoneg = AUTONEG_DISABLE; cmd->supported = SUPPORTED_10000baseT_Full; - cmd->advertising = ADVERTISED_1000baseT_Full; + cmd->advertising = ADVERTISED_10000baseT_Full; + + if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) + return -ENOMEM; + + trans_type = priv->port_state.transciver; if (netif_carrier_ok(dev)) { - cmd->speed = SPEED_10000; + cmd->speed = priv->port_state.link_speed; cmd->duplex = DUPLEX_FULL; } else { cmd->speed = -1; cmd->duplex = -1; } + + if (trans_type > 0 && trans_type <= 0xC) { + cmd->port = PORT_FIBRE; + cmd->transceiver = XCVR_EXTERNAL; + cmd->supported |= SUPPORTED_FIBRE; + cmd->advertising |= ADVERTISED_FIBRE; + } else if (trans_type == 0x80 || trans_type == 0) { + cmd->port = PORT_TP; + cmd->transceiver = XCVR_INTERNAL; + cmd->supported |= SUPPORTED_TP; + cmd->advertising |= ADVERTISED_TP; + } else { + cmd->port = -1; + cmd->transceiver = -1; + } return 0; } diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index a87bf3c97055..515c6348f32b 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -141,6 +141,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u8 field; + u32 field32; u16 size; u16 stat_rate; int err; @@ -368,6 +369,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_PORT_MAX_MACVLAN_OFFSET 0x0a #define QUERY_PORT_MAX_VL_OFFSET 0x0b #define QUERY_PORT_MAC_OFFSET 0x10 +#define QUERY_PORT_TRANS_VENDOR_OFFSET 0x18 +#define QUERY_PORT_WAVELENGTH_OFFSET 0x1c +#define QUERY_PORT_TRANS_CODE_OFFSET 0x20 for (i = 1; i <= dev_cap->num_ports; ++i) { err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 0, MLX4_CMD_QUERY_PORT, @@ -391,6 +395,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->log_max_vlans[i] = field >> 4; MLX4_GET(dev_cap->eth_mtu[i], outbox, QUERY_PORT_ETH_MTU_OFFSET); MLX4_GET(dev_cap->def_mac[i], outbox, QUERY_PORT_MAC_OFFSET); + MLX4_GET(field32, outbox, QUERY_PORT_TRANS_VENDOR_OFFSET); + dev_cap->trans_type[i] = field32 >> 24; + dev_cap->vendor_oui[i] = field32 & 0xffffff; + MLX4_GET(dev_cap->wavelength[i], outbox, QUERY_PORT_WAVELENGTH_OFFSET); + MLX4_GET(dev_cap->trans_code[i], outbox, QUERY_PORT_TRANS_CODE_OFFSET); } } diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h index 2cc1ba5452e3..443e456c9dab 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -73,6 +73,10 @@ struct mlx4_dev_cap { int max_pkeys[MLX4_MAX_PORTS + 1]; u64 def_mac[MLX4_MAX_PORTS + 1]; u16 eth_mtu[MLX4_MAX_PORTS + 1]; + int trans_type[MLX4_MAX_PORTS + 1]; + int vendor_oui[MLX4_MAX_PORTS + 1]; + u16 wavelength[MLX4_MAX_PORTS + 1]; + u64 trans_code[MLX4_MAX_PORTS + 1]; u16 stat_rate_support; int loopback_support; u32 flags; diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 
7390cdb19414..f4791fa2472f 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -184,6 +184,10 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i]; dev->caps.def_mac[i] = dev_cap->def_mac[i]; dev->caps.supported_type[i] = dev_cap->supported_port_types[i]; + dev->caps.trans_type[i] = dev_cap->trans_type[i]; + dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i]; + dev->caps.wavelength[i] = dev_cap->wavelength[i]; + dev->caps.trans_code[i] = dev_cap->trans_code[i]; } dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 2cec58722738..2a36a344fb3d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -186,6 +186,10 @@ struct mlx4_caps { int eth_mtu_cap[MLX4_MAX_PORTS + 1]; int gid_table_len[MLX4_MAX_PORTS + 1]; int pkey_table_len[MLX4_MAX_PORTS + 1]; + int trans_type[MLX4_MAX_PORTS + 1]; + int vendor_oui[MLX4_MAX_PORTS + 1]; + int wavelength[MLX4_MAX_PORTS + 1]; + u64 trans_code[MLX4_MAX_PORTS + 1]; int local_ca_ack_delay; int num_uars; int bf_reg_size; -- cgit v1.2.3 From 0533943c5c45cce2e26432bf0a6b8e114757c897 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 24 Aug 2010 03:46:42 +0000 Subject: mlx4_en: UDP RSS support Adding capability for RSS for UDP traffic, hashing is done based on IP addresses and UDP port number. The support depends on HW/FW capabilities. Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- drivers/net/mlx4/en_main.c | 19 ++++++++++++------- drivers/net/mlx4/en_rx.c | 8 ++++---- drivers/net/mlx4/fw.c | 3 +++ drivers/net/mlx4/fw.h | 1 + drivers/net/mlx4/main.c | 1 + drivers/net/mlx4/mlx4_en.h | 3 +++ include/linux/mlx4/device.h | 1 + 7 files changed, 25 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/net/mlx4/en_main.c b/drivers/net/mlx4/en_main.c index cacac4e966ee..143906417048 100644 --- a/drivers/net/mlx4/en_main.c +++ b/drivers/net/mlx4/en_main.c @@ -63,11 +63,12 @@ static const char mlx4_en_version[] = */ -/* Use a XOR rathern than Toeplitz hash function for RSS */ -MLX4_EN_PARM_INT(rss_xor, 0, "Use XOR hash function for RSS"); - -/* RSS hash type mask - default to */ -MLX4_EN_PARM_INT(rss_mask, 0xf, "RSS hash type bitmask"); +/* Enable RSS TCP traffic */ +MLX4_EN_PARM_INT(tcp_rss, 1, + "Enable RSS for incomming TCP traffic or disabled (0)"); +/* Enable RSS UDP traffic */ +MLX4_EN_PARM_INT(udp_rss, 1, + "Enable RSS for incomming UDP traffic or disabled (0)"); /* Priority pausing */ MLX4_EN_PARM_INT(pfctx, 0, "Priority based Flow Control policy on TX[7:0]." 
@@ -103,8 +104,12 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) struct mlx4_en_profile *params = &mdev->profile; int i; - params->rss_xor = (rss_xor != 0); - params->rss_mask = rss_mask & 0x1f; + params->tcp_rss = tcp_rss; + params->udp_rss = udp_rss; + if (params->udp_rss && !mdev->dev->caps.udp_rss) { + mlx4_warn(mdev, "UDP RSS is not supported on this device.\n"); + params->udp_rss = 0; + } for (i = 1; i <= MLX4_MAX_PORTS; i++) { params->prof[i].rx_pause = 1; params->prof[i].rx_ppp = pfcrx; diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c index f421a3d42723..e2126c76d1dc 100644 --- a/drivers/net/mlx4/en_rx.c +++ b/drivers/net/mlx4/en_rx.c @@ -859,8 +859,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) struct mlx4_qp_context context; struct mlx4_en_rss_context *rss_context; void *ptr; - int rss_xor = mdev->profile.rss_xor; - u8 rss_mask = mdev->profile.rss_mask; + u8 rss_mask = 0x3f; int i, qpn; int err = 0; int good_qps = 0; @@ -906,9 +905,10 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) rss_context->base_qpn = cpu_to_be32(ilog2(priv->rx_ring_num) << 24 | (rss_map->base_qpn)); rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn); - rss_context->hash_fn = rss_xor & 0x3; - rss_context->flags = rss_mask << 2; + rss_context->flags = rss_mask; + if (priv->mdev->profile.udp_rss) + rss_context->base_qpn_udp = rss_context->default_qpn; err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context, &rss_map->indir_qp, &rss_map->indir_state); if (err) diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index 515c6348f32b..b716e1a1b298 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -179,6 +179,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b #define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c #define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f +#define QUERY_DEV_CAP_UDP_RSS_OFFSET 0x42 #define QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET 0x43 #define QUERY_DEV_CAP_FLAGS_OFFSET 0x44 #define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48 @@ -270,6 +271,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_msg_sz = 1 << (field & 0x1f); MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); dev_cap->stat_rate_support = stat_rate; + MLX4_GET(field, outbox, QUERY_DEV_CAP_UDP_RSS_OFFSET); + dev_cap->udp_rss = field & 0x1; MLX4_GET(field, outbox, QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET); dev_cap->loopback_support = field & 0x1; MLX4_GET(dev_cap->flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h index 443e456c9dab..65cc72eb899d 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -78,6 +78,7 @@ struct mlx4_dev_cap { u16 wavelength[MLX4_MAX_PORTS + 1]; u64 trans_code[MLX4_MAX_PORTS + 1]; u16 stat_rate_support; + int udp_rss; int loopback_support; u32 flags; int reserved_uars; diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index f4791fa2472f..569fa3df381f 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -225,6 +225,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.bmme_flags = dev_cap->bmme_flags; dev->caps.reserved_lkey = dev_cap->reserved_lkey; dev->caps.stat_rate_support = dev_cap->stat_rate_support; + dev->caps.udp_rss = dev_cap->udp_rss; dev->caps.loopback_support = dev_cap->loopback_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; diff --git a/drivers/net/mlx4/mlx4_en.h 
b/drivers/net/mlx4/mlx4_en.h index 5d8f097d7e06..4036a053ee32 100644 --- a/drivers/net/mlx4/mlx4_en.h +++ b/drivers/net/mlx4/mlx4_en.h @@ -318,6 +318,8 @@ struct mlx4_en_port_profile { struct mlx4_en_profile { int rss_xor; + int tcp_rss; + int udp_rss; u8 rss_mask; u32 active_ports; u32 small_pkt_int; @@ -360,6 +362,7 @@ struct mlx4_en_rss_context { u8 hash_fn; u8 flags; __be32 rss_key[10]; + __be32 base_qpn_udp; }; struct mlx4_en_port_state { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 2a36a344fb3d..7338654c02b4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -233,6 +233,7 @@ struct mlx4_caps { u32 bmme_flags; u32 reserved_lkey; u16 stat_rate_support; + int udp_rss; int loopback_support; u8 port_width_cap[MLX4_MAX_PORTS + 1]; int max_gso_sz; -- cgit v1.2.3 From 8a2e8e5dec7e29c56a46ba176c664ab6a3d04118 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Aug 2010 10:33:56 +0200 Subject: workqueue: fix cwq->nr_active underflow cwq->nr_active is used to keep track of how many work items are active for the cpu workqueue, where 'active' is defined as either pending on global worklist or executing. This is used to implement the max_active limit and workqueue freezing. If a work item is queued after nr_active has already reached max_active, the work item doesn't increment nr_active and is put on the delayed queue and gets activated later as previous active work items retire. try_to_grab_pending(), which is used in the cancellation path, unconditionally decremented nr_active whether the work item being cancelled is currently active or delayed, so cancelling a delayed work item makes nr_active underflow. This breaks max_active enforcement and triggers BUG_ON() in destroy_workqueue() later on. This patch fixes this bug by adding a flag WORK_STRUCT_DELAYED, which is set while a work item is on the delayed list, and by making try_to_grab_pending() decrement nr_active iff the work item is currently active. The addition of the flag enlarges cwq alignment to 256 bytes which is getting a bit too large. It's scheduled to be reduced back to 128 bytes by merging WORK_STRUCT_PENDING and WORK_STRUCT_CWQ in the next devel cycle.
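The accounting bug is easy to reproduce in a stand-alone toy model (illustrative user-space sketch with a hypothetical two-item queue, not the kernel code):

	#include <stdio.h>
	#include <stdbool.h>

	#define MAX_ACTIVE 1

	struct toy_cwq {
		int nr_active;
		bool delayed[2];	/* per work item: is it on the delayed list? */
	};

	static void toy_queue(struct toy_cwq *cwq, int item)
	{
		if (cwq->nr_active < MAX_ACTIVE) {
			cwq->nr_active++;		/* goes on the worklist */
			cwq->delayed[item] = false;
		} else {
			cwq->delayed[item] = true;	/* over max_active: delayed */
		}
	}

	/* Old behaviour: cancellation decremented unconditionally. */
	static void toy_cancel_old(struct toy_cwq *cwq, int item)
	{
		(void)item;
		cwq->nr_active--;
	}

	/* Patched behaviour: only items that were actually active count. */
	static void toy_cancel_fixed(struct toy_cwq *cwq, int item)
	{
		if (!cwq->delayed[item])
			cwq->nr_active--;
	}

	int main(void)
	{
		struct toy_cwq cwq = { 0, { false, false } };

		toy_queue(&cwq, 0);		/* active, nr_active = 1 */
		toy_queue(&cwq, 1);		/* delayed, nr_active stays 1 */

		toy_cancel_old(&cwq, 1);	/* old code also counts the delayed item */
		cwq.nr_active--;		/* item 0 completes */
		printf("old: nr_active = %d (underflow)\n", cwq.nr_active);	/* -1 */

		cwq.nr_active = 1;		/* reset: item 0 active again */
		toy_cancel_fixed(&cwq, 1);	/* delayed item is not accounted */
		cwq.nr_active--;		/* item 0 completes */
		printf("fixed: nr_active = %d\n", cwq.nr_active);		/* 0 */
		return 0;
	}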
Signed-off-by: Tejun Heo Reported-by: Johannes Berg --- include/linux/workqueue.h | 16 +++++++++------- kernel/workqueue.c | 30 ++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index c959666eafca..f11100f96482 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -25,18 +25,20 @@ typedef void (*work_func_t)(struct work_struct *work); enum { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ - WORK_STRUCT_CWQ_BIT = 1, /* data points to cwq */ - WORK_STRUCT_LINKED_BIT = 2, /* next work is linked to this one */ + WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */ + WORK_STRUCT_CWQ_BIT = 2, /* data points to cwq */ + WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK - WORK_STRUCT_STATIC_BIT = 3, /* static initializer (debugobjects) */ - WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ + WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */ + WORK_STRUCT_COLOR_SHIFT = 5, /* color for workqueue flushing */ #else - WORK_STRUCT_COLOR_SHIFT = 3, /* color for workqueue flushing */ + WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ #endif WORK_STRUCT_COLOR_BITS = 4, WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, + WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT, WORK_STRUCT_CWQ = 1 << WORK_STRUCT_CWQ_BIT, WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, #ifdef CONFIG_DEBUG_OBJECTS_WORK @@ -59,8 +61,8 @@ enum { /* * Reserve 7 bits off of cwq pointer w/ debugobjects turned - * off. This makes cwqs aligned to 128 bytes which isn't too - * excessive while allowing 15 workqueue flush colors. + * off. This makes cwqs aligned to 256 bytes and allows 15 + * workqueue flush colors. 
*/ WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 362b50d092e2..a2dccfca03ba 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -941,6 +941,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct global_cwq *gcwq; struct cpu_workqueue_struct *cwq; struct list_head *worklist; + unsigned int work_flags; unsigned long flags; debug_work_activate(work); @@ -990,14 +991,17 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, BUG_ON(!list_empty(&work->entry)); cwq->nr_in_flight[cwq->work_color]++; + work_flags = work_color_to_flags(cwq->work_color); if (likely(cwq->nr_active < cwq->max_active)) { cwq->nr_active++; worklist = gcwq_determine_ins_pos(gcwq, cwq); - } else + } else { + work_flags |= WORK_STRUCT_DELAYED; worklist = &cwq->delayed_works; + } - insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color)); + insert_work(cwq, work, worklist, work_flags); spin_unlock_irqrestore(&gcwq->lock, flags); } @@ -1666,6 +1670,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); move_linked_works(work, pos, NULL); + __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); cwq->nr_active++; } @@ -1673,6 +1678,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight * @cwq: cwq of interest * @color: color of work which left the queue + * @delayed: for a delayed work * * A work either has completed or is removed from pending queue, * decrement nr_in_flight of its cwq and handle workqueue flushing. @@ -1680,19 +1686,22 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) * CONTEXT: * spin_lock_irq(gcwq->lock). */ -static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) +static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color, + bool delayed) { /* ignore uncolored works */ if (color == WORK_NO_COLOR) return; cwq->nr_in_flight[color]--; - cwq->nr_active--; - if (!list_empty(&cwq->delayed_works)) { - /* one down, submit a delayed one */ - if (cwq->nr_active < cwq->max_active) - cwq_activate_first_delayed(cwq); + if (!delayed) { + cwq->nr_active--; + if (!list_empty(&cwq->delayed_works)) { + /* one down, submit a delayed one */ + if (cwq->nr_active < cwq->max_active) + cwq_activate_first_delayed(cwq); + } } /* is flush in progress and are we at the flushing tip? */ @@ -1823,7 +1832,7 @@ __acquires(&gcwq->lock) hlist_del_init(&worker->hentry); worker->current_work = NULL; worker->current_cwq = NULL; - cwq_dec_nr_in_flight(cwq, work_color); + cwq_dec_nr_in_flight(cwq, work_color, false); } /** @@ -2388,7 +2397,8 @@ static int try_to_grab_pending(struct work_struct *work) debug_work_deactivate(work); list_del_init(&work->entry); cwq_dec_nr_in_flight(get_work_cwq(work), - get_work_color(work)); + get_work_color(work), + *work_data_bits(work) & WORK_STRUCT_DELAYED); ret = 1; } } -- cgit v1.2.3 From ad1af0fedba14f82b240a03fe20eb9b2fdbd0357 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 25 Aug 2010 02:27:49 -0700 Subject: tcp: Combat per-cpu skew in orphan tests. 
As reported by Anton Blanchard when we use percpu_counter_read_positive() to make our orphan socket limit checks, the check can be off by up to num_cpus_online() * batch (which is 32 by default) which on a 128 cpu machine can be as large as the default orphan limit itself. Fix this by doing the full expensive sum check if the optimized check triggers. Reported-by: Anton Blanchard Signed-off-by: David S. Miller Acked-by: Eric Dumazet --- include/net/tcp.h | 18 ++++++++++++++---- net/ipv4/tcp.c | 5 +---- net/ipv4/tcp_timer.c | 8 ++++---- 3 files changed, 19 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index df6a2eb20193..eaa9582779d0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -268,11 +268,21 @@ static inline int between(__u32 seq1, __u32 seq2, __u32 seq3) return seq3 - seq2 >= seq1 - seq2; } -static inline int tcp_too_many_orphans(struct sock *sk, int num) +static inline bool tcp_too_many_orphans(struct sock *sk, int shift) { - return (num > sysctl_tcp_max_orphans) || - (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && - atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]); + struct percpu_counter *ocp = sk->sk_prot->orphan_count; + int orphans = percpu_counter_read_positive(ocp); + + if (orphans << shift > sysctl_tcp_max_orphans) { + orphans = percpu_counter_sum_positive(ocp); + if (orphans << shift > sysctl_tcp_max_orphans) + return true; + } + + if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && + atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) + return true; + return false; } /* syncookies: remember time of last synqueue overflow */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 176e11aaea77..197b9b77fa3e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2011,11 +2011,8 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { - int orphan_count = percpu_counter_read_positive( - sk->sk_prot->orphan_count); - sk_mem_reclaim(sk); - if (tcp_too_many_orphans(sk, orphan_count)) { + if (tcp_too_many_orphans(sk, 0)) { if (net_ratelimit()) printk(KERN_INFO "TCP: too many of orphaned " "sockets\n"); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 808bb920c9f5..c35b469e851c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -66,18 +66,18 @@ static void tcp_write_err(struct sock *sk) static int tcp_out_of_resources(struct sock *sk, int do_reset) { struct tcp_sock *tp = tcp_sk(sk); - int orphans = percpu_counter_read_positive(&tcp_orphan_count); + int shift = 0; /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) - orphans <<= 1; + shift++; /* If some dubious ICMP arrived, penalize even more. */ if (sk->sk_err_soft) - orphans <<= 1; + shift++; - if (tcp_too_many_orphans(sk, orphans)) { + if (tcp_too_many_orphans(sk, shift)) { if (net_ratelimit()) printk(KERN_INFO "Out of socket memory\n"); -- cgit v1.2.3 From 2738bd682df546f34654ed3d59dfc9ebe8d04979 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sat, 21 Aug 2010 16:39:01 -0400 Subject: mac80211: trivial spelling fixes Fix spelling and readability of a few lines of kernel doc: s/issueing/issuing/g s/approriate/appropriate/g s/supported by simply/supported simply by/ s/IEEE80211_HW_BEACON_FILTERING/IEEE80211_HW_BEACON_FILTER/g Signed-off-by: Bob Copeland Signed-off-by: John W. 
Linville --- include/net/mac80211.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2a1811366076..dcc8c2bf986e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1242,8 +1242,8 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * %IEEE80211_CONF_PS flag enabled means that the powersave mode defined in * IEEE 802.11-2007 section 11.2 is enabled. This is not to be confused * with hardware wakeup and sleep states. Driver is responsible for waking - * up the hardware before issueing commands to the hardware and putting it - * back to sleep at approriate times. + * up the hardware before issuing commands to the hardware and putting it + * back to sleep at appropriate times. * * When PS is enabled, hardware needs to wakeup for beacons and receive the * buffered multicast/broadcast frames after the beacon. Also it must be @@ -1264,7 +1264,7 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * there's data traffic and still saving significantly power in idle * periods. * - * Dynamic powersave is supported by simply mac80211 enabling and disabling + * Dynamic powersave is simply supported by mac80211 enabling and disabling * PS based on traffic. Driver needs to only set %IEEE80211_HW_SUPPORTS_PS * flag and mac80211 will handle everything automatically. Additionally, * hardware having support for the dynamic PS feature may set the @@ -2458,7 +2458,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * - * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTERING and + * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTER and * %IEEE80211_CONF_PS is set, the driver needs to inform whenever the * hardware is not receiving beacons with this function. */ @@ -2469,7 +2469,7 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif); * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * - * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTERING, and + * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTER, and * %IEEE80211_CONF_PS and %IEEE80211_HW_CONNECTION_MONITOR are set, the driver * needs to inform if the connection to the AP has been lost. * -- cgit v1.2.3 From 4c5f7d7a1e6cf20ad515dad8a63c0813fac5bcea Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 22 Aug 2010 22:46:28 +0300 Subject: wl12xx: change contact person for the include file Luciano should be the contact person for the include/linux/spi/wl12xx.h file. Signed-off-by: Kalle Valo Acked-by: Luciano Coelho Signed-off-by: John W. Linville --- include/linux/spi/wl12xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/wl12xx.h b/include/linux/spi/wl12xx.h index a223ecbc71ef..a20bccf0b5c2 100644 --- a/include/linux/spi/wl12xx.h +++ b/include/linux/spi/wl12xx.h @@ -3,7 +3,7 @@ * * Copyright (C) 2009 Nokia Corporation * - * Contact: Kalle Valo + * Contact: Luciano Coelho * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License -- cgit v1.2.3 From ad01b7d480a4a135f974afd5c617c417e0b0542f Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 23 Aug 2010 20:40:42 +0000 Subject: stmmac: make ioaddr 'void __iomem *' rather than unsigned long This avoids unnecessary casting and adds the ioaddr to the private structure, as the before/after sketch below illustrates.
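The pattern, modeled loosely on the dwmac1000_core_init() hunks that follow (GMAC_CONTROL and GMAC_CORE_INIT are the register offset and init bits used there; the _old/_new names are illustrative):

	/* Before: the MMIO base travels as an integer, so every access needs
	 * a cast (or triggers "makes pointer from integer" warnings): */
	static void core_init_old(unsigned long ioaddr)
	{
		u32 value = readl((void __iomem *)(ioaddr + GMAC_CONTROL));

		writel(value | GMAC_CORE_INIT, (void __iomem *)(ioaddr + GMAC_CONTROL));
	}

	/* After: the pointer is typed correctly, sparse can check the I/O
	 * address space, and no casts are needed: */
	static void core_init_new(void __iomem *ioaddr)
	{
		u32 value = readl(ioaddr + GMAC_CONTROL);

		writel(value | GMAC_CORE_INIT, ioaddr + GMAC_CONTROL);
	}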
This patch also removes many warnings when compiling the driver. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/stmmac/common.h | 50 +++++++-------- drivers/net/stmmac/dwmac1000_core.c | 18 +++--- drivers/net/stmmac/dwmac1000_dma.c | 8 +-- drivers/net/stmmac/dwmac100_core.c | 20 +++--- drivers/net/stmmac/dwmac100_dma.c | 8 +-- drivers/net/stmmac/dwmac_dma.h | 16 ++--- drivers/net/stmmac/dwmac_lib.c | 22 +++---- drivers/net/stmmac/enh_desc.c | 2 +- drivers/net/stmmac/norm_desc.c | 2 +- drivers/net/stmmac/stmmac.h | 3 +- drivers/net/stmmac/stmmac_ethtool.c | 21 +++--- drivers/net/stmmac/stmmac_main.c | 124 +++++++++++++++++------------------- drivers/net/stmmac/stmmac_mdio.c | 21 +++--- include/linux/stmmac.h | 2 +- 14 files changed, 153 insertions(+), 164 deletions(-) (limited to 'include') diff --git a/drivers/net/stmmac/common.h b/drivers/net/stmmac/common.h index 66b9da0260fe..e8cbcb5c206e 100644 --- a/drivers/net/stmmac/common.h +++ b/drivers/net/stmmac/common.h @@ -167,7 +167,7 @@ struct stmmac_desc_ops { int (*get_tx_ls) (struct dma_desc *p); /* Return the transmit status looking at the TDES1 */ int (*tx_status) (void *data, struct stmmac_extra_stats *x, - struct dma_desc *p, unsigned long ioaddr); + struct dma_desc *p, void __iomem *ioaddr); /* Get the buffer size from the descriptor */ int (*get_tx_len) (struct dma_desc *p); /* Handle extra events on specific interrupts hw dependent */ @@ -182,44 +182,44 @@ struct stmmac_dma_ops { /* DMA core initialization */ - int (*init) (unsigned long ioaddr, int pbl, u32 dma_tx, u32 dma_rx); + int (*init) (void __iomem *ioaddr, int pbl, u32 dma_tx, u32 dma_rx); /* Dump DMA registers */ - void (*dump_regs) (unsigned long ioaddr); + void (*dump_regs) (void __iomem *ioaddr); /* Set tx/rx threshold in the csr6 register * An invalid value enables the store-and-forward mode */ - void (*dma_mode) (unsigned long ioaddr, int txmode, int rxmode); + void (*dma_mode) (void __iomem *ioaddr, int txmode, int rxmode); /* To track extra statistic (if supported) */ void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x, - unsigned long ioaddr); - void (*enable_dma_transmission) (unsigned long ioaddr); - void (*enable_dma_irq) (unsigned long ioaddr); - void (*disable_dma_irq) (unsigned long ioaddr); - void (*start_tx) (unsigned long ioaddr); - void (*stop_tx) (unsigned long ioaddr); - void (*start_rx) (unsigned long ioaddr); - void (*stop_rx) (unsigned long ioaddr); - int (*dma_interrupt) (unsigned long ioaddr, + void __iomem *ioaddr); + void (*enable_dma_transmission) (void __iomem *ioaddr); + void (*enable_dma_irq) (void __iomem *ioaddr); + void (*disable_dma_irq) (void __iomem *ioaddr); + void (*start_tx) (void __iomem *ioaddr); + void (*stop_tx) (void __iomem *ioaddr); + void (*start_rx) (void __iomem *ioaddr); + void (*stop_rx) (void __iomem *ioaddr); + int (*dma_interrupt) (void __iomem *ioaddr, struct stmmac_extra_stats *x); }; struct stmmac_ops { /* MAC core initialization */ - void (*core_init) (unsigned long ioaddr) ____cacheline_aligned; + void (*core_init) (void __iomem *ioaddr) ____cacheline_aligned; /* Dump MAC registers */ - void (*dump_regs) (unsigned long ioaddr); + void (*dump_regs) (void __iomem *ioaddr); /* Handle extra events on specific interrupts hw dependent */ - void (*host_irq_status) (unsigned long ioaddr); + void (*host_irq_status) (void __iomem *ioaddr); /* Multicast filter setting */ void (*set_filter) (struct net_device *dev); /* Flow control setting */ - void
(*flow_ctrl) (unsigned long ioaddr, unsigned int duplex, + void (*flow_ctrl) (void __iomem *ioaddr, unsigned int duplex, unsigned int fc, unsigned int pause_time); /* Set power management mode (e.g. magic frame) */ - void (*pmt) (unsigned long ioaddr, unsigned long mode); + void (*pmt) (void __iomem *ioaddr, unsigned long mode); /* Set/Get Unicast MAC addresses */ - void (*set_umac_addr) (unsigned long ioaddr, unsigned char *addr, + void (*set_umac_addr) (void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n); - void (*get_umac_addr) (unsigned long ioaddr, unsigned char *addr, + void (*get_umac_addr) (void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n); }; @@ -243,11 +243,11 @@ struct mac_device_info { struct mac_link link; }; -struct mac_device_info *dwmac1000_setup(unsigned long addr); -struct mac_device_info *dwmac100_setup(unsigned long addr); +struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr); +struct mac_device_info *dwmac100_setup(void __iomem *ioaddr); -extern void stmmac_set_mac_addr(unsigned long ioaddr, u8 addr[6], +extern void stmmac_set_mac_addr(void __iomem *ioaddr, u8 addr[6], unsigned int high, unsigned int low); -extern void stmmac_get_mac_addr(unsigned long ioaddr, unsigned char *addr, +extern void stmmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int high, unsigned int low); -extern void dwmac_dma_flush_tx_fifo(unsigned long ioaddr); +extern void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr); diff --git a/drivers/net/stmmac/dwmac1000_core.c b/drivers/net/stmmac/dwmac1000_core.c index 2b2f5c8caf1c..8bbfc0f48dea 100644 --- a/drivers/net/stmmac/dwmac1000_core.c +++ b/drivers/net/stmmac/dwmac1000_core.c @@ -30,7 +30,7 @@ #include #include "dwmac1000.h" -static void dwmac1000_core_init(unsigned long ioaddr) +static void dwmac1000_core_init(void __iomem *ioaddr) { u32 value = readl(ioaddr + GMAC_CONTROL); value |= GMAC_CORE_INIT; @@ -50,7 +50,7 @@ static void dwmac1000_core_init(unsigned long ioaddr) #endif } -static void dwmac1000_dump_regs(unsigned long ioaddr) +static void dwmac1000_dump_regs(void __iomem *ioaddr) { int i; pr_info("\tDWMAC1000 regs (base addr = 0x%8x)\n", (unsigned int)ioaddr); @@ -62,14 +62,14 @@ static void dwmac1000_dump_regs(unsigned long ioaddr) } } -static void dwmac1000_set_umac_addr(unsigned long ioaddr, unsigned char *addr, +static void dwmac1000_set_umac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n) { stmmac_set_mac_addr(ioaddr, addr, GMAC_ADDR_HIGH(reg_n), GMAC_ADDR_LOW(reg_n)); } -static void dwmac1000_get_umac_addr(unsigned long ioaddr, unsigned char *addr, +static void dwmac1000_get_umac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n) { stmmac_get_mac_addr(ioaddr, addr, GMAC_ADDR_HIGH(reg_n), @@ -78,7 +78,7 @@ static void dwmac1000_get_umac_addr(unsigned long ioaddr, unsigned char *addr, static void dwmac1000_set_filter(struct net_device *dev) { - unsigned long ioaddr = dev->base_addr; + void __iomem *ioaddr = (void __iomem *) dev->base_addr; unsigned int value = 0; CHIP_DBG(KERN_INFO "%s: # mcasts %d, # unicast %d\n", @@ -139,7 +139,7 @@ static void dwmac1000_set_filter(struct net_device *dev) readl(ioaddr + GMAC_HASH_HIGH), readl(ioaddr + GMAC_HASH_LOW)); } -static void dwmac1000_flow_ctrl(unsigned long ioaddr, unsigned int duplex, +static void dwmac1000_flow_ctrl(void __iomem *ioaddr, unsigned int duplex, unsigned int fc, unsigned int pause_time) { unsigned int flow = 0; @@ -162,7 +162,7 @@ static void dwmac1000_flow_ctrl(unsigned long ioaddr, 
unsigned int duplex, writel(flow, ioaddr + GMAC_FLOW_CTRL); } -static void dwmac1000_pmt(unsigned long ioaddr, unsigned long mode) +static void dwmac1000_pmt(void __iomem *ioaddr, unsigned long mode) { unsigned int pmt = 0; @@ -178,7 +178,7 @@ static void dwmac1000_pmt(unsigned long ioaddr, unsigned long mode) } -static void dwmac1000_irq_status(unsigned long ioaddr) +static void dwmac1000_irq_status(void __iomem *ioaddr) { u32 intr_status = readl(ioaddr + GMAC_INT_STATUS); @@ -211,7 +211,7 @@ struct stmmac_ops dwmac1000_ops = { .get_umac_addr = dwmac1000_get_umac_addr, }; -struct mac_device_info *dwmac1000_setup(unsigned long ioaddr) +struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr) { struct mac_device_info *mac; u32 uid = readl(ioaddr + GMAC_VERSION); diff --git a/drivers/net/stmmac/dwmac1000_dma.c b/drivers/net/stmmac/dwmac1000_dma.c index 415805057cb0..2ef5a56370e9 100644 --- a/drivers/net/stmmac/dwmac1000_dma.c +++ b/drivers/net/stmmac/dwmac1000_dma.c @@ -29,7 +29,7 @@ #include "dwmac1000.h" #include "dwmac_dma.h" -static int dwmac1000_dma_init(unsigned long ioaddr, int pbl, u32 dma_tx, +static int dwmac1000_dma_init(void __iomem *ioaddr, int pbl, u32 dma_tx, u32 dma_rx) { u32 value = readl(ioaddr + DMA_BUS_MODE); @@ -58,7 +58,7 @@ static int dwmac1000_dma_init(unsigned long ioaddr, int pbl, u32 dma_tx, return 0; } -static void dwmac1000_dma_operation_mode(unsigned long ioaddr, int txmode, +static void dwmac1000_dma_operation_mode(void __iomem *ioaddr, int txmode, int rxmode) { u32 csr6 = readl(ioaddr + DMA_CONTROL); @@ -111,12 +111,12 @@ static void dwmac1000_dma_operation_mode(unsigned long ioaddr, int txmode, /* Not yet implemented --- no RMON module */ static void dwmac1000_dma_diagnostic_fr(void *data, - struct stmmac_extra_stats *x, unsigned long ioaddr) + struct stmmac_extra_stats *x, void __iomem *ioaddr) { return; } -static void dwmac1000_dump_dma_regs(unsigned long ioaddr) +static void dwmac1000_dump_dma_regs(void __iomem *ioaddr) { int i; pr_info(" DMA registers\n"); diff --git a/drivers/net/stmmac/dwmac100_core.c b/drivers/net/stmmac/dwmac100_core.c index 2fb165fa2ba0..135a8082816e 100644 --- a/drivers/net/stmmac/dwmac100_core.c +++ b/drivers/net/stmmac/dwmac100_core.c @@ -31,7 +31,7 @@ #include #include "dwmac100.h" -static void dwmac100_core_init(unsigned long ioaddr) +static void dwmac100_core_init(void __iomem *ioaddr) { u32 value = readl(ioaddr + MAC_CONTROL); @@ -42,12 +42,12 @@ static void dwmac100_core_init(unsigned long ioaddr) #endif } -static void dwmac100_dump_mac_regs(unsigned long ioaddr) +static void dwmac100_dump_mac_regs(void __iomem *ioaddr) { pr_info("\t----------------------------------------------\n" "\t DWMAC 100 CSR (base addr = 0x%8x)\n" "\t----------------------------------------------\n", - (unsigned int)ioaddr); + (unsigned int) ioaddr); pr_info("\tcontrol reg (offset 0x%x): 0x%08x\n", MAC_CONTROL, readl(ioaddr + MAC_CONTROL)); pr_info("\taddr HI (offset 0x%x): 0x%08x\n ", MAC_ADDR_HIGH, @@ -77,18 +77,18 @@ static void dwmac100_dump_mac_regs(unsigned long ioaddr) MMC_LOW_INTR_MASK, readl(ioaddr + MMC_LOW_INTR_MASK)); } -static void dwmac100_irq_status(unsigned long ioaddr) +static void dwmac100_irq_status(void __iomem *ioaddr) { return; } -static void dwmac100_set_umac_addr(unsigned long ioaddr, unsigned char *addr, +static void dwmac100_set_umac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n) { stmmac_set_mac_addr(ioaddr, addr, MAC_ADDR_HIGH, MAC_ADDR_LOW); } -static void dwmac100_get_umac_addr(unsigned long 
ioaddr, unsigned char *addr, +static void dwmac100_get_umac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int reg_n) { stmmac_get_mac_addr(ioaddr, addr, MAC_ADDR_HIGH, MAC_ADDR_LOW); @@ -96,7 +96,7 @@ static void dwmac100_get_umac_addr(unsigned long ioaddr, unsigned char *addr, static void dwmac100_set_filter(struct net_device *dev) { - unsigned long ioaddr = dev->base_addr; + void __iomem *ioaddr = (void __iomem *) dev->base_addr; u32 value = readl(ioaddr + MAC_CONTROL); if (dev->flags & IFF_PROMISC) { @@ -145,7 +145,7 @@ static void dwmac100_set_filter(struct net_device *dev) readl(ioaddr + MAC_HASH_HIGH), readl(ioaddr + MAC_HASH_LOW)); } -static void dwmac100_flow_ctrl(unsigned long ioaddr, unsigned int duplex, +static void dwmac100_flow_ctrl(void __iomem *ioaddr, unsigned int duplex, unsigned int fc, unsigned int pause_time) { unsigned int flow = MAC_FLOW_CTRL_ENABLE; @@ -158,7 +158,7 @@ static void dwmac100_flow_ctrl(unsigned long ioaddr, unsigned int duplex, /* No PMT module supported for this Ethernet Controller. * Tested on ST platforms only. */ -static void dwmac100_pmt(unsigned long ioaddr, unsigned long mode) +static void dwmac100_pmt(void __iomem *ioaddr, unsigned long mode) { return; } @@ -174,7 +174,7 @@ struct stmmac_ops dwmac100_ops = { .get_umac_addr = dwmac100_get_umac_addr, }; -struct mac_device_info *dwmac100_setup(unsigned long ioaddr) +struct mac_device_info *dwmac100_setup(void __iomem *ioaddr) { struct mac_device_info *mac; diff --git a/drivers/net/stmmac/dwmac100_dma.c b/drivers/net/stmmac/dwmac100_dma.c index 2fece7b72727..c7279d2b946b 100644 --- a/drivers/net/stmmac/dwmac100_dma.c +++ b/drivers/net/stmmac/dwmac100_dma.c @@ -31,7 +31,7 @@ #include "dwmac100.h" #include "dwmac_dma.h" -static int dwmac100_dma_init(unsigned long ioaddr, int pbl, u32 dma_tx, +static int dwmac100_dma_init(void __iomem *ioaddr, int pbl, u32 dma_tx, u32 dma_rx) { u32 value = readl(ioaddr + DMA_BUS_MODE); @@ -58,7 +58,7 @@ static int dwmac100_dma_init(unsigned long ioaddr, int pbl, u32 dma_tx, /* Store and Forward capability is not used at all.. * The transmit threshold can be programmed by * setting the TTC bits in the DMA control register.*/ -static void dwmac100_dma_operation_mode(unsigned long ioaddr, int txmode, +static void dwmac100_dma_operation_mode(void __iomem *ioaddr, int txmode, int rxmode) { u32 csr6 = readl(ioaddr + DMA_CONTROL); @@ -73,7 +73,7 @@ static void dwmac100_dma_operation_mode(unsigned long ioaddr, int txmode, writel(csr6, ioaddr + DMA_CONTROL); } -static void dwmac100_dump_dma_regs(unsigned long ioaddr) +static void dwmac100_dump_dma_regs(void __iomem *ioaddr) { int i; @@ -91,7 +91,7 @@ static void dwmac100_dump_dma_regs(unsigned long ioaddr) /* DMA controller has two counters to track the number of * the receive missed frames. 
*/ static void dwmac100_dma_diagnostic_fr(void *data, struct stmmac_extra_stats *x, - unsigned long ioaddr) + void __iomem *ioaddr) { struct net_device_stats *stats = (struct net_device_stats *)data; u32 csr8 = readl(ioaddr + DMA_MISSED_FRAME_CTR); diff --git a/drivers/net/stmmac/dwmac_dma.h b/drivers/net/stmmac/dwmac_dma.h index 7b815a1b7b8c..da3f5ccf83d3 100644 --- a/drivers/net/stmmac/dwmac_dma.h +++ b/drivers/net/stmmac/dwmac_dma.h @@ -97,12 +97,12 @@ #define DMA_STATUS_TI 0x00000001 /* Transmit Interrupt */ #define DMA_CONTROL_FTF 0x00100000 /* Flush transmit FIFO */ -extern void dwmac_enable_dma_transmission(unsigned long ioaddr); -extern void dwmac_enable_dma_irq(unsigned long ioaddr); -extern void dwmac_disable_dma_irq(unsigned long ioaddr); -extern void dwmac_dma_start_tx(unsigned long ioaddr); -extern void dwmac_dma_stop_tx(unsigned long ioaddr); -extern void dwmac_dma_start_rx(unsigned long ioaddr); -extern void dwmac_dma_stop_rx(unsigned long ioaddr); -extern int dwmac_dma_interrupt(unsigned long ioaddr, +extern void dwmac_enable_dma_transmission(void __iomem *ioaddr); +extern void dwmac_enable_dma_irq(void __iomem *ioaddr); +extern void dwmac_disable_dma_irq(void __iomem *ioaddr); +extern void dwmac_dma_start_tx(void __iomem *ioaddr); +extern void dwmac_dma_stop_tx(void __iomem *ioaddr); +extern void dwmac_dma_start_rx(void __iomem *ioaddr); +extern void dwmac_dma_stop_rx(void __iomem *ioaddr); +extern int dwmac_dma_interrupt(void __iomem *ioaddr, struct stmmac_extra_stats *x); diff --git a/drivers/net/stmmac/dwmac_lib.c b/drivers/net/stmmac/dwmac_lib.c index a85415216ef4..d65fab1ba790 100644 --- a/drivers/net/stmmac/dwmac_lib.c +++ b/drivers/net/stmmac/dwmac_lib.c @@ -32,43 +32,43 @@ #endif /* CSR1 enables the transmit DMA to check for new descriptor */ -void dwmac_enable_dma_transmission(unsigned long ioaddr) +void dwmac_enable_dma_transmission(void __iomem *ioaddr) { writel(1, ioaddr + DMA_XMT_POLL_DEMAND); } -void dwmac_enable_dma_irq(unsigned long ioaddr) +void dwmac_enable_dma_irq(void __iomem *ioaddr) { writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA); } -void dwmac_disable_dma_irq(unsigned long ioaddr) +void dwmac_disable_dma_irq(void __iomem *ioaddr) { writel(0, ioaddr + DMA_INTR_ENA); } -void dwmac_dma_start_tx(unsigned long ioaddr) +void dwmac_dma_start_tx(void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_CONTROL); value |= DMA_CONTROL_ST; writel(value, ioaddr + DMA_CONTROL); } -void dwmac_dma_stop_tx(unsigned long ioaddr) +void dwmac_dma_stop_tx(void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_CONTROL); value &= ~DMA_CONTROL_ST; writel(value, ioaddr + DMA_CONTROL); } -void dwmac_dma_start_rx(unsigned long ioaddr) +void dwmac_dma_start_rx(void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_CONTROL); value |= DMA_CONTROL_SR; writel(value, ioaddr + DMA_CONTROL); } -void dwmac_dma_stop_rx(unsigned long ioaddr) +void dwmac_dma_stop_rx(void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_CONTROL); value &= ~DMA_CONTROL_SR; @@ -145,7 +145,7 @@ static void show_rx_process_state(unsigned int status) } #endif -int dwmac_dma_interrupt(unsigned long ioaddr, +int dwmac_dma_interrupt(void __iomem *ioaddr, struct stmmac_extra_stats *x) { int ret = 0; @@ -219,7 +219,7 @@ int dwmac_dma_interrupt(unsigned long ioaddr, return ret; } -void dwmac_dma_flush_tx_fifo(unsigned long ioaddr) +void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr) { u32 csr6 = readl(ioaddr + DMA_CONTROL); writel((csr6 | DMA_CONTROL_FTF), ioaddr + DMA_CONTROL); @@ -227,7 +227,7 @@ void 
dwmac_dma_flush_tx_fifo(unsigned long ioaddr) do {} while ((readl(ioaddr + DMA_CONTROL) & DMA_CONTROL_FTF)); } -void stmmac_set_mac_addr(unsigned long ioaddr, u8 addr[6], +void stmmac_set_mac_addr(void __iomem *ioaddr, u8 addr[6], unsigned int high, unsigned int low) { unsigned long data; @@ -238,7 +238,7 @@ void stmmac_set_mac_addr(unsigned long ioaddr, u8 addr[6], writel(data, ioaddr + low); } -void stmmac_get_mac_addr(unsigned long ioaddr, unsigned char *addr, +void stmmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr, unsigned int high, unsigned int low) { unsigned int hi_addr, lo_addr; diff --git a/drivers/net/stmmac/enh_desc.c b/drivers/net/stmmac/enh_desc.c index f612f986a7e1..77ff88c3958b 100644 --- a/drivers/net/stmmac/enh_desc.c +++ b/drivers/net/stmmac/enh_desc.c @@ -25,7 +25,7 @@ #include "common.h" static int enh_desc_get_tx_status(void *data, struct stmmac_extra_stats *x, - struct dma_desc *p, unsigned long ioaddr) + struct dma_desc *p, void __iomem *ioaddr) { int ret = 0; struct net_device_stats *stats = (struct net_device_stats *)data; diff --git a/drivers/net/stmmac/norm_desc.c b/drivers/net/stmmac/norm_desc.c index 31ad53643792..51f4440ab98b 100644 --- a/drivers/net/stmmac/norm_desc.c +++ b/drivers/net/stmmac/norm_desc.c @@ -25,7 +25,7 @@ #include "common.h" static int ndesc_get_tx_status(void *data, struct stmmac_extra_stats *x, - struct dma_desc *p, unsigned long ioaddr) + struct dma_desc *p, void __iomem *ioaddr) { int ret = 0; struct net_device_stats *stats = (struct net_device_stats *)data; diff --git a/drivers/net/stmmac/stmmac.h b/drivers/net/stmmac/stmmac.h index ebebc644b1b8..cca53dbac361 100644 --- a/drivers/net/stmmac/stmmac.h +++ b/drivers/net/stmmac/stmmac.h @@ -54,6 +54,7 @@ struct stmmac_priv { unsigned int dma_buf_sz; struct device *device; struct mac_device_info *hw; + void __iomem *ioaddr; struct stmmac_extra_stats xstats; struct napi_struct napi; @@ -65,7 +66,7 @@ struct stmmac_priv { int phy_mask; int (*phy_reset) (void *priv); void (*fix_mac_speed) (void *priv, unsigned int speed); - void (*bus_setup)(unsigned long ioaddr); + void (*bus_setup)(void __iomem *ioaddr); void *bsp_priv; int phy_irq; diff --git a/drivers/net/stmmac/stmmac_ethtool.c b/drivers/net/stmmac/stmmac_ethtool.c index f080509923f0..63b68e61afce 100644 --- a/drivers/net/stmmac/stmmac_ethtool.c +++ b/drivers/net/stmmac/stmmac_ethtool.c @@ -177,21 +177,21 @@ void stmmac_ethtool_gregs(struct net_device *dev, if (!priv->is_gmac) { /* MAC registers */ for (i = 0; i < 12; i++) - reg_space[i] = readl(dev->base_addr + (i * 4)); + reg_space[i] = readl(priv->ioaddr + (i * 4)); /* DMA registers */ for (i = 0; i < 9; i++) reg_space[i + 12] = - readl(dev->base_addr + (DMA_BUS_MODE + (i * 4))); - reg_space[22] = readl(dev->base_addr + DMA_CUR_TX_BUF_ADDR); - reg_space[23] = readl(dev->base_addr + DMA_CUR_RX_BUF_ADDR); + readl(priv->ioaddr + (DMA_BUS_MODE + (i * 4))); + reg_space[22] = readl(priv->ioaddr + DMA_CUR_TX_BUF_ADDR); + reg_space[23] = readl(priv->ioaddr + DMA_CUR_RX_BUF_ADDR); } else { /* MAC registers */ for (i = 0; i < 55; i++) - reg_space[i] = readl(dev->base_addr + (i * 4)); + reg_space[i] = readl(priv->ioaddr + (i * 4)); /* DMA registers */ for (i = 0; i < 22; i++) reg_space[i + 55] = - readl(dev->base_addr + (DMA_BUS_MODE + (i * 4))); + readl(priv->ioaddr + (DMA_BUS_MODE + (i * 4))); } } @@ -263,11 +263,9 @@ stmmac_set_pauseparam(struct net_device *netdev, cmd.phy_address = phy->addr; ret = phy_ethtool_sset(phy, &cmd); } - } else { - unsigned long ioaddr = 
netdev->base_addr; - priv->hw->mac->flow_ctrl(ioaddr, phy->duplex, + } else + priv->hw->mac->flow_ctrl(priv->ioaddr, phy->duplex, priv->flow_ctrl, priv->pause); - } spin_unlock(&priv->lock); return ret; } @@ -276,12 +274,11 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *dummy, u64 *data) { struct stmmac_priv *priv = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; int i; /* Update HW stats if supported */ priv->hw->dma->dma_diagnostic_fr(&dev->stats, (void *) &priv->xstats, - ioaddr); + priv->ioaddr); for (i = 0; i < STMMAC_STATS_LEN; i++) { char *p = (char *)priv + stmmac_gstrings_stats[i].stat_offset; diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c index 86b6c69c068c..c59c1061252a 100644 --- a/drivers/net/stmmac/stmmac_main.c +++ b/drivers/net/stmmac/stmmac_main.c @@ -202,7 +202,6 @@ static void stmmac_adjust_link(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); struct phy_device *phydev = priv->phydev; - unsigned long ioaddr = dev->base_addr; unsigned long flags; int new_state = 0; unsigned int fc = priv->flow_ctrl, pause_time = priv->pause; @@ -215,7 +214,7 @@ static void stmmac_adjust_link(struct net_device *dev) spin_lock_irqsave(&priv->lock, flags); if (phydev->link) { - u32 ctrl = readl(ioaddr + MAC_CTRL_REG); + u32 ctrl = readl(priv->ioaddr + MAC_CTRL_REG); /* Now we make sure that we can be in full duplex mode. * If not, we operate in half-duplex mode. */ @@ -229,7 +228,7 @@ static void stmmac_adjust_link(struct net_device *dev) } /* Flow Control operation */ if (phydev->pause) - priv->hw->mac->flow_ctrl(ioaddr, phydev->duplex, + priv->hw->mac->flow_ctrl(priv->ioaddr, phydev->duplex, fc, pause_time); if (phydev->speed != priv->speed) { @@ -268,7 +267,7 @@ static void stmmac_adjust_link(struct net_device *dev) priv->speed = phydev->speed; } - writel(ctrl, ioaddr + MAC_CTRL_REG); + writel(ctrl, priv->ioaddr + MAC_CTRL_REG); if (!priv->oldlink) { new_state = 1; @@ -345,7 +344,7 @@ static int stmmac_init_phy(struct net_device *dev) return 0; } -static inline void stmmac_mac_enable_rx(unsigned long ioaddr) +static inline void stmmac_mac_enable_rx(void __iomem *ioaddr) { u32 value = readl(ioaddr + MAC_CTRL_REG); value |= MAC_RNABLE_RX; @@ -353,7 +352,7 @@ static inline void stmmac_mac_enable_rx(unsigned long ioaddr) writel(value, ioaddr + MAC_CTRL_REG); } -static inline void stmmac_mac_enable_tx(unsigned long ioaddr) +static inline void stmmac_mac_enable_tx(void __iomem *ioaddr) { u32 value = readl(ioaddr + MAC_CTRL_REG); value |= MAC_ENABLE_TX; @@ -361,14 +360,14 @@ static inline void stmmac_mac_enable_tx(unsigned long ioaddr) writel(value, ioaddr + MAC_CTRL_REG); } -static inline void stmmac_mac_disable_rx(unsigned long ioaddr) +static inline void stmmac_mac_disable_rx(void __iomem *ioaddr) { u32 value = readl(ioaddr + MAC_CTRL_REG); value &= ~MAC_RNABLE_RX; writel(value, ioaddr + MAC_CTRL_REG); } -static inline void stmmac_mac_disable_tx(unsigned long ioaddr) +static inline void stmmac_mac_disable_tx(void __iomem *ioaddr) { u32 value = readl(ioaddr + MAC_CTRL_REG); value &= ~MAC_ENABLE_TX; @@ -577,17 +576,17 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) { if (!priv->is_gmac) { /* MAC 10/100 */ - priv->hw->dma->dma_mode(priv->dev->base_addr, tc, 0); + priv->hw->dma->dma_mode(priv->ioaddr, tc, 0); priv->tx_coe = NO_HW_CSUM; } else { if ((priv->dev->mtu <= ETH_DATA_LEN) && (tx_coe)) { - priv->hw->dma->dma_mode(priv->dev->base_addr, + 
priv->hw->dma->dma_mode(priv->ioaddr, SF_DMA_MODE, SF_DMA_MODE); tc = SF_DMA_MODE; priv->tx_coe = HW_CSUM; } else { /* Checksum computation is performed in software. */ - priv->hw->dma->dma_mode(priv->dev->base_addr, tc, + priv->hw->dma->dma_mode(priv->ioaddr, tc, SF_DMA_MODE); priv->tx_coe = NO_HW_CSUM; } @@ -603,7 +602,6 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) static void stmmac_tx(struct stmmac_priv *priv) { unsigned int txsize = priv->dma_tx_size; - unsigned long ioaddr = priv->dev->base_addr; while (priv->dirty_tx != priv->cur_tx) { int last; @@ -621,7 +619,7 @@ static void stmmac_tx(struct stmmac_priv *priv) int tx_error = priv->hw->desc->tx_status(&priv->dev->stats, &priv->xstats, p, - ioaddr); + priv->ioaddr); if (likely(tx_error == 0)) { priv->dev->stats.tx_packets++; priv->xstats.tx_pkt_n++; @@ -677,7 +675,7 @@ static inline void stmmac_enable_irq(struct stmmac_priv *priv) priv->tm->timer_start(tmrate); else #endif - priv->hw->dma->enable_dma_irq(priv->dev->base_addr); + priv->hw->dma->enable_dma_irq(priv->ioaddr); } static inline void stmmac_disable_irq(struct stmmac_priv *priv) @@ -687,7 +685,7 @@ static inline void stmmac_disable_irq(struct stmmac_priv *priv) priv->tm->timer_stop(); else #endif - priv->hw->dma->disable_dma_irq(priv->dev->base_addr); + priv->hw->dma->disable_dma_irq(priv->ioaddr); } static int stmmac_has_work(struct stmmac_priv *priv) @@ -742,14 +740,15 @@ static void stmmac_no_timer_stopped(void) */ static void stmmac_tx_err(struct stmmac_priv *priv) { + netif_stop_queue(priv->dev); - priv->hw->dma->stop_tx(priv->dev->base_addr); + priv->hw->dma->stop_tx(priv->ioaddr); dma_free_tx_skbufs(priv); priv->hw->desc->init_tx_desc(priv->dma_tx, priv->dma_tx_size); priv->dirty_tx = 0; priv->cur_tx = 0; - priv->hw->dma->start_tx(priv->dev->base_addr); + priv->hw->dma->start_tx(priv->ioaddr); priv->dev->stats.tx_errors++; netif_wake_queue(priv->dev); @@ -758,11 +757,9 @@ static void stmmac_tx_err(struct stmmac_priv *priv) static void stmmac_dma_interrupt(struct stmmac_priv *priv) { - unsigned long ioaddr = priv->dev->base_addr; int status; - status = priv->hw->dma->dma_interrupt(priv->dev->base_addr, - &priv->xstats); + status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats); if (likely(status == handle_tx_rx)) _stmmac_schedule(priv); @@ -770,7 +767,7 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv) /* Try to bump up the dma threshold on this failure */ if (unlikely(tc != SF_DMA_MODE) && (tc <= 256)) { tc += 64; - priv->hw->dma->dma_mode(ioaddr, tc, SF_DMA_MODE); + priv->hw->dma->dma_mode(priv->ioaddr, tc, SF_DMA_MODE); priv->xstats.threshold = tc; } stmmac_tx_err(priv); @@ -790,7 +787,6 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv) static int stmmac_open(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; int ret; /* Check that the MAC address is valid. 
If its not, refuse @@ -846,7 +842,8 @@ static int stmmac_open(struct net_device *dev) init_dma_desc_rings(dev); /* DMA initialization and SW reset */ - if (unlikely(priv->hw->dma->init(ioaddr, priv->pbl, priv->dma_tx_phy, + if (unlikely(priv->hw->dma->init(priv->ioaddr, priv->pbl, + priv->dma_tx_phy, priv->dma_rx_phy) < 0)) { pr_err("%s: DMA initialization failed\n", __func__); @@ -854,22 +851,22 @@ static int stmmac_open(struct net_device *dev) } /* Copy the MAC addr into the HW */ - priv->hw->mac->set_umac_addr(ioaddr, dev->dev_addr, 0); + priv->hw->mac->set_umac_addr(priv->ioaddr, dev->dev_addr, 0); /* If required, perform hw setup of the bus. */ if (priv->bus_setup) - priv->bus_setup(ioaddr); + priv->bus_setup(priv->ioaddr); /* Initialize the MAC Core */ - priv->hw->mac->core_init(ioaddr); + priv->hw->mac->core_init(priv->ioaddr); priv->shutdown = 0; /* Initialise the MMC (if present) to disable all interrupts. */ - writel(0xffffffff, ioaddr + MMC_HIGH_INTR_MASK); - writel(0xffffffff, ioaddr + MMC_LOW_INTR_MASK); + writel(0xffffffff, priv->ioaddr + MMC_HIGH_INTR_MASK); + writel(0xffffffff, priv->ioaddr + MMC_LOW_INTR_MASK); /* Enable the MAC Rx/Tx */ - stmmac_mac_enable_rx(ioaddr); - stmmac_mac_enable_tx(ioaddr); + stmmac_mac_enable_rx(priv->ioaddr); + stmmac_mac_enable_tx(priv->ioaddr); /* Set the HW DMA mode and the COE */ stmmac_dma_operation_mode(priv); @@ -880,16 +877,16 @@ static int stmmac_open(struct net_device *dev) /* Start the ball rolling... */ DBG(probe, DEBUG, "%s: DMA RX/TX processes started...\n", dev->name); - priv->hw->dma->start_tx(ioaddr); - priv->hw->dma->start_rx(ioaddr); + priv->hw->dma->start_tx(priv->ioaddr); + priv->hw->dma->start_rx(priv->ioaddr); #ifdef CONFIG_STMMAC_TIMER priv->tm->timer_start(tmrate); #endif /* Dump DMA/MAC registers */ if (netif_msg_hw(priv)) { - priv->hw->mac->dump_regs(ioaddr); - priv->hw->dma->dump_regs(ioaddr); + priv->hw->mac->dump_regs(priv->ioaddr); + priv->hw->dma->dump_regs(priv->ioaddr); } if (priv->phydev) @@ -933,15 +930,15 @@ static int stmmac_release(struct net_device *dev) free_irq(dev->irq, dev); /* Stop TX/RX DMA and clear the descriptors */ - priv->hw->dma->stop_tx(dev->base_addr); - priv->hw->dma->stop_rx(dev->base_addr); + priv->hw->dma->stop_tx(priv->ioaddr); + priv->hw->dma->stop_rx(priv->ioaddr); /* Release and free the Rx/Tx resources */ free_dma_desc_resources(priv); /* Disable the MAC core */ - stmmac_mac_disable_tx(dev->base_addr); - stmmac_mac_disable_rx(dev->base_addr); + stmmac_mac_disable_tx(priv->ioaddr); + stmmac_mac_disable_rx(priv->ioaddr); netif_carrier_off(dev); @@ -1143,7 +1140,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_bytes += skb->len; - priv->hw->dma->enable_dma_transmission(dev->base_addr); + priv->hw->dma->enable_dma_transmission(priv->ioaddr); return NETDEV_TX_OK; } @@ -1408,11 +1405,9 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) return IRQ_NONE; } - if (priv->is_gmac) { - unsigned long ioaddr = dev->base_addr; + if (priv->is_gmac) /* To handle GMAC own interrupts */ - priv->hw->mac->host_irq_status(ioaddr); - } + priv->hw->mac->host_irq_status((void __iomem *) dev->base_addr); stmmac_dma_interrupt(priv); @@ -1525,7 +1520,8 @@ static int stmmac_probe(struct net_device *dev) netif_napi_add(dev, &priv->napi, stmmac_poll, 64); /* Get the MAC address */ - priv->hw->mac->get_umac_addr(dev->base_addr, dev->dev_addr, 0); + priv->hw->mac->get_umac_addr((void __iomem *) dev->base_addr, + dev->dev_addr, 0); if 
(!is_valid_ether_addr(dev->dev_addr)) pr_warning("\tno valid MAC address;" @@ -1555,14 +1551,13 @@ static int stmmac_probe(struct net_device *dev) static int stmmac_mac_device_setup(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; struct mac_device_info *device; if (priv->is_gmac) - device = dwmac1000_setup(ioaddr); + device = dwmac1000_setup(priv->ioaddr); else - device = dwmac100_setup(ioaddr); + device = dwmac100_setup(priv->ioaddr); if (!device) return -ENOMEM; @@ -1656,7 +1651,7 @@ static int stmmac_dvr_probe(struct platform_device *pdev) { int ret = 0; struct resource *res; - unsigned int *addr = NULL; + void __iomem *addr = NULL; struct net_device *ndev = NULL; struct stmmac_priv *priv; struct plat_stmmacenet_data *plat_dat; @@ -1711,6 +1706,7 @@ static int stmmac_dvr_probe(struct platform_device *pdev) priv->pbl = plat_dat->pbl; /* TLI */ priv->is_gmac = plat_dat->has_gmac; /* GMAC is on board */ priv->enh_desc = plat_dat->enh_desc; + priv->ioaddr = addr; platform_set_drvdata(pdev, ndev); @@ -1782,11 +1778,11 @@ static int stmmac_dvr_remove(struct platform_device *pdev) pr_info("%s:\n\tremoving driver", __func__); - priv->hw->dma->stop_rx(ndev->base_addr); - priv->hw->dma->stop_tx(ndev->base_addr); + priv->hw->dma->stop_rx(priv->ioaddr); + priv->hw->dma->stop_tx(priv->ioaddr); - stmmac_mac_disable_rx(ndev->base_addr); - stmmac_mac_disable_tx(ndev->base_addr); + stmmac_mac_disable_rx(priv->ioaddr); + stmmac_mac_disable_tx(priv->ioaddr); netif_carrier_off(ndev); @@ -1795,7 +1791,7 @@ static int stmmac_dvr_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); unregister_netdev(ndev); - iounmap((void *)ndev->base_addr); + iounmap((void *)priv->ioaddr); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(res->start, resource_size(res)); @@ -1830,22 +1826,21 @@ static int stmmac_suspend(struct platform_device *pdev, pm_message_t state) napi_disable(&priv->napi); /* Stop TX/RX DMA */ - priv->hw->dma->stop_tx(dev->base_addr); - priv->hw->dma->stop_rx(dev->base_addr); + priv->hw->dma->stop_tx(priv->ioaddr); + priv->hw->dma->stop_rx(priv->ioaddr); /* Clear the Rx/Tx descriptors */ priv->hw->desc->init_rx_desc(priv->dma_rx, priv->dma_rx_size, dis_ic); priv->hw->desc->init_tx_desc(priv->dma_tx, priv->dma_tx_size); - stmmac_mac_disable_tx(dev->base_addr); + stmmac_mac_disable_tx(priv->ioaddr); if (device_may_wakeup(&(pdev->dev))) { /* Enable Power down mode by programming the PMT regs */ if (priv->wolenabled == PMT_SUPPORTED) - priv->hw->mac->pmt(dev->base_addr, - priv->wolopts); + priv->hw->mac->pmt(priv->ioaddr, priv->wolopts); } else { - stmmac_mac_disable_rx(dev->base_addr); + stmmac_mac_disable_rx(priv->ioaddr); } } else { priv->shutdown = 1; @@ -1863,7 +1858,6 @@ static int stmmac_resume(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; if (!netif_running(dev)) return 0; @@ -1884,15 +1878,15 @@ static int stmmac_resume(struct platform_device *pdev) * from another devices (e.g. serial console). 
*/ if (device_may_wakeup(&(pdev->dev))) if (priv->wolenabled == PMT_SUPPORTED) - priv->hw->mac->pmt(dev->base_addr, 0); + priv->hw->mac->pmt(priv->ioaddr, 0); netif_device_attach(dev); /* Enable the MAC and DMA */ - stmmac_mac_enable_rx(ioaddr); - stmmac_mac_enable_tx(ioaddr); - priv->hw->dma->start_tx(ioaddr); - priv->hw->dma->start_rx(ioaddr); + stmmac_mac_enable_rx(priv->ioaddr); + stmmac_mac_enable_tx(priv->ioaddr); + priv->hw->dma->start_tx(priv->ioaddr); + priv->hw->dma->start_rx(priv->ioaddr); #ifdef CONFIG_STMMAC_TIMER priv->tm->timer_start(tmrate); diff --git a/drivers/net/stmmac/stmmac_mdio.c b/drivers/net/stmmac/stmmac_mdio.c index 40b2c7929719..03dea1401571 100644 --- a/drivers/net/stmmac/stmmac_mdio.c +++ b/drivers/net/stmmac/stmmac_mdio.c @@ -47,7 +47,6 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) { struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); - unsigned long ioaddr = ndev->base_addr; unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; @@ -56,12 +55,12 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) ((phyreg << 6) & (0x000007C0))); regValue |= MII_BUSY; /* in case of GMAC */ - do {} while (((readl(ioaddr + mii_address)) & MII_BUSY) == 1); - writel(regValue, ioaddr + mii_address); - do {} while (((readl(ioaddr + mii_address)) & MII_BUSY) == 1); + do {} while (((readl(priv->ioaddr + mii_address)) & MII_BUSY) == 1); + writel(regValue, priv->ioaddr + mii_address); + do {} while (((readl(priv->ioaddr + mii_address)) & MII_BUSY) == 1); /* Read the data from the MII data register */ - data = (int)readl(ioaddr + mii_data); + data = (int)readl(priv->ioaddr + mii_data); return data; } @@ -79,7 +78,6 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, { struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); - unsigned long ioaddr = ndev->base_addr; unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; @@ -90,14 +88,14 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, value |= MII_BUSY; /* Wait until any existing MII operation is complete */ - do {} while (((readl(ioaddr + mii_address)) & MII_BUSY) == 1); + do {} while (((readl(priv->ioaddr + mii_address)) & MII_BUSY) == 1); /* Set the MII address register to write */ - writel(phydata, ioaddr + mii_data); - writel(value, ioaddr + mii_address); + writel(phydata, priv->ioaddr + mii_data); + writel(value, priv->ioaddr + mii_address); /* Wait until any existing MII operation is complete */ - do {} while (((readl(ioaddr + mii_address)) & MII_BUSY) == 1); + do {} while (((readl(priv->ioaddr + mii_address)) & MII_BUSY) == 1); return 0; } @@ -111,7 +109,6 @@ static int stmmac_mdio_reset(struct mii_bus *bus) { struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); - unsigned long ioaddr = ndev->base_addr; unsigned int mii_address = priv->hw->mii.addr; if (priv->phy_reset) { @@ -123,7 +120,7 @@ static int stmmac_mdio_reset(struct mii_bus *bus) * It doesn't complete its reset until at least one clock cycle * on MDC, so perform a dummy mdio read. 
*/ - writel(0, ioaddr + mii_address); + writel(0, priv->ioaddr + mii_address); return 0; } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 632ff7c03280..a4adf0de6ed6 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -35,7 +35,7 @@ struct plat_stmmacenet_data { int has_gmac; int enh_desc; void (*fix_mac_speed)(void *priv, unsigned int speed); - void (*bus_setup)(unsigned long ioaddr); + void (*bus_setup)(void __iomem *ioaddr); #ifdef CONFIG_STM_DRIVERS struct stm_pad_config *pad_config; #endif -- cgit v1.2.3 From 2971944582ff43b7dedbb460777052243ac9915a Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Mon, 9 Aug 2010 12:54:43 +0100 Subject: ARM: 6307/1: mmci: allow the card detect GPIO value not to be inverted On some platforms, the GPIO value from the gpio_cd pin doesn't need to be inverted to get it active high. Add a cd_invert platform data parameter and change existing platforms using GPIO for CD (only Realview) to enable it. Acked-by: Linus Walleij Signed-off-by: Rabin Vincent Signed-off-by: Russell King --- arch/arm/mach-realview/core.c | 2 ++ drivers/mmc/host/mmci.c | 5 +++-- include/linux/amba/mmci.h | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 2fa38df28414..07c08151dfe6 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -259,6 +259,7 @@ struct mmci_platform_data realview_mmc0_plat_data = { .status = realview_mmc_status, .gpio_wp = 17, .gpio_cd = 16, + .cd_invert = true, }; struct mmci_platform_data realview_mmc1_plat_data = { @@ -266,6 +267,7 @@ struct mmci_platform_data realview_mmc1_plat_data = { .status = realview_mmc_status, .gpio_wp = 19, .gpio_cd = 18, + .cd_invert = true, }; /* diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 840b301b5671..9a9aeac50a6c 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -570,12 +570,13 @@ static int mmci_get_ro(struct mmc_host *mmc) static int mmci_get_cd(struct mmc_host *mmc) { struct mmci_host *host = mmc_priv(mmc); + struct mmci_platform_data *plat = host->plat; unsigned int status; if (host->gpio_cd == -ENOSYS) - status = host->plat->status(mmc_dev(host->mmc)); + status = plat->status(mmc_dev(host->mmc)); else - status = !gpio_get_value(host->gpio_cd); + status = !!gpio_get_value(host->gpio_cd) ^ plat->cd_invert; /* * Use positive logic throughout - status is zero for no card, diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h index ca84ce70d5d5..f4ee9acc9721 100644 --- a/include/linux/amba/mmci.h +++ b/include/linux/amba/mmci.h @@ -24,6 +24,7 @@ * whether a card is present in the MMC slot or not * @gpio_wp: read this GPIO pin to see if the card is write protected * @gpio_cd: read this GPIO pin to detect card insertion + * @cd_invert: true if the gpio_cd pin value is active low * @capabilities: the capabilities of the block as implemented in * this platform, signify anything MMC_CAP_* from mmc/host.h */ @@ -35,6 +36,7 @@ struct mmci_platform_data { unsigned int (*status)(struct device *); int gpio_wp; int gpio_cd; + bool cd_invert; unsigned long capabilities; }; -- cgit v1.2.3 From 145ce502e44b57c074c72cfdc855557e19026999 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 24 Aug 2010 13:21:08 +0000 Subject: net/sctp: Use pr_fmt and pr_ Change SCTP_DEBUG_PRINTK and SCTP_DEBUG_PRINTK_IPADDR to use do { print } while (0) guards. Add SCTP_DEBUG_PRINTK_CONT to fix errors in log when lines were continued. 
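The usual motivation for the do { } while (0) form, sketched with a hypothetical macro rather than the exact SCTP definitions: it makes a guarded, multi-statement macro expand to a single statement, so a trailing semicolon and a surrounding if/else parse the way the caller expects.

	/* Hypothetical illustration of the idiom; 'debug_flag' stands in
	 * for sctp_debug_flag. Without the guard, the 'else' in
	 *
	 *	if (cond)
	 *		DBG_PRINT("x\n");
	 *	else
	 *		do_other();
	 *
	 * would bind to the 'if (debug_flag)' hidden inside the macro.
	 */
	#define DBG_PRINT(fmt, args...)				\
	do {							\
		if (debug_flag)					\
			printk(KERN_DEBUG fmt, ##args);		\
	} while (0)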
Add #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt Add a missing newline in "Failed bind hash alloc" Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 48 ++++++++++++++++++++++++++++++------------------ net/sctp/associola.c | 2 ++ net/sctp/chunk.c | 2 ++ net/sctp/inqueue.c | 2 ++ net/sctp/ipv6.c | 4 +++- net/sctp/objcnt.c | 5 +++-- net/sctp/output.c | 2 ++ net/sctp/outqueue.c | 34 ++++++++++++++++++---------------- net/sctp/probe.c | 4 +++- net/sctp/protocol.c | 17 ++++++++--------- net/sctp/sm_make_chunk.c | 2 ++ net/sctp/sm_sideeffect.c | 21 ++++++++++----------- net/sctp/sm_statefuns.c | 20 ++++++++++---------- net/sctp/sm_statetable.c | 42 +++++++++++++++++++++--------------------- net/sctp/socket.c | 39 +++++++++++++++------------------------ net/sctp/transport.c | 9 +++++---- 16 files changed, 136 insertions(+), 117 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 65946bc43d00..2cb3980b1616 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -275,24 +275,35 @@ struct sctp_mib { /* Print debugging messages. */ #if SCTP_DEBUG extern int sctp_debug_flag; -#define SCTP_DEBUG_PRINTK(whatever...) \ - ((void) (sctp_debug_flag && printk(KERN_DEBUG whatever))) -#define SCTP_DEBUG_PRINTK_IPADDR(lead, trail, leadparm, saddr, otherparms...) \ - if (sctp_debug_flag) { \ - if (saddr->sa.sa_family == AF_INET6) { \ - printk(KERN_DEBUG \ - lead "%pI6" trail, \ - leadparm, \ - &saddr->v6.sin6_addr, \ - otherparms); \ - } else { \ - printk(KERN_DEBUG \ - lead "%pI4" trail, \ - leadparm, \ - &saddr->v4.sin_addr.s_addr, \ - otherparms); \ - } \ - } +#define SCTP_DEBUG_PRINTK(fmt, args...) \ +do { \ + if (sctp_debug_flag) \ + printk(KERN_DEBUG pr_fmt(fmt), ##args); \ +} while (0) +#define SCTP_DEBUG_PRINTK_CONT(fmt, args...) \ +do { \ + if (sctp_debug_flag) \ + pr_cont(fmt, ##args); \ +} while (0) +#define SCTP_DEBUG_PRINTK_IPADDR(fmt_lead, fmt_trail, \ + args_lead, saddr, args_trail...) \ +do { \ + if (sctp_debug_flag) { \ + if (saddr->sa.sa_family == AF_INET6) { \ + printk(KERN_DEBUG \ + pr_fmt(fmt_lead "%pI6" fmt_trail), \ + args_lead, \ + &saddr->v6.sin6_addr, \ + args_trail); \ + } else { \ + printk(KERN_DEBUG \ + pr_fmt(fmt_lead "%pI4" fmt_trail), \ + args_lead, \ + &saddr->v4.sin_addr.s_addr, \ + args_trail); \ + } \ + } \ +} while (0) #define SCTP_ENABLE_DEBUG { sctp_debug_flag = 1; } #define SCTP_DISABLE_DEBUG { sctp_debug_flag = 0; } @@ -306,6 +317,7 @@ extern int sctp_debug_flag; #else /* SCTP_DEBUG */ #define SCTP_DEBUG_PRINTK(whatever...) +#define SCTP_DEBUG_PRINTK_CONT(fmt, args...) #define SCTP_DEBUG_PRINTK_IPADDR(whatever...) #define SCTP_ENABLE_DEBUG #define SCTP_DISABLE_DEBUG diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 0b85e5256434..5f1fb8bd862d 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -48,6 +48,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 476caaf100ed..6c8556459a75 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -37,6 +37,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index ccb6dc48d15b..397296fb156f 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -43,6 +43,8 @@ * be incorporated into the next SCTP release. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 732689140fb8..95e0c8eda1a0 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -47,6 +47,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -336,7 +338,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, memcpy(saddr, baddr, sizeof(union sctp_addr)); SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr); } else { - printk(KERN_ERR "%s: asoc:%p Could not find a valid source " + pr_err("%s: asoc:%p Could not find a valid source " "address for the dest:%pI6\n", __func__, asoc, &daddr->v6.sin6_addr); } diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index f73ec0ea93ba..8ef8e7d9eb61 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -38,6 +38,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include @@ -134,8 +136,7 @@ void sctp_dbg_objcnt_init(void) ent = proc_create("sctp_dbg_objcnt", 0, proc_net_sctp, &sctp_objcnt_ops); if (!ent) - printk(KERN_WARNING - "sctp_dbg_objcnt: Unable to create /proc entry.\n"); + pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n"); } /* Cleanup the objcount entry in the proc filesystem. */ diff --git a/net/sctp/output.c b/net/sctp/output.c index a646681f5acd..901764b17aee 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -41,6 +41,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index c04b2eb59186..8c6d379b4bb6 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -46,6 +46,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include /* For struct list_head */ #include @@ -1463,23 +1465,23 @@ static void sctp_check_transmitted(struct sctp_outq *q, /* Display the end of the * current range. */ - SCTP_DEBUG_PRINTK("-%08x", - dbg_last_ack_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x", + dbg_last_ack_tsn); } /* Start a new range. */ - SCTP_DEBUG_PRINTK(",%08x", tsn); + SCTP_DEBUG_PRINTK_CONT(",%08x", tsn); dbg_ack_tsn = tsn; break; case 1: /* The last TSN was NOT ACKed. */ if (dbg_last_kept_tsn != dbg_kept_tsn) { /* Display the end of current range. */ - SCTP_DEBUG_PRINTK("-%08x", - dbg_last_kept_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x", + dbg_last_kept_tsn); } - SCTP_DEBUG_PRINTK("\n"); + SCTP_DEBUG_PRINTK_CONT("\n"); /* FALL THROUGH... */ default: @@ -1526,18 +1528,18 @@ static void sctp_check_transmitted(struct sctp_outq *q, break; if (dbg_last_kept_tsn != dbg_kept_tsn) - SCTP_DEBUG_PRINTK("-%08x", - dbg_last_kept_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x", + dbg_last_kept_tsn); - SCTP_DEBUG_PRINTK(",%08x", tsn); + SCTP_DEBUG_PRINTK_CONT(",%08x", tsn); dbg_kept_tsn = tsn; break; case 0: if (dbg_last_ack_tsn != dbg_ack_tsn) - SCTP_DEBUG_PRINTK("-%08x", - dbg_last_ack_tsn); - SCTP_DEBUG_PRINTK("\n"); + SCTP_DEBUG_PRINTK_CONT("-%08x", + dbg_last_ack_tsn); + SCTP_DEBUG_PRINTK_CONT("\n"); /* FALL THROUGH... 
*/ default: @@ -1556,17 +1558,17 @@ static void sctp_check_transmitted(struct sctp_outq *q, switch (dbg_prt_state) { case 0: if (dbg_last_ack_tsn != dbg_ack_tsn) { - SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_ack_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_ack_tsn); } else { - SCTP_DEBUG_PRINTK("\n"); + SCTP_DEBUG_PRINTK_CONT("\n"); } break; case 1: if (dbg_last_kept_tsn != dbg_kept_tsn) { - SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_kept_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_kept_tsn); } else { - SCTP_DEBUG_PRINTK("\n"); + SCTP_DEBUG_PRINTK_CONT("\n"); } } #endif /* SCTP_DEBUG */ diff --git a/net/sctp/probe.c b/net/sctp/probe.c index db3a42b8b349..2e63e9dc010e 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -22,6 +22,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -192,7 +194,7 @@ static __init int sctpprobe_init(void) if (ret) goto remove_proc; - pr_info("SCTP probe registered (port=%d)\n", port); + pr_info("probe registered (port=%d)\n", port); return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 5027b83f1cc0..f774e657641a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -46,6 +46,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -707,8 +709,7 @@ static int sctp_ctl_sock_init(void) &init_net); if (err < 0) { - printk(KERN_ERR - "SCTP: Failed to create the SCTP control socket.\n"); + pr_err("Failed to create the SCTP control socket\n"); return err; } return 0; @@ -1206,7 +1207,7 @@ SCTP_STATIC __init int sctp_init(void) __get_free_pages(GFP_ATOMIC, order); } while (!sctp_assoc_hashtable && --order > 0); if (!sctp_assoc_hashtable) { - printk(KERN_ERR "SCTP: Failed association hash alloc.\n"); + pr_err("Failed association hash alloc\n"); status = -ENOMEM; goto err_ahash_alloc; } @@ -1220,7 +1221,7 @@ SCTP_STATIC __init int sctp_init(void) sctp_ep_hashtable = (struct sctp_hashbucket *) kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL); if (!sctp_ep_hashtable) { - printk(KERN_ERR "SCTP: Failed endpoint_hash alloc.\n"); + pr_err("Failed endpoint_hash alloc\n"); status = -ENOMEM; goto err_ehash_alloc; } @@ -1239,7 +1240,7 @@ SCTP_STATIC __init int sctp_init(void) __get_free_pages(GFP_ATOMIC, order); } while (!sctp_port_hashtable && --order > 0); if (!sctp_port_hashtable) { - printk(KERN_ERR "SCTP: Failed bind hash alloc."); + pr_err("Failed bind hash alloc\n"); status = -ENOMEM; goto err_bhash_alloc; } @@ -1248,8 +1249,7 @@ SCTP_STATIC __init int sctp_init(void) INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); } - printk(KERN_INFO "SCTP: Hash tables configured " - "(established %d bind %d)\n", + pr_info("Hash tables configured (established %d bind %d)\n", sctp_assoc_hashsize, sctp_port_hashsize); /* Disable ADDIP by default. */ @@ -1290,8 +1290,7 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize the control inode/socket for handling OOTB packets. */ if ((status = sctp_ctl_sock_init())) { - printk (KERN_ERR - "SCTP: Failed to initialize the SCTP control sock.\n"); + pr_err("Failed to initialize the SCTP control sock\n"); goto err_ctl_sock_init; } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 246f92924658..2cc46f0962ca 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -50,6 +50,8 @@ * be incorporated into the next SCTP release. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index f5e5e27cac5e..b21b218d564f 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -47,6 +47,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -1146,26 +1148,23 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, case SCTP_DISPOSITION_VIOLATION: if (net_ratelimit()) - printk(KERN_ERR "sctp protocol violation state %d " - "chunkid %d\n", state, subtype.chunk); + pr_err("protocol violation state %d chunkid %d\n", + state, subtype.chunk); break; case SCTP_DISPOSITION_NOT_IMPL: - printk(KERN_WARNING "sctp unimplemented feature in state %d, " - "event_type %d, event_id %d\n", - state, event_type, subtype.chunk); + pr_warn("unimplemented feature in state %d, event_type %d, event_id %d\n", + state, event_type, subtype.chunk); break; case SCTP_DISPOSITION_BUG: - printk(KERN_ERR "sctp bug in state %d, " - "event_type %d, event_id %d\n", + pr_err("bug in state %d, event_type %d, event_id %d\n", state, event_type, subtype.chunk); BUG(); break; default: - printk(KERN_ERR "sctp impossible disposition %d " - "in state %d, event_type %d, event_id %d\n", + pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n", status, state, event_type, subtype.chunk); BUG(); break; @@ -1679,8 +1678,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_cmd_send_asconf(asoc); break; default: - printk(KERN_WARNING "Impossible command: %u, %p\n", - cmd->verb, cmd->obj.ptr); + pr_warn("Impossible command: %u, %p\n", + cmd->verb, cmd->obj.ptr); break; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 24b2cd555637..8b284436be65 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -50,6 +50,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -1138,18 +1140,16 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, if (unlikely(!link)) { if (from_addr.sa.sa_family == AF_INET6) { if (net_ratelimit()) - printk(KERN_WARNING - "%s association %p could not find address %pI6\n", - __func__, - asoc, - &from_addr.v6.sin6_addr); + pr_warn("%s association %p could not find address %pI6\n", + __func__, + asoc, + &from_addr.v6.sin6_addr); } else { if (net_ratelimit()) - printk(KERN_WARNING - "%s association %p could not find address %pI4\n", - __func__, - asoc, - &from_addr.v4.sin_addr.s_addr); + pr_warn("%s association %p could not find address %pI4\n", + __func__, + asoc, + &from_addr.v4.sin_addr.s_addr); } return SCTP_DISPOSITION_DISCARD; } diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 6d9b3aafcc5d..546d4387fb3c 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -46,6 +46,8 @@ * be incorporated into the next SCTP release. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -66,15 +68,19 @@ static const sctp_sm_table_entry_t bug = { .name = "sctp_sf_bug" }; -#define DO_LOOKUP(_max, _type, _table) \ - if ((event_subtype._type > (_max))) { \ - printk(KERN_WARNING \ - "sctp table %p possible attack:" \ - " event %d exceeds max %d\n", \ - _table, event_subtype._type, _max); \ - return &bug; \ - } \ - return &_table[event_subtype._type][(int)state]; +#define DO_LOOKUP(_max, _type, _table) \ +({ \ + const sctp_sm_table_entry_t *rtn; \ + \ + if ((event_subtype._type > (_max))) { \ + pr_warn("table %p possible attack: event %d exceeds max %d\n", \ + _table, event_subtype._type, _max); \ + rtn = &bug; \ + } else \ + rtn = &_table[event_subtype._type][(int)state]; \ + \ + rtn; \ +}) const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, sctp_state_t state, @@ -83,21 +89,15 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, switch (event_type) { case SCTP_EVENT_T_CHUNK: return sctp_chunk_event_lookup(event_subtype.chunk, state); - break; case SCTP_EVENT_T_TIMEOUT: - DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout, - timeout_event_table); - break; - + return DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout, + timeout_event_table); case SCTP_EVENT_T_OTHER: - DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other, other_event_table); - break; - + return DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other, + other_event_table); case SCTP_EVENT_T_PRIMITIVE: - DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive, - primitive_event_table); - break; - + return DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive, + primitive_event_table); default: /* Yikes! We got an illegal event type. */ return &bug; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ca44917872d2..f4bec2772351 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -57,6 +57,8 @@ * be incorporated into the next SCTP release. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -2458,9 +2460,8 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, if (params.sack_delay == 0 && params.sack_freq == 0) return 0; } else if (optlen == sizeof(struct sctp_assoc_value)) { - printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " - "in delayed_ack socket option deprecated\n"); - printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n"); + pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n"); + pr_warn("Use struct sctp_sack_info instead\n"); if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; @@ -2868,10 +2869,8 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int val; if (optlen == sizeof(int)) { - printk(KERN_WARNING - "SCTP: Use of int in maxseg socket option deprecated\n"); - printk(KERN_WARNING - "SCTP: Use struct sctp_assoc_value instead\n"); + pr_warn("Use of int in maxseg socket option deprecated\n"); + pr_warn("Use struct sctp_assoc_value instead\n"); if (copy_from_user(&val, optval, optlen)) return -EFAULT; params.assoc_id = 0; @@ -3121,10 +3120,8 @@ static int sctp_setsockopt_maxburst(struct sock *sk, int assoc_id = 0; if (optlen == sizeof(int)) { - printk(KERN_WARNING - "SCTP: Use of int in max_burst socket option deprecated\n"); - printk(KERN_WARNING - "SCTP: Use struct sctp_assoc_value instead\n"); + pr_warn("Use of int in max_burst socket option deprecated\n"); + pr_warn("Use struct sctp_assoc_value instead\n"); if (copy_from_user(&val, optval, optlen)) return -EFAULT; } else if (optlen == sizeof(struct sctp_assoc_value)) { @@ -4281,9 +4278,8 @@ static int sctp_getsockopt_delayed_ack(struct sock *sk, int len, if (copy_from_user(¶ms, optval, len)) return -EFAULT; } else if (len == sizeof(struct sctp_assoc_value)) { - printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " - "in delayed_ack socket option deprecated\n"); - printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n"); + pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n"); + pr_warn("Use struct sctp_sack_info instead\n"); if (copy_from_user(¶ms, optval, len)) return -EFAULT; } else @@ -4929,10 +4925,8 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len, struct sctp_association *asoc; if (len == sizeof(int)) { - printk(KERN_WARNING - "SCTP: Use of int in maxseg socket option deprecated\n"); - printk(KERN_WARNING - "SCTP: Use struct sctp_assoc_value instead\n"); + pr_warn("Use of int in maxseg socket option deprecated\n"); + pr_warn("Use struct sctp_assoc_value instead\n"); params.assoc_id = 0; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); @@ -5023,10 +5017,8 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, struct sctp_association *asoc; if (len == sizeof(int)) { - printk(KERN_WARNING - "SCTP: Use of int in max_burst socket option deprecated\n"); - printk(KERN_WARNING - "SCTP: Use struct sctp_assoc_value instead\n"); + pr_warn("Use of int in max_burst socket option deprecated\n"); + pr_warn("Use struct sctp_assoc_value instead\n"); params.assoc_id = 0; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); @@ -5586,8 +5578,7 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog) tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { if (net_ratelimit()) { - printk(KERN_INFO - "SCTP: failed to load transform for %s: %ld\n", + pr_info("failed to load 
transform for %s: %ld\n", sctp_hmac_alg, PTR_ERR(tfm)); } return -ENOSYS; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 132046cb82fc..d3ae493d234a 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -48,6 +48,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -244,10 +246,9 @@ void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) struct dst_entry *dst; if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { - printk(KERN_WARNING "%s: Reported pmtu %d too low, " - "using default minimum of %d\n", - __func__, pmtu, - SCTP_DEFAULT_MINSEGMENT); + pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", + __func__, pmtu, + SCTP_DEFAULT_MINSEGMENT); /* Use default minimum segment size and disable * pmtu discovery on this transport. */ -- cgit v1.2.3 From 480125ba49ba62be93beea37770f266846e077ab Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 26 Aug 2010 13:57:57 -0400 Subject: x86, iommu: Make all IOMMU's detection routines return a value. We return 1 if the IOMMU has been detected, and zero or an error number if we failed to find it. This is in preparation for using the IOMMU_INIT so that we can detect whether an IOMMU is present. I have not tested this for regression on Calgary, nor on AMD Vi chipsets as I don't have that hardware. CC: Muli Ben-Yehuda CC: "Jon D. Mason" CC: "Darrick J. Wong" CC: Jesse Barnes CC: David Woodhouse CC: Chris Wright CC: Yinghai Lu CC: Joerg Roedel CC: H. Peter Anvin CC: Fujita Tomonori Signed-off-by: Konrad Rzeszutek Wilk LKML-Reference: <1282845485-8991-3-git-send-email-konrad.wilk@oracle.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/amd_iommu.h | 4 ++-- arch/x86/include/asm/calgary.h | 4 ++-- arch/x86/include/asm/gart.h | 5 +++-- arch/x86/kernel/amd_iommu_init.c | 8 +++++--- arch/x86/kernel/aperture_64.c | 11 +++++++---- arch/x86/kernel/pci-calgary_64.c | 15 ++++++++------- drivers/pci/dmar.c | 4 +++- include/linux/dmar.h | 6 +++--- 8 files changed, 33 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 5af2982133b5..2798142cdb49 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -24,11 +24,11 @@ #ifdef CONFIG_AMD_IOMMU -extern void amd_iommu_detect(void); +extern int amd_iommu_detect(void); #else -static inline void amd_iommu_detect(void) { } +static inline int amd_iommu_detect(void) { return -ENODEV; } #endif diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h index 0918654305af..0d467b338835 100644 --- a/arch/x86/include/asm/calgary.h +++ b/arch/x86/include/asm/calgary.h @@ -62,9 +62,9 @@ struct cal_chipset_ops { extern int use_calgary; #ifdef CONFIG_CALGARY_IOMMU -extern void detect_calgary(void); +extern int detect_calgary(void); #else -static inline void detect_calgary(void) { return; } +static inline int detect_calgary(void) { return -ENODEV; } #endif #endif /* _ASM_X86_CALGARY_H */ diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 4ac5b0f33fc1..d7d1d4c438a4 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -37,7 +37,7 @@ extern int gart_iommu_aperture_disabled; extern void early_gart_iommu_check(void); extern int gart_iommu_init(void); extern void __init gart_parse_options(char *); -extern void gart_iommu_hole_init(void); +extern int gart_iommu_hole_init(void); #else #define gart_iommu_aperture 0 @@ 
-50,8 +50,9 @@ static inline void early_gart_iommu_check(void) static inline void gart_parse_options(char *options) { } -static inline void gart_iommu_hole_init(void) +static inline int gart_iommu_hole_init(void) { + return -ENODEV; } #endif diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 3cc63e2b8dd4..0b9e2dc4fc9a 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1382,13 +1382,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) return 0; } -void __init amd_iommu_detect(void) +int __init amd_iommu_detect(void) { if (no_iommu || (iommu_detected && !gart_iommu_aperture)) - return; + return -ENODEV; if (amd_iommu_disabled) - return; + return -ENODEV; if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { iommu_detected = 1; @@ -1397,7 +1397,9 @@ void __init amd_iommu_detect(void) /* Make sure ACS will be enabled */ pci_request_acs(); + return 1; } + return -ENODEV; } /**************************************************************************** diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index a2e0caf26e17..afa0dab3302f 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -371,7 +371,7 @@ void __init early_gart_iommu_check(void) static int __initdata printed_gart_size_msg; -void __init gart_iommu_hole_init(void) +int __init gart_iommu_hole_init(void) { u32 agp_aper_base = 0, agp_aper_order = 0; u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; @@ -381,7 +381,7 @@ void __init gart_iommu_hole_init(void) if (gart_iommu_aperture_disabled || !fix_aperture || !early_pci_allowed()) - return; + return -ENODEV; printk(KERN_INFO "Checking aperture...\n"); @@ -463,8 +463,9 @@ out: unsigned long n = (32 * 1024 * 1024) << last_aper_order; insert_aperture_resource((u32)last_aper_base, n); + return 1; } - return; + return 0; } if (!fallback_aper_force) { @@ -500,7 +501,7 @@ out: panic("Not enough memory for aperture"); } } else { - return; + return 0; } /* Fix up the north bridges */ @@ -524,4 +525,6 @@ out: } set_up_gart_resume(aper_order, aper_alloc); + + return 1; } diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 078d4ec1a9d9..28c6b389fee6 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1364,7 +1364,7 @@ static int __init calgary_iommu_init(void) return 0; } -void __init detect_calgary(void) +int __init detect_calgary(void) { int bus; void *tbl; @@ -1378,13 +1378,13 @@ void __init detect_calgary(void) * another HW IOMMU already, bail out. */ if (no_iommu || iommu_detected) - return; + return -ENODEV; if (!use_calgary) - return; + return -ENODEV; if (!early_pci_allowed()) - return; + return -ENODEV; printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n"); @@ -1410,13 +1410,13 @@ void __init detect_calgary(void) if (!rio_table_hdr) { printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table " "in EBDA - bailing!\n"); - return; + return -ENODEV; } ret = build_detail_arrays(); if (ret) { printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret); - return; + return -ENOMEM; } specified_table_size = determine_tce_table_size((is_kdump_kernel() ? 
@@ -1464,7 +1464,7 @@ void __init detect_calgary(void) x86_init.iommu.iommu_init = calgary_iommu_init; } - return; + return calgary_found; cleanup: for (--bus; bus >= 0; --bus) { @@ -1473,6 +1473,7 @@ cleanup: if (info->tce_space) free_tce_table(info->tce_space); } + return -ENOMEM; } static int __init calgary_parse_options(char *p) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 0a19708074c2..5fa64ea5416f 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -687,7 +687,7 @@ failed: return 0; } -void __init detect_intel_iommu(void) +int __init detect_intel_iommu(void) { int ret; @@ -723,6 +723,8 @@ void __init detect_intel_iommu(void) } early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); dmar_tbl = NULL; + + return (ret ? 1 : -ENODEV); } diff --git a/include/linux/dmar.h b/include/linux/dmar.h index d7cecc90ed34..a20602041511 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -57,15 +57,15 @@ extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); /* Intel IOMMU detection */ -extern void detect_intel_iommu(void); +extern int detect_intel_iommu(void); extern int enable_drhd_fault_handling(void); extern int parse_ioapics_under_ir(void); extern int alloc_iommu(struct dmar_drhd_unit *); #else -static inline void detect_intel_iommu(void) +static inline int detect_intel_iommu(void) { - return; + return -ENODEV; } static inline int dmar_table_init(void) -- cgit v1.2.3 From 04cbe1de6fbda9649a6f25666194e6955d3e717e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Aug 2010 21:29:43 +0100 Subject: vgaarb: Wrap vga_(get|put) in CONFIG_VGA_ARB Fix link failure without the vga arbitrator. Signed-off-by: Chris Wilson Cc: Dave Airlie Cc: Jesse Barnes Signed-off-by: Dave Airlie --- include/linux/vgaarb.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index 6228b5b77d35..e9e1524b582c 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -93,8 +93,11 @@ extern void vga_set_legacy_decoding(struct pci_dev *pdev, * Nested calls are supported (a per-resource counter is maintained) */ -extern int vga_get(struct pci_dev *pdev, unsigned int rsrc, - int interruptible); +#if defined(CONFIG_VGA_ARB) +extern int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible); +#else +static inline int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible) { return 0; } +#endif /** * vga_get_interruptible @@ -131,7 +134,11 @@ static inline int vga_get_uninterruptible(struct pci_dev *pdev, * are already locked by another card. It can be called in any context */ +#if defined(CONFIG_VGA_ARB) extern int vga_tryget(struct pci_dev *pdev, unsigned int rsrc); +#else +static inline int vga_tryget(struct pci_dev *pdev, unsigned int rsrc) { return 0; } +#endif /** * vga_put - release lock on legacy VGA resources @@ -146,7 +153,11 @@ extern int vga_tryget(struct pci_dev *pdev, unsigned int rsrc); * released if the counter reaches 0. */ +#if defined(CONFIG_VGA_ARB) extern void vga_put(struct pci_dev *pdev, unsigned int rsrc); +#else +#define vga_put(pdev, rsrc) +#endif /** -- cgit v1.2.3 From 40d0802b3eb47d57e2d57a5244a18cbbe9632e13 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Aug 2010 22:03:08 -0700 Subject: gro: __napi_gro_receive() optimizations compare_ether_header() can have a special implementation on 64 bit arches if CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is defined. 
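(As an illustrative aside, not part of the patch: the overlapping-XOR fold described above is easy to demonstrate in stand-alone user-space C. hdr_diff() is a made-up name, and memcpy() stands in for the kernel's unaligned loads, so the sketch is safe even without CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.)

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Compare two 14-byte Ethernet headers with two 64-bit XORs whose
 * loads overlap by two bytes: offsets 0..7 and 6..13 together cover
 * bytes 0..13.  Bytes 6 and 7 are XORed twice, which cannot change a
 * zero/non-zero result, so the fold is zero iff the headers match.
 */
static uint64_t hdr_diff(const void *a, const void *b)
{
	uint64_t a0, a6, b0, b6;

	memcpy(&a0, a, 8);
	memcpy(&a6, (const unsigned char *)a + 6, 8);
	memcpy(&b0, b, 8);
	memcpy(&b6, (const unsigned char *)b + 6, 8);

	return (a0 ^ b0) | (a6 ^ b6);	/* zero iff all 14 bytes match */
}

int main(void)
{
	unsigned char h1[14] = "0123456789abc", h2[14] = "0123456789abc";

	printf("identical headers -> diff=%llu\n",
	       (unsigned long long)hdr_diff(h1, h2));
	h2[13] ^= 0x01;	/* flip one bit in the last byte */
	printf("one bit apart     -> %s\n",
	       hdr_diff(h1, h2) ? "mismatch" : "match");
	return 0;
}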
__napi_gro_receive() and vlan_gro_common() can avoid a conditional branch to perform device match. On x86_64, __napi_gro_receive() now has 38 instructions instead of 53. As gcc-4.4.3 still chooses not to inline it, add the inline keyword to this performance-critical function. Signed-off-by: Eric Dumazet CC: Herbert Xu Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 18 +++++++++++++++++- net/8021q/vlan_core.c | 9 ++++++--- net/core/dev.c | 10 ++++++---- 3 files changed, 29 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 2308fbb4523a..fb6aa6070921 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -237,13 +237,29 @@ static inline bool is_etherdev_addr(const struct net_device *dev, * entry points. */ -static inline int compare_ether_header(const void *a, const void *b) +static inline unsigned long compare_ether_header(const void *a, const void *b) { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + unsigned long fold; + + /* + * We want to compare 14 bytes: + * [a0 ... a13] ^ [b0 ... b13] + * Use two long XOR, ORed together, with an overlap of two bytes. + * [a0 a1 a2 a3 a4 a5 a6 a7 ] ^ [b0 b1 b2 b3 b4 b5 b6 b7 ] | + * [a6 a7 a8 a9 a10 a11 a12 a13] ^ [b6 b7 b8 b9 b10 b11 b12 b13] + * This means the [a6 a7] ^ [b6 b7] part is done two times. + */ + fold = *(unsigned long *)a ^ *(unsigned long *)b; + fold |= *(unsigned long *)(a + 6) ^ *(unsigned long *)(b + 6); + return fold; +#else u32 *a32 = (u32 *)((u8 *)a + 2); u32 *b32 = (u32 *)((u8 *)b + 2); return (*(u16 *)a ^ *(u16 *)b) | (a32[0] ^ b32[0]) | (a32[1] ^ b32[1]) | (a32[2] ^ b32[2]); +#endif } #endif /* _LINUX_ETHERDEVICE_H */ diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 07eeb5b99dce..3438c01bbacf 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -105,9 +105,12 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, goto drop; for (p = napi->gro_list; p; p = p->next) { - NAPI_GRO_CB(p)->same_flow = - p->dev == skb->dev && !compare_ether_header( - skb_mac_header(p), skb_gro_mac_header(skb)); + unsigned long diffs; + + diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; + diffs |= compare_ether_header(skb_mac_header(p), + skb_gro_mac_header(skb)); + NAPI_GRO_CB(p)->same_flow = !diffs; NAPI_GRO_CB(p)->flush = 0; } diff --git a/net/core/dev.c b/net/core/dev.c index 859e30ff044a..63bd20a75929 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3169,16 +3169,18 @@ normal: } EXPORT_SYMBOL(dev_gro_receive); -static gro_result_t +static inline gro_result_t __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff *p; for (p = napi->gro_list; p; p = p->next) { - NAPI_GRO_CB(p)->same_flow = - (p->dev == skb->dev) && - !compare_ether_header(skb_mac_header(p), + unsigned long diffs; + + diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; + diffs |= compare_ether_header(skb_mac_header(p), skb_gro_mac_header(skb)); + NAPI_GRO_CB(p)->same_flow = !diffs; NAPI_GRO_CB(p)->flush = 0; } -- cgit v1.2.3 From 8789d459bc5e837bf37d261453df96ef54018d7b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Aug 2010 13:30:26 +0200 Subject: mac80211: allow scan to complete from any context The ieee80211_scan_completed() function was a frequent source of potential deadlocks, since it is called by drivers but may call back into drivers, so drivers had to make sure to call it without any locks held, which frequently led to more complex
code in drivers. Avoid that problem by allowing the function to be called in any context, and queueing the actual work it does. Also update the documentation for it to indicate this. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 3 ++- net/mac80211/ieee80211_i.h | 6 ++++++ net/mac80211/scan.c | 34 ++++++++++++++++++++++++++-------- 3 files changed, 34 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dcc8c2bf986e..8f97548b6d80 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2268,7 +2268,8 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw); * * When hardware scan offload is used (i.e. the hw_scan() callback is * assigned) this function needs to be called by the driver to notify - * mac80211 that the scan finished. + * mac80211 that the scan finished. This function can be called from + * any context, including hardirq context. * * @hw: the hardware that finished the scan * @aborted: set to true if scan was aborted diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9e225f01497b..31713320258c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -596,11 +596,17 @@ enum queue_stop_reason { * determine if we are on the operating channel or not * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning, * gets only set in conjunction with SCAN_SW_SCANNING + * @SCAN_COMPLETED: Set for our scan work function when the driver reported + * that the scan completed. + * @SCAN_ABORTED: Set for our scan work function when the driver reported + * a scan complete for an aborted scan. */ enum { SCAN_SW_SCANNING, SCAN_HW_SCANNING, SCAN_OFF_CHANNEL, + SCAN_COMPLETED, + SCAN_ABORTED, }; /** diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 31f233f7f51a..d60389ba9b95 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -248,13 +248,11 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) return true; } -void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) +static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) { struct ieee80211_local *local = hw_to_local(hw); bool was_hw_scan; - trace_api_scan_completed(local, aborted); - mutex_lock(&local->mtx); /* @@ -312,6 +310,18 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) ieee80211_mesh_notify_scan_completed(local); ieee80211_queue_work(&local->hw, &local->work_work); } + +void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) +{ + struct ieee80211_local *local = hw_to_local(hw); + + trace_api_scan_completed(local, aborted); + + set_bit(SCAN_COMPLETED, &local->scanning); + if (aborted) + set_bit(SCAN_ABORTED, &local->scanning); + ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0); +} EXPORT_SYMBOL(ieee80211_scan_completed); static int ieee80211_start_sw_scan(struct ieee80211_local *local) @@ -449,7 +459,7 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local, /* if no more bands/channels left, complete scan and advance to the idle state */ if (local->scan_channel_idx >= local->scan_req->n_channels) { - ieee80211_scan_completed(&local->hw, false); + __ieee80211_scan_completed(&local->hw, false); return 1; } @@ -641,6 +651,14 @@ void ieee80211_scan_work(struct work_struct *work) struct ieee80211_sub_if_data *sdata = local->scan_sdata; unsigned long next_delay = 0; + if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) { + bool aborted; + 
+ aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning); + __ieee80211_scan_completed(&local->hw, aborted); + return; + } + mutex_lock(&local->mtx); if (!sdata || !local->scan_req) { mutex_unlock(&local->mtx); @@ -651,7 +669,7 @@ void ieee80211_scan_work(struct work_struct *work) int rc = drv_hw_scan(local, sdata, local->hw_scan_req); mutex_unlock(&local->mtx); if (rc) - ieee80211_scan_completed(&local->hw, true); + __ieee80211_scan_completed(&local->hw, true); return; } @@ -666,7 +684,7 @@ void ieee80211_scan_work(struct work_struct *work) mutex_unlock(&local->mtx); if (rc) - ieee80211_scan_completed(&local->hw, true); + __ieee80211_scan_completed(&local->hw, true); return; } @@ -676,7 +694,7 @@ void ieee80211_scan_work(struct work_struct *work) * Avoid re-scheduling when the sdata is going away. */ if (!ieee80211_sdata_running(sdata)) { - ieee80211_scan_completed(&local->hw, true); + __ieee80211_scan_completed(&local->hw, true); return; } @@ -783,5 +801,5 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) mutex_unlock(&local->mtx); if (abortscan) - ieee80211_scan_completed(&local->hw, true); + __ieee80211_scan_completed(&local->hw, true); } -- cgit v1.2.3 From c0692b8fe29fb4d4dad33487aabf3ed7e1e880c0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Aug 2010 14:26:53 +0300 Subject: cfg80211: allow changing port control protocol Some vendor specified mechanisms for 802.1X-style functionality use a different protocol than EAP (even if EAP is vendor-extensible). Allow setting the ethertype for the protocol when a driver has support for this. The default if unspecified is EAP, of course. Note: This is suitable only for station mode, not for AP implementation. Signed-off-by: Johannes Berg Signed-off-by: Juuso Oikarinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 16 +++++++++++++++- include/net/cfg80211.h | 24 +++++++++++++++++------- net/wireless/nl80211.c | 25 ++++++++++++++++++++++--- net/wireless/wext-sme.c | 2 ++ 4 files changed, 56 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index ec1690da7845..31603e8b5581 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -295,7 +295,9 @@ * auth and assoc steps. For this, you need to specify the SSID in a * %NL80211_ATTR_SSID attribute, and can optionally specify the association * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_MAC, - * %NL80211_ATTR_WIPHY_FREQ and %NL80211_ATTR_CONTROL_PORT. + * %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT, + * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE and + * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT. * It is also sent as an event, with the BSSID and response IEs when the * connection is established or failed to be established. This can be * determined by the STATUS_CODE attribute. @@ -686,6 +688,15 @@ enum nl80211_commands { * request, the driver will assume that the port is unauthorized until * authorized by user space. Otherwise, port is marked authorized by * default in station mode. + * @NL80211_ATTR_CONTROL_PORT_ETHERTYPE: A 16-bit value indicating the + * ethertype that will be used for key negotiation. It can be + * specified with the associate and connect commands. If it is not + * specified, the value defaults to 0x888E (PAE, 802.1X). This + * attribute is also used as a flag in the wiphy information to + * indicate that protocols other than PAE are supported. 
+ * @NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT: When included along with + * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, indicates that the custom + * ethertype frames used for key negotiation must not be encrypted. * * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver. * We recommend using nested, driver-specific attributes within this. @@ -951,6 +962,9 @@ enum nl80211_attrs { NL80211_ATTR_RX_FRAME_TYPES, NL80211_ATTR_FRAME_TYPE, + NL80211_ATTR_CONTROL_PORT_ETHERTYPE, + NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f2740537b5d6..4c8c727d0cca 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -763,6 +763,10 @@ const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie); * sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is * required to assume that the port is unauthorized until authorized by * user space. Otherwise, port is marked authorized by default. + * @control_port_ethertype: the control port protocol that should be + * allowed through even on unauthorized ports + * @control_port_no_encrypt: TRUE to prevent encryption of control port + * protocol frames. */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -772,6 +776,8 @@ struct cfg80211_crypto_settings { int n_akm_suites; u32 akm_suites[NL80211_MAX_NR_AKM_SUITES]; bool control_port; + __be16 control_port_ethertype; + bool control_port_no_encrypt; }; /** @@ -1293,15 +1299,19 @@ struct cfg80211_ops { * @WIPHY_FLAG_4ADDR_AP: supports 4addr mode even on AP (with a single station * on a VLAN interface) * @WIPHY_FLAG_4ADDR_STATION: supports 4addr mode even as a station + * @WIPHY_FLAG_CONTROL_PORT_PROTOCOL: This device supports setting the + * control port protocol ethertype. The device also honours the + * control_port_no_encrypt flag. 
*/ enum wiphy_flags { - WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), - WIPHY_FLAG_STRICT_REGULATORY = BIT(1), - WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), - WIPHY_FLAG_NETNS_OK = BIT(3), - WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), - WIPHY_FLAG_4ADDR_AP = BIT(5), - WIPHY_FLAG_4ADDR_STATION = BIT(6), + WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), + WIPHY_FLAG_STRICT_REGULATORY = BIT(1), + WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), + WIPHY_FLAG_NETNS_OK = BIT(3), + WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), + WIPHY_FLAG_4ADDR_AP = BIT(5), + WIPHY_FLAG_4ADDR_STATION = BIT(6), + WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), }; struct mac_address { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 49f5ca35e787..85a23de7bff3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -136,6 +136,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { .len = sizeof(struct nl80211_sta_flag_update), }, [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG }, + [NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 }, + [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG }, [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, @@ -474,6 +476,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, dev->wiphy.max_num_pmkids); + if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) + NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE); + nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); if (!nl_modes) goto nla_put_failure; @@ -3691,7 +3696,8 @@ unlock_rtnl: return err; } -static int nl80211_crypto_settings(struct genl_info *info, +static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, + struct genl_info *info, struct cfg80211_crypto_settings *settings, int cipher_limit) { @@ -3699,6 +3705,19 @@ static int nl80211_crypto_settings(struct genl_info *info, settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT]; + if (info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) { + u16 proto; + proto = nla_get_u16( + info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]); + settings->control_port_ethertype = cpu_to_be16(proto); + if (!(rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && + proto != ETH_P_PAE) + return -EINVAL; + if (info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]) + settings->control_port_no_encrypt = true; + } else + settings->control_port_ethertype = cpu_to_be16(ETH_P_PAE); + if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) { void *data; int len, i; @@ -3826,7 +3845,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_PREV_BSSID]) prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); - err = nl80211_crypto_settings(info, &crypto, 1); + err = nl80211_crypto_settings(rdev, info, &crypto, 1); if (!err) err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, ssid, ssid_len, ie, ie_len, use_mfp, @@ -4303,7 +4322,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) connect.privacy = info->attrs[NL80211_ATTR_PRIVACY]; - err = nl80211_crypto_settings(info, &connect.crypto, + err = nl80211_crypto_settings(rdev, info, &connect.crypto, NL80211_MAX_NR_CIPHER_SUITES); if (err) return err; diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 9818198add8a..6fffe62d7c25 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -197,6 +197,8 @@ int 
cfg80211_mgd_wext_siwessid(struct net_device *dev, wdev->wext.connect.ssid_len = len; wdev->wext.connect.crypto.control_port = false; + wdev->wext.connect.crypto.control_port_ethertype = + cpu_to_be16(ETH_P_PAE); err = cfg80211_mgd_wext_connect(rdev, wdev); out: -- cgit v1.2.3 From 34d4bc4d41d282a66dafe1b01a7d46bad468cefb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Aug 2010 12:35:58 +0200 Subject: mac80211: support runtime interface type changes Add support to mac80211 for changing the interface type even when the interface is UP, if the driver supports it. To achieve this * add a new driver callback for switching, * split some of the interface up/down code out into new functions (do_open/do_stop), and * maintain our own __SDATA_RUNNING bit that will not be set during an interface type change, so that other code doesn't use the interface. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 9 +++ net/mac80211/cfg.c | 3 - net/mac80211/driver-ops.h | 14 ++++ net/mac80211/driver-trace.h | 25 +++++++ net/mac80211/ieee80211_i.h | 14 +++- net/mac80211/iface.c | 157 ++++++++++++++++++++++++++++++++++---------- 6 files changed, 185 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8f97548b6d80..f91fc331369b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1537,6 +1537,12 @@ enum ieee80211_ampdu_mlme_action { * negative error code (which will be seen in userspace.) * Must be implemented and can sleep. * + * @change_interface: Called when a netdevice changes type. This callback + * is optional, but only if it is supported can interface types be + * switched while the interface is UP. The callback may sleep. + * Note that while an interface is being switched, it will not be + * found by the interface iteration callbacks. + * + * @remove_interface: Notifies a driver that an interface is going down. * The @stop callback is called after this if it is the last interface * and no monitor interfaces are present.
@@ -1693,6 +1699,9 @@ struct ieee80211_ops { void (*stop)(struct ieee80211_hw *hw); int (*add_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); + int (*change_interface)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + enum nl80211_iftype new_type); void (*remove_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int (*config)(struct ieee80211_hw *hw, u32 changed); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f82b18e996b2..5de1ca3f17b9 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -52,9 +52,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); int ret; - if (ieee80211_sdata_running(sdata)) - return -EBUSY; - ret = ieee80211_if_change_type(sdata, type); if (ret) return ret; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 14123dce544b..6064b7b09e01 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -54,6 +54,20 @@ static inline int drv_add_interface(struct ieee80211_local *local, return ret; } +static inline int drv_change_interface(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type) +{ + int ret; + + might_sleep(); + + trace_drv_change_interface(local, sdata, type); + ret = local->ops->change_interface(&local->hw, &sdata->vif, type); + trace_drv_return_int(local, ret); + return ret; +} + static inline void drv_remove_interface(struct ieee80211_local *local, struct ieee80211_vif *vif) { diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index b5a95582d816..f6f3d89e43fa 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -136,6 +136,31 @@ TRACE_EVENT(drv_add_interface, ) ); +TRACE_EVENT(drv_change_interface, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type), + + TP_ARGS(local, sdata, type), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u32, new_type) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->new_type = type; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT " new type:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->new_type + ) +); + TRACE_EVENT(drv_remove_interface, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f64837788681..d529bd5eab47 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -472,6 +472,16 @@ enum ieee80211_sub_if_data_flags { IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), }; +/** + * enum ieee80211_sdata_state_bits - virtual interface state bits + * @SDATA_STATE_RUNNING: virtual interface is up & running; this + * mirrors netif_running() but is separate for interface type + * change handling while the interface is up + */ +enum ieee80211_sdata_state_bits { + SDATA_STATE_RUNNING, +}; + struct ieee80211_sub_if_data { struct list_head list; @@ -485,6 +495,8 @@ struct ieee80211_sub_if_data { unsigned int flags; + unsigned long state; + int drop_unencrypted; char name[IFNAMSIZ]; @@ -1087,7 +1099,7 @@ void ieee80211_recalc_idle(struct ieee80211_local *local); static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) { - return netif_running(sdata->dev); + return test_bit(SDATA_STATE_RUNNING, &sdata->state); } /* tx handling */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index cba3d806d722..c1cc200ac81f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ 
-148,7 +148,12 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, return 0; } -static int ieee80211_open(struct net_device *dev) +/* + * NOTE: Be very careful when changing this function, it must NOT return + * an error on interface type changes that have been pre-checked, so most + * checks should be in ieee80211_check_concurrent_iface. + */ +static int ieee80211_do_open(struct net_device *dev, bool coming_up) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -157,15 +162,6 @@ static int ieee80211_open(struct net_device *dev) int res; u32 hw_reconf_flags = 0; - /* fail early if user set an invalid address */ - if (!is_zero_ether_addr(dev->dev_addr) && - !is_valid_ether_addr(dev->dev_addr)) - return -EADDRNOTAVAIL; - - res = ieee80211_check_concurrent_iface(sdata, sdata->vif.type); - if (res) - return res; - switch (sdata->vif.type) { case NL80211_IFTYPE_WDS: if (!is_valid_ether_addr(sdata->u.wds.remote_addr)) @@ -258,9 +254,11 @@ static int ieee80211_open(struct net_device *dev) netif_carrier_on(dev); break; default: - res = drv_add_interface(local, &sdata->vif); - if (res) - goto err_stop; + if (coming_up) { + res = drv_add_interface(local, &sdata->vif); + if (res) + goto err_stop; + } if (ieee80211_vif_is_mesh(&sdata->vif)) { local->fif_other_bss++; @@ -316,7 +314,9 @@ static int ieee80211_open(struct net_device *dev) hw_reconf_flags |= __ieee80211_recalc_idle(local); mutex_unlock(&local->mtx); - local->open_count++; + if (coming_up) + local->open_count++; + if (hw_reconf_flags) { ieee80211_hw_config(local, hw_reconf_flags); /* @@ -331,6 +331,8 @@ static int ieee80211_open(struct net_device *dev) netif_tx_start_all_queues(dev); + set_bit(SDATA_STATE_RUNNING, &sdata->state); + return 0; err_del_interface: drv_remove_interface(local, &sdata->vif); @@ -344,19 +346,38 @@ static int ieee80211_open(struct net_device *dev) return res; } -static int ieee80211_stop(struct net_device *dev) +static int ieee80211_open(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + int err; + + /* fail early if user set an invalid address */ + if (!is_zero_ether_addr(dev->dev_addr) && + !is_valid_ether_addr(dev->dev_addr)) + return -EADDRNOTAVAIL; + + err = ieee80211_check_concurrent_iface(sdata, sdata->vif.type); + if (err) + return err; + + return ieee80211_do_open(dev, true); +} + +static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, + bool going_down) +{ struct ieee80211_local *local = sdata->local; unsigned long flags; struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; int i; + clear_bit(SDATA_STATE_RUNNING, &sdata->state); + /* * Stop TX on this interface first. */ - netif_tx_stop_all_queues(dev); + netif_tx_stop_all_queues(sdata->dev); /* * Purge work for this interface. 
@@ -394,11 +415,12 @@ static int ieee80211_stop(struct net_device *dev) if (sdata->vif.type == NL80211_IFTYPE_AP) local->fif_pspoll--; - netif_addr_lock_bh(dev); + netif_addr_lock_bh(sdata->dev); spin_lock_bh(&local->filter_lock); - __hw_addr_unsync(&local->mc_list, &dev->mc, dev->addr_len); + __hw_addr_unsync(&local->mc_list, &sdata->dev->mc, + sdata->dev->addr_len); spin_unlock_bh(&local->filter_lock); - netif_addr_unlock_bh(dev); + netif_addr_unlock_bh(sdata->dev); ieee80211_configure_filter(local); @@ -432,7 +454,8 @@ static int ieee80211_stop(struct net_device *dev) WARN_ON(!list_empty(&sdata->u.ap.vlans)); } - local->open_count--; + if (going_down) + local->open_count--; switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: @@ -504,7 +527,8 @@ static int ieee80211_stop(struct net_device *dev) */ ieee80211_free_keys(sdata); - drv_remove_interface(local, &sdata->vif); + if (going_down) + drv_remove_interface(local, &sdata->vif); } sdata->bss = NULL; @@ -540,6 +564,13 @@ static int ieee80211_stop(struct net_device *dev) } } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); +} + +static int ieee80211_stop(struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + ieee80211_do_stop(sdata, true); return 0; } @@ -857,9 +888,72 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, ieee80211_debugfs_add_netdev(sdata); } +static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type) +{ + struct ieee80211_local *local = sdata->local; + int ret, err; + + ASSERT_RTNL(); + + if (!local->ops->change_interface) + return -EBUSY; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + /* + * Could maybe also all others here? + * Just not sure how that interacts + * with the RX/config path e.g. for + * mesh. + */ + break; + default: + return -EBUSY; + } + + switch (type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + /* + * Could probably support everything + * but WDS here (WDS do_open can fail + * under memory pressure, which this + * code isn't prepared to handle). + */ + break; + default: + return -EBUSY; + } + + ret = ieee80211_check_concurrent_iface(sdata, type); + if (ret) + return ret; + + ieee80211_do_stop(sdata, false); + + ieee80211_teardown_sdata(sdata->dev); + + ret = drv_change_interface(local, sdata, type); + if (ret) + type = sdata->vif.type; + + ieee80211_setup_sdata(sdata, type); + + err = ieee80211_do_open(sdata->dev, false); + WARN(err, "type change: do_open returned %d", err); + + return ret; +} + int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type) { + int ret; + ASSERT_RTNL(); if (type == sdata->vif.type) @@ -870,18 +964,15 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, type == NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; - /* - * We could, here, on changes between IBSS/STA/MESH modes, - * invoke an MLME function instead that disassociates etc. - * and goes into the requested mode. - */ - - if (ieee80211_sdata_running(sdata)) - return -EBUSY; - - /* Purge and reset type-dependent state. */ - ieee80211_teardown_sdata(sdata->dev); - ieee80211_setup_sdata(sdata, type); + if (ieee80211_sdata_running(sdata)) { + ret = ieee80211_runtime_change_iftype(sdata, type); + if (ret) + return ret; + } else { + /* Purge and reset type-dependent state. 
*/ + ieee80211_teardown_sdata(sdata->dev); + ieee80211_setup_sdata(sdata, type); + } /* reset some values that shouldn't be kept across type changes */ sdata->vif.bss_conf.basic_rates = -- cgit v1.2.3 From 7950c407c0288b223a200c1bba8198941599ca37 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:14 -0700 Subject: memblock: Add memblock_free/reserve_reserved_regions() So we can avoid exporting memblock_reserved_init_regions() Suggested by Ben. -v2: use __init_memblock attribute Signed-off-by: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- include/linux/memblock.h | 2 ++ mm/memblock.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 4df09bdcae42..7d285271130d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -48,6 +48,8 @@ extern int memblock_can_resize; if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) u64 memblock_find_in_range(u64 start, u64 end, u64 size, u64 align); +int memblock_free_reserved_regions(void); +int memblock_reserve_reserved_regions(void); extern void __init memblock_init(void); extern void __init memblock_analyze(void); diff --git a/mm/memblock.c b/mm/memblock.c index b7ab10a2ef46..65e3ba8d09fb 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -170,6 +170,30 @@ u64 __init_memblock memblock_find_in_range(u64 start, u64 end, u64 size, u64 ali return memblock_find_base(size, align, start, end); } +/* + * Free memblock.reserved.regions + */ +int __init_memblock memblock_free_reserved_regions(void) +{ + if (memblock.reserved.regions == memblock_reserved_init_regions) + return 0; + + return memblock_free(__pa(memblock.reserved.regions), + sizeof(struct memblock_region) * memblock.reserved.max); +} + +/* + * Reserve memblock.reserved.regions + */ +int __init_memblock memblock_reserve_reserved_regions(void) +{ + if (memblock.reserved.regions == memblock_reserved_init_regions) + return 0; + + return memblock_reserve(__pa(memblock.reserved.regions), + sizeof(struct memblock_region) * memblock.reserved.max); +} + static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) { unsigned long i; -- cgit v1.2.3 From edbe7d23b4482e7f33179290bcff3b1feae1c5f3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:16 -0700 Subject: memblock: Add find_memory_core_early() Go over the node ranges in early_node_map[] and use __memblock_find_in_range to find a free range. Will be used by memblock_x86_find_in_range_node(), which in turn will be used to find the right buffer for NODE_DATA. Signed-off-by: Yinghai Lu Signed-off-by: H.
Peter Anvin --- include/linux/mm.h | 2 ++ mm/page_alloc.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a2b48041b910..993e85f0afcb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1164,6 +1164,8 @@ extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); int add_from_early_node_map(struct range *range, int az, int nr_range, int nid); +u64 __init find_memory_core_early(int nid, u64 size, u64 align, + u64 goal, u64 limit); void *__alloc_memory_core_early(int nodeid, u64 size, u64 align, u64 goal, u64 limit); typedef int (*work_fn_t)(unsigned long, unsigned long, void *); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9bd339eb04c6..8c9b34674d83 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -3612,6 +3613,41 @@ void __init free_bootmem_with_active_regions(int nid, } } +#ifdef CONFIG_HAVE_MEMBLOCK +u64 __init find_memory_core_early(int nid, u64 size, u64 align, + u64 goal, u64 limit) +{ + int i; + + /* Need to go over early_node_map to find out good range for node */ + for_each_active_range_index_in_nid(i, nid) { + u64 addr; + u64 ei_start, ei_last; + u64 final_start, final_end; + + ei_last = early_node_map[i].end_pfn; + ei_last <<= PAGE_SHIFT; + ei_start = early_node_map[i].start_pfn; + ei_start <<= PAGE_SHIFT; + + final_start = max(ei_start, goal); + final_end = min(ei_last, limit); + + if (final_start >= final_end) + continue; + + addr = memblock_find_in_range(final_start, final_end, size, align); + + if (addr == MEMBLOCK_ERROR) + continue; + + return addr; + } + + return MEMBLOCK_ERROR; +} +#endif + int __init add_from_early_node_map(struct range *range, int az, int nr_range, int nid) { -- cgit v1.2.3 From a587d2daebcd2bc159d4348b6a7b028950a6d803 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:18 -0700 Subject: x86: Remove not used early_res code and some functions in e820.c that are not used anymore Signed-off-by: Yinghai Lu Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/e820.h | 14 -- arch/x86/kernel/e820.c | 52 ---- include/linux/early_res.h | 23 -- kernel/Makefile | 1 - kernel/early_res.c | 590 -------------------------------------------- 5 files changed, 680 deletions(-) delete mode 100644 include/linux/early_res.h delete mode 100644 kernel/early_res.c (limited to 'include') diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 388fed291467..718646384e03 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -112,31 +112,17 @@ static inline void early_memtest(unsigned long start, unsigned long end) } #endif -extern unsigned long end_user_pfn; - -extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); -extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); -extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); - extern unsigned long e820_end_of_ram_pfn(void); extern unsigned long e820_end_of_low_ram_pfn(void); -extern void e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn); -extern u64 e820_hole_size(u64 start, u64 end); - extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); void memblock_x86_fill(void); - extern void finish_e820_parsing(void); extern void e820_reserve_resources(void); extern void e820_reserve_resources_late(void); extern void setup_memory_map(void); extern char *default_machine_specific_memory_setup(void); -void reserve_early(u64 start, u64 end, char *name); -void free_early(u64 start, u64 end); - /* * Returns true iff the specified range [s,e) is completely contained inside * the ISA region. diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a9221d18a5ed..d5fd89462d79 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -738,32 +738,6 @@ static int __init e820_mark_nvs_memory(void) core_initcall(e820_mark_nvs_memory); #endif -/* - * Find a free area with specified alignment in a specific range. - */ -u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) -{ - u64 mem = memblock_find_in_range(start, end, size, align); - - if (mem == MEMBLOCK_ERROR) - return -1ULL; - - return mem; -} - -/* - * Find next free range after *start - */ -u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) -{ - u64 mem = memblock_x86_find_in_range_size(start, sizep, align); - - if (mem == MEMBLOCK_ERROR) - return -1ULL - - return mem; -} - /* * pre allocated 4k and reserved it in memblock and e820_saved */ @@ -856,32 +830,6 @@ unsigned long __init e820_end_of_low_ram_pfn(void) return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); } -/* Walk the e820 map and register active regions within a node */ -void __init e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long last_pfn) -{ - memblock_x86_register_active_regions(nid, start_pfn, last_pfn); -} - -/* - * Find the hole size (in bytes) in the memory range. 
- * @start: starting address of the memory range to scan - * @end: ending address of the memory range to scan - */ -u64 __init e820_hole_size(u64 start, u64 end) -{ - return memblock_x86_hole_size(start, end); -} - -void reserve_early(u64 start, u64 end, char *name) -{ - memblock_x86_reserve_range(start, end, name); -} -void free_early(u64 start, u64 end) -{ - memblock_x86_free_range(start, end); -} - static void early_panic(char *msg) { early_printk(msg); diff --git a/include/linux/early_res.h b/include/linux/early_res.h deleted file mode 100644 index 29c09f57a13c..000000000000 --- a/include/linux/early_res.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef _LINUX_EARLY_RES_H -#define _LINUX_EARLY_RES_H -#ifdef __KERNEL__ - -extern void reserve_early(u64 start, u64 end, char *name); -extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); -extern void free_early(u64 start, u64 end); -void free_early_partial(u64 start, u64 end); -extern void early_res_to_bootmem(u64 start, u64 end); - -void reserve_early_without_check(u64 start, u64 end, char *name); -u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, - u64 size, u64 align); -u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start, - u64 *sizep, u64 align); -u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align); -u64 get_max_mapped(void); -#include -int get_free_all_memory_range(struct range **rangep, int nodeid); - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_EARLY_RES_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 057472fbc272..80e61c3e44ae 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -11,7 +11,6 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ async.o range.o -obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o obj-y += groups.o ifdef CONFIG_FUNCTION_TRACER diff --git a/kernel/early_res.c b/kernel/early_res.c deleted file mode 100644 index 7bfae887f211..000000000000 --- a/kernel/early_res.c +++ /dev/null @@ -1,590 +0,0 @@ -/* - * early_res, could be used to replace bootmem - */ -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Early reserved memory areas. - */ -/* - * need to make sure this one is bigger enough before - * find_fw_memmap_area could be used - */ -#define MAX_EARLY_RES_X 32 - -struct early_res { - u64 start, end; - char name[15]; - char overlap_ok; -}; -static struct early_res early_res_x[MAX_EARLY_RES_X] __initdata; - -static int max_early_res __initdata = MAX_EARLY_RES_X; -static struct early_res *early_res __initdata = &early_res_x[0]; -static int early_res_count __initdata; - -static int __init find_overlapped_early(u64 start, u64 end) -{ - int i; - struct early_res *r; - - for (i = 0; i < max_early_res && early_res[i].end; i++) { - r = &early_res[i]; - if (end > r->start && start < r->end) - break; - } - - return i; -} - -/* - * Drop the i-th range from the early reservation map, - * by copying any higher ranges down one over it, and - * clearing what had been the last slot. 
- */ -static void __init drop_range(int i) -{ - int j; - - for (j = i + 1; j < max_early_res && early_res[j].end; j++) - ; - - memmove(&early_res[i], &early_res[i + 1], - (j - 1 - i) * sizeof(struct early_res)); - - early_res[j - 1].end = 0; - early_res_count--; -} - -static void __init drop_range_partial(int i, u64 start, u64 end) -{ - u64 common_start, common_end; - u64 old_start, old_end; - - old_start = early_res[i].start; - old_end = early_res[i].end; - common_start = max(old_start, start); - common_end = min(old_end, end); - - /* no overlap ? */ - if (common_start >= common_end) - return; - - if (old_start < common_start) { - /* make head segment */ - early_res[i].end = common_start; - if (old_end > common_end) { - char name[15]; - - /* - * Save a local copy of the name, since the - * early_res array could get resized inside - * reserve_early_without_check() -> - * __check_and_double_early_res(), which would - * make the current name pointer invalid. - */ - strncpy(name, early_res[i].name, - sizeof(early_res[i].name) - 1); - /* add another for left over on tail */ - reserve_early_without_check(common_end, old_end, name); - } - return; - } else { - if (old_end > common_end) { - /* reuse the entry for tail left */ - early_res[i].start = common_end; - return; - } - /* all covered */ - drop_range(i); - } -} - -/* - * Split any existing ranges that: - * 1) are marked 'overlap_ok', and - * 2) overlap with the stated range [start, end) - * into whatever portion (if any) of the existing range is entirely - * below or entirely above the stated range. Drop the portion - * of the existing range that overlaps with the stated range, - * which will allow the caller of this routine to then add that - * stated range without conflicting with any existing range. - */ -static void __init drop_overlaps_that_are_ok(u64 start, u64 end) -{ - int i; - struct early_res *r; - u64 lower_start, lower_end; - u64 upper_start, upper_end; - char name[15]; - - for (i = 0; i < max_early_res && early_res[i].end; i++) { - r = &early_res[i]; - - /* Continue past non-overlapping ranges */ - if (end <= r->start || start >= r->end) - continue; - - /* - * Leave non-ok overlaps as is; let caller - * panic "Overlapping early reservations" - * when it hits this overlap. - */ - if (!r->overlap_ok) - return; - - /* - * We have an ok overlap. We will drop it from the early - * reservation map, and add back in any non-overlapping - * portions (lower or upper) as separate, overlap_ok, - * non-overlapping ranges. - */ - - /* 1. Note any non-overlapping (lower or upper) ranges. */ - strncpy(name, r->name, sizeof(name) - 1); - - lower_start = lower_end = 0; - upper_start = upper_end = 0; - if (r->start < start) { - lower_start = r->start; - lower_end = start; - } - if (r->end > end) { - upper_start = end; - upper_end = r->end; - } - - /* 2. Drop the original ok overlapping range */ - drop_range(i); - - i--; /* resume for-loop on copied down entry */ - - /* 3. Add back in any non-overlapping ranges. */ - if (lower_end) - reserve_early_overlap_ok(lower_start, lower_end, name); - if (upper_end) - reserve_early_overlap_ok(upper_start, upper_end, name); - } -} - -static void __init __reserve_early(u64 start, u64 end, char *name, - int overlap_ok) -{ - int i; - struct early_res *r; - - i = find_overlapped_early(start, end); - if (i >= max_early_res) - panic("Too many early reservations"); - r = &early_res[i]; - if (r->end) - panic("Overlapping early reservations " - "%llx-%llx %s to %llx-%llx %s\n", - start, end - 1, name ? 
name : "", r->start, - r->end - 1, r->name); - r->start = start; - r->end = end; - r->overlap_ok = overlap_ok; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); - early_res_count++; -} - -/* - * A few early reservtations come here. - * - * The 'overlap_ok' in the name of this routine does -not- mean it - * is ok for these reservations to overlap an earlier reservation. - * Rather it means that it is ok for subsequent reservations to - * overlap this one. - * - * Use this entry point to reserve early ranges when you are doing - * so out of "Paranoia", reserving perhaps more memory than you need, - * just in case, and don't mind a subsequent overlapping reservation - * that is known to be needed. - * - * The drop_overlaps_that_are_ok() call here isn't really needed. - * It would be needed if we had two colliding 'overlap_ok' - * reservations, so that the second such would not panic on the - * overlap with the first. We don't have any such as of this - * writing, but might as well tolerate such if it happens in - * the future. - */ -void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) -{ - drop_overlaps_that_are_ok(start, end); - __reserve_early(start, end, name, 1); -} - -static void __init __check_and_double_early_res(u64 ex_start, u64 ex_end) -{ - u64 start, end, size, mem; - struct early_res *new; - - /* do we have enough slots left ? */ - if ((max_early_res - early_res_count) > max(max_early_res/8, 2)) - return; - - /* double it */ - mem = -1ULL; - size = sizeof(struct early_res) * max_early_res * 2; - if (early_res == early_res_x) - start = 0; - else - start = early_res[0].end; - end = ex_start; - if (start + size < end) - mem = find_fw_memmap_area(start, end, size, - sizeof(struct early_res)); - if (mem == -1ULL) { - start = ex_end; - end = get_max_mapped(); - if (start + size < end) - mem = find_fw_memmap_area(start, end, size, - sizeof(struct early_res)); - } - if (mem == -1ULL) - panic("can not find more space for early_res array"); - - new = __va(mem); - /* save the first one for own */ - new[0].start = mem; - new[0].end = mem + size; - new[0].overlap_ok = 0; - /* copy old to new */ - if (early_res == early_res_x) { - memcpy(&new[1], &early_res[0], - sizeof(struct early_res) * max_early_res); - memset(&new[max_early_res+1], 0, - sizeof(struct early_res) * (max_early_res - 1)); - early_res_count++; - } else { - memcpy(&new[1], &early_res[1], - sizeof(struct early_res) * (max_early_res - 1)); - memset(&new[max_early_res], 0, - sizeof(struct early_res) * max_early_res); - } - memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); - early_res = new; - max_early_res *= 2; - printk(KERN_DEBUG "early_res array is doubled to %d at [%llx - %llx]\n", - max_early_res, mem, mem + size - 1); -} - -/* - * Most early reservations come here. - * - * We first have drop_overlaps_that_are_ok() drop any pre-existing - * 'overlap_ok' ranges, so that we can then reserve this memory - * range without risk of panic'ing on an overlapping overlap_ok - * early reservation. 
- */ -void __init reserve_early(u64 start, u64 end, char *name) -{ - if (start >= end) - return; - - __check_and_double_early_res(start, end); - - drop_overlaps_that_are_ok(start, end); - __reserve_early(start, end, name, 0); -} - -void __init reserve_early_without_check(u64 start, u64 end, char *name) -{ - struct early_res *r; - - if (start >= end) - return; - - __check_and_double_early_res(start, end); - - r = &early_res[early_res_count]; - - r->start = start; - r->end = end; - r->overlap_ok = 0; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); - early_res_count++; -} - -void __init free_early(u64 start, u64 end) -{ - struct early_res *r; - int i; - - kmemleak_free_part(__va(start), end - start); - - i = find_overlapped_early(start, end); - r = &early_res[i]; - if (i >= max_early_res || r->end != end || r->start != start) - panic("free_early on not reserved area: %llx-%llx!", - start, end - 1); - - drop_range(i); -} - -void __init free_early_partial(u64 start, u64 end) -{ - struct early_res *r; - int i; - - kmemleak_free_part(__va(start), end - start); - - if (start == end) - return; - - if (WARN_ONCE(start > end, " wrong range [%#llx, %#llx]\n", start, end)) - return; - -try_next: - i = find_overlapped_early(start, end); - if (i >= max_early_res) - return; - - r = &early_res[i]; - /* hole ? */ - if (r->end >= end && r->start <= start) { - drop_range_partial(i, start, end); - return; - } - - drop_range_partial(i, start, end); - goto try_next; -} - -#ifdef CONFIG_NO_BOOTMEM -static void __init subtract_early_res(struct range *range, int az) -{ - int i, count; - u64 final_start, final_end; - int idx = 0; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - /* need to skip first one ?*/ - if (early_res != early_res_x) - idx = 1; - -#define DEBUG_PRINT_EARLY_RES 1 - -#if DEBUG_PRINT_EARLY_RES - printk(KERN_INFO "Subtract (%d early reservations)\n", count); -#endif - for (i = idx; i < count; i++) { - struct early_res *r = &early_res[i]; -#if DEBUG_PRINT_EARLY_RES - printk(KERN_INFO " #%d [%010llx - %010llx] %15s\n", i, - r->start, r->end, r->name); -#endif - final_start = PFN_DOWN(r->start); - final_end = PFN_UP(r->end); - if (final_start >= final_end) - continue; - subtract_range(range, az, final_start, final_end); - } - -} - -int __init get_free_all_memory_range(struct range **rangep, int nodeid) -{ - int i, count; - u64 start = 0, end; - u64 size; - u64 mem; - struct range *range; - int nr_range; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - count *= 2; - - size = sizeof(struct range) * count; - end = get_max_mapped(); -#ifdef MAX_DMA32_PFN - if (end > (MAX_DMA32_PFN << PAGE_SHIFT)) - start = MAX_DMA32_PFN << PAGE_SHIFT; -#endif - mem = find_fw_memmap_area(start, end, size, sizeof(struct range)); - if (mem == -1ULL) - panic("can not find more space for range free"); - - range = __va(mem); - /* use early_node_map[] and early_res to get range array at first */ - memset(range, 0, size); - nr_range = 0; - - /* need to go over early_node_map to find out good range for node */ - nr_range = add_from_early_node_map(range, count, nr_range, nodeid); -#ifdef CONFIG_X86_32 - subtract_range(range, count, max_low_pfn, -1ULL); -#endif - subtract_early_res(range, count); - nr_range = clean_sort_range(range, count); - - /* need to clear it ? 
*/ - if (nodeid == MAX_NUMNODES) { - memset(&early_res[0], 0, - sizeof(struct early_res) * max_early_res); - early_res = NULL; - max_early_res = 0; - } - - *rangep = range; - return nr_range; -} -#else -void __init early_res_to_bootmem(u64 start, u64 end) -{ - int i, count; - u64 final_start, final_end; - int idx = 0; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - /* need to skip first one ?*/ - if (early_res != early_res_x) - idx = 1; - - printk(KERN_INFO "(%d/%d early reservations) ==> bootmem [%010llx - %010llx]\n", - count - idx, max_early_res, start, end); - for (i = idx; i < count; i++) { - struct early_res *r = &early_res[i]; - printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, - r->start, r->end, r->name); - final_start = max(start, r->start); - final_end = min(end, r->end); - if (final_start >= final_end) { - printk(KERN_CONT "\n"); - continue; - } - printk(KERN_CONT " ==> [%010llx - %010llx]\n", - final_start, final_end); - reserve_bootmem_generic(final_start, final_end - final_start, - BOOTMEM_DEFAULT); - } - /* clear them */ - memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); - early_res = NULL; - max_early_res = 0; - early_res_count = 0; -} -#endif - -/* Check for already reserved areas */ -static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) -{ - int i; - u64 addr = *addrp; - int changed = 0; - struct early_res *r; -again: - i = find_overlapped_early(addr, addr + size); - r = &early_res[i]; - if (i < max_early_res && r->end) { - *addrp = addr = round_up(r->end, align); - changed = 1; - goto again; - } - return changed; -} - -/* Check for already reserved areas */ -static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) -{ - int i; - u64 addr = *addrp, last; - u64 size = *sizep; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < max_early_res && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last > r->start && addr < r->start) { - size = r->start - addr; - changed = 1; - goto again; - } - if (last > r->end && addr < r->end) { - addr = round_up(r->end, align); - size = last - addr; - changed = 1; - goto again; - } - if (last <= r->end && addr >= r->start) { - (*sizep)++; - return 0; - } - } - if (changed) { - *addrp = addr; - *sizep = size; - } - return changed; -} - -/* - * Find a free area with specified alignment in a specific range. 
- * only with the area.between start to end is active range from early_node_map - * so they are good as RAM - */ -u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, - u64 size, u64 align) -{ - u64 addr, last; - - addr = round_up(ei_start, align); - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - goto out; - while (bad_addr(&addr, size, align) && addr+size <= ei_last) - ; - last = addr + size; - if (last > ei_last) - goto out; - if (last > end) - goto out; - - return addr; - -out: - return -1ULL; -} - -u64 __init find_early_area_size(u64 ei_start, u64 ei_last, u64 start, - u64 *sizep, u64 align) -{ - u64 addr, last; - - addr = round_up(ei_start, align); - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - goto out; - *sizep = ei_last - addr; - while (bad_addr_size(&addr, sizep, align) && addr + *sizep <= ei_last) - ; - last = addr + *sizep; - if (last > ei_last) - goto out; - - return addr; - -out: - return -1ULL; -} -- cgit v1.2.3 From 0fb85621df4f9f7c663c6c77c302e821a832c95e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 20 Aug 2010 10:02:15 +0100 Subject: fanotify: resize pid and reorder structure resize pid and reorder the fanotify_event_metadata so it is naturally aligned and we can work towards dropping the packed attribute Signed-off-by: Tvrtko Ursulin Cc: Andreas Dilger Signed-off-by: Eric Paris --- include/linux/fanotify.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 985435622ecd..63531a6b4d2a 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -65,14 +65,14 @@ FAN_ALL_PERM_EVENTS |\ FAN_Q_OVERFLOW) -#define FANOTIFY_METADATA_VERSION 1 +#define FANOTIFY_METADATA_VERSION 2 struct fanotify_event_metadata { __u32 event_len; __u32 vers; - __s32 fd; __u64 mask; - __s64 pid; + __s32 fd; + __s32 pid; } __attribute__ ((packed)); struct fanotify_response { -- cgit v1.2.3 From bad849b3dc0fae1297c8d47f846f8d202a6145ed Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 26 Aug 2010 16:00:34 +0100 Subject: NOMMU: Stub out vm_get_page_prot() if there's no MMU Stub out vm_get_page_prot() if there's no MMU. This was added by commit 804af2cf6e7a ("[AGPGART] remove private page protection map") and is used in commit c07fbfd17e61 ("fbmem: VM_IO set, but not propagated") in the fbmem video driver, but the function doesn't exist on NOMMU, resulting in an undefined symbol at link time.
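A minimal sketch of the call pattern this unbreaks (foo_mmap() is a hypothetical driver hook, modeled on the fbmem usage cited above; it is not part of the patch):

static int foo_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_flags |= VM_IO;	/* device memory, keep out of core dumps */
	/* Links on both MMU and NOMMU builds now; on NOMMU the inline
	 * stub below simply returns __pgprot(0). */
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
	return 0;
}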
Signed-off-by: David Howells Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 831c693416b2..e6b1210772ce 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1363,7 +1363,15 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma) return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } +#ifdef CONFIG_MMU pgprot_t vm_get_page_prot(unsigned long vm_flags); +#else +static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) +{ + return __pgprot(0); +} +#endif + struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); -- cgit v1.2.3 From 1da3f87ebb7edb3e0b829ec4bbe5fb3d9d93986f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:24 +0200 Subject: drm: kill kernel_context_switch callbacks Not used by any in-kernel driver. So drop it. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_lock.c | 18 ------------------ include/drm/drmP.h | 3 --- 2 files changed, 21 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index 9bf93bc9a32c..609f2d504f72 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -136,12 +136,6 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv) } } - if (dev->driver->kernel_context_switch && - dev->last_context != lock->context) { - dev->driver->kernel_context_switch(dev, dev->last_context, - lock->context); - } - return 0; } @@ -159,7 +153,6 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv) int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_lock *lock = data; - struct drm_master *master = file_priv->master; if (lock->context == DRM_KERNEL_CONTEXT) { DRM_ERROR("Process %d using kernel context %d\n", @@ -169,17 +162,6 @@ int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv) atomic_inc(&dev->counts[_DRM_STAT_UNLOCKS]); - /* kernel_context_switch isn't used by any of the x86 drm - * modules but is required by the Sparc driver. - */ - if (dev->driver->kernel_context_switch_unlock) - dev->driver->kernel_context_switch_unlock(dev); - else { - if (drm_lock_free(&master->lock, lock->context)) { - /* FIXME: Should really bail out here. */ - } - } - unblock_all_signals(); return 0; } diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 7809d230adee..15ea8c44f28d 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -703,9 +703,6 @@ struct drm_driver { int (*dma_quiescent) (struct drm_device *); int (*context_ctor) (struct drm_device *dev, int context); int (*context_dtor) (struct drm_device *dev, int context); - int (*kernel_context_switch) (struct drm_device *dev, int old, - int new); - void (*kernel_context_switch_unlock) (struct drm_device *dev); /** * get_vblank_counter - get raw hardware vblank counter -- cgit v1.2.3 From be72ae26b11478c00c64858c86b647b438791671 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:26 +0200 Subject: drm: kill procfs callbacks Not used by any driver (rightly so!). Kill them. 
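The debugfs_init/debugfs_cleanup hooks kept in struct drm_driver (visible in a later drmP.h hunk) cover the same need. A rough sketch, with hypothetical foo_* names and file list:

static int foo_debugfs_init(struct drm_minor *minor)
{
	/* register driver-private files under this minor's debugfs root */
	return drm_debugfs_create_files(foo_debugfs_list, FOO_DEBUGFS_ENTRIES,
					minor->debugfs_root, minor);
}

static void foo_debugfs_cleanup(struct drm_minor *minor)
{
	drm_debugfs_remove_files(foo_debugfs_list, FOO_DEBUGFS_ENTRIES, minor);
}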
Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_proc.c | 13 ------------- include/drm/drmP.h | 2 -- 2 files changed, 15 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_proc.c b/drivers/gpu/drm/drm_proc.c index a9ba6b69ad35..e571de536dc5 100644 --- a/drivers/gpu/drm/drm_proc.c +++ b/drivers/gpu/drm/drm_proc.c @@ -151,7 +151,6 @@ fail: int drm_proc_init(struct drm_minor *minor, int minor_id, struct proc_dir_entry *root) { - struct drm_device *dev = minor->dev; char name[64]; int ret; @@ -172,14 +171,6 @@ int drm_proc_init(struct drm_minor *minor, int minor_id, return ret; } - if (dev->driver->proc_init) { - ret = dev->driver->proc_init(minor); - if (ret) { - DRM_ERROR("DRM: Driver failed to initialize " - "/proc/dri.\n"); - return ret; - } - } return 0; } @@ -216,15 +207,11 @@ int drm_proc_remove_files(struct drm_info_list *files, int count, */ int drm_proc_cleanup(struct drm_minor *minor, struct proc_dir_entry *root) { - struct drm_device *dev = minor->dev; char name[64]; if (!root || !minor->proc_root) return 0; - if (dev->driver->proc_cleanup) - dev->driver->proc_cleanup(minor); - drm_proc_remove_files(drm_proc_list, DRM_PROC_ENTRIES, minor); sprintf(name, "%d", minor->index); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 15ea8c44f28d..0d7af3f39652 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -792,8 +792,6 @@ struct drm_driver { void (*master_drop)(struct drm_device *dev, struct drm_file *file_priv, bool from_release); - int (*proc_init)(struct drm_minor *minor); - void (*proc_cleanup)(struct drm_minor *minor); int (*debugfs_init)(struct drm_minor *minor); void (*debugfs_cleanup)(struct drm_minor *minor); -- cgit v1.2.3 From 23ddc0243d7313942b94f1a2e44e6394f7bb996e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:25 +0200 Subject: drm: kill dma_ready callbacks Not used by any driver. So drop it. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_lock.c | 3 --- include/drm/drmP.h | 1 - 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index 609f2d504f72..d9146f240d33 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -124,9 +124,6 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv) block_all_signals(drm_notifier, &dev->sigdata, &dev->sigmask); } - if (dev->driver->dma_ready && (lock->flags & _DRM_LOCK_READY)) - dev->driver->dma_ready(dev); - if (dev->driver->dma_quiescent && (lock->flags & _DRM_LOCK_QUIESCENT)) { if (dev->driver->dma_quiescent(dev)) { diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 0d7af3f39652..d5a2b8869246 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -699,7 +699,6 @@ struct drm_driver { int (*suspend) (struct drm_device *, pm_message_t state); int (*resume) (struct drm_device *); int (*dma_ioctl) (struct drm_device *dev, void *data, struct drm_file *file_priv); - void (*dma_ready) (struct drm_device *); int (*dma_quiescent) (struct drm_device *); int (*context_ctor) (struct drm_device *dev, int context); int (*context_dtor) (struct drm_device *dev, int context); -- cgit v1.2.3 From fd2e7931cdefa8e9acf63f0a4efd61ae0f89e77b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:33 +0200 Subject: drm: kill gem_free_object_unlocked driver callback Not used by any current driver. 
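In effect the unlocked free path loses its special case. A sketch of the resulting flow, using the names from the hunk below:

	/* dropping the last reference without holding struct_mutex ... */
	drm_gem_object_unreference_unlocked(obj);
	/* ... now always reaches the driver callback under the lock:
	 *	mutex_lock(&dev->struct_mutex);
	 *	dev->driver->gem_free_object(obj);
	 *	mutex_unlock(&dev->struct_mutex);
	 */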
Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 4 +--- include/drm/drmP.h | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index bf92d07510df..cff7317d3830 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -474,9 +474,7 @@ drm_gem_object_free_unlocked(struct kref *kref) struct drm_gem_object *obj = (struct drm_gem_object *) kref; struct drm_device *dev = obj->dev; - if (dev->driver->gem_free_object_unlocked != NULL) - dev->driver->gem_free_object_unlocked(obj); - else if (dev->driver->gem_free_object != NULL) { + if (dev->driver->gem_free_object != NULL) { mutex_lock(&dev->struct_mutex); dev->driver->gem_free_object(obj); mutex_unlock(&dev->struct_mutex); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index d5a2b8869246..6dbdbf45cd1a 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -802,7 +802,6 @@ struct drm_driver { */ int (*gem_init_object) (struct drm_gem_object *obj); void (*gem_free_object) (struct drm_gem_object *obj); - void (*gem_free_object_unlocked) (struct drm_gem_object *obj); /* vga arb irq handler */ void (*vgaarb_irq)(struct drm_device *dev, bool state); -- cgit v1.2.3 From b3da8f7d2d1fa81fb65cb3f5d9e50dde40a83182 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:29 +0200 Subject: drm: kill context_ctor callback It's not used by any driver. The destructor callback is unfortunately used by the via driver in a rather convoluted piece of code used to reimplement something resembling broken futexes. I didn't dare to touch this code. But at least kill the needless NULL assignment in the sis driver. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_context.c | 8 -------- drivers/gpu/drm/sis/sis_drv.c | 1 - include/drm/drmP.h | 1 - 3 files changed, 10 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c index 2607753a320b..6d440fb894cf 100644 --- a/drivers/gpu/drm/drm_context.c +++ b/drivers/gpu/drm/drm_context.c @@ -333,14 +333,6 @@ int drm_addctx(struct drm_device *dev, void *data, return -ENOMEM; } - if (ctx->handle != DRM_KERNEL_CONTEXT) { - if (dev->driver->context_ctor) - if (!dev->driver->context_ctor(dev, ctx->handle)) { - DRM_DEBUG("Running out of ctxs or memory.\n"); - return -ENOMEM; - } - } - ctx_entry = kmalloc(sizeof(*ctx_entry), GFP_KERNEL); if (!ctx_entry) { DRM_DEBUG("out of memory\n"); diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c index 776bf9e9ea1a..2d1292131500 100644 --- a/drivers/gpu/drm/sis/sis_drv.c +++ b/drivers/gpu/drm/sis/sis_drv.c @@ -67,7 +67,6 @@ static struct drm_driver driver = { .driver_features = DRIVER_USE_AGP | DRIVER_USE_MTRR, .load = sis_driver_load, .unload = sis_driver_unload, - .context_dtor = NULL, .dma_quiescent = sis_idle, .reclaim_buffers = NULL, .reclaim_buffers_idlelocked = sis_reclaim_buffers_locked, diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 6dbdbf45cd1a..eb4f7edcc314 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -700,7 +700,6 @@ struct drm_driver { int (*resume) (struct drm_device *); int (*dma_ioctl) (struct drm_device *dev, void *data, struct drm_file *file_priv); int (*dma_quiescent) (struct drm_device *); - int (*context_ctor) (struct drm_device *dev, int context); int (*context_dtor) (struct drm_device *dev, int context); /** -- cgit v1.2.3 From a2a273c94357ffd24e635cf9ec9b2e5c6f02b63b
Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:30 +0200 Subject: drm: don't export drm_get_drawable_info Not used by any in-tree user. So drop it. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_drawable.c | 3 +-- include/drm/drmP.h | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_drawable.c b/drivers/gpu/drm/drm_drawable.c index c53c9768cc11..170e53178d8b 100644 --- a/drivers/gpu/drm/drm_drawable.c +++ b/drivers/gpu/drm/drm_drawable.c @@ -173,11 +173,10 @@ error: /** * Caller must hold the drawable spinlock! */ -struct drm_drawable_info *drm_get_drawable_info(struct drm_device *dev, drm_drawable_t id) +static struct drm_drawable_info *drm_get_drawable_info(struct drm_device *dev, drm_drawable_t id) { return idr_find(&dev->drw_idr, id); } -EXPORT_SYMBOL(drm_get_drawable_info); static int drm_drawable_free(int idr, void *p, void *data) { diff --git a/include/drm/drmP.h b/include/drm/drmP.h index eb4f7edcc314..7a5c91c2aa60 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1238,8 +1238,6 @@ extern int drm_rmdraw(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int drm_update_drawable_info(struct drm_device *dev, void *data, struct drm_file *file_priv); -extern struct drm_drawable_info *drm_get_drawable_info(struct drm_device *dev, - drm_drawable_t id); extern void drm_drawable_free_all(struct drm_device *dev); /* Authentication IOCTL support (drm_auth.h) */ -- cgit v1.2.3 From 690bb51b54a986e48c7b8b2dba51a3cd262a7266 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:35 +0200 Subject: drm: drop return value of drm_free_agp No caller (rightly) cares about it, so drop it. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_memory.c | 4 ++-- include/drm/drmP.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c index 7732268eced2..70ca27edc3c9 100644 --- a/drivers/gpu/drm/drm_memory.c +++ b/drivers/gpu/drm/drm_memory.c @@ -106,9 +106,9 @@ DRM_AGP_MEM *drm_alloc_agp(struct drm_device * dev, int pages, u32 type) } /** Wrapper around agp_free_memory() */ -int drm_free_agp(DRM_AGP_MEM * handle, int pages) +void drm_free_agp(DRM_AGP_MEM * handle, int pages) { - return drm_agp_free_memory(handle) ? 0 : -EINVAL; + drm_agp_free_memory(handle); } EXPORT_SYMBOL(drm_free_agp); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 7a5c91c2aa60..7fd1870b6a70 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1178,8 +1178,8 @@ extern int drm_mem_info(char *buf, char **start, off_t offset, int request, int *eof, void *data); extern void *drm_realloc(void *oldpt, size_t oldsize, size_t size, int area); +extern void drm_free_agp(DRM_AGP_MEM * handle, int pages); extern DRM_AGP_MEM *drm_alloc_agp(struct drm_device *dev, int pages, u32 type); -extern int drm_free_agp(DRM_AGP_MEM * handle, int pages); extern int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start); extern DRM_AGP_MEM *drm_agp_bind_pages(struct drm_device *dev, struct page **pages, -- cgit v1.2.3 From 793a97e4cc38f834e0488ccc1ecbfe52ff6f5b84 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:27 +0200 Subject: drm: kill drm_map_ofs callbacks All drivers happily copy&pasted the default implementation without checking whether this callback is used at all. It's not. Sigh. Kill it. 
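The boilerplate being removed looked the same in every driver. A representative sketch (foo_driver stands in for the drivers touched below):

static struct drm_driver foo_driver = {
	/* ... */
	.get_map_ofs = drm_core_get_map_ofs,	/* never called by the core */
};

/* and the shared helper it pointed at did nothing but: */
resource_size_t drm_core_get_map_ofs(struct drm_local_map *map)
{
	return map->offset;
}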
Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_vm.c | 7 ------- drivers/gpu/drm/i810/i810_drv.c | 1 - drivers/gpu/drm/i830/i830_drv.c | 1 - drivers/gpu/drm/i915/i915_drv.c | 1 - drivers/gpu/drm/mga/mga_drv.c | 1 - drivers/gpu/drm/nouveau/nouveau_drv.c | 1 - drivers/gpu/drm/r128/r128_drv.c | 1 - drivers/gpu/drm/radeon/radeon_drv.c | 2 -- drivers/gpu/drm/savage/savage_drv.c | 1 - drivers/gpu/drm/sis/sis_drv.c | 1 - drivers/gpu/drm/tdfx/tdfx_drv.c | 1 - drivers/gpu/drm/via/via_drv.c | 1 - drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 1 - include/drm/drmP.h | 2 -- 14 files changed, 22 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c index fda67468e603..2fea2e63a313 100644 --- a/drivers/gpu/drm/drm_vm.c +++ b/drivers/gpu/drm/drm_vm.c @@ -515,13 +515,6 @@ static int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma) return 0; } -resource_size_t drm_core_get_map_ofs(struct drm_local_map * map) -{ - return map->offset; -} - -EXPORT_SYMBOL(drm_core_get_map_ofs); - resource_size_t drm_core_get_reg_ofs(struct drm_device *dev) { #ifdef __alpha__ diff --git a/drivers/gpu/drm/i810/i810_drv.c b/drivers/gpu/drm/i810/i810_drv.c index b4250b2cac1f..084a85c3d5d5 100644 --- a/drivers/gpu/drm/i810/i810_drv.c +++ b/drivers/gpu/drm/i810/i810_drv.c @@ -52,7 +52,6 @@ static struct drm_driver driver = { .device_is_agp = i810_driver_device_is_agp, .reclaim_buffers_locked = i810_driver_reclaim_buffers_locked, .dma_quiescent = i810_driver_dma_quiescent, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = i810_ioctls, .fops = { diff --git a/drivers/gpu/drm/i830/i830_drv.c b/drivers/gpu/drm/i830/i830_drv.c index a5c66aa82f0c..16352954311c 100644 --- a/drivers/gpu/drm/i830/i830_drv.c +++ b/drivers/gpu/drm/i830/i830_drv.c @@ -57,7 +57,6 @@ static struct drm_driver driver = { .device_is_agp = i830_driver_device_is_agp, .reclaim_buffers_locked = i830_driver_reclaim_buffers_locked, .dma_quiescent = i830_driver_dma_quiescent, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, #if USE_IRQS .irq_preinstall = i830_driver_irq_preinstall, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 00befce8fbb7..d079f7b86cca 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -524,7 +524,6 @@ static struct drm_driver driver = { .irq_uninstall = i915_driver_irq_uninstall, .irq_handler = i915_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .master_create = i915_master_create, .master_destroy = i915_master_destroy, diff --git a/drivers/gpu/drm/mga/mga_drv.c b/drivers/gpu/drm/mga/mga_drv.c index 26d0d8ced80d..e9c0cbc21fe2 100644 --- a/drivers/gpu/drm/mga/mga_drv.c +++ b/drivers/gpu/drm/mga/mga_drv.c @@ -60,7 +60,6 @@ static struct drm_driver driver = { .irq_uninstall = mga_driver_irq_uninstall, .irq_handler = mga_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = mga_ioctls, .dma_ioctl = mga_dma_buffers, diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c index 1de5eb53e016..0d64259b21cc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.c +++ b/drivers/gpu/drm/nouveau/nouveau_drv.c @@ -379,7 +379,6 @@ static struct drm_driver driver = { .irq_uninstall = nouveau_irq_uninstall, .irq_handler = 
nouveau_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = nouveau_ioctls, .fops = { diff --git a/drivers/gpu/drm/r128/r128_drv.c b/drivers/gpu/drm/r128/r128_drv.c index 1e2971f13aa1..42ec20a10eed 100644 --- a/drivers/gpu/drm/r128/r128_drv.c +++ b/drivers/gpu/drm/r128/r128_drv.c @@ -56,7 +56,6 @@ static struct drm_driver driver = { .irq_uninstall = r128_driver_irq_uninstall, .irq_handler = r128_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = r128_ioctls, .dma_ioctl = r128_cce_buffers, diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 795403b0e2cd..8fd89bb8e610 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -203,7 +203,6 @@ static struct drm_driver driver_old = { .irq_uninstall = radeon_driver_irq_uninstall, .irq_handler = radeon_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = radeon_ioctls, .dma_ioctl = radeon_cp_buffers, @@ -290,7 +289,6 @@ static struct drm_driver kms_driver = { .irq_uninstall = radeon_driver_irq_uninstall_kms, .irq_handler = radeon_driver_irq_handler_kms, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = radeon_ioctls_kms, .gem_init_object = radeon_gem_object_init, diff --git a/drivers/gpu/drm/savage/savage_drv.c b/drivers/gpu/drm/savage/savage_drv.c index 021de44c15ab..f539996ba230 100644 --- a/drivers/gpu/drm/savage/savage_drv.c +++ b/drivers/gpu/drm/savage/savage_drv.c @@ -42,7 +42,6 @@ static struct drm_driver driver = { .lastclose = savage_driver_lastclose, .unload = savage_driver_unload, .reclaim_buffers = savage_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = savage_ioctls, .dma_ioctl = savage_bci_buffers, diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c index 2d1292131500..72b0a74f265f 100644 --- a/drivers/gpu/drm/sis/sis_drv.c +++ b/drivers/gpu/drm/sis/sis_drv.c @@ -71,7 +71,6 @@ static struct drm_driver driver = { .reclaim_buffers = NULL, .reclaim_buffers_idlelocked = sis_reclaim_buffers_locked, .lastclose = sis_lastclose, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = sis_ioctls, .fops = { diff --git a/drivers/gpu/drm/tdfx/tdfx_drv.c b/drivers/gpu/drm/tdfx/tdfx_drv.c index ec5a43e65722..38a562647d6f 100644 --- a/drivers/gpu/drm/tdfx/tdfx_drv.c +++ b/drivers/gpu/drm/tdfx/tdfx_drv.c @@ -42,7 +42,6 @@ static struct pci_device_id pciidlist[] = { static struct drm_driver driver = { .driver_features = DRIVER_USE_MTRR, .reclaim_buffers = drm_core_reclaim_buffers, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .fops = { .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/via/via_drv.c b/drivers/gpu/drm/via/via_drv.c index 7a1b210401e0..0b9ad8bbb031 100644 --- a/drivers/gpu/drm/via/via_drv.c +++ b/drivers/gpu/drm/via/via_drv.c @@ -51,7 +51,6 @@ static struct drm_driver driver = { .reclaim_buffers_locked = NULL, .reclaim_buffers_idlelocked = via_reclaim_buffers_locked, .lastclose = via_lastclose, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = via_ioctls, .fops = { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 72ec2e2b6e97..4a832de43dd5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -723,7 +723,6 @@ static struct drm_driver driver = { .irq_uninstall = vmw_irq_uninstall, .irq_handler = vmw_irq_handler, .reclaim_buffers_locked = NULL, - .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = vmw_ioctls, .num_ioctls = DRM_ARRAY_SIZE(vmw_ioctls), diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 7fd1870b6a70..70a14a4faa1e 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -772,7 +772,6 @@ struct drm_driver { struct drm_file *file_priv); void (*reclaim_buffers_idlelocked) (struct drm_device *dev, struct drm_file *file_priv); - resource_size_t (*get_map_ofs) (struct drm_local_map * map); resource_size_t (*get_reg_ofs) (struct drm_device *dev); void (*set_version) (struct drm_device *dev, struct drm_set_version *sv); @@ -1167,7 +1166,6 @@ extern int drm_release(struct inode *inode, struct file *filp); extern int drm_mmap(struct file *filp, struct vm_area_struct *vma); extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma); extern void drm_vm_open_locked(struct vm_area_struct *vma); -extern resource_size_t drm_core_get_map_ofs(struct drm_local_map * map); extern resource_size_t drm_core_get_reg_ofs(struct drm_device *dev); extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait); -- cgit v1.2.3 From 4ac5ec40ec70022e4dea8cc6254d2dadd1e43d57 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:34 +0200 Subject: drm: don't export dri1 locking functions Only used by ioctl, not by any in-tree drivers. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_lock.c | 10 +++------- include/drm/drmP.h | 1 - 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index d9146f240d33..1e28b9072068 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -37,6 +37,8 @@ static int drm_notifier(void *priv); +static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context); + /** * Lock ioctl. * @@ -172,6 +174,7 @@ int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv) * * Attempt to mark the lock as held by the given context, via the \p cmpxchg instruction. */ +static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context) { @@ -208,7 +211,6 @@ int drm_lock_take(struct drm_lock_data *lock_data, } return 0; } -EXPORT_SYMBOL(drm_lock_take); /** * This takes a lock forcibly and hands it to context. 
Should ONLY be used @@ -276,7 +278,6 @@ int drm_lock_free(struct drm_lock_data *lock_data, unsigned int context) wake_up_interruptible(&lock_data->lock_queue); return 0; } -EXPORT_SYMBOL(drm_lock_free); /** * If we get here, it means that the process has called DRM_IOCTL_LOCK @@ -339,7 +340,6 @@ void drm_idlelock_take(struct drm_lock_data *lock_data) } spin_unlock_bh(&lock_data->spinlock); } -EXPORT_SYMBOL(drm_idlelock_take); void drm_idlelock_release(struct drm_lock_data *lock_data) { @@ -359,8 +359,6 @@ void drm_idlelock_release(struct drm_lock_data *lock_data) } spin_unlock_bh(&lock_data->spinlock); } -EXPORT_SYMBOL(drm_idlelock_release); - int drm_i_have_hw_lock(struct drm_device *dev, struct drm_file *file_priv) { @@ -369,5 +367,3 @@ int drm_i_have_hw_lock(struct drm_device *dev, struct drm_file *file_priv) _DRM_LOCK_IS_HELD(master->lock.hw_lock->lock) && master->lock.file_priv == file_priv); } - -EXPORT_SYMBOL(drm_i_have_hw_lock); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 70a14a4faa1e..45d09639e9d2 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1252,7 +1252,6 @@ extern int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv); -extern int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context); extern int drm_lock_free(struct drm_lock_data *lock_data, unsigned int context); extern void drm_idlelock_take(struct drm_lock_data *lock_data); extern void drm_idlelock_release(struct drm_lock_data *lock_data); -- cgit v1.2.3 From 8f879194f88742d9c452f669482b6d6abdc1e1e7 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:31 +0200 Subject: drm: replace drawable ioctl by noops The information supplied by userspace through these ioctls is only accessible by dev->drw_idr. But there's no in-tree user of that. Also userspace does not really care about return values of these ioctls, either. Only hw/xfree86/dri/dri.c from the xserver actually checks the return from adddraw and keeps on trying to create a kernel drawable every time somebody creates a dri drawable. But since that's now a noop, who cares. Therefore it's safe to replace these three ioctls with noops and rip out the implementation.
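For reference, drm_noop, the handler the three drawable ioctls now point at, is (roughly) the trivial accept-and-succeed stub already present in the DRM core:

int drm_noop(struct drm_device *dev, void *data,
	     struct drm_file *file_priv)
{
	DRM_DEBUG("\n");
	return 0;
}

Old userspace keeps getting a success return, which is all it ever checked for.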
Signed-off-by: Daniel Vetter Reviewed-by: Kristian Høgsberg Reviewed-by: Michel Dänzer Signed-off-by: Dave Airlie --- drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/drm_drawable.c | 197 ----------------------------------------- drivers/gpu/drm/drm_drv.c | 8 +- drivers/gpu/drm/drm_stub.c | 3 - include/drm/drmP.h | 15 ---- 5 files changed, 4 insertions(+), 221 deletions(-) delete mode 100644 drivers/gpu/drm/drm_drawable.c (limited to 'include') diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index f3a23a329f4e..997c43d04909 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -5,7 +5,7 @@ ccflags-y := -Iinclude/drm drm-y := drm_auth.o drm_buffer.o drm_bufs.o drm_cache.o \ - drm_context.o drm_dma.o drm_drawable.o \ + drm_context.o drm_dma.o \ drm_drv.o drm_fops.o drm_gem.o drm_ioctl.o drm_irq.o \ drm_lock.o drm_memory.o drm_proc.o drm_stub.o drm_vm.o \ drm_agpsupport.o drm_scatter.o ati_pcigart.o drm_pci.o \ diff --git a/drivers/gpu/drm/drm_drawable.c b/drivers/gpu/drm/drm_drawable.c deleted file mode 100644 index 170e53178d8b..000000000000 --- a/drivers/gpu/drm/drm_drawable.c +++ /dev/null @@ -1,197 +0,0 @@ -/** - * \file drm_drawable.c - * IOCTLs for drawables - * - * \author Rickard E. (Rik) Faith - * \author Gareth Hughes - * \author Michel Dänzer - */ - -/* - * Created: Tue Feb 2 08:37:54 1999 by faith@valinux.com - * - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, North Dakota. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "drmP.h" - -/** - * Allocate drawable ID and memory to store information about it. 
- */ -int drm_adddraw(struct drm_device *dev, void *data, struct drm_file *file_priv) -{ - unsigned long irqflags; - struct drm_draw *draw = data; - int new_id = 0; - int ret; - -again: - if (idr_pre_get(&dev->drw_idr, GFP_KERNEL) == 0) { - DRM_ERROR("Out of memory expanding drawable idr\n"); - return -ENOMEM; - } - - spin_lock_irqsave(&dev->drw_lock, irqflags); - ret = idr_get_new_above(&dev->drw_idr, NULL, 1, &new_id); - if (ret == -EAGAIN) { - spin_unlock_irqrestore(&dev->drw_lock, irqflags); - goto again; - } - - spin_unlock_irqrestore(&dev->drw_lock, irqflags); - - draw->handle = new_id; - - DRM_DEBUG("%d\n", draw->handle); - - return 0; -} - -/** - * Free drawable ID and memory to store information about it. - */ -int drm_rmdraw(struct drm_device *dev, void *data, struct drm_file *file_priv) -{ - struct drm_draw *draw = data; - unsigned long irqflags; - struct drm_drawable_info *info; - - spin_lock_irqsave(&dev->drw_lock, irqflags); - - info = drm_get_drawable_info(dev, draw->handle); - if (info == NULL) { - spin_unlock_irqrestore(&dev->drw_lock, irqflags); - return -EINVAL; - } - kfree(info->rects); - kfree(info); - - idr_remove(&dev->drw_idr, draw->handle); - - spin_unlock_irqrestore(&dev->drw_lock, irqflags); - DRM_DEBUG("%d\n", draw->handle); - return 0; -} - -int drm_update_drawable_info(struct drm_device *dev, void *data, struct drm_file *file_priv) -{ - struct drm_update_draw *update = data; - unsigned long irqflags; - struct drm_clip_rect *rects; - struct drm_drawable_info *info; - int err; - - info = idr_find(&dev->drw_idr, update->handle); - if (!info) { - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - if (IS_ERR(idr_replace(&dev->drw_idr, info, update->handle))) { - DRM_ERROR("No such drawable %d\n", update->handle); - kfree(info); - return -EINVAL; - } - } - - switch (update->type) { - case DRM_DRAWABLE_CLIPRECTS: - if (update->num == 0) - rects = NULL; - else if (update->num != info->num_rects) { - rects = kmalloc(update->num * - sizeof(struct drm_clip_rect), - GFP_KERNEL); - } else - rects = info->rects; - - if (update->num && !rects) { - DRM_ERROR("Failed to allocate cliprect memory\n"); - err = -ENOMEM; - goto error; - } - - if (update->num && DRM_COPY_FROM_USER(rects, - (struct drm_clip_rect __user *) - (unsigned long)update->data, - update->num * - sizeof(*rects))) { - DRM_ERROR("Failed to copy cliprects from userspace\n"); - err = -EFAULT; - goto error; - } - - spin_lock_irqsave(&dev->drw_lock, irqflags); - - if (rects != info->rects) { - kfree(info->rects); - } - - info->rects = rects; - info->num_rects = update->num; - - spin_unlock_irqrestore(&dev->drw_lock, irqflags); - - DRM_DEBUG("Updated %d cliprects for drawable %d\n", - info->num_rects, update->handle); - break; - default: - DRM_ERROR("Invalid update type %d\n", update->type); - return -EINVAL; - } - - return 0; - -error: - if (rects != info->rects) - kfree(rects); - - return err; -} - -/** - * Caller must hold the drawable spinlock! 
- */ -static struct drm_drawable_info *drm_get_drawable_info(struct drm_device *dev, drm_drawable_t id) -{ - return idr_find(&dev->drw_idr, id); -} - -static int drm_drawable_free(int idr, void *p, void *data) -{ - struct drm_drawable_info *info = p; - - if (info) { - kfree(info->rects); - kfree(info); - } - - return 0; -} - -void drm_drawable_free_all(struct drm_device *dev) -{ - idr_for_each(&dev->drw_idr, drm_drawable_free, NULL); - idr_remove_all(&dev->drw_idr); -} diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 84da748555bc..a35a41002c33 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -91,8 +91,8 @@ static struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_NEW_CTX, drm_newctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_RES_CTX, drm_resctx, DRM_AUTH), - DRM_IOCTL_DEF(DRM_IOCTL_ADD_DRAW, drm_adddraw, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF(DRM_IOCTL_RM_DRAW, drm_rmdraw, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF(DRM_IOCTL_ADD_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF(DRM_IOCTL_RM_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_LOCK, drm_lock, DRM_AUTH), DRM_IOCTL_DEF(DRM_IOCTL_UNLOCK, drm_unlock, DRM_AUTH), @@ -127,7 +127,7 @@ static struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_MODESET_CTL, drm_modeset_ctl, 0), - DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_update_drawable_info, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH|DRM_UNLOCKED), @@ -180,8 +180,6 @@ int drm_lastclose(struct drm_device * dev) mutex_lock(&dev->struct_mutex); - /* Free drawable information memory */ - drm_drawable_free_all(dev); del_timer(&dev->timer); /* Clear AGP information */ diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c index d1ad57450df1..f797ae9da77c 100644 --- a/drivers/gpu/drm/drm_stub.c +++ b/drivers/gpu/drm/drm_stub.c @@ -240,14 +240,11 @@ int drm_fill_in_dev(struct drm_device *dev, INIT_LIST_HEAD(&dev->vblank_event_list); spin_lock_init(&dev->count_lock); - spin_lock_init(&dev->drw_lock); spin_lock_init(&dev->event_lock); init_timer(&dev->timer); mutex_init(&dev->struct_mutex); mutex_init(&dev->ctxlist_mutex); - idr_init(&dev->drw_idr); - if (drm_ht_create(&dev->map_hash, 12)) { return -ENOMEM; } diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 45d09639e9d2..989cefe33c7b 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1037,12 +1037,6 @@ struct drm_device { struct drm_minor *control; /**< Control node for card */ struct drm_minor *primary; /**< render type primary screen head */ - /** \name Drawable information */ - /*@{ */ - spinlock_t drw_lock; - struct idr drw_idr; - /*@} */ - struct drm_mode_config mode_config; /**< Current mode config */ /** \name GEM information */ @@ -1229,15 +1223,6 @@ extern int drm_setsareactx(struct drm_device *dev, void *data, extern int drm_getsareactx(struct drm_device *dev, void *data, struct drm_file *file_priv); - /* Drawable IOCTL support (drm_drawable.h) */ -extern int drm_adddraw(struct drm_device *dev, void *data, - struct drm_file *file_priv); -extern int drm_rmdraw(struct drm_device *dev, void *data, - struct drm_file *file_priv); -extern int drm_update_drawable_info(struct drm_device *dev, void *data, - struct drm_file *file_priv); -extern void 
drm_drawable_free_all(struct drm_device *dev); - /* Authentication IOCTL support (drm_auth.h) */ extern int drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv); -- cgit v1.2.3 From 89c372647d1d698a96e2189ef4312a977b939839 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:36 +0200 Subject: drm: kill agp indirection mess There's no point in jumping through two indirections. So kill one and call the kernel's agp functions directly. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_agpsupport.c | 40 ++++------------------------------------ drivers/gpu/drm/drm_memory.c | 12 +++--------- include/drm/drmP.h | 5 ----- 3 files changed, 7 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_agpsupport.c b/drivers/gpu/drm/drm_agpsupport.c index ba38e0147220..252fdb98b73a 100644 --- a/drivers/gpu/drm/drm_agpsupport.c +++ b/drivers/gpu/drm/drm_agpsupport.c @@ -193,7 +193,7 @@ int drm_agp_enable_ioctl(struct drm_device *dev, void *data, * \return zero on success or a negative number on failure. * * Verifies the AGP device is present and has been acquired, allocates the - * memory via alloc_agp() and creates a drm_agp_mem entry for it. + * memory via agp_allocate_memory() and creates a drm_agp_mem entry for it. */ int drm_agp_alloc(struct drm_device *dev, struct drm_agp_buffer *request) { @@ -211,7 +211,7 @@ int drm_agp_alloc(struct drm_device *dev, struct drm_agp_buffer *request) pages = (request->size + PAGE_SIZE - 1) / PAGE_SIZE; type = (u32) request->type; - if (!(memory = drm_alloc_agp(dev, pages, type))) { + if (!(memory = agp_allocate_memory(dev->agp->bridge, pages, type))) { kfree(entry); return -ENOMEM; } @@ -423,38 +423,6 @@ struct drm_agp_head *drm_agp_init(struct drm_device *dev) return head; } -/** Calls agp_allocate_memory() */ -DRM_AGP_MEM *drm_agp_allocate_memory(struct agp_bridge_data * bridge, - size_t pages, u32 type) -{ - return agp_allocate_memory(bridge, pages, type); -} - -/** Calls agp_free_memory() */ -int drm_agp_free_memory(DRM_AGP_MEM * handle) -{ - if (!handle) - return 0; - agp_free_memory(handle); - return 1; -} - -/** Calls agp_bind_memory() */ -int drm_agp_bind_memory(DRM_AGP_MEM * handle, off_t start) -{ - if (!handle) - return -EINVAL; - return agp_bind_memory(handle, start); -} - -/** Calls agp_unbind_memory() */ -int drm_agp_unbind_memory(DRM_AGP_MEM * handle) -{ - if (!handle) - return -EINVAL; - return agp_unbind_memory(handle); -} - /** * Binds a collection of pages into AGP memory at the given offset, returning * the AGP memory structure containing them.
@@ -474,7 +442,7 @@ drm_agp_bind_pages(struct drm_device *dev, DRM_DEBUG("\n"); - mem = drm_agp_allocate_memory(dev->agp->bridge, num_pages, + mem = agp_allocate_memory(dev->agp->bridge, num_pages, type); if (mem == NULL) { DRM_ERROR("Failed to allocate memory for %ld pages\n", @@ -487,7 +455,7 @@ drm_agp_bind_pages(struct drm_device *dev, mem->page_count = num_pages; mem->is_flushed = true; - ret = drm_agp_bind_memory(mem, gtt_offset / PAGE_SIZE); + ret = agp_bind_memory(mem, gtt_offset / PAGE_SIZE); if (ret != 0) { DRM_ERROR("Failed to bind AGP memory: %d\n", ret); agp_free_memory(mem); diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c index 70ca27edc3c9..c9b805000a11 100644 --- a/drivers/gpu/drm/drm_memory.c +++ b/drivers/gpu/drm/drm_memory.c @@ -99,29 +99,23 @@ static void *agp_remap(unsigned long offset, unsigned long size, return addr; } -/** Wrapper around agp_allocate_memory() */ -DRM_AGP_MEM *drm_alloc_agp(struct drm_device * dev, int pages, u32 type) -{ - return drm_agp_allocate_memory(dev->agp->bridge, pages, type); -} - /** Wrapper around agp_free_memory() */ void drm_free_agp(DRM_AGP_MEM * handle, int pages) { - drm_agp_free_memory(handle); + agp_free_memory(handle); } EXPORT_SYMBOL(drm_free_agp); /** Wrapper around agp_bind_memory() */ int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start) { - return drm_agp_bind_memory(handle, start); + return agp_bind_memory(handle, start); } /** Wrapper around agp_unbind_memory() */ int drm_unbind_agp(DRM_AGP_MEM * handle) { - return drm_agp_unbind_memory(handle); + return agp_unbind_memory(handle); } EXPORT_SYMBOL(drm_unbind_agp); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 989cefe33c7b..ffe6035cf471 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1171,7 +1171,6 @@ extern int drm_mem_info(char *buf, char **start, off_t offset, extern void *drm_realloc(void *oldpt, size_t oldsize, size_t size, int area); extern void drm_free_agp(DRM_AGP_MEM * handle, int pages); -extern DRM_AGP_MEM *drm_alloc_agp(struct drm_device *dev, int pages, u32 type); extern int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start); extern DRM_AGP_MEM *drm_agp_bind_pages(struct drm_device *dev, struct page **pages, @@ -1331,10 +1330,6 @@ extern int drm_agp_unbind_ioctl(struct drm_device *dev, void *data, extern int drm_agp_bind(struct drm_device *dev, struct drm_agp_binding *request); extern int drm_agp_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -extern DRM_AGP_MEM *drm_agp_allocate_memory(struct agp_bridge_data *bridge, size_t pages, u32 type); -extern int drm_agp_free_memory(DRM_AGP_MEM * handle); -extern int drm_agp_bind_memory(DRM_AGP_MEM * handle, off_t start); -extern int drm_agp_unbind_memory(DRM_AGP_MEM * handle); extern void drm_agp_chipset_flush(struct drm_device *dev); /* Stub support (drm_stub.h) */ -- cgit v1.2.3 From df8fcb09667c1b2c9dcf65de23f0bfa851e8138e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:32 +0200 Subject: drm: kill dev->timer Totally unused. 
Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_drv.c | 2 -- drivers/gpu/drm/drm_stub.c | 1 - include/drm/drmP.h | 1 - 3 files changed, 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index a35a41002c33..5ff75a3a6b9d 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -180,8 +180,6 @@ int drm_lastclose(struct drm_device * dev) mutex_lock(&dev->struct_mutex); - del_timer(&dev->timer); - /* Clear AGP information */ if (drm_core_has_AGP(dev) && dev->agp && !drm_core_check_feature(dev, DRIVER_MODESET)) { diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c index f797ae9da77c..cdc89ee042cc 100644 --- a/drivers/gpu/drm/drm_stub.c +++ b/drivers/gpu/drm/drm_stub.c @@ -241,7 +241,6 @@ int drm_fill_in_dev(struct drm_device *dev, spin_lock_init(&dev->count_lock); spin_lock_init(&dev->event_lock); - init_timer(&dev->timer); mutex_init(&dev->struct_mutex); mutex_init(&dev->ctxlist_mutex); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index ffe6035cf471..757b63a23b14 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -964,7 +964,6 @@ struct drm_device { __volatile__ long context_flag; /**< Context swapping flag */ __volatile__ long interrupt_flag; /**< Interruption handler flag */ __volatile__ long dma_flag; /**< DMA dispatch flag */ - struct timer_list timer; /**< Timer for delaying ctx switch */ wait_queue_head_t context_wait; /**< Processes waiting on ctx switch */ int last_checked; /**< Last context checked for DMA */ int last_context; /**< Last current context */ -- cgit v1.2.3 From cbc60ca04b342a4e1f2a1086a7277c077f07dbed Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Aug 2010 22:53:28 +0200 Subject: drm: kill get_reg_ofs callback Every driver used the default implementation. Fold that one into the only callsite and drop the callback. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_vm.c | 6 ++---- drivers/gpu/drm/i810/i810_drv.c | 1 - drivers/gpu/drm/i830/i830_drv.c | 1 - drivers/gpu/drm/i915/i915_drv.c | 1 - drivers/gpu/drm/mga/mga_drv.c | 1 - drivers/gpu/drm/nouveau/nouveau_drv.c | 1 - drivers/gpu/drm/r128/r128_drv.c | 1 - drivers/gpu/drm/radeon/radeon_drv.c | 2 -- drivers/gpu/drm/savage/savage_drv.c | 1 - drivers/gpu/drm/sis/sis_drv.c | 1 - drivers/gpu/drm/tdfx/tdfx_drv.c | 1 - drivers/gpu/drm/via/via_drv.c | 1 - drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 1 - include/drm/drmP.h | 2 -- 14 files changed, 2 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c index 2fea2e63a313..ee879d6bb522 100644 --- a/drivers/gpu/drm/drm_vm.c +++ b/drivers/gpu/drm/drm_vm.c @@ -515,7 +515,7 @@ static int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma) return 0; } -resource_size_t drm_core_get_reg_ofs(struct drm_device *dev) +static resource_size_t drm_core_get_reg_ofs(struct drm_device *dev) { #ifdef __alpha__ return dev->hose->dense_mem_base - dev->hose->mem_space->start; @@ -524,8 +524,6 @@ resource_size_t drm_core_get_reg_ofs(struct drm_device *dev) #endif } -EXPORT_SYMBOL(drm_core_get_reg_ofs); - /** * mmap DMA memory. 
* @@ -612,7 +610,7 @@ int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma) #endif case _DRM_FRAME_BUFFER: case _DRM_REGISTERS: - offset = dev->driver->get_reg_ofs(dev); + offset = drm_core_get_reg_ofs(dev); vma->vm_flags |= VM_IO; /* not in core dump */ vma->vm_page_prot = drm_io_prot(map->type, vma); #if !defined(__arm__) diff --git a/drivers/gpu/drm/i810/i810_drv.c b/drivers/gpu/drm/i810/i810_drv.c index 084a85c3d5d5..1c73b0c43c1e 100644 --- a/drivers/gpu/drm/i810/i810_drv.c +++ b/drivers/gpu/drm/i810/i810_drv.c @@ -52,7 +52,6 @@ static struct drm_driver driver = { .device_is_agp = i810_driver_device_is_agp, .reclaim_buffers_locked = i810_driver_reclaim_buffers_locked, .dma_quiescent = i810_driver_dma_quiescent, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = i810_ioctls, .fops = { .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/i830/i830_drv.c b/drivers/gpu/drm/i830/i830_drv.c index 16352954311c..7140ffc12eee 100644 --- a/drivers/gpu/drm/i830/i830_drv.c +++ b/drivers/gpu/drm/i830/i830_drv.c @@ -57,7 +57,6 @@ static struct drm_driver driver = { .device_is_agp = i830_driver_device_is_agp, .reclaim_buffers_locked = i830_driver_reclaim_buffers_locked, .dma_quiescent = i830_driver_dma_quiescent, - .get_reg_ofs = drm_core_get_reg_ofs, #if USE_IRQS .irq_preinstall = i830_driver_irq_preinstall, .irq_postinstall = i830_driver_irq_postinstall, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d079f7b86cca..e6afa68775b0 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -524,7 +524,6 @@ static struct drm_driver driver = { .irq_uninstall = i915_driver_irq_uninstall, .irq_handler = i915_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .master_create = i915_master_create, .master_destroy = i915_master_destroy, #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/mga/mga_drv.c b/drivers/gpu/drm/mga/mga_drv.c index e9c0cbc21fe2..65ea42cf1795 100644 --- a/drivers/gpu/drm/mga/mga_drv.c +++ b/drivers/gpu/drm/mga/mga_drv.c @@ -60,7 +60,6 @@ static struct drm_driver driver = { .irq_uninstall = mga_driver_irq_uninstall, .irq_handler = mga_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = mga_ioctls, .dma_ioctl = mga_dma_buffers, .fops = { diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c index 0d64259b21cc..209912a1b7a5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.c +++ b/drivers/gpu/drm/nouveau/nouveau_drv.c @@ -379,7 +379,6 @@ static struct drm_driver driver = { .irq_uninstall = nouveau_irq_uninstall, .irq_handler = nouveau_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = nouveau_ioctls, .fops = { .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/r128/r128_drv.c b/drivers/gpu/drm/r128/r128_drv.c index 42ec20a10eed..67309f84f16d 100644 --- a/drivers/gpu/drm/r128/r128_drv.c +++ b/drivers/gpu/drm/r128/r128_drv.c @@ -56,7 +56,6 @@ static struct drm_driver driver = { .irq_uninstall = r128_driver_irq_uninstall, .irq_handler = r128_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = r128_ioctls, .dma_ioctl = r128_cce_buffers, .fops = { diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 8fd89bb8e610..663cdc10a5c2 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c 
@@ -203,7 +203,6 @@ static struct drm_driver driver_old = { .irq_uninstall = radeon_driver_irq_uninstall, .irq_handler = radeon_driver_irq_handler, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = radeon_ioctls, .dma_ioctl = radeon_cp_buffers, .fops = { @@ -289,7 +288,6 @@ static struct drm_driver kms_driver = { .irq_uninstall = radeon_driver_irq_uninstall_kms, .irq_handler = radeon_driver_irq_handler_kms, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = radeon_ioctls_kms, .gem_init_object = radeon_gem_object_init, .gem_free_object = radeon_gem_object_free, diff --git a/drivers/gpu/drm/savage/savage_drv.c b/drivers/gpu/drm/savage/savage_drv.c index f539996ba230..c0385633667d 100644 --- a/drivers/gpu/drm/savage/savage_drv.c +++ b/drivers/gpu/drm/savage/savage_drv.c @@ -42,7 +42,6 @@ static struct drm_driver driver = { .lastclose = savage_driver_lastclose, .unload = savage_driver_unload, .reclaim_buffers = savage_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = savage_ioctls, .dma_ioctl = savage_bci_buffers, .fops = { diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c index 72b0a74f265f..4d9f311d249d 100644 --- a/drivers/gpu/drm/sis/sis_drv.c +++ b/drivers/gpu/drm/sis/sis_drv.c @@ -71,7 +71,6 @@ static struct drm_driver driver = { .reclaim_buffers = NULL, .reclaim_buffers_idlelocked = sis_reclaim_buffers_locked, .lastclose = sis_lastclose, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = sis_ioctls, .fops = { .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/tdfx/tdfx_drv.c b/drivers/gpu/drm/tdfx/tdfx_drv.c index 38a562647d6f..e0768adbeccd 100644 --- a/drivers/gpu/drm/tdfx/tdfx_drv.c +++ b/drivers/gpu/drm/tdfx/tdfx_drv.c @@ -42,7 +42,6 @@ static struct pci_device_id pciidlist[] = { static struct drm_driver driver = { .driver_features = DRIVER_USE_MTRR, .reclaim_buffers = drm_core_reclaim_buffers, - .get_reg_ofs = drm_core_get_reg_ofs, .fops = { .owner = THIS_MODULE, .open = drm_open, diff --git a/drivers/gpu/drm/via/via_drv.c b/drivers/gpu/drm/via/via_drv.c index 0b9ad8bbb031..02f733db61c1 100644 --- a/drivers/gpu/drm/via/via_drv.c +++ b/drivers/gpu/drm/via/via_drv.c @@ -51,7 +51,6 @@ static struct drm_driver driver = { .reclaim_buffers_locked = NULL, .reclaim_buffers_idlelocked = via_reclaim_buffers_locked, .lastclose = via_lastclose, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = via_ioctls, .fops = { .owner = THIS_MODULE, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 4a832de43dd5..e645f44e4302 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -723,7 +723,6 @@ static struct drm_driver driver = { .irq_uninstall = vmw_irq_uninstall, .irq_handler = vmw_irq_handler, .reclaim_buffers_locked = NULL, - .get_reg_ofs = drm_core_get_reg_ofs, .ioctls = vmw_ioctls, .num_ioctls = DRM_ARRAY_SIZE(vmw_ioctls), .dma_quiescent = NULL, /*vmw_dma_quiescent, */ diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 757b63a23b14..30e827aeba02 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -772,7 +772,6 @@ struct drm_driver { struct drm_file *file_priv); void (*reclaim_buffers_idlelocked) (struct drm_device *dev, struct drm_file *file_priv); - resource_size_t (*get_reg_ofs) (struct drm_device *dev); void (*set_version) (struct drm_device *dev, struct drm_set_version *sv); @@ -1159,7 +1158,6 @@ extern int drm_release(struct inode *inode, struct file *filp); extern int 
drm_mmap(struct file *filp, struct vm_area_struct *vma); extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma); extern void drm_vm_open_locked(struct vm_area_struct *vma); -extern resource_size_t drm_core_get_reg_ofs(struct drm_device *dev); extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait); /* Memory management support (drm_memory.h) */ -- cgit v1.2.3 From 409456b10f87b28303643fec37543103f9ada00c Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 29 Aug 2010 21:57:55 -0700 Subject: net: fix datapath typo Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if.h b/include/linux/if.h index 6ed43c1f07ab..123959927745 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -76,7 +76,7 @@ #define IFF_MACVLAN_PORT 0x4000 /* device used as macvlan port */ #define IFF_BRIDGE_PORT 0x8000 /* device used as bridge port */ #define IFF_OVS_DATAPATH 0x10000 /* device used as Open vSwitch - * dapath port */ + * datapath port */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 -- cgit v1.2.3 From fcaf780b2ad352edaeb1d1c07a6da053266b1eed Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 24 Aug 2010 23:22:57 -0300 Subject: i7300_edac: start a driver for i7300 chipset (Clarksboro) Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/Kconfig | 7 + drivers/edac/Makefile | 1 + drivers/edac/i7300_edac.c | 1373 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 19 +- 4 files changed, 1392 insertions(+), 8 deletions(-) create mode 100644 drivers/edac/i7300_edac.c (limited to 'include') diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 70bb350de996..4573ccc11f93 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -199,6 +199,13 @@ config EDAC_I5100 Support for error detection and correction the Intel San Clemente MCH. +config EDAC_I7300 + tristate "Intel Clarksboro MCH" + depends on EDAC_MM_EDAC && X86 && PCI + help + Support for error detection and correction the Intel + Clarksboro MCH (Intel 7300 chipset). + config EDAC_MPC85XX tristate "Freescale MPC83xx / MPC85xx" depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx) diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index ca6b1bb24ccc..bca4369d6b7a 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o obj-$(CONFIG_EDAC_I5000) += i5000_edac.o obj-$(CONFIG_EDAC_I5100) += i5100_edac.o obj-$(CONFIG_EDAC_I5400) += i5400_edac.o +obj-$(CONFIG_EDAC_I7300) += i7300_edac.o obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c new file mode 100644 index 000000000000..eb3f30e96ee3 --- /dev/null +++ b/drivers/edac/i7300_edac.c @@ -0,0 +1,1373 @@ +/* + * Intel 7300 class Memory Controllers kernel module (Clarksboro) + * + * This file may be distributed under the terms of the + * GNU General Public License version 2 only. + * + * Copyright (c) 2010 by: + * Mauro Carvalho Chehab + * + * Red Hat Inc. http://www.redhat.com + * + * Intel 7300 Chipset Memory Controller Hub (MCH) - Datasheet + * http://www.intel.com/Assets/PDF/datasheet/318082.pdf + * + * TODO: The chipset allows checking for PCI Express errors also. Currently, + * the driver covers only memory errors + * + * This driver uses "csrows" EDAC attribute to represent DIMM slot# + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> +#include <linux/edac.h> +#include <linux/mmzone.h> + +#include "edac_core.h" + +/* + * Alter this version for the I7300 module when modifications are made + */ +#define I7300_REVISION " Ver: 1.0.0 " __DATE__ + +#define EDAC_MOD_STR "i7300_edac" + +#define i7300_printk(level, fmt, arg...) \ + edac_printk(level, "i7300", fmt, ##arg) + +#define i7300_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "i7300", fmt, ##arg) + +/* + * Memory topology is organized as: + * Branch 0 - 2 channels: channels 0 and 1 (FDB0 PCI dev 21.0) + * Branch 1 - 2 channels: channels 2 and 3 (FDB1 PCI dev 22.0) + * Each channel can have up to 8 DIMM sets (called SLOTS) + * Slots should generally be filled in pairs + * Except on Single Channel mode of operation + * just slot 0/channel0 filled on this mode + * On normal operation mode, the two channels on a branch should be + filled together for the same SLOT# + * When in mirrored mode, Branch 1 replicates memory at Branch 0, so, the four + * channels on both branches should be filled + */ + +/* Limits for i7300 */ +#define MAX_SLOTS 8 +#define MAX_BRANCHES 2 +#define MAX_CH_PER_BRANCH 2 +#define MAX_CHANNELS (MAX_CH_PER_BRANCH * MAX_BRANCHES) +#define MAX_MIR 3 + +#define to_channel(ch, branch) ((((branch)) << 1) | (ch)) + +#define to_csrow(slot, ch, branch) \ + (to_channel(ch, branch) | ((slot) << 2)) + + +/* Device 16, + * Function 0: System Address (not documented) + * Function 1: Memory Branch Map, Control, Errors Register + * Function 2: FSB Error Registers + * + * All 3 functions of Device 16 (0,1,2) share the SAME DID and + * uses PCI_DEVICE_ID_INTEL_I7300_MCH_ERR for device 16 (0,1,2), + * PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 and PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 + * for device 21 (0,1). + */ + /* OFFSETS for Function 0 */ +#define AMBASE 0x48 /* AMB Mem Mapped Reg Region Base */ +#define MAXCH 0x56 /* Max Channel Number */ +#define MAXDIMMPERCH 0x57 /* Max DIMM PER Channel Number */ + /* OFFSETS for Function 1 */ +#define TOLM 0x6C +#define REDMEMB 0x7C + +#define MIR0 0x80 +#define MIR1 0x84 +#define MIR2 0x88 + +#if 0 +#define AMIR0 0x8c +#define AMIR1 0x90 +#define AMIR2 0x94 + +/*TODO: double check it */ +#define REC_ECC_LOCATOR_ODD(x) ((x) & 0x3fe00) /* bits [17:9] indicate ODD, [8:0] indicate EVEN */ + /* Fatal error registers */ +#define FERR_FAT_FBD 0x98 + +/*TODO: double check it */ +#define FERR_FAT_FBDCHAN (3<<28) /* channel index where the highest-order error occurred */ + +#define NERR_FAT_FBD 0x9c +#define FERR_NF_FBD 0xa0 + /* Non-fatal error register */ +#define NERR_NF_FBD 0xa4 + /* Enable error mask */ +#define EMASK_FBD 0xa8 + +#define ERR0_FBD 0xac +#define ERR1_FBD 0xb0 +#define ERR2_FBD 0xb4 +#define MCERR_FBD 0xb8 + +#endif + +/* TODO: Dev 16 fn1 allows memory error injection - offsets 0x100-0x10b */ + /* TODO: OFFSETS for Device 16 Function 2 */ +/* + * Device 21, + * Function 0: Memory Map Branch 0 + * + * Device 22, + * Function 0: Memory Map Branch 1 + */ + /* OFFSETS for Function 0 */ +/* + * Note: Other Intel EDAC drivers use AMBPRESENT to identify the available + * memory. From datasheet item 7.3.1 (FB-DIMM technology & organization), it + * seems that we cannot use this information directly for the same usage. + * Each memory slot may have up to 2 AMB interfaces, one for the incoming and another + * for the outgoing interface to the next slot.
+ * For now, the driver just stores the AMB present registers, but rely only at + * the MTR info to detect memory. + * Datasheet is also not clear about how to map each AMBPRESENT registers to + * one of the 4 available channels. + */ +#define AMBPRESENT_0 0x64 +#define AMBPRESENT_1 0x66 + +const static u16 mtr_regs [MAX_SLOTS] = { + 0x80, 0x84, 0x88, 0x8c, + 0x82, 0x86, 0x8a, 0x8e +}; + +/* Defines to extract the vaious fields from the + * MTRx - Memory Technology Registers + */ +#define MTR_DIMMS_PRESENT(mtr) ((mtr) & (1 << 8)) +#define MTR_DIMMS_ETHROTTLE(mtr) ((mtr) & (1 << 7)) +#define MTR_DRAM_WIDTH(mtr) (((mtr) & (1 << 6)) ? 8 : 4) +#define MTR_DRAM_BANKS(mtr) (((mtr) & (1 << 5)) ? 8 : 4) +#define MTR_DIMM_RANKS(mtr) (((mtr) & (1 << 4)) ? 1 : 0) +#define MTR_DIMM_ROWS(mtr) (((mtr) >> 2) & 0x3) +#define MTR_DRAM_BANKS_ADDR_BITS 2 +#define MTR_DIMM_ROWS_ADDR_BITS(mtr) (MTR_DIMM_ROWS(mtr) + 13) +#define MTR_DIMM_COLS(mtr) ((mtr) & 0x3) +#define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10) + +#if 0 + /* OFFSETS for Function 1 */ + +/* TODO */ +#define NRECFGLOG 0x74 +#define RECFGLOG 0x78 +#define NRECMEMA 0xbe +#define NRECMEMB 0xc0 +#define NRECFB_DIMMA 0xc4 +#define NRECFB_DIMMB 0xc8 +#define NRECFB_DIMMC 0xcc +#define NRECFB_DIMMD 0xd0 +#define NRECFB_DIMME 0xd4 +#define NRECFB_DIMMF 0xd8 +#define REDMEMA 0xdC +#define RECMEMA 0xf0 +#define RECMEMB 0xf4 +#define RECFB_DIMMA 0xf8 +#define RECFB_DIMMB 0xec +#define RECFB_DIMMC 0xf0 +#define RECFB_DIMMD 0xf4 +#define RECFB_DIMME 0xf8 +#define RECFB_DIMMF 0xfC + +/* This applies to FERR_NF_FB-DIMM as well as FERR_FAT_FB-DIMM */ +static inline int extract_fbdchan_indx(u32 x) +{ + return (x>>28) & 0x3; +} +#endif + +#ifdef CONFIG_EDAC_DEBUG +/* MTR NUMROW */ +static const char *numrow_toString[] = { + "8,192 - 13 rows", + "16,384 - 14 rows", + "32,768 - 15 rows", + "65,536 - 16 rows" +}; + +/* MTR NUMCOL */ +static const char *numcol_toString[] = { + "1,024 - 10 columns", + "2,048 - 11 columns", + "4,096 - 12 columns", + "reserved" +}; +#endif + +#if 0 + +/* + * Error indicator bits and masks + * Error masks are according with Table 5-17 of i7300 datasheet + */ + +enum error_mask { + EMASK_M1 = 1<<0, /* Memory Write error on non-redundant retry */ + EMASK_M2 = 1<<1, /* Memory or FB-DIMM configuration CRC read error */ + EMASK_M3 = 1<<2, /* Reserved */ + EMASK_M4 = 1<<3, /* Uncorrectable Data ECC on Replay */ + EMASK_M5 = 1<<4, /* Aliased Uncorrectable Non-Mirrored Demand Data ECC */ + EMASK_M6 = 1<<5, /* Unsupported on i7300 */ + EMASK_M7 = 1<<6, /* Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ + EMASK_M8 = 1<<7, /* Aliased Uncorrectable Patrol Data ECC */ + EMASK_M9 = 1<<8, /* Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC */ + EMASK_M10 = 1<<9, /* Unsupported on i7300 */ + EMASK_M11 = 1<<10, /* Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC */ + EMASK_M12 = 1<<11, /* Non-Aliased Uncorrectable Patrol Data ECC */ + EMASK_M13 = 1<<12, /* Memory Write error on first attempt */ + EMASK_M14 = 1<<13, /* FB-DIMM Configuration Write error on first attempt */ + EMASK_M15 = 1<<14, /* Memory or FB-DIMM configuration CRC read error */ + EMASK_M16 = 1<<15, /* Channel Failed-Over Occurred */ + EMASK_M17 = 1<<16, /* Correctable Non-Mirrored Demand Data ECC */ + EMASK_M18 = 1<<17, /* Unsupported on i7300 */ + EMASK_M19 = 1<<18, /* Correctable Resilver- or Spare-Copy Data ECC */ + EMASK_M20 = 1<<19, /* Correctable Patrol Data ECC */ + EMASK_M21 = 1<<20, /* FB-DIMM Northbound parity error on FB-DIMM Sync Status */ 
+ EMASK_M22 = 1<<21, /* SPD protocol Error */ + EMASK_M23 = 1<<22, /* Non-Redundant Fast Reset Timeout */ + EMASK_M24 = 1<<23, /* Refresh error */ + EMASK_M25 = 1<<24, /* Memory Write error on redundant retry */ + EMASK_M26 = 1<<25, /* Redundant Fast Reset Timeout */ + EMASK_M27 = 1<<26, /* Correctable Counter Threshold Exceeded */ + EMASK_M28 = 1<<27, /* DIMM-Spare Copy Completed */ + EMASK_M29 = 1<<28, /* DIMM-Isolation Completed */ +}; + +/* + * Names to translate bit error into something useful + */ +static const char *error_name[] = { + [0] = "Memory Write error on non-redundant retry", + [1] = "Memory or FB-DIMM configuration CRC read error", + /* Reserved */ + [3] = "Uncorrectable Data ECC on Replay", + [4] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC", + /* M6 Unsupported on i7300 */ + [6] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", + [7] = "Aliased Uncorrectable Patrol Data ECC", + [8] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC", + /* M10 Unsupported on i7300 */ + [10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC", + [11] = "Non-Aliased Uncorrectable Patrol Data ECC", + [12] = "Memory Write error on first attempt", + [13] = "FB-DIMM Configuration Write error on first attempt", + [14] = "Memory or FB-DIMM configuration CRC read error", + [15] = "Channel Failed-Over Occurred", + [16] = "Correctable Non-Mirrored Demand Data ECC", + /* M18 Unsupported on i7300 */ + [18] = "Correctable Resilver- or Spare-Copy Data ECC", + [19] = "Correctable Patrol Data ECC", + [20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status", + [21] = "SPD protocol Error", + [22] = "Non-Redundant Fast Reset Timeout", + [23] = "Refresh error", + [24] = "Memory Write error on redundant retry", + [25] = "Redundant Fast Reset Timeout", + [26] = "Correctable Counter Threshold Exceeded", + [27] = "DIMM-Spare Copy Completed", + [28] = "DIMM-Isolation Completed", +}; + +/* Fatal errors */ +#define ERROR_FAT_MASK (EMASK_M1 | \ + EMASK_M2 | \ + EMASK_M23) + +/* Correctable errors */ +#define ERROR_NF_CORRECTABLE (EMASK_M27 | \ + EMASK_M20 | \ + EMASK_M19 | \ + EMASK_M18 | \ + EMASK_M17 | \ + EMASK_M16) +#define ERROR_NF_DIMM_SPARE (EMASK_M29 | \ + EMASK_M28) +#define ERROR_NF_SPD_PROTOCOL (EMASK_M22) +#define ERROR_NF_NORTH_CRC (EMASK_M21) + +/* Recoverable errors */ +#define ERROR_NF_RECOVERABLE (EMASK_M26 | \ + EMASK_M25 | \ + EMASK_M24 | \ + EMASK_M15 | \ + EMASK_M14 | \ + EMASK_M13 | \ + EMASK_M12 | \ + EMASK_M11 | \ + EMASK_M9 | \ + EMASK_M8 | \ + EMASK_M7 | \ + EMASK_M5) + +/* uncorrectable errors */ +#define ERROR_NF_UNCORRECTABLE (EMASK_M4) + +/* mask to all non-fatal errors */ +#define ERROR_NF_MASK (ERROR_NF_CORRECTABLE | \ + ERROR_NF_UNCORRECTABLE | \ + ERROR_NF_RECOVERABLE | \ + ERROR_NF_DIMM_SPARE | \ + ERROR_NF_SPD_PROTOCOL | \ + ERROR_NF_NORTH_CRC) + +/* + * Define error masks for the several registers + */ + +/* Enable all fatal and non fatal errors */ +#define ENABLE_EMASK_ALL (ERROR_FAT_MASK | ERROR_NF_MASK) + +/* mask for fatal error registers */ +#define FERR_FAT_MASK ERROR_FAT_MASK + +/* masks for non-fatal error register */ +static inline int to_nf_mask(unsigned int mask) +{ + return (mask & EMASK_M29) | (mask >> 3); +}; + +static inline int from_nf_ferr(unsigned int mask) +{ + return (mask & EMASK_M29) | /* Bit 28 */ + (mask & ((1 << 28) - 1) << 3); /* Bits 0 to 27 */ +}; + +#define FERR_NF_MASK to_nf_mask(ERROR_NF_MASK) +#define FERR_NF_CORRECTABLE to_nf_mask(ERROR_NF_CORRECTABLE) +#define FERR_NF_DIMM_SPARE 
to_nf_mask(ERROR_NF_DIMM_SPARE) +#define FERR_NF_SPD_PROTOCOL to_nf_mask(ERROR_NF_SPD_PROTOCOL) +#define FERR_NF_NORTH_CRC to_nf_mask(ERROR_NF_NORTH_CRC) +#define FERR_NF_RECOVERABLE to_nf_mask(ERROR_NF_RECOVERABLE) +#define FERR_NF_UNCORRECTABLE to_nf_mask(ERROR_NF_UNCORRECTABLE) + +#endif + +/* Device name and register DID (Device ID) */ +struct i7300_dev_info { + const char *ctl_name; /* name for this device */ + u16 fsb_mapping_errors; /* DID for the branchmap,control */ +}; + +/* Table of devices attributes supported by this driver */ +static const struct i7300_dev_info i7300_devs[] = { + { + .ctl_name = "I7300", + .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, + }, +}; + +struct i7300_dimm_info { + int megabytes; /* size, 0 means not present */ +}; + +/* driver private data structure */ +struct i7300_pvt { + struct pci_dev *system_address; /* 16.0 */ + struct pci_dev *branchmap_werrors; /* 16.1 */ + struct pci_dev *fsb_error_regs; /* 16.2 */ + struct pci_dev *branch_pci[MAX_BRANCHES]; /* 21.0 and 22.0 */ + + u16 tolm; /* top of low memory */ + u64 ambase; /* AMB BAR */ + + u16 mir[MAX_MIR]; + + u16 mtr[MAX_SLOTS][MAX_BRANCHES]; /* Memory Technlogy Reg */ + u16 ambpresent[MAX_CHANNELS]; /* AMB present regs */ + + /* DIMM information matrix, allocating architecture maximums */ + struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS]; +}; + +#if 0 +/* I7300 MCH error information retrieved from Hardware */ +struct i7300_error_info { + /* These registers are always read from the MC */ + u32 ferr_fat_fbd; /* First Errors Fatal */ + u32 nerr_fat_fbd; /* Next Errors Fatal */ + u32 ferr_nf_fbd; /* First Errors Non-Fatal */ + u32 nerr_nf_fbd; /* Next Errors Non-Fatal */ + + /* These registers are input ONLY if there was a Recoverable Error */ + u32 redmemb; /* Recoverable Mem Data Error log B */ + u16 recmema; /* Recoverable Mem Error log A */ + u32 recmemb; /* Recoverable Mem Error log B */ + + /* These registers are input ONLY if there was a Non-Rec Error */ + u16 nrecmema; /* Non-Recoverable Mem log A */ + u16 nrecmemb; /* Non-Recoverable Mem log B */ + +}; +#endif + +/* FIXME: Why do we need to have this static? */ +static struct edac_pci_ctl_info *i7300_pci; + + +#if 0 +/* note that nrec_rdwr changed from NRECMEMA to NRECMEMB between the 5000 and + 5400 better to use an inline function than a macro in this case */ +static inline int nrec_bank(struct i7300_error_info *info) +{ + return ((info->nrecmema) >> 12) & 0x7; +} +static inline int nrec_rank(struct i7300_error_info *info) +{ + return ((info->nrecmema) >> 8) & 0xf; +} +static inline int nrec_buf_id(struct i7300_error_info *info) +{ + return ((info->nrecmema)) & 0xff; +} +static inline int nrec_rdwr(struct i7300_error_info *info) +{ + return (info->nrecmemb) >> 31; +} +/* This applies to both NREC and REC string so it can be used with nrec_rdwr + and rec_rdwr */ +static inline const char *rdwr_str(int rdwr) +{ + return rdwr ? 
"Write" : "Read"; +} +static inline int nrec_cas(struct i7300_error_info *info) +{ + return ((info->nrecmemb) >> 16) & 0x1fff; +} +static inline int nrec_ras(struct i7300_error_info *info) +{ + return (info->nrecmemb) & 0xffff; +} +static inline int rec_bank(struct i7300_error_info *info) +{ + return ((info->recmema) >> 12) & 0x7; +} +static inline int rec_rank(struct i7300_error_info *info) +{ + return ((info->recmema) >> 8) & 0xf; +} +static inline int rec_rdwr(struct i7300_error_info *info) +{ + return (info->recmemb) >> 31; +} +static inline int rec_cas(struct i7300_error_info *info) +{ + return ((info->recmemb) >> 16) & 0x1fff; +} +static inline int rec_ras(struct i7300_error_info *info) +{ + return (info->recmemb) & 0xffff; +} + +/* + * i7300_get_error_info Retrieve the hardware error information from + * the hardware and cache it in the 'info' + * structure + */ +static void i7300_get_error_info(struct mem_ctl_info *mci, + struct i7300_error_info *info) +{ + struct i7300_pvt *pvt; + u32 value; + + pvt = mci->pvt_info; + + /* read in the 1st FATAL error register */ + pci_read_config_dword(pvt->branchmap_werrors, FERR_FAT_FBD, &value); + + /* Mask only the bits that the doc says are valid + */ + value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK); + + /* If there is an error, then read in the + NEXT FATAL error register and the Memory Error Log Register A + */ + if (value & FERR_FAT_MASK) { + info->ferr_fat_fbd = value; + + /* harvest the various error data we need */ + pci_read_config_dword(pvt->branchmap_werrors, + NERR_FAT_FBD, &info->nerr_fat_fbd); + pci_read_config_word(pvt->branchmap_werrors, + NRECMEMA, &info->nrecmema); + pci_read_config_word(pvt->branchmap_werrors, + NRECMEMB, &info->nrecmemb); + + /* Clear the error bits, by writing them back */ + pci_write_config_dword(pvt->branchmap_werrors, + FERR_FAT_FBD, value); + } else { + info->ferr_fat_fbd = 0; + info->nerr_fat_fbd = 0; + info->nrecmema = 0; + info->nrecmemb = 0; + } + + /* read in the 1st NON-FATAL error register */ + pci_read_config_dword(pvt->branchmap_werrors, FERR_NF_FBD, &value); + + /* If there is an error, then read in the 1st NON-FATAL error + * register as well */ + if (value & FERR_NF_MASK) { + info->ferr_nf_fbd = value; + + /* harvest the various error data we need */ + pci_read_config_dword(pvt->branchmap_werrors, + NERR_NF_FBD, &info->nerr_nf_fbd); + pci_read_config_word(pvt->branchmap_werrors, + RECMEMA, &info->recmema); + pci_read_config_dword(pvt->branchmap_werrors, + RECMEMB, &info->recmemb); + pci_read_config_dword(pvt->branchmap_werrors, + REDMEMB, &info->redmemb); + + /* Clear the error bits, by writing them back */ + pci_write_config_dword(pvt->branchmap_werrors, + FERR_NF_FBD, value); + } else { + info->ferr_nf_fbd = 0; + info->nerr_nf_fbd = 0; + info->recmema = 0; + info->recmemb = 0; + info->redmemb = 0; + } +} + +/* + * i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci, + * struct i7300_error_info *info, + * int handle_errors); + * + * handle the Intel FATAL and unrecoverable errors, if any + */ +static void i7300_proccess_non_recoverable_info(struct mem_ctl_info *mci, + struct i7300_error_info *info, + unsigned long allErrors) +{ + char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; + int branch; + int channel; + int bank; + int buf_id; + int rank; + int rdwr; + int ras, cas; + int errnum; + char *type = NULL; + + if (!allErrors) + return; /* if no error, return now */ + + if (allErrors & ERROR_FAT_MASK) + type = "FATAL"; + else if (allErrors & FERR_NF_UNCORRECTABLE) + type = "NON-FATAL 
uncorrected"; + else + type = "NON-FATAL recoverable"; + + /* ONLY ONE of the possible error bits will be set, as per the docs */ + + branch = extract_fbdchan_indx(info->ferr_fat_fbd); + channel = branch; + + /* Use the NON-Recoverable macros to extract data */ + bank = nrec_bank(info); + rank = nrec_rank(info); + buf_id = nrec_buf_id(info); + rdwr = nrec_rdwr(info); + ras = nrec_ras(info); + cas = nrec_cas(info); + + debugf0("\t\tCSROW= %d Channels= %d,%d (Branch= %d " + "DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, channel + 1, branch >> 1, bank, + buf_id, rdwr_str(rdwr), ras, cas); + + /* Only 1 bit will be on */ + errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); + + /* Form out message */ + snprintf(msg, sizeof(msg), + "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s " + "RAS=%d CAS=%d %s Err=0x%lx (%s))", + type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas, + type, allErrors, error_name[errnum]); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); +} + +/* + * i7300_process_fatal_error_info(struct mem_ctl_info *mci, + * struct i7300_error_info *info, + * int handle_errors); + * + * handle the Intel NON-FATAL errors, if any + */ +static void i7300_process_nonfatal_error_info(struct mem_ctl_info *mci, + struct i7300_error_info *info) +{ + char msg[EDAC_MC_LABEL_LEN + 1 + 90 + 80]; + unsigned long allErrors; + int branch; + int channel; + int bank; + int rank; + int rdwr; + int ras, cas; + int errnum; + + /* mask off the Error bits that are possible */ + allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK); + if (!allErrors) + return; /* if no error, return now */ + + /* ONLY ONE of the possible error bits will be set, as per the docs */ + + if (allErrors & (ERROR_NF_UNCORRECTABLE | ERROR_NF_RECOVERABLE)) { + i7300_proccess_non_recoverable_info(mci, info, allErrors); + return; + } + + /* Correctable errors */ + if (allErrors & ERROR_NF_CORRECTABLE) { + debugf0("\tCorrected bits= 0x%lx\n", allErrors); + + branch = extract_fbdchan_indx(info->ferr_nf_fbd); + + channel = 0; + if (REC_ECC_LOCATOR_ODD(info->redmemb)) + channel = 1; + + /* Convert channel to be based from zero, instead of + * from branch base of 0 */ + channel += branch; + + bank = rec_bank(info); + rank = rec_rank(info); + rdwr = rec_rdwr(info); + ras = rec_ras(info); + cas = rec_cas(info); + + /* Only 1 bit will be on */ + errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); + + debugf0("\t\tCSROW= %d Channel= %d (Branch %d " + "DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n", + rank, channel, branch >> 1, bank, + rdwr_str(rdwr), ras, cas); + + /* Form out message */ + snprintf(msg, sizeof(msg), + "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s " + "RAS=%d CAS=%d, CE Err=0x%lx (%s))", + branch >> 1, bank, rdwr_str(rdwr), ras, cas, + allErrors, error_name[errnum]); + + /* Call the helper to output message */ + edac_mc_handle_fbd_ce(mci, rank, channel, msg); + + return; + } + + /* Miscelaneous errors */ + errnum = find_first_bit(&allErrors, ARRAY_SIZE(error_name)); + + branch = extract_fbdchan_indx(info->ferr_nf_fbd); + + i7300_mc_printk(mci, KERN_EMERG, + "Non-Fatal misc error (Branch=%d Err=%#lx (%s))", + branch >> 1, allErrors, error_name[errnum]); +} + +/* + * i7300_process_error_info Process the error info that is + * in the 'info' structure, previously retrieved from hardware + */ +static void i7300_process_error_info(struct mem_ctl_info *mci, + struct i7300_error_info *info) +{ u32 allErrors; + + /* First 
handle any fatal errors that occurred */ + allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK); + i7300_proccess_non_recoverable_info(mci, info, allErrors); + + /* now handle any non-fatal errors that occurred */ + i7300_process_nonfatal_error_info(mci, info); +} + +/* + * i7300_clear_error Retrieve any error from the hardware + * but do NOT process that error. + * Used for 'clearing' out of previous errors + * Called by the Core module. + */ +static void i7300_clear_error(struct mem_ctl_info *mci) +{ + struct i7300_error_info info; + + i7300_get_error_info(mci, &info); +} + +/* + * i7300_check_error Retrieve and process errors reported by the + * hardware. Called by the Core module. + */ +static void i7300_check_error(struct mem_ctl_info *mci) +{ + struct i7300_error_info info; + debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__); + i7300_get_error_info(mci, &info); + i7300_process_error_info(mci, &info); +} + +/* + * i7300_enable_error_reporting + * Turn on the memory reporting features of the hardware + */ +static void i7300_enable_error_reporting(struct mem_ctl_info *mci) +{ + struct i7300_pvt *pvt; + u32 fbd_error_mask; + + pvt = mci->pvt_info; + + /* Read the FBD Error Mask Register */ + pci_read_config_dword(pvt->branchmap_werrors, EMASK_FBD, + &fbd_error_mask); + + /* Enable with a '0' */ + fbd_error_mask &= ~(ENABLE_EMASK_ALL); + + pci_write_config_dword(pvt->branchmap_werrors, EMASK_FBD, + fbd_error_mask); +} +#endif + +/* + * determine_mtr(pvt, csrow, channel) + * + * return the proper MTR register as determine by the csrow and desired channel + */ +static int decode_mtr(struct i7300_pvt *pvt, + int slot, int ch, int branch, + struct i7300_dimm_info *dinfo, + struct csrow_info *p_csrow) +{ + int mtr, ans, addrBits, channel; + + channel = to_channel(ch, branch); + + mtr = pvt->mtr[slot][branch]; + ans = MTR_DIMMS_PRESENT(mtr) ? 1 : 0; + + debugf2("\tMTR%d CH%d: DIMMs are %s (mtr)\n", + slot, channel, + ans ? "Present" : "NOT Present"); + + /* Determine if there is a DIMM present in this DIMM slot */ + +#if 0 + if (!amb_present || !ans) + return 0; +#else + if (!ans) + return 0; +#endif + + /* Start with the number of bits for a Bank + * on the DRAM */ + addrBits = MTR_DRAM_BANKS_ADDR_BITS; + /* Add thenumber of ROW bits */ + addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr); + /* add the number of COLUMN bits */ + addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr); + /* add the number of RANK bits */ + addrBits += MTR_DIMM_RANKS(mtr); + + addrBits += 6; /* add 64 bits per DIMM */ + addrBits -= 20; /* divide by 2^^20 */ + addrBits -= 3; /* 8 bits per bytes */ + + dinfo->megabytes = 1 << addrBits; + + debugf2("\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr)); + + debugf2("\t\tELECTRICAL THROTTLING is %s\n", + MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled"); + + debugf2("\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr)); + debugf2("\t\tNUMRANK: %s\n", MTR_DIMM_RANKS(mtr) ? 
"double" : "single"); + debugf2("\t\tNUMROW: %s\n", numrow_toString[MTR_DIMM_ROWS(mtr)]); + debugf2("\t\tNUMCOL: %s\n", numcol_toString[MTR_DIMM_COLS(mtr)]); + debugf2("\t\tSIZE: %d MB\n", dinfo->megabytes); + + p_csrow->grain = 8; + p_csrow->nr_pages = dinfo->megabytes << 8; + p_csrow->mtype = MEM_FB_DDR2; + p_csrow->edac_mode = EDAC_S8ECD8ED; + + /* ask what device type on this row */ + if (MTR_DRAM_WIDTH(mtr)) + p_csrow->dtype = DEV_X8; + else + p_csrow->dtype = DEV_X4; + + return mtr; +} + +/* + * print_dimm_size + * + * also will output a DIMM matrix map, if debug is enabled, for viewing + * how the DIMMs are populated + */ +static void print_dimm_size(struct i7300_pvt *pvt) +{ + struct i7300_dimm_info *dinfo; + char *p, *mem_buffer; + int space, n; + int channel, slot; + + space = PAGE_SIZE; + mem_buffer = p = kmalloc(space, GFP_KERNEL); + if (p == NULL) { + i7300_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n", + __FILE__, __func__); + return; + } + + n = snprintf(p, space, " "); + p += n; + space -= n; + for (channel = 0; channel < MAX_CHANNELS; channel++) { + n = snprintf(p, space, "channel %d | ", channel); + p += n; + space -= n; + } + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + n = snprintf(p, space, "-------------------------------" + "------------------------------"); + p += n; + space -= n; + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + + for (slot = 0; slot < MAX_SLOTS; slot++) { + n = snprintf(p, space, "csrow/SLOT %d ", slot); + p += n; + space -= n; + + for (channel = 0; channel < MAX_CHANNELS; channel++) { + dinfo = &pvt->dimm_info[slot][channel]; + n = snprintf(p, space, "%4d MB | ", dinfo->megabytes); + p += n; + space -= n; + } + + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + } + + n = snprintf(p, space, "-------------------------------" + "------------------------------"); + p += n; + space -= n; + debugf2("%s\n", mem_buffer); + p = mem_buffer; + space = PAGE_SIZE; + + kfree(mem_buffer); +} + +/* + * i7300_init_csrows Initialize the 'csrows' table within + * the mci control structure with the + * addressing of memory. 
+ * + * return: + * 0 success + * 1 no actual memory found on this MC + */ +static int i7300_init_csrows(struct mem_ctl_info *mci) +{ + struct i7300_pvt *pvt; + struct i7300_dimm_info *dinfo; + struct csrow_info *p_csrow; + int empty; + int mtr; + int ch, branch, slot, channel; + + pvt = mci->pvt_info; + + empty = 1; /* Assume NO memory */ + + debugf2("Memory Technology Registers:\n"); + + /* Get the AMB present registers for the four channels */ + for (branch = 0; branch < MAX_BRANCHES; branch++) { + /* Read and dump branch 0's MTRs */ + channel = to_channel(0, branch); + pci_read_config_word(pvt->branch_pci[branch], AMBPRESENT_0, + &pvt->ambpresent[channel]); + debugf2("\t\tAMB-present CH%d = 0x%x:\n", + channel, pvt->ambpresent[channel]); + + channel = to_channel(1, branch); + pci_read_config_word(pvt->branch_pci[branch], AMBPRESENT_1, + &pvt->ambpresent[channel]); + debugf2("\t\tAMB-present CH%d = 0x%x:\n", + channel, pvt->ambpresent[channel]); + } + + /* Get the set of MTR[0-7] regs by each branch */ + for (slot = 0; slot < MAX_SLOTS; slot++) { + int where = mtr_regs[slot]; + for (branch = 0; branch < MAX_BRANCHES; branch++) { + pci_read_config_word(pvt->branch_pci[branch], + where, + &pvt->mtr[slot][branch]); + for (ch = 0; ch < MAX_BRANCHES; ch++) { + int channel = to_channel(ch, branch); + + dinfo = &pvt->dimm_info[slot][channel]; + p_csrow = &mci->csrows[slot]; + + mtr = decode_mtr(pvt, slot, ch, branch, + dinfo, p_csrow); + /* if no DIMMS on this row, continue */ + if (!MTR_DIMMS_PRESENT(mtr)) + continue; + + p_csrow->csrow_idx = slot; + + /* FAKE OUT VALUES, FIXME */ + p_csrow->first_page = 0 + slot * 20; + p_csrow->last_page = 9 + slot * 20; + p_csrow->page_mask = 0xfff; + + empty = 0; + } + } + } + + return empty; +} + +static void decode_mir(int mir_no, u16 mir[MAX_MIR]) +{ + if (mir[mir_no] & 3) + debugf2("MIR%d: limit= 0x%x Branch(es) that participate: %s %s\n", + mir_no, + (mir[mir_no] >> 4) & 0xfff, + (mir[mir_no] & 1) ? "B0" : "", + (mir[mir_no] & 2) ? 
"B1": ""); +} + +/* + * i7300_get_mc_regs read in the necessary registers and + * cache locally + * + * Fills in the private data members + */ +static int i7300_get_mc_regs(struct mem_ctl_info *mci) +{ + struct i7300_pvt *pvt; + u32 actual_tolm; + int i, rc; + + pvt = mci->pvt_info; + + pci_read_config_dword(pvt->system_address, AMBASE, + (u32 *) &pvt->ambase); + + debugf2("AMBASE= 0x%lx\n", (long unsigned int)pvt->ambase); + + /* Get the Branch Map regs */ + pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm); + pvt->tolm >>= 12; + debugf2("TOLM (number of 256M regions) =%u (0x%x)\n", pvt->tolm, + pvt->tolm); + + actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28)); + debugf2("Actual TOLM byte addr=%u.%03u GB (0x%x)\n", + actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28); + + pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir[0]); + pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir[1]); + pci_read_config_word(pvt->branchmap_werrors, MIR2, &pvt->mir[2]); + + /* Decode the MIR regs */ + for (i = 0; i < MAX_MIR; i++) + decode_mir(i, pvt->mir); + + rc = i7300_init_csrows(mci); + if (rc < 0) + return rc; + + /* Go and determine the size of each DIMM and place in an + * orderly matrix */ + print_dimm_size(pvt); + + return 0; +} + +/* + * i7300_put_devices 'put' all the devices that we have + * reserved via 'get' + */ +static void i7300_put_devices(struct mem_ctl_info *mci) +{ + struct i7300_pvt *pvt; + int branch; + + pvt = mci->pvt_info; + + /* Decrement usage count for devices */ + for (branch = 0; branch < MAX_CH_PER_BRANCH; branch++) + pci_dev_put(pvt->branch_pci[branch]); + pci_dev_put(pvt->fsb_error_regs); + pci_dev_put(pvt->branchmap_werrors); +} + +/* + * i7300_get_devices Find and perform 'get' operation on the MCH's + * device/functions we want to reference for this driver + * + * Need to 'get' device 16 func 1 and func 2 + */ +static int i7300_get_devices(struct mem_ctl_info *mci, int dev_idx) +{ + struct i7300_pvt *pvt; + struct pci_dev *pdev; + + pvt = mci->pvt_info; + + /* Attempt to 'get' the MCH register we want */ + pdev = NULL; + while (!pvt->branchmap_werrors || !pvt->fsb_error_regs) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, pdev); + if (!pdev) { + /* End of list, leave */ + i7300_printk(KERN_ERR, + "'system address,Process Bus' " + "device not found:" + "vendor 0x%x device 0x%x ERR funcs " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_ERR); + goto error; + } + + /* Store device 16 funcs 1 and 2 */ + switch (PCI_FUNC(pdev->devfn)) { + case 1: + pvt->branchmap_werrors = pdev; + break; + case 2: + pvt->fsb_error_regs = pdev; + break; + } + } + + debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n", + pci_name(pvt->system_address), + pvt->system_address->vendor, pvt->system_address->device); + debugf1("Branchmap, control and errors - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->branchmap_werrors), + pvt->branchmap_werrors->vendor, pvt->branchmap_werrors->device); + debugf1("FSB Error Regs - PCI Bus ID: %s %x:%x\n", + pci_name(pvt->fsb_error_regs), + pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device); + + pvt->branch_pci[0] = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_FB0, + NULL); + if (!pvt->branch_pci[0]) { + i7300_printk(KERN_ERR, + "MC: 'BRANCH 0' device not found:" + "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_FB0); + goto error; + } + + pvt->branch_pci[1] = 
pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_FB1, + NULL); + if (!pvt->branch_pci[1]) { + i7300_printk(KERN_ERR, + "MC: 'BRANCH 1' device not found:" + "vendor 0x%x device 0x%x Func 0 " + "(broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_FB1); + goto error; + } + + return 0; + +error: + i7300_put_devices(mci); + return -ENODEV; +} + +/* + * i7300_probe1 Probe for ONE instance of device to see if it is + * present. + * return: + * 0 for FOUND a device + * < 0 for error code + */ +static int i7300_probe1(struct pci_dev *pdev, int dev_idx) +{ + struct mem_ctl_info *mci; + struct i7300_pvt *pvt; + int num_channels; + int num_dimms_per_channel; + int num_csrows; + + if (dev_idx >= ARRAY_SIZE(i7300_devs)) + return -EINVAL; + + debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n", + __func__, + pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + + /* We only are looking for func 0 of the set */ + if (PCI_FUNC(pdev->devfn) != 0) + return -ENODEV; + + /* As we don't have a motherboard identification routine to determine + * actual number of slots/dimms per channel, we thus utilize the + * resource as specified by the chipset. Thus, we might have + * have more DIMMs per channel than actually on the mobo, but this + * allows the driver to support upto the chipset max, without + * some fancy mobo determination. + */ + num_dimms_per_channel = MAX_SLOTS; + num_channels = MAX_CHANNELS; + num_csrows = MAX_SLOTS * MAX_CHANNELS; + + debugf0("MC: %s(): Number of - Channels= %d DIMMS= %d CSROWS= %d\n", + __func__, num_channels, num_dimms_per_channel, num_csrows); + + /* allocate a new MC control structure */ + mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0); + + if (mci == NULL) + return -ENOMEM; + + debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci); + + mci->dev = &pdev->dev; /* record ptr to the generic device */ + + pvt = mci->pvt_info; + pvt->system_address = pdev; /* Record this device in our private */ + + /* 'get' the pci devices we want to reserve for our use */ + if (i7300_get_devices(mci, dev_idx)) + goto fail0; + + mci->mc_idx = 0; + mci->mtype_cap = MEM_FLAG_FB_DDR2; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "i7300_edac.c"; + mci->mod_ver = I7300_REVISION; + mci->ctl_name = i7300_devs[dev_idx].ctl_name; + mci->dev_name = pci_name(pdev); + mci->ctl_page_to_phys = NULL; + +#if 0 + /* Set the function pointer to an actual operation function */ + mci->edac_check = i7300_check_error; +#endif + + /* initialize the MC control structure 'csrows' table + * with the mapping and control information */ + if (i7300_get_mc_regs(mci)) { + debugf0("MC: Setting mci->edac_cap to EDAC_FLAG_NONE\n" + " because i7300_init_csrows() returned nonzero " + "value\n"); + mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */ + } else { +#if 0 + debugf1("MC: Enable error reporting now\n"); + i7300_enable_error_reporting(mci); +#endif + } + + /* add this new MC control structure to EDAC's list of MCs */ + if (edac_mc_add_mc(mci)) { + debugf0("MC: " __FILE__ + ": %s(): failed edac_mc_add_mc()\n", __func__); + /* FIXME: perhaps some code should go here that disables error + * reporting if we just enabled it + */ + goto fail1; + } + +#if 0 + i7300_clear_error(mci); +#endif + + /* allocating generic PCI control info */ + i7300_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); + if (!i7300_pci) { + printk(KERN_WARNING + "%s(): Unable to create PCI control\n", + __func__); 
+ printk(KERN_WARNING + "%s(): PCI error report via EDAC not setup\n", + __func__); + } + + return 0; + + /* Error exit unwinding stack */ +fail1: + + i7300_put_devices(mci); + +fail0: + edac_mc_free(mci); + return -ENODEV; +} + +/* + * i7300_init_one constructor for one instance of device + * + * returns: + * negative on error + * count (>= 0) + */ +static int __devinit i7300_init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int rc; + + debugf0("MC: " __FILE__ ": %s()\n", __func__); + + /* wake up device */ + rc = pci_enable_device(pdev); + if (rc == -EIO) + return rc; + + /* now probe and enable the device */ + return i7300_probe1(pdev, id->driver_data); +} + +/* + * i7300_remove_one destructor for one instance of device + * + */ +static void __devexit i7300_remove_one(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci; + + debugf0(__FILE__ ": %s()\n", __func__); + + if (i7300_pci) + edac_pci_release_generic_ctl(i7300_pci); + + mci = edac_mc_del_mc(&pdev->dev); + if (!mci) + return; + + /* retrieve references to resources, and free those resources */ + i7300_put_devices(mci); + + edac_mc_free(mci); +} + +/* + * pci_device_id table for which devices we are looking for + * + * The "E500P" device is the first device supported. + */ +static const struct pci_device_id i7300_pci_tbl[] __devinitdata = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR)}, + {0,} /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, i7300_pci_tbl); + +/* + * i7300_driver pci_driver structure for this module + * + */ +static struct pci_driver i7300_driver = { + .name = "i7300_edac", + .probe = i7300_init_one, + .remove = __devexit_p(i7300_remove_one), + .id_table = i7300_pci_tbl, +}; + +/* + * i7300_init Module entry function + * Try to initialize this module for its devices + */ +static int __init i7300_init(void) +{ + int pci_rc; + + debugf2("MC: " __FILE__ ": %s()\n", __func__); + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + + pci_rc = pci_register_driver(&i7300_driver); + + return (pci_rc < 0) ? pci_rc : 0; +} + +/* + * i7300_exit() Module exit function + * Unregister the driver + */ +static void __exit i7300_exit(void) +{ + debugf2("MC: " __FILE__ ": %s()\n", __func__); + pci_unregister_driver(&i7300_driver); +} + +module_init(i7300_init); +module_exit(i7300_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Mauro Carvalho Chehab "); +MODULE_AUTHOR("Red Hat Inc. 
(http://www.redhat.com)"); +MODULE_DESCRIPTION("MC Driver for Intel I7300 memory controllers - " + I7300_REVISION); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d36cad..2eabe311f0d3 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -815,7 +815,7 @@ #define PCI_VENDOR_ID_ANIGMA 0x1051 #define PCI_DEVICE_ID_ANIGMA_MC145575 0x0100 - + #define PCI_VENDOR_ID_EFAR 0x1055 #define PCI_DEVICE_ID_EFAR_SLC90E66_1 0x9130 #define PCI_DEVICE_ID_EFAR_SLC90E66_3 0x9463 @@ -1446,7 +1446,7 @@ #define PCI_VENDOR_ID_ZIATECH 0x1138 #define PCI_DEVICE_ID_ZIATECH_5550_HC 0x5550 - + #define PCI_VENDOR_ID_SYSKONNECT 0x1148 #define PCI_DEVICE_ID_SYSKONNECT_TR 0x4200 @@ -1600,8 +1600,8 @@ #define PCI_DEVICE_ID_RP8OCTA 0x0005 #define PCI_DEVICE_ID_RP8J 0x0006 #define PCI_DEVICE_ID_RP4J 0x0007 -#define PCI_DEVICE_ID_RP8SNI 0x0008 -#define PCI_DEVICE_ID_RP16SNI 0x0009 +#define PCI_DEVICE_ID_RP8SNI 0x0008 +#define PCI_DEVICE_ID_RP16SNI 0x0009 #define PCI_DEVICE_ID_RPP4 0x000A #define PCI_DEVICE_ID_RPP8 0x000B #define PCI_DEVICE_ID_RP4M 0x000D @@ -1611,9 +1611,9 @@ #define PCI_DEVICE_ID_URP8INTF 0x0802 #define PCI_DEVICE_ID_URP16INTF 0x0803 #define PCI_DEVICE_ID_URP8OCTA 0x0805 -#define PCI_DEVICE_ID_UPCI_RM3_8PORT 0x080C +#define PCI_DEVICE_ID_UPCI_RM3_8PORT 0x080C #define PCI_DEVICE_ID_UPCI_RM3_4PORT 0x080D -#define PCI_DEVICE_ID_CRP16INTF 0x0903 +#define PCI_DEVICE_ID_CRP16INTF 0x0903 #define PCI_VENDOR_ID_CYCLADES 0x120e #define PCI_DEVICE_ID_CYCLOM_Y_Lo 0x0100 @@ -2139,7 +2139,7 @@ #define PCI_DEVICE_ID_RASTEL_2PORT 0x2000 #define PCI_VENDOR_ID_ZOLTRIX 0x15b0 -#define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0 +#define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0 #define PCI_VENDOR_ID_MELLANOX 0x15b3 #define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44 @@ -2413,7 +2413,7 @@ #define PCI_DEVICE_ID_INTEL_82815_MC 0x1130 #define PCI_DEVICE_ID_INTEL_82815_CGC 0x1132 #define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 -#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 +#define PCI_DEVICE_ID_INTEL_7505_0 0x2550 #define PCI_DEVICE_ID_INTEL_7205_0 0x255d #define PCI_DEVICE_ID_INTEL_82437 0x122d #define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e @@ -2616,6 +2616,9 @@ #define PCI_DEVICE_ID_INTEL_MCH_PC 0x3599 #define PCI_DEVICE_ID_INTEL_MCH_PC1 0x359a #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e +#define PCI_DEVICE_ID_INTEL_I7300_MCH_ERR 0x360c +#define PCI_DEVICE_ID_INTEL_I7300_MCH_FB0 0x360f +#define PCI_DEVICE_ID_INTEL_I7300_MCH_FB1 0x3610 #define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b #define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c #define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710 -- cgit v1.2.3 From dca43c75e7e545694a9dd6288553f55c53e2a3a3 Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Fri, 27 Aug 2010 19:13:28 +0000 Subject: tcp: Add TCP_USER_TIMEOUT socket option. This patch provides "user timeout" support as described in RFC 793. The socket option is also needed for the local half of RFC 5482 "TCP User Timeout Option". TCP_USER_TIMEOUT is a TCP-level socket option that takes an unsigned int which, when > 0, specifies the maximum amount of time in ms that transmitted data may remain unacknowledged before TCP forcefully closes the corresponding connection and returns ETIMEDOUT to the application. If 0 is given, TCP continues to use the system default. Increasing the user timeout allows a TCP connection to survive extended periods without end-to-end connectivity; decreasing it allows applications to "fail fast" if so desired, where otherwise tearing down a dead connection may take up to 20 minutes with the current system defaults in a normal WAN environment.
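As a usage sketch (an editor's illustration, not part of the original patch; it assumes a connected TCP socket fd, a libc that exposes TCP_USER_TIMEOUT, and an arbitrary 30-second value):

	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <stdio.h>
	#include <sys/socket.h>

	/* Abort the connection with ETIMEDOUT if transmitted data stays
	 * unacknowledged for more than 30000 ms; 0 restores the default. */
	static int enable_user_timeout(int fd)
	{
		unsigned int timeout_ms = 30000;

		if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
			       &timeout_ms, sizeof(timeout_ms)) < 0) {
			perror("setsockopt(TCP_USER_TIMEOUT)");
			return -1;
		}
		return 0;
	}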
The socket option can be set in any state of a TCP connection, but takes effect only in the synchronized states of a connection (ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, or LAST-ACK). Moreover, when used with the TCP keepalive (SO_KEEPALIVE) option, TCP_USER_TIMEOUT overrides keepalive in determining when to close a connection due to keepalive failure. The option does not change when TCP retransmits a packet or when a keepalive probe is sent; it only determines when the connection is aborted. This option, like many others, is inherited by an acceptor from its listener. Signed-off-by: H.K. Jerry Chu Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/net/inet_connection_sock.h | 1 + net/ipv4/tcp.c | 11 ++++++++++- net/ipv4/tcp_timer.c | 40 ++++++++++++++++++++++++-------------- 4 files changed, 37 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a778ee024590..e64f4c67d0ef 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -105,6 +105,7 @@ enum { #define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ #define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ #define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ +#define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ /* for TCP_INFO socket option */ #define TCPI_OPT_TIMESTAMPS 1 diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b6d3b55da19b..e4f494b42e06 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -125,6 +125,7 @@ struct inet_connection_sock { int probe_size; } icsk_mtup; u32 icsk_ca_priv[16]; + u32 icsk_user_timeout; #define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 176e11aaea77..cf3254528753 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2391,7 +2391,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, err = tp->af_specific->md5_parse(sk, optval, optlen); break; #endif - + case TCP_USER_TIMEOUT: + /* Cap the max timeout in ms TCP will retry/retrans + * before giving up and aborting (ETIMEDOUT) a connection. + */ + icsk->icsk_user_timeout = msecs_to_jiffies(val); + break; default: err = -ENOPROTOOPT; break; @@ -2610,6 +2615,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_THIN_DUPACK: val = tp->thin_dupack; break; + + case TCP_USER_TIMEOUT: + val = jiffies_to_msecs(icsk->icsk_user_timeout); + break; default: return -ENOPROTOOPT; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 808bb920c9f5..11569deccbea 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -138,10 +138,10 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) * retransmissions with an initial RTO of TCP_RTO_MIN.
*/ static bool retransmits_timed_out(struct sock *sk, - unsigned int boundary) + unsigned int boundary, + unsigned int timeout) { - unsigned int timeout, linear_backoff_thresh; - unsigned int start_ts; + unsigned int linear_backoff_thresh, start_ts; if (!inet_csk(sk)->icsk_retransmits) return false; @@ -151,14 +151,15 @@ static bool retransmits_timed_out(struct sock *sk, else start_ts = tcp_sk(sk)->retrans_stamp; - linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); - - if (boundary <= linear_backoff_thresh) - timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; - else - timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + - (boundary - linear_backoff_thresh) * TCP_RTO_MAX; + if (likely(timeout == 0)) { + linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); + if (boundary <= linear_backoff_thresh) + timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; + else + timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + + (boundary - linear_backoff_thresh) * TCP_RTO_MAX; + } return (tcp_time_stamp - start_ts) >= timeout; } @@ -174,7 +175,7 @@ static int tcp_write_timeout(struct sock *sk) dst_negative_advice(sk); retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; } else { - if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { + if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); @@ -187,14 +188,16 @@ static int tcp_write_timeout(struct sock *sk) retry_until = tcp_orphan_retries(sk, alive); do_reset = alive || - !retransmits_timed_out(sk, retry_until); + !retransmits_timed_out(sk, retry_until, 0); if (tcp_out_of_resources(sk, do_reset)) return 1; } } - if (retransmits_timed_out(sk, retry_until)) { + if (retransmits_timed_out(sk, retry_until, + (1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV) ? 0 : + icsk->icsk_user_timeout)) { /* Has it gone just too far? */ tcp_write_err(sk); return 1; @@ -436,7 +439,7 @@ out_reset_timer: icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); - if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) + if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0)) __sk_dst_reset(sk); out:; @@ -556,7 +559,14 @@ static void tcp_keepalive_timer (unsigned long data) elapsed = keepalive_time_elapsed(tp); if (elapsed >= keepalive_time_when(tp)) { - if (icsk->icsk_probes_out >= keepalive_probes(tp)) { + /* If the TCP_USER_TIMEOUT option is enabled, use that + * to determine when to timeout instead. + */ + if ((icsk->icsk_user_timeout != 0 && + elapsed >= icsk->icsk_user_timeout && + icsk->icsk_probes_out > 0) || + (icsk->icsk_user_timeout == 0 && + icsk->icsk_probes_out >= keepalive_probes(tp))) { tcp_send_active_reset(sk, GFP_ATOMIC); tcp_write_err(sk); goto out; -- cgit v1.2.3 From 22b71c8f4f3db8df92f5e7b081c265bc56c0bd2f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 29 Aug 2010 19:23:12 +0000 Subject: tcp/dccp: Consolidate common code for RFC 3390 conversion This patch consolidates initial-window code common to TCP and CCID-2: * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341). Signed-off-by: Gerrit Renker Signed-off-by: David S. 
Miller --- include/net/tcp.h | 15 +++++++++++++++ net/dccp/ccids/ccid2.c | 8 ++------ net/ipv4/tcp_input.c | 17 ++--------------- 3 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index df6a2eb20193..a64022199b62 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -779,6 +779,21 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* Use define here intentionally to get WARN_ON location shown at the caller */ #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) +/* + * Convert RFC 3390 larger initial window into an equivalent number of packets. + * + * John Heffner states: + * + * The RFC specifies a window of no more than 4380 bytes + * unless 2*MSS > 4380. Reading the pseudocode in the RFC + * is a bit misleading because they use a clamp at 4380 bytes + * rather than a multiplier in the relevant range. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 smss) +{ + return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3); +} + extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 8c95813bcc67..b9c942a09c98 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -641,12 +641,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ hc->tx_ssthresh = ~0U; - /* - * RFC 4341, 5: "The cwnd parameter is initialized to at most four - * packets for new connections, following the rules from [RFC3390]". - * We need to convert the bytes of RFC3390 into the packets of RFC 4341. - */ - hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U); + /* Use larger initial windows (RFC 4341, section 5). */ + hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); /* Make sure that Ack Ratio is enabled and within bounds. */ max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e663b78a2ef6..1bc87a05c734 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -805,25 +805,12 @@ void tcp_update_metrics(struct sock *sk) } } -/* Numbers are taken from RFC3390. - * - * John Heffner states: - * - * The RFC specifies a window of no more than 4380 bytes - * unless 2*MSS > 4380. Reading the pseudocode in the RFC - * is a bit misleading because they use a clamp at 4380 bytes - * rather than use a multiplier in the relevant range. - */ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) { __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); - if (!cwnd) { - if (tp->mss_cache > 1460) - cwnd = 2; - else - cwnd = (tp->mss_cache > 1095) ? 3 : 4; - } + if (!cwnd) + cwnd = rfc3390_bytes_to_packets(tp->mss_cache); return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } -- cgit v1.2.3 From 3d5b99ae82f8742e3bb1f8634fd11ac36ea19ee1 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 29 Aug 2010 19:27:34 +0000 Subject: TCP: update initial windows according to RFC 5681 This updates the use of larger initial windows, as originally specified in RFC 3390, to use the newer IW values specified in RFC 5681, section 3.1. 
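The resulting conversion rule can be sanity-checked in isolation; this is an editor's sketch of the RFC 5681, 3.1 mapping that the diff below adopts, with iw_segments as a hypothetical stand-in for rfc3390_bytes_to_packets:

	/* Initial window in segments as a function of the sender MSS:
	 *   SMSS <= 1095         -> 4 segments
	 *   1095 < SMSS <= 2190  -> 3 segments
	 *   SMSS > 2190          -> 2 segments
	 */
	static inline u32 iw_segments(u32 smss)
	{
		return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
	}

	/* e.g. iw_segments(536) == 4, iw_segments(1460) == 3, iw_segments(9000) == 2 */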
The changes made in RFC 5681 are: a) the setting is now more clearly specified in units of segments (as the comments by John Heffner emphasized, this was not very clear in RFC 3390); b) for connections with 1095 < SMSS <= 2190 there is now a change: - RFC 3390 says that IW <= 4380, - RFC 5681 says that IW = 3 * SMSS <= 6570. Since RFC 3390 is older and "only" a proposed standard, whereas the newer RFC 5681 is already a draft standard, it seems preferable to use the newer IW variant. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/net/tcp.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index a64022199b62..11dbacc886c0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -781,17 +781,11 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* * Convert RFC 3390 larger initial window into an equivalent number of packets. - * - * John Heffner states: - * - * The RFC specifies a window of no more than 4380 bytes - * unless 2*MSS > 4380. Reading the pseudocode in the RFC - * is a bit misleading because they use a clamp at 4380 bytes - * rather than a multiplier in the relevant range. + * This is based on the numbers specified in RFC 5681, 3.1. */ static inline u32 rfc3390_bytes_to_packets(const u32 smss) { - return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3); + return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); } extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); -- cgit v1.2.3 From 4dc89133f49b8cfd77ba7e83f5960aed63aaa99e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Aug 2010 07:40:16 +0000 Subject: net: add a comment on netdev->last_rx As some driver authors seem to reintroduce dev->last_rx use, add a comment to strongly discourage this. Since commit 6cf3f41e6c0 (bonding, net: Move last_rx update into bonding recv logic), network drivers don't need to update last_rx themselves, unless they use this field to implement a timeout. Not updating last_rx avoids dirtying a cache line, improving performance on SMP. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0cf9448a32c4..c82220a9f3d5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -953,7 +953,14 @@ struct net_device { /* * Cache line mostly used on receive path (including eth_type_trans()) */ - unsigned long last_rx; /* Time of last Rx */ + unsigned long last_rx; /* Time of last Rx + * This should not be set in + * drivers, unless really needed, + * because network stack (bonding) + * use it if/when necessary, to + * avoid dirtying this cache line. + */ + /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; /* hw address, (before bcast because most packets are -- cgit v1.2.3 From a28dec2f26013aad89446b1f708f948617bc28a2 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sun, 8 Aug 2010 18:03:33 +0400 Subject: powerpc/85xx: Add P1021 PCI IDs and quirks This is needed for proper PCI-E support on P1021 SoCs.
Signed-off-by: Anton Vorontsov Signed-off-by: Kumar Gala --- arch/powerpc/sysdev/fsl_pci.c | 2 ++ include/linux/pci_ids.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 209384b6e039..4ae933225251 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -399,6 +399,8 @@ DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1013E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1013, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1020E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1020, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1021E, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1021, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1022E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1022, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2010E, quirk_fsl_pcie_header); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d36cad..10d33309e9a6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2300,6 +2300,8 @@ #define PCI_DEVICE_ID_P2010 0x0079 #define PCI_DEVICE_ID_P1020E 0x0100 #define PCI_DEVICE_ID_P1020 0x0101 +#define PCI_DEVICE_ID_P1021E 0x0102 +#define PCI_DEVICE_ID_P1021 0x0103 #define PCI_DEVICE_ID_P1011E 0x0108 #define PCI_DEVICE_ID_P1011 0x0109 #define PCI_DEVICE_ID_P1022E 0x0110 -- cgit v1.2.3 From 4e4438b86527e8bf1f49503a30d487e401e64f9c Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 1 Sep 2010 08:55:24 -0600 Subject: gpiolib: Add 'struct gpio_chip' forward declaration for !GPIOLIB case With CONFIG_GPIOLIB=n, the 'struct gpio_chip' is not declared, so the following pops up on PowerPC: cc1: warnings being treated as errors In file included from arch/powerpc/platforms/52xx/mpc52xx_common.c:19: include/linux/of_gpio.h:74: warning: 'struct gpio_chip' declared inside parameter list include/linux/of_gpio.h:74: warning: its scope is only this definition or declaration, which is probably not what you want include/linux/of_gpio.h:75: warning: 'struct gpio_chip' declared inside parameter list make[2]: *** [arch/powerpc/platforms/52xx/mpc52xx_common.o] Error 1 This patch fixes the issue by providing the proper forward declaration. Signed-off-by: Anton Vorontsov Signed-off-by: Grant Likely --- include/linux/gpio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 03f616b78cfa..e41f7dd1ae67 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -13,6 +13,7 @@ #include struct device; +struct gpio_chip; /* * Some platforms don't support the GPIO programming interface. -- cgit v1.2.3 From 86cac58b71227cc34a3d0e78f19585c0eff49ea3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Aug 2010 18:25:32 +0000 Subject: skge: add GRO support - napi_gro_flush() is exported from net/core/dev.c, to avoid an irq_save/irq_restore in the packet receive path. - use napi_gro_receive() instead of netif_receive_skb() - use napi_gro_flush() before calling __napi_complete() - turn on NETIF_F_GRO by default - Tested on a Marvell 88E8001 Gigabit NIC Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/skge.c | 5 +++-- include/linux/netdevice.h | 1 + net/core/dev.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 40e5c46e7571..a8a63581d63d 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -3178,8 +3178,7 @@ static int skge_poll(struct napi_struct *napi, int to_do) skb = skge_rx_get(dev, e, control, rd->status, rd->csum2); if (likely(skb)) { - netif_receive_skb(skb); - + napi_gro_receive(napi, skb); ++work_done; } } @@ -3192,6 +3191,7 @@ static int skge_poll(struct napi_struct *napi, int to_do) if (work_done < to_do) { unsigned long flags; + napi_gro_flush(napi); spin_lock_irqsave(&hw->hw_lock, flags); __napi_complete(napi); hw->intr_mask |= napimask[skge->port]; @@ -3849,6 +3849,7 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port, dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; skge->rx_csum = 1; } + dev->features |= NETIF_F_GRO; /* read the mac address */ memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port*8, ETH_ALEN); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c82220a9f3d5..af05186d5b36 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1702,6 +1702,7 @@ extern gro_result_t dev_gro_receive(struct napi_struct *napi, extern gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb); extern gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); +extern void napi_gro_flush(struct napi_struct *napi); extern void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb); extern struct sk_buff * napi_get_frags(struct napi_struct *napi); diff --git a/net/core/dev.c b/net/core/dev.c index 63bd20a75929..d8c43e73f0b7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3063,7 +3063,7 @@ out: return netif_receive_skb(skb); } -static void napi_gro_flush(struct napi_struct *napi) +inline void napi_gro_flush(struct napi_struct *napi) { struct sk_buff *skb, *next; @@ -3076,6 +3076,7 @@ static void napi_gro_flush(struct napi_struct *napi) napi->gro_count = 0; napi->gro_list = NULL; } +EXPORT_SYMBOL(napi_gro_flush); enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { -- cgit v1.2.3 From c68839963426d42bdb2c915b435f9860d060e645 Mon Sep 17 00:00:00 2001 From: Peter Meerwald Date: Thu, 2 Sep 2010 04:06:24 +0000 Subject: net: Improve comments in include/linux/phy.h Correct stated range of PHY bus addresses (i.e. 0-31) in comment, make spelling of PHY consistent in comments. Signed-off-by: Peter Meerwald Signed-off-by: David S. 
Miller --- include/linux/phy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 6b0a782c6224..a6e047a04f79 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -116,7 +116,7 @@ struct mii_bus { /* list of all PHYs on bus */ struct phy_device *phy_map[PHY_MAX_ADDR]; - /* Phy addresses to be ignored when probing */ + /* PHY addresses to be ignored when probing */ u32 phy_mask; /* @@ -283,7 +283,7 @@ struct phy_device { phy_interface_t interface; - /* Bus address of the PHY (0-32) */ + /* Bus address of the PHY (0-31) */ int addr; /* -- cgit v1.2.3 From bc8acf2c8c3e43fcc192762a9f964b3e9a17748b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Sep 2010 13:07:41 -0700 Subject: drivers/net: avoid some skb->ip_summed initializations fresh skbs have ip_summed set to CHECKSUM_NONE (0) We can avoid setting again skb->ip_summed to CHECKSUM_NONE in drivers. Introduce skb_checksum_none_assert() helper so that we keep this assertion documented in driver sources. Change most occurrences of : skb->ip_summed = CHECKSUM_NONE; by : skb_checksum_none_assert(skb); Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/8139cp.c | 2 +- drivers/net/acenic.c | 2 +- drivers/net/atl1c/atl1c_main.c | 2 +- drivers/net/atl1e/atl1e_main.c | 2 +- drivers/net/atlx/atl1.c | 2 +- drivers/net/b44.c | 2 +- drivers/net/benet/be_main.c | 2 +- drivers/net/bna/bnad.c | 2 +- drivers/net/bnx2.c | 2 +- drivers/net/bnx2x/bnx2x_cmn.c | 2 +- drivers/net/cassini.c | 2 +- drivers/net/chelsio/sge.c | 2 +- drivers/net/cpmac.c | 2 +- drivers/net/cxgb3/sge.c | 2 +- drivers/net/cxgb4/sge.c | 2 +- drivers/net/cxgb4vf/sge.c | 2 +- drivers/net/dm9000.c | 2 +- drivers/net/e1000/e1000_main.c | 3 ++- drivers/net/e1000e/netdev.c | 3 ++- drivers/net/gianfar.c | 2 +- drivers/net/greth.c | 2 +- drivers/net/ibmlana.c | 2 +- drivers/net/igb/igb_main.c | 2 +- drivers/net/igbvf/netdev.c | 2 +- drivers/net/ipg.c | 6 +++--- drivers/net/iseries_veth.c | 2 +- drivers/net/ixgb/ixgb_main.c | 4 ++-- drivers/net/ixgbe/ixgbe_fcoe.c | 5 +++-- drivers/net/ixgbe/ixgbe_main.c | 2 +- drivers/net/ixgbevf/ixgbevf_main.c | 2 +- drivers/net/jme.c | 2 +- drivers/net/ll_temac_main.c | 2 +- drivers/net/macb.c | 2 +- drivers/net/niu.c | 2 +- drivers/net/ns83820.c | 2 +- drivers/net/pasemi_mac.c | 2 +- drivers/net/ps3_gelic_net.c | 4 ++-- drivers/net/qla3xxx.c | 4 ++-- drivers/net/qlcnic/qlcnic_init.c | 2 +- drivers/net/qlge/qlge_main.c | 6 +++--- drivers/net/r8169.c | 2 +- drivers/net/s2io.c | 4 ++-- drivers/net/sb1250-mac.c | 2 +- drivers/net/sfc/rx.c | 2 +- drivers/net/sh_eth.c | 2 +- drivers/net/smsc911x.c | 2 +- drivers/net/spider_net.c | 4 ++-- drivers/net/stmmac/stmmac_main.c | 2 +- drivers/net/tehuti.c | 5 +++-- drivers/net/tg3.c | 2 +- drivers/net/typhoon.c | 2 +- drivers/net/via-velocity.c | 2 +- drivers/net/vmxnet3/vmxnet3_drv.c | 4 ++-- drivers/net/vxge/vxge-main.c | 2 +- drivers/net/xilinx_emaclite.c | 2 +- include/linux/skbuff.h | 15 +++++++++++++++ 56 files changed, 86 insertions(+), 67 deletions(-) (limited to 'include') diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c index 4a4f6b81e32d..237d4ea5a416 100644 --- a/drivers/net/8139cp.c +++ b/drivers/net/8139cp.c @@ -561,7 +561,7 @@ rx_status_loop: if (cp_rx_csum_ok(status)) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb_put(skb, len); diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c index 
b9a591604e5b..41d9911202d0 100644 --- a/drivers/net/acenic.c +++ b/drivers/net/acenic.c @@ -2033,7 +2033,7 @@ static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm) skb->csum = htons(csum); skb->ip_summed = CHECKSUM_COMPLETE; } else { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } /* send it up */ diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c index 61f1634ab3f8..553230eb365c 100644 --- a/drivers/net/atl1c/atl1c_main.c +++ b/drivers/net/atl1c/atl1c_main.c @@ -1719,7 +1719,7 @@ static inline void atl1c_rx_checksum(struct atl1c_adapter *adapter, * cannot figure out if the packet is fragmented or not, * so we tell the KERNEL CHECKSUM_NONE */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, const int ringid) diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c index 1ae44bb26317..56ace3fbe40d 100644 --- a/drivers/net/atl1e/atl1e_main.c +++ b/drivers/net/atl1e/atl1e_main.c @@ -1331,7 +1331,7 @@ static inline void atl1e_rx_checksum(struct atl1e_adapter *adapter, u16 pkt_flags; u16 err_flags; - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); pkt_flags = prrs->pkt_flag; err_flags = prrs->err_flag; if (((pkt_flags & RRS_IS_IPV4) || (pkt_flags & RRS_IS_IPV6)) && diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 5837b0184d4b..e1e0171d6e62 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -1805,7 +1805,7 @@ static void atl1_rx_checksum(struct atl1_adapter *adapter, * the higher layers and let it be sorted out there. */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (unlikely(rrd->pkt_flg & PACKET_FLAG_ERR)) { if (rrd->err_flg & (ERR_FLAG_CRC | ERR_FLAG_TRUNC | diff --git a/drivers/net/b44.c b/drivers/net/b44.c index 37617abc1647..7342308718a0 100644 --- a/drivers/net/b44.c +++ b/drivers/net/b44.c @@ -818,7 +818,7 @@ static int b44_rx(struct b44 *bp, int budget) copy_skb->data, len); skb = copy_skb; } - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, bp->dev); netif_receive_skb(skb); received++; diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c index 9db10fec8235..dcee7a20c0b9 100644 --- a/drivers/net/benet/be_main.c +++ b/drivers/net/benet/be_main.c @@ -1016,7 +1016,7 @@ static void be_rx_compl_process(struct be_adapter *adapter, skb_fill_rx_data(adapter, skb, rxcp, num_rcvd); if (do_pkt_csum(rxcp, adapter->rx_csum)) - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); else skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/drivers/net/bna/bnad.c b/drivers/net/bna/bnad.c index 79c4c2441449..44adc7aefddc 100644 --- a/drivers/net/bna/bnad.c +++ b/drivers/net/bna/bnad.c @@ -510,7 +510,7 @@ bnad_poll_cq(struct bnad *bnad, struct bna_ccb *ccb, int budget) (flags & BNA_CQ_EF_L4_CKSUM_OK))) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); rcb->rxq->rx_packets++; rcb->rxq->rx_bytes += skb->len; diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index a0e02aa42214..4ff76e38e788 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -3218,7 +3218,7 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget) } - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (bp->rx_csum && (status & (L2_FHDR_STATUS_TCP_SEGMENT | L2_FHDR_STATUS_UDP_DATAGRAM))) { diff --git 
a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c index da96d1a18c20..0e4caf411905 100644 --- a/drivers/net/bnx2x/bnx2x_cmn.c +++ b/drivers/net/bnx2x/bnx2x_cmn.c @@ -623,7 +623,7 @@ reuse_rx: /* Set Toeplitz hash for a none-LRO skb */ bnx2x_set_skb_rxhash(bp, cqe, skb); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (bp->rx_csum) { if (likely(BNX2X_RX_CSUM_OK(cqe))) skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c index 28c88eeec757..32aaadc4734f 100644 --- a/drivers/net/cassini.c +++ b/drivers/net/cassini.c @@ -2149,7 +2149,7 @@ end_copy_pkt: skb->csum = csum_unfold(~csum); skb->ip_summed = CHECKSUM_COMPLETE; } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); return len; } diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index f01cfdb995de..1950b9a20ecd 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -1388,7 +1388,7 @@ static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) ++st->rx_cso_good; skb->ip_summed = CHECKSUM_UNNECESSARY; } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (unlikely(adapter->vlan_grp && p->vlan_valid)) { st->vlan_xtract++; diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c index 5a5af1ca7541..fec939f8f65f 100644 --- a/drivers/net/cpmac.c +++ b/drivers/net/cpmac.c @@ -391,7 +391,7 @@ static struct sk_buff *cpmac_rx_one(struct cpmac_priv *priv, if (likely(skb)) { skb_put(desc->skb, desc->datalen); desc->skb->protocol = eth_type_trans(desc->skb, priv->dev); - desc->skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(desc->skb); priv->dev->stats.rx_packets++; priv->dev->stats.rx_bytes += desc->datalen; result = desc->skb; diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 8ff96c6f6de5..c5a142bea5e9 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -2022,7 +2022,7 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq, qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; skb->ip_summed = CHECKSUM_UNNECESSARY; } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb_record_rx_queue(skb, qs - &adap->sge.qs[0]); if (unlikely(p->vlan_valid)) { diff --git a/drivers/net/cxgb4/sge.c b/drivers/net/cxgb4/sge.c index 6ddb3bb0ce67..9967f3debce7 100644 --- a/drivers/net/cxgb4/sge.c +++ b/drivers/net/cxgb4/sge.c @@ -1605,7 +1605,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, rxq->stats.rx_cso++; } } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (unlikely(pkt->vlan_ex)) { struct vlan_group *grp = pi->vlan_grp; diff --git a/drivers/net/cxgb4vf/sge.c b/drivers/net/cxgb4vf/sge.c index e00fd9c36e8b..f10864ddafbe 100644 --- a/drivers/net/cxgb4vf/sge.c +++ b/drivers/net/cxgb4vf/sge.c @@ -1534,7 +1534,7 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp, } rxq->stats.rx_cso++; } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (unlikely(pkt->vlan_ex)) { struct vlan_group *grp = pi->vlan_grp; diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c index 4fd6b2b4554b..9f6aeefa06bf 100644 --- a/drivers/net/dm9000.c +++ b/drivers/net/dm9000.c @@ -1056,7 +1056,7 @@ dm9000_rx(struct net_device *dev) if ((((rxbyte & 0x1c) << 3) & rxbyte) == 0) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } netif_rx(skb); dev->stats.rx_packets++; diff --git a/drivers/net/e1000/e1000_main.c 
b/drivers/net/e1000/e1000_main.c index 3e8ac4baae1b..17f5867b5d9b 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -3552,7 +3552,8 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, struct e1000_hw *hw = &adapter->hw; u16 status = (u16)status_err; u8 errors = (u8)(status_err >> 24); - skb->ip_summed = CHECKSUM_NONE; + + skb_checksum_none_assert(skb); /* 82543 or newer only */ if (unlikely(hw->mac_type < e1000_82543)) return; diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 5f3eac6432cb..c9b66f4727e4 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -475,7 +475,8 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, { u16 status = (u16)status_err; u8 errors = (u8)(status_err >> 24); - skb->ip_summed = CHECKSUM_NONE; + + skb_checksum_none_assert(skb); /* Ignore Checksum bit is set */ if (status & E1000_RXD_STAT_IXSM) diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index e6048d6ab0ea..f30adbf86bb2 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -2654,7 +2654,7 @@ static inline void gfar_rx_checksum(struct sk_buff *skb, struct rxfcb *fcb) if ((fcb->flags & RXFCB_CSUM_MASK) == (RXFCB_CIP | RXFCB_CTU)) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } diff --git a/drivers/net/greth.c b/drivers/net/greth.c index fbeaf70d1727..27d6960ce09e 100644 --- a/drivers/net/greth.c +++ b/drivers/net/greth.c @@ -893,7 +893,7 @@ static int greth_rx_gbit(struct net_device *dev, int limit) if (greth->flags & GRETH_FLAG_RX_CSUM && hw_checksummed(status)) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, dev); dev->stats.rx_packets++; diff --git a/drivers/net/ibmlana.c b/drivers/net/ibmlana.c index 294ccfb427cf..0037a696cd0a 100644 --- a/drivers/net/ibmlana.c +++ b/drivers/net/ibmlana.c @@ -602,7 +602,7 @@ static void irqrx_handler(struct net_device *dev) /* set up skb fields */ skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* bookkeeping */ dev->stats.rx_packets++; diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index d35cc38bf8b2..c4d861b557ca 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -5455,7 +5455,7 @@ static void igb_receive_skb(struct igb_q_vector *q_vector, static inline void igb_rx_checksum_adv(struct igb_ring *ring, u32 status_err, struct sk_buff *skb) { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Ignore Checksum bit is set or checksum is disabled through ethtool */ if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) || diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c index c539f7c9c3e0..c7fab80d2490 100644 --- a/drivers/net/igbvf/netdev.c +++ b/drivers/net/igbvf/netdev.c @@ -103,7 +103,7 @@ static void igbvf_receive_skb(struct igbvf_adapter *adapter, static inline void igbvf_rx_checksum_adv(struct igbvf_adapter *adapter, u32 status_err, struct sk_buff *skb) { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Ignore Checksum bit is set or checksum is disabled through ethtool */ if ((status_err & E1000_RXD_STAT_IXSM) || diff --git a/drivers/net/ipg.c b/drivers/net/ipg.c index 72e3d2da9e9f..dc0198092343 100644 --- a/drivers/net/ipg.c +++ b/drivers/net/ipg.c @@ -1213,7 +1213,7 @@ static void 
ipg_nic_rx_with_start_and_end(struct net_device *dev, skb_put(skb, framelen); skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); netif_rx(skb); sp->rx_buff[entry] = NULL; } @@ -1278,7 +1278,7 @@ static void ipg_nic_rx_with_end(struct net_device *dev, jumbo->skb->protocol = eth_type_trans(jumbo->skb, dev); - jumbo->skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(jumbo->skb); netif_rx(jumbo->skb); } } @@ -1476,7 +1476,7 @@ static int ipg_nic_rx(struct net_device *dev) * IP/TCP/UDP frame was received. Let the * upper layer decide. */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Hand off frame for higher layer processing. * The function netif_rx() releases the sk_buff diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index ba1de5973fb2..8df645e78f2e 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -1524,7 +1524,7 @@ static void veth_receive(struct veth_lpar_connection *cnx, skb_put(skb, length); skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); netif_rx(skb); /* send it up */ dev->stats.rx_packets++; dev->stats.rx_bytes += length; diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index 33c4ffe6e103..c2f6e71e1181 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -1905,7 +1905,7 @@ ixgb_rx_checksum(struct ixgb_adapter *adapter, */ if ((rx_desc->status & IXGB_RX_DESC_STATUS_IXSM) || (!(rx_desc->status & IXGB_RX_DESC_STATUS_TCPCS))) { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); return; } @@ -1913,7 +1913,7 @@ ixgb_rx_checksum(struct ixgb_adapter *adapter, /* now look at the TCP checksum error bit */ if (rx_desc->errors & IXGB_RX_DESC_ERRORS_TCPE) { /* let the stack verify checksum errors */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); adapter->hw_csum_rx_error++; } else { /* TCP checksum is good */ diff --git a/drivers/net/ixgbe/ixgbe_fcoe.c b/drivers/net/ixgbe/ixgbe_fcoe.c index 86fa07cb061d..2f1de8b90f9e 100644 --- a/drivers/net/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ixgbe/ixgbe_fcoe.c @@ -304,12 +304,13 @@ int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter, if (!ixgbe_rx_is_fcoe(rx_desc)) goto ddp_out; - skb->ip_summed = CHECKSUM_UNNECESSARY; sterr = le32_to_cpu(rx_desc->wb.upper.status_error); fcerr = (sterr & IXGBE_RXDADV_ERR_FCERR); fceofe = (sterr & IXGBE_RXDADV_ERR_FCEOFE); if (fcerr == IXGBE_FCERR_BADCRC) - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); + else + skb->ip_summed = CHECKSUM_UNNECESSARY; if (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q)) fh = (struct fc_frame_header *)(skb->data + diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 5e4dc1b0a1bd..3aafe94741ba 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -980,7 +980,7 @@ static inline void ixgbe_rx_checksum(struct ixgbe_adapter *adapter, { u32 status_err = le32_to_cpu(rx_desc->wb.upper.status_error); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Rx csum disabled */ if (!(adapter->flags & IXGBE_FLAG_RX_CSUM_ENABLED)) diff --git a/drivers/net/ixgbevf/ixgbevf_main.c b/drivers/net/ixgbevf/ixgbevf_main.c index 5d3c869283a5..bdbd26c60ae6 100644 --- a/drivers/net/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ixgbevf/ixgbevf_main.c @@ -356,7 +356,7 @@ static void ixgbevf_receive_skb(struct ixgbevf_q_vector *q_vector, static inline void 
ixgbevf_rx_checksum(struct ixgbevf_adapter *adapter, u32 status_err, struct sk_buff *skb) { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Rx csum disabled */ if (!(adapter->flags & IXGBE_FLAG_RX_CSUM_ENABLED)) diff --git a/drivers/net/jme.c b/drivers/net/jme.c index 99f24f5cac53..1fcd533b1a61 100644 --- a/drivers/net/jme.c +++ b/drivers/net/jme.c @@ -936,7 +936,7 @@ jme_alloc_and_feed_skb(struct jme_adapter *jme, int idx) if (jme_rxsum_ok(jme, le16_to_cpu(rxdesc->descwb.flags))) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (rxdesc->descwb.flags & cpu_to_le16(RXWBFLAG_TAGON)) { if (jme->vlgrp) { diff --git a/drivers/net/ll_temac_main.c b/drivers/net/ll_temac_main.c index bdf2149e5296..874ee01e8d9d 100644 --- a/drivers/net/ll_temac_main.c +++ b/drivers/net/ll_temac_main.c @@ -760,7 +760,7 @@ static void ll_temac_recv(struct net_device *ndev) skb_put(skb, length); skb->dev = ndev; skb->protocol = eth_type_trans(skb, ndev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* if we're doing rx csum offload, set it up */ if (((lp->temac_features & TEMAC_FEATURE_RX_CSUM) != 0) && diff --git a/drivers/net/macb.c b/drivers/net/macb.c index ff2f158ab0b9..4297f6e8c4bc 100644 --- a/drivers/net/macb.c +++ b/drivers/net/macb.c @@ -407,7 +407,7 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag, } skb_reserve(skb, RX_OFFSET); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb_put(skb, len); for (frag = first_frag; ; frag = NEXT_RX(frag)) { diff --git a/drivers/net/niu.c b/drivers/net/niu.c index b4cc61f1fc59..1f89f472cbfb 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -3484,7 +3484,7 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np, RCR_ENTRY_ERROR))) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } else if (!(val & RCR_ENTRY_MULTI)) append_size = len - skb->len; diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index 447c2c43769a..4475ca97f031 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -923,7 +923,7 @@ static void rx_irq(struct net_device *ndev) if ((extsts & 0x002a0000) && !(extsts & 0x00540000)) { skb->ip_summed = CHECKSUM_UNNECESSARY; } else { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } skb->protocol = eth_type_trans(skb, ndev); #ifdef NS83820_VLAN_ACCEL_SUPPORT diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c index 8ab6ae0a6107..828e97cacdbf 100644 --- a/drivers/net/pasemi_mac.c +++ b/drivers/net/pasemi_mac.c @@ -808,7 +808,7 @@ static int pasemi_mac_clean_rx(struct pasemi_mac_rxring *rx, skb->csum = (macrx & XCT_MACRX_CSUM_M) >> XCT_MACRX_CSUM_S; } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); packets++; tot_bytes += len; diff --git a/drivers/net/ps3_gelic_net.c b/drivers/net/ps3_gelic_net.c index 87d6b8f36304..5526ab4895e6 100644 --- a/drivers/net/ps3_gelic_net.c +++ b/drivers/net/ps3_gelic_net.c @@ -956,9 +956,9 @@ static void gelic_net_pass_skb_up(struct gelic_descr *descr, (!(data_error & GELIC_DESCR_DATA_ERROR_CHK_MASK))) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* update netdevice statistics */ netdev->stats.rx_packets++; diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c index 
6168a130f33f..7496ed2c34ab 100644 --- a/drivers/net/qla3xxx.c +++ b/drivers/net/qla3xxx.c @@ -2029,7 +2029,7 @@ static void ql_process_mac_rx_intr(struct ql3_adapter *qdev, dma_unmap_len(lrg_buf_cb2, maplen), PCI_DMA_FROMDEVICE); prefetch(skb->data); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, qdev->ndev); netif_receive_skb(skb); @@ -2076,7 +2076,7 @@ static void ql_process_macip_rx_intr(struct ql3_adapter *qdev, PCI_DMA_FROMDEVICE); prefetch(skb2->data); - skb2->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb2); if (qdev->device_id == QL3022_DEVICE_ID) { /* * Copy the ethhdr from first buffer to second. This diff --git a/drivers/net/qlcnic/qlcnic_init.c b/drivers/net/qlcnic/qlcnic_init.c index 8e47d7aea562..26a7d6bca5c7 100644 --- a/drivers/net/qlcnic/qlcnic_init.c +++ b/drivers/net/qlcnic/qlcnic_init.c @@ -1369,7 +1369,7 @@ static struct sk_buff *qlcnic_process_rxbuf(struct qlcnic_adapter *adapter, adapter->stats.csummed++; skb->ip_summed = CHECKSUM_UNNECESSARY; } else { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } skb->dev = adapter->netdev; diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c index 5a245211fc53..e2d0e108b9aa 100644 --- a/drivers/net/qlge/qlge_main.c +++ b/drivers/net/qlge/qlge_main.c @@ -1566,7 +1566,7 @@ static void ql_process_mac_rx_page(struct ql_adapter *qdev, rx_ring->rx_packets++; rx_ring->rx_bytes += skb->len; skb->protocol = eth_type_trans(skb, ndev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (qdev->rx_csum && !(ib_mac_rsp->flags1 & IB_MAC_CSUM_ERR_MASK)) { @@ -1676,7 +1676,7 @@ static void ql_process_mac_rx_skb(struct ql_adapter *qdev, rx_ring->rx_packets++; rx_ring->rx_bytes += skb->len; skb->protocol = eth_type_trans(skb, ndev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* If rx checksum is on, and there are no * csum or frame errors. @@ -1996,7 +1996,7 @@ static void ql_process_mac_split_rx_intr(struct ql_adapter *qdev, } skb->protocol = eth_type_trans(skb, ndev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* If rx checksum is on, and there are no * csum or frame errors. diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 078bbf4e6f19..07b3fb5175e5 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -4460,7 +4460,7 @@ static inline void rtl8169_rx_csum(struct sk_buff *skb, struct RxDesc *desc) ((status == RxProtoIP) && !(opts1 & IPFail))) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } static inline bool rtl8169_try_rx_copy(struct sk_buff **sk_buff, diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 7061fc8e99c7..c70ad515383a 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -7603,10 +7603,10 @@ static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp) * Packet with erroneous checksum, let the * upper layers deal with it. 
*/ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); swstats->mem_freed += skb->truesize; send_up: diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c index 8e6bd45b9f31..d8249d7653c6 100644 --- a/drivers/net/sb1250-mac.c +++ b/drivers/net/sb1250-mac.c @@ -1170,7 +1170,7 @@ again: sb->ip_summed = CHECKSUM_UNNECESSARY; /* don't need to set sb->csum */ } else { - sb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(sb); } } prefetch(sb->data); diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c index 799c461ce7b8..acb372e841b2 100644 --- a/drivers/net/sfc/rx.c +++ b/drivers/net/sfc/rx.c @@ -615,7 +615,7 @@ void __efx_rx_packet(struct efx_channel *channel, EFX_BUG_ON_PARANOID(!skb); /* Set the SKB flags */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Pass the packet up */ netif_receive_skb(skb); diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c index a812efc3632e..50259dfec583 100644 --- a/drivers/net/sh_eth.c +++ b/drivers/net/sh_eth.c @@ -798,7 +798,7 @@ static int sh_eth_rx(struct net_device *ndev) skb->dev = ndev; sh_eth_set_receive_align(skb); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); rxdesc->addr = virt_to_phys(PTR_ALIGN(skb->data, 4)); } if (entry >= RX_RING_SIZE - 1) diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c index 0909ae934ad0..13ddcd487200 100644 --- a/drivers/net/smsc911x.c +++ b/drivers/net/smsc911x.c @@ -1048,7 +1048,7 @@ static int smsc911x_poll(struct napi_struct *napi, int budget) smsc911x_rx_readfifo(pdata, (unsigned int *)skb->head, pktwords); skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); netif_receive_skb(skb); /* Update counters */ diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c index 1636a34d95dd..cb6bcca9d541 100644 --- a/drivers/net/spider_net.c +++ b/drivers/net/spider_net.c @@ -1000,9 +1000,9 @@ spider_net_pass_skb_up(struct spider_net_descr *descr, !(data_error & SPIDER_NET_DATA_ERR_CKSUM_MASK)) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (data_status & SPIDER_NET_VLAN_PACKET) { /* further enhancements: HW-accel VLAN diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c index e3f002eba89a..1ccea76d89ae 100644 --- a/drivers/net/stmmac/stmmac_main.c +++ b/drivers/net/stmmac/stmmac_main.c @@ -1256,7 +1256,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) if (unlikely(status == csum_none)) { /* always for the old mac 10/100 */ - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); netif_receive_skb(skb); } else { skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c index 3128d6a8e9ce..8b3dc1eb4015 100644 --- a/drivers/net/tehuti.c +++ b/drivers/net/tehuti.c @@ -1297,12 +1297,13 @@ static int bdx_rx_receive(struct bdx_priv *priv, struct rxd_fifo *f, int budget) ndev->stats.rx_bytes += len; skb_put(skb, len); - skb->ip_summed = CHECKSUM_UNNECESSARY; skb->protocol = eth_type_trans(skb, ndev); /* Non-IP packets aren't checksum-offloaded */ if (GET_RXD_PKT_ID(rxd_val1) == 0) - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); + else + skb->ip_summed = CHECKSUM_UNNECESSARY; NETIF_RX_MUX(priv, rxd_val1, rxd_vlan, skb); diff --git a/drivers/net/tg3.c 
b/drivers/net/tg3.c index bc3af78a869f..9f6ffffc8376 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -4719,7 +4719,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) >> RXD_TCPCSUM_SHIFT) == 0xffff)) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, tp->dev); diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c index 3f4681f78262..5dfb39539b3e 100644 --- a/drivers/net/typhoon.c +++ b/drivers/net/typhoon.c @@ -1760,7 +1760,7 @@ typhoon_rx(struct typhoon *tp, struct basic_ring *rxRing, volatile __le32 * read (TYPHOON_RX_IP_CHK_GOOD | TYPHOON_RX_UDP_CHK_GOOD)) { new_skb->ip_summed = CHECKSUM_UNNECESSARY; } else - new_skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(new_skb); spin_lock(&tp->state_lock); if(tp->vlgrp != NULL && rx->rxStatus & TYPHOON_RX_VLAN) diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index fd69095ef6e3..ed7f4f5c4062 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -1954,7 +1954,7 @@ static int velocity_tx_srv(struct velocity_info *vptr) */ static inline void velocity_rx_csum(struct rx_desc *rd, struct sk_buff *skb) { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); if (rd->rdesc1.CSM & CSM_IPKT) { if (rd->rdesc1.CSM & CSM_IPOK) { diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index abe0ff53daf3..198ce92af0c3 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1042,11 +1042,11 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, skb->csum = htons(gdesc->rcd.csum); skb->ip_summed = CHECKSUM_PARTIAL; } else { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } } } else { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); } } diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c index 01cdec712b64..5378b849f54f 100644 --- a/drivers/net/vxge/vxge-main.c +++ b/drivers/net/vxge/vxge-main.c @@ -501,7 +501,7 @@ vxge_rx_1b_compl(struct __vxge_hw_ring *ringh, void *dtr, ext_info.l4_cksum == VXGE_HW_L4_CKSUM_OK) skb->ip_summed = CHECKSUM_UNNECESSARY; else - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); vxge_rx_complete(ring, skb, ext_info.vlan, pkt_length, &ext_info); diff --git a/drivers/net/xilinx_emaclite.c b/drivers/net/xilinx_emaclite.c index 71122ee4e830..f3f8be5a35fa 100644 --- a/drivers/net/xilinx_emaclite.c +++ b/drivers/net/xilinx_emaclite.c @@ -641,7 +641,7 @@ static void xemaclite_rx_handler(struct net_device *dev) skb_put(skb, len); /* Tell the skb how much data we got */ skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); dev->stats.rx_packets++; dev->stats.rx_bytes += len; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f900ffcd847e..9e8085a89589 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2206,6 +2206,21 @@ static inline void skb_forward_csum(struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; } +/** + * skb_checksum_none_assert - make sure skb ip_summed is CHECKSUM_NONE + * @skb: skb to check + * + * fresh skbs have their ip_summed set to CHECKSUM_NONE. + * Instead of forcing ip_summed to CHECKSUM_NONE, we can + * use this helper, to document places where we make this assertion. 
+ */ +static inline void skb_checksum_none_assert(struct sk_buff *skb) +{ +#ifdef DEBUG + BUG_ON(skb->ip_summed != CHECKSUM_NONE); +#endif +} + bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit v1.2.3 From ef5dc121d5a0bb1fa477c5395277259f07d318a3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 2 Sep 2010 15:48:16 -0700 Subject: mutex: Fix annotations to include it in kernel-locking docbook Fix kernel-doc notation in linux/mutex.h and kernel/mutex.c, then add these 2 files to the kernel-locking docbook as the Mutex API reference chapter. Add one API function to mutex-design.txt and correct a typo in that file. Signed-off-by: Randy Dunlap Cc: Rusty Russell LKML-Reference: <20100902154816.6cc2f9ad.randy.dunlap@oracle.com> Signed-off-by: Ingo Molnar --- Documentation/DocBook/kernel-locking.tmpl | 6 ++++++ Documentation/mutex-design.txt | 3 ++- include/linux/mutex.h | 8 ++++++++ kernel/mutex.c | 23 +++++++---------------- 4 files changed, 23 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 0b1a3f97f285..a0d479d1e1dd 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -1961,6 +1961,12 @@ machines due to caching. + + Mutex API reference +!Iinclude/linux/mutex.h +!Ekernel/mutex.c + + Further reading diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt index c91ccc0720fa..38c10fd7f411 100644 --- a/Documentation/mutex-design.txt +++ b/Documentation/mutex-design.txt @@ -9,7 +9,7 @@ firstly, there's nothing wrong with semaphores. But if the simpler mutex semantics are sufficient for your code, then there are a couple of advantages of mutexes: - - 'struct mutex' is smaller on most architectures: .e.g on x86, + - 'struct mutex' is smaller on most architectures: E.g. on x86, 'struct semaphore' is 20 bytes, 'struct mutex' is 16 bytes. A smaller structure size means less RAM footprint, and better CPU-cache utilization. @@ -136,3 +136,4 @@ the APIs of 'struct mutex' have been streamlined: void mutex_lock_nested(struct mutex *lock, unsigned int subclass); int mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); + int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 878cab4f5fcc..f363bc8fdc74 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -78,6 +78,14 @@ struct mutex_waiter { # include #else # define __DEBUG_MUTEX_INITIALIZER(lockname) +/** + * mutex_init - initialize the mutex + * @mutex: the mutex to be initialized + * + * Initialize the mutex to unlocked state. + * + * It is not allowed to initialize an already locked mutex. + */ # define mutex_init(mutex) \ do { \ static struct lock_class_key __key; \ diff --git a/kernel/mutex.c b/kernel/mutex.c index 4c0b7b3e6d2e..200407c1502f 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -36,15 +36,6 @@ # include #endif -/*** - * mutex_init - initialize the mutex - * @lock: the mutex to be initialized - * @key: the lock_class_key for the class; used by mutex lock debugging - * - * Initialize the mutex to unlocked state. - * - * It is not allowed to initialize an already locked mutex. 
- */ void __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) { @@ -68,7 +59,7 @@ EXPORT_SYMBOL(__mutex_init); static __used noinline void __sched __mutex_lock_slowpath(atomic_t *lock_count); -/*** +/** * mutex_lock - acquire the mutex * @lock: the mutex to be acquired * @@ -105,7 +96,7 @@ EXPORT_SYMBOL(mutex_lock); static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); -/*** +/** * mutex_unlock - release the mutex * @lock: the mutex to be released * @@ -364,8 +355,8 @@ __mutex_lock_killable_slowpath(atomic_t *lock_count); static noinline int __sched __mutex_lock_interruptible_slowpath(atomic_t *lock_count); -/*** - * mutex_lock_interruptible - acquire the mutex, interruptable +/** + * mutex_lock_interruptible - acquire the mutex, interruptible * @lock: the mutex to be acquired * * Lock the mutex like mutex_lock(), and return 0 if the mutex has @@ -456,15 +447,15 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) return prev == 1; } -/*** - * mutex_trylock - try acquire the mutex, without waiting +/** + * mutex_trylock - try to acquire the mutex, without waiting * @lock: the mutex to be acquired * * Try to acquire the mutex atomically. Returns 1 if the mutex * has been acquired successfully, and 0 on contention. * * NOTE: this function follows the spin_trylock() convention, so - * it is negated to the down_trylock() return values! Be careful + * it is negated from the down_trylock() return values! Be careful * about this when converting semaphore users to mutexes. * * This function must not be used in interrupt context. The -- cgit v1.2.3 From 57a2ce5f54f3120467be760662c6ef3bea3f9579 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 3 Sep 2010 19:09:46 +0800 Subject: padata: add missing __percpu markup in include/linux/padata.h parallel_data->pqueue and parallel_data->squeue are percpu pointers but were missing __percpu markup. Add it. Signed-off-by: Namhyung Kim Acked-by: Steffen Klassert Signed-off-by: Herbert Xu --- include/linux/padata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/padata.h b/include/linux/padata.h index bdcd1e9eacea..4633b2f726b6 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -127,8 +127,8 @@ struct padata_cpumask { */ struct parallel_data { struct padata_instance *pinst; - struct padata_parallel_queue *pqueue; - struct padata_serial_queue *squeue; + struct padata_parallel_queue __percpu *pqueue; + struct padata_serial_queue __percpu *squeue; atomic_t seq_nr; atomic_t reorder_objects; atomic_t refcnt; -- cgit v1.2.3 From 3fb5a991916091a908d53608a5899240039fb51e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 2 Sep 2010 15:42:43 +0000 Subject: cls_cgroup: Fix rcu lockdep warning Dave reported an rcu lockdep warning on a 2.6.35.4 kernel. task->cgroups and task->cgroups->subsys[i] are protected by RCU, so we avoid accessing invalid pointers here. This might happen, for example, when you are dereferencing those pointers while someone moves @task from one cgroup to another. Reported-by: Dave Jones Signed-off-by: Li Zefan Signed-off-by: David S. 
Miller --- include/net/cls_cgroup.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 726cc3536409..ef6c24a529e1 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -27,11 +27,17 @@ struct cgroup_cls_state #ifdef CONFIG_NET_CLS_CGROUP static inline u32 task_cls_classid(struct task_struct *p) { + int classid; + if (in_interrupt()) return 0; - return container_of(task_subsys_state(p, net_cls_subsys_id), - struct cgroup_cls_state, css)->classid; + rcu_read_lock(); + classid = container_of(task_subsys_state(p, net_cls_subsys_id), + struct cgroup_cls_state, css)->classid; + rcu_read_unlock(); + + return classid; } #else extern int net_cls_subsys_id; -- cgit v1.2.3 From 71cad0554956de87c3fc413b1eac9313887eb14f Mon Sep 17 00:00:00 2001 From: Philippe Langlais Date: Tue, 31 Aug 2010 14:19:09 +0200 Subject: serial: fix port type conflict between NS16550A & U6_16550A Bug seen by Dr. David Alan Gilbert with sparse Signed-off-by: Philippe Langlais Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/serial.h | 3 +-- include/linux/serial_core.h | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/serial.h b/include/linux/serial.h index 1ebc694a6d52..ef914061511e 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -77,8 +77,7 @@ struct serial_struct { #define PORT_16654 11 #define PORT_16850 12 #define PORT_RSA 13 /* RSA-DV II/S card */ -#define PORT_U6_16550A 14 -#define PORT_MAX 14 +#define PORT_MAX 13 #define SERIAL_IO_PORT 0 #define SERIAL_IO_HUB6 1 diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 64458a9a8938..563e23400913 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -44,7 +44,8 @@ #define PORT_RM9000 16 /* PMC-Sierra RM9xxx internal UART */ #define PORT_OCTEON 17 /* Cavium OCTEON internal UART */ #define PORT_AR7 18 /* Texas Instruments AR7 internal UART */ -#define PORT_MAX_8250 18 /* max port ID */ +#define PORT_U6_16550A 19 /* ST-Ericsson U6xxx internal UART */ +#define PORT_MAX_8250 19 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed -- cgit v1.2.3 From 29bc17ecb856ffb2b47c7009a71971c6f9334205 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sat, 4 Sep 2010 22:56:44 +0200 Subject: io-mapping: Fix the address space annotations Fixes a bunch of sparse warnings in io-mapping.h because of the inconsistent __iomem usage. 
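[Background sketch, not part of the patch: sparse treats __iomem as a distinct address space, so mixing plain and __iomem pointers without an explicit __force cast produces exactly the warnings being fixed here. A hypothetical example; the base address and mapping size are made up:

#include <linux/io.h>

static u32 read_first_reg(resource_size_t base)
{
	void __iomem *regs = ioremap(base, 4096);	/* pointer lives in the __iomem space */
	u32 v;

	if (!regs)
		return 0;
	v = readl(regs);	/* ok: readl() expects a const volatile void __iomem * */
	/* u32 *p = (u32 *)regs; would warn: "cast removes address space of expression" */
	iounmap(regs);
	return v;
}
]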
Signed-off-by: Francisco Jerez LKML-Reference: <1283633804-11749-2-git-send-email-currojerez@riseup.net> Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/io-mapping.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 0a6b3d5c490c..7fb592793738 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -79,7 +79,7 @@ io_mapping_free(struct io_mapping *mapping) } /* Atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset, int slot) @@ -94,12 +94,12 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, } static inline void -io_mapping_unmap_atomic(void *vaddr, int slot) +io_mapping_unmap_atomic(void __iomem *vaddr, int slot) { iounmap_atomic(vaddr, slot); } -static inline void * +static inline void __iomem * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { resource_size_t phys_addr; @@ -111,7 +111,7 @@ io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) } static inline void -io_mapping_unmap(void *vaddr) +io_mapping_unmap(void __iomem *vaddr) { iounmap(vaddr); } @@ -125,38 +125,38 @@ struct io_mapping; static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { - return (struct io_mapping *) ioremap_wc(base, size); + return (struct io_mapping __force *) ioremap_wc(base, size); } static inline void io_mapping_free(struct io_mapping *mapping) { - iounmap(mapping); + iounmap((void __force __iomem *) mapping); } /* Atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset, int slot) { - return ((char *) mapping) + offset; + return ((char __force __iomem *) mapping) + offset; } static inline void -io_mapping_unmap_atomic(void *vaddr, int slot) +io_mapping_unmap_atomic(void __iomem *vaddr, int slot) { } /* Non-atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { - return ((char *) mapping) + offset; + return ((char __force __iomem *) mapping) + offset; } static inline void -io_mapping_unmap(void *vaddr) +io_mapping_unmap(void __iomem *vaddr) { } -- cgit v1.2.3 From 73457f0f836956747e0394320be2163c050e96ef Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 6 Aug 2010 01:59:14 +0300 Subject: cgroups: fix API thinko The cgroup_attach_task_current_cg API that we have upstream is backwards: we really need an API to attach the cgroups from another process A to the current one. In our case (vhost), a privileged user wants to attach its task to cgroups from a less privileged one; the API makes us run it in the other task's context, and this fails. So let's make the API generic and just pass in 'from' and 'to' tasks. Add an inline wrapper for cgroup_attach_task_current_cg to avoid breaking bisect. Signed-off-by: Michael S. 
Tsirkin Acked-by: Li Zefan Acked-by: Paul Menage --- include/linux/cgroup.h | 11 ++++++++++- kernel/cgroup.c | 9 +++++---- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e41c6e..5a53d8f039a2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -578,7 +578,11 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); -int cgroup_attach_task_current_cg(struct task_struct *); +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); +static inline int cgroup_attach_task_current_cg(struct task_struct *tsk) +{ + return cgroup_attach_task_all(current, tsk); +} /* * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works @@ -636,6 +640,11 @@ static inline int cgroupstats_build(struct cgroupstats *stats, } /* No cgroups - nothing to do */ +static inline int cgroup_attach_task_all(struct task_struct *from, + struct task_struct *t) +{ + return 0; +} static inline int cgroup_attach_task_current_cg(struct task_struct *t) { return 0; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a8ce09954404..9d90c08f3bde 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1789,10 +1789,11 @@ out: } /** - * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup + * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from' + * @from: attach to all cgroups of a given task + * @tsk: the task to be attached */ -int cgroup_attach_task_current_cg(struct task_struct *tsk) +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) { struct cgroupfs_root *root; struct cgroup *cur_cg; @@ -1800,7 +1801,7 @@ int cgroup_attach_task_current_cg(struct task_struct *tsk) cgroup_lock(); for_each_active_root(root) { - cur_cg = task_cgroup_from_root(current, root); + cur_cg = task_cgroup_from_root(from, root); retval = cgroup_attach_task(cur_cg, tsk); if (retval) break; @@ -1809,7 +1810,7 @@ return retval; } -EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg); +EXPORT_SYMBOL_GPL(cgroup_attach_task_all); /* * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex -- cgit v1.2.3 From b8ef3204f460912a46659cdc74d237adbe705053 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 6 Aug 2010 03:02:37 -0500 Subject: [SCSI] fc class: add fc host default dev loss setting This patch adds a fc_host setting to store the default dev_loss_tmo. It is used if the driver has a callback to get the value from the LLD. If the callback is not set, then we use the fc class module default value. 
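[Hypothetical usage sketch, not part of the patch: an LLD opts in by implementing the new callback in its fc_function_template and writing the value through the fc_host_def_dev_loss_tmo() accessor; every name here other than those two is invented:

static void example_get_host_def_dev_loss_tmo(struct Scsi_Host *shost)
{
	/* example_read_fw_dev_loss() stands in for however the LLD
	 * obtains its firmware/hardware default, in seconds */
	fc_host_def_dev_loss_tmo(shost) = example_read_fw_dev_loss(shost);
}

static struct fc_function_template example_fc_template = {
	/* ... other transport callbacks and attribute flags ... */
	.get_host_def_dev_loss_tmo = example_get_host_def_dev_loss_tmo,
};
]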
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_fc.c | 6 +++++- include/scsi/scsi_transport_fc.h | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index d7e470a06180..9f0f7d9c7422 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -2525,7 +2525,11 @@ fc_rport_create(struct Scsi_Host *shost, int channel, rport->maxframe_size = -1; rport->supported_classes = FC_COS_UNSPECIFIED; - rport->dev_loss_tmo = fc_dev_loss_tmo; + if (fci->f->get_host_def_dev_loss_tmo) { + fci->f->get_host_def_dev_loss_tmo(shost); + rport->dev_loss_tmo = fc_host_def_dev_loss_tmo(shost); + } else + rport->dev_loss_tmo = fc_dev_loss_tmo; memcpy(&rport->node_name, &ids->node_name, sizeof(rport->node_name)); memcpy(&rport->port_name, &ids->port_name, sizeof(rport->port_name)); rport->port_id = ids->port_id; diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index 87d81b3ce564..9f98fca9b763 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -496,6 +496,7 @@ struct fc_host_attrs { u64 fabric_name; char symbolic_name[FC_SYMBOLIC_NAME_SIZE]; char system_hostname[FC_SYMBOLIC_NAME_SIZE]; + u32 def_dev_loss_tmo; /* Private (Transport-managed) Attributes */ enum fc_tgtid_binding_type tgtid_bind_type; @@ -580,6 +581,8 @@ struct fc_host_attrs { (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q_name) #define fc_host_devloss_work_q(x) \ (((struct fc_host_attrs *)(x)->shost_data)->devloss_work_q) +#define fc_host_def_dev_loss_tmo(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->def_dev_loss_tmo) struct fc_bsg_buffer { @@ -640,6 +643,7 @@ struct fc_function_template { void (*get_host_fabric_name)(struct Scsi_Host *); void (*get_host_symbolic_name)(struct Scsi_Host *); void (*set_host_system_hostname)(struct Scsi_Host *); + void (*get_host_def_dev_loss_tmo)(struct Scsi_Host *); struct fc_host_statistics * (*get_fc_host_stats)(struct Scsi_Host *); void (*reset_fc_host_stats)(struct Scsi_Host *); -- cgit v1.2.3 From 144c0f8833d0458e4369a27a53aea8856c665c41 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 3 Sep 2010 10:31:05 -0700 Subject: Input: fix a few typos Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 6 +++--- include/linux/input.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/input/input.c b/drivers/input/input.c index ab6982056518..acb3c8095c65 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -171,7 +171,7 @@ static int input_handle_abs_event(struct input_dev *dev, if (code == ABS_MT_SLOT) { /* * "Stage" the event; we'll flush it later, when we - * get actiual touch data. + * get actual touch data. */ if (*pval >= 0 && *pval < dev->mtsize) dev->slot = *pval; @@ -188,7 +188,7 @@ static int input_handle_abs_event(struct input_dev *dev, pold = &mtslot->abs[code - ABS_MT_FIRST]; } else { /* - * Bypass filtering for multitouch events when + * Bypass filtering for multi-touch events when * not employing slots. */ pold = NULL; @@ -1601,7 +1601,7 @@ EXPORT_SYMBOL(input_free_device); * * This function allocates all necessary memory for MT slot handling in the * input device, and adds ABS_MT_SLOT to the device capabilities. All slots - * are initially marked as unused iby setting ABS_MT_TRACKING_ID to -1. + * are initially marked as unused by setting ABS_MT_TRACKING_ID to -1. 
*/ int input_mt_create_slots(struct input_dev *dev, unsigned int num_slots) { diff --git a/include/linux/input.h b/include/linux/input.h index 896a92227bc4..789265123531 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -67,7 +67,7 @@ struct input_absinfo { #define EVIOCGPHYS(len) _IOC(_IOC_READ, 'E', 0x07, len) /* get physical location */ #define EVIOCGUNIQ(len) _IOC(_IOC_READ, 'E', 0x08, len) /* get unique identifier */ -#define EVIOCGKEY(len) _IOC(_IOC_READ, 'E', 0x18, len) /* get global keystate */ +#define EVIOCGKEY(len) _IOC(_IOC_READ, 'E', 0x18, len) /* get global key state */ #define EVIOCGLED(len) _IOC(_IOC_READ, 'E', 0x19, len) /* get all LEDs */ #define EVIOCGSND(len) _IOC(_IOC_READ, 'E', 0x1a, len) /* get all sounds status */ #define EVIOCGSW(len) _IOC(_IOC_READ, 'E', 0x1b, len) /* get all switch states */ -- cgit v1.2.3 From fe8e0c25cad28e8858ecfa5863333c70685a6811 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Mon, 6 Sep 2010 20:53:42 +0200 Subject: x86, 32-bit: Align percpu area and irq stacks to THREAD_SIZE The irq stacks, located in the percpu-area, need to be THREAD_SIZE aligned. Add the infrastructure to align percpu variables to larger-than-pagesize amounts within the percpu area, and use it to specify the alignment for the irq stacks. Also align the percpu area itself to THREAD_SIZE. This should make irq stacks work with 8K THREAD_SIZE. Signed-off-by: Alexander van Heukelum Cc: Tejun Heo Cc: hch@lst.de LKML-Reference: <1283799222.15941.1393621887@webmail.messagingengine.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_32.c | 4 ++-- arch/x86/kernel/vmlinux.lds.S | 2 +- include/linux/percpu-defs.h | 12 ++++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 3b5609f54c4b..50fbbe60e507 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -60,8 +60,8 @@ union irq_ctx { static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); -static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, hardirq_stack); -static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, softirq_stack); +static DEFINE_PER_CPU_MULTIPAGE_ALIGNED(union irq_ctx, hardirq_stack, THREAD_SIZE); +static DEFINE_PER_CPU_MULTIPAGE_ALIGNED(union irq_ctx, softirq_stack, THREAD_SIZE); static void call_on_stack(void *func, void *stack) { diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index d0bb52296fa3..bb899475355d 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -273,7 +273,7 @@ SECTIONS } #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) - PERCPU(PAGE_SIZE) + PERCPU(THREAD_SIZE) #endif . = ALIGN(PAGE_SIZE); diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index ce2dc655cd1d..ab20d119a85d 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -138,6 +138,18 @@ DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ __aligned(PAGE_SIZE) +/* + * Declaration/definition used for large per-CPU variables that must be + * aligned to something larger than the pagesize. + */ +#define DECLARE_PER_CPU_MULTIPAGE_ALIGNED(type, name, size) \ + DECLARE_PER_CPU_SECTION(type, name, "..page_aligned") \ + __aligned(size) + +#define DEFINE_PER_CPU_MULTIPAGE_ALIGNED(type, name, size) \ + DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ + __aligned(size) + /* * Intermodule exports for per-CPU variables. 
sparse forgets about * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to -- cgit v1.2.3 From 831853c87fb7234a8650484d30993242ea9ad6d3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 6 Sep 2010 16:08:56 +0100 Subject: ALSA: Add more jack button slots Some devices have more flexible microphone detection and can detect a wider range of buttons. Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai --- include/sound/jack.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/jack.h b/include/sound/jack.h index d90b9fa32707..c140fc7cbd3f 100644 --- a/include/sound/jack.h +++ b/include/sound/jack.h @@ -47,6 +47,9 @@ enum snd_jack_types { SND_JACK_BTN_0 = 0x4000, SND_JACK_BTN_1 = 0x2000, SND_JACK_BTN_2 = 0x1000, + SND_JACK_BTN_3 = 0x0800, + SND_JACK_BTN_4 = 0x0400, + SND_JACK_BTN_5 = 0x0200, }; struct snd_jack { @@ -55,7 +58,7 @@ struct snd_jack { int type; const char *id; char name[100]; - unsigned int key[3]; /* Keep in sync with definitions above */ + unsigned int key[6]; /* Keep in sync with definitions above */ void *private_data; void (*private_free)(struct snd_jack *); }; -- cgit v1.2.3 From f8f235e5bbf4e61f3e0886a44afb1dc4cfe8f337 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 27 Aug 2010 11:08:57 +0800 Subject: agp/intel: Fix cache control for Sandybridge Sandybridge GTT has new cache control bits in PTE, which controls graphics page cache in LLC or LLC/MLC, so we need to extend the mask function to respect the new bits. And set cache control to always LLC only by default on Gen6. Signed-off-by: Zhenyu Wang Cc: stable@kernel.org Signed-off-by: Chris Wilson --- drivers/char/agp/intel-agp.c | 1 + drivers/char/agp/intel-gtt.c | 50 ++++++++++++++++++++++++++++++++--------- drivers/gpu/drm/i915/i915_gem.c | 1 + include/linux/intel-gtt.h | 20 +++++++++++++++++ 4 files changed, 62 insertions(+), 10 deletions(-) create mode 100644 include/linux/intel-gtt.h (limited to 'include') diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 710af89b176d..74461d177baf 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -12,6 +12,7 @@ #include #include "agp.h" #include "intel-agp.h" +#include #include "intel-gtt.c" diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 7f35854d33a3..64b10551a3f8 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -49,6 +49,26 @@ static struct gatt_mask intel_i810_masks[] = .type = INTEL_AGP_CACHED_MEMORY} }; +#define INTEL_AGP_UNCACHED_MEMORY 0 +#define INTEL_AGP_CACHED_MEMORY_LLC 1 +#define INTEL_AGP_CACHED_MEMORY_LLC_GFDT 2 +#define INTEL_AGP_CACHED_MEMORY_LLC_MLC 3 +#define INTEL_AGP_CACHED_MEMORY_LLC_MLC_GFDT 4 + +static struct gatt_mask intel_gen6_masks[] = +{ + {.mask = I810_PTE_VALID | GEN6_PTE_UNCACHED, + .type = INTEL_AGP_UNCACHED_MEMORY }, + {.mask = I810_PTE_VALID | GEN6_PTE_LLC, + .type = INTEL_AGP_CACHED_MEMORY_LLC }, + {.mask = I810_PTE_VALID | GEN6_PTE_LLC | GEN6_PTE_GFDT, + .type = INTEL_AGP_CACHED_MEMORY_LLC_GFDT }, + {.mask = I810_PTE_VALID | GEN6_PTE_LLC_MLC, + .type = INTEL_AGP_CACHED_MEMORY_LLC_MLC }, + {.mask = I810_PTE_VALID | GEN6_PTE_LLC_MLC | GEN6_PTE_GFDT, + .type = INTEL_AGP_CACHED_MEMORY_LLC_MLC_GFDT }, +}; + static struct _intel_private { struct pci_dev *pcidev; /* device one */ u8 __iomem *registers; @@ -178,13 +198,6 @@ static void intel_agp_insert_sg_entries(struct agp_memory *mem, off_t pg_start, int mask_type) { int i, j; - u32 cache_bits = 0; - - if 
(agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB || - agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB) - { - cache_bits = GEN6_PTE_LLC_MLC; - } for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { writel(agp_bridge->driver->mask_memory(agp_bridge, @@ -317,6 +330,23 @@ static int intel_i830_type_to_mask_type(struct agp_bridge_data *bridge, return 0; } +static int intel_gen6_type_to_mask_type(struct agp_bridge_data *bridge, + int type) +{ + unsigned int type_mask = type & ~AGP_USER_CACHED_MEMORY_GFDT; + unsigned int gfdt = type & AGP_USER_CACHED_MEMORY_GFDT; + + if (type_mask == AGP_USER_UNCACHED_MEMORY) + return INTEL_AGP_UNCACHED_MEMORY; + else if (type_mask == AGP_USER_CACHED_MEMORY_LLC_MLC) + return gfdt ? INTEL_AGP_CACHED_MEMORY_LLC_MLC_GFDT : + INTEL_AGP_CACHED_MEMORY_LLC_MLC; + else /* set 'normal'/'cached' to LLC by default */ + return gfdt ? INTEL_AGP_CACHED_MEMORY_LLC_GFDT : + INTEL_AGP_CACHED_MEMORY_LLC; +} + + static int intel_i810_insert_entries(struct agp_memory *mem, off_t pg_start, int type) { @@ -1163,7 +1193,7 @@ static int intel_i915_insert_entries(struct agp_memory *mem, off_t pg_start, mask_type = agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type); - if (mask_type != 0 && mask_type != AGP_PHYS_MEMORY && + if (!IS_SNB && mask_type != 0 && mask_type != AGP_PHYS_MEMORY && mask_type != INTEL_AGP_CACHED_MEMORY) goto out_err; @@ -1563,7 +1593,7 @@ static const struct agp_bridge_driver intel_gen6_driver = { .fetch_size = intel_i9xx_fetch_size, .cleanup = intel_i915_cleanup, .mask_memory = intel_gen6_mask_memory, - .masks = intel_i810_masks, + .masks = intel_gen6_masks, .agp_enable = intel_i810_agp_enable, .cache_flush = global_cache_flush, .create_gatt_table = intel_i965_create_gatt_table, @@ -1576,7 +1606,7 @@ static const struct agp_bridge_driver intel_gen6_driver = { .agp_alloc_pages = agp_generic_alloc_pages, .agp_destroy_page = agp_generic_destroy_page, .agp_destroy_pages = agp_generic_destroy_pages, - .agp_type_to_mask_type = intel_i830_type_to_mask_type, + .agp_type_to_mask_type = intel_gen6_type_to_mask_type, .chipset_flush = intel_i915_chipset_flush, #ifdef USE_PCI_DMA_API .agp_map_page = intel_agp_map_page, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 748c26340c35..16fca1d1799a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -34,6 +34,7 @@ #include #include #include +#include static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj); static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj); diff --git a/include/linux/intel-gtt.h b/include/linux/intel-gtt.h new file mode 100644 index 000000000000..1d19ab2afa39 --- /dev/null +++ b/include/linux/intel-gtt.h @@ -0,0 +1,20 @@ +/* + * Common Intel AGPGART and GTT definitions. + */ +#ifndef _INTEL_GTT_H +#define _INTEL_GTT_H + +#include + +/* This is for Intel only GTT controls. + * + * Sandybridge: AGP_USER_CACHED_MEMORY default to LLC only + */ + +#define AGP_USER_CACHED_MEMORY_LLC_MLC (AGP_USER_TYPES + 2) +#define AGP_USER_UNCACHED_MEMORY (AGP_USER_TYPES + 4) + +/* flag for GFDT type */ +#define AGP_USER_CACHED_MEMORY_GFDT (1 << 3) + +#endif -- cgit v1.2.3 From 2bf2160d8805de64308e2e7c3cd97813cb58ed2f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 23 Aug 2010 18:42:48 +0900 Subject: irq: Add tracepoint to softirq_raise Add a tracepoint for tracing when softirq action is raised. 
This and the existing tracepoints complete softirq's tracepoints: softirq_raise, softirq_entry and softirq_exit. And when this tracepoint is used in combination with the softirq_entry tracepoint we can determine the softirq raise latency. Signed-off-by: Lai Jiangshan Acked-by: Mathieu Desnoyers Acked-by: Neil Horman Cc: David Miller Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C724298.4050509@jp.fujitsu.com> [ factorize softirq events with DECLARE_EVENT_CLASS ] Signed-off-by: Koki Sanagi Signed-off-by: Frederic Weisbecker --- include/linux/interrupt.h | 8 +++++++- include/trace/events/irq.h | 26 ++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a0384a4d1e6f..531495db1708 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -18,6 +18,7 @@ #include #include #include +#include /* * These correspond to the IORESOURCE_IRQ_* defines in @@ -407,7 +408,12 @@ asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); -#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) +static inline void __raise_softirq_irqoff(unsigned int nr) +{ + trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL); + or_softirq_pending(1UL << nr); +} + extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); extern void wakeup_softirqd(void); diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 0e4cfb694fe7..6fa7cbab7d93 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -5,7 +5,9 @@ #define _TRACE_IRQ_H #include -#include + +struct irqaction; +struct softirq_action; #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } #define show_softirq_name(val) \ @@ -93,7 +95,10 @@ DECLARE_EVENT_CLASS(softirq, ), TP_fast_assign( - __entry->vec = (int)(h - vec); + if (vec) + __entry->vec = (int)(h - vec); + else + __entry->vec = (int)(long)h; ), TP_printk("vec=%d [action=%s]", __entry->vec, @@ -136,6 +141,23 @@ DEFINE_EVENT(softirq, softirq_exit, TP_ARGS(h, vec) ); +/** + * softirq_raise - called immediately when a softirq is raised + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter contains a pointer to the softirq vector number which is + * raised. @vec is NULL and it means @h includes vector number not + * softirq_action. When used in combination with the softirq_entry tracepoint + * we can determine the softirq raise latency. + */ +DEFINE_EVENT(softirq, softirq_raise, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec) +); + #endif /* _TRACE_IRQ_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 3e4b10d7a4d2a78af64f8096dc7cdb3bebd65adb Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 23 Aug 2010 18:43:51 +0900 Subject: napi: Convert trace_napi_poll to TRACE_EVENT This patch converts trace_napi_poll from DECLARE_EVENT to TRACE_EVENT to improve the usability of napi_poll tracepoint. 
-0 [001] 241302.750777: napi_poll: napi poll on napi struct f6acc480 for device eth3 -0 [000] 241302.852389: napi_poll: napi poll on napi struct f5d0d70c for device eth1 The original patch is below: http://marc.info/?l=linux-kernel&m=126021713809450&w=2 [ sanagi.koki@jp.fujitsu.com: And add a fix by Steven Rostedt: http://marc.info/?l=linux-kernel&m=126150506519173&w=2 ] Signed-off-by: Neil Horman Acked-by: David S. Miller Acked-by: Neil Horman Cc: Mathieu Desnoyers Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C7242D7.4050009@jp.fujitsu.com> Signed-off-by: Koki Sanagi Signed-off-by: Frederic Weisbecker --- include/trace/events/napi.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index 188deca2f3c7..8fe1e93f531d 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -6,10 +6,31 @@ #include #include +#include + +#define NO_DEV "(no_device)" + +TRACE_EVENT(napi_poll, -DECLARE_TRACE(napi_poll, TP_PROTO(struct napi_struct *napi), - TP_ARGS(napi)); + + TP_ARGS(napi), + + TP_STRUCT__entry( + __field( struct napi_struct *, napi) + __string( dev_name, napi->dev ? napi->dev->name : NO_DEV) + ), + + TP_fast_assign( + __entry->napi = napi; + __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV); + ), + + TP_printk("napi poll on napi struct %p for device %s", + __entry->napi, __get_str(dev_name)) +); + +#undef NO_DEV #endif /* _TRACE_NAPI_H_ */ -- cgit v1.2.3 From cf66ba58b5cb8b1526e9dd2fb96ff8db048d4d44 Mon Sep 17 00:00:00 2001 From: Koki Sanagi Date: Mon, 23 Aug 2010 18:45:02 +0900 Subject: netdev: Add tracepoints to netdev layer This patch adds tracepoint to dev_queue_xmit, dev_hard_start_xmit, netif_rx and netif_receive_skb. These tracepoints help you to monitor network driver's input/output. -0 [001] 112447.902030: netif_rx: dev=eth1 skbaddr=f3ef0900 len=84 -0 [001] 112447.902039: netif_receive_skb: dev=eth1 skbaddr=f3ef0900 len=84 sshd-6828 [000] 112447.903257: net_dev_queue: dev=eth4 skbaddr=f3fca538 len=226 sshd-6828 [000] 112447.903260: net_dev_xmit: dev=eth4 skbaddr=f3fca538 len=226 rc=0 Signed-off-by: Koki Sanagi Acked-by: David S. 
Miller Acked-by: Neil Horman Cc: Mathieu Desnoyers Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C72431E.3000901@jp.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/trace/events/net.h | 82 ++++++++++++++++++++++++++++++++++++++++++++++ net/core/dev.c | 6 ++++ net/core/net-traces.c | 1 + 3 files changed, 89 insertions(+) create mode 100644 include/trace/events/net.h (limited to 'include') diff --git a/include/trace/events/net.h b/include/trace/events/net.h new file mode 100644 index 000000000000..5f247f5ffc56 --- /dev/null +++ b/include/trace/events/net.h @@ -0,0 +1,82 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM net + +#if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NET_H + +#include +#include +#include +#include + +TRACE_EVENT(net_dev_xmit, + + TP_PROTO(struct sk_buff *skb, + int rc), + + TP_ARGS(skb, rc), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned int, len ) + __field( int, rc ) + __string( name, skb->dev->name ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->len = skb->len; + __entry->rc = rc; + __assign_str(name, skb->dev->name); + ), + + TP_printk("dev=%s skbaddr=%p len=%u rc=%d", + __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) +); + +DECLARE_EVENT_CLASS(net_dev_template, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned int, len ) + __string( name, skb->dev->name ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->len = skb->len; + __assign_str(name, skb->dev->name); + ), + + TP_printk("dev=%s skbaddr=%p len=%u", + __get_str(name), __entry->skbaddr, __entry->len) +) + +DEFINE_EVENT(net_dev_template, net_dev_queue, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_template, netif_receive_skb, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_template, netif_rx, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); +#endif /* _TRACE_NET_H */ + +/* This part must be outside protection */ +#include diff --git a/net/core/dev.c b/net/core/dev.c index 3721fbb9a83c..5a4fbc7405e2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -128,6 +128,7 @@ #include #include #include +#include #include #include "net-sysfs.h" @@ -1978,6 +1979,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, } rc = ops->ndo_start_xmit(skb, dev); + trace_net_dev_xmit(skb, rc); if (rc == NETDEV_TX_OK) txq_trans_update(txq); return rc; @@ -1998,6 +2000,7 @@ gso: skb_dst_drop(nskb); rc = ops->ndo_start_xmit(nskb, dev); + trace_net_dev_xmit(nskb, rc); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) goto out_kfree_gso_skb; @@ -2186,6 +2189,7 @@ int dev_queue_xmit(struct sk_buff *skb) #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); #endif + trace_net_dev_queue(skb); if (q->enqueue) { rc = __dev_xmit_skb(skb, q, dev, txq); goto out; @@ -2512,6 +2516,7 @@ int netif_rx(struct sk_buff *skb) if (netdev_tstamp_prequeue) net_timestamp_check(skb); + trace_netif_rx(skb); #ifdef CONFIG_RPS { struct rps_dev_flow voidflow, *rflow = &voidflow; @@ -2828,6 +2833,7 @@ static int __netif_receive_skb(struct sk_buff *skb) if (!netdev_tstamp_prequeue) net_timestamp_check(skb); + trace_netif_receive_skb(skb); if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) return NET_RX_SUCCESS; diff --git a/net/core/net-traces.c 
b/net/core/net-traces.c index afa6380ed88a..7f1bb2aba03b 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -26,6 +26,7 @@ #define CREATE_TRACE_POINTS #include +#include #include EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
-- cgit v1.2.3
From 07dc22e7295f25526f110d704655ff0ea7687420 Mon Sep 17 00:00:00 2001 From: Koki Sanagi Date: Mon, 23 Aug 2010 18:46:12 +0900 Subject: skb: Add tracepoints to freeing skb
This patch adds a tracepoint to consume_skb and adds trace_kfree_skb before __kfree_skb in skb_free_datagram_locked and net_tx_action. Combined with the tracepoint on dev_hard_start_xmit, we can check how long it takes to free transmitted packets, and from that we can calculate how many packets the driver was holding at that time. This is useful when drops of transmitted packets are a problem; a standalone sketch of the pairing follows the diff. sshd-6828 [000] 112689.258154: consume_skb: skbaddr=f2d99bb8
Signed-off-by: Koki Sanagi Acked-by: David S. Miller Acked-by: Neil Horman Cc: Mathieu Desnoyers Cc: Kaneshige Kenji Cc: Izumo Taku Cc: Kosaki Motohiro Cc: Lai Jiangshan Cc: Scott Mcmillan Cc: Steven Rostedt Cc: Eric Dumazet LKML-Reference: <4C724364.50903@jp.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/trace/events/skb.h | 17 +++++++++++++++++ net/core/datagram.c | 1 + net/core/dev.c | 2 ++ net/core/skbuff.c | 1 + 4 files changed, 21 insertions(+) (limited to 'include')
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 4b2be6dc76f0..75ce9d500d8e 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -35,6 +35,23 @@ TRACE_EVENT(kfree_skb, __entry->skbaddr, __entry->protocol, __entry->location) ); +TRACE_EVENT(consume_skb, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + ), + + TP_printk("skbaddr=%p", __entry->skbaddr) +); + TRACE_EVENT(skb_copy_datagram_iovec, TP_PROTO(const struct sk_buff *skb, int len),
diff --git a/net/core/datagram.c b/net/core/datagram.c index 251997a95483..282806ba7a57 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -243,6 +243,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) unlock_sock_fast(sk, slow); /* skb is now orphaned, can be freed outside of locked section */ + trace_kfree_skb(skb, skb_free_datagram_locked); __kfree_skb(skb); } EXPORT_SYMBOL(skb_free_datagram_locked);
diff --git a/net/core/dev.c b/net/core/dev.c index 5a4fbc7405e2..2308cce48048 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -129,6 +129,7 @@ #include #include #include +#include #include #include "net-sysfs.h" @@ -2576,6 +2577,7 @@ static void net_tx_action(struct softirq_action *h) clist = clist->next; WARN_ON(atomic_read(&skb->users)); + trace_kfree_skb(skb, net_tx_action); __kfree_skb(skb); } }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3a2513f0d0c3..12e61e351d0e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -466,6 +466,7 @@ void consume_skb(struct sk_buff *skb) smp_rmb(); else if (likely(!atomic_dec_and_test(&skb->users))) return; + trace_consume_skb(skb); __kfree_skb(skb); } EXPORT_SYMBOL(consume_skb);
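To make the pairing concrete, here is a standalone user-space sketch of matching a net_dev_xmit event to the later consume_skb/kfree_skb event by skbaddr. This is plain C, not kernel code: the trace parsing is stubbed out, the xmit timestamp is invented, and only the consume_skb sample above comes from the commit message.

#include <stdio.h>

struct ev { unsigned long skbaddr; double t; };

int main(void)
{
	/* hand-made events standing in for parsed trace lines */
	struct ev xmit  = { 0xf2d99bb8, 112689.258101 };  /* invented */
	struct ev freed = { 0xf2d99bb8, 112689.258154 };  /* from the log above */

	/* pair xmit and free by skb address to get the free latency */
	if (xmit.skbaddr == freed.skbaddr)
		printf("skb %#lx freed %.0f us after xmit\n",
		       xmit.skbaddr, (freed.t - xmit.t) * 1e6);
	return 0;
}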
-- cgit v1.2.3
From 269cddd44e3588d1c50a7ec055b78de4d6c72cb6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 7 Sep 2010 14:17:10 -0500 Subject: dlm: Fix dlm lock status block comment in dlm.h
There is only one place in the dlm where the sb_status is set and that is queue_cast(). Tracing back the callers of that function shows that the listed set of return values is out of date, so here is an updated set.
Signed-off-by: Steven Whitehouse Signed-off-by: David Teigland --- include/linux/dlm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include')
diff --git a/include/linux/dlm.h b/include/linux/dlm.h index 0b3518c42356..d4e02f5353a0 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -48,10 +48,10 @@ typedef void dlm_lockspace_t; * * 0 if lock request was successful * -EAGAIN if request would block and is flagged DLM_LKF_NOQUEUE - * -ENOMEM if there is no memory to process request - * -EINVAL if there are invalid parameters * -DLM_EUNLOCK if unlock request was successful * -DLM_ECANCEL if a cancel completed successfully + * -EDEADLK if a deadlock was detected + * -ETIMEDOUT if the lock request was canceled due to a timeout */ #define DLM_SBF_DEMOTED 0x01
-- cgit v1.2.3
From 17cebf658e088935d4bdebfc7ad9800e9fc4a0b2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 16:55:22 +1000 Subject: sunrpc: extract some common sunrpc_cache code from nfsd
Rather than duplicating this idiom twice, put it in an inline function. This reduces the usage of 'expiry_time' outside the sunrpc/cache.c code and thus the impact of a change that is about to be made to that field.
Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 9 +++------ include/linux/sunrpc/cache.h | 6 ++++++ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include')
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c2a4f71d87dd..e56827b88fd2 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -935,10 +935,9 @@ static void exp_fsid_unhash(struct svc_export *exp) ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); if (!IS_ERR(ek)) { - ek->h.expiry_time = get_seconds()-1; + sunrpc_invalidate(&ek->h, &svc_expkey_cache); cache_put(&ek->h, &svc_expkey_cache); } - svc_expkey_cache.nextcheck = get_seconds(); } static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) @@ -973,10 +972,9 @@ static void exp_unhash(struct svc_export *exp) ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); if (!IS_ERR(ek)) { - ek->h.expiry_time = get_seconds()-1; + sunrpc_invalidate(&ek->h, &svc_expkey_cache); cache_put(&ek->h, &svc_expkey_cache); } - svc_expkey_cache.nextcheck = get_seconds(); } /* @@ -1097,8 +1095,7 @@ out: static void exp_do_unexport(svc_export *unexp) { - unexp->h.expiry_time = get_seconds()-1; - svc_export_cache.nextcheck = get_seconds(); + sunrpc_invalidate(&unexp->h, &svc_export_cache); exp_unhash(unexp); exp_fsid_unhash(unexp); }
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 7bf3e84b92f4..0e1febf4e5bc 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -228,4 +228,10 @@ static inline time_t get_expiry(char **bpp) return rv; } +static inline void sunrpc_invalidate(struct cache_head *h, + struct cache_detail *detail) +{ + h->expiry_time = get_seconds() - 1; + detail->nextcheck = get_seconds(); +} #endif /* _LINUX_SUNRPC_CACHE_H_ */
-- cgit v1.2.3
From c5b29f885afe890f953f7f23424045cdad31d3e4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 16:55:22 +1000 Subject: sunrpc: use seconds since boot in expiry cache
This protects us from confusion when the wallclock time changes. We convert to and from wallclock when setting or reading expiry times. Also use seconds since boot for the last_close time.
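The arithmetic is easy to check outside the kernel. A minimal standalone sketch follows; the fake clock variables and the one-hour clock step are invented for the demo, and only the helper names come from the patch below. It relies on getboottime() being derived as wall time minus uptime, so a wallclock step moves the apparent boot time by the same amount.

#include <stdio.h>
#include <time.h>

static time_t wallclock = 1000000;  /* stand-in for get_seconds() */
static time_t boot_wall = 999000;   /* stand-in for getboottime() */

static time_t seconds_since_boot(void) { return wallclock - boot_wall; }
static time_t convert_to_wallclock(time_t sinceboot) { return boot_wall + sinceboot; }

int main(void)
{
	time_t expiry = seconds_since_boot() + 30;  /* entry valid for 30s */

	/* an admin steps the clock back an hour; boot time moves with it */
	wallclock -= 3600;
	boot_wall -= 3600;

	/* the boot-relative comparison is unaffected by the step */
	printf("expired: %s\n",
	       seconds_since_boot() > expiry ? "yes" : "no");  /* "no" */
	printf("expiry shown to userspace: %ld\n",
	       (long)convert_to_wallclock(expiry));
	return 0;
}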
Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfs/dns_resolve.c | 6 +++--- fs/nfsd/nfs4idmap.c | 2 +- include/linux/sunrpc/cache.h | 28 +++++++++++++++++++++++++--- net/sunrpc/cache.c | 36 +++++++++++++++++++----------------- 4 files changed, 48 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index dba50a5625db..a6e711ad130f 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -167,7 +167,7 @@ static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd, return 0; } item = container_of(h, struct nfs_dns_ent, h); - ttl = (long)item->h.expiry_time - (long)get_seconds(); + ttl = item->h.expiry_time - seconds_since_boot(); if (ttl < 0) ttl = 0; @@ -239,7 +239,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) ttl = get_expiry(&buf); if (ttl == 0) goto out; - key.h.expiry_time = ttl + get_seconds(); + key.h.expiry_time = ttl + seconds_since_boot(); ret = -ENOMEM; item = nfs_dns_lookup(cd, &key); @@ -301,7 +301,7 @@ static int do_cache_lookup_nowait(struct cache_detail *cd, goto out_err; ret = -ETIMEDOUT; if (!test_bit(CACHE_VALID, &(*item)->h.flags) - || (*item)->h.expiry_time < get_seconds() + || (*item)->h.expiry_time < seconds_since_boot() || cd->flush_time > (*item)->h.last_refresh) goto out_put; ret = -ENOENT; diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index c78dbf493424..808b33a4a090 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -550,7 +550,7 @@ do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *), goto out_err; ret = -ETIMEDOUT; if (!test_bit(CACHE_VALID, &(*item)->h.flags) - || (*item)->h.expiry_time < get_seconds() + || (*item)->h.expiry_time < seconds_since_boot() || detail->flush_time > (*item)->h.last_refresh) goto out_put; ret = -ENOENT; diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 0e1febf4e5bc..ece432b7f87f 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -218,20 +218,42 @@ static inline int get_int(char **bpp, int *anint) return 0; } +/* + * timestamps kept in the cache are expressed in seconds + * since boot. This is the best for measuring differences in + * real time. 
+ */ +static inline time_t seconds_since_boot(void) +{ + struct timespec boot; + getboottime(&boot); + return get_seconds() - boot.tv_sec; +} + +static inline time_t convert_to_wallclock(time_t sinceboot) +{ + struct timespec boot; + getboottime(&boot); + return boot.tv_sec + sinceboot; +} + static inline time_t get_expiry(char **bpp) { int rv; + struct timespec boot; + if (get_int(bpp, &rv)) return 0; if (rv < 0) return 0; - return rv; + getboottime(&boot); + return rv - boot.tv_sec; } static inline void sunrpc_invalidate(struct cache_head *h, struct cache_detail *detail) { - h->expiry_time = get_seconds() - 1; - detail->nextcheck = get_seconds(); + h->expiry_time = seconds_since_boot() - 1; + detail->nextcheck = seconds_since_boot(); } #endif /* _LINUX_SUNRPC_CACHE_H_ */ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 2b06410e584e..8dc121955fdc 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -42,7 +42,7 @@ static void cache_revisit_request(struct cache_head *item); static void cache_init(struct cache_head *h) { - time_t now = get_seconds(); + time_t now = seconds_since_boot(); h->next = NULL; h->flags = 0; kref_init(&h->ref); @@ -52,7 +52,7 @@ static void cache_init(struct cache_head *h) static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) { - return (h->expiry_time < get_seconds()) || + return (h->expiry_time < seconds_since_boot()) || (detail->flush_time > h->last_refresh); } @@ -127,7 +127,7 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); static void cache_fresh_locked(struct cache_head *head, time_t expiry) { head->expiry_time = expiry; - head->last_refresh = get_seconds(); + head->last_refresh = seconds_since_boot(); set_bit(CACHE_VALID, &head->flags); } @@ -238,7 +238,7 @@ int cache_check(struct cache_detail *detail, /* now see if we want to start an upcall */ refresh_age = (h->expiry_time - h->last_refresh); - age = get_seconds() - h->last_refresh; + age = seconds_since_boot() - h->last_refresh; if (rqstp == NULL) { if (rv == -EAGAIN) @@ -253,7 +253,7 @@ int cache_check(struct cache_detail *detail, cache_revisit_request(h); if (rv == -EAGAIN) { set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY); + cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); cache_fresh_unlocked(h, detail); rv = -ENOENT; } @@ -388,11 +388,11 @@ static int cache_clean(void) return -1; } current_detail = list_entry(next, struct cache_detail, others); - if (current_detail->nextcheck > get_seconds()) + if (current_detail->nextcheck > seconds_since_boot()) current_index = current_detail->hash_size; else { current_index = 0; - current_detail->nextcheck = get_seconds()+30*60; + current_detail->nextcheck = seconds_since_boot()+30*60; } } @@ -477,7 +477,7 @@ EXPORT_SYMBOL_GPL(cache_flush); void cache_purge(struct cache_detail *detail) { detail->flush_time = LONG_MAX; - detail->nextcheck = get_seconds(); + detail->nextcheck = seconds_since_boot(); cache_flush(); detail->flush_time = 1; } @@ -902,7 +902,7 @@ static int cache_release(struct inode *inode, struct file *filp, filp->private_data = NULL; kfree(rp); - cd->last_close = get_seconds(); + cd->last_close = seconds_since_boot(); atomic_dec(&cd->readers); } module_put(cd->owner); @@ -1034,7 +1034,7 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, int len; if (atomic_read(&detail->readers) == 0 && - detail->last_close < get_seconds() - 30) { + detail->last_close < seconds_since_boot() - 30) { 
warn_no_listener(detail); return -EINVAL; } @@ -1219,7 +1219,8 @@ static int c_show(struct seq_file *m, void *p) ifdebug(CACHE) seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n", - cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags); + convert_to_wallclock(cp->expiry_time), + atomic_read(&cp->ref.refcount), cp->flags); cache_get(cp); if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ @@ -1285,7 +1286,7 @@ static ssize_t read_flush(struct file *file, char __user *buf, unsigned long p = *ppos; size_t len; - sprintf(tbuf, "%lu\n", cd->flush_time); + sprintf(tbuf, "%lu\n", convert_to_wallclock(cd->flush_time)); len = strlen(tbuf); if (p >= len) return 0; @@ -1303,19 +1304,20 @@ static ssize_t write_flush(struct file *file, const char __user *buf, struct cache_detail *cd) { char tbuf[20]; - char *ep; - long flushtime; + char *bp, *ep; + if (*ppos || count > sizeof(tbuf)-1) return -EINVAL; if (copy_from_user(tbuf, buf, count)) return -EFAULT; tbuf[count] = 0; - flushtime = simple_strtoul(tbuf, &ep, 0); + simple_strtoul(tbuf, &ep, 0); if (*ep && *ep != '\n') return -EINVAL; - cd->flush_time = flushtime; - cd->nextcheck = get_seconds(); + bp = tbuf; + cd->flush_time = get_expiry(&bp); + cd->nextcheck = seconds_since_boot(); cache_flush(); *ppos += count;
-- cgit v1.2.3
From f16b6e8d838b2e2bb4561201311c66ac02ad67df Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Aug 2010 17:04:06 +1000 Subject: sunrpc/cache: allow threads to block while waiting for cache update.
The current practice of waiting for cache updates by queueing the whole request to be retried has (at least) two problems. 1/ With NFSv4, requests can be quite complex and re-trying a whole request when a later part fails should only be a last resort, not a normal practice. 2/ Large requests, and in particular any 'write' request, will not be queued by the current code and doing so would be undesirable. In many cases only a very short wait is needed before the cache gets valid data. So, providing the underlying transport permits it by setting ->thread_wait, arrange to wait briefly for an upcall to be completed (as reflected in the clearing of CACHE_PENDING). If the short wait was not long enough and CACHE_PENDING is still set, fall back on the old approach. The 'thread_wait' value is set to 5 seconds when there are spare threads, and 1 second when there are no spare threads. These values are probably much higher than needed, but will ensure some forward progress. Note that as we only request an update for a non-valid item, and as non-valid items are updated in place, it is extremely unlikely that cache_check will return -ETIMEDOUT. Normally cache_defer_req will sleep for a short while and then find that the item is_valid.
Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 3 +++ net/sunrpc/cache.c | 59 +++++++++++++++++++++++++++++++++++++++++++- net/sunrpc/svc_xprt.c | 11 +++++++++ 3 files changed, 72 insertions(+), 1 deletion(-) (limited to 'include')
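In outline, the fast path this adds to cache_defer_req() works like the following simplified schematic (locking and hash-list cleanup are omitted; see the hunk below for the real code):

if (req->thread_wait) {
	/* sleep on an on-stack completion instead of queueing the request;
	 * cache_restart_thread() completes it when the upcall finishes */
	sleeper.handle.revisit = cache_restart_thread;
	wait_for_completion_interruptible_timeout(&sleeper.completion,
						  req->thread_wait);
	if (!test_bit(CACHE_PENDING, &item->flags))
		return -EEXIST;		/* answered while we slept */
	dreq = req->defer(req);		/* still pending: defer as before */
	goto retry;
}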
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index ece432b7f87f..52a7d7224e90 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -125,6 +125,9 @@ struct cache_detail { */ struct cache_req { struct cache_deferred_req *(*defer)(struct cache_req *req); + int thread_wait; /* How long (jiffies) we can block the + * current thread to wait for updates. + */ }; /* this must be embedded in a deferred_request that is being * delayed awaiting cache-fill
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 8dc121955fdc..2c5297f245b4 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -509,10 +509,22 @@ static LIST_HEAD(cache_defer_list); static struct list_head cache_defer_hash[DFR_HASHSIZE]; static int cache_defer_cnt; +struct thread_deferred_req { + struct cache_deferred_req handle; + struct completion completion; +}; +static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many) +{ + struct thread_deferred_req *dr = + container_of(dreq, struct thread_deferred_req, handle); + complete(&dr->completion); +} + static int cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq, *discard; int hash = DFR_HASH(item); + struct thread_deferred_req sleeper; if (cache_defer_cnt >= DFR_MAX) { /* too much in the cache, randomly drop this one, @@ -521,7 +533,15 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) if (net_random()&1) return -ENOMEM; } - dreq = req->defer(req); + if (req->thread_wait) { + dreq = &sleeper.handle; + sleeper.completion = + COMPLETION_INITIALIZER_ONSTACK(sleeper.completion); + dreq->revisit = cache_restart_thread; + } else + dreq = req->defer(req); + + retry: if (dreq == NULL) return -ENOMEM; @@ -555,6 +575,43 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item) cache_revisit_request(item); return -EAGAIN; } + + if (dreq == &sleeper.handle) { + if (wait_for_completion_interruptible_timeout( + &sleeper.completion, req->thread_wait) <= 0) { + /* The completion wasn't completed, so we need + * to clean up + */ + spin_lock(&cache_defer_lock); + if (!list_empty(&sleeper.handle.hash)) { + list_del_init(&sleeper.handle.recent); + list_del_init(&sleeper.handle.hash); + cache_defer_cnt--; + spin_unlock(&cache_defer_lock); + } else { + /* cache_revisit_request already removed + * this from the hash table, but hasn't + * called ->revisit yet. It will very soon + * and we need to wait for it. + */ + spin_unlock(&cache_defer_lock); + wait_for_completion(&sleeper.completion); + } + } + if (test_bit(CACHE_PENDING, &item->flags)) { + /* item is still pending, try request + * deferral + */ + dreq = req->defer(req); + goto retry; + } + /* only return success if we actually deferred the + * request. In this case we waited until it was + * answered so no deferral has happened - rather + * an answer already exists. + */ + return -EEXIST; + } return 0; }
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index cbc084939dd8..8ff6840866fa 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -651,6 +651,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (signalled() || kthread_should_stop()) return -EINTR; + /* Normally we will wait up to 5 seconds for any required + * cache information to be provided. + */ + rqstp->rq_chandle.thread_wait = 5*HZ; + spin_lock_bh(&pool->sp_lock); xprt = svc_xprt_dequeue(pool); if (xprt) { @@ -658,6 +663,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) svc_xprt_get(xprt); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); + + /* As there is a shortage of threads and this request + * had to be queued, don't allow the thread to wait so + * long for cache updates. + */ + rqstp->rq_chandle.thread_wait = 1*HZ; } else { /* No data pending.
Go to sleep */ svc_thread_enqueue(pool, rqstp);
-- cgit v1.2.3
From 4f8b02b4e5c6896e073bed736136d420bd44b627 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:47 +0200 Subject: vmalloc: pcpu_get/free_vm_areas() aren't needed on UP
These functions are used only by the percpu memory allocator on SMP. Don't build them on UP.
Signed-off-by: Tejun Heo Cc: Nick Piggin Reviewed-by: Christoph Lameter --- include/linux/vmalloc.h | 2 ++ mm/vmalloc.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include')
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 01c2145118dc..63a4fe6d51bd 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -117,10 +117,12 @@ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); +#ifdef CONFIG_SMP struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align, gfp_t gfp_mask); void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); +#endif #endif /* _LINUX_VMALLOC_H */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6b8889da69a6..c623e0ce3f00 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2056,6 +2056,7 @@ void free_vm_area(struct vm_struct *area) } EXPORT_SYMBOL_GPL(free_vm_area); +#ifdef CONFIG_SMP static struct vmap_area *node_to_va(struct rb_node *n) { return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; @@ -2336,6 +2337,7 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) free_vm_area(vms[i]); kfree(vms); } +#endif /* CONFIG_SMP */ #ifdef CONFIG_PROC_FS static void *s_start(struct seq_file *m, loff_t *pos)
-- cgit v1.2.3
From 6abad5acac09921f4944af77d3860f82d49f528d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:47 +0200 Subject: percpu: reduce PCPU_MIN_UNIT_SIZE to 32k
In preparation for enabling the percpu allocator on UP, reduce PCPU_MIN_UNIT_SIZE to 32k. On UP, the first chunk doesn't have to include static percpu variables and the chunk size can be smaller, which is important as the UP percpu allocator will use contiguous kernel memory to populate chunks. PCPU_MIN_UNIT_SIZE also determines the maximum supported allocation size, but 32k should still be enough.
Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include')
diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 49466b13c5c6..fc8130a7cac0 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -42,7 +42,7 @@ #ifdef CONFIG_SMP /* minimum unit size, also is the maximum supported allocation size */ -#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) +#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) /* * Percpu allocator can serve percpu allocations before slab is
-- cgit v1.2.3
From bbddff0545878a8649c091a9dd7c43ce91516734 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:48 +0200 Subject: percpu: use percpu allocator on UP too
On UP, percpu allocations were redirected to kmalloc. This has the following problems. * For a certain number of allocations (determined by PERCPU_DYNAMIC_EARLY_SLOTS and PERCPU_DYNAMIC_EARLY_SIZE), the percpu allocator can be used before the usual kernel memory allocator is brought online. On SMP, this is used to initialize the kernel memory allocator. * The percpu allocator honors alignment up to PAGE_SIZE but kmalloc() doesn't. For example, workqueue makes use of larger alignments for cpu_workqueues (see the sketch below).
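A short illustrative fragment of that alignment point (this is a sketch, not code from the patch; struct pad is a stand-in for any structure wanting more than SMP_CACHE_BYTES alignment):

struct pad { char buf[256]; } __percpu *p;

/* honored by the real percpu allocator up to PAGE_SIZE; the old
 * kmalloc-backed UP path could only WARN on such a request */
p = __alloc_percpu(sizeof(struct pad), PAGE_SIZE);
if (p)
	free_percpu(p);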
Currently, users of percpu allocators need to handle UP differently, which is somewhat fragile and ugly. Other than a small amount of memory, there isn't much to lose by enabling the percpu allocator on UP. It can simply use the kernel-memory-based chunk allocation which was added for SMP archs w/o MMUs. This patch removes mm/percpu_up.c, builds mm/percpu.c on UP too and makes the UP build use percpu-km. As percpu addresses and kernel addresses are always identity mapped and static percpu variables don't need any special treatment, nothing is arch dependent and mm/percpu.c implements a generic setup_per_cpu_areas() for UP.
Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter Acked-by: Pekka Enberg --- include/linux/percpu.h | 29 +++++------------------- mm/Kconfig | 8 +++++++ mm/Makefile | 7 +----- mm/percpu-km.c | 2 +- mm/percpu.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++---- mm/percpu_up.c | 30 ------------------------- 6 files changed, 71 insertions(+), 65 deletions(-) delete mode 100644 mm/percpu_up.c (limited to 'include')
diff --git a/include/linux/percpu.h b/include/linux/percpu.h index fc8130a7cac0..aeeeef1093cd 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -39,8 +39,6 @@ preempt_enable(); \ } while (0) -#ifdef CONFIG_SMP - /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) @@ -137,37 +135,20 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, * dynamically allocated. Non-atomic access to the current CPU's * version should probably be combined with get_cpu()/put_cpu(). */ +#ifdef CONFIG_SMP #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +#else +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) +#endif extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); extern bool is_kernel_percpu_address(unsigned long addr); -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +#if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) extern void __init setup_per_cpu_areas(void); #endif extern void __init percpu_init_late(void); -#else /* CONFIG_SMP */ - -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) - -/* can't distinguish from other static vars, always false */ -static inline bool is_kernel_percpu_address(unsigned long addr) -{ - return false; -} - -static inline void __init setup_per_cpu_areas(void) { } - -static inline void __init percpu_init_late(void) { } - -static inline void *pcpu_lpage_remapped(void *kaddr) -{ - return NULL; -} - -#endif /* CONFIG_SMP */ - extern void __percpu *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void __percpu *__pdata); extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
diff --git a/mm/Kconfig b/mm/Kconfig index f4e516e9c37c..01a57447a410 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -301,3 +301,11 @@ config NOMMU_INITIAL_TRIM_EXCESS of 1 says that all excess pages should be trimmed. See Documentation/nommu-mmap.txt for more information.
+ +# +# UP and nommu archs use km based percpu allocator +# +config NEED_PER_CPU_KM + depends on !SMP + bool + default y diff --git a/mm/Makefile b/mm/Makefile index 34b2546a9e37..f73f75a29f82 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page_alloc.o page-writeback.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ - page_isolation.o mm_init.o mmu_context.o \ + page_isolation.o mm_init.o mmu_context.o percpu.o \ $(mmu-y) obj-y += init-mm.o @@ -36,11 +36,6 @@ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o -ifdef CONFIG_SMP -obj-y += percpu.o -else -obj-y += percpu_up.o -endif obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o diff --git a/mm/percpu-km.c b/mm/percpu-km.c index df680855540a..7037bc73bfa4 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c @@ -27,7 +27,7 @@ * chunk size is not aligned. percpu-km code will whine about it. */ -#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK +#if defined(CONFIG_SMP) && defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) #error "contiguous percpu allocation is incompatible with paged first chunk" #endif diff --git a/mm/percpu.c b/mm/percpu.c index 58c572b18b07..fa70122dfdd0 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -76,6 +76,7 @@ #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ +#ifdef CONFIG_SMP /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ #ifndef __addr_to_pcpu_ptr #define __addr_to_pcpu_ptr(addr) \ @@ -89,6 +90,11 @@ (unsigned long)pcpu_base_addr - \ (unsigned long)__per_cpu_start) #endif +#else /* CONFIG_SMP */ +/* on UP, it's always identity mapped */ +#define __addr_to_pcpu_ptr(addr) (void __percpu *)(addr) +#define __pcpu_ptr_to_addr(ptr) (void __force *)(ptr) +#endif /* CONFIG_SMP */ struct pcpu_chunk { struct list_head list; /* linked to pcpu_slot lists */ @@ -949,6 +955,7 @@ EXPORT_SYMBOL_GPL(free_percpu); */ bool is_kernel_percpu_address(unsigned long addr) { +#ifdef CONFIG_SMP const size_t static_size = __per_cpu_end - __per_cpu_start; void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); unsigned int cpu; @@ -959,6 +966,8 @@ bool is_kernel_percpu_address(unsigned long addr) if ((void *)addr >= start && (void *)addr < start + static_size) return true; } +#endif + /* on UP, can't distinguish from other static vars, always false */ return false; } @@ -1066,6 +1075,8 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) free_bootmem(__pa(ai), ai->__ai_size); } +#if defined(CONFIG_SMP) && (defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ + defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)) /** * pcpu_build_alloc_info - build alloc_info considering distances between CPUs * @reserved_size: the size of reserved percpu area in bytes @@ -1220,6 +1231,8 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( return ai; } +#endif /* CONFIG_SMP && (CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || + CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) */ /** * pcpu_dump_alloc_info - print out information about pcpu_alloc_info @@ -1363,7 +1376,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, /* sanity checks */ PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); +#ifdef CONFIG_SMP 
PCPU_SETUP_BUG_ON(!ai->static_size); +#endif PCPU_SETUP_BUG_ON(!base_addr); PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); @@ -1488,6 +1503,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, return 0; } +#ifdef CONFIG_SMP + const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { [PCPU_FC_AUTO] = "auto", [PCPU_FC_EMBED] = "embed", @@ -1758,8 +1775,9 @@ out_free_ar: } #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA /* - * Generic percpu area setup. + * Generic SMP percpu area setup. * * The embedding helper is used because its behavior closely resembles * the original non-dynamic generic percpu area setup. This is @@ -1770,7 +1788,6 @@ out_free_ar: * on the physical linear memory mapping which uses large page * mappings on applicable archs. */ -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); @@ -1799,13 +1816,48 @@ void __init setup_per_cpu_areas(void) PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, pcpu_dfl_fc_alloc, pcpu_dfl_fc_free); if (rc < 0) - panic("Failed to initialized percpu areas."); + panic("Failed to initialize percpu areas."); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; } -#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ +#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ + +#else /* CONFIG_SMP */ + +/* + * UP percpu area setup. + * + * UP always uses km-based percpu allocator with identity mapping. + * Static percpu variables are indistinguishable from the usual static + * variables and don't require any special preparation. + */ +void __init setup_per_cpu_areas(void) +{ + const size_t unit_size = + roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE, + PERCPU_DYNAMIC_RESERVE)); + struct pcpu_alloc_info *ai; + void *fc; + + ai = pcpu_alloc_alloc_info(1, 1); + fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + if (!ai || !fc) + panic("Failed to allocate memory for percpu areas."); + + ai->dyn_size = unit_size; + ai->unit_size = unit_size; + ai->atom_size = unit_size; + ai->alloc_size = unit_size; + ai->groups[0].nr_units = 1; + ai->groups[0].cpu_map[0] = 0; + + if (pcpu_setup_first_chunk(ai, fc) < 0) + panic("Failed to initialize percpu areas."); +} + +#endif /* CONFIG_SMP */ /* * First and reserved chunks are initialized with temporary allocation diff --git a/mm/percpu_up.c b/mm/percpu_up.c deleted file mode 100644 index db884fae5721..000000000000 --- a/mm/percpu_up.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * mm/percpu_up.c - dummy percpu memory allocator implementation for UP - */ - -#include -#include -#include - -void __percpu *__alloc_percpu(size_t size, size_t align) -{ - /* - * Can't easily make larger alignment work with kmalloc. WARN - * on it. Larger alignment should only be used for module - * percpu sections on SMP for which this path isn't used. 
- */ - WARN_ON_ONCE(align > SMP_CACHE_BYTES); - return (void __percpu __force *)kzalloc(size, GFP_KERNEL); -} -EXPORT_SYMBOL_GPL(__alloc_percpu); - -void free_percpu(void __percpu *p) -{ - kfree(this_cpu_ptr(p)); -} -EXPORT_SYMBOL_GPL(free_percpu); - -phys_addr_t per_cpu_ptr_to_phys(void *addr) -{ - return __pa(addr); -}
-- cgit v1.2.3
From febc88c5948f81114f64c3412011d695aecae233 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Sep 2010 14:46:37 +0200 Subject: semaphore: Add DEFINE_SEMAPHORE
The full cleanup of init_MUTEX[_LOCKED] and DECLARE_MUTEX has not been done. Some of the users are real semaphores and we should name them as such instead of confusing everyone with "MUTEX". Provide the infrastructure to finally get rid of init_MUTEX[_LOCKED] and DECLARE_MUTEX.
Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Christoph Hellwig LKML-Reference: <20100907125054.795929962@linutronix.de> --- include/linux/semaphore.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include')
diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 7415839ac890..5310d27abd2a 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -26,6 +26,9 @@ struct semaphore { .wait_list = LIST_HEAD_INIT((name).wait_list), \ } +#define DEFINE_SEMAPHORE(name) \ + struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) + #define DECLARE_MUTEX(name) \ struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1)
-- cgit v1.2.3
From e3e55ff5854655d8723ad8b307f02515aecc3df5 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 7 Sep 2010 15:52:06 +0800 Subject: spi/dw_spi: clean the cs_control code
commit 052dc7c45i "spi/dw_spi: conditional transfer mode change" introduced the cs_control code, which has a bug: it uses the SPI mode bit offset, rather than the transfer mode offset, when setting the transfer mode in the control register. It also forces devices that don't need cs_control to reconfigure the control registers for each SPI transfer. A standalone sketch of the mask arithmetic follows.
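The mask arithmetic is easy to see in isolation. A standalone sketch using the register layout from the dw_spi.h hunk below (the initial cr0 value is invented for the demo):

#include <stdio.h>

#define SPI_MODE_OFFSET 6                       /* SCPH/SCOL live at bits 7:6 */
#define SPI_TMOD_OFFSET 8                       /* transfer mode lives at bits 9:8 */
#define SPI_TMOD_MASK   (0x3 << SPI_TMOD_OFFSET)
#define SPI_TMOD_RO     0x2                     /* recv only */

int main(void)
{
	unsigned int cr0 = 0x1c0;  /* SCPH+SCOL set, TMOD = xmit only */

	/* old code: wipes the clock phase/polarity bits (7:6) by mistake
	 * and leaves a stale TMOD bit behind */
	unsigned int buggy = (cr0 & ~(0x3 << SPI_MODE_OFFSET))
			   | (SPI_TMOD_RO << SPI_TMOD_OFFSET);
	/* fixed code: clears exactly the TMOD field (9:8) */
	unsigned int fixed = (cr0 & ~SPI_TMOD_MASK)
			   | (SPI_TMOD_RO << SPI_TMOD_OFFSET);

	printf("buggy cr0 = %#x, fixed cr0 = %#x\n", buggy, fixed);
	return 0;
}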
This patch fixes both issues.
Signed-off-by: Feng Tang Signed-off-by: Grant Likely --- drivers/spi/dw_spi.c | 17 +++++------------ include/linux/spi/dw_spi.h | 2 ++ 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include')
diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c index 11fbbf6fb07b..56247853c298 100644 --- a/drivers/spi/dw_spi.c +++ b/drivers/spi/dw_spi.c @@ -181,10 +181,6 @@ static void flush(struct dw_spi *dws) wait_till_not_busy(dws); } -static void null_cs_control(u32 command) -{ -} - static int null_writer(struct dw_spi *dws) { u8 n_bytes = dws->n_bytes; @@ -322,7 +318,7 @@ static void giveback(struct dw_spi *dws) struct spi_transfer, transfer_list); - if (!last_transfer->cs_change) + if (!last_transfer->cs_change && dws->cs_control) dws->cs_control(MRST_SPI_DEASSERT); msg->state = NULL; @@ -549,13 +545,13 @@ static void pump_transfers(unsigned long data) */ if (dws->cs_control) { if (dws->rx && dws->tx) - chip->tmode = 0x00; + chip->tmode = SPI_TMOD_TR; else if (dws->rx) - chip->tmode = 0x02; + chip->tmode = SPI_TMOD_RO; else - chip->tmode = 0x01; + chip->tmode = SPI_TMOD_TO; - cr0 &= ~(0x3 << SPI_MODE_OFFSET); + cr0 &= ~SPI_TMOD_MASK; cr0 |= (chip->tmode << SPI_TMOD_OFFSET); } @@ -704,9 +700,6 @@ static int dw_spi_setup(struct spi_device *spi) chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL); if (!chip) return -ENOMEM; - - chip->cs_control = null_cs_control; - chip->enable_dma = 0; } /*
diff --git a/include/linux/spi/dw_spi.h b/include/linux/spi/dw_spi.h index cc813f95a2f2..c91302f3a257 100644 --- a/include/linux/spi/dw_spi.h +++ b/include/linux/spi/dw_spi.h @@ -14,7 +14,9 @@ #define SPI_MODE_OFFSET 6 #define SPI_SCPH_OFFSET 6 #define SPI_SCOL_OFFSET 7 + #define SPI_TMOD_OFFSET 8 +#define SPI_TMOD_MASK (0x3 << SPI_TMOD_OFFSET) #define SPI_TMOD_TR 0x0 /* xmit & recv */ #define SPI_TMOD_TO 0x1 /* xmit only */ #define SPI_TMOD_RO 0x2 /* recv only */
-- cgit v1.2.3
From 6523ce1525e88c598c75a1a6b8c4edddfa9defe8 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 5 Sep 2010 18:02:29 +0000 Subject: ipvs: fix active FTP
- Do not create an expectation when forwarding the PORT command, to avoid blocking the connection. The problem is that nf_conntrack_ftp.c:help() tries to create the same expectation later in POST_ROUTING and drops the packet with a "dropping packet" message after failure in nf_ct_expect_related. - Change ip_vs_update_conntrack to alter the conntrack for related connections from the real server. If we do not alter the reply in this direction, the next packet from the client to vport 20 comes in as a NEW connection. We alter it, but a collision may then occur between the two conntracks and the second conntrack can get destroyed immediately, leaving that connection stuck as well.
Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: David S.
Miller --- include/net/ip_vs.h | 3 +++ net/netfilter/ipvs/ip_vs_core.c | 1 + net/netfilter/ipvs/ip_vs_ftp.c | 6 ------ net/netfilter/ipvs/ip_vs_xmit.c | 18 ++++++++++++------ 4 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a4747a0f7303..f976885f686f 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -955,6 +955,9 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) return csum_partial(diff, sizeof(diff), oldsum); } +extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, + int outin); + #endif /* __KERNEL__ */ #endif /* _NET_IP_VS_H */ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4f8ddba48011..4c2f89df5cce 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -924,6 +924,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); + ip_vs_update_conntrack(skb, cp, 0); ip_vs_conn_put(cp); skb->ipvs_property = 1; diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 33b329bfc2d2..7e9af5b76d9e 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -410,7 +410,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; - struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -497,11 +496,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, ip_vs_control_add(n_cp, cp); } - ct = (struct nf_conn *)skb->nfct; - if (ct && ct != &nf_conntrack_untracked) - ip_vs_expect_related(skb, ct, n_cp, - IPPROTO_TCP, &n_cp->dport, 1); - /* * Move tunnel to listen state */ diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 21e1a5e9b9d3..49df6bea6a2d 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -349,8 +349,8 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } #endif -static void -ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) +void +ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) { struct nf_conn *ct = (struct nf_conn *)skb->nfct; struct nf_conntrack_tuple new_tuple; @@ -365,11 +365,17 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) * real-server we will see RIP->DIP. */ new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - new_tuple.src.u3 = cp->daddr; + if (outin) + new_tuple.src.u3 = cp->daddr; + else + new_tuple.dst.u3 = cp->vaddr; /* * This will also take care of UDP and other protocols. 
*/ - new_tuple.src.u.tcp.port = cp->dport; + if (outin) + new_tuple.src.u.tcp.port = cp->dport; + else + new_tuple.dst.u.tcp.port = cp->vport; nf_conntrack_alter_reply(ct, &new_tuple); } @@ -428,7 +434,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); - ip_vs_update_conntrack(skb, cp); + ip_vs_update_conntrack(skb, cp, 1); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still @@ -506,7 +512,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); - ip_vs_update_conntrack(skb, cp); + ip_vs_update_conntrack(skb, cp, 1); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still -- cgit v1.2.3 From 0ade638655f0ef4d807295c14a4c97544bd6b9ca Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 24 Aug 2010 22:18:41 +0200 Subject: intel-gtt: introduce drm/intel-gtt.h Add a few definitions to it that are already shared and that will be shared in the future (like the number of stolen entries). No functional changes in here. Signed-off-by: Daniel Vetter Signed-off-by: Chris Wilson --- drivers/char/agp/intel-gtt.c | 42 +++++++++++++++++++---------------------- drivers/gpu/drm/i915/i915_dma.c | 2 -- drivers/gpu/drm/i915/i915_drv.h | 1 + include/drm/intel-gtt.h | 18 ++++++++++++++++++ 4 files changed, 38 insertions(+), 25 deletions(-) create mode 100644 include/drm/intel-gtt.h (limited to 'include') diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 72f937615056..0a3e91ba0f2b 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -25,6 +25,7 @@ #include "agp.h" #include "intel-agp.h" #include +#include /* * If we have Intel graphics, we're not going to have anything other than @@ -81,17 +82,11 @@ static struct gatt_mask intel_gen6_masks[] = }; static struct _intel_private { + struct intel_gtt base; struct pci_dev *pcidev; /* device one */ u8 __iomem *registers; u32 __iomem *gtt; /* I915G */ int num_dcache_entries; - /* gtt_entries is the number of gtt entries that are already mapped - * to stolen memory. Stolen memory is larger than the memory mapped - * through gtt_entries, as it includes some reserved space for the BIOS - * popup and for the GTT. - */ - int gtt_entries; /* i830+ */ - int gtt_total_size; union { void __iomem *i9xx_flush_page; void *i8xx_flush_page; @@ -772,7 +767,7 @@ static void intel_i830_init_gtt_entries(void) gtt_entries = 0; } - intel_private.gtt_entries = gtt_entries; + intel_private.base.gtt_stolen_entries = gtt_entries; } static void intel_i830_fini_flush(void) @@ -849,7 +844,7 @@ static int intel_i830_create_gatt_table(struct agp_bridge_data *bridge) /* we have to call this as early as possible after the MMIO base address is known */ intel_i830_init_gtt_entries(); - if (intel_private.gtt_entries == 0) { + if (intel_private.base.gtt_stolen_entries == 0) { iounmap(intel_private.registers); return -ENOMEM; } @@ -919,7 +914,7 @@ static int intel_i830_configure(void) readl(intel_private.registers+I810_PGETBL_CTL); /* PCI Posting. 
*/ if (agp_bridge->driver->needs_scratch_page) { - for (i = intel_private.gtt_entries; i < current_size->num_entries; i++) { + for (i = intel_private.base.gtt_stolen_entries; i < current_size->num_entries; i++) { writel(agp_bridge->scratch_page, intel_private.registers+I810_PTE_BASE+(i*4)); } readl(intel_private.registers+I810_PTE_BASE+((i-1)*4)); /* PCI Posting. */ @@ -950,10 +945,10 @@ static int intel_i830_insert_entries(struct agp_memory *mem, off_t pg_start, temp = agp_bridge->current_size; num_entries = A_SIZE_FIX(temp)->num_entries; - if (pg_start < intel_private.gtt_entries) { + if (pg_start < intel_private.base.gtt_stolen_entries) { dev_printk(KERN_DEBUG, &intel_private.pcidev->dev, - "pg_start == 0x%.8lx, intel_private.gtt_entries == 0x%.8x\n", - pg_start, intel_private.gtt_entries); + "pg_start == 0x%.8lx, gtt_stolen_entries == 0x%.8x\n", + pg_start, intel_private.base.gtt_stolen_entries); dev_info(&intel_private.pcidev->dev, "trying to insert into local/stolen memory\n"); @@ -1001,7 +996,7 @@ static int intel_i830_remove_entries(struct agp_memory *mem, off_t pg_start, if (mem->page_count == 0) return 0; - if (pg_start < intel_private.gtt_entries) { + if (pg_start < intel_private.base.gtt_stolen_entries) { dev_info(&intel_private.pcidev->dev, "trying to disable local/stolen memory\n"); return -EINVAL; @@ -1136,7 +1131,8 @@ static int intel_i9xx_configure(void) readl(intel_private.registers+I810_PGETBL_CTL); /* PCI Posting. */ if (agp_bridge->driver->needs_scratch_page) { - for (i = intel_private.gtt_entries; i < intel_private.gtt_total_size; i++) { + for (i = intel_private.base.gtt_stolen_entries; i < + intel_private.base.gtt_total_entries; i++) { writel(agp_bridge->scratch_page, intel_private.gtt+i); } readl(intel_private.gtt+i-1); /* PCI Posting. 
*/ @@ -1181,10 +1177,10 @@ static int intel_i915_insert_entries(struct agp_memory *mem, off_t pg_start, temp = agp_bridge->current_size; num_entries = A_SIZE_FIX(temp)->num_entries; - if (pg_start < intel_private.gtt_entries) { + if (pg_start < intel_private.base.gtt_stolen_entries) { dev_printk(KERN_DEBUG, &intel_private.pcidev->dev, - "pg_start == 0x%.8lx, intel_private.gtt_entries == 0x%.8x\n", - pg_start, intel_private.gtt_entries); + "pg_start == 0x%.8lx, gtt_stolen_entries == 0x%.8x\n", + pg_start, intel_private.base.gtt_stolen_entries); dev_info(&intel_private.pcidev->dev, "trying to insert into local/stolen memory\n"); @@ -1227,7 +1223,7 @@ static int intel_i915_remove_entries(struct agp_memory *mem, off_t pg_start, if (mem->page_count == 0) return 0; - if (pg_start < intel_private.gtt_entries) { + if (pg_start < intel_private.base.gtt_stolen_entries) { dev_info(&intel_private.pcidev->dev, "trying to disable local/stolen memory\n"); return -EINVAL; @@ -1323,7 +1319,7 @@ static int intel_i915_create_gatt_table(struct agp_bridge_data *bridge) if (!intel_private.gtt) return -ENOMEM; - intel_private.gtt_total_size = gtt_map_size / 4; + intel_private.base.gtt_total_entries = gtt_map_size / 4; temp &= 0xfff80000; @@ -1338,7 +1334,7 @@ static int intel_i915_create_gatt_table(struct agp_bridge_data *bridge) /* we have to call this as early as possible after the MMIO base address is known */ intel_i830_init_gtt_entries(); - if (intel_private.gtt_entries == 0) { + if (intel_private.base.gtt_stolen_entries == 0) { iounmap(intel_private.gtt); iounmap(intel_private.registers); return -ENOMEM; @@ -1449,7 +1445,7 @@ static int intel_i965_create_gatt_table(struct agp_bridge_data *bridge) if (!intel_private.gtt) return -ENOMEM; - intel_private.gtt_total_size = gtt_size / 4; + intel_private.base.gtt_total_entries = gtt_size / 4; intel_private.registers = ioremap(temp, 128 * 4096); if (!intel_private.registers) { @@ -1462,7 +1458,7 @@ static int intel_i965_create_gatt_table(struct agp_bridge_data *bridge) /* we have to call this as early as possible after the MMIO base address is known */ intel_i830_init_gtt_entries(); - if (intel_private.gtt_entries == 0) { + if (intel_private.base.gtt_stolen_entries == 0) { iounmap(intel_private.gtt); iounmap(intel_private.registers); return -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 8d52f01a6d90..47228cb16901 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -42,8 +42,6 @@ #include #include -extern int intel_max_stolen; /* from AGP driver */ - /** * Sets up the hardware status page for devices that need a physical address * in the register. diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cfc8bfd0fd7e..d825ef207b2c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -34,6 +34,7 @@ #include "intel_bios.h" #include "intel_ringbuffer.h" #include +#include /* General customization: */ diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h new file mode 100644 index 000000000000..6769cb704e9b --- /dev/null +++ b/include/drm/intel-gtt.h @@ -0,0 +1,18 @@ +/* Common header for intel-gtt.ko and i915.ko */ + +#ifndef _DRM_INTEL_GTT_H +#define _DRM_INTEL_GTT_H +extern int intel_max_stolen; /* from AGP driver */ + +struct intel_gtt { + /* Number of stolen gtt entries at the beginning. */ + unsigned int gtt_stolen_entries; + /* Total number of gtt entries. 
*/ + unsigned int gtt_total_entries; + /* Part of the gtt that is mappable by the cpu, for those chips where + * this is not the full gtt. */ + unsigned int gtt_mappable_entries; +}; + +#endif + -- cgit v1.2.3 From 19966754328d99ee003ddfc7a8c31ceb115483ac Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 6 Sep 2010 20:08:44 +0200 Subject: drm/i915: die, i915_probe_agp, die Use the detection from intel-gtt.ko instead. Hooray! Also move the stolen mem allocator to the other gtt stuff in dev_prv->mem. v2: Chris Wilson noted that my error handling was crap. Fix it. He also said that this fixes a problem on his i845. Indeed, i915_probe_agp misses a special case for i830/i845 stolen mem detection. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=25476 Cc: stable@kernel.org Signed-off-by: Daniel Vetter Signed-off-by: Chris Wilson --- drivers/char/agp/intel-gtt.c | 6 ++ drivers/gpu/drm/i915/i915_dma.c | 190 +++------------------------------------- drivers/gpu/drm/i915/i915_drv.h | 7 +- include/drm/intel-gtt.h | 2 + 4 files changed, 25 insertions(+), 180 deletions(-) (limited to 'include') diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 6eb64c19af0e..9cb7c98afb9c 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -1714,6 +1714,12 @@ int intel_gmch_probe(struct pci_dev *pdev, } EXPORT_SYMBOL(intel_gmch_probe); +struct intel_gtt *intel_gtt_get(void) +{ + return &intel_private.base; +} +EXPORT_SYMBOL(intel_gtt_get); + void intel_gmch_remove(struct pci_dev *pdev) { if (intel_private.pcidev) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index a693b27f3df4..428c75b466aa 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -989,172 +989,6 @@ intel_teardown_mchbar(struct drm_device *dev) release_resource(&dev_priv->mch_res); } -/** - * i915_probe_agp - get AGP bootup configuration - * @pdev: PCI device - * @aperture_size: returns AGP aperture configured size - * @preallocated_size: returns size of BIOS preallocated AGP space - * - * Since Intel integrated graphics are UMA, the BIOS has to set aside - * some RAM for the framebuffer at early boot. This code figures out - * how much was set aside so we can use it for our own purposes. - */ -static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size, - uint32_t *preallocated_size) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - u16 tmp = 0; - unsigned long overhead; - unsigned long stolen; - - /* Get the fb aperture size and "stolen" memory amount. */ - pci_read_config_word(dev_priv->bridge_dev, INTEL_GMCH_CTRL, &tmp); - - *aperture_size = 1024 * 1024; - *preallocated_size = 1024 * 1024; - - switch (dev->pdev->device) { - case PCI_DEVICE_ID_INTEL_82830_CGC: - case PCI_DEVICE_ID_INTEL_82845G_IG: - case PCI_DEVICE_ID_INTEL_82855GM_IG: - case PCI_DEVICE_ID_INTEL_82865_IG: - if ((tmp & INTEL_GMCH_MEM_MASK) == INTEL_GMCH_MEM_64M) - *aperture_size *= 64; - else - *aperture_size *= 128; - break; - default: - /* 9xx supports large sizes, just look at the length */ - *aperture_size = pci_resource_len(dev->pdev, 2); - break; - } - - /* - * Some of the preallocated space is taken by the GTT - * and popup. GTT is 1K per MB of aperture size, and popup is 4K. 
- */ - if (IS_G4X(dev) || IS_PINEVIEW(dev) || IS_IRONLAKE(dev) || IS_GEN6(dev)) - overhead = 4096; - else - overhead = (*aperture_size / 1024) + 4096; - - if (IS_GEN6(dev)) { - /* SNB has memory control reg at 0x50.w */ - pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &tmp); - - switch (tmp & SNB_GMCH_GMS_STOLEN_MASK) { - case INTEL_855_GMCH_GMS_DISABLED: - DRM_ERROR("video memory is disabled\n"); - return -1; - case SNB_GMCH_GMS_STOLEN_32M: - stolen = 32 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_64M: - stolen = 64 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_96M: - stolen = 96 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_128M: - stolen = 128 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_160M: - stolen = 160 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_192M: - stolen = 192 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_224M: - stolen = 224 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_256M: - stolen = 256 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_288M: - stolen = 288 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_320M: - stolen = 320 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_352M: - stolen = 352 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_384M: - stolen = 384 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_416M: - stolen = 416 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_448M: - stolen = 448 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_480M: - stolen = 480 * 1024 * 1024; - break; - case SNB_GMCH_GMS_STOLEN_512M: - stolen = 512 * 1024 * 1024; - break; - default: - DRM_ERROR("unexpected GMCH_GMS value: 0x%02x\n", - tmp & SNB_GMCH_GMS_STOLEN_MASK); - return -1; - } - } else { - switch (tmp & INTEL_GMCH_GMS_MASK) { - case INTEL_855_GMCH_GMS_DISABLED: - DRM_ERROR("video memory is disabled\n"); - return -1; - case INTEL_855_GMCH_GMS_STOLEN_1M: - stolen = 1 * 1024 * 1024; - break; - case INTEL_855_GMCH_GMS_STOLEN_4M: - stolen = 4 * 1024 * 1024; - break; - case INTEL_855_GMCH_GMS_STOLEN_8M: - stolen = 8 * 1024 * 1024; - break; - case INTEL_855_GMCH_GMS_STOLEN_16M: - stolen = 16 * 1024 * 1024; - break; - case INTEL_855_GMCH_GMS_STOLEN_32M: - stolen = 32 * 1024 * 1024; - break; - case INTEL_915G_GMCH_GMS_STOLEN_48M: - stolen = 48 * 1024 * 1024; - break; - case INTEL_915G_GMCH_GMS_STOLEN_64M: - stolen = 64 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_128M: - stolen = 128 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_256M: - stolen = 256 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_96M: - stolen = 96 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_160M: - stolen = 160 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_224M: - stolen = 224 * 1024 * 1024; - break; - case INTEL_GMCH_GMS_STOLEN_352M: - stolen = 352 * 1024 * 1024; - break; - default: - DRM_ERROR("unexpected GMCH_GMS value: 0x%02x\n", - tmp & INTEL_GMCH_GMS_MASK); - return -1; - } - } - - *preallocated_size = stolen - overhead; - - return 0; -} - #define PTE_ADDRESS_MASK 0xfffff000 #define PTE_ADDRESS_MASK_HIGH 0x000000f0 /* i915+ */ #define PTE_MAPPING_TYPE_UNCACHED (0 << 1) @@ -1249,7 +1083,7 @@ static void i915_setup_compression(struct drm_device *dev, int size) unsigned long ll_base = 0; /* Leave 1M for line length buffer & misc. 
*/ - compressed_fb = drm_mm_search_free(&dev_priv->vram, size, 4096, 0); + compressed_fb = drm_mm_search_free(&dev_priv->mm.vram, size, 4096, 0); if (!compressed_fb) { dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL; i915_warn_stolen(dev); @@ -1270,7 +1104,7 @@ static void i915_setup_compression(struct drm_device *dev, int size) } if (!(IS_GM45(dev) || IS_IRONLAKE_M(dev))) { - compressed_llb = drm_mm_search_free(&dev_priv->vram, 4096, + compressed_llb = drm_mm_search_free(&dev_priv->mm.vram, 4096, 4096, 0); if (!compressed_llb) { i915_warn_stolen(dev); @@ -1366,8 +1200,8 @@ static int i915_load_modeset_init(struct drm_device *dev, struct drm_i915_private *dev_priv = dev->dev_private; int ret = 0; - /* Basic memrange allocator for stolen space (aka vram) */ - drm_mm_init(&dev_priv->vram, 0, prealloc_size); + /* Basic memrange allocator for stolen space (aka mm.vram) */ + drm_mm_init(&dev_priv->mm.vram, 0, prealloc_size); DRM_INFO("set up %ldM of stolen space\n", prealloc_size / (1024*1024)); /* We're off and running w/KMS */ @@ -2107,16 +1941,16 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) "performance may suffer.\n"); } - ret = i915_probe_agp(dev, &agp_size, &prealloc_size); - if (ret) + dev_priv->mm.gtt = intel_gtt_get(); + if (!dev_priv->mm.gtt) { + DRM_ERROR("Failed to initialize GTT\n"); + ret = -ENODEV; goto out_iomapfree; - - if (prealloc_size > intel_max_stolen) { - DRM_INFO("detected %dM stolen memory, trimming to %dM\n", - prealloc_size >> 20, intel_max_stolen >> 20); - prealloc_size = intel_max_stolen; } + prealloc_size = dev_priv->mm.gtt->gtt_stolen_entries << PAGE_SHIFT; + agp_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; + dev_priv->wq = create_singlethread_workqueue("i915"); if (dev_priv->wq == NULL) { DRM_ERROR("Failed to create our workqueue.\n"); @@ -2301,7 +2135,7 @@ int i915_driver_unload(struct drm_device *dev) mutex_unlock(&dev->struct_mutex); if (I915_HAS_FBC(dev) && i915_powersave) i915_cleanup_compression(dev); - drm_mm_takedown(&dev_priv->vram); + drm_mm_takedown(&dev_priv->mm.vram); intel_cleanup_overlay(dev); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d825ef207b2c..c8b22005ec18 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -305,8 +305,6 @@ typedef struct drm_i915_private { uint32_t last_instdone; uint32_t last_instdone1; - struct drm_mm vram; - unsigned long cfb_size; unsigned long cfb_pitch; int cfb_fence; @@ -511,6 +509,11 @@ typedef struct drm_i915_private { u32 saveMCHBAR_RENDER_STANDBY; struct { + /** Bridge to intel-gtt-ko */ + struct intel_gtt *gtt; + /** Memory allocator for GTT stolen memory */ + struct drm_mm vram; + /** Memory allocator for GTT */ struct drm_mm gtt_space; struct io_mapping *gtt_mapping; diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index 6769cb704e9b..b3aa7ab72d09 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -14,5 +14,7 @@ struct intel_gtt { unsigned int gtt_mappable_entries; }; +struct intel_gtt *intel_gtt_get(void); + #endif -- cgit v1.2.3 From e3634169bcc0cce33c815865d62ab378739f7389 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 7 Sep 2010 05:55:38 +0000 Subject: include/net/raw.h: Convert raw_seq_private macro to inline Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- include/net/raw.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/raw.h b/include/net/raw.h index 43c57502659b..42ce6fe7a2d5 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -45,7 +45,10 @@ struct raw_iter_state { struct raw_hashinfo *h; }; -#define raw_seq_private(seq) ((struct raw_iter_state *)(seq)->private) +static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq) +{ + return seq->private; +} void *raw_seq_start(struct seq_file *seq, loff_t *pos); void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos); void raw_seq_stop(struct seq_file *seq, void *v); -- cgit v1.2.3 From a6e0fc8514d41dfdd98b1d15cacc432cf040f8af Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Sep 2010 14:15:32 -0700 Subject: net: introduce rcu_dereference_rtnl We use rcu_dereference_check(p, rcu_read_lock_held() || lockdep_rtnl_is_held()) several times in network stack. More usages to come too, so its time to create a helper. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 58d44491880f..263690d991a8 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -749,6 +749,17 @@ extern int rtnl_is_locked(void); extern int lockdep_rtnl_is_held(void); #endif /* #ifdef CONFIG_PROVE_LOCKING */ +/** + * rcu_dereference_rtnl - rcu_dereference with debug checking + * @p: The pointer to read, prior to dereferencing + * + * Do an rcu_dereference(p), but check caller either holds rcu_read_lock() + * or RTNL + */ +#define rcu_dereference_rtnl(p) \ + rcu_dereference_check(p, rcu_read_lock_held() || \ + lockdep_rtnl_is_held()) + extern void rtnetlink_init(void); extern void __rtnl_unlock(void); -- cgit v1.2.3 From 15133f6e67d8d646d0744336b4daa3135452cb0d Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 12 Jan 2010 14:33:38 -0800 Subject: RDS: Implement atomic operations Implement a CMSG-based interface to do FADD and CSWP ops. Alter send routines to handle atomic ops. Add atomic counters to stats. 
Add xmit_atomic() to struct rds_transport Inline rds_ib_send_unmap_rdma into unmap_rm Signed-off-by: Andy Grover --- include/linux/rds.h | 19 +++++++ net/rds/ib.c | 1 + net/rds/ib.h | 1 + net/rds/ib_rdma.c | 4 +- net/rds/ib_send.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++------ net/rds/rdma.c | 73 +++++++++++++++++++++++++++ net/rds/rds.h | 33 +++++++++++-- net/rds/send.c | 71 ++++++++++++++++++++++++-- net/rds/stats.c | 2 + 9 files changed, 321 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 7f3971d9fc5c..9239152abf7a 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -73,6 +73,8 @@ #define RDS_CMSG_RDMA_MAP 3 #define RDS_CMSG_RDMA_STATUS 4 #define RDS_CMSG_CONG_UPDATE 5 +#define RDS_CMSG_ATOMIC_FADD 6 +#define RDS_CMSG_ATOMIC_CSWP 7 #define RDS_INFO_FIRST 10000 #define RDS_INFO_COUNTERS 10000 @@ -237,6 +239,23 @@ struct rds_rdma_args { u_int64_t user_token; }; +struct rds_atomic_args { + rds_rdma_cookie_t cookie; + uint64_t local_addr; + uint64_t remote_addr; + union { + struct { + uint64_t compare; + uint64_t swap; + } cswp; + struct { + uint64_t add; + } fadd; + }; + uint64_t flags; + uint64_t user_token; +}; + struct rds_rdma_notify { u_int64_t user_token; int32_t status; diff --git a/net/rds/ib.c b/net/rds/ib.c index 8f2d6dd7700a..f0d29656baff 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -264,6 +264,7 @@ struct rds_transport rds_ib_transport = { .xmit = rds_ib_xmit, .xmit_cong_map = NULL, .xmit_rdma = rds_ib_xmit_rdma, + .xmit_atomic = rds_ib_xmit_atomic, .recv = rds_ib_recv, .conn_alloc = rds_ib_conn_alloc, .conn_free = rds_ib_conn_free, diff --git a/net/rds/ib.h b/net/rds/ib.h index 64df4e79b29f..d2fd0aa4fde7 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -336,6 +336,7 @@ void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits); void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted); int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, u32 *adv_credits, int need_posted, int max_posted); +int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op); /* ib_stats.c */ DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 0f3b5a2f3fe0..242231f09464 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -298,7 +298,9 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd, (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE), + IB_ACCESS_REMOTE_WRITE| + IB_ACCESS_REMOTE_ATOMIC), + &pool->fmr_attr); if (IS_ERR(ibmr->fmr)) { err = PTR_ERR(ibmr->fmr); diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index f0edfdb2866c..b2bd164434ad 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -62,15 +62,17 @@ static void rds_ib_send_rdma_complete(struct rds_message *rm, rds_rdma_send_complete(rm, notify_status); } -static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic, - struct rds_rdma_op *op) +static void rds_ib_send_atomic_complete(struct rds_message *rm, + int wc_status) { - if (op->r_mapped) { - ib_dma_unmap_sg(ic->i_cm_id->device, - op->r_sg, op->r_nents, - op->r_write ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE); - op->r_mapped = 0; - } + int notify_status; + + if (wc_status != IB_WC_SUCCESS) + notify_status = RDS_RDMA_OTHER_ERROR; + else + notify_status = RDS_RDMA_SUCCESS; + + rds_atomic_send_complete(rm, notify_status); } static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, @@ -86,7 +88,14 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, DMA_TO_DEVICE); if (rm->rdma.m_rdma_op.r_active) { - rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op); + struct rds_rdma_op *op = &rm->rdma.m_rdma_op; + + if (op->r_mapped) { + ib_dma_unmap_sg(ic->i_cm_id->device, + op->r_sg, op->r_nents, + op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); + op->r_mapped = 0; + } /* If the user asked for a completion notification on this * message, we can implement three different semantics: @@ -116,6 +125,24 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op.r_bytes); } + if (rm->atomic.op_active) { + struct rm_atomic_op *op = &rm->atomic; + + /* unmap atomic recvbuf */ + if (op->op_mapped) { + ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1, + DMA_FROM_DEVICE); + op->op_mapped = 0; + } + + rds_ib_send_atomic_complete(rm, wc_status); + + if (rm->atomic.op_type == RDS_ATOMIC_TYPE_CSWP) + rds_stats_inc(s_atomic_cswp); + else + rds_stats_inc(s_atomic_fadd); + } + /* If anyone waited for this message to get flushed out, wake * them up now */ rds_message_unmapped(rm); @@ -158,12 +185,9 @@ void rds_ib_send_clear_ring(struct rds_ib_connection *ic) u32 i; for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { - if (send->s_wr.opcode == 0xdead) + if (!send->s_rm || send->s_wr.opcode == 0xdead) continue; - if (send->s_rm) - rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); - if (send->s_op) - rds_ib_send_unmap_rdma(ic, send->s_op); + rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); } } @@ -218,6 +242,8 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_READ: + case IB_WR_ATOMIC_FETCH_AND_ADD: + case IB_WR_ATOMIC_CMP_AND_SWP: /* Nothing to be done - the SG list will be unmapped * when the SEND completes. */ break; @@ -243,8 +269,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) rm = rds_send_get_message(conn, send->s_op); if (rm) { - if (rm->rdma.m_rdma_op.r_active) - rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op); + rds_ib_send_unmap_rm(ic, send, wc.status); rds_ib_send_rdma_complete(rm, wc.status); rds_message_put(rm); } @@ -736,6 +761,89 @@ out: return ret; } +/* + * Issue atomic operation. + * A simplified version of the rdma case, we always map 1 SG, and + * only 8 bytes, for the return value from the atomic operation. 
+ */ +int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + struct rds_ib_send_work *send = NULL; + struct ib_send_wr *failed_wr; + struct rds_ib_device *rds_ibdev; + u32 pos; + u32 work_alloc; + int ret; + + rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); + + work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos); + if (work_alloc != 1) { + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + rds_ib_stats_inc(s_ib_tx_ring_full); + ret = -ENOMEM; + goto out; + } + + /* address of send request in ring */ + send = &ic->i_sends[pos]; + send->s_queued = jiffies; + + if (op->op_type == RDS_ATOMIC_TYPE_CSWP) { + send->s_wr.opcode = IB_WR_ATOMIC_CMP_AND_SWP; + send->s_wr.wr.atomic.compare_add = op->op_compare; + send->s_wr.wr.atomic.swap = op->op_swap_add; + } else { /* FADD */ + send->s_wr.opcode = IB_WR_ATOMIC_FETCH_AND_ADD; + send->s_wr.wr.atomic.compare_add = op->op_swap_add; + send->s_wr.wr.atomic.swap = 0; + } + send->s_wr.send_flags = IB_SEND_SIGNALED; + send->s_wr.num_sge = 1; + send->s_wr.next = NULL; + send->s_wr.wr.atomic.remote_addr = op->op_remote_addr; + send->s_wr.wr.atomic.rkey = op->op_rkey; + + /* map 8 byte retval buffer to the device */ + ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE); + rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret); + if (ret != 1) { + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); + ret = -ENOMEM; /* XXX ? */ + goto out; + } + + /* Convert our struct scatterlist to struct ib_sge */ + send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg); + send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg); + send->s_sge[0].lkey = ic->i_mr->lkey; + + rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr, + send->s_sge[0].addr, send->s_sge[0].length); + + failed_wr = &send->s_wr; + ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr); + rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic, + send, &send->s_wr, ret, failed_wr); + BUG_ON(failed_wr != &send->s_wr); + if (ret) { + printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 " + "returned %d\n", &conn->c_faddr, ret); + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + goto out; + } + + if (unlikely(failed_wr != &send->s_wr)) { + printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret); + BUG_ON(failed_wr != &send->s_wr); + } + +out: + return ret; +} + int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) { struct rds_ib_connection *ic = conn->c_transport_data; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 4fda33045598..a7019df38c70 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -719,3 +719,76 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.m_rdma_mr); } + +/* + * Fill in rds_message for an atomic request. 
+ */ +int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg) +{ + struct page *page = NULL; + struct rds_atomic_args *args; + int ret = 0; + + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_atomic_args)) + || rm->atomic.op_active) + return -EINVAL; + + args = CMSG_DATA(cmsg); + + if (cmsg->cmsg_type == RDS_CMSG_ATOMIC_CSWP) { + rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP; + rm->atomic.op_swap_add = args->cswp.swap; + rm->atomic.op_compare = args->cswp.compare; + } else { + rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD; + rm->atomic.op_swap_add = args->fadd.add; + } + + rm->m_rdma_cookie = args->cookie; + rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); + rm->atomic.op_recverr = rs->rs_recverr; + rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); + + /* verify 8 byte-aligned */ + if (args->local_addr & 0x7) { + ret = -EFAULT; + goto err; + } + + ret = rds_pin_pages(args->local_addr, 1, &page, 1); + if (ret != 1) + goto err; + ret = 0; + + sg_set_page(rm->atomic.op_sg, page, 8, offset_in_page(args->local_addr)); + + if (rm->atomic.op_notify || rm->atomic.op_recverr) { + /* We allocate an uninitialized notifier here, because + * we don't want to do that in the completion handler. We + * would have to use GFP_ATOMIC there, and don't want to deal + * with failed allocations. + */ + rm->atomic.op_notifier = kmalloc(sizeof(*rm->atomic.op_notifier), GFP_KERNEL); + if (!rm->atomic.op_notifier) { + ret = -ENOMEM; + goto err; + } + + rm->atomic.op_notifier->n_user_token = args->user_token; + rm->atomic.op_notifier->n_status = RDS_RDMA_SUCCESS; + } + + rm->atomic.op_rkey = rds_rdma_cookie_key(rm->m_rdma_cookie); + rm->atomic.op_remote_addr = args->remote_addr + rds_rdma_cookie_offset(args->cookie); + + rm->atomic.op_active = 1; + + return ret; +err: + if (page) + put_page(page); + kfree(rm->atomic.op_notifier); + + return ret; +} diff --git a/net/rds/rds.h b/net/rds/rds.h index 0bb4957e0cfc..830e2bbb3332 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -97,6 +97,7 @@ struct rds_connection { unsigned int c_xmit_hdr_off; unsigned int c_xmit_data_off; unsigned int c_xmit_rdma_sent; + unsigned int c_xmit_atomic_sent; spinlock_t c_lock; /* protect msg queues */ u64 c_next_tx_seq; @@ -260,6 +261,10 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie) return cookie >> 32; } +/* atomic operation types */ +#define RDS_ATOMIC_TYPE_CSWP 0 +#define RDS_ATOMIC_TYPE_FADD 1 + /* * m_sock_item and m_conn_item are on lists that are serialized under * conn->c_lock. 
m_sock_item has additional meaning in that once it is empty @@ -315,11 +320,27 @@ struct rds_message { struct rds_sock *m_rs; rds_rdma_cookie_t m_rdma_cookie; struct { - struct { + struct rm_atomic_op { + int op_type; + uint64_t op_swap_add; + uint64_t op_compare; + + u32 op_rkey; + u64 op_remote_addr; + unsigned int op_notify:1; + unsigned int op_recverr:1; + unsigned int op_mapped:1; + unsigned int op_active:1; + struct rds_notifier *op_notifier; + struct scatterlist *op_sg; + + struct rds_mr *op_rdma_mr; + } atomic; + struct rm_rdma_op { struct rds_rdma_op m_rdma_op; struct rds_mr *m_rdma_mr; } rdma; - struct { + struct rm_data_op { unsigned int m_nents; unsigned int m_count; struct scatterlist *m_sg; @@ -397,6 +418,7 @@ struct rds_transport { int (*xmit_cong_map)(struct rds_connection *conn, struct rds_cong_map *map, unsigned long offset); int (*xmit_rdma)(struct rds_connection *conn, struct rds_rdma_op *op); + int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op); int (*recv)(struct rds_connection *conn); int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov, size_t size); @@ -546,6 +568,8 @@ struct rds_statistics { uint64_t s_cong_update_received; uint64_t s_cong_send_error; uint64_t s_cong_send_blocked; + uint64_t s_atomic_cswp; + uint64_t s_atomic_fadd; }; /* af_rds.c */ @@ -722,7 +746,10 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg); void rds_rdma_free_op(struct rds_rdma_op *ro); -void rds_rdma_send_complete(struct rds_message *rm, int); +void rds_rdma_send_complete(struct rds_message *rm, int wc_status); +void rds_atomic_send_complete(struct rds_message *rm, int wc_status); +int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg); extern void __rds_put_mr_final(struct rds_mr *mr); static inline void rds_mr_put(struct rds_mr *mr) diff --git a/net/rds/send.c b/net/rds/send.c index b751a8e77c41..f3f4e79274bf 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -73,6 +73,7 @@ void rds_send_reset(struct rds_connection *conn) conn->c_xmit_hdr_off = 0; conn->c_xmit_data_off = 0; conn->c_xmit_rdma_sent = 0; + conn->c_xmit_atomic_sent = 0; conn->c_map_queued = 0; @@ -171,6 +172,7 @@ int rds_send_xmit(struct rds_connection *conn) conn->c_xmit_hdr_off = 0; conn->c_xmit_data_off = 0; conn->c_xmit_rdma_sent = 0; + conn->c_xmit_atomic_sent = 0; /* Release the reference to the previous message. */ rds_message_put(rm); @@ -262,6 +264,17 @@ int rds_send_xmit(struct rds_connection *conn) conn->c_xmit_rm = rm; } + + if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) { + ret = conn->c_trans->xmit_atomic(conn, &rm->atomic); + if (ret) + break; + conn->c_xmit_atomic_sent = 1; + /* The transport owns the mapped memory for now. + * You can't unmap it while it's on the send queue */ + set_bit(RDS_MSG_MAPPED, &rm->m_flags); + } + /* * Try and send an rdma message. 
Let's see if we can * keep this simple and require that the transport either @@ -442,6 +455,41 @@ void rds_rdma_send_complete(struct rds_message *rm, int status) } EXPORT_SYMBOL_GPL(rds_rdma_send_complete); +/* + * Just like above, except looks at atomic op + */ +void rds_atomic_send_complete(struct rds_message *rm, int status) +{ + struct rds_sock *rs = NULL; + struct rm_atomic_op *ao; + struct rds_notifier *notifier; + + spin_lock(&rm->m_rs_lock); + + ao = &rm->atomic; + if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) + && ao->op_active && ao->op_notify && ao->op_notifier) { + notifier = ao->op_notifier; + rs = rm->m_rs; + sock_hold(rds_rs_to_sk(rs)); + + notifier->n_status = status; + spin_lock(&rs->rs_lock); + list_add_tail(¬ifier->n_list, &rs->rs_notify_queue); + spin_unlock(&rs->rs_lock); + + ao->op_notifier = NULL; + } + + spin_unlock(&rm->m_rs_lock); + + if (rs) { + rds_wake_sk_sleep(rs); + sock_put(rds_rs_to_sk(rs)); + } +} +EXPORT_SYMBOL_GPL(rds_atomic_send_complete); + /* * This is the same as rds_rdma_send_complete except we * don't do any locking - we have all the ingredients (message, @@ -788,6 +836,11 @@ static int rds_rm_size(struct msghdr *msg, int data_len) /* these are valid but do no add any size */ break; + case RDS_CMSG_ATOMIC_CSWP: + case RDS_CMSG_ATOMIC_FADD: + size += sizeof(struct scatterlist); + break; + default: return -EINVAL; } @@ -813,7 +866,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, continue; /* As a side effect, RDMA_DEST and RDMA_MAP will set - * rm->m_rdma_cookie and rm->m_rdma_mr. + * rm->rdma.m_rdma_cookie and rm->rdma.m_rdma_mr. */ switch (cmsg->cmsg_type) { case RDS_CMSG_RDMA_ARGS: @@ -829,6 +882,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, if (!ret) *allocated_mr = 1; break; + case RDS_CMSG_ATOMIC_CSWP: + case RDS_CMSG_ATOMIC_FADD: + ret = rds_cmsg_atomic(rs, rm, cmsg); + break; default: return -EINVAL; @@ -926,10 +983,18 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, goto out; if ((rm->m_rdma_cookie || rm->rdma.m_rdma_op.r_active) && - !conn->c_trans->xmit_rdma) { + !conn->c_trans->xmit_rdma) { if (printk_ratelimit()) printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", - &rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma); + &rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma); + ret = -EOPNOTSUPP; + goto out; + } + + if (rm->atomic.op_active && !conn->c_trans->xmit_atomic) { + if (printk_ratelimit()) + printk(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n", + &rm->atomic, conn->c_trans->xmit_atomic); ret = -EOPNOTSUPP; goto out; } diff --git a/net/rds/stats.c b/net/rds/stats.c index 7598eb07cfb1..c66d95d9c262 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -75,6 +75,8 @@ static const char *const rds_stat_names[] = { "cong_update_received", "cong_send_error", "cong_send_blocked", + "s_atomic_cswp", + "s_atomic_fadd", }; void rds_stats_info_copy(struct rds_info_iterator *iter, -- cgit v1.2.3 From 2c3a5f9abb1dc5efdab8ba9a568b1661c65fd1e3 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Mon, 1 Mar 2010 16:10:40 -0800 Subject: RDS: Add flag for silent ops. Do atomic op before RDMA Add a flag to the API so users can indicate they want silent operations. This is needed because silent ops cannot be used with USE_ONCE MRs, so we can't just assume silent. Also, change send_xmit to do atomic op before rdma op if both are present, and centralize the hairy logic to determine if we want to attempt silent, or not. 
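[ Illustration, not part of the patch. To make the flag concrete, here is a minimal userspace sketch of issuing a silent fetch-and-add through the cmsg interface added by the earlier atomic-ops patch, using the RDS_RDMA_SILENT flag added here. The function name is invented, the socket is assumed bound and connected, and error handling is elided:

	#include <stdint.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <linux/rds.h>

	static ssize_t rds_fadd_silent(int fd, rds_rdma_cookie_t cookie,
				       uint64_t *prev, uint64_t remote_addr)
	{
		struct rds_atomic_args args = {
			.cookie      = cookie,	/* from a prior RDS_GET_MR; must
						 * not be a USE_ONCE MR, since
						 * silent ops forbid those */
			.local_addr  = (uint64_t)(unsigned long)prev,
						/* 8-byte aligned buffer; the old
						 * value lands here on completion */
			.remote_addr = remote_addr,
			.fadd.add    = 1,
			.flags       = RDS_RDMA_SILENT,	/* don't interrupt remote */
		};
		char cbuf[CMSG_SPACE(sizeof(args))] = { 0 };
		struct msghdr msg = {
			.msg_control	= cbuf,
			.msg_controllen	= sizeof(cbuf),
		};
		struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

		cmsg->cmsg_level = SOL_RDS;
		cmsg->cmsg_type  = RDS_CMSG_ATOMIC_FADD;
		cmsg->cmsg_len   = CMSG_LEN(sizeof(args));
		memcpy(CMSG_DATA(cmsg), &args, sizeof(args));

		/* 0-byte payload plus a silent op: no data and no header go
		 * out, so the remote side is never interrupted. */
		return sendmsg(fd, &msg, 0);
	}
]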
Signed-off-by: Andy Grover --- include/linux/rds.h | 1 + net/rds/rdma.c | 2 ++ net/rds/rds.h | 2 ++ net/rds/send.c | 55 ++++++++++++++++++++++++++++++----------------------- 4 files changed, 36 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 9239152abf7a..109f1d343318 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -276,5 +276,6 @@ struct rds_rdma_notify { #define RDS_RDMA_USE_ONCE 0x0008 /* free MR after use */ #define RDS_RDMA_DONTWAIT 0x0010 /* Don't wait in SET_BARRIER */ #define RDS_RDMA_NOTIFY_ME 0x0020 /* Notify when operation completes */ +#define RDS_RDMA_SILENT 0x0040 /* Do not interrupt remote */ #endif /* IB_RDS_H */ diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 5ba514684431..48781fe4431c 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -559,6 +559,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, op->op_write = !!(args->flags & RDS_RDMA_READWRITE); op->op_fence = !!(args->flags & RDS_RDMA_FENCE); op->op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); + op->op_silent = !!(args->flags & RDS_RDMA_SILENT); op->op_active = 1; op->op_recverr = rs->rs_recverr; WARN_ON(!nr_pages); @@ -747,6 +748,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, } rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); + rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT); rm->atomic.op_active = 1; rm->atomic.op_recverr = rs->rs_recverr; rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); diff --git a/net/rds/rds.h b/net/rds/rds.h index 46d190d08549..23b921000e74 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -319,6 +319,7 @@ struct rds_message { unsigned int op_notify:1; unsigned int op_recverr:1; unsigned int op_mapped:1; + unsigned int op_silent:1; unsigned int op_active:1; struct scatterlist *op_sg; struct rds_notifier *op_notifier; @@ -333,6 +334,7 @@ struct rds_message { unsigned int op_notify:1; unsigned int op_recverr:1; unsigned int op_mapped:1; + unsigned int op_silent:1; unsigned int op_active:1; unsigned int op_bytes; unsigned int op_nents; diff --git a/net/rds/send.c b/net/rds/send.c index cdca9747fcbc..38567f3ee7e8 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -250,42 +250,50 @@ int rds_send_xmit(struct rds_connection *conn) conn->c_xmit_rm = rm; } - if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) { - ret = conn->c_trans->xmit_atomic(conn, rm); + /* The transport either sends the whole rdma or none of it */ + if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) { + ret = conn->c_trans->xmit_rdma(conn, &rm->rdma); if (ret) break; - conn->c_xmit_atomic_sent = 1; + conn->c_xmit_rdma_sent = 1; + /* The transport owns the mapped memory for now. * You can't unmap it while it's on the send queue */ set_bit(RDS_MSG_MAPPED, &rm->m_flags); - - /* - * This is evil, muahaha. - * We permit 0-byte sends. (rds-ping depends on this.) - * BUT if there is an atomic op and no sent data, - * we turn off sending the header, to achieve - * "silent" atomics. - * But see below; RDMA op might toggle this back on! 
- */ - if (rm->data.op_nents == 0) - rm->data.op_active = 0; } - /* The transport either sends the whole rdma or none of it */ - if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) { - ret = conn->c_trans->xmit_rdma(conn, &rm->rdma); + if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) { + ret = conn->c_trans->xmit_atomic(conn, rm); if (ret) break; - conn->c_xmit_rdma_sent = 1; - - /* rdmas need data sent, even if just the header */ - rm->data.op_active = 1; - + conn->c_xmit_atomic_sent = 1; /* The transport owns the mapped memory for now. * You can't unmap it while it's on the send queue */ set_bit(RDS_MSG_MAPPED, &rm->m_flags); } + /* + * A number of cases require an RDS header to be sent + * even if there is no data. + * We permit 0-byte sends; rds-ping depends on this. + * However, if there are exclusively attached silent ops, + * we skip the hdr/data send, to enable silent operation. + */ + if (rm->data.op_nents == 0) { + int ops_present; + int all_ops_are_silent = 1; + + ops_present = (rm->atomic.op_active || rm->rdma.op_active); + if (rm->atomic.op_active && !rm->atomic.op_silent) + all_ops_are_silent = 0; + if (rm->rdma.op_active && !rm->rdma.op_silent) + all_ops_are_silent = 0; + + if (ops_present && all_ops_are_silent + && !rm->m_rdma_cookie) + rm->data.op_active = 0; + } + if (rm->data.op_active && !conn->c_xmit_data_sent) { ret = conn->c_trans->xmit(conn, rm, conn->c_xmit_hdr_off, @@ -1009,8 +1017,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, if (ret) goto out; - if ((rm->m_rdma_cookie || rm->rdma.op_active) && - !conn->c_trans->xmit_rdma) { + if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) { if (printk_ratelimit()) printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", &rm->rdma, conn->c_trans->xmit_rdma); -- cgit v1.2.3 From 20c72bd5f5f902e5a8745d51573699605bf8d21c Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 25 Aug 2010 05:51:28 -0700 Subject: RDS: Implement masked atomic operations Add two CMSGs for masked versions of cswp and fadd. args struct modified to use a union for different atomic op type's arguments. Change IB to do masked atomic ops. Atomic op type in rds_message similarly unionized. 
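[ Illustration, not part of the patch. The masked form strictly generalizes the plain one: as the rdma.c hunk below shows, the unmasked cmsg types are converted to masked hw ops with all-ones masks. With hypothetical values, a masked cswp that atomically sets bit 0 of the remote qword, but only if bit 0 is currently clear and without disturbing the other 63 bits, fills the args like this:

	struct rds_atomic_args args = {
		.m_cswp = {
			.compare      = 0,	/* expect bit 0 clear ... */
			.compare_mask = 0x1,	/* ... comparing only bit 0 */
			.swap         = 0x1,	/* set bit 0 ... */
			.swap_mask    = 0x1,	/* ... leaving the rest alone */
		},
		/* cookie, local_addr, remote_addr and flags as before */
	};

and is sent with cmsg_type RDS_CMSG_MASKED_ATOMIC_CSWP. A plain RDS_CMSG_ATOMIC_CSWP with the same compare/swap values would instead compare and swap the whole qword. ]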
Signed-off-by: Andy Grover --- include/linux/rds.h | 12 ++++++++++++ net/rds/ib_send.c | 14 +++++++++----- net/rds/rdma.c | 33 +++++++++++++++++++++++++++------ net/rds/rds.h | 14 ++++++++++++-- net/rds/send.c | 4 ++++ 5 files changed, 64 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 109f1d343318..a2a5edb4a276 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -75,6 +75,8 @@ #define RDS_CMSG_CONG_UPDATE 5 #define RDS_CMSG_ATOMIC_FADD 6 #define RDS_CMSG_ATOMIC_CSWP 7 +#define RDS_CMSG_MASKED_ATOMIC_FADD 8 +#define RDS_CMSG_MASKED_ATOMIC_CSWP 9 #define RDS_INFO_FIRST 10000 #define RDS_INFO_COUNTERS 10000 @@ -251,6 +253,16 @@ struct rds_atomic_args { struct { uint64_t add; } fadd; + struct { + uint64_t compare; + uint64_t swap; + uint64_t compare_mask; + uint64_t swap_mask; + } m_cswp; + struct { + uint64_t add; + uint64_t nocarry_mask; + } m_fadd; }; uint64_t flags; uint64_t user_token; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 808544aebb70..71f373c421bc 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -807,13 +807,17 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) send->s_queued = jiffies; if (op->op_type == RDS_ATOMIC_TYPE_CSWP) { - send->s_wr.opcode = IB_WR_ATOMIC_CMP_AND_SWP; - send->s_wr.wr.atomic.compare_add = op->op_compare; - send->s_wr.wr.atomic.swap = op->op_swap_add; + send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP; + send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare; + send->s_wr.wr.atomic.swap = op->op_m_cswp.swap; + send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask; + send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask; } else { /* FADD */ - send->s_wr.opcode = IB_WR_ATOMIC_FETCH_AND_ADD; - send->s_wr.wr.atomic.compare_add = op->op_swap_add; + send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD; + send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add; send->s_wr.wr.atomic.swap = 0; + send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask; + send->s_wr.wr.atomic.swap_mask = 0; } nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify); send->s_wr.num_sge = 1; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 48781fe4431c..48064673fc76 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -738,13 +738,34 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, args = CMSG_DATA(cmsg); - if (cmsg->cmsg_type == RDS_CMSG_ATOMIC_CSWP) { - rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP; - rm->atomic.op_swap_add = args->cswp.swap; - rm->atomic.op_compare = args->cswp.compare; - } else { + /* Nonmasked & masked cmsg ops converted to masked hw ops */ + switch (cmsg->cmsg_type) { + case RDS_CMSG_ATOMIC_FADD: + rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD; + rm->atomic.op_m_fadd.add = args->fadd.add; + rm->atomic.op_m_fadd.nocarry_mask = 0; + break; + case RDS_CMSG_MASKED_ATOMIC_FADD: rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD; - rm->atomic.op_swap_add = args->fadd.add; + rm->atomic.op_m_fadd.add = args->m_fadd.add; + rm->atomic.op_m_fadd.nocarry_mask = args->m_fadd.nocarry_mask; + break; + case RDS_CMSG_ATOMIC_CSWP: + rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP; + rm->atomic.op_m_cswp.compare = args->cswp.compare; + rm->atomic.op_m_cswp.swap = args->cswp.swap; + rm->atomic.op_m_cswp.compare_mask = ~0; + rm->atomic.op_m_cswp.swap_mask = ~0; + break; + case RDS_CMSG_MASKED_ATOMIC_CSWP: + rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP; + rm->atomic.op_m_cswp.compare = args->m_cswp.compare; + 
rm->atomic.op_m_cswp.swap = args->m_cswp.swap; + rm->atomic.op_m_cswp.compare_mask = args->m_cswp.compare_mask; + rm->atomic.op_m_cswp.swap_mask = args->m_cswp.swap_mask; + break; + default: + BUG(); /* should never happen */ } rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); diff --git a/net/rds/rds.h b/net/rds/rds.h index aadaddba88a7..8103dcf8b976 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -316,8 +316,18 @@ struct rds_message { struct { struct rm_atomic_op { int op_type; - uint64_t op_swap_add; - uint64_t op_compare; + union { + struct { + uint64_t compare; + uint64_t swap; + uint64_t compare_mask; + uint64_t swap_mask; + } op_m_cswp; + struct { + uint64_t add; + uint64_t nocarry_mask; + } op_m_fadd; + }; u32 op_rkey; u64 op_remote_addr; diff --git a/net/rds/send.c b/net/rds/send.c index 81471b25373b..9b951a0ab6b7 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -843,6 +843,8 @@ static int rds_rm_size(struct msghdr *msg, int data_len) case RDS_CMSG_ATOMIC_CSWP: case RDS_CMSG_ATOMIC_FADD: + case RDS_CMSG_MASKED_ATOMIC_CSWP: + case RDS_CMSG_MASKED_ATOMIC_FADD: cmsg_groups |= 1; size += sizeof(struct scatterlist); break; @@ -894,6 +896,8 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, break; case RDS_CMSG_ATOMIC_CSWP: case RDS_CMSG_ATOMIC_FADD: + case RDS_CMSG_MASKED_ATOMIC_CSWP: + case RDS_CMSG_MASKED_ATOMIC_FADD: ret = rds_cmsg_atomic(rs, rm, cmsg); break; -- cgit v1.2.3 From fd128dfa50cfc4f2959dc4aa5d7468d33b988332 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 25 Aug 2010 09:32:17 -0700 Subject: RDS: Add rds.h to exported headers list Also, a number of changes were made based on the assumption that rds.h wasn't exported, so roll these back. Signed-off-by: Andy Grover --- include/linux/Kbuild | 1 + include/linux/rds.h | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 626b629429ff..c7fbf298ad68 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -302,6 +302,7 @@ header-y += quota.h header-y += radeonfb.h header-y += random.h header-y += raw.h +header-y += rds.h header-y += reboot.h header-y += reiserfs_fs.h header-y += reiserfs_xattr.h diff --git a/include/linux/rds.h b/include/linux/rds.h index a2a5edb4a276..f371f885a352 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -95,7 +95,7 @@ struct rds_info_counter { u_int8_t name[32]; u_int64_t value; -} __packed; +} __attribute__((packed)); #define RDS_INFO_CONNECTION_FLAG_SENDING 0x01 #define RDS_INFO_CONNECTION_FLAG_CONNECTING 0x02 @@ -110,7 +110,7 @@ struct rds_info_connection { __be32 faddr; u_int8_t transport[TRANSNAMSIZ]; /* null term ascii */ u_int8_t flags; -} __packed; +} __attribute__((packed)); struct rds_info_flow { __be32 laddr; @@ -118,7 +118,7 @@ struct rds_info_flow { u_int32_t bytes; __be16 lport; __be16 fport; -} __packed; +} __attribute__((packed)); #define RDS_INFO_MESSAGE_FLAG_ACK 0x01 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 @@ -131,7 +131,7 @@ struct rds_info_message { __be16 lport; __be16 fport; u_int8_t flags; -} __packed; +} __attribute__((packed)); struct rds_info_socket { u_int32_t sndbuf; @@ -141,7 +141,7 @@ struct rds_info_socket { __be16 connected_port; u_int32_t rcvbuf; u_int64_t inum; -} __packed; +} __attribute__((packed)); struct rds_info_tcp_socket { __be32 local_addr; @@ -153,7 +153,7 @@ struct rds_info_tcp_socket { u_int32_t last_sent_nxt; u_int32_t last_expected_una; u_int32_t last_seen_una; -} __packed; +} 
__attribute__((packed)); #define RDS_IB_GID_LEN 16 struct rds_info_rdma_connection { -- cgit v1.2.3 From a46f561b774d90d8616473d56696e7d44fa1c9f1 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 25 Aug 2010 09:34:10 -0700 Subject: RDS: rds.h: Replace u_int[size]_t with uint[size]_t Replace e.g. u_int32_t types with the more common uint32_t. Reported-by: Matthew Wilcox Signed-off-by: Andy Grover --- include/linux/rds.h | 58 ++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index f371f885a352..3576b31b6b7b 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -93,8 +93,8 @@ #define RDS_INFO_LAST 10010 struct rds_info_counter { - u_int8_t name[32]; - u_int64_t value; + uint8_t name[32]; + uint64_t value; } __attribute__((packed)); #define RDS_INFO_CONNECTION_FLAG_SENDING 0x01 @@ -104,18 +104,18 @@ struct rds_info_counter { #define TRANSNAMSIZ 16 struct rds_info_connection { - u_int64_t next_tx_seq; - u_int64_t next_rx_seq; + uint64_t next_tx_seq; + uint64_t next_rx_seq; __be32 laddr; __be32 faddr; - u_int8_t transport[TRANSNAMSIZ]; /* null term ascii */ - u_int8_t flags; + uint8_t transport[TRANSNAMSIZ]; /* null term ascii */ + uint8_t flags; } __attribute__((packed)); struct rds_info_flow { __be32 laddr; __be32 faddr; - u_int32_t bytes; + uint32_t bytes; __be16 lport; __be16 fport; } __attribute__((packed)); @@ -124,23 +124,23 @@ struct rds_info_flow { #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 struct rds_info_message { - u_int64_t seq; - u_int32_t len; + uint64_t seq; + uint32_t len; __be32 laddr; __be32 faddr; __be16 lport; __be16 fport; - u_int8_t flags; + uint8_t flags; } __attribute__((packed)); struct rds_info_socket { - u_int32_t sndbuf; + uint32_t sndbuf; __be32 bound_addr; __be32 connected_addr; __be16 bound_port; __be16 connected_port; - u_int32_t rcvbuf; - u_int64_t inum; + uint32_t rcvbuf; + uint64_t inum; } __attribute__((packed)); struct rds_info_tcp_socket { @@ -148,11 +148,11 @@ struct rds_info_tcp_socket { __be16 local_port; __be32 peer_addr; __be16 peer_port; - u_int64_t hdr_rem; - u_int64_t data_rem; - u_int32_t last_sent_nxt; - u_int32_t last_expected_una; - u_int32_t last_seen_una; + uint64_t hdr_rem; + uint64_t data_rem; + uint32_t last_sent_nxt; + uint32_t last_expected_una; + uint32_t last_seen_una; } __attribute__((packed)); #define RDS_IB_GID_LEN 16 @@ -207,38 +207,38 @@ struct rds_info_rdma_connection { * (so that the application does not have to worry about * alignment). 
*/ -typedef u_int64_t rds_rdma_cookie_t; +typedef uint64_t rds_rdma_cookie_t; struct rds_iovec { - u_int64_t addr; - u_int64_t bytes; + uint64_t addr; + uint64_t bytes; }; struct rds_get_mr_args { struct rds_iovec vec; - u_int64_t cookie_addr; + uint64_t cookie_addr; uint64_t flags; }; struct rds_get_mr_for_dest_args { struct sockaddr_storage dest_addr; struct rds_iovec vec; - u_int64_t cookie_addr; + uint64_t cookie_addr; uint64_t flags; }; struct rds_free_mr_args { rds_rdma_cookie_t cookie; - u_int64_t flags; + uint64_t flags; }; struct rds_rdma_args { rds_rdma_cookie_t cookie; struct rds_iovec remote_vec; - u_int64_t local_vec_addr; - u_int64_t nr_local; - u_int64_t flags; - u_int64_t user_token; + uint64_t local_vec_addr; + uint64_t nr_local; + uint64_t flags; + uint64_t user_token; }; struct rds_atomic_args { @@ -269,7 +269,7 @@ struct rds_atomic_args { }; struct rds_rdma_notify { - u_int64_t user_token; + uint64_t user_token; int32_t status; }; -- cgit v1.2.3 From 905d64c89e2a9d71d0606904b7c3908633db6072 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 8 Sep 2010 18:03:54 -0700 Subject: RDS: Remove dead struct from rds.h flows are an obsolete date type. Signed-off-by: Andy Grover --- include/linux/rds.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index 3576b31b6b7b..91950950aa59 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -112,14 +112,6 @@ struct rds_info_connection { uint8_t flags; } __attribute__((packed)); -struct rds_info_flow { - __be32 laddr; - __be32 faddr; - uint32_t bytes; - __be16 lport; - __be16 fport; -} __attribute__((packed)); - #define RDS_INFO_MESSAGE_FLAG_ACK 0x01 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK 0x02 -- cgit v1.2.3 From 719f835853a92f6090258114a72ffe41f09155cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Sep 2010 05:08:44 +0000 Subject: udp: add rehash on connect() commit 30fff923 introduced in linux-2.6.33 (udp: bind() optimisation) added a secondary hash on UDP, hashed on (local addr, local port). Problem is that following sequence : fd = socket(...) connect(fd, &remote, ...) not only selects remote end point (address and port), but also sets local address, while UDP stack stored in secondary hash table the socket while its local address was INADDR_ANY (or ipv6 equivalent) Sequence is : - autobind() : choose a random local port, insert socket in hash tables [while local address is INADDR_ANY] - connect() : set remote address and port, change local address to IP given by a route lookup. When an incoming UDP frame comes, if more than 10 sockets are found in primary hash table, we switch to secondary table, and fail to find socket because its local address changed. One solution to this problem is to rehash datagram socket if needed. We add a new rehash(struct socket *) method in "struct proto", and implement this method for UDP v4 & v6, using a common helper. This rehashing only takes care of secondary hash table, since primary hash (based on local port only) is not changed. Reported-by: Krzysztof Piotr Oledzki Signed-off-by: Eric Dumazet Tested-by: Krzysztof Piotr Oledzki Signed-off-by: David S. 
Miller --- include/net/sock.h | 1 + include/net/udp.h | 1 + net/ipv4/datagram.c | 5 ++++- net/ipv4/udp.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/datagram.c | 7 ++++++- net/ipv6/udp.c | 10 ++++++++++ 6 files changed, 66 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index ac53bfbdfe16..adab9dc58183 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -752,6 +752,7 @@ struct proto { /* Keeping track of sk's, looking them up, and port selection methods. */ void (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); + void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); /* Keeping track of sockets in use */ diff --git a/include/net/udp.h b/include/net/udp.h index 7abdf305da50..a184d3496b13 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -151,6 +151,7 @@ static inline void udp_lib_hash(struct sock *sk) } extern void udp_lib_unhash(struct sock *sk); +extern void udp_lib_rehash(struct sock *sk, u16 new_hash); static inline void udp_lib_close(struct sock *sk, long timeout) { diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index f0550941df7b..721a8a37b45c 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -62,8 +62,11 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } if (!inet->inet_saddr) inet->inet_saddr = rt->rt_src; /* Update source address */ - if (!inet->inet_rcv_saddr) + if (!inet->inet_rcv_saddr) { inet->inet_rcv_saddr = rt->rt_src; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } inet->inet_daddr = rt->rt_dst; inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32e0bef60d0a..fb23c2e63b52 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1260,6 +1260,49 @@ void udp_lib_unhash(struct sock *sk) } EXPORT_SYMBOL(udp_lib_unhash); +/* + * inet_rcv_saddr was changed, we must rehash secondary hash + */ +void udp_lib_rehash(struct sock *sk, u16 newhash) +{ + if (sk_hashed(sk)) { + struct udp_table *udptable = sk->sk_prot->h.udp_table; + struct udp_hslot *hslot, *hslot2, *nhslot2; + + hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); + nhslot2 = udp_hashslot2(udptable, newhash); + udp_sk(sk)->udp_portaddr_hash = newhash; + if (hslot2 != nhslot2) { + hslot = udp_hashslot(udptable, sock_net(sk), + udp_sk(sk)->udp_port_hash); + /* we must lock primary chain too */ + spin_lock_bh(&hslot->lock); + + spin_lock(&hslot2->lock); + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hslot2->count--; + spin_unlock(&hslot2->lock); + + spin_lock(&nhslot2->lock); + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + &nhslot2->head); + nhslot2->count++; + spin_unlock(&nhslot2->lock); + + spin_unlock_bh(&hslot->lock); + } + } +} +EXPORT_SYMBOL(udp_lib_rehash); + +static void udp_v4_rehash(struct sock *sk) +{ + u16 new_hash = udp4_portaddr_hash(sock_net(sk), + inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_num); + udp_lib_rehash(sk, new_hash); +} + static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; @@ -1843,6 +1886,7 @@ struct proto udp_prot = { .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 7d929a22cbc2..ef371aa01ac5 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c 
@@ -105,9 +105,12 @@ ipv4_connected: if (ipv6_addr_any(&np->saddr)) ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - if (ipv6_addr_any(&np->rcv_saddr)) + if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } goto out; } @@ -181,6 +184,8 @@ ipv4_connected: if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); inet->inet_rcv_saddr = LOOPBACK4_IPV6; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); } ip6_dst_store(sk, dst, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1dd1affdead2..5acb3560ff15 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -111,6 +111,15 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr); } +static void udp_v6_rehash(struct sock *sk) +{ + u16 new_hash = udp6_portaddr_hash(sock_net(sk), + &inet6_sk(sk)->rcv_saddr, + inet_sk(sk)->inet_num); + + udp_lib_rehash(sk, new_hash); +} + static inline int compute_score(struct sock *sk, struct net *net, unsigned short hnum, struct in6_addr *saddr, __be16 sport, @@ -1447,6 +1456,7 @@ struct proto udpv6_prot = { .backlog_rcv = udpv6_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, -- cgit v1.2.3 From d530148ae8bffe1b33f50d1776d185a6e85dc774 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 20 Aug 2010 16:49:43 +0800 Subject: dquot: do full inode dirty in allocating space Alex Shi found a regression when doing ffsb test. The test has several threads, and each thread creates a small file, write to it and then delete it. ffsb reports about 20% regression and Alex bisected it to 43d2932d88e4. The test will call __mark_inode_dirty 3 times. without this commit, we only take inode_lock one time, while with it, we take the lock 3 times with flags ( I_DIRTY_SYNC,I_DIRTY_PAGES,I_DIRTY). Perf shows the lock contention increased too much. Below proposed patch fixes it. fs is allocating blocks, which usually means file writes and the inode will be dirtied soon. We fully dirty the inode to reduce some inode_lock contention in several calls of __mark_inode_dirty. Jan Kara: Added comment. Signed-off-by: Shaohua Li Signed-off-by: Alex Shi Signed-off-by: Jan Kara --- include/linux/quotaops.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index d50ba858cfe0..d1a9193960f1 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -274,8 +274,14 @@ static inline int dquot_alloc_space(struct inode *inode, qsize_t nr) int ret; ret = dquot_alloc_space_nodirty(inode, nr); - if (!ret) - mark_inode_dirty_sync(inode); + if (!ret) { + /* + * Mark inode fully dirty. Since we are allocating blocks, inode + * would become fully dirty soon anyway and it reportedly + * reduces inode_lock contention. + */ + mark_inode_dirty(inode); + } return ret; } -- cgit v1.2.3 From 39aa3cb3e8250db9188a6f1e3fb62ffa1a717678 Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Tue, 31 Aug 2010 15:52:27 +0200 Subject: mm: Move vma_stack_continue into mm.h So it can be used by all that need to check for that. 
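[ Illustration, not part of the patch. With the helper exported in mm.h, any caller can distinguish a genuine stack guard page from a VMA that merely abuts the stack above it. A minimal sketch (function name invented), mirroring the existing mlock.c user:

	#include <linux/mm.h>

	static inline int is_stack_guard_page(struct vm_area_struct *vma,
					      unsigned long addr)
	{
		/* A guard page exists at addr only if the VMA grows down,
		 * starts exactly at addr, and is not simply a continuation
		 * of the stack VMA sitting directly above it. */
		return (vma->vm_flags & VM_GROWSDOWN) &&
		       (vma->vm_start == addr) &&
		       !vma_stack_continue(vma->vm_prev, addr);
	}
]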
Signed-off-by: Stefan Bader Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 3 ++- include/linux/mm.h | 6 ++++++ mm/mlock.c | 6 ------ 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 439fc1f1c1c4..271afc48b9a5 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -224,7 +224,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; if (vma->vm_flags & VM_GROWSDOWN) - start += PAGE_SIZE; + if (!vma_stack_continue(vma->vm_prev, vma->vm_start)) + start += PAGE_SIZE; seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", start, diff --git a/include/linux/mm.h b/include/linux/mm.h index e6b1210772ce..74949fbef8c6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -864,6 +864,12 @@ int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); +/* Is the vma a continuation of the stack vma above it? */ +static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) +{ + return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); +} + extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len); diff --git a/mm/mlock.c b/mm/mlock.c index cbae7c5b9568..b70919ce4f72 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -135,12 +135,6 @@ void munlock_vma_page(struct page *page) } } -/* Is the vma a continuation of the stack vma above it? */ -static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) -{ - return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); -} - static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) { return (vma->vm_flags & VM_GROWSDOWN) && -- cgit v1.2.3 From a73f8844e1fc54c3762555c1cf1f71774142ca91 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 8 Sep 2010 16:54:54 -0600 Subject: lglock: make lg_lock_global() actually lock globally lg_lock_global() currently only acquires spinlocks for online CPUs, but it's meant to lock all possible CPUs. Lglock-protected resources may be associated with removed CPUs - and, indeed, that could happen with the per-superblock open files lists. At Nick's suggestion, change for_each_online_cpu() to for_each_possible_cpu() to protect accesses to those resources. 
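As a sketch of the effect, here is the name##_global_lock() macro from the hunk below hand-expanded for a hypothetical lglock named "files" (the real code stays a macro; the identifiers files_global_lock, files_lock and files_lock_dep_map are illustrative expansions only):

void files_global_lock(void)
{
	int i;

	preempt_disable();
	rwlock_acquire(&files_lock_dep_map, 0, 0, _RET_IP_);
	for_each_possible_cpu(i) {	/* was for_each_online_cpu(i) */
		arch_spinlock_t *lock;

		lock = &per_cpu(files_lock, i);
		arch_spin_lock(lock);
	}
}

With this, per-CPU entries left behind by a CPU that has since gone offline remain covered by the global lock.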
Cc: Al Viro Acked-by: Nick Piggin Signed-off-by: Jonathan Corbet Signed-off-by: Linus Torvalds --- include/linux/lglock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lglock.h b/include/linux/lglock.h index b288cb713b90..f549056fb20b 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -150,7 +150,7 @@ int i; \ preempt_disable(); \ rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_online_cpu(i) { \ + for_each_possible_cpu(i) { \ arch_spinlock_t *lock; \ lock = &per_cpu(name##_lock, i); \ arch_spin_lock(lock); \ @@ -161,7 +161,7 @@ void name##_global_unlock(void) { \ int i; \ rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_online_cpu(i) { \ + for_each_possible_cpu(i) { \ arch_spinlock_t *lock; \ lock = &per_cpu(name##_lock, i); \ arch_spin_unlock(lock); \ -- cgit v1.2.3 From 01a08546af311c065f34727787dd0cc8dc0c216f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 31 Aug 2010 10:28:16 +0200 Subject: sched: Add book scheduling domain On top of the SMT and MC scheduling domains this adds the BOOK scheduling domain. This is useful for NUMA-like machines which do not have an interface telling which piece of memory is attached to which node, or where the hardware performs striping. Signed-off-by: Heiko Carstens Signed-off-by: Peter Zijlstra LKML-Reference: <20100831082844.253053798@de.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + include/linux/topology.h | 6 ++++ kernel/sched.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 82 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7dd..b51c53c285b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -875,6 +875,7 @@ enum sched_domain_level { SD_LV_NONE = 0, SD_LV_SIBLING, SD_LV_MC, + SD_LV_BOOK, SD_LV_CPU, SD_LV_NODE, SD_LV_ALLNODES, diff --git a/include/linux/topology.h b/include/linux/topology.h index 64e084ff5e5c..b91a40e847d2 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -201,6 +201,12 @@ int arch_update_cpu_topology(void); .balance_interval = 64, \ } +#ifdef CONFIG_SCHED_BOOK +#ifndef SD_BOOK_INIT +#error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!! +#endif +#endif /* CONFIG_SCHED_BOOK */ + #ifdef CONFIG_NUMA #ifndef SD_NODE_INIT #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!!
diff --git a/kernel/sched.c b/kernel/sched.c index 1a0c084b1cf9..26f83e2f1534 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6506,6 +6506,7 @@ struct s_data { cpumask_var_t nodemask; cpumask_var_t this_sibling_map; cpumask_var_t this_core_map; + cpumask_var_t this_book_map; cpumask_var_t send_covered; cpumask_var_t tmpmask; struct sched_group **sched_group_nodes; @@ -6517,6 +6518,7 @@ enum s_alloc { sa_rootdomain, sa_tmpmask, sa_send_covered, + sa_this_book_map, sa_this_core_map, sa_this_sibling_map, sa_nodemask, @@ -6570,6 +6572,31 @@ cpu_to_core_group(int cpu, const struct cpumask *cpu_map, } #endif /* CONFIG_SCHED_MC */ +/* + * book sched-domains: + */ +#ifdef CONFIG_SCHED_BOOK +static DEFINE_PER_CPU(struct static_sched_domain, book_domains); +static DEFINE_PER_CPU(struct static_sched_group, sched_group_book); + +static int +cpu_to_book_group(int cpu, const struct cpumask *cpu_map, + struct sched_group **sg, struct cpumask *mask) +{ + int group = cpu; +#ifdef CONFIG_SCHED_MC + cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); + group = cpumask_first(mask); +#elif defined(CONFIG_SCHED_SMT) + cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); + group = cpumask_first(mask); +#endif + if (sg) + *sg = &per_cpu(sched_group_book, group).sg; + return group; +} +#endif /* CONFIG_SCHED_BOOK */ + static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); @@ -6578,7 +6605,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, struct sched_group **sg, struct cpumask *mask) { int group; -#ifdef CONFIG_SCHED_MC +#ifdef CONFIG_SCHED_BOOK + cpumask_and(mask, cpu_book_mask(cpu), cpu_map); + group = cpumask_first(mask); +#elif defined(CONFIG_SCHED_MC) cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); group = cpumask_first(mask); #elif defined(CONFIG_SCHED_SMT) @@ -6839,6 +6869,9 @@ SD_INIT_FUNC(CPU) #ifdef CONFIG_SCHED_MC SD_INIT_FUNC(MC) #endif +#ifdef CONFIG_SCHED_BOOK + SD_INIT_FUNC(BOOK) +#endif static int default_relax_domain_level = -1; @@ -6888,6 +6921,8 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what, free_cpumask_var(d->tmpmask); /* fall through */ case sa_send_covered: free_cpumask_var(d->send_covered); /* fall through */ + case sa_this_book_map: + free_cpumask_var(d->this_book_map); /* fall through */ case sa_this_core_map: free_cpumask_var(d->this_core_map); /* fall through */ case sa_this_sibling_map: @@ -6934,8 +6969,10 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, return sa_nodemask; if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL)) return sa_this_sibling_map; - if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) + if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL)) return sa_this_core_map; + if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) + return sa_this_book_map; if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL)) return sa_send_covered; d->rd = alloc_rootdomain(); @@ -6993,6 +7030,23 @@ static struct sched_domain *__build_cpu_sched_domain(struct s_data *d, return sd; } +static struct sched_domain *__build_book_sched_domain(struct s_data *d, + const struct cpumask *cpu_map, struct sched_domain_attr *attr, + struct sched_domain *parent, int i) +{ + struct sched_domain *sd = parent; +#ifdef CONFIG_SCHED_BOOK + sd = &per_cpu(book_domains, i).sd; + SD_INIT(sd, BOOK); + set_domain_attribute(sd, attr); + cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i)); + sd->parent = parent; + parent->child = sd; + 
cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask); +#endif + return sd; +} + static struct sched_domain *__build_mc_sched_domain(struct s_data *d, const struct cpumask *cpu_map, struct sched_domain_attr *attr, struct sched_domain *parent, int i) @@ -7049,6 +7103,15 @@ static void build_sched_groups(struct s_data *d, enum sched_domain_level l, &cpu_to_core_group, d->send_covered, d->tmpmask); break; +#endif +#ifdef CONFIG_SCHED_BOOK + case SD_LV_BOOK: /* set up book groups */ + cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu)); + if (cpu == cpumask_first(d->this_book_map)) + init_sched_build_groups(d->this_book_map, cpu_map, + &cpu_to_book_group, + d->send_covered, d->tmpmask); + break; #endif case SD_LV_CPU: /* set up physical groups */ cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map); @@ -7097,12 +7160,14 @@ static int __build_sched_domains(const struct cpumask *cpu_map, sd = __build_numa_sched_domains(&d, cpu_map, attr, i); sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i); + sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i); sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i); sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i); } for_each_cpu(i, cpu_map) { build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i); + build_sched_groups(&d, SD_LV_BOOK, cpu_map, i); build_sched_groups(&d, SD_LV_MC, cpu_map, i); } @@ -7133,6 +7198,12 @@ static int __build_sched_domains(const struct cpumask *cpu_map, init_sched_groups_power(i, sd); } #endif +#ifdef CONFIG_SCHED_BOOK + for_each_cpu(i, cpu_map) { + sd = &per_cpu(book_domains, i).sd; + init_sched_groups_power(i, sd); + } +#endif for_each_cpu(i, cpu_map) { sd = &per_cpu(phys_domains, i).sd; @@ -7158,6 +7229,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map, sd = &per_cpu(cpu_domains, i).sd; #elif defined(CONFIG_SCHED_MC) sd = &per_cpu(core_domains, i).sd; +#elif defined(CONFIG_SCHED_BOOK) + sd = &per_cpu(book_domains, i).sd; #else sd = &per_cpu(phys_domains, i).sd; #endif -- cgit v1.2.3 From 51b0fe39549a04858001922919ab355dee9bdfcf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 13:35:57 +0200 Subject: perf: Deconstify struct pmu sed -ie 's/const struct pmu\>/struct pmu/g' `git grep -l "const struct pmu\>"` Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/alpha/kernel/perf_event.c | 4 ++-- arch/arm/kernel/perf_event.c | 2 +- arch/powerpc/kernel/perf_event.c | 8 ++++---- arch/powerpc/kernel/perf_event_fsl_emb.c | 2 +- arch/sh/kernel/perf_event.c | 4 ++-- arch/sparc/kernel/perf_event.c | 10 +++++----- arch/x86/kernel/cpu/perf_event.c | 14 +++++++------- include/linux/perf_event.h | 10 +++++----- kernel/perf_event.c | 26 +++++++++++++------------- 9 files changed, 40 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 51c39fa41693..56fa41590381 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -642,7 +642,7 @@ static int __hw_perf_event_init(struct perf_event *event) return 0; } -static const struct pmu pmu = { +static struct pmu pmu = { .enable = alpha_pmu_enable, .disable = alpha_pmu_disable, .read = alpha_pmu_read, @@ -653,7 +653,7 @@ static const struct pmu pmu = { /* * Main entry point to initialise a HW performance event. 
*/ -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { int err; diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 64ca8c3ab94b..0671e92c5111 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -491,7 +491,7 @@ __hw_perf_event_init(struct perf_event *event) return err; } -const struct pmu * +struct pmu * hw_perf_event_init(struct perf_event *event) { int err = 0; diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index d301a30445e0..5f78681ad902 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -857,7 +857,7 @@ static void power_pmu_unthrottle(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -void power_pmu_start_txn(const struct pmu *pmu) +void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -870,7 +870,7 @@ void power_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -void power_pmu_cancel_txn(const struct pmu *pmu) +void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -882,7 +882,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -int power_pmu_commit_txn(const struct pmu *pmu) +int power_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw; long i, n; @@ -1014,7 +1014,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { u64 ev; unsigned long flags; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 1ba45471ae43..d7619b5e7a6e 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -428,7 +428,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { u64 ev; struct perf_event *events[MAX_HWEVENTS]; diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 7a3dc3567258..395572c94c6a 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -257,13 +257,13 @@ static void sh_pmu_read(struct perf_event *event) sh_perf_event_update(event, &event->hw, event->hw.idx); } -static const struct pmu pmu = { +static struct pmu pmu = { .enable = sh_pmu_enable, .disable = sh_pmu_disable, .read = sh_pmu_read, }; -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { int err = __hw_perf_event_init(event); if (unlikely(err)) { diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 4bc402938575..481b894a5018 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1099,7 +1099,7 @@ static int __hw_perf_event_init(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void sparc_pmu_start_txn(const struct pmu *pmu) +static void sparc_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -1111,7 
+1111,7 @@ static void sparc_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -static void sparc_pmu_cancel_txn(const struct pmu *pmu) +static void sparc_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -1123,7 +1123,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -static int sparc_pmu_commit_txn(const struct pmu *pmu) +static int sparc_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int n; @@ -1142,7 +1142,7 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu) return 0; } -static const struct pmu pmu = { +static struct pmu pmu = { .enable = sparc_pmu_enable, .disable = sparc_pmu_disable, .read = sparc_pmu_read, @@ -1152,7 +1152,7 @@ static const struct pmu pmu = { .commit_txn = sparc_pmu_commit_txn, }; -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { int err = __hw_perf_event_init(event); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index de6569c04cd0..fdd97f2e9961 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -618,7 +618,7 @@ static void x86_pmu_enable_all(int added) } } -static const struct pmu pmu; +static struct pmu pmu; static inline int is_x86_event(struct perf_event *event) { @@ -1427,7 +1427,7 @@ static inline void x86_pmu_read(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void x86_pmu_start_txn(const struct pmu *pmu) +static void x86_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1440,7 +1440,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -static void x86_pmu_cancel_txn(const struct pmu *pmu) +static void x86_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1457,7 +1457,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -static int x86_pmu_commit_txn(const struct pmu *pmu) +static int x86_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int assign[X86_PMC_IDX_MAX]; @@ -1483,7 +1483,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) return 0; } -static const struct pmu pmu = { +static struct pmu pmu = { .enable = x86_pmu_enable, .disable = x86_pmu_disable, .start = x86_pmu_start, @@ -1569,9 +1569,9 @@ out: return ret; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { - const struct pmu *tmp; + struct pmu *tmp; int err; err = __hw_perf_event_init(event); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 000610c4de71..09d048b52115 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -578,19 +578,19 @@ struct pmu { * Start the transaction, after this ->enable() doesn't need * to do schedulability tests. */ - void (*start_txn) (const struct pmu *pmu); + void (*start_txn) (struct pmu *pmu); /* * If ->start_txn() disabled the ->enable() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. 
On error the transaction is kept * open until ->cancel_txn() is called. */ - int (*commit_txn) (const struct pmu *pmu); + int (*commit_txn) (struct pmu *pmu); /* * Will cancel the transaction, assumes ->disable() is called for * each successfull ->enable() during the transaction. */ - void (*cancel_txn) (const struct pmu *pmu); + void (*cancel_txn) (struct pmu *pmu); }; /** @@ -669,7 +669,7 @@ struct perf_event { int nr_siblings; int group_flags; struct perf_event *group_leader; - const struct pmu *pmu; + struct pmu *pmu; enum perf_event_active_state state; unsigned int attach_state; @@ -849,7 +849,7 @@ struct perf_output_handle { */ extern int perf_max_events; -extern const struct pmu *hw_perf_event_init(struct perf_event *event); +extern struct pmu *hw_perf_event_init(struct perf_event *event); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 2d74f31220ad..fb46fd13f31f 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -75,7 +75,7 @@ static DEFINE_SPINLOCK(perf_resource_lock); /* * Architecture provided APIs - weak aliases: */ -extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event) +extern __weak struct pmu *hw_perf_event_init(struct perf_event *event) { return NULL; } @@ -691,7 +691,7 @@ group_sched_in(struct perf_event *group_event, struct perf_event_context *ctx) { struct perf_event *event, *partial_group = NULL; - const struct pmu *pmu = group_event->pmu; + struct pmu *pmu = group_event->pmu; bool txn = false; if (group_event->state == PERF_EVENT_STATE_OFF) @@ -4501,7 +4501,7 @@ static int perf_swevent_int(struct perf_event *event) return 0; } -static const struct pmu perf_ops_generic = { +static struct pmu perf_ops_generic = { .enable = perf_swevent_enable, .disable = perf_swevent_disable, .start = perf_swevent_int, @@ -4614,7 +4614,7 @@ static void cpu_clock_perf_event_read(struct perf_event *event) cpu_clock_perf_event_update(event); } -static const struct pmu perf_ops_cpu_clock = { +static struct pmu perf_ops_cpu_clock = { .enable = cpu_clock_perf_event_enable, .disable = cpu_clock_perf_event_disable, .read = cpu_clock_perf_event_read, @@ -4671,7 +4671,7 @@ static void task_clock_perf_event_read(struct perf_event *event) task_clock_perf_event_update(event, time); } -static const struct pmu perf_ops_task_clock = { +static struct pmu perf_ops_task_clock = { .enable = task_clock_perf_event_enable, .disable = task_clock_perf_event_disable, .read = task_clock_perf_event_read, @@ -4785,7 +4785,7 @@ static int swevent_hlist_get(struct perf_event *event) #ifdef CONFIG_EVENT_TRACING -static const struct pmu perf_ops_tracepoint = { +static struct pmu perf_ops_tracepoint = { .enable = perf_trace_enable, .disable = perf_trace_disable, .start = perf_swevent_int, @@ -4849,7 +4849,7 @@ static void tp_perf_event_destroy(struct perf_event *event) perf_trace_destroy(event); } -static const struct pmu *tp_perf_event_init(struct perf_event *event) +static struct pmu *tp_perf_event_init(struct perf_event *event) { int err; @@ -4896,7 +4896,7 @@ static void perf_event_free_filter(struct perf_event *event) #else -static const struct pmu *tp_perf_event_init(struct perf_event *event) +static struct pmu *tp_perf_event_init(struct perf_event *event) { return NULL; } @@ -4918,7 +4918,7 @@ static void bp_perf_event_destroy(struct perf_event *event) release_bp_slot(event); } -static const struct pmu 
*bp_perf_event_init(struct perf_event *bp) +static struct pmu *bp_perf_event_init(struct perf_event *bp) { int err; @@ -4942,7 +4942,7 @@ void perf_bp_event(struct perf_event *bp, void *data) perf_swevent_add(bp, 1, 1, &sample, regs); } #else -static const struct pmu *bp_perf_event_init(struct perf_event *bp) +static struct pmu *bp_perf_event_init(struct perf_event *bp) { return NULL; } @@ -4964,9 +4964,9 @@ static void sw_perf_event_destroy(struct perf_event *event) swevent_hlist_put(event); } -static const struct pmu *sw_perf_event_init(struct perf_event *event) +static struct pmu *sw_perf_event_init(struct perf_event *event) { - const struct pmu *pmu = NULL; + struct pmu *pmu = NULL; u64 event_id = event->attr.config; /* @@ -5028,7 +5028,7 @@ perf_event_alloc(struct perf_event_attr *attr, perf_overflow_handler_t overflow_handler, gfp_t gfpflags) { - const struct pmu *pmu; + struct pmu *pmu; struct perf_event *event; struct hw_perf_event *hwc; long err; -- cgit v1.2.3 From b0a873ebbf87bf38bf70b5e39a7cadc96099fa13 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 13:35:08 +0200 Subject: perf: Register PMU implementations Simple registration interface for struct pmu; this provides the infrastructure for removing all the weak functions. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/alpha/kernel/perf_event.c | 37 +- arch/arm/kernel/perf_event.c | 38 +- arch/powerpc/kernel/perf_event.c | 46 +-- arch/powerpc/kernel/perf_event_fsl_emb.c | 37 +- arch/sh/kernel/perf_event.c | 35 +- arch/sparc/kernel/perf_event.c | 29 +- arch/x86/kernel/cpu/perf_event.c | 45 ++- include/linux/perf_event.h | 10 +- kernel/hw_breakpoint.c | 35 +- kernel/perf_event.c | 588 +++++++++++++++---------------- 10 files changed, 488 insertions(+), 412 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 56fa41590381..19660b5c298f 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -642,34 +642,39 @@ static int __hw_perf_event_init(struct perf_event *event) return 0; } -static struct pmu pmu = { - .enable = alpha_pmu_enable, - .disable = alpha_pmu_disable, - .read = alpha_pmu_read, - .unthrottle = alpha_pmu_unthrottle, -}; - - /* * Main entry point to initialise a HW performance event. */ -struct pmu *hw_perf_event_init(struct perf_event *event) +static int alpha_pmu_event_init(struct perf_event *event) { int err; + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + if (!alpha_pmu) - return ERR_PTR(-ENODEV); + return -ENODEV; /* Do the real initialisation work. */ err = __hw_perf_event_init(event); - if (err) - return ERR_PTR(err); - - return &pmu; + return err; } - +static struct pmu pmu = { + .event_init = alpha_pmu_event_init, + .enable = alpha_pmu_enable, + .disable = alpha_pmu_disable, + .read = alpha_pmu_read, + .unthrottle = alpha_pmu_unthrottle, +}; /* * Main entry point - enable HW performance counters.
@@ -838,5 +843,7 @@ void __init init_hw_perf_events(void) /* And set up PMU specification */ alpha_pmu = &ev67_pmu; perf_max_events = alpha_pmu->num_pmcs; + + perf_pmu_register(&pmu); } diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 0671e92c5111..f62f9db35db3 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -306,12 +306,7 @@ out: return err; } -static struct pmu pmu = { - .enable = armpmu_enable, - .disable = armpmu_disable, - .unthrottle = armpmu_unthrottle, - .read = armpmu_read, -}; +static struct pmu pmu; static int validate_event(struct cpu_hw_events *cpuc, @@ -491,20 +486,29 @@ __hw_perf_event_init(struct perf_event *event) return err; } -struct pmu * -hw_perf_event_init(struct perf_event *event) +static int armpmu_event_init(struct perf_event *event) { int err = 0; + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + if (!armpmu) - return ERR_PTR(-ENODEV); + return -ENODEV; event->destroy = hw_perf_event_destroy; if (!atomic_inc_not_zero(&active_events)) { if (atomic_read(&active_events) > perf_max_events) { atomic_dec(&active_events); - return ERR_PTR(-ENOSPC); + return -ENOSPC; } mutex_lock(&pmu_reserve_mutex); @@ -518,15 +522,23 @@ hw_perf_event_init(struct perf_event *event) } if (err) - return ERR_PTR(err); + return err; err = __hw_perf_event_init(event); if (err) hw_perf_event_destroy(event); - return err ? ERR_PTR(err) : &pmu; + return err; } +static struct pmu pmu = { + .event_init = armpmu_event_init, + .enable = armpmu_enable, + .disable = armpmu_disable, + .unthrottle = armpmu_unthrottle, + .read = armpmu_read, +}; + void hw_perf_enable(void) { @@ -2994,6 +3006,8 @@ init_hw_perf_events(void) perf_max_events = -1; } + perf_pmu_register(&pmu); + return 0; } arch_initcall(init_hw_perf_events); diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 5f78681ad902..19131b2614b9 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -904,16 +904,6 @@ int power_pmu_commit_txn(struct pmu *pmu) return 0; } -struct pmu power_pmu = { - .enable = power_pmu_enable, - .disable = power_pmu_disable, - .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, - .start_txn = power_pmu_start_txn, - .cancel_txn = power_pmu_cancel_txn, - .commit_txn = power_pmu_commit_txn, -}; - /* * Return 1 if we might be able to put event on a limited PMC, * or 0 if not. 
@@ -1014,7 +1004,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -struct pmu *hw_perf_event_init(struct perf_event *event) +static int power_pmu_event_init(struct perf_event *event) { u64 ev; unsigned long flags; @@ -1026,25 +1016,27 @@ struct pmu *hw_perf_event_init(struct perf_event *event) struct cpu_hw_events *cpuhw; if (!ppmu) - return ERR_PTR(-ENXIO); + return -ENOENT; + switch (event->attr.type) { case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: ev = event->attr.config; break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } + event->hw.config_base = ev; event->hw.idx = 0; @@ -1081,7 +1073,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) */ ev = normal_pmc_alternative(ev, flags); if (!ev) - return ERR_PTR(-EINVAL); + return -EINVAL; } } @@ -1095,19 +1087,19 @@ struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, ctrs, events, cflags); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } events[n] = ev; ctrs[n] = event; cflags[n] = flags; if (check_excludes(ctrs, cflags, n, 1)) - return ERR_PTR(-EINVAL); + return -EINVAL; cpuhw = &get_cpu_var(cpu_hw_events); err = power_check_constraints(cpuhw, events, cflags, n + 1); put_cpu_var(cpu_hw_events); if (err) - return ERR_PTR(-EINVAL); + return -EINVAL; event->hw.config = events[n]; event->hw.event_base = cflags[n]; @@ -1132,11 +1124,20 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &power_pmu; + return err; } +struct pmu power_pmu = { + .event_init = power_pmu_event_init, + .enable = power_pmu_enable, + .disable = power_pmu_disable, + .read = power_pmu_read, + .unthrottle = power_pmu_unthrottle, + .start_txn = power_pmu_start_txn, + .cancel_txn = power_pmu_cancel_txn, + .commit_txn = power_pmu_commit_txn, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -1342,6 +1343,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ + perf_pmu_register(&power_pmu); perf_cpu_notifier(power_pmu_notifier); return 0; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index d7619b5e7a6e..ea6a804e43fd 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -378,13 +378,6 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event) local_irq_restore(flags); } -static struct pmu fsl_emb_pmu = { - .enable = fsl_emb_pmu_enable, - .disable = fsl_emb_pmu_disable, - .read = fsl_emb_pmu_read, - .unthrottle = fsl_emb_pmu_unthrottle, -}; - /* * Release the PMU if this is the last perf_event. 
*/ @@ -428,7 +421,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -struct pmu *hw_perf_event_init(struct perf_event *event) +static int fsl_emb_pmu_event_init(struct perf_event *event) { u64 ev; struct perf_event *events[MAX_HWEVENTS]; @@ -441,14 +434,14 @@ struct pmu *hw_perf_event_init(struct perf_event *event) case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: @@ -456,12 +449,12 @@ struct pmu *hw_perf_event_init(struct perf_event *event) break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } event->hw.config = ppmu->xlate_event(ev); if (!(event->hw.config & FSL_EMB_EVENT_VALID)) - return ERR_PTR(-EINVAL); + return -EINVAL; /* * If this is in a group, check if it can go on with all the @@ -473,7 +466,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, events); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { @@ -484,7 +477,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } if (num_restricted >= ppmu->n_restricted) - return ERR_PTR(-EINVAL); + return -EINVAL; } event->hw.idx = -1; @@ -497,7 +490,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) if (event->attr.exclude_kernel) event->hw.config_base |= PMLCA_FCS; if (event->attr.exclude_idle) - return ERR_PTR(-ENOTSUPP); + return -ENOTSUPP; event->hw.last_period = event->hw.sample_period; local64_set(&event->hw.period_left, event->hw.last_period); @@ -523,11 +516,17 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &fsl_emb_pmu; + return err; } +static struct pmu fsl_emb_pmu = { + .event_init = fsl_emb_pmu_event_init, + .enable = fsl_emb_pmu_enable, + .disable = fsl_emb_pmu_disable, + .read = fsl_emb_pmu_read, + .unthrottle = fsl_emb_pmu_unthrottle, +}; + /* * A counter has overflowed; update its count and record * things if requested. 
Note that interrupts are hard-disabled @@ -651,5 +650,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) pr_info("%s performance monitor hardware support registered\n", pmu->name); + perf_pmu_register(&fsl_emb_pmu); + return 0; } diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 395572c94c6a..8cb206597e0c 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -257,26 +257,38 @@ static void sh_pmu_read(struct perf_event *event) sh_perf_event_update(event, &event->hw, event->hw.idx); } -static struct pmu pmu = { - .enable = sh_pmu_enable, - .disable = sh_pmu_disable, - .read = sh_pmu_read, -}; - -struct pmu *hw_perf_event_init(struct perf_event *event) +static int sh_pmu_event_init(struct perf_event *event) { - int err = __hw_perf_event_init(event); + int err; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HW_CACHE: + case PERF_TYPE_HARDWARE: + err = __hw_perf_event_init(event); + break; + + default: + return -ENOENT; + } + if (unlikely(err)) { if (event->destroy) event->destroy(event); - return ERR_PTR(err); } - return &pmu; + return err; } +static struct pmu pmu = { + .event_init = sh_pmu_event_init, + .enable = sh_pmu_enable, + .disable = sh_pmu_disable, + .read = sh_pmu_read, +}; + static void sh_pmu_setup(int cpu) -{ + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); memset(cpuhw, 0, sizeof(struct cpu_hw_events)); @@ -325,6 +337,7 @@ int __cpuinit register_sh_pmu(struct sh_pmu *pmu) WARN_ON(pmu->num_events > MAX_HWEVENTS); + perf_pmu_register(&pmu); perf_cpu_notifier(sh_pmu_notifier); return 0; } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 481b894a5018..bed4327f5a7a 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1025,7 +1025,7 @@ out: return ret; } -static int __hw_perf_event_init(struct perf_event *event) +static int sparc_pmu_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; struct perf_event *evts[MAX_HWEVENTS]; @@ -1038,17 +1038,27 @@ static int __hw_perf_event_init(struct perf_event *event) if (atomic_read(&nmi_active) < 0) return -ENODEV; - if (attr->type == PERF_TYPE_HARDWARE) { + switch (attr->type) { + case PERF_TYPE_HARDWARE: if (attr->config >= sparc_pmu->max_events) return -EINVAL; pmap = sparc_pmu->event_map(attr->config); - } else if (attr->type == PERF_TYPE_HW_CACHE) { + break; + + case PERF_TYPE_HW_CACHE: pmap = sparc_map_cache_event(attr->config); if (IS_ERR(pmap)) return PTR_ERR(pmap); - } else + break; + + case PERF_TYPE_RAW: return -EOPNOTSUPP; + default: + return -ENOENT; + + } + /* We save the enable bits in the config_base. */ hwc->config_base = sparc_pmu->irq_bit; if (!attr->exclude_user) @@ -1143,6 +1153,7 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) } static struct pmu pmu = { + .event_init = sparc_pmu_event_init, .enable = sparc_pmu_enable, .disable = sparc_pmu_disable, .read = sparc_pmu_read, @@ -1152,15 +1163,6 @@ static struct pmu pmu = { .commit_txn = sparc_pmu_commit_txn, }; -struct pmu *hw_perf_event_init(struct perf_event *event) -{ - int err = __hw_perf_event_init(event); - - if (err) - return ERR_PTR(err); - return &pmu; -} - void perf_event_print_debug(void) { unsigned long flags; @@ -1280,6 +1282,7 @@ void __init init_hw_perf_events(void) /* All sparc64 PMUs currently have 2 events. 
*/ perf_max_events = 2; + perf_pmu_register(&pmu); register_die_notifier(&perf_event_nmi_notifier); } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index fdd97f2e9961..2c89264ee791 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -530,7 +530,7 @@ static int x86_pmu_hw_config(struct perf_event *event) /* * Setup the hardware configuration for a given attr_type */ -static int __hw_perf_event_init(struct perf_event *event) +static int __x86_pmu_event_init(struct perf_event *event) { int err; @@ -1414,6 +1414,7 @@ void __init init_hw_perf_events(void) pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); + perf_pmu_register(&pmu); perf_cpu_notifier(x86_pmu_notifier); } @@ -1483,18 +1484,6 @@ static int x86_pmu_commit_txn(struct pmu *pmu) return 0; } -static struct pmu pmu = { - .enable = x86_pmu_enable, - .disable = x86_pmu_disable, - .start = x86_pmu_start, - .stop = x86_pmu_stop, - .read = x86_pmu_read, - .unthrottle = x86_pmu_unthrottle, - .start_txn = x86_pmu_start_txn, - .cancel_txn = x86_pmu_cancel_txn, - .commit_txn = x86_pmu_commit_txn, -}; - /* * validate that we can schedule this event */ @@ -1569,12 +1558,22 @@ out: return ret; } -struct pmu *hw_perf_event_init(struct perf_event *event) +int x86_pmu_event_init(struct perf_event *event) { struct pmu *tmp; int err; - err = __hw_perf_event_init(event); + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + + err = __x86_pmu_event_init(event); if (!err) { /* * we temporarily connect event to its pmu @@ -1594,12 +1593,24 @@ struct pmu *hw_perf_event_init(struct perf_event *event) if (err) { if (event->destroy) event->destroy(event); - return ERR_PTR(err); } - return &pmu; + return err; } +static struct pmu pmu = { + .event_init = x86_pmu_event_init, + .enable = x86_pmu_enable, + .disable = x86_pmu_disable, + .start = x86_pmu_start, + .stop = x86_pmu_stop, + .read = x86_pmu_read, + .unthrottle = x86_pmu_unthrottle, + .start_txn = x86_pmu_start_txn, + .cancel_txn = x86_pmu_cancel_txn, + .commit_txn = x86_pmu_commit_txn, +}; + /* * callchain support */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 09d048b52115..ab72f56eb372 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -561,6 +561,13 @@ struct perf_event; * struct pmu - generic performance monitoring unit */ struct pmu { + struct list_head entry; + + /* + * Should return -ENOENT when the @event doesn't match this pmu + */ + int (*event_init) (struct perf_event *event); + int (*enable) (struct perf_event *event); void (*disable) (struct perf_event *event); int (*start) (struct perf_event *event); @@ -849,7 +856,8 @@ struct perf_output_handle { */ extern int perf_max_events; -extern struct pmu *hw_perf_event_init(struct perf_event *event); +extern int perf_pmu_register(struct pmu *pmu); +extern void perf_pmu_unregister(struct pmu *pmu); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index d71a987fd2bf..e9c5cfa1fd20 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -565,6 +565,34 @@ static struct notifier_block hw_breakpoint_exceptions_nb = { .priority = 0x7fffffff }; +static void bp_perf_event_destroy(struct 
perf_event *event) +{ + release_bp_slot(event); +} + +static int hw_breakpoint_event_init(struct perf_event *bp) +{ + int err; + + if (bp->attr.type != PERF_TYPE_BREAKPOINT) + return -ENOENT; + + err = register_perf_hw_breakpoint(bp); + if (err) + return err; + + bp->destroy = bp_perf_event_destroy; + + return 0; +} + +static struct pmu perf_breakpoint = { + .event_init = hw_breakpoint_event_init, + .enable = arch_install_hw_breakpoint, + .disable = arch_uninstall_hw_breakpoint, + .read = hw_breakpoint_pmu_read, +}; + static int __init init_hw_breakpoint(void) { unsigned int **task_bp_pinned; @@ -586,6 +614,8 @@ static int __init init_hw_breakpoint(void) constraints_initialized = 1; + perf_pmu_register(&perf_breakpoint); + return register_die_notifier(&hw_breakpoint_exceptions_nb); err_alloc: @@ -601,8 +631,3 @@ static int __init init_hw_breakpoint(void) core_initcall(init_hw_breakpoint); -struct pmu perf_ops_bp = { - .enable = arch_install_hw_breakpoint, - .disable = arch_uninstall_hw_breakpoint, - .read = hw_breakpoint_pmu_read, -}; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index fb46fd13f31f..288ce43de57c 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -31,7 +31,6 @@ #include #include #include -#include #include @@ -72,14 +71,6 @@ static atomic64_t perf_event_id; */ static DEFINE_SPINLOCK(perf_resource_lock); -/* - * Architecture provided APIs - weak aliases: - */ -extern __weak struct pmu *hw_perf_event_init(struct perf_event *event) -{ - return NULL; -} - void __weak hw_perf_disable(void) { barrier(); } void __weak hw_perf_enable(void) { barrier(); } @@ -4501,182 +4492,6 @@ static int perf_swevent_int(struct perf_event *event) return 0; } -static struct pmu perf_ops_generic = { - .enable = perf_swevent_enable, - .disable = perf_swevent_disable, - .start = perf_swevent_int, - .stop = perf_swevent_void, - .read = perf_swevent_read, - .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ -}; - -/* - * hrtimer based swevent callback - */ - -static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) -{ - enum hrtimer_restart ret = HRTIMER_RESTART; - struct perf_sample_data data; - struct pt_regs *regs; - struct perf_event *event; - u64 period; - - event = container_of(hrtimer, struct perf_event, hw.hrtimer); - event->pmu->read(event); - - perf_sample_data_init(&data, 0); - data.period = event->hw.last_period; - regs = get_irq_regs(); - - if (regs && !perf_exclude_event(event, regs)) { - if (!(event->attr.exclude_idle && current->pid == 0)) - if (perf_event_overflow(event, 0, &data, regs)) - ret = HRTIMER_NORESTART; - } - - period = max_t(u64, 10000, event->hw.sample_period); - hrtimer_forward_now(hrtimer, ns_to_ktime(period)); - - return ret; -} - -static void perf_swevent_start_hrtimer(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swevent_hrtimer; - if (hwc->sample_period) { - u64 period; - - if (hwc->remaining) { - if (hwc->remaining < 0) - period = 10000; - else - period = hwc->remaining; - hwc->remaining = 0; - } else { - period = max_t(u64, 10000, hwc->sample_period); - } - __hrtimer_start_range_ns(&hwc->hrtimer, - ns_to_ktime(period), 0, - HRTIMER_MODE_REL, 0); - } -} - -static void perf_swevent_cancel_hrtimer(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - if (hwc->sample_period) { - ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); - hwc->remaining = 
ktime_to_ns(remaining); - - hrtimer_cancel(&hwc->hrtimer); - } -} - -/* - * Software event: cpu wall time clock - */ - -static void cpu_clock_perf_event_update(struct perf_event *event) -{ - int cpu = raw_smp_processor_id(); - s64 prev; - u64 now; - - now = cpu_clock(cpu); - prev = local64_xchg(&event->hw.prev_count, now); - local64_add(now - prev, &event->count); -} - -static int cpu_clock_perf_event_enable(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int cpu = raw_smp_processor_id(); - - local64_set(&hwc->prev_count, cpu_clock(cpu)); - perf_swevent_start_hrtimer(event); - - return 0; -} - -static void cpu_clock_perf_event_disable(struct perf_event *event) -{ - perf_swevent_cancel_hrtimer(event); - cpu_clock_perf_event_update(event); -} - -static void cpu_clock_perf_event_read(struct perf_event *event) -{ - cpu_clock_perf_event_update(event); -} - -static struct pmu perf_ops_cpu_clock = { - .enable = cpu_clock_perf_event_enable, - .disable = cpu_clock_perf_event_disable, - .read = cpu_clock_perf_event_read, -}; - -/* - * Software event: task time clock - */ - -static void task_clock_perf_event_update(struct perf_event *event, u64 now) -{ - u64 prev; - s64 delta; - - prev = local64_xchg(&event->hw.prev_count, now); - delta = now - prev; - local64_add(delta, &event->count); -} - -static int task_clock_perf_event_enable(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - u64 now; - - now = event->ctx->time; - - local64_set(&hwc->prev_count, now); - - perf_swevent_start_hrtimer(event); - - return 0; -} - -static void task_clock_perf_event_disable(struct perf_event *event) -{ - perf_swevent_cancel_hrtimer(event); - task_clock_perf_event_update(event, event->ctx->time); - -} - -static void task_clock_perf_event_read(struct perf_event *event) -{ - u64 time; - - if (!in_nmi()) { - update_context_time(event->ctx); - time = event->ctx->time; - } else { - u64 now = perf_clock(); - u64 delta = now - event->ctx->timestamp; - time = event->ctx->time + delta; - } - - task_clock_perf_event_update(event, time); -} - -static struct pmu perf_ops_task_clock = { - .enable = task_clock_perf_event_enable, - .disable = task_clock_perf_event_disable, - .read = task_clock_perf_event_read, -}; - /* Deref the hlist from the update side */ static inline struct swevent_hlist * swevent_hlist_deref(struct perf_cpu_context *cpuctx) @@ -4783,17 +4598,63 @@ static int swevent_hlist_get(struct perf_event *event) return err; } -#ifdef CONFIG_EVENT_TRACING +atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; -static struct pmu perf_ops_tracepoint = { - .enable = perf_trace_enable, - .disable = perf_trace_disable, +static void sw_perf_event_destroy(struct perf_event *event) +{ + u64 event_id = event->attr.config; + + WARN_ON(event->parent); + + atomic_dec(&perf_swevent_enabled[event_id]); + swevent_hlist_put(event); +} + +static int perf_swevent_init(struct perf_event *event) +{ + int event_id = event->attr.config; + + if (event->attr.type != PERF_TYPE_SOFTWARE) + return -ENOENT; + + switch (event_id) { + case PERF_COUNT_SW_CPU_CLOCK: + case PERF_COUNT_SW_TASK_CLOCK: + return -ENOENT; + + default: + break; + } + + if (event_id > PERF_COUNT_SW_MAX) + return -ENOENT; + + if (!event->parent) { + int err; + + err = swevent_hlist_get(event); + if (err) + return err; + + atomic_inc(&perf_swevent_enabled[event_id]); + event->destroy = sw_perf_event_destroy; + } + + return 0; +} + +static struct pmu perf_swevent = { + .event_init = perf_swevent_init, + .enable = perf_swevent_enable, + 
.disable = perf_swevent_disable, .start = perf_swevent_int, .stop = perf_swevent_void, .read = perf_swevent_read, - .unthrottle = perf_swevent_void, + .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ }; +#ifdef CONFIG_EVENT_TRACING + static int perf_tp_filter_match(struct perf_event *event, struct perf_sample_data *data) { @@ -4849,10 +4710,13 @@ static void tp_perf_event_destroy(struct perf_event *event) perf_trace_destroy(event); } -static struct pmu *tp_perf_event_init(struct perf_event *event) +static int perf_tp_event_init(struct perf_event *event) { int err; + if (event->attr.type != PERF_TYPE_TRACEPOINT) + return -ENOENT; + /* * Raw tracepoint data is a severe data leak, only allow root to * have these. @@ -4860,15 +4724,30 @@ static struct pmu *tp_perf_event_init(struct perf_event *event) if ((event->attr.sample_type & PERF_SAMPLE_RAW) && perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EPERM); + return -EPERM; err = perf_trace_init(event); if (err) - return NULL; + return err; event->destroy = tp_perf_event_destroy; - return &perf_ops_tracepoint; + return 0; +} + +static struct pmu perf_tracepoint = { + .event_init = perf_tp_event_init, + .enable = perf_trace_enable, + .disable = perf_trace_disable, + .start = perf_swevent_int, + .stop = perf_swevent_void, + .read = perf_swevent_read, + .unthrottle = perf_swevent_void, +}; + +static inline void perf_tp_register(void) +{ + perf_pmu_register(&perf_tracepoint); } static int perf_event_set_filter(struct perf_event *event, void __user *arg) @@ -4896,9 +4775,8 @@ static void perf_event_free_filter(struct perf_event *event) #else -static struct pmu *tp_perf_event_init(struct perf_event *event) +static inline void perf_tp_register(void) { - return NULL; } static int perf_event_set_filter(struct perf_event *event, void __user *arg) @@ -4913,105 +4791,247 @@ static void perf_event_free_filter(struct perf_event *event) #endif /* CONFIG_EVENT_TRACING */ #ifdef CONFIG_HAVE_HW_BREAKPOINT -static void bp_perf_event_destroy(struct perf_event *event) +void perf_bp_event(struct perf_event *bp, void *data) { - release_bp_slot(event); + struct perf_sample_data sample; + struct pt_regs *regs = data; + + perf_sample_data_init(&sample, bp->attr.bp_addr); + + if (!perf_exclude_event(bp, regs)) + perf_swevent_add(bp, 1, 1, &sample, regs); } +#endif + +/* + * hrtimer based swevent callback + */ -static struct pmu *bp_perf_event_init(struct perf_event *bp) +static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) { - int err; + enum hrtimer_restart ret = HRTIMER_RESTART; + struct perf_sample_data data; + struct pt_regs *regs; + struct perf_event *event; + u64 period; - err = register_perf_hw_breakpoint(bp); - if (err) - return ERR_PTR(err); + event = container_of(hrtimer, struct perf_event, hw.hrtimer); + event->pmu->read(event); + + perf_sample_data_init(&data, 0); + data.period = event->hw.last_period; + regs = get_irq_regs(); + + if (regs && !perf_exclude_event(event, regs)) { + if (!(event->attr.exclude_idle && current->pid == 0)) + if (perf_event_overflow(event, 0, &data, regs)) + ret = HRTIMER_NORESTART; + } - bp->destroy = bp_perf_event_destroy; + period = max_t(u64, 10000, event->hw.sample_period); + hrtimer_forward_now(hrtimer, ns_to_ktime(period)); - return &perf_ops_bp; + return ret; } -void perf_bp_event(struct perf_event *bp, void *data) +static void perf_swevent_start_hrtimer(struct perf_event *event) { - struct perf_sample_data sample; - struct pt_regs *regs = data; + struct 
hw_perf_event *hwc = &event->hw; - perf_sample_data_init(&sample, bp->attr.bp_addr); + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swevent_hrtimer; + if (hwc->sample_period) { + u64 period; - if (!perf_exclude_event(bp, regs)) - perf_swevent_add(bp, 1, 1, &sample, regs); + if (hwc->remaining) { + if (hwc->remaining < 0) + period = 10000; + else + period = hwc->remaining; + hwc->remaining = 0; + } else { + period = max_t(u64, 10000, hwc->sample_period); + } + __hrtimer_start_range_ns(&hwc->hrtimer, + ns_to_ktime(period), 0, + HRTIMER_MODE_REL, 0); + } } -#else -static struct pmu *bp_perf_event_init(struct perf_event *bp) + +static void perf_swevent_cancel_hrtimer(struct perf_event *event) { - return NULL; + struct hw_perf_event *hwc = &event->hw; + + if (hwc->sample_period) { + ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); + hwc->remaining = ktime_to_ns(remaining); + + hrtimer_cancel(&hwc->hrtimer); + } } -void perf_bp_event(struct perf_event *bp, void *regs) +/* + * Software event: cpu wall time clock + */ + +static void cpu_clock_event_update(struct perf_event *event) { + int cpu = raw_smp_processor_id(); + s64 prev; + u64 now; + + now = cpu_clock(cpu); + prev = local64_xchg(&event->hw.prev_count, now); + local64_add(now - prev, &event->count); } -#endif -atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; +static int cpu_clock_event_enable(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int cpu = raw_smp_processor_id(); -static void sw_perf_event_destroy(struct perf_event *event) + local64_set(&hwc->prev_count, cpu_clock(cpu)); + perf_swevent_start_hrtimer(event); + + return 0; +} + +static void cpu_clock_event_disable(struct perf_event *event) { - u64 event_id = event->attr.config; + perf_swevent_cancel_hrtimer(event); + cpu_clock_event_update(event); +} - WARN_ON(event->parent); +static void cpu_clock_event_read(struct perf_event *event) +{ + cpu_clock_event_update(event); +} - atomic_dec(&perf_swevent_enabled[event_id]); - swevent_hlist_put(event); +static int cpu_clock_event_init(struct perf_event *event) +{ + if (event->attr.type != PERF_TYPE_SOFTWARE) + return -ENOENT; + + if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) + return -ENOENT; + + return 0; } -static struct pmu *sw_perf_event_init(struct perf_event *event) +static struct pmu perf_cpu_clock = { + .event_init = cpu_clock_event_init, + .enable = cpu_clock_event_enable, + .disable = cpu_clock_event_disable, + .read = cpu_clock_event_read, +}; + +/* + * Software event: task time clock + */ + +static void task_clock_event_update(struct perf_event *event, u64 now) { - struct pmu *pmu = NULL; - u64 event_id = event->attr.config; + u64 prev; + s64 delta; - /* - * Software events (currently) can't in general distinguish - * between user, kernel and hypervisor events. - * However, context switches and cpu migrations are considered - * to be kernel events, and page faults are never hypervisor - * events. - */ - switch (event_id) { - case PERF_COUNT_SW_CPU_CLOCK: - pmu = &perf_ops_cpu_clock; + prev = local64_xchg(&event->hw.prev_count, now); + delta = now - prev; + local64_add(delta, &event->count); +} - break; - case PERF_COUNT_SW_TASK_CLOCK: - /* - * If the user instantiates this as a per-cpu event, - * use the cpu_clock event instead. 
- */ - if (event->ctx->task) - pmu = &perf_ops_task_clock; - else - pmu = &perf_ops_cpu_clock; +static int task_clock_event_enable(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 now; - break; - case PERF_COUNT_SW_PAGE_FAULTS: - case PERF_COUNT_SW_PAGE_FAULTS_MIN: - case PERF_COUNT_SW_PAGE_FAULTS_MAJ: - case PERF_COUNT_SW_CONTEXT_SWITCHES: - case PERF_COUNT_SW_CPU_MIGRATIONS: - case PERF_COUNT_SW_ALIGNMENT_FAULTS: - case PERF_COUNT_SW_EMULATION_FAULTS: - if (!event->parent) { - int err; - - err = swevent_hlist_get(event); - if (err) - return ERR_PTR(err); + now = event->ctx->time; - atomic_inc(&perf_swevent_enabled[event_id]); - event->destroy = sw_perf_event_destroy; + local64_set(&hwc->prev_count, now); + + perf_swevent_start_hrtimer(event); + + return 0; +} + +static void task_clock_event_disable(struct perf_event *event) +{ + perf_swevent_cancel_hrtimer(event); + task_clock_event_update(event, event->ctx->time); + +} + +static void task_clock_event_read(struct perf_event *event) +{ + u64 time; + + if (!in_nmi()) { + update_context_time(event->ctx); + time = event->ctx->time; + } else { + u64 now = perf_clock(); + u64 delta = now - event->ctx->timestamp; + time = event->ctx->time + delta; + } + + task_clock_event_update(event, time); +} + +static int task_clock_event_init(struct perf_event *event) +{ + if (event->attr.type != PERF_TYPE_SOFTWARE) + return -ENOENT; + + if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) + return -ENOENT; + + return 0; +} + +static struct pmu perf_task_clock = { + .event_init = task_clock_event_init, + .enable = task_clock_event_enable, + .disable = task_clock_event_disable, + .read = task_clock_event_read, +}; + +static LIST_HEAD(pmus); +static DEFINE_MUTEX(pmus_lock); +static struct srcu_struct pmus_srcu; + +int perf_pmu_register(struct pmu *pmu) +{ + mutex_lock(&pmus_lock); + list_add_rcu(&pmu->entry, &pmus); + mutex_unlock(&pmus_lock); + + return 0; +} + +void perf_pmu_unregister(struct pmu *pmu) +{ + mutex_lock(&pmus_lock); + list_del_rcu(&pmu->entry); + mutex_unlock(&pmus_lock); + + synchronize_srcu(&pmus_srcu); +} + +struct pmu *perf_init_event(struct perf_event *event) +{ + struct pmu *pmu = NULL; + int idx; + + idx = srcu_read_lock(&pmus_srcu); + list_for_each_entry_rcu(pmu, &pmus, entry) { + int ret = pmu->event_init(event); + if (!ret) + break; + if (ret != -ENOENT) { + pmu = ERR_PTR(ret); + break; } - pmu = &perf_ops_generic; - break; } + srcu_read_unlock(&pmus_srcu, idx); return pmu; } @@ -5092,29 +5112,8 @@ perf_event_alloc(struct perf_event_attr *attr, if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) goto done; - switch (attr->type) { - case PERF_TYPE_RAW: - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - pmu = hw_perf_event_init(event); - break; - - case PERF_TYPE_SOFTWARE: - pmu = sw_perf_event_init(event); - break; - - case PERF_TYPE_TRACEPOINT: - pmu = tp_perf_event_init(event); - break; + pmu = perf_init_event(event); - case PERF_TYPE_BREAKPOINT: - pmu = bp_perf_event_init(event); - break; - - - default: - break; - } done: err = 0; if (!pmu) @@ -5979,22 +5978,15 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) return NOTIFY_OK; } -/* - * This has to have a higher priority than migration_notifier in sched.c. 
- */ -static struct notifier_block __cpuinitdata perf_cpu_nb = { - .notifier_call = perf_cpu_notify, - .priority = 20, -}; - void __init perf_event_init(void) { perf_event_init_all_cpus(); - perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, - (void *)(long)smp_processor_id()); - register_cpu_notifier(&perf_cpu_nb); + init_srcu_struct(&pmus_srcu); + perf_pmu_register(&perf_swevent); + perf_pmu_register(&perf_cpu_clock); + perf_pmu_register(&perf_task_clock); + perf_tp_register(); + perf_cpu_notifier(perf_cpu_notify); } static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, -- cgit v1.2.3 From 24cd7f54a0d47e1d5b3de29e2456bfbd2d8447b7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 17:32:03 +0200 Subject: perf: Reduce perf_disable() usage Since the current perf_disable() usage is only an optimization, remove it for now. This eases the removal of the __weak hw_perf_enable() interface. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/arm/kernel/perf_event.c | 3 +++ arch/powerpc/kernel/perf_event.c | 3 +++ arch/powerpc/kernel/perf_event_fsl_emb.c | 8 +++++-- arch/sh/kernel/perf_event.c | 11 +++++++--- arch/sparc/kernel/perf_event.c | 3 +++ arch/x86/kernel/cpu/perf_event.c | 22 ++++++++++++------- include/linux/perf_event.h | 20 ++++++++--------- kernel/perf_event.c | 37 +------------------------------- 8 files changed, 48 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index f62f9db35db3..afc92c580d18 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -277,6 +277,8 @@ armpmu_enable(struct perf_event *event) int idx; int err = 0; + perf_disable(); + /* If we don't have a space for the counter then finish early. 
*/ idx = armpmu->get_event_idx(cpuc, hwc); if (idx < 0) { @@ -303,6 +305,7 @@ armpmu_enable(struct perf_event *event) perf_event_update_userpage(event); out: + perf_enable(); return err; } diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 19131b2614b9..c1408821dbc2 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -861,6 +861,7 @@ void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + perf_disable(); cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -875,6 +876,7 @@ void power_pmu_cancel_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_enable(); } /* @@ -901,6 +903,7 @@ int power_pmu_commit_txn(struct pmu *pmu) cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_enable(); return 0; } diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index ea6a804e43fd..9bc84a7fd901 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -262,7 +262,7 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -/* perf must be disabled, context locked on entry */ +/* context locked on entry */ static int fsl_emb_pmu_enable(struct perf_event *event) { struct cpu_hw_events *cpuhw; @@ -271,6 +271,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) u64 val; int i; + perf_disable(); cpuhw = &get_cpu_var(cpu_hw_events); if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) @@ -310,15 +311,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ret = 0; out: put_cpu_var(cpu_hw_events); + perf_enable(); return ret; } -/* perf must be disabled, context locked on entry */ +/* context locked on entry */ static void fsl_emb_pmu_disable(struct perf_event *event) { struct cpu_hw_events *cpuhw; int i = event->hw.idx; + perf_disable(); if (i < 0) goto out; @@ -346,6 +349,7 @@ static void fsl_emb_pmu_disable(struct perf_event *event) cpuhw->n_events--; out: + perf_enable(); put_cpu_var(cpu_hw_events); } diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 8cb206597e0c..d042989ceb45 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -230,11 +230,14 @@ static int sh_pmu_enable(struct perf_event *event) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + int ret = -EAGAIN; + + perf_disable(); if (test_and_set_bit(idx, cpuc->used_mask)) { idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); if (idx == sh_pmu->num_events) - return -EAGAIN; + goto out; set_bit(idx, cpuc->used_mask); hwc->idx = idx; @@ -248,8 +251,10 @@ static int sh_pmu_enable(struct perf_event *event) sh_pmu->enable(hwc, idx); perf_event_update_userpage(event); - - return 0; + ret = 0; +out: + perf_enable(); + return ret; } static void sh_pmu_read(struct perf_event *event) diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index bed4327f5a7a..d0131deeeaf6 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1113,6 +1113,7 @@ static void sparc_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + perf_disable(); cpuhw->group_flag |= PERF_EVENT_TXN; } @@ -1126,6 +1127,7 @@ static void sparc_pmu_cancel_txn(struct pmu *pmu) struct cpu_hw_events 
*cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_enable(); } /* @@ -1149,6 +1151,7 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) return -EAGAIN; cpuc->group_flag &= ~PERF_EVENT_TXN; + perf_enable(); return 0; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2c89264ee791..846070ce49c3 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -969,10 +969,11 @@ static int x86_pmu_enable(struct perf_event *event) hwc = &event->hw; + perf_disable(); n0 = cpuc->n_events; - n = collect_events(cpuc, event, false); - if (n < 0) - return n; + ret = n = collect_events(cpuc, event, false); + if (ret < 0) + goto out; /* * If group events scheduling transaction was started, @@ -980,23 +981,26 @@ static int x86_pmu_enable(struct perf_event *event) * at commit time(->commit_txn) as a whole */ if (cpuc->group_flag & PERF_EVENT_TXN) - goto out; + goto done_collect; ret = x86_pmu.schedule_events(cpuc, n, assign); if (ret) - return ret; + goto out; /* * copy new assignment, now we know it is possible * will be used by hw_perf_enable() */ memcpy(cpuc->assign, assign, n*sizeof(int)); -out: +done_collect: cpuc->n_events = n; cpuc->n_added += n - n0; cpuc->n_txn += n - n0; - return 0; + ret = 0; +out: + perf_enable(); + return ret; } static int x86_pmu_start(struct perf_event *event) @@ -1432,6 +1436,7 @@ static void x86_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + perf_disable(); cpuc->group_flag |= PERF_EVENT_TXN; cpuc->n_txn = 0; } @@ -1451,6 +1456,7 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) */ cpuc->n_added -= cpuc->n_txn; cpuc->n_events -= cpuc->n_txn; + perf_enable(); } /* @@ -1480,7 +1486,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu) memcpy(cpuc->assign, assign, n*sizeof(int)); cpuc->group_flag &= ~PERF_EVENT_TXN; - + perf_enable(); return 0; } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ab72f56eb372..243286a8ded7 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -564,26 +564,26 @@ struct pmu { struct list_head entry; /* - * Should return -ENOENT when the @event doesn't match this pmu + * Should return -ENOENT when the @event doesn't match this PMU. */ int (*event_init) (struct perf_event *event); - int (*enable) (struct perf_event *event); + int (*enable) (struct perf_event *event); void (*disable) (struct perf_event *event); - int (*start) (struct perf_event *event); + int (*start) (struct perf_event *event); void (*stop) (struct perf_event *event); void (*read) (struct perf_event *event); void (*unthrottle) (struct perf_event *event); /* - * Group events scheduling is treated as a transaction, add group - * events as a whole and perform one schedulability test. If the test - * fails, roll back the whole group + * Group events scheduling is treated as a transaction, add + * group events as a whole and perform one schedulability test. + * If the test fails, roll back the whole group */ /* - * Start the transaction, after this ->enable() doesn't need - * to do schedulability tests. + * Start the transaction, after this ->enable() doesn't need to + * do schedulability tests. */ void (*start_txn) (struct pmu *pmu); /* @@ -594,8 +594,8 @@ struct pmu { */ int (*commit_txn) (struct pmu *pmu); /* - * Will cancel the transaction, assumes ->disable() is called for - * each successfull ->enable() during the transaction. 
+ * Will cancel the transaction, assumes ->disable() is called + * for each successfull ->enable() during the transaction. */ void (*cancel_txn) (struct pmu *pmu); }; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 149ca18371b7..9a98ce953561 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -478,11 +478,6 @@ static void __perf_event_remove_from_context(void *info) return; raw_spin_lock(&ctx->lock); - /* - * Protect the list operation against NMI by disabling the - * events on a global level. - */ - perf_disable(); event_sched_out(event, cpuctx, ctx); @@ -498,7 +493,6 @@ static void __perf_event_remove_from_context(void *info) perf_max_events - perf_reserved_percpu); } - perf_enable(); raw_spin_unlock(&ctx->lock); } @@ -803,12 +797,6 @@ static void __perf_install_in_context(void *info) ctx->is_active = 1; update_context_time(ctx); - /* - * Protect the list operation against NMI by disabling the - * events on a global level. NOP for non NMI based events. - */ - perf_disable(); - add_event_to_ctx(event, ctx); if (event->cpu != -1 && event->cpu != smp_processor_id()) @@ -850,8 +838,6 @@ static void __perf_install_in_context(void *info) cpuctx->max_pertask--; unlock: - perf_enable(); - raw_spin_unlock(&ctx->lock); } @@ -972,12 +958,10 @@ static void __perf_event_enable(void *info) if (!group_can_go_on(event, cpuctx, 1)) { err = -EEXIST; } else { - perf_disable(); if (event == leader) err = group_sched_in(event, cpuctx, ctx); else err = event_sched_in(event, cpuctx, ctx); - perf_enable(); } if (err) { @@ -1090,9 +1074,8 @@ static void ctx_sched_out(struct perf_event_context *ctx, goto out; update_context_time(ctx); - perf_disable(); if (!ctx->nr_active) - goto out_enable; + goto out; if (event_type & EVENT_PINNED) { list_for_each_entry(event, &ctx->pinned_groups, group_entry) @@ -1103,9 +1086,6 @@ static void ctx_sched_out(struct perf_event_context *ctx, list_for_each_entry(event, &ctx->flexible_groups, group_entry) group_sched_out(event, cpuctx, ctx); } - - out_enable: - perf_enable(); out: raw_spin_unlock(&ctx->lock); } @@ -1364,8 +1344,6 @@ ctx_sched_in(struct perf_event_context *ctx, ctx->timestamp = perf_clock(); - perf_disable(); - /* * First go through the list and put on any pinned groups * in order to give them the best chance of going on. 
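(Aside: the two-pass order these ctx_sched_in() hunks preserve is easier to see outside diff form. The sketch below is illustrative only -- it inlines what the kernel does in its ctx_pinned_sched_in()/ctx_flexible_sched_in() helpers and omits the error handling: pinned groups get first pick of the counters, flexible groups take whatever is left and are the ones that get rotated.)

/* Illustrative sketch, not the kernel code: pinned before flexible. */
static void ctx_sched_in_sketch(struct perf_event_context *ctx,
				struct perf_cpu_context *cpuctx,
				enum event_type_t event_type)
{
	struct perf_event *event;

	ctx->timestamp = perf_clock();

	/* Pass 1: pinned groups must go on; a failure puts the event in error. */
	if (event_type & EVENT_PINNED)
		list_for_each_entry(event, &ctx->pinned_groups, group_entry)
			group_sched_in(event, cpuctx, ctx);

	/* Pass 2: flexible groups are best effort and rotated over time. */
	if (event_type & EVENT_FLEXIBLE)
		list_for_each_entry(event, &ctx->flexible_groups, group_entry)
			group_sched_in(event, cpuctx, ctx);
}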
@@ -1377,7 +1355,6 @@ ctx_sched_in(struct perf_event_context *ctx, if (event_type & EVENT_FLEXIBLE) ctx_flexible_sched_in(ctx, cpuctx); - perf_enable(); out: raw_spin_unlock(&ctx->lock); } @@ -1425,8 +1402,6 @@ void perf_event_task_sched_in(struct task_struct *task) if (cpuctx->task_ctx == ctx) return; - perf_disable(); - /* * We want to keep the following priority order: * cpu pinned (that don't need to move), task pinned, @@ -1439,8 +1414,6 @@ void perf_event_task_sched_in(struct task_struct *task) ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); cpuctx->task_ctx = ctx; - - perf_enable(); } #define MAX_INTERRUPTS (~0ULL) @@ -1555,11 +1528,9 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) hwc->sample_period = sample_period; if (local64_read(&hwc->period_left) > 8*sample_period) { - perf_disable(); perf_event_stop(event); local64_set(&hwc->period_left, 0); perf_event_start(event); - perf_enable(); } } @@ -1588,15 +1559,12 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) */ if (interrupts == MAX_INTERRUPTS) { perf_log_throttle(event, 1); - perf_disable(); event->pmu->unthrottle(event); - perf_enable(); } if (!event->attr.freq || !event->attr.sample_freq) continue; - perf_disable(); event->pmu->read(event); now = local64_read(&event->count); delta = now - hwc->freq_count_stamp; @@ -1604,7 +1572,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) if (delta > 0) perf_adjust_period(event, TICK_NSEC, delta); - perf_enable(); } raw_spin_unlock(&ctx->lock); } @@ -1647,7 +1614,6 @@ void perf_event_task_tick(struct task_struct *curr) if (!rotate) return; - perf_disable(); cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); if (ctx) task_ctx_sched_out(ctx, EVENT_FLEXIBLE); @@ -1659,7 +1625,6 @@ void perf_event_task_tick(struct task_struct *curr) cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); if (ctx) task_ctx_sched_in(curr, EVENT_FLEXIBLE); - perf_enable(); } static int event_enable_on_exec(struct perf_event *event, -- cgit v1.2.3 From 33696fc0d141bbbcb12f75b69608ea83282e3117 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Jun 2010 08:49:00 +0200 Subject: perf: Per PMU disable Changes perf_disable() into perf_pmu_disable(). Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/alpha/kernel/perf_event.c | 30 +++++++++++++------------ arch/arm/kernel/perf_event.c | 28 +++++++++++------------ arch/powerpc/kernel/perf_event.c | 24 +++++++++++--------- arch/powerpc/kernel/perf_event_fsl_emb.c | 18 ++++++++------- arch/sh/kernel/perf_event.c | 38 +++++++++++++++++--------------- arch/sparc/kernel/perf_event.c | 20 +++++++++-------- arch/x86/kernel/cpu/perf_event.c | 16 ++++++++------ include/linux/perf_event.h | 13 ++++++----- kernel/perf_event.c | 31 ++++++++++++++++---------- 9 files changed, 119 insertions(+), 99 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 19660b5c298f..3e260731f8e6 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -435,7 +435,7 @@ static int alpha_pmu_enable(struct perf_event *event) * nevertheless we disable the PMCs first to enable a potential * final PMI to occur before we disable interrupts. 
*/ - perf_disable(); + perf_pmu_disable(event->pmu); local_irq_save(flags); /* Default to error to be returned */ @@ -456,7 +456,7 @@ static int alpha_pmu_enable(struct perf_event *event) } local_irq_restore(flags); - perf_enable(); + perf_pmu_enable(event->pmu); return ret; } @@ -474,7 +474,7 @@ static void alpha_pmu_disable(struct perf_event *event) unsigned long flags; int j; - perf_disable(); + perf_pmu_disable(event->pmu); local_irq_save(flags); for (j = 0; j < cpuc->n_events; j++) { @@ -502,7 +502,7 @@ static void alpha_pmu_disable(struct perf_event *event) } local_irq_restore(flags); - perf_enable(); + perf_pmu_enable(event->pmu); } @@ -668,18 +668,10 @@ static int alpha_pmu_event_init(struct perf_event *event) return err; } -static struct pmu pmu = { - .event_init = alpha_pmu_event_init, - .enable = alpha_pmu_enable, - .disable = alpha_pmu_disable, - .read = alpha_pmu_read, - .unthrottle = alpha_pmu_unthrottle, -}; - /* * Main entry point - enable HW performance counters. */ -void hw_perf_enable(void) +static void alpha_pmu_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -705,7 +697,7 @@ void hw_perf_enable(void) * Main entry point - disable HW performance counters. */ -void hw_perf_disable(void) +static void alpha_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -718,6 +710,16 @@ void hw_perf_disable(void) wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); } +static struct pmu pmu = { + .pmu_enable = alpha_pmu_pmu_enable, + .pmu_disable = alpha_pmu_pmu_disable, + .event_init = alpha_pmu_event_init, + .enable = alpha_pmu_enable, + .disable = alpha_pmu_disable, + .read = alpha_pmu_read, + .unthrottle = alpha_pmu_unthrottle, +}; + /* * Main entry point - don't know when this is called but it diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index afc92c580d18..3343f3f4b973 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -277,7 +277,7 @@ armpmu_enable(struct perf_event *event) int idx; int err = 0; - perf_disable(); + perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ idx = armpmu->get_event_idx(cpuc, hwc); @@ -305,7 +305,7 @@ armpmu_enable(struct perf_event *event) perf_event_update_userpage(event); out: - perf_enable(); + perf_pmu_enable(event->pmu); return err; } @@ -534,16 +534,7 @@ static int armpmu_event_init(struct perf_event *event) return err; } -static struct pmu pmu = { - .event_init = armpmu_event_init, - .enable = armpmu_enable, - .disable = armpmu_disable, - .unthrottle = armpmu_unthrottle, - .read = armpmu_read, -}; - -void -hw_perf_enable(void) +static void armpmu_pmu_enable(struct pmu *pmu) { /* Enable all of the perf events on hardware. */ int idx; @@ -564,13 +555,22 @@ hw_perf_enable(void) armpmu->start(); } -void -hw_perf_disable(void) +static void armpmu_pmu_disable(struct pmu *pmu) { if (armpmu) armpmu->stop(); } +static struct pmu pmu = { + .pmu_enable = armpmu_pmu_enable, + .pmu_disable= armpmu_pmu_disable, + .event_init = armpmu_event_init, + .enable = armpmu_enable, + .disable = armpmu_disable, + .unthrottle = armpmu_unthrottle, + .read = armpmu_read, +}; + /* * ARMv6 Performance counter handling code. 
* diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index c1408821dbc2..deb84bbcb0e6 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -517,7 +517,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -void hw_perf_disable(void) +static void power_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -565,7 +565,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void power_pmu_pmu_enable(struct pmu *pmu) { struct perf_event *event; struct cpu_hw_events *cpuhw; @@ -735,7 +735,7 @@ static int power_pmu_enable(struct perf_event *event) int ret = -EAGAIN; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); /* * Add the event to the list (if there is room) @@ -769,7 +769,7 @@ nocheck: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } @@ -784,7 +784,7 @@ static void power_pmu_disable(struct perf_event *event) unsigned long flags; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); power_pmu_read(event); @@ -821,7 +821,7 @@ static void power_pmu_disable(struct perf_event *event) cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); } - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -837,7 +837,7 @@ static void power_pmu_unthrottle(struct perf_event *event) if (!event->hw.idx || !event->hw.sample_period) return; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); power_pmu_read(event); left = event->hw.sample_period; event->hw.last_period = left; @@ -848,7 +848,7 @@ static void power_pmu_unthrottle(struct perf_event *event) local64_set(&event->hw.prev_count, val); local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -861,7 +861,7 @@ void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - perf_disable(); + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -876,7 +876,7 @@ void power_pmu_cancel_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); } /* @@ -903,7 +903,7 @@ int power_pmu_commit_txn(struct pmu *pmu) cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuhw->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); return 0; } @@ -1131,6 +1131,8 @@ static int power_pmu_event_init(struct perf_event *event) } struct pmu power_pmu = { + .pmu_enable = power_pmu_pmu_enable, + .pmu_disable = power_pmu_pmu_disable, .event_init = power_pmu_event_init, .enable = power_pmu_enable, .disable = power_pmu_disable, diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 9bc84a7fd901..84b1974c628f 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -177,7 +177,7 @@ static void fsl_emb_pmu_read(struct perf_event *event) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. 
*/ -void hw_perf_disable(void) +static void fsl_emb_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -216,7 +216,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void fsl_emb_pmu_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -271,7 +271,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) u64 val; int i; - perf_disable(); + perf_pmu_disable(event->pmu); cpuhw = &get_cpu_var(cpu_hw_events); if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) @@ -311,7 +311,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ret = 0; out: put_cpu_var(cpu_hw_events); - perf_enable(); + perf_pmu_enable(event->pmu); return ret; } @@ -321,7 +321,7 @@ static void fsl_emb_pmu_disable(struct perf_event *event) struct cpu_hw_events *cpuhw; int i = event->hw.idx; - perf_disable(); + perf_pmu_disable(event->pmu); if (i < 0) goto out; @@ -349,7 +349,7 @@ static void fsl_emb_pmu_disable(struct perf_event *event) cpuhw->n_events--; out: - perf_enable(); + perf_pmu_enable(event->pmu); put_cpu_var(cpu_hw_events); } @@ -367,7 +367,7 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event) if (event->hw.idx < 0 || !event->hw.sample_period) return; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); fsl_emb_pmu_read(event); left = event->hw.sample_period; event->hw.last_period = left; @@ -378,7 +378,7 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event) local64_set(&event->hw.prev_count, val); local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -524,6 +524,8 @@ static int fsl_emb_pmu_event_init(struct perf_event *event) } static struct pmu fsl_emb_pmu = { + .pmu_enable = fsl_emb_pmu_pmu_enable, + .pmu_disable = fsl_emb_pmu_pmu_disable, .event_init = fsl_emb_pmu_event_init, .enable = fsl_emb_pmu_enable, .disable = fsl_emb_pmu_disable, diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index d042989ceb45..4bbe19058a58 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -232,7 +232,7 @@ static int sh_pmu_enable(struct perf_event *event) int idx = hwc->idx; int ret = -EAGAIN; - perf_disable(); + perf_pmu_disable(event->pmu); if (test_and_set_bit(idx, cpuc->used_mask)) { idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); @@ -253,7 +253,7 @@ static int sh_pmu_enable(struct perf_event *event) perf_event_update_userpage(event); ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); return ret; } @@ -285,7 +285,25 @@ static int sh_pmu_event_init(struct perf_event *event) return err; } +static void sh_pmu_pmu_enable(struct pmu *pmu) +{ + if (!sh_pmu_initialized()) + return; + + sh_pmu->enable_all(); +} + +static void sh_pmu_pmu_disable(struct pmu *pmu) +{ + if (!sh_pmu_initialized()) + return; + + sh_pmu->disable_all(); +} + static struct pmu pmu = { + .pmu_enable = sh_pmu_pmu_enable, + .pmu_disable = sh_pmu_pmu_disable, .event_init = sh_pmu_event_init, .enable = sh_pmu_enable, .disable = sh_pmu_disable, @@ -316,22 +334,6 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) return NOTIFY_OK; } -void hw_perf_enable(void) -{ - if (!sh_pmu_initialized()) - return; - - sh_pmu->enable_all(); -} - -void hw_perf_disable(void) -{ - if (!sh_pmu_initialized()) - return; - - sh_pmu->disable_all(); -} - int 
__cpuinit register_sh_pmu(struct sh_pmu *pmu) { if (sh_pmu) diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index d0131deeeaf6..37cae676536c 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -664,7 +664,7 @@ out: return pcr; } -void hw_perf_enable(void) +static void sparc_pmu_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 pcr; @@ -691,7 +691,7 @@ void hw_perf_enable(void) pcr_ops->write(cpuc->pcr); } -void hw_perf_disable(void) +static void sparc_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 val; @@ -718,7 +718,7 @@ static void sparc_pmu_disable(struct perf_event *event) int i; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event[i]) { @@ -748,7 +748,7 @@ static void sparc_pmu_disable(struct perf_event *event) } } - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -991,7 +991,7 @@ static int sparc_pmu_enable(struct perf_event *event) unsigned long flags; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); n0 = cpuc->n_events; if (n0 >= perf_max_events) @@ -1020,7 +1020,7 @@ nocheck: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } @@ -1113,7 +1113,7 @@ static void sparc_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - perf_disable(); + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; } @@ -1127,7 +1127,7 @@ static void sparc_pmu_cancel_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); } /* @@ -1151,11 +1151,13 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) return -EAGAIN; cpuc->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); return 0; } static struct pmu pmu = { + .pmu_enable = sparc_pmu_pmu_enable, + .pmu_disable = sparc_pmu_pmu_disable, .event_init = sparc_pmu_event_init, .enable = sparc_pmu_enable, .disable = sparc_pmu_disable, diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 846070ce49c3..79705ac45019 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -583,7 +583,7 @@ static void x86_pmu_disable_all(void) } } -void hw_perf_disable(void) +static void x86_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -803,7 +803,7 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, static int x86_pmu_start(struct perf_event *event); static void x86_pmu_stop(struct perf_event *event); -void hw_perf_enable(void) +static void x86_pmu_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct perf_event *event; @@ -969,7 +969,7 @@ static int x86_pmu_enable(struct perf_event *event) hwc = &event->hw; - perf_disable(); + perf_pmu_disable(event->pmu); n0 = cpuc->n_events; ret = n = collect_events(cpuc, event, false); if (ret < 0) @@ -999,7 +999,7 @@ done_collect: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); return ret; } @@ -1436,7 +1436,7 @@ static void x86_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - perf_disable(); + perf_pmu_disable(pmu); cpuc->group_flag |= PERF_EVENT_TXN; cpuc->n_txn = 0; } @@ -1456,7 +1456,7 @@ static void 
x86_pmu_cancel_txn(struct pmu *pmu) */ cpuc->n_added -= cpuc->n_txn; cpuc->n_events -= cpuc->n_txn; - perf_enable(); + perf_pmu_enable(pmu); } /* @@ -1486,7 +1486,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu) memcpy(cpuc->assign, assign, n*sizeof(int)); cpuc->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); return 0; } @@ -1605,6 +1605,8 @@ int x86_pmu_event_init(struct perf_event *event) } static struct pmu pmu = { + .pmu_enable = x86_pmu_pmu_enable, + .pmu_disable = x86_pmu_pmu_disable, .event_init = x86_pmu_event_init, .enable = x86_pmu_enable, .disable = x86_pmu_disable, diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 243286a8ded7..6abf103fb7f8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -563,6 +563,11 @@ struct perf_event; struct pmu { struct list_head entry; + int *pmu_disable_count; + + void (*pmu_enable) (struct pmu *pmu); + void (*pmu_disable) (struct pmu *pmu); + /* * Should return -ENOENT when the @event doesn't match this PMU. */ @@ -868,10 +873,8 @@ extern void perf_event_free_task(struct task_struct *task); extern void set_perf_event_pending(void); extern void perf_event_do_pending(void); extern void perf_event_print_debug(void); -extern void __perf_disable(void); -extern bool __perf_enable(void); -extern void perf_disable(void); -extern void perf_enable(void); +extern void perf_pmu_disable(struct pmu *pmu); +extern void perf_pmu_enable(struct pmu *pmu); extern int perf_event_task_disable(void); extern int perf_event_task_enable(void); extern void perf_event_update_userpage(struct perf_event *event); @@ -1056,8 +1059,6 @@ static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } static inline void perf_event_do_pending(void) { } static inline void perf_event_print_debug(void) { } -static inline void perf_disable(void) { } -static inline void perf_enable(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } static inline int perf_event_task_enable(void) { return -EINVAL; } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 9a98ce953561..5ed0c06765bb 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -71,23 +71,20 @@ static atomic64_t perf_event_id; */ static DEFINE_SPINLOCK(perf_resource_lock); -void __weak hw_perf_disable(void) { barrier(); } -void __weak hw_perf_enable(void) { barrier(); } - void __weak perf_event_print_debug(void) { } -static DEFINE_PER_CPU(int, perf_disable_count); - -void perf_disable(void) +void perf_pmu_disable(struct pmu *pmu) { - if (!__get_cpu_var(perf_disable_count)++) - hw_perf_disable(); + int *count = this_cpu_ptr(pmu->pmu_disable_count); + if (!(*count)++) + pmu->pmu_disable(pmu); } -void perf_enable(void) +void perf_pmu_enable(struct pmu *pmu) { - if (!--__get_cpu_var(perf_disable_count)) - hw_perf_enable(); + int *count = this_cpu_ptr(pmu->pmu_disable_count); + if (!--(*count)) + pmu->pmu_enable(pmu); } static void get_ctx(struct perf_event_context *ctx) @@ -4970,11 +4967,19 @@ static struct srcu_struct pmus_srcu; int perf_pmu_register(struct pmu *pmu) { + int ret; + mutex_lock(&pmus_lock); + ret = -ENOMEM; + pmu->pmu_disable_count = alloc_percpu(int); + if (!pmu->pmu_disable_count) + goto unlock; list_add_rcu(&pmu->entry, &pmus); + ret = 0; +unlock: mutex_unlock(&pmus_lock); - return 0; + return ret; } void perf_pmu_unregister(struct pmu *pmu) @@ -4984,6 +4989,8 @@ void perf_pmu_unregister(struct pmu *pmu) 
mutex_unlock(&pmus_lock); synchronize_srcu(&pmus_srcu); + + free_percpu(pmu->pmu_disable_count); } struct pmu *perf_init_event(struct perf_event *event) -- cgit v1.2.3 From ad5133b7030d04ce7701aa7cbe98f561347c79c2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Jun 2010 12:22:39 +0200 Subject: perf: Default PMU ops Provide default implementations for the pmu txn methods, this allows us to remove some conditional code. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 10 ++++---- kernel/perf_event.c | 64 +++++++++++++++++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6abf103fb7f8..bf85733597ec 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -565,8 +565,8 @@ struct pmu { int *pmu_disable_count; - void (*pmu_enable) (struct pmu *pmu); - void (*pmu_disable) (struct pmu *pmu); + void (*pmu_enable) (struct pmu *pmu); /* optional */ + void (*pmu_disable) (struct pmu *pmu); /* optional */ /* * Should return -ENOENT when the @event doesn't match this PMU. @@ -590,19 +590,19 @@ struct pmu { * Start the transaction, after this ->enable() doesn't need to * do schedulability tests. */ - void (*start_txn) (struct pmu *pmu); + void (*start_txn) (struct pmu *pmu); /* optional */ /* * If ->start_txn() disabled the ->enable() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ - int (*commit_txn) (struct pmu *pmu); + int (*commit_txn) (struct pmu *pmu); /* optional */ /* * Will cancel the transaction, assumes ->disable() is called * for each successfull ->enable() during the transaction. 
*/ - void (*cancel_txn) (struct pmu *pmu); + void (*cancel_txn) (struct pmu *pmu); /* optional */ }; /** diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 5ed0c06765bb..8ef4ba3bcb1f 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -674,21 +674,14 @@ group_sched_in(struct perf_event *group_event, { struct perf_event *event, *partial_group = NULL; struct pmu *pmu = group_event->pmu; - bool txn = false; if (group_event->state == PERF_EVENT_STATE_OFF) return 0; - /* Check if group transaction availabe */ - if (pmu->start_txn) - txn = true; - - if (txn) - pmu->start_txn(pmu); + pmu->start_txn(pmu); if (event_sched_in(group_event, cpuctx, ctx)) { - if (txn) - pmu->cancel_txn(pmu); + pmu->cancel_txn(pmu); return -EAGAIN; } @@ -702,7 +695,7 @@ group_sched_in(struct perf_event *group_event, } } - if (!txn || !pmu->commit_txn(pmu)) + if (!pmu->commit_txn(pmu)) return 0; group_error: @@ -717,8 +710,7 @@ group_error: } event_sched_out(group_event, cpuctx, ctx); - if (txn) - pmu->cancel_txn(pmu); + pmu->cancel_txn(pmu); return -EAGAIN; } @@ -4965,6 +4957,31 @@ static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); static struct srcu_struct pmus_srcu; +static void perf_pmu_nop_void(struct pmu *pmu) +{ +} + +static int perf_pmu_nop_int(struct pmu *pmu) +{ + return 0; +} + +static void perf_pmu_start_txn(struct pmu *pmu) +{ + perf_pmu_disable(pmu); +} + +static int perf_pmu_commit_txn(struct pmu *pmu) +{ + perf_pmu_enable(pmu); + return 0; +} + +static void perf_pmu_cancel_txn(struct pmu *pmu) +{ + perf_pmu_enable(pmu); +} + int perf_pmu_register(struct pmu *pmu) { int ret; @@ -4974,6 +4991,29 @@ int perf_pmu_register(struct pmu *pmu) pmu->pmu_disable_count = alloc_percpu(int); if (!pmu->pmu_disable_count) goto unlock; + + if (!pmu->start_txn) { + if (pmu->pmu_enable) { + /* + * If we have pmu_enable/pmu_disable calls, install + * transaction stubs that use that to try and batch + * hardware accesses. + */ + pmu->start_txn = perf_pmu_start_txn; + pmu->commit_txn = perf_pmu_commit_txn; + pmu->cancel_txn = perf_pmu_cancel_txn; + } else { + pmu->start_txn = perf_pmu_nop_void; + pmu->commit_txn = perf_pmu_nop_int; + pmu->cancel_txn = perf_pmu_nop_void; + } + } + + if (!pmu->pmu_enable) { + pmu->pmu_enable = perf_pmu_nop_void; + pmu->pmu_disable = perf_pmu_nop_void; + } + list_add_rcu(&pmu->entry, &pmus); ret = 0; unlock: -- cgit v1.2.3 From fa407f35e0298d841e4088f95a7f9cf6e725c6d5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 24 Jun 2010 12:35:12 +0200 Subject: perf: Shrink hw_perf_event Use hw_perf_event::period_left instead of hw_perf_event::remaining and win back 8 bytes. 
Signed-off-by: Peter Zijlstra Cc: paulus Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 - kernel/perf_event.c | 13 ++++++------- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index bf85733597ec..8cafa15af60d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -529,7 +529,6 @@ struct hw_perf_event { int last_cpu; }; struct { /* software */ - s64 remaining; struct hrtimer hrtimer; }; #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 8ef4ba3bcb1f..1a6cdbf0d091 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4800,14 +4800,13 @@ static void perf_swevent_start_hrtimer(struct perf_event *event) hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hwc->hrtimer.function = perf_swevent_hrtimer; if (hwc->sample_period) { - u64 period; + s64 period = local64_read(&hwc->period_left); - if (hwc->remaining) { - if (hwc->remaining < 0) + if (period) { + if (period < 0) period = 10000; - else - period = hwc->remaining; - hwc->remaining = 0; + + local64_set(&hwc->period_left, 0); } else { period = max_t(u64, 10000, hwc->sample_period); } @@ -4823,7 +4822,7 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) if (hwc->sample_period) { ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); - hwc->remaining = ktime_to_ns(remaining); + local64_set(&hwc->period_left, ktime_to_ns(remaining)); hrtimer_cancel(&hwc->hrtimer); } -- cgit v1.2.3 From a4eaf7f14675cb512d69f0c928055e73d0c6d252 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Jun 2010 14:37:10 +0200 Subject: perf: Rework the PMU methods Replace pmu::{enable,disable,start,stop,unthrottle} with pmu::{add,del,start,stop}, all of which take a flags argument. The new interface extends the capability to stop a counter while keeping it scheduled on the PMU. We replace the throttled state with the generic stopped state. This also allows us to efficiently stop/start counters over certain code paths (like IRQ handlers). It also allows scheduling a counter without it starting, allowing for a generic frozen state (useful for rotating stopped counters). 
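To make the new contract concrete before the per-architecture conversions below, here is its shape in one place. This skeleton is illustrative only -- the my_hw_*() helpers are invented for the example and no real driver is this simple:

static void my_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (flags & PERF_EF_RELOAD)	/* caller wants the period reprogrammed */
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

	hwc->state = 0;			/* running: neither STOPPED nor stale */
	my_hw_unhalt_counter(hwc);	/* invented helper */
}

static void my_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		my_hw_halt_counter(hwc);	/* invented helper */
		hwc->state |= PERF_HES_STOPPED;
	}
	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		my_hw_fold_count(event);	/* fold hw delta into event->count */
		hwc->state |= PERF_HES_UPTODATE;
	}
}

static int my_pmu_add(struct perf_event *event, int flags)
{
	/* Claim a counter; the event is scheduled but stopped... */
	event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)	/* ...unless the core asks to start */
		my_pmu_start(event, PERF_EF_RELOAD);
	return 0;
}

static void my_pmu_del(struct perf_event *event, int flags)
{
	/* Removal implies a final stop plus count update. */
	my_pmu_stop(event, PERF_EF_UPDATE);
}

Throttling and IRQ-time freezing then reduce to ->stop()/->start() pairs without a full ->del()/->add(), which is where the efficiency claim above comes from.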
The stopped state is implemented in two different ways, depending on how the architecture implemented the throttled state: 1) We disable the counter: a) the pmu has per-counter enable bits, we flip that b) we program a NOP event, preserving the counter state 2) We store the counter state and ignore all read/overflow events Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/alpha/kernel/perf_event.c | 71 +++++++++++---- arch/arm/kernel/perf_event.c | 96 ++++++++++++-------- arch/powerpc/kernel/perf_event.c | 105 ++++++++++++++-------- arch/powerpc/kernel/perf_event_fsl_emb.c | 107 ++++++++++++++--------- arch/sh/kernel/perf_event.c | 75 +++++++++++----- arch/sparc/kernel/perf_event.c | 109 ++++++++++++++--------- arch/x86/kernel/cpu/perf_event.c | 106 ++++++++++++---------- arch/x86/kernel/cpu/perf_event_intel.c | 2 +- arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 +- include/linux/ftrace_event.h | 4 +- include/linux/perf_event.h | 54 +++++++++--- kernel/hw_breakpoint.c | 29 ++++++- kernel/perf_event.c | 140 +++++++++++++++--------------- kernel/trace/trace_event_perf.c | 7 +- 14 files changed, 576 insertions(+), 331 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 3e260731f8e6..380ef02d557a 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -307,7 +307,7 @@ again: new_raw_count) != prev_raw_count) goto again; - delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; + delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; /* It is possible on very rare occasions that the PMC has overflowed * but the interrupt is yet to come. Detect and fix this situation. @@ -402,14 +402,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) struct hw_perf_event *hwc = &pe->hw; int idx = hwc->idx; - if (cpuc->current_idx[j] != PMC_NO_INDEX) { - cpuc->idx_mask |= (1<<cpuc->current_idx[j]); - continue; + if (cpuc->current_idx[j] == PMC_NO_INDEX) { + alpha_perf_event_set_period(pe, hwc, idx); + cpuc->current_idx[j] = idx; } - alpha_perf_event_set_period(pe, hwc, idx); - cpuc->current_idx[j] = idx; - cpuc->idx_mask |= (1<<cpuc->current_idx[j]); + if (!(hwc->state & PERF_HES_STOPPED)) + cpuc->idx_mask |= (1<<cpuc->current_idx[j]); } cpuc->config = cpuc->event[0]->hw.config_base; } @@ -420,7 +419,7 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) * - this function is called from outside this module via the pmu struct * returned from perf event initialisation. */ -static int alpha_pmu_enable(struct perf_event *event) +static int alpha_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int n0; @@ -455,6 +454,10 @@ static int alpha_pmu_enable(struct perf_event *event) } } + hwc->state = PERF_HES_UPTODATE; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_STOPPED; + local_irq_restore(flags); perf_pmu_enable(event->pmu); @@ -467,7 +470,7 @@ static int alpha_pmu_enable(struct perf_event *event) * - this function is called from outside this module via the pmu struct * returned from perf event initialisation. 
*/ -static void alpha_pmu_disable(struct perf_event *event) +static void alpha_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -514,13 +517,44 @@ static void alpha_pmu_read(struct perf_event *event) } -static void alpha_pmu_unthrottle(struct perf_event *event) +static void alpha_pmu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + if (!(hwc->state & PERF_HES_STOPPED)) { + cpuc->idx_mask &= ~(1UL<<hwc->idx); + hwc->state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + alpha_perf_event_update(event, hwc, hwc->idx, 0); + hwc->state |= PERF_HES_UPTODATE; + } + + if (cpuc->enabled) + wrperfmon(PERFMON_CMD_DISABLE, (1UL<<hwc->idx)); +} + + +static void alpha_pmu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + alpha_perf_event_set_period(event, hwc, hwc->idx); + } + + hwc->state = 0; + cpuc->idx_mask |= 1UL<<hwc->idx; - wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); + if (cpuc->enabled) + wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); } @@ -671,7 +705,7 @@ static int alpha_pmu_event_init(struct perf_event *event) /* * Main entry point - enable HW performance counters. */ -static void alpha_pmu_pmu_enable(struct pmu *pmu) +static void alpha_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -697,7 +731,7 @@ static void alpha_pmu_pmu_enable(struct pmu *pmu) * Main entry point - disable HW performance counters. */ -static void alpha_pmu_pmu_disable(struct pmu *pmu) +static void alpha_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -711,13 +745,14 @@ static void alpha_pmu_pmu_disable(struct pmu *pmu) } static struct pmu pmu = { - .pmu_enable = alpha_pmu_pmu_enable, - .pmu_disable = alpha_pmu_pmu_disable, + .pmu_enable = alpha_pmu_enable, + .pmu_disable = alpha_pmu_disable, .event_init = alpha_pmu_event_init, - .enable = alpha_pmu_enable, - .disable = alpha_pmu_disable, + .add = alpha_pmu_add, + .del = alpha_pmu_del, + .start = alpha_pmu_start, + .stop = alpha_pmu_stop, .read = alpha_pmu_read, - .unthrottle = alpha_pmu_unthrottle, }; diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 3343f3f4b973..448cfa6b3ef0 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -221,46 +221,56 @@ again: } static void -armpmu_disable(struct perf_event *event) +armpmu_read(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - WARN_ON(idx < 0); - - clear_bit(idx, cpuc->active_mask); - armpmu->disable(hwc, idx); - - barrier(); - armpmu_event_update(event, hwc, idx); - cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + /* Don't read disabled counters! */ + if (hwc->idx < 0) + return; - perf_event_update_userpage(event); + armpmu_event_update(event, hwc, hwc->idx); } static void -armpmu_read(struct perf_event *event) +armpmu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; - /* Don't read disabled counters! 
*/ - if (hwc->idx < 0) + if (!armpmu) return; - armpmu_event_update(event, hwc, hwc->idx); + /* + * ARM pmu always has to update the counter, so ignore + * PERF_EF_UPDATE, see comments in armpmu_start(). + */ + if (!(hwc->state & PERF_HES_STOPPED)) { + armpmu->disable(hwc, hwc->idx); + barrier(); /* why? */ + armpmu_event_update(event, hwc, hwc->idx); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } } static void -armpmu_unthrottle(struct perf_event *event) +armpmu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; + if (!armpmu) + return; + + /* + * ARM pmu always has to reprogram the period, so ignore + * PERF_EF_RELOAD, see the comment below. + */ + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; /* * Set the period again. Some counters can't be stopped, so when we - * were throttled we simply disabled the IRQ source and the counter + * were stopped we simply disabled the IRQ source and the counter * may have been left counting. If we don't do this step then we may * get an interrupt too soon or *way* too late if the overflow has * happened since disabling. @@ -269,8 +279,25 @@ armpmu_unthrottle(struct perf_event *event) armpmu->enable(hwc, hwc->idx); } +static void +armpmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + WARN_ON(idx < 0); + + clear_bit(idx, cpuc->active_mask); + armpmu_stop(event, PERF_EF_UPDATE); + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + static int -armpmu_enable(struct perf_event *event) +armpmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -295,11 +322,9 @@ armpmu_enable(struct perf_event *event) cpuc->events[idx] = event; set_bit(idx, cpuc->active_mask); - /* Set the period for the event. */ - armpmu_event_set_period(event, hwc, idx); - - /* Enable the event. */ - armpmu->enable(hwc, idx); + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + armpmu_start(event, PERF_EF_RELOAD); /* Propagate our changes to the userspace mapping. */ perf_event_update_userpage(event); @@ -534,7 +559,7 @@ static int armpmu_event_init(struct perf_event *event) return err; } -static void armpmu_pmu_enable(struct pmu *pmu) +static void armpmu_enable(struct pmu *pmu) { /* Enable all of the perf events on hardware. 
*/ int idx; @@ -555,20 +580,21 @@ static void armpmu_pmu_enable(struct pmu *pmu) armpmu->start(); } -static void armpmu_pmu_disable(struct pmu *pmu) +static void armpmu_disable(struct pmu *pmu) { if (armpmu) armpmu->stop(); } static struct pmu pmu = { - .pmu_enable = armpmu_pmu_enable, - .pmu_disable= armpmu_pmu_disable, - .event_init = armpmu_event_init, - .enable = armpmu_enable, - .disable = armpmu_disable, - .unthrottle = armpmu_unthrottle, - .read = armpmu_read, + .pmu_enable = armpmu_enable, + .pmu_disable = armpmu_disable, + .event_init = armpmu_event_init, + .add = armpmu_add, + .del = armpmu_del, + .start = armpmu_start, + .stop = armpmu_stop, + .read = armpmu_read, }; /* diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index deb84bbcb0e6..9cb4924b6c07 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + if (!event->hw.idx) return; /* @@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -static void power_pmu_pmu_disable(struct pmu *pmu) +static void power_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -565,7 +568,7 @@ static void power_pmu_pmu_disable(struct pmu *pmu) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -static void power_pmu_pmu_enable(struct pmu *pmu) +static void power_pmu_enable(struct pmu *pmu) { struct perf_event *event; struct cpu_hw_events *cpuhw; @@ -672,6 +675,8 @@ static void power_pmu_pmu_enable(struct pmu *pmu) } local64_set(&event->hw.prev_count, val); event->hw.idx = idx; + if (event->hw.state & PERF_HES_STOPPED) + val = 0; write_pmc(idx, val); perf_event_update_userpage(event); } @@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count, * re-enable the PMU in order to get hw_perf_enable to do the * actual work of reconfiguring the PMU. */ -static int power_pmu_enable(struct perf_event *event) +static int power_pmu_add(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event) cpuhw->events[n0] = event->hw.config; cpuhw->flags[n0] = event->hw.event_base; + if (!(ef_flags & PERF_EF_START)) + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed @@ -777,7 +785,7 @@ nocheck: /* * Remove a event from the PMU. */ -static void power_pmu_disable(struct perf_event *event) +static void power_pmu_del(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; long i; @@ -826,27 +834,53 @@ static void power_pmu_disable(struct perf_event *event) } /* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. + * POWER-PMU does not support disabling individual counters, hence + * program their cycle counter to their max value and ignore the interrupts. 
*/ -static void power_pmu_unthrottle(struct perf_event *event) + +static void power_pmu_start(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; + s64 left; if (!event->hw.idx || !event->hw.sample_period) return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} + +static void power_pmu_stop(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + + if (!event->hw.idx || !event->hw.sample_period) + return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + local_irq_save(flags); perf_pmu_disable(event->pmu); + power_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + perf_event_update_userpage(event); perf_pmu_enable(event->pmu); local_irq_restore(flags); @@ -1131,13 +1165,14 @@ static int power_pmu_event_init(struct perf_event *event) } struct pmu power_pmu = { - .pmu_enable = power_pmu_pmu_enable, - .pmu_disable = power_pmu_pmu_disable, + .pmu_enable = power_pmu_enable, + .pmu_disable = power_pmu_disable, .event_init = power_pmu_event_init, - .enable = power_pmu_enable, - .disable = power_pmu_disable, + .add = power_pmu_add, + .del = power_pmu_del, + .start = power_pmu_start, + .stop = power_pmu_stop, .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, .start_txn = power_pmu_start_txn, .cancel_txn = power_pmu_cancel_txn, .commit_txn = power_pmu_commit_txn, @@ -1155,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -1177,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -1189,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). 
- */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + power_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } /* diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 84b1974c628f..7ecca59ddf77 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + /* * Performance monitor interrupts come even when interrupts * are soft-disabled, as long as interrupts are hard-enabled. @@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -static void fsl_emb_pmu_pmu_disable(struct pmu *pmu) +static void fsl_emb_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -216,7 +219,7 @@ static void fsl_emb_pmu_pmu_disable(struct pmu *pmu) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -static void fsl_emb_pmu_pmu_enable(struct pmu *pmu) +static void fsl_emb_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -263,7 +266,7 @@ static int collect_events(struct perf_event *group, int max_count, } /* context locked on entry */ -static int fsl_emb_pmu_enable(struct perf_event *event) +static int fsl_emb_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int ret = -EAGAIN; @@ -302,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event) val = 0x80000000L - left; } local64_set(&event->hw.prev_count, val); + + if (!(flags & PERF_EF_START)) { + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + val = 0; + } + write_pmc(i, val); perf_event_update_userpage(event); @@ -316,7 +325,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) } /* context locked on entry */ -static void fsl_emb_pmu_disable(struct perf_event *event) +static void fsl_emb_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int i = event->hw.idx; @@ -353,30 +362,49 @@ static void fsl_emb_pmu_disable(struct perf_event *event) put_cpu_var(cpu_hw_events); } -/* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. - * - * Context is locked on entry, but perf is not disabled. 
- */ -static void fsl_emb_pmu_unthrottle(struct perf_event *event) +static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + s64 left; + + if (event->hw.idx < 0 || !event->hw.sample_period) + return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} + +static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; if (event->hw.idx < 0 || !event->hw.sample_period) return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + local_irq_save(flags); perf_pmu_disable(event->pmu); + fsl_emb_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + perf_event_update_userpage(event); perf_pmu_enable(event->pmu); local_irq_restore(flags); @@ -524,13 +552,14 @@ static int fsl_emb_pmu_event_init(struct perf_event *event) } static struct pmu fsl_emb_pmu = { - .pmu_enable = fsl_emb_pmu_pmu_enable, - .pmu_disable = fsl_emb_pmu_pmu_disable, + .pmu_enable = fsl_emb_pmu_enable, + .pmu_disable = fsl_emb_pmu_disable, .event_init = fsl_emb_pmu_event_init, - .enable = fsl_emb_pmu_enable, - .disable = fsl_emb_pmu_disable, + .add = fsl_emb_pmu_add, + .del = fsl_emb_pmu_del, + .start = fsl_emb_pmu_start, + .stop = fsl_emb_pmu_stop, .read = fsl_emb_pmu_read, - .unthrottle = fsl_emb_pmu_unthrottle, }; /* @@ -545,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -567,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -576,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, perf_sample_data_init(&data, 0); data.period = event->hw.last_period; - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). 
- */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + fsl_emb_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } static void perf_event_interrupt(struct pt_regs *regs) diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 4bbe19058a58..cf39c4873468 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -206,26 +206,52 @@ again: local64_add(delta, &event->count); } -static void sh_pmu_disable(struct perf_event *event) +static void sh_pmu_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - clear_bit(idx, cpuc->active_mask); - sh_pmu->disable(hwc, idx); + if (!(event->hw.state & PERF_HES_STOPPED)) { + sh_pmu->disable(hwc, idx); + cpuc->events[idx] = NULL; + event->hw.state |= PERF_HES_STOPPED; + } - barrier(); + if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { + sh_perf_event_update(event, &event->hw, idx); + event->hw.state |= PERF_HES_UPTODATE; + } +} - sh_perf_event_update(event, &event->hw, idx); +static void sh_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; - cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + if (WARN_ON_ONCE(idx == -1)) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + cpuc->events[idx] = event; + event->hw.state = 0; + sh_pmu->enable(hwc, idx); +} + +static void sh_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + sh_pmu_stop(event, PERF_EF_UPDATE); + __clear_bit(event->hw.idx, cpuc->used_mask); perf_event_update_userpage(event); } -static int sh_pmu_enable(struct perf_event *event) +static int sh_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -234,21 +260,20 @@ static int sh_pmu_enable(struct perf_event *event) perf_pmu_disable(event->pmu); - if (test_and_set_bit(idx, cpuc->used_mask)) { + if (__test_and_set_bit(idx, cpuc->used_mask)) { idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); if (idx == sh_pmu->num_events) goto out; - set_bit(idx, cpuc->used_mask); + __set_bit(idx, cpuc->used_mask); hwc->idx = idx; } sh_pmu->disable(hwc, idx); - cpuc->events[idx] = event; - set_bit(idx, cpuc->active_mask); - - sh_pmu->enable(hwc, idx); + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (flags & PERF_EF_START) + sh_pmu_start(event, PERF_EF_RELOAD); perf_event_update_userpage(event); ret = 0; @@ -285,7 +310,7 @@ static int sh_pmu_event_init(struct perf_event *event) return err; } -static void sh_pmu_pmu_enable(struct pmu *pmu) +static void sh_pmu_enable(struct pmu *pmu) { if (!sh_pmu_initialized()) return; @@ -293,7 +318,7 @@ static void sh_pmu_pmu_enable(struct pmu *pmu) sh_pmu->enable_all(); } -static void sh_pmu_pmu_disable(struct pmu *pmu) +static void sh_pmu_disable(struct pmu *pmu) { if (!sh_pmu_initialized()) return; @@ -302,11 +327,13 @@ static void sh_pmu_pmu_disable(struct pmu *pmu) } static struct pmu pmu = { - .pmu_enable = sh_pmu_pmu_enable, - .pmu_disable = sh_pmu_pmu_disable, + .pmu_enable = sh_pmu_enable, + .pmu_disable = 
sh_pmu_disable, .event_init = sh_pmu_event_init, - .enable = sh_pmu_enable, - .disable = sh_pmu_disable, + .add = sh_pmu_add, + .del = sh_pmu_del, + .start = sh_pmu_start, + .stop = sh_pmu_stop, .read = sh_pmu_read, }; @@ -334,15 +361,15 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) return NOTIFY_OK; } -int __cpuinit register_sh_pmu(struct sh_pmu *pmu) +int __cpuinit register_sh_pmu(struct sh_pmu *_pmu) { if (sh_pmu) return -EBUSY; - sh_pmu = pmu; + sh_pmu = _pmu; - pr_info("Performance Events: %s support registered\n", pmu->name); + pr_info("Performance Events: %s support registered\n", _pmu->name); - WARN_ON(pmu->num_events > MAX_HWEVENTS); + WARN_ON(_pmu->num_events > MAX_HWEVENTS); perf_pmu_register(&pmu); perf_cpu_notifier(sh_pmu_notifier); diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 37cae676536c..516be2314b54 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -658,13 +658,16 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) enc = perf_event_get_enc(cpuc->events[i]); pcr &= ~mask_for_index(idx); - pcr |= event_encoding(enc, idx); + if (hwc->state & PERF_HES_STOPPED) + pcr |= nop_for_index(idx); + else + pcr |= event_encoding(enc, idx); } out: return pcr; } -static void sparc_pmu_pmu_enable(struct pmu *pmu) +static void sparc_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 pcr; @@ -691,7 +694,7 @@ static void sparc_pmu_pmu_enable(struct pmu *pmu) pcr_ops->write(cpuc->pcr); } -static void sparc_pmu_pmu_disable(struct pmu *pmu) +static void sparc_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 val; @@ -710,10 +713,53 @@ static void sparc_pmu_pmu_disable(struct pmu *pmu) pcr_ops->write(cpuc->pcr); } -static void sparc_pmu_disable(struct perf_event *event) +static int active_event_index(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + int i; + + for (i = 0; i < cpuc->n_events; i++) { + if (cpuc->event[i] == event) + break; + } + BUG_ON(i == cpuc->n_events); + return cpuc->current_idx[i]; +} + +static void sparc_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx = active_event_index(cpuc, event); + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + sparc_perf_event_set_period(event, &event->hw, idx); + } + + event->hw.state = 0; + + sparc_pmu_enable_event(cpuc, &event->hw, idx); +} + +static void sparc_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx = active_event_index(cpuc, event); + + if (!(event->hw.state & PERF_HES_STOPPED)) { + sparc_pmu_disable_event(cpuc, &event->hw, idx); + event->hw.state |= PERF_HES_STOPPED; + } + + if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) { + sparc_perf_event_update(event, &event->hw, idx); + event->hw.state |= PERF_HES_UPTODATE; + } +} + +static void sparc_pmu_del(struct perf_event *event, int _flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; unsigned long flags; int i; @@ -722,7 +768,10 @@ static void sparc_pmu_disable(struct perf_event *event) for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event[i]) { - int idx = cpuc->current_idx[i]; + /* Absorb the final count and turn off the + * event. 
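+	 * (sparc_pmu_stop() with PERF_EF_UPDATE folds the remaining delta into event->count before the slot is recycled.)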
+ */ + sparc_pmu_stop(event, PERF_EF_UPDATE); /* Shift remaining entries down into * the existing slot. @@ -734,13 +783,6 @@ static void sparc_pmu_disable(struct perf_event *event) cpuc->current_idx[i]; } - /* Absorb the final count and turn off the - * event. - */ - sparc_pmu_disable_event(cpuc, hwc, idx); - barrier(); - sparc_perf_event_update(event, hwc, idx); - perf_event_update_userpage(event); cpuc->n_events--; @@ -752,19 +794,6 @@ static void sparc_pmu_disable(struct perf_event *event) local_irq_restore(flags); } -static int active_event_index(struct cpu_hw_events *cpuc, - struct perf_event *event) -{ - int i; - - for (i = 0; i < cpuc->n_events; i++) { - if (cpuc->event[i] == event) - break; - } - BUG_ON(i == cpuc->n_events); - return cpuc->current_idx[i]; -} - static void sparc_pmu_read(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -774,15 +803,6 @@ static void sparc_pmu_read(struct perf_event *event) sparc_perf_event_update(event, hwc, idx); } -static void sparc_pmu_unthrottle(struct perf_event *event) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int idx = active_event_index(cpuc, event); - struct hw_perf_event *hwc = &event->hw; - - sparc_pmu_enable_event(cpuc, hwc, idx); -} - static atomic_t active_events = ATOMIC_INIT(0); static DEFINE_MUTEX(pmc_grab_mutex); @@ -984,7 +1004,7 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -static int sparc_pmu_enable(struct perf_event *event) +static int sparc_pmu_add(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int n0, ret = -EAGAIN; @@ -1001,6 +1021,10 @@ static int sparc_pmu_enable(struct perf_event *event) cpuc->events[n0] = event->hw.event_base; cpuc->current_idx[n0] = PIC_NO_INDEX; + event->hw.state = PERF_HES_UPTODATE; + if (!(ef_flags & PERF_EF_START)) + event->hw.state |= PERF_HES_STOPPED; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be performed @@ -1156,13 +1180,14 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) } static struct pmu pmu = { - .pmu_enable = sparc_pmu_pmu_enable, - .pmu_disable = sparc_pmu_pmu_disable, + .pmu_enable = sparc_pmu_enable, + .pmu_disable = sparc_pmu_disable, .event_init = sparc_pmu_event_init, - .enable = sparc_pmu_enable, - .disable = sparc_pmu_disable, + .add = sparc_pmu_add, + .del = sparc_pmu_del, + .start = sparc_pmu_start, + .stop = sparc_pmu_stop, .read = sparc_pmu_read, - .unthrottle = sparc_pmu_unthrottle, .start_txn = sparc_pmu_start_txn, .cancel_txn = sparc_pmu_cancel_txn, .commit_txn = sparc_pmu_commit_txn, @@ -1243,7 +1268,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, continue; if (perf_event_overflow(event, 1, &data, regs)) - sparc_pmu_disable_event(cpuc, hwc, idx); + sparc_pmu_stop(event, 0); } return NOTIFY_STOP; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 79705ac45019..dd6fec710677 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -583,7 +583,7 @@ static void x86_pmu_disable_all(void) } } -static void x86_pmu_pmu_disable(struct pmu *pmu) +static void x86_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -800,10 +800,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, hwc->last_tag == cpuc->tags[i]; } -static int x86_pmu_start(struct perf_event *event); -static void x86_pmu_stop(struct perf_event 
*event); +static void x86_pmu_start(struct perf_event *event, int flags); +static void x86_pmu_stop(struct perf_event *event, int flags); -static void x86_pmu_pmu_enable(struct pmu *pmu) +static void x86_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct perf_event *event; @@ -839,7 +839,14 @@ static void x86_pmu_pmu_enable(struct pmu *pmu) match_prev_assignment(hwc, cpuc, i)) continue; - x86_pmu_stop(event); + /* + * Ensure we don't accidentally enable a stopped + * counter simply because we rescheduled. + */ + if (hwc->state & PERF_HES_STOPPED) + hwc->state |= PERF_HES_ARCH; + + x86_pmu_stop(event, PERF_EF_UPDATE); } for (i = 0; i < cpuc->n_events; i++) { @@ -851,7 +858,10 @@ static void x86_pmu_pmu_enable(struct pmu *pmu) else if (i < n_running) continue; - x86_pmu_start(event); + if (hwc->state & PERF_HES_ARCH) + continue; + + x86_pmu_start(event, PERF_EF_RELOAD); } cpuc->n_added = 0; perf_events_lapic_init(); @@ -952,15 +962,12 @@ static void x86_pmu_enable_event(struct perf_event *event) } /* - * activate a single event + * Add a single event to the PMU. * * The event is added to the group of enabled events * but only if it can be scheduled with existing events. - * - * Called with PMU disabled. If successful and return value 1, - * then guaranteed to call perf_enable() and hw_perf_enable() */ -static int x86_pmu_enable(struct perf_event *event) +static int x86_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc; @@ -975,10 +982,14 @@ static int x86_pmu_enable(struct perf_event *event) if (ret < 0) goto out; + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be performed - * at commit time(->commit_txn) as a whole + * at commit time (->commit_txn) as a whole */ if (cpuc->group_flag & PERF_EVENT_TXN) goto done_collect; @@ -1003,27 +1014,28 @@ out: return ret; } -static int x86_pmu_start(struct perf_event *event) +static void x86_pmu_start(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx = event->hw.idx; - if (idx == -1) - return -EAGAIN; + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(idx == -1)) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + x86_perf_event_set_period(event); + } + + event->hw.state = 0; - x86_perf_event_set_period(event); cpuc->events[idx] = event; __set_bit(idx, cpuc->active_mask); x86_pmu.enable(event); perf_event_update_userpage(event); - - return 0; -} - -static void x86_pmu_unthrottle(struct perf_event *event) -{ - int ret = x86_pmu_start(event); - WARN_ON_ONCE(ret); } void perf_event_print_debug(void) @@ -1080,27 +1092,29 @@ void perf_event_print_debug(void) local_irq_restore(flags); } -static void x86_pmu_stop(struct perf_event *event) +static void x86_pmu_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - if (!__test_and_clear_bit(idx, cpuc->active_mask)) - return; - - x86_pmu.disable(event); - /* - * Drain the remaining delta count out of an event - * that we are disabling: - */ - x86_perf_event_update(event); + if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { + x86_pmu.disable(event); + 
cpuc->events[hwc->idx] = NULL; + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; } - cpuc->events[idx] = NULL; + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of an event + * that we are disabling: + */ + x86_perf_event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } } -static void x86_pmu_disable(struct perf_event *event) +static void x86_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int i; @@ -1113,7 +1127,7 @@ static void x86_pmu_disable(struct perf_event *event) if (cpuc->group_flag & PERF_EVENT_TXN) return; - x86_pmu_stop(event); + x86_pmu_stop(event, PERF_EF_UPDATE); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event_list[i]) { @@ -1165,7 +1179,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) continue; if (perf_event_overflow(event, 1, &data, regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } if (handled) @@ -1605,15 +1619,17 @@ int x86_pmu_event_init(struct perf_event *event) } static struct pmu pmu = { - .pmu_enable = x86_pmu_pmu_enable, - .pmu_disable = x86_pmu_pmu_disable, + .pmu_enable = x86_pmu_enable, + .pmu_disable = x86_pmu_disable, + .event_init = x86_pmu_event_init, - .enable = x86_pmu_enable, - .disable = x86_pmu_disable, + + .add = x86_pmu_add, + .del = x86_pmu_del, .start = x86_pmu_start, .stop = x86_pmu_stop, .read = x86_pmu_read, - .unthrottle = x86_pmu_unthrottle, + .start_txn = x86_pmu_start_txn, .cancel_txn = x86_pmu_cancel_txn, .commit_txn = x86_pmu_commit_txn, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ee05c90012d2..82395f2378ec 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -763,7 +763,7 @@ again: data.period = event->hw.last_period; if (perf_event_overflow(event, 1, &data, regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 18018d1311cd..9893a2f77b7a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -491,7 +491,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, regs.flags &= ~PERF_EFLAGS_EXACT; if (perf_event_overflow(event, 1, &data, &regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 5f8ad7bec636..8beabb958f61 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -252,8 +252,8 @@ DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); -extern int perf_trace_enable(struct perf_event *event); -extern void perf_trace_disable(struct perf_event *event); +extern int perf_trace_add(struct perf_event *event, int flags); +extern void perf_trace_del(struct perf_event *event, int flags); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8cafa15af60d..402073c61669 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -538,6 +538,7 @@ struct hw_perf_event { }; #endif }; + int state; local64_t prev_count; u64 
sample_period; u64 last_period; @@ -549,6 +550,13 @@ struct hw_perf_event { #endif }; +/* + * hw_perf_event::state flags + */ +#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ +#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ +#define PERF_HES_ARCH 0x04 + struct perf_event; /* @@ -564,42 +572,62 @@ struct pmu { int *pmu_disable_count; + /* + * Fully disable/enable this PMU, can be used to protect from the PMI + * as well as for lazy/batch writing of the MSRs. + */ void (*pmu_enable) (struct pmu *pmu); /* optional */ void (*pmu_disable) (struct pmu *pmu); /* optional */ /* + * Try and initialize the event for this PMU. * Should return -ENOENT when the @event doesn't match this PMU. */ int (*event_init) (struct perf_event *event); - int (*enable) (struct perf_event *event); - void (*disable) (struct perf_event *event); - int (*start) (struct perf_event *event); - void (*stop) (struct perf_event *event); +#define PERF_EF_START 0x01 /* start the counter when adding */ +#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ +#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ + + /* + * Adds/Removes a counter to/from the PMU, can be done inside + * a transaction, see the ->*_txn() methods. + */ + int (*add) (struct perf_event *event, int flags); + void (*del) (struct perf_event *event, int flags); + + /* + * Starts/Stops a counter present on the PMU. The PMI handler + * should stop the counter when perf_event_overflow() returns + * !0. ->start() will be used to continue. + */ + void (*start) (struct perf_event *event, int flags); + void (*stop) (struct perf_event *event, int flags); + + /* + * Updates the counter value of the event. + */ void (*read) (struct perf_event *event); - void (*unthrottle) (struct perf_event *event); /* * Group events scheduling is treated as a transaction, add * group events as a whole and perform one schedulability test. * If the test fails, roll back the whole group - */ - - /* - * Start the transaction, after this ->enable() doesn't need to + * + * Start the transaction, after this ->add() doesn't need to * do schedulability tests. */ void (*start_txn) (struct pmu *pmu); /* optional */ /* - * If ->start_txn() disabled the ->enable() schedulability test + * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ int (*commit_txn) (struct pmu *pmu); /* optional */ /* - * Will cancel the transaction, assumes ->disable() is called - * for each successful ->enable() during the transaction. + * Will cancel the transaction, assumes ->del() is called + * for each successful ->add() during the transaction. 
*/ void (*cancel_txn) (struct pmu *pmu); /* optional */ }; @@ -680,7 +708,7 @@ struct perf_event { int nr_siblings; int group_flags; struct perf_event *group_leader; - struct pmu *pmu; + struct pmu *pmu; enum perf_event_active_state state; unsigned int attach_state; diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index e9c5cfa1fd20..6f150095cafe 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -586,10 +586,35 @@ static int hw_breakpoint_event_init(struct perf_event *bp) return 0; } +static int hw_breakpoint_add(struct perf_event *bp, int flags) +{ + if (!(flags & PERF_EF_START)) + bp->hw.state = PERF_HES_STOPPED; + + return arch_install_hw_breakpoint(bp); +} + +static void hw_breakpoint_del(struct perf_event *bp, int flags) +{ + arch_uninstall_hw_breakpoint(bp); +} + +static void hw_breakpoint_start(struct perf_event *bp, int flags) +{ + bp->hw.state = 0; +} + +static void hw_breakpoint_stop(struct perf_event *bp, int flags) +{ + bp->hw.state = PERF_HES_STOPPED; +} + static struct pmu perf_breakpoint = { .event_init = hw_breakpoint_event_init, - .enable = arch_install_hw_breakpoint, - .disable = arch_uninstall_hw_breakpoint, + .add = hw_breakpoint_add, + .del = hw_breakpoint_del, + .start = hw_breakpoint_start, + .stop = hw_breakpoint_stop, .read = hw_breakpoint_pmu_read, }; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 1a6cdbf0d091..3bace4fd0355 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -424,7 +424,7 @@ event_sched_out(struct perf_event *event, event->state = PERF_EVENT_STATE_OFF; } event->tstamp_stopped = ctx->time; - event->pmu->disable(event); + event->pmu->del(event, 0); event->oncpu = -1; if (!is_software_event(event)) @@ -649,7 +649,7 @@ event_sched_in(struct perf_event *event, */ smp_wmb(); - if (event->pmu->enable(event)) { + if (event->pmu->add(event, PERF_EF_START)) { event->state = PERF_EVENT_STATE_INACTIVE; event->oncpu = -1; return -EAGAIN; @@ -1482,22 +1482,6 @@ do { \ return div64_u64(dividend, divisor); } -static void perf_event_stop(struct perf_event *event) -{ - if (!event->pmu->stop) - return event->pmu->disable(event); - - return event->pmu->stop(event); -} - -static int perf_event_start(struct perf_event *event) -{ - if (!event->pmu->start) - return event->pmu->enable(event); - - return event->pmu->start(event); -} - static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) { struct hw_perf_event *hwc = &event->hw; @@ -1517,9 +1501,9 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) hwc->sample_period = sample_period; if (local64_read(&hwc->period_left) > 8*sample_period) { - perf_event_stop(event); + event->pmu->stop(event, PERF_EF_UPDATE); local64_set(&hwc->period_left, 0); - perf_event_start(event); + event->pmu->start(event, PERF_EF_RELOAD); } } @@ -1548,7 +1532,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) */ if (interrupts == MAX_INTERRUPTS) { perf_log_throttle(event, 1); - event->pmu->unthrottle(event); + event->pmu->start(event, 0); } if (!event->attr.freq || !event->attr.sample_freq) @@ -2506,6 +2490,9 @@ int perf_event_task_disable(void) static int perf_event_index(struct perf_event *event) { + if (event->hw.state & PERF_HES_STOPPED) + return 0; + if (event->state != PERF_EVENT_STATE_ACTIVE) return 0; @@ -4120,8 +4107,6 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, struct hw_perf_event *hwc = &event->hw; int ret = 0; - throttle = (throttle && event->pmu->unthrottle != NULL); - if (!throttle) { 
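		/* not throttled: just account the interrupt; now that ->unthrottle is gone, a throttled event is later restarted via ->start(event, 0) */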
hwc->interrupts++; } else { @@ -4246,7 +4231,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, } } -static void perf_swevent_add(struct perf_event *event, u64 nr, +static void perf_swevent_event(struct perf_event *event, u64 nr, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { @@ -4272,6 +4257,9 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs) { + if (event->hw.state & PERF_HES_STOPPED) + return 0; + if (regs) { if (event->attr.exclude_user && user_mode(regs)) return 1; @@ -4371,7 +4359,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, hlist_for_each_entry_rcu(event, node, head, hlist_entry) { if (perf_swevent_match(event, type, event_id, data, regs)) - perf_swevent_add(event, nr, nmi, data, regs); + perf_swevent_event(event, nr, nmi, data, regs); } end: rcu_read_unlock(); @@ -4415,7 +4403,7 @@ static void perf_swevent_read(struct perf_event *event) { } -static int perf_swevent_enable(struct perf_event *event) +static int perf_swevent_add(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; struct perf_cpu_context *cpuctx; @@ -4428,6 +4416,8 @@ static int perf_swevent_enable(struct perf_event *event) perf_swevent_set_period(event); } + hwc->state = !(flags & PERF_EF_START); + head = find_swevent_head(cpuctx, event); if (WARN_ON_ONCE(!head)) return -EINVAL; @@ -4437,18 +4427,19 @@ static int perf_swevent_enable(struct perf_event *event) return 0; } -static void perf_swevent_disable(struct perf_event *event) +static void perf_swevent_del(struct perf_event *event, int flags) { hlist_del_rcu(&event->hlist_entry); } -static void perf_swevent_void(struct perf_event *event) +static void perf_swevent_start(struct perf_event *event, int flags) { + event->hw.state = 0; } -static int perf_swevent_int(struct perf_event *event) +static void perf_swevent_stop(struct perf_event *event, int flags) { - return 0; + event->hw.state = PERF_HES_STOPPED; } /* Deref the hlist from the update side */ @@ -4604,12 +4595,11 @@ static int perf_swevent_init(struct perf_event *event) static struct pmu perf_swevent = { .event_init = perf_swevent_init, - .enable = perf_swevent_enable, - .disable = perf_swevent_disable, - .start = perf_swevent_int, - .stop = perf_swevent_void, + .add = perf_swevent_add, + .del = perf_swevent_del, + .start = perf_swevent_start, + .stop = perf_swevent_stop, .read = perf_swevent_read, - .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ }; #ifdef CONFIG_EVENT_TRACING @@ -4657,7 +4647,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, hlist_for_each_entry_rcu(event, node, head, hlist_entry) { if (perf_tp_event_match(event, &data, regs)) - perf_swevent_add(event, count, 1, &data, regs); + perf_swevent_event(event, count, 1, &data, regs); } perf_swevent_put_recursion_context(rctx); @@ -4696,12 +4686,11 @@ static int perf_tp_event_init(struct perf_event *event) static struct pmu perf_tracepoint = { .event_init = perf_tp_event_init, - .enable = perf_trace_enable, - .disable = perf_trace_disable, - .start = perf_swevent_int, - .stop = perf_swevent_void, + .add = perf_trace_add, + .del = perf_trace_del, + .start = perf_swevent_start, + .stop = perf_swevent_stop, .read = perf_swevent_read, - .unthrottle = perf_swevent_void, }; static inline void perf_tp_register(void) @@ -4757,8 +4746,8 @@ void perf_bp_event(struct perf_event *bp, void *data) perf_sample_data_init(&sample, 
bp->attr.bp_addr); - if (!perf_exclude_event(bp, regs)) - perf_swevent_add(bp, 1, 1, &sample, regs); + if (!bp->hw.state && !perf_exclude_event(bp, regs)) + perf_swevent_event(bp, 1, 1, &sample, regs); } #endif @@ -4834,32 +4823,39 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) static void cpu_clock_event_update(struct perf_event *event) { - int cpu = raw_smp_processor_id(); s64 prev; u64 now; - now = cpu_clock(cpu); + now = local_clock(); prev = local64_xchg(&event->hw.prev_count, now); local64_add(now - prev, &event->count); } -static int cpu_clock_event_enable(struct perf_event *event) +static void cpu_clock_event_start(struct perf_event *event, int flags) { - struct hw_perf_event *hwc = &event->hw; - int cpu = raw_smp_processor_id(); - - local64_set(&hwc->prev_count, cpu_clock(cpu)); + local64_set(&event->hw.prev_count, local_clock()); perf_swevent_start_hrtimer(event); - - return 0; } -static void cpu_clock_event_disable(struct perf_event *event) +static void cpu_clock_event_stop(struct perf_event *event, int flags) { perf_swevent_cancel_hrtimer(event); cpu_clock_event_update(event); } +static int cpu_clock_event_add(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_START) + cpu_clock_event_start(event, flags); + + return 0; +} + +static void cpu_clock_event_del(struct perf_event *event, int flags) +{ + cpu_clock_event_stop(event, flags); +} + static void cpu_clock_event_read(struct perf_event *event) { cpu_clock_event_update(event); @@ -4878,8 +4874,10 @@ static int cpu_clock_event_init(struct perf_event *event) static struct pmu perf_cpu_clock = { .event_init = cpu_clock_event_init, - .enable = cpu_clock_event_enable, - .disable = cpu_clock_event_disable, + .add = cpu_clock_event_add, + .del = cpu_clock_event_del, + .start = cpu_clock_event_start, + .stop = cpu_clock_event_stop, .read = cpu_clock_event_read, }; @@ -4897,25 +4895,29 @@ static void task_clock_event_update(struct perf_event *event, u64 now) local64_add(delta, &event->count); } -static int task_clock_event_enable(struct perf_event *event) +static void task_clock_event_start(struct perf_event *event, int flags) { - struct hw_perf_event *hwc = &event->hw; - u64 now; - - now = event->ctx->time; - - local64_set(&hwc->prev_count, now); - + local64_set(&event->hw.prev_count, event->ctx->time); perf_swevent_start_hrtimer(event); - - return 0; } -static void task_clock_event_disable(struct perf_event *event) +static void task_clock_event_stop(struct perf_event *event, int flags) { perf_swevent_cancel_hrtimer(event); task_clock_event_update(event, event->ctx->time); +} + +static int task_clock_event_add(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_START) + task_clock_event_start(event, flags); + return 0; +} + +static void task_clock_event_del(struct perf_event *event, int flags) +{ + task_clock_event_stop(event, PERF_EF_UPDATE); } static void task_clock_event_read(struct perf_event *event) @@ -4947,8 +4949,10 @@ static int task_clock_event_init(struct perf_event *event) static struct pmu perf_task_clock = { .event_init = task_clock_event_init, - .enable = task_clock_event_enable, - .disable = task_clock_event_disable, + .add = task_clock_event_add, + .del = task_clock_event_del, + .start = task_clock_event_start, + .stop = task_clock_event_stop, .read = task_clock_event_read, }; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index f3bbcd1c90c8..39c059ca670e 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ 
-101,7 +101,7 @@ int perf_trace_init(struct perf_event *p_event) return ret; } -int perf_trace_enable(struct perf_event *p_event) +int perf_trace_add(struct perf_event *p_event, int flags) { struct ftrace_event_call *tp_event = p_event->tp_event; struct hlist_head __percpu *pcpu_list; @@ -111,13 +111,16 @@ int perf_trace_enable(struct perf_event *p_event) if (WARN_ON_ONCE(!pcpu_list)) return -EINVAL; + if (!(flags & PERF_EF_START)) + p_event->hw.state = PERF_HES_STOPPED; + list = this_cpu_ptr(pcpu_list); hlist_add_head_rcu(&p_event->hlist_entry, list); return 0; } -void perf_trace_disable(struct perf_event *p_event) +void perf_trace_del(struct perf_event *p_event, int flags) { hlist_del_rcu(&p_event->hlist_entry); } -- cgit v1.2.3 From 15ac9a395a753cb28c674e7ea80386ffdff21785 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 15:51:45 +0200 Subject: perf: Remove the sysfs bits Neither the overcommit nor the reservation sysfs parameter was actually working; remove them as they'll only get in the way. Signed-off-by: Peter Zijlstra Cc: paulus LKML-Reference: Signed-off-by: Ingo Molnar --- arch/alpha/kernel/perf_event.c | 3 +- arch/arm/kernel/perf_event.c | 9 +-- arch/sparc/kernel/perf_event.c | 9 +-- arch/x86/kernel/cpu/perf_event.c | 1 - include/linux/perf_event.h | 6 -- kernel/perf_event.c | 124 --------------------------------------- 6 files changed, 5 insertions(+), 147 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 380ef02d557a..9bb8c024080c 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -808,7 +808,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); /* la_ptr is the counter that overflowed. */ - if (unlikely(la_ptr >= perf_max_events)) { + if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) { /* This should never occur! 
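 * la_ptr is the index of the overflowed counter, so anything at or above alpha_pmu->num_pmcs is bogus.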
*/ irq_err_count++; pr_warning("PMI: silly index %ld\n", la_ptr); @@ -879,7 +879,6 @@ void __init init_hw_perf_events(void) /* And set up PMU specification */ alpha_pmu = &ev67_pmu; - perf_max_events = alpha_pmu->num_pmcs; perf_pmu_register(&pmu); } diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 448cfa6b3ef0..45d6a35217c1 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -534,7 +534,7 @@ static int armpmu_event_init(struct perf_event *event) event->destroy = hw_perf_event_destroy; if (!atomic_inc_not_zero(&active_events)) { - if (atomic_read(&active_events) > perf_max_events) { + if (atomic_read(&active_events) > armpmu->num_events) { atomic_dec(&active_events); return -ENOSPC; } @@ -2974,14 +2974,12 @@ init_hw_perf_events(void) armpmu = &armv6pmu; memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, sizeof(armv6_perf_cache_map)); - perf_max_events = armv6pmu.num_events; break; case 0xB020: /* ARM11mpcore */ armpmu = &armv6mpcore_pmu; memcpy(armpmu_perf_cache_map, armv6mpcore_perf_cache_map, sizeof(armv6mpcore_perf_cache_map)); - perf_max_events = armv6mpcore_pmu.num_events; break; case 0xC080: /* Cortex-A8 */ armv7pmu.id = ARM_PERF_PMU_ID_CA8; @@ -2993,7 +2991,6 @@ init_hw_perf_events(void) /* Reset PMNC and read the nb of CNTx counters supported */ armv7pmu.num_events = armv7_reset_read_pmnc(); - perf_max_events = armv7pmu.num_events; break; case 0xC090: /* Cortex-A9 */ armv7pmu.id = ARM_PERF_PMU_ID_CA9; @@ -3005,7 +3002,6 @@ init_hw_perf_events(void) /* Reset PMNC and read the nb of CNTx counters supported */ armv7pmu.num_events = armv7_reset_read_pmnc(); - perf_max_events = armv7pmu.num_events; break; } /* Intel CPUs [xscale]. */ @@ -3016,13 +3012,11 @@ init_hw_perf_events(void) armpmu = &xscale1pmu; memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, sizeof(xscale_perf_cache_map)); - perf_max_events = xscale1pmu.num_events; break; case 2: armpmu = &xscale2pmu; memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, sizeof(xscale_perf_cache_map)); - perf_max_events = xscale2pmu.num_events; break; } } @@ -3032,7 +3026,6 @@ init_hw_perf_events(void) arm_pmu_names[armpmu->id], armpmu->num_events); } else { pr_info("no hardware support available\n"); - perf_max_events = -1; } perf_pmu_register(&pmu); diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 516be2314b54..f9a706759364 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -897,7 +897,7 @@ static int sparc_check_constraints(struct perf_event **evts, if (!n_ev) return 0; - if (n_ev > perf_max_events) + if (n_ev > MAX_HWEVENTS) return -1; msk0 = perf_event_get_msk(events[0]); @@ -1014,7 +1014,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) perf_pmu_disable(event->pmu); n0 = cpuc->n_events; - if (n0 >= perf_max_events) + if (n0 >= MAX_HWEVENTS) goto out; cpuc->event[n0] = event; @@ -1097,7 +1097,7 @@ static int sparc_pmu_event_init(struct perf_event *event) n = 0; if (event->group_leader != event) { n = collect_events(event->group_leader, - perf_max_events - 1, + MAX_HWEVENTS - 1, evts, events, current_idx_dmy); if (n < 0) return -EINVAL; @@ -1309,9 +1309,6 @@ void __init init_hw_perf_events(void) pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); - /* All sparc64 PMUs currently have 2 events. 
*/ - perf_max_events = 2; - perf_pmu_register(&pmu); register_die_notifier(&perf_event_nmi_notifier); } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index dd6fec710677..0fb17050360f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1396,7 +1396,6 @@ void __init init_hw_perf_events(void) x86_pmu.num_counters = X86_PMC_MAX_GENERIC; } x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; - perf_max_events = x86_pmu.num_counters; if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 402073c61669..b22176d3ebdf 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -860,7 +860,6 @@ struct perf_cpu_context { struct perf_event_context ctx; struct perf_event_context *task_ctx; int active_oncpu; - int max_pertask; int exclusive; struct swevent_hlist *swevent_hlist; struct mutex hlist_mutex; @@ -883,11 +882,6 @@ struct perf_output_handle { #ifdef CONFIG_PERF_EVENTS -/* - * Set by architecture code: - */ -extern int perf_max_events; - extern int perf_pmu_register(struct pmu *pmu); extern void perf_pmu_unregister(struct pmu *pmu); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 3bace4fd0355..8462e69409ae 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -39,10 +39,6 @@ */ static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); -int perf_max_events __read_mostly = 1; -static int perf_reserved_percpu __read_mostly; -static int perf_overcommit __read_mostly = 1; - static atomic_t nr_events __read_mostly; static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; @@ -66,11 +62,6 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000; static atomic64_t perf_event_id; -/* - * Lock for (sysadmin-configurable) event reservations: - */ -static DEFINE_SPINLOCK(perf_resource_lock); - void __weak perf_event_print_debug(void) { } void perf_pmu_disable(struct pmu *pmu) @@ -480,16 +471,6 @@ static void __perf_event_remove_from_context(void *info) list_del_event(event, ctx); - if (!ctx->task) { - /* - * Allow more per task events with respect to the - * reservation: - */ - cpuctx->max_pertask = - min(perf_max_events - ctx->nr_events, - perf_max_events - perf_reserved_percpu); - } - raw_spin_unlock(&ctx->lock); } @@ -823,9 +804,6 @@ static void __perf_install_in_context(void *info) } } - if (!err && !ctx->task && cpuctx->max_pertask) - cpuctx->max_pertask--; - unlock: raw_spin_unlock(&ctx->lock); } @@ -5930,10 +5908,6 @@ static void __cpuinit perf_event_init_cpu(int cpu) cpuctx = &per_cpu(perf_cpu_context, cpu); - spin_lock(&perf_resource_lock); - cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; - spin_unlock(&perf_resource_lock); - mutex_lock(&cpuctx->hlist_mutex); if (cpuctx->hlist_refcount > 0) { struct swevent_hlist *hlist; @@ -6008,101 +5982,3 @@ void __init perf_event_init(void) perf_tp_register(); perf_cpu_notifier(perf_cpu_notify); } - -static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - char *buf) -{ - return sprintf(buf, "%d\n", perf_reserved_percpu); -} - -static ssize_t -perf_set_reserve_percpu(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - const char *buf, - size_t count) -{ - struct perf_cpu_context *cpuctx; - unsigned long val; - int err, cpu, mpt; - - err = strict_strtoul(buf, 10, &val); - if (err) - return 
err; - if (val > perf_max_events) - return -EINVAL; - - spin_lock(&perf_resource_lock); - perf_reserved_percpu = val; - for_each_online_cpu(cpu) { - cpuctx = &per_cpu(perf_cpu_context, cpu); - raw_spin_lock_irq(&cpuctx->ctx.lock); - mpt = min(perf_max_events - cpuctx->ctx.nr_events, - perf_max_events - perf_reserved_percpu); - cpuctx->max_pertask = mpt; - raw_spin_unlock_irq(&cpuctx->ctx.lock); - } - spin_unlock(&perf_resource_lock); - - return count; -} - -static ssize_t perf_show_overcommit(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - char *buf) -{ - return sprintf(buf, "%d\n", perf_overcommit); -} - -static ssize_t -perf_set_overcommit(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - const char *buf, size_t count) -{ - unsigned long val; - int err; - - err = strict_strtoul(buf, 10, &val); - if (err) - return err; - if (val > 1) - return -EINVAL; - - spin_lock(&perf_resource_lock); - perf_overcommit = val; - spin_unlock(&perf_resource_lock); - - return count; -} - -static SYSDEV_CLASS_ATTR( - reserve_percpu, - 0644, - perf_show_reserve_percpu, - perf_set_reserve_percpu - ); - -static SYSDEV_CLASS_ATTR( - overcommit, - 0644, - perf_show_overcommit, - perf_set_overcommit - ); - -static struct attribute *perfclass_attrs[] = { - &attr_reserve_percpu.attr, - &attr_overcommit.attr, - NULL -}; - -static struct attribute_group perfclass_attr_group = { - .attrs = perfclass_attrs, - .name = "perf_events", -}; - -static int __init perf_event_sysfs_init(void) -{ - return sysfs_create_group(&cpu_sysdev_class.kset.kobj, - &perfclass_attr_group); -} -device_initcall(perf_event_sysfs_init); -- cgit v1.2.3 From b28ab83c595e767f2028276b7398d17f2253cec0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 14:48:15 +0200 Subject: perf: Remove the swevent hash-table from the cpu context Separate the swevent hash-table from the cpu_context bits in preparation for per pmu cpu contexts. This keeps the swevent hash a global entity. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 --- kernel/perf_event.c | 104 +++++++++++++++++++++++++-------------------- 2 files changed, 58 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b22176d3ebdf..4ab4f0ca09a1 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -861,12 +861,6 @@ struct perf_cpu_context { struct perf_event_context *task_ctx; int active_oncpu; int exclusive; - struct swevent_hlist *swevent_hlist; - struct mutex hlist_mutex; - int hlist_refcount; - - /* Recursion avoidance in each context */ - int recursion[PERF_NR_CONTEXTS]; }; struct perf_output_handle { diff --git a/kernel/perf_event.c b/kernel/perf_event.c index a3c86a8335c4..2c47ed6c4f26 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4154,6 +4154,17 @@ int perf_event_overflow(struct perf_event *event, int nmi, * Generic software event infrastructure */ +struct swevent_htable { + struct swevent_hlist *swevent_hlist; + struct mutex hlist_mutex; + int hlist_refcount; + + /* Recursion avoidance in each context */ + int recursion[PERF_NR_CONTEXTS]; +}; + +static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); + /* * We directly increment event->count and keep a second value in * event->hw.period_left to count intervals. 
This period event @@ -4286,11 +4297,11 @@ __find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id) /* For the read side: events when they trigger */ static inline struct hlist_head * -find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) +find_swevent_head_rcu(struct swevent_htable *swhash, u64 type, u32 event_id) { struct swevent_hlist *hlist; - hlist = rcu_dereference(ctx->swevent_hlist); + hlist = rcu_dereference(swhash->swevent_hlist); if (!hlist) return NULL; @@ -4299,7 +4310,7 @@ find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) /* For the event head insertion and removal in the hlist */ static inline struct hlist_head * -find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) +find_swevent_head(struct swevent_htable *swhash, struct perf_event *event) { struct swevent_hlist *hlist; u32 event_id = event->attr.config; @@ -4310,7 +4321,7 @@ find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) * and release. Which makes the protected version suitable here. * The context lock guarantees that. */ - hlist = rcu_dereference_protected(ctx->swevent_hlist, + hlist = rcu_dereference_protected(swhash->swevent_hlist, lockdep_is_held(&event->ctx->lock)); if (!hlist) return NULL; @@ -4323,17 +4334,13 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, struct perf_sample_data *data, struct pt_regs *regs) { - struct perf_cpu_context *cpuctx; + struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); struct perf_event *event; struct hlist_node *node; struct hlist_head *head; - cpuctx = &__get_cpu_var(perf_cpu_context); - rcu_read_lock(); - - head = find_swevent_head_rcu(cpuctx, type, event_id); - + head = find_swevent_head_rcu(swhash, type, event_id); if (!head) goto end; @@ -4347,17 +4354,17 @@ end: int perf_swevent_get_recursion_context(void) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); - return get_recursion_context(cpuctx->recursion); + return get_recursion_context(swhash->recursion); } EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); void inline perf_swevent_put_recursion_context(int rctx) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); - put_recursion_context(cpuctx->recursion, rctx); + put_recursion_context(swhash->recursion, rctx); } void __perf_sw_event(u32 event_id, u64 nr, int nmi, @@ -4385,12 +4392,10 @@ static void perf_swevent_read(struct perf_event *event) static int perf_swevent_add(struct perf_event *event, int flags) { + struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); struct hw_perf_event *hwc = &event->hw; - struct perf_cpu_context *cpuctx; struct hlist_head *head; - cpuctx = &__get_cpu_var(perf_cpu_context); - if (hwc->sample_period) { hwc->last_period = hwc->sample_period; perf_swevent_set_period(event); @@ -4398,7 +4403,7 @@ static int perf_swevent_add(struct perf_event *event, int flags) hwc->state = !(flags & PERF_EF_START); - head = find_swevent_head(cpuctx, event); + head = find_swevent_head(swhash, event); if (WARN_ON_ONCE(!head)) return -EINVAL; @@ -4424,10 +4429,10 @@ static void perf_swevent_stop(struct perf_event *event, int flags) /* Deref the hlist from the update side */ static inline struct swevent_hlist * -swevent_hlist_deref(struct perf_cpu_context *cpuctx) +swevent_hlist_deref(struct swevent_htable *swhash) { - return 
rcu_dereference_protected(cpuctx->swevent_hlist, - lockdep_is_held(&cpuctx->hlist_mutex)); + return rcu_dereference_protected(swhash->swevent_hlist, + lockdep_is_held(&swhash->hlist_mutex)); } static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) @@ -4438,27 +4443,27 @@ static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) kfree(hlist); } -static void swevent_hlist_release(struct perf_cpu_context *cpuctx) +static void swevent_hlist_release(struct swevent_htable *swhash) { - struct swevent_hlist *hlist = swevent_hlist_deref(cpuctx); + struct swevent_hlist *hlist = swevent_hlist_deref(swhash); if (!hlist) return; - rcu_assign_pointer(cpuctx->swevent_hlist, NULL); + rcu_assign_pointer(swhash->swevent_hlist, NULL); call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); } static void swevent_hlist_put_cpu(struct perf_event *event, int cpu) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); - mutex_lock(&cpuctx->hlist_mutex); + mutex_lock(&swhash->hlist_mutex); - if (!--cpuctx->hlist_refcount) - swevent_hlist_release(cpuctx); + if (!--swhash->hlist_refcount) + swevent_hlist_release(swhash); - mutex_unlock(&cpuctx->hlist_mutex); + mutex_unlock(&swhash->hlist_mutex); } static void swevent_hlist_put(struct perf_event *event) @@ -4476,12 +4481,12 @@ static void swevent_hlist_put(struct perf_event *event) static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); int err = 0; - mutex_lock(&cpuctx->hlist_mutex); + mutex_lock(&swhash->hlist_mutex); - if (!swevent_hlist_deref(cpuctx) && cpu_online(cpu)) { + if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) { struct swevent_hlist *hlist; hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); @@ -4489,11 +4494,11 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) err = -ENOMEM; goto exit; } - rcu_assign_pointer(cpuctx->swevent_hlist, hlist); + rcu_assign_pointer(swhash->swevent_hlist, hlist); } - cpuctx->hlist_refcount++; + swhash->hlist_refcount++; exit: - mutex_unlock(&cpuctx->hlist_mutex); + mutex_unlock(&swhash->hlist_mutex); return err; } @@ -5889,12 +5894,15 @@ int perf_event_init_task(struct task_struct *child) static void __init perf_event_init_all_cpus(void) { - int cpu; struct perf_cpu_context *cpuctx; + struct swevent_htable *swhash; + int cpu; for_each_possible_cpu(cpu) { + swhash = &per_cpu(swevent_htable, cpu); + mutex_init(&swhash->hlist_mutex); + cpuctx = &per_cpu(perf_cpu_context, cpu); - mutex_init(&cpuctx->hlist_mutex); __perf_event_init_context(&cpuctx->ctx, NULL); } } @@ -5902,18 +5910,21 @@ static void __init perf_event_init_all_cpus(void) static void __cpuinit perf_event_init_cpu(int cpu) { struct perf_cpu_context *cpuctx; + struct swevent_htable *swhash; cpuctx = &per_cpu(perf_cpu_context, cpu); - mutex_lock(&cpuctx->hlist_mutex); - if (cpuctx->hlist_refcount > 0) { + swhash = &per_cpu(swevent_htable, cpu); + + mutex_lock(&swhash->hlist_mutex); + if (swhash->hlist_refcount > 0) { struct swevent_hlist *hlist; - hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); - WARN_ON_ONCE(!hlist); - rcu_assign_pointer(cpuctx->swevent_hlist, hlist); + hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu)); + WARN_ON(!hlist); + rcu_assign_pointer(swhash->swevent_hlist, hlist); } - mutex_unlock(&cpuctx->hlist_mutex); + mutex_unlock(&swhash->hlist_mutex); } #ifdef CONFIG_HOTPLUG_CPU 
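The conversion above is an instance of a generic pattern: a per-cpu structure that owns an RCU-managed hash list, with readers relying only on rcu_read_lock() and writers serialized by a mutex and a refcount. The sketch below is not part of the patch; all demo_* names are hypothetical stand-ins that mirror the swevent_hlist_get_cpu()/swevent_hlist_put_cpu() discipline shown in the diff:

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

#define DEMO_HASH_SIZE 256

struct demo_hlist {
	struct hlist_head	heads[DEMO_HASH_SIZE];
	struct rcu_head		rcu_head;
};

struct demo_htable {
	struct demo_hlist	*hlist;		/* RCU-managed; readers use rcu_dereference() */
	struct mutex		lock;		/* serializes writers; mutex_init() per cpu at boot */
	int			refcount;
};

static DEFINE_PER_CPU(struct demo_htable, demo_htable);

static int demo_hlist_get_cpu(int cpu)
{
	struct demo_htable *ht = &per_cpu(demo_htable, cpu);
	int err = 0;

	mutex_lock(&ht->lock);
	if (!ht->hlist) {
		struct demo_hlist *hlist;

		hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
		if (!hlist) {
			err = -ENOMEM;
			goto unlock;
		}
		rcu_assign_pointer(ht->hlist, hlist);	/* publish to readers */
	}
	ht->refcount++;
unlock:
	mutex_unlock(&ht->lock);
	return err;
}

static void demo_hlist_release_rcu(struct rcu_head *rcu_head)
{
	/* runs after a grace period, once all readers are gone */
	kfree(container_of(rcu_head, struct demo_hlist, rcu_head));
}

static void demo_hlist_put_cpu(int cpu)
{
	struct demo_htable *ht = &per_cpu(demo_htable, cpu);

	mutex_lock(&ht->lock);
	if (!--ht->refcount) {
		struct demo_hlist *hlist = ht->hlist;

		rcu_assign_pointer(ht->hlist, NULL);	/* unpublish */
		call_rcu(&hlist->rcu_head, demo_hlist_release_rcu);
	}
	mutex_unlock(&ht->lock);
}

Keeping the whole table behind one RCU pointer is what lets the read side (the software-event fast path) stay lock-free while teardown waits only for a grace period.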
@@ -5931,11 +5942,12 @@ static void __perf_event_exit_cpu(void *info) static void perf_event_exit_cpu(int cpu) { struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); struct perf_event_context *ctx = &cpuctx->ctx; - mutex_lock(&cpuctx->hlist_mutex); - swevent_hlist_release(cpuctx); - mutex_unlock(&cpuctx->hlist_mutex); + mutex_lock(&swhash->hlist_mutex); + swevent_hlist_release(swhash); + mutex_unlock(&swhash->hlist_mutex); mutex_lock(&ctx->mutex); smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1); -- cgit v1.2.3 From b5ab4cd563e7ab49b27957704112a8ecade54e1f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 16:32:21 +0200 Subject: perf: Per cpu-context rotation timer Give each cpu-context its own timer so that it is a self-contained entity; this eases the way for per-pmu-per-cpu contexts and provides the basic infrastructure to allow different rotation times per pmu. Things to look at: - folding the tick and these TICK_NSEC timers - separate task context rotation Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 ++- kernel/perf_event.c | 80 ++++++++++++++++++++++++++++++++++++---------- kernel/sched.c | 2 -- 3 files changed, 65 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4ab4f0ca09a1..fa04537df55b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -861,6 +861,8 @@ struct perf_cpu_context { struct perf_event_context *task_ctx; int active_oncpu; int exclusive; + u64 timer_interval; + struct hrtimer timer; }; struct perf_output_handle { @@ -881,7 +883,6 @@ extern void perf_pmu_unregister(struct pmu *pmu); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); -extern void perf_event_task_tick(struct task_struct *task); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); @@ -1067,8 +1068,6 @@ perf_event_task_sched_in(struct task_struct *task) { } static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { } -static inline void -perf_event_task_tick(struct task_struct *task) { } static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 2c47ed6c4f26..d75e4c8727f9 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -78,6 +78,25 @@ void perf_pmu_enable(struct pmu *pmu) pmu->pmu_enable(pmu); } +static void perf_pmu_rotate_start(void) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + + if (hrtimer_active(&cpuctx->timer)) + return; + + __hrtimer_start_range_ns(&cpuctx->timer, + ns_to_ktime(cpuctx->timer_interval), 0, + HRTIMER_MODE_REL_PINNED, 0); +} + +static void perf_pmu_rotate_stop(void) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + + hrtimer_cancel(&cpuctx->timer); +} + static void get_ctx(struct perf_event_context *ctx) { WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); @@ 
-281,6 +300,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) } list_add_rcu(&event->event_entry, &ctx->event_list); + if (!ctx->nr_events) + perf_pmu_rotate_start(); ctx->nr_events++; if (event->attr.inherit_stat) ctx->nr_stat++; @@ -1383,6 +1404,12 @@ void perf_event_task_sched_in(struct task_struct *task) ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); cpuctx->task_ctx = ctx; + + /* + * Since these rotations are per-cpu, we need to ensure the + * cpu-context we got scheduled on is actually rotating. + */ + perf_pmu_rotate_start(); } #define MAX_INTERRUPTS (~0ULL) @@ -1487,7 +1514,7 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) } } -static void perf_ctx_adjust_freq(struct perf_event_context *ctx) +static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) { struct perf_event *event; struct hw_perf_event *hwc; @@ -1524,7 +1551,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) hwc->freq_count_stamp = now; if (delta > 0) - perf_adjust_period(event, TICK_NSEC, delta); + perf_adjust_period(event, period, delta); } raw_spin_unlock(&ctx->lock); } @@ -1542,30 +1569,39 @@ static void rotate_ctx(struct perf_event_context *ctx) raw_spin_unlock(&ctx->lock); } -void perf_event_task_tick(struct task_struct *curr) +/* + * Cannot race with ->pmu_rotate_start() because this is run from hardirq + * context, and ->pmu_rotate_start() is called with irqs disabled (both are + * cpu affine, so there are no SMP races). + */ +static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer) { + enum hrtimer_restart restart = HRTIMER_NORESTART; struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; int rotate = 0; - if (!atomic_read(&nr_events)) - return; + cpuctx = container_of(timer, struct perf_cpu_context, timer); - cpuctx = &__get_cpu_var(perf_cpu_context); - if (cpuctx->ctx.nr_events && - cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) - rotate = 1; + if (cpuctx->ctx.nr_events) { + restart = HRTIMER_RESTART; + if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) + rotate = 1; + } - ctx = curr->perf_event_ctxp; - if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active) - rotate = 1; + ctx = current->perf_event_ctxp; + if (ctx && ctx->nr_events) { + restart = HRTIMER_RESTART; + if (ctx->nr_events != ctx->nr_active) + rotate = 1; + } - perf_ctx_adjust_freq(&cpuctx->ctx); + perf_ctx_adjust_freq(&cpuctx->ctx, cpuctx->timer_interval); if (ctx) - perf_ctx_adjust_freq(ctx); + perf_ctx_adjust_freq(ctx, cpuctx->timer_interval); if (!rotate) - return; + goto done; cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); if (ctx) @@ -1577,7 +1613,12 @@ void perf_event_task_tick(struct task_struct *curr) cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); if (ctx) - task_ctx_sched_in(curr, EVENT_FLEXIBLE); + task_ctx_sched_in(current, EVENT_FLEXIBLE); + +done: + hrtimer_forward_now(timer, ns_to_ktime(cpuctx->timer_interval)); + + return restart; } static int event_enable_on_exec(struct perf_event *event, @@ -4786,7 +4827,7 @@ static void perf_swevent_start_hrtimer(struct perf_event *event) } __hrtimer_start_range_ns(&hwc->hrtimer, ns_to_ktime(period), 0, - HRTIMER_MODE_REL, 0); + HRTIMER_MODE_REL_PINNED, 0); } } @@ -5904,6 +5945,9 @@ static void __init perf_event_init_all_cpus(void) cpuctx = &per_cpu(perf_cpu_context, cpu); __perf_event_init_context(&cpuctx->ctx, NULL); + cpuctx->timer_interval = TICK_NSEC; + hrtimer_init(&cpuctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cpuctx->timer.function = perf_event_context_tick; } 
} @@ -5934,6 +5978,8 @@ static void __perf_event_exit_cpu(void *info) struct perf_event_context *ctx = &cpuctx->ctx; struct perf_event *event, *tmp; + perf_pmu_rotate_stop(); + list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) __perf_event_remove_from_context(event); list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) diff --git a/kernel/sched.c b/kernel/sched.c index 09b574e7f4df..66a02ba83c01 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3578,8 +3578,6 @@ void scheduler_tick(void) curr->sched_class->task_tick(rq, curr, 0); raw_spin_unlock(&rq->lock); - perf_event_task_tick(curr); - #ifdef CONFIG_SMP rq->idle_at_tick = idle_cpu(cpu); trigger_load_balance(rq, cpu); -- cgit v1.2.3 From 108b02cfce04ee90b0a07ee0b104baffd39f5934 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Sep 2010 14:32:03 +0200 Subject: perf: Per-pmu-per-cpu contexts Allocate per-cpu contexts per pmu. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 +- kernel/perf_event.c | 178 ++++++++++++++++++++++++++++----------------- 2 files changed, 113 insertions(+), 69 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fa04537df55b..22155ef3b362 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -570,7 +570,8 @@ struct perf_event; struct pmu { struct list_head entry; - int *pmu_disable_count; + int * __percpu pmu_disable_count; + struct perf_cpu_context * __percpu pmu_cpu_context; /* * Fully disable/enable this PMU, can be used to protect from the PMI @@ -808,6 +809,7 @@ struct perf_event { * Used as a container for task events and CPU events as well: */ struct perf_event_context { + struct pmu *pmu; /* * Protect the states of the events in the list, * nr_active, and the list: diff --git a/kernel/perf_event.c b/kernel/perf_event.c index d75e4c8727f9..8ca6e690ffe3 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -34,16 +34,15 @@ #include -/* - * Each CPU has a list of per CPU events: - */ -static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); - static atomic_t nr_events __read_mostly; static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; static atomic_t nr_task_events __read_mostly; +static LIST_HEAD(pmus); +static DEFINE_MUTEX(pmus_lock); +static struct srcu_struct pmus_srcu; + /* * perf event paranoia level: * -1 - not paranoid at all @@ -78,9 +77,9 @@ void perf_pmu_enable(struct pmu *pmu) pmu->pmu_enable(pmu); } -static void perf_pmu_rotate_start(void) +static void perf_pmu_rotate_start(struct pmu *pmu) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); if (hrtimer_active(&cpuctx->timer)) return; @@ -90,9 +89,9 @@ static void perf_pmu_rotate_start(void) HRTIMER_MODE_REL_PINNED, 0); } -static void perf_pmu_rotate_stop(void) +static void perf_pmu_rotate_stop(struct pmu *pmu) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); hrtimer_cancel(&cpuctx->timer); } @@ -301,7 +300,7 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) list_add_rcu(&event->event_entry, &ctx->event_list); if (!ctx->nr_events) - perf_pmu_rotate_start(); + perf_pmu_rotate_start(ctx->pmu); 
ctx->nr_events++; if (event->attr.inherit_stat) ctx->nr_stat++; @@ -466,6 +465,12 @@ group_sched_out(struct perf_event *group_event, cpuctx->exclusive = 0; } +static inline struct perf_cpu_context * +__get_cpu_context(struct perf_event_context *ctx) +{ + return this_cpu_ptr(ctx->pmu->pmu_cpu_context); +} + /* * Cross CPU call to remove a performance event * @@ -474,9 +479,9 @@ group_sched_out(struct perf_event *group_event, */ static void __perf_event_remove_from_context(void *info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); /* * If this is a task context, we need to check whether it is @@ -556,8 +561,8 @@ retry: static void __perf_event_disable(void *info) { struct perf_event *event = info; - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); /* * If this is a per-task event, need to check whether this @@ -765,10 +770,10 @@ static void add_event_to_ctx(struct perf_event *event, */ static void __perf_install_in_context(void *info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; struct perf_event *leader = event->group_leader; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); int err; /* @@ -912,9 +917,9 @@ static void __perf_event_mark_enabled(struct perf_event *event, static void __perf_event_enable(void *info) { struct perf_event *event = info; - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = event->ctx; struct perf_event *leader = event->group_leader; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); int err; /* @@ -1188,15 +1193,19 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = task->perf_event_ctxp; struct perf_event_context *next_ctx; struct perf_event_context *parent; + struct perf_cpu_context *cpuctx; int do_switch = 1; perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); - if (likely(!ctx || !cpuctx->task_ctx)) + if (likely(!ctx)) + return; + + cpuctx = __get_cpu_context(ctx); + if (!cpuctx->task_ctx) return; rcu_read_lock(); @@ -1242,7 +1251,7 @@ void perf_event_task_sched_out(struct task_struct *task, static void task_ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); if (!cpuctx->task_ctx) return; @@ -1360,8 +1369,8 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, static void task_ctx_sched_in(struct task_struct *task, enum event_type_t event_type) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = task->perf_event_ctxp; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); if (likely(!ctx)) return; @@ -1383,12 +1392,13 @@ static void task_ctx_sched_in(struct task_struct *task, */ void perf_event_task_sched_in(struct task_struct *task) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = task->perf_event_ctxp; + struct 
perf_cpu_context *cpuctx; if (likely(!ctx)) return; + cpuctx = __get_cpu_context(ctx); if (cpuctx->task_ctx == ctx) return; @@ -1409,7 +1419,7 @@ void perf_event_task_sched_in(struct task_struct *task) * Since these rotations are per-cpu, we need to ensure the * cpu-context we got scheduled on is actually rotating. */ - perf_pmu_rotate_start(); + perf_pmu_rotate_start(ctx->pmu); } #define MAX_INTERRUPTS (~0ULL) @@ -1687,9 +1697,9 @@ out: */ static void __perf_event_read(void *info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); /* * If this is a task context, we need to check whether it is @@ -1962,7 +1972,8 @@ __perf_event_init_context(struct perf_event_context *ctx, ctx->task = task; } -static struct perf_event_context *find_get_context(pid_t pid, int cpu) +static struct perf_event_context * +find_get_context(struct pmu *pmu, pid_t pid, int cpu) { struct perf_event_context *ctx; struct perf_cpu_context *cpuctx; @@ -1986,7 +1997,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) if (!cpu_online(cpu)) return ERR_PTR(-ENODEV); - cpuctx = &per_cpu(perf_cpu_context, cpu); + cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); ctx = &cpuctx->ctx; get_ctx(ctx); @@ -2030,6 +2041,7 @@ retry: if (!ctx) goto errout; __perf_event_init_context(ctx, task); + ctx->pmu = pmu; get_ctx(ctx); if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) { /* @@ -3745,18 +3757,20 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, static void perf_event_task_event(struct perf_task_event *task_event) { - struct perf_cpu_context *cpuctx; struct perf_event_context *ctx = task_event->task_ctx; + struct perf_cpu_context *cpuctx; + struct pmu *pmu; - rcu_read_lock(); - cpuctx = &get_cpu_var(perf_cpu_context); - perf_event_task_ctx(&cpuctx->ctx, task_event); + rcu_read_lock_sched(); + list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + perf_event_task_ctx(&cpuctx->ctx, task_event); + } if (!ctx) ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) perf_event_task_ctx(ctx, task_event); - put_cpu_var(perf_cpu_context); - rcu_read_unlock(); + rcu_read_unlock_sched(); } static void perf_event_task(struct task_struct *task, @@ -3861,6 +3875,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; unsigned int size; + struct pmu *pmu; char comm[TASK_COMM_LEN]; memset(comm, 0, sizeof(comm)); @@ -3872,14 +3887,15 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; - rcu_read_lock(); - cpuctx = &get_cpu_var(perf_cpu_context); - perf_event_comm_ctx(&cpuctx->ctx, comm_event); + rcu_read_lock_sched(); + list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + perf_event_comm_ctx(&cpuctx->ctx, comm_event); + } ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) perf_event_comm_ctx(ctx, comm_event); - put_cpu_var(perf_cpu_context); - rcu_read_unlock(); + rcu_read_unlock_sched(); } void perf_event_comm(struct task_struct *task) @@ -3989,6 +4005,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) char tmp[16]; char *buf = NULL; const char *name; + struct pmu *pmu; memset(tmp, 0, sizeof(tmp)); @@ -4040,14 +4057,16 @@ got_name: mmap_event->event_id.header.size = 
sizeof(mmap_event->event_id) + size; - rcu_read_lock(); - cpuctx = &get_cpu_var(perf_cpu_context); - perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC); + rcu_read_lock_sched(); + list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, + vma->vm_flags & VM_EXEC); + } ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC); - put_cpu_var(perf_cpu_context); - rcu_read_unlock(); + rcu_read_unlock_sched(); kfree(buf); } @@ -4982,10 +5001,6 @@ static struct pmu perf_task_clock = { .read = task_clock_event_read, }; -static LIST_HEAD(pmus); -static DEFINE_MUTEX(pmus_lock); -static struct srcu_struct pmus_srcu; - static void perf_pmu_nop_void(struct pmu *pmu) { } @@ -5013,7 +5028,7 @@ static void perf_pmu_cancel_txn(struct pmu *pmu) int perf_pmu_register(struct pmu *pmu) { - int ret; + int cpu, ret; mutex_lock(&pmus_lock); ret = -ENOMEM; @@ -5021,6 +5036,21 @@ int perf_pmu_register(struct pmu *pmu) if (!pmu->pmu_disable_count) goto unlock; + pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); + if (!pmu->pmu_cpu_context) + goto free_pdc; + + for_each_possible_cpu(cpu) { + struct perf_cpu_context *cpuctx; + + cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); + __perf_event_init_context(&cpuctx->ctx, NULL); + cpuctx->ctx.pmu = pmu; + cpuctx->timer_interval = TICK_NSEC; + hrtimer_init(&cpuctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cpuctx->timer.function = perf_event_context_tick; + } + if (!pmu->start_txn) { if (pmu->pmu_enable) { /* @@ -5049,6 +5079,10 @@ unlock: mutex_unlock(&pmus_lock); return ret; + +free_pdc: + free_percpu(pmu->pmu_disable_count); + goto unlock; } void perf_pmu_unregister(struct pmu *pmu) @@ -5057,9 +5091,14 @@ void perf_pmu_unregister(struct pmu *pmu) list_del_rcu(&pmu->entry); mutex_unlock(&pmus_lock); + /* + * We use the pmu list either under SRCU or preempt_disable, + * synchronize_srcu() implies synchronize_sched() so we're good. 
+ */ synchronize_srcu(&pmus_srcu); free_percpu(pmu->pmu_disable_count); + free_percpu(pmu->pmu_cpu_context); } struct pmu *perf_init_event(struct perf_event *event) @@ -5374,7 +5413,7 @@ SYSCALL_DEFINE5(perf_event_open, /* * Get the target context (task or percpu): */ - ctx = find_get_context(pid, cpu); + ctx = find_get_context(event->pmu, pid, cpu); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err_alloc; @@ -5489,7 +5528,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, goto err; } - ctx = find_get_context(pid, cpu); + ctx = find_get_context(event->pmu, pid, cpu); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err_free; @@ -5833,6 +5872,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, return -ENOMEM; __perf_event_init_context(child_ctx, child); + child_ctx->pmu = event->pmu; child->perf_event_ctxp = child_ctx; get_task_struct(child); } @@ -5935,30 +5975,18 @@ int perf_event_init_task(struct task_struct *child) static void __init perf_event_init_all_cpus(void) { - struct perf_cpu_context *cpuctx; struct swevent_htable *swhash; int cpu; for_each_possible_cpu(cpu) { swhash = &per_cpu(swevent_htable, cpu); mutex_init(&swhash->hlist_mutex); - - cpuctx = &per_cpu(perf_cpu_context, cpu); - __perf_event_init_context(&cpuctx->ctx, NULL); - cpuctx->timer_interval = TICK_NSEC; - hrtimer_init(&cpuctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - cpuctx->timer.function = perf_event_context_tick; } } static void __cpuinit perf_event_init_cpu(int cpu) { - struct perf_cpu_context *cpuctx; - struct swevent_htable *swhash; - - cpuctx = &per_cpu(perf_cpu_context, cpu); - - swhash = &per_cpu(swevent_htable, cpu); + struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); mutex_lock(&swhash->hlist_mutex); if (swhash->hlist_refcount > 0) { @@ -5972,32 +6000,46 @@ static void __cpuinit perf_event_init_cpu(int cpu) } #ifdef CONFIG_HOTPLUG_CPU -static void __perf_event_exit_cpu(void *info) +static void __perf_event_exit_context(void *__info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_event_context *ctx = &cpuctx->ctx; + struct perf_event_context *ctx = __info; struct perf_event *event, *tmp; - perf_pmu_rotate_stop(); + perf_pmu_rotate_stop(ctx->pmu); list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) __perf_event_remove_from_context(event); list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) __perf_event_remove_from_context(event); } + +static void perf_event_exit_cpu_context(int cpu) +{ + struct perf_event_context *ctx; + struct pmu *pmu; + int idx; + + idx = srcu_read_lock(&pmus_srcu); + list_for_each_entry_rcu(pmu, &pmus, entry) { + ctx = &this_cpu_ptr(pmu->pmu_cpu_context)->ctx; + + mutex_lock(&ctx->mutex); + smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1); + mutex_unlock(&ctx->mutex); + } + srcu_read_unlock(&pmus_srcu, idx); + +} + static void perf_event_exit_cpu(int cpu) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); - struct perf_event_context *ctx = &cpuctx->ctx; mutex_lock(&swhash->hlist_mutex); swevent_hlist_release(swhash); mutex_unlock(&swhash->hlist_mutex); - mutex_lock(&ctx->mutex); - smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1); - mutex_unlock(&ctx->mutex); + perf_event_exit_cpu_context(cpu); } #else static inline void perf_event_exit_cpu(int cpu) { } -- cgit v1.2.3 From 8dc85d547285668e509f86c177bcd4ea055bcaaf Mon Sep 17 00:00:00 2001 
From: Peter Zijlstra Date: Thu, 2 Sep 2010 16:50:03 +0200 Subject: perf: Multiple task contexts Provide the infrastructure for multiple task contexts. A more flexible approach would have resulted in more pointer chases in the scheduling hot-paths. This approach has the limitation of a static number of task contexts. Since I expect most external PMUs to be system wide, or at least node wide (as per the intel uncore unit) they won't actually need a task context. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + include/linux/sched.h | 8 +- kernel/perf_event.c | 336 +++++++++++++++++++++++++++++++-------------- 3 files changed, 239 insertions(+), 106 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 22155ef3b362..9ecfd856ce6e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -572,6 +572,7 @@ struct pmu { int * __percpu pmu_disable_count; struct perf_cpu_context * __percpu pmu_cpu_context; + int task_ctx_nr; /* * Fully disable/enable this PMU, can be used to protect from the PMI diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7dd..89d6023c6f82 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1160,6 +1160,12 @@ struct sched_rt_entity { struct rcu_node; +enum perf_event_task_context { + perf_invalid_context = -1, + perf_hw_context = 0, + perf_nr_task_contexts, +}; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -1431,7 +1437,7 @@ struct task_struct { struct futex_pi_state *pi_state_cache; #endif #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp; + struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; struct mutex perf_event_mutex; struct list_head perf_event_list; #endif diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 13d98d756347..7223ea875861 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -148,13 +148,13 @@ static u64 primary_event_id(struct perf_event *event) * the context could get moved to another task. */ static struct perf_event_context * -perf_lock_task_context(struct task_struct *task, unsigned long *flags) +perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) { struct perf_event_context *ctx; rcu_read_lock(); retry: - ctx = rcu_dereference(task->perf_event_ctxp); + ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); if (ctx) { /* * If this context is a clone of another, it might @@ -167,7 +167,7 @@ retry: * can't get swapped on us any more. */ raw_spin_lock_irqsave(&ctx->lock, *flags); - if (ctx != rcu_dereference(task->perf_event_ctxp)) { + if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { raw_spin_unlock_irqrestore(&ctx->lock, *flags); goto retry; } @@ -186,12 +186,13 @@ retry: * can't get swapped to another task. This also increments its * reference count so that the context can't get freed. 
*/ -static struct perf_event_context *perf_pin_task_context(struct task_struct *task) +static struct perf_event_context * +perf_pin_task_context(struct task_struct *task, int ctxn) { struct perf_event_context *ctx; unsigned long flags; - ctx = perf_lock_task_context(task, &flags); + ctx = perf_lock_task_context(task, ctxn, &flags); if (ctx) { ++ctx->pin_count; raw_spin_unlock_irqrestore(&ctx->lock, flags); @@ -1179,28 +1180,15 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, } } -/* - * Called from scheduler to remove the events of the current task, - * with interrupts disabled. - * - * We stop each event and update the event value in event->count. - * - * This does not protect us against NMI, but disable() - * sets the disabled bit in the control field of event _before_ - * accessing the event control register. If a NMI hits, then it will - * not restart the event. - */ -void perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next) +void perf_event_context_sched_out(struct task_struct *task, int ctxn, + struct task_struct *next) { - struct perf_event_context *ctx = task->perf_event_ctxp; + struct perf_event_context *ctx = task->perf_event_ctxp[ctxn]; struct perf_event_context *next_ctx; struct perf_event_context *parent; struct perf_cpu_context *cpuctx; int do_switch = 1; - perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); - if (likely(!ctx)) return; @@ -1210,7 +1198,7 @@ void perf_event_task_sched_out(struct task_struct *task, rcu_read_lock(); parent = rcu_dereference(ctx->parent_ctx); - next_ctx = next->perf_event_ctxp; + next_ctx = next->perf_event_ctxp[ctxn]; if (parent && next_ctx && rcu_dereference(next_ctx->parent_ctx) == parent) { /* @@ -1229,8 +1217,8 @@ void perf_event_task_sched_out(struct task_struct *task, * XXX do we need a memory barrier of sorts * wrt to rcu_dereference() of perf_event_ctxp */ - task->perf_event_ctxp = next_ctx; - next->perf_event_ctxp = ctx; + task->perf_event_ctxp[ctxn] = next_ctx; + next->perf_event_ctxp[ctxn] = ctx; ctx->task = next; next_ctx->task = task; do_switch = 0; @@ -1248,6 +1236,31 @@ void perf_event_task_sched_out(struct task_struct *task, } } +#define for_each_task_context_nr(ctxn) \ + for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++) + +/* + * Called from scheduler to remove the events of the current task, + * with interrupts disabled. + * + * We stop each event and update the event value in event->count. + * + * This does not protect us against NMI, but disable() + * sets the disabled bit in the control field of event _before_ + * accessing the event control register. If a NMI hits, then it will + * not restart the event. 
+ */ +void perf_event_task_sched_out(struct task_struct *task, + struct task_struct *next) +{ + int ctxn; + + perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); + + for_each_task_context_nr(ctxn) + perf_event_context_sched_out(task, ctxn, next); +} + static void task_ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type) { @@ -1366,38 +1379,23 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, ctx_sched_in(ctx, cpuctx, event_type); } -static void task_ctx_sched_in(struct task_struct *task, +static void task_ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type) { - struct perf_event_context *ctx = task->perf_event_ctxp; - struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); + struct perf_cpu_context *cpuctx; - if (likely(!ctx)) - return; + cpuctx = __get_cpu_context(ctx); if (cpuctx->task_ctx == ctx) return; + ctx_sched_in(ctx, cpuctx, event_type); cpuctx->task_ctx = ctx; } -/* - * Called from scheduler to add the events of the current task - * with interrupts disabled. - * - * We restore the event value and then enable it. - * - * This does not protect us against NMI, but enable() - * sets the enabled bit in the control field of event _before_ - * accessing the event control register. If a NMI hits, then it will - * keep the event running. - */ -void perf_event_task_sched_in(struct task_struct *task) + +void perf_event_context_sched_in(struct perf_event_context *ctx) { - struct perf_event_context *ctx = task->perf_event_ctxp; struct perf_cpu_context *cpuctx; - if (likely(!ctx)) - return; - cpuctx = __get_cpu_context(ctx); if (cpuctx->task_ctx == ctx) return; @@ -1422,6 +1420,31 @@ void perf_event_task_sched_in(struct task_struct *task) perf_pmu_rotate_start(ctx->pmu); } +/* + * Called from scheduler to add the events of the current task + * with interrupts disabled. + * + * We restore the event value and then enable it. + * + * This does not protect us against NMI, but enable() + * sets the enabled bit in the control field of event _before_ + * accessing the event control register. If a NMI hits, then it will + * keep the event running. 
+ */ +void perf_event_task_sched_in(struct task_struct *task) +{ + struct perf_event_context *ctx; + int ctxn; + + for_each_task_context_nr(ctxn) { + ctx = task->perf_event_ctxp[ctxn]; + if (likely(!ctx)) + continue; + + perf_event_context_sched_in(ctx); + } +} + #define MAX_INTERRUPTS (~0ULL) static void perf_log_throttle(struct perf_event *event, int enable); @@ -1588,7 +1611,7 @@ static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer) { enum hrtimer_restart restart = HRTIMER_NORESTART; struct perf_cpu_context *cpuctx; - struct perf_event_context *ctx; + struct perf_event_context *ctx = NULL; int rotate = 0; cpuctx = container_of(timer, struct perf_cpu_context, timer); @@ -1599,7 +1622,7 @@ static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer) rotate = 1; } - ctx = current->perf_event_ctxp; + ctx = cpuctx->task_ctx; if (ctx && ctx->nr_events) { restart = HRTIMER_RESTART; if (ctx->nr_events != ctx->nr_active) @@ -1623,7 +1646,7 @@ static enum hrtimer_restart perf_event_context_tick(struct hrtimer *timer) cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); if (ctx) - task_ctx_sched_in(current, EVENT_FLEXIBLE); + task_ctx_sched_in(ctx, EVENT_FLEXIBLE); done: hrtimer_forward_now(timer, ns_to_ktime(cpuctx->timer_interval)); @@ -1650,20 +1673,18 @@ static int event_enable_on_exec(struct perf_event *event, * Enable all of a task's events that have been marked enable-on-exec. * This expects task == current. */ -static void perf_event_enable_on_exec(struct task_struct *task) +static void perf_event_enable_on_exec(struct perf_event_context *ctx) { - struct perf_event_context *ctx; struct perf_event *event; unsigned long flags; int enabled = 0; int ret; local_irq_save(flags); - ctx = task->perf_event_ctxp; if (!ctx || !ctx->nr_events) goto out; - __perf_event_task_sched_out(ctx); + task_ctx_sched_out(ctx, EVENT_ALL); raw_spin_lock(&ctx->lock); @@ -1687,7 +1708,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) raw_spin_unlock(&ctx->lock); - perf_event_task_sched_in(task); + perf_event_context_sched_in(ctx); out: local_irq_restore(flags); } @@ -1995,7 +2016,7 @@ find_get_context(struct pmu *pmu, pid_t pid, int cpu) struct perf_cpu_context *cpuctx; struct task_struct *task; unsigned long flags; - int err; + int ctxn, err; if (pid == -1 && cpu != -1) { /* Must be root to operate on a CPU event: */ @@ -2044,8 +2065,13 @@ find_get_context(struct pmu *pmu, pid_t pid, int cpu) if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto errout; + err = -EINVAL; + ctxn = pmu->task_ctx_nr; + if (ctxn < 0) + goto errout; + retry: - ctx = perf_lock_task_context(task, &flags); + ctx = perf_lock_task_context(task, ctxn, &flags); if (ctx) { unclone_ctx(ctx); raw_spin_unlock_irqrestore(&ctx->lock, flags); @@ -2059,7 +2085,7 @@ retry: get_ctx(ctx); - if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) { + if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) { /* * We raced with some other task; use * the context they set. 
@@ -3773,19 +3799,26 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, static void perf_event_task_event(struct perf_task_event *task_event) { - struct perf_event_context *ctx = task_event->task_ctx; struct perf_cpu_context *cpuctx; + struct perf_event_context *ctx; struct pmu *pmu; + int ctxn; rcu_read_lock_sched(); list_for_each_entry_rcu(pmu, &pmus, entry) { cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); perf_event_task_ctx(&cpuctx->ctx, task_event); + + ctx = task_event->task_ctx; + if (!ctx) { + ctxn = pmu->task_ctx_nr; + if (ctxn < 0) + continue; + ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); + } + if (ctx) + perf_event_task_ctx(ctx, task_event); } - if (!ctx) - ctx = rcu_dereference(current->perf_event_ctxp); - if (ctx) - perf_event_task_ctx(ctx, task_event); rcu_read_unlock_sched(); } @@ -3890,9 +3923,10 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) { struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; + char comm[TASK_COMM_LEN]; unsigned int size; struct pmu *pmu; - char comm[TASK_COMM_LEN]; + int ctxn; memset(comm, 0, sizeof(comm)); strlcpy(comm, comm_event->task->comm, sizeof(comm)); @@ -3907,19 +3941,31 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) list_for_each_entry_rcu(pmu, &pmus, entry) { cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); perf_event_comm_ctx(&cpuctx->ctx, comm_event); + + ctxn = pmu->task_ctx_nr; + if (ctxn < 0) + continue; + + ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); + if (ctx) + perf_event_comm_ctx(ctx, comm_event); } - ctx = rcu_dereference(current->perf_event_ctxp); - if (ctx) - perf_event_comm_ctx(ctx, comm_event); rcu_read_unlock_sched(); } void perf_event_comm(struct task_struct *task) { struct perf_comm_event comm_event; + struct perf_event_context *ctx; + int ctxn; - if (task->perf_event_ctxp) - perf_event_enable_on_exec(task); + for_each_task_context_nr(ctxn) { + ctx = task->perf_event_ctxp[ctxn]; + if (!ctx) + continue; + + perf_event_enable_on_exec(ctx); + } if (!atomic_read(&nr_comm_events)) return; @@ -4022,6 +4068,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) char *buf = NULL; const char *name; struct pmu *pmu; + int ctxn; memset(tmp, 0, sizeof(tmp)); @@ -4078,10 +4125,17 @@ got_name: cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC); + + ctxn = pmu->task_ctx_nr; + if (ctxn < 0) + continue; + + ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); + if (ctx) { + perf_event_mmap_ctx(ctx, mmap_event, + vma->vm_flags & VM_EXEC); + } } - ctx = rcu_dereference(current->perf_event_ctxp); - if (ctx) - perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC); rcu_read_unlock_sched(); kfree(buf); @@ -5042,6 +5096,43 @@ static void perf_pmu_cancel_txn(struct pmu *pmu) perf_pmu_enable(pmu); } +/* + * Ensures all contexts with the same task_ctx_nr have the same + * pmu_cpu_context too. + */ +static void *find_pmu_context(int ctxn) +{ + struct pmu *pmu; + + if (ctxn < 0) + return NULL; + + list_for_each_entry(pmu, &pmus, entry) { + if (pmu->task_ctx_nr == ctxn) + return pmu->pmu_cpu_context; + } + + return NULL; +} + +static void free_pmu_context(void * __percpu cpu_context) +{ + struct pmu *pmu; + + mutex_lock(&pmus_lock); + /* + * Like a real lame refcount. 
+ */ + list_for_each_entry(pmu, &pmus, entry) { + if (pmu->pmu_cpu_context == cpu_context) + goto out; + } + + free_percpu(cpu_context); +out: + mutex_unlock(&pmus_lock); +} + int perf_pmu_register(struct pmu *pmu) { int cpu, ret; @@ -5052,6 +5143,10 @@ int perf_pmu_register(struct pmu *pmu) if (!pmu->pmu_disable_count) goto unlock; + pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); + if (pmu->pmu_cpu_context) + goto got_cpu_context; + pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); if (!pmu->pmu_cpu_context) goto free_pdc; @@ -5067,6 +5162,7 @@ int perf_pmu_register(struct pmu *pmu) cpuctx->timer.function = perf_event_context_tick; } +got_cpu_context: if (!pmu->start_txn) { if (pmu->pmu_enable) { /* @@ -5114,7 +5210,7 @@ void perf_pmu_unregister(struct pmu *pmu) synchronize_srcu(&pmus_srcu); free_percpu(pmu->pmu_disable_count); - free_percpu(pmu->pmu_cpu_context); + free_pmu_context(pmu->pmu_cpu_context); } struct pmu *perf_init_event(struct perf_event *event) @@ -5628,16 +5724,13 @@ __perf_event_exit_task(struct perf_event *child_event, } } -/* - * When a child task exits, feed back event values to parent events. - */ -void perf_event_exit_task(struct task_struct *child) +static void perf_event_exit_task_context(struct task_struct *child, int ctxn) { struct perf_event *child_event, *tmp; struct perf_event_context *child_ctx; unsigned long flags; - if (likely(!child->perf_event_ctxp)) { + if (likely(!child->perf_event_ctxp[ctxn])) { perf_event_task(child, NULL, 0); return; } @@ -5649,7 +5742,7 @@ void perf_event_exit_task(struct task_struct *child) * scheduled, so we are now safe from rescheduling changing * our context. */ - child_ctx = child->perf_event_ctxp; + child_ctx = child->perf_event_ctxp[ctxn]; __perf_event_task_sched_out(child_ctx); /* @@ -5658,7 +5751,7 @@ void perf_event_exit_task(struct task_struct *child) * incremented the context's refcount before we do put_ctx below. */ raw_spin_lock(&child_ctx->lock); - child->perf_event_ctxp = NULL; + child->perf_event_ctxp[ctxn] = NULL; /* * If this context is a clone; unclone it so it can't get * swapped to another process while we're removing all @@ -5711,6 +5804,17 @@ again: put_ctx(child_ctx); } +/* + * When a child task exits, feed back event values to parent events. + */ +void perf_event_exit_task(struct task_struct *child) +{ + int ctxn; + + for_each_task_context_nr(ctxn) + perf_event_exit_task_context(child, ctxn); +} + static void perf_free_event(struct perf_event *event, struct perf_event_context *ctx) { @@ -5732,32 +5836,37 @@ static void perf_free_event(struct perf_event *event, /* * free an unexposed, unused context as created by inheritance by - * init_task below, used by fork() in case of fail. + * perf_event_init_task below, used by fork() in case of fail. 
*/ void perf_event_free_task(struct task_struct *task) { - struct perf_event_context *ctx = task->perf_event_ctxp; + struct perf_event_context *ctx; struct perf_event *event, *tmp; + int ctxn; - if (!ctx) - return; + for_each_task_context_nr(ctxn) { + ctx = task->perf_event_ctxp[ctxn]; + if (!ctx) + continue; - mutex_lock(&ctx->mutex); + mutex_lock(&ctx->mutex); again: - list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) - perf_free_event(event, ctx); + list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, + group_entry) + perf_free_event(event, ctx); - list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, - group_entry) - perf_free_event(event, ctx); + list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, + group_entry) + perf_free_event(event, ctx); - if (!list_empty(&ctx->pinned_groups) || - !list_empty(&ctx->flexible_groups)) - goto again; + if (!list_empty(&ctx->pinned_groups) || + !list_empty(&ctx->flexible_groups)) + goto again; - mutex_unlock(&ctx->mutex); + mutex_unlock(&ctx->mutex); - put_ctx(ctx); + put_ctx(ctx); + } } /* @@ -5863,17 +5972,18 @@ static int inherit_group(struct perf_event *parent_event, static int inherit_task_group(struct perf_event *event, struct task_struct *parent, struct perf_event_context *parent_ctx, - struct task_struct *child, + struct task_struct *child, int ctxn, int *inherited_all) { int ret; - struct perf_event_context *child_ctx = child->perf_event_ctxp; + struct perf_event_context *child_ctx; if (!event->attr.inherit) { *inherited_all = 0; return 0; } + child_ctx = child->perf_event_ctxp[ctxn]; if (!child_ctx) { /* * This is executed from the parent task context, so @@ -5886,7 +5996,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, if (!child_ctx) return -ENOMEM; - child->perf_event_ctxp = child_ctx; + child->perf_event_ctxp[ctxn] = child_ctx; } ret = inherit_group(event, parent, parent_ctx, @@ -5901,7 +6011,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, /* * Initialize the perf_event context in task_struct */ -int perf_event_init_task(struct task_struct *child) +int perf_event_init_context(struct task_struct *child, int ctxn) { struct perf_event_context *child_ctx, *parent_ctx; struct perf_event_context *cloned_ctx; @@ -5910,19 +6020,19 @@ int perf_event_init_task(struct task_struct *child) int inherited_all = 1; int ret = 0; - child->perf_event_ctxp = NULL; + child->perf_event_ctxp[ctxn] = NULL; mutex_init(&child->perf_event_mutex); INIT_LIST_HEAD(&child->perf_event_list); - if (likely(!parent->perf_event_ctxp)) + if (likely(!parent->perf_event_ctxp[ctxn])) return 0; /* * If the parent's context is a clone, pin it so it won't get * swapped under us. 
*/ - parent_ctx = perf_pin_task_context(parent); + parent_ctx = perf_pin_task_context(parent, ctxn); /* * No need to check if parent_ctx != NULL here; since we saw @@ -5942,20 +6052,20 @@ int perf_event_init_task(struct task_struct *child) * the list, not manipulating it: */ list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { - ret = inherit_task_group(event, parent, parent_ctx, child, - &inherited_all); + ret = inherit_task_group(event, parent, parent_ctx, + child, ctxn, &inherited_all); if (ret) break; } list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { - ret = inherit_task_group(event, parent, parent_ctx, child, - &inherited_all); + ret = inherit_task_group(event, parent, parent_ctx, + child, ctxn, &inherited_all); if (ret) break; } - child_ctx = child->perf_event_ctxp; + child_ctx = child->perf_event_ctxp[ctxn]; if (child_ctx && inherited_all) { /* @@ -5984,6 +6094,22 @@ int perf_event_init_task(struct task_struct *child) return ret; } +/* + * Initialize the perf_event context in task_struct + */ +int perf_event_init_task(struct task_struct *child) +{ + int ctxn, ret; + + for_each_task_context_nr(ctxn) { + ret = perf_event_init_context(child, ctxn); + if (ret) + return ret; + } + + return 0; +} + static void __init perf_event_init_all_cpus(void) { struct swevent_htable *swhash; -- cgit v1.2.3 From 89a1e18731959e9953fae15ddc1a983eb15a4f19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 Sep 2010 17:34:50 +0200 Subject: perf: Provide a separate task context for swevents Since software events are always schedulable, mixing them up with hardware events (which are not) can lead to funny scheduling oddities. Giving them their own context solves this. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Lin Ming Cc: Yanmin LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 9 +-------- include/linux/sched.h | 1 + kernel/hw_breakpoint.c | 2 ++ kernel/perf_event.c | 40 +++++++++++++++++++++++++++++----------- 4 files changed, 33 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9ecfd856ce6e..c1173520f14d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -952,14 +952,7 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, */ static inline int is_software_event(struct perf_event *event) { - switch (event->attr.type) { - case PERF_TYPE_SOFTWARE: - case PERF_TYPE_TRACEPOINT: - /* for now the breakpoint stuff also works as software event */ - case PERF_TYPE_BREAKPOINT: - return 1; - } - return 0; + return event->pmu->task_ctx_nr == perf_sw_context; } extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; diff --git a/include/linux/sched.h b/include/linux/sched.h index 89d6023c6f82..eb3c1ceec06e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1163,6 +1163,7 @@ struct rcu_node; enum perf_event_task_context { perf_invalid_context = -1, perf_hw_context = 0, + perf_sw_context, perf_nr_task_contexts, }; diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 6f150095cafe..3b2aaffb65f0 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -610,6 +610,8 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags) } static struct pmu perf_breakpoint = { + .task_ctx_nr = perf_sw_context, /* could eventually get its own */ + .event_init = hw_breakpoint_event_init, .add = hw_breakpoint_add, .del = hw_breakpoint_del, 
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 7223ea875861..357ee8d5e8ae 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4709,6 +4709,8 @@ static int perf_swevent_init(struct perf_event *event) } static struct pmu perf_swevent = { + .task_ctx_nr = perf_sw_context, + .event_init = perf_swevent_init, .add = perf_swevent_add, .del = perf_swevent_del, @@ -4800,6 +4802,8 @@ static int perf_tp_event_init(struct perf_event *event) } static struct pmu perf_tracepoint = { + .task_ctx_nr = perf_sw_context, + .event_init = perf_tp_event_init, .add = perf_trace_add, .del = perf_trace_del, @@ -4988,6 +4992,8 @@ static int cpu_clock_event_init(struct perf_event *event) } static struct pmu perf_cpu_clock = { + .task_ctx_nr = perf_sw_context, + .event_init = cpu_clock_event_init, .add = cpu_clock_event_add, .del = cpu_clock_event_del, @@ -5063,6 +5069,8 @@ static int task_clock_event_init(struct perf_event *event) } static struct pmu perf_task_clock = { + .task_ctx_nr = perf_sw_context, + .event_init = task_clock_event_init, .add = task_clock_event_add, .del = task_clock_event_del, @@ -5490,6 +5498,7 @@ SYSCALL_DEFINE5(perf_event_open, struct perf_event_context *ctx; struct file *event_file = NULL; struct file *group_file = NULL; + struct pmu *pmu; int event_fd; int fput_needed = 0; int err; @@ -5522,20 +5531,11 @@ SYSCALL_DEFINE5(perf_event_open, goto err_fd; } - /* - * Get the target context (task or percpu): - */ - ctx = find_get_context(event->pmu, pid, cpu); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto err_alloc; - } - if (group_fd != -1) { group_leader = perf_fget_light(group_fd, &fput_needed); if (IS_ERR(group_leader)) { err = PTR_ERR(group_leader); - goto err_context; + goto err_alloc; } group_file = group_leader->filp; if (flags & PERF_FLAG_FD_OUTPUT) @@ -5544,6 +5544,23 @@ SYSCALL_DEFINE5(perf_event_open, group_leader = NULL; } + /* + * Special case software events and allow them to be part of + * any hardware group. + */ + pmu = event->pmu; + if ((pmu->task_ctx_nr == perf_sw_context) && group_leader) + pmu = group_leader->pmu; + + /* + * Get the target context (task or percpu): + */ + ctx = find_get_context(pmu, pid, cpu); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto err_group_fd; + } + /* * Look up the group leader (we will attach this event to it): */ @@ -5605,8 +5622,9 @@ SYSCALL_DEFINE5(perf_event_open, return event_fd; err_context: - fput_light(group_file, fput_needed); put_ctx(ctx); +err_group_fd: + fput_light(group_file, fput_needed); err_alloc: free_event(event); err_fd: -- cgit v1.2.3 From 4e231c7962ce711c7d8c2a4dc23ecd1e8fc28363 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Sep 2010 21:01:59 +0200 Subject: perf: Fix up delayed_put_task_struct() I missed a perf_event_ctxp user when converting it to an array. Pull this last user into perf_event.c as well and fix it up. 
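As an illustration of why such a user is easy to miss: once the field becomes an array, a bare pointer test like WARN_ON_ONCE(tsk->perf_event_ctxp) still compiles (the array decays to an always-true pointer), so every check has to be converted to a loop by hand. A minimal user-space sketch of the resulting pattern, with made-up names rather than the kernel code:

#include <stdio.h>

#define NR_CTX 2	/* stands in for perf_nr_task_contexts */

struct task {
	void *ctxp[NR_CTX];	/* was a single pointer before the conversion */
};

/* "Is any context still installed?" must now walk every slot; note that
 * "if (t->ctxp)" would still compile against the array and always be
 * true, which is exactly how such users slip through. */
static int task_has_ctx(const struct task *t)
{
	int n;

	for (n = 0; n < NR_CTX; n++)
		if (t->ctxp[n])
			return 1;
	return 0;
}

int main(void)
{
	struct task t = { .ctxp = { 0 } };

	printf("%d\n", task_has_ctx(&t));	/* prints 0: all slots clear */
	return 0;
}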
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 ++ kernel/exit.c | 4 +--- kernel/perf_event.c | 8 ++++++++ 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c1173520f14d..93bf53aa50e5 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -889,6 +889,7 @@ extern void perf_event_task_sched_out(struct task_struct *task, struct task_stru extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); +extern void perf_event_delayed_put(struct task_struct *task); extern void set_perf_event_pending(void); extern void perf_event_do_pending(void); extern void perf_event_print_debug(void); @@ -1067,6 +1068,7 @@ perf_event_task_sched_out(struct task_struct *task, static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } +static inline void perf_event_delayed_put(struct task_struct *task) { } static inline void perf_event_do_pending(void) { } static inline void perf_event_print_debug(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } diff --git a/kernel/exit.c b/kernel/exit.c index 03120229db28..e2bdf37f9fde 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -149,9 +149,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); -#ifdef CONFIG_PERF_EVENTS - WARN_ON_ONCE(tsk->perf_event_ctxp); -#endif + perf_event_delayed_put(tsk); trace_sched_process_free(tsk); put_task_struct(tsk); } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 9819a69a61a1..eaf1c5de6dcc 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -5893,6 +5893,14 @@ again: } } +void perf_event_delayed_put(struct task_struct *task) +{ + int ctxn; + + for_each_task_context_nr(ctxn) + WARN_ON_ONCE(task->perf_event_ctxp[ctxn]); +} + /* * inherit a event from parent task to child task: */ -- cgit v1.2.3 From 3b8fad3e2f5f69bfd8e42d099ca8582fb2342edf Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Sep 2010 14:26:00 +0200 Subject: irq: Fix circular headers dependency asm-generic/hardirq.h needs asm/irq.h, which might include linux/interrupt.h, as in the sparc32 case. At this point we need the generic irq_cpustat definitions, but those are included later in asm-generic/hardirq.h. So delay the inclusion of irq.h from asm-generic/hardirq.h a bit; it doesn't need to be included early. 
This fixes: include/linux/interrupt.h: In function '__raise_softirq_irqoff': include/linux/interrupt.h:414: error: implicit declaration of function 'local_softirq_pending' include/linux/interrupt.h:414: error: lvalue required as left operand of assignment Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Lai Jiangshan Cc: Koki Sanagi Cc: mathieu.desnoyers@efficios.com Cc: rostedt@goodmis.org Cc: nhorman@tuxdriver.com Cc: scott.a.mcmillan@intel.com Cc: eric.dumazet@gmail.com Cc: kaneshige.kenji@jp.fujitsu.com Cc: davem@davemloft.net Cc: izumi.taku@jp.fujitsu.com Cc: kosaki.motohiro@jp.fujitsu.com LKML-Reference: <20100908122557.GA5310@nowhere> Signed-off-by: Ingo Molnar --- include/asm-generic/hardirq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h index 62f59080e5cc..04d0a977cd43 100644 --- a/include/asm-generic/hardirq.h +++ b/include/asm-generic/hardirq.h @@ -3,13 +3,13 @@ #include #include -#include typedef struct { unsigned int __softirq_pending; } ____cacheline_aligned irq_cpustat_t; #include /* Standard mappings for irq_cpustat_t above */ +#include #ifndef ack_bad_irq static inline void ack_bad_irq(unsigned int irq) -- cgit v1.2.3 From f3c65b2870f2481f3646bc410a58a12989ecc704 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 9 Sep 2010 16:37:24 -0700 Subject: mmc: avoid getting CID on SDIO-only cards The introduction of support for SD combo cards breaks the initialization of all CSR SDIO chips. The GO_IDLE (CMD0) in mmc_sd_get_cid() causes CSR chips to be reset (this is non-standard behavior). When initializing an SDIO card, check for a combo card by using the memory present bit in the R4 response to IO_SEND_OP_COND (CMD5). This avoids the call to mmc_sd_get_cid() on an SDIO-only card. Signed-off-by: David Vrabel Acked-by: Michal Miroslaw Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/core/sdio.c | 5 ++--- include/linux/mmc/sdio.h | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index bd2755e8d9a3..f332c52968b7 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -362,9 +362,8 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, goto err; } - err = mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid); - - if (!err) { + if (ocr & R4_MEMORY_PRESENT + && mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid) == 0) { card->type = MMC_TYPE_SD_COMBO; if (oldcard && (oldcard->type != MMC_TYPE_SD_COMBO || diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h index 329a8faa6e37..245cdacee544 100644 --- a/include/linux/mmc/sdio.h +++ b/include/linux/mmc/sdio.h @@ -38,6 +38,8 @@ * [8:0] Byte/block count */ +#define R4_MEMORY_PRESENT (1 << 27) + /* SDIO status in R5 Type -- cgit v1.2.3 From e0bf1024b36be90da241af3c2767311e055b612c Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 9 Sep 2010 16:37:26 -0700 Subject: kfifo: add parenthesis for macro parameter reference Some macro parameter references inside the typeof() operator are not enclosed in parentheses. It is safer to add them. 
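As a minimal, user-space illustration of the precedence hazard being guarded against (illustrative code, not from the patch; the DOUBLE_* names are made up), a bare parameter reference lets operators in the argument rebind against operators in the macro body; the kfifo change applies the same defensive parenthesization to the parameter references inside typeof():

#include <stdio.h>

#define DOUBLE_UNSAFE(x)	(x * 2)		/* parameter used bare */
#define DOUBLE_SAFE(x)		((x) * 2)	/* parameter parenthesized */

int main(void)
{
	printf("%d\n", DOUBLE_UNSAFE(1 + 2));	/* expands to (1 + 2 * 2) == 5 */
	printf("%d\n", DOUBLE_SAFE(1 + 2));	/* expands to ((1 + 2) * 2) == 6 */
	return 0;
}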
Signed-off-by: Huang Ying Acked-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 58 +++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 4aa95f203f3e..62dbee554f60 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -214,7 +214,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_reset(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ __tmp->kfifo.in = __tmp->kfifo.out = 0; \ }) @@ -228,7 +228,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_reset_out(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ __tmp->kfifo.out = __tmp->kfifo.in; \ }) @@ -238,7 +238,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_len(fifo) \ ({ \ - typeof(fifo + 1) __tmpl = (fifo); \ + typeof((fifo) + 1) __tmpl = (fifo); \ __tmpl->kfifo.in - __tmpl->kfifo.out; \ }) @@ -248,7 +248,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_is_empty(fifo) \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ __tmpq->kfifo.in == __tmpq->kfifo.out; \ }) @@ -258,7 +258,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_is_full(fifo) \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ kfifo_len(__tmpq) > __tmpq->kfifo.mask; \ }) @@ -269,7 +269,7 @@ __kfifo_must_check_helper(unsigned int val) #define kfifo_avail(fifo) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ const size_t __recsize = sizeof(*__tmpq->rectype); \ unsigned int __avail = kfifo_size(__tmpq) - kfifo_len(__tmpq); \ (__recsize) ? ((__avail <= __recsize) ? 0 : \ @@ -284,7 +284,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_skip(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ if (__recsize) \ @@ -302,7 +302,7 @@ __kfifo_must_check_helper( \ #define kfifo_peek_len(fifo) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ (!__recsize) ? kfifo_len(__tmp) * sizeof(*__tmp->type) : \ @@ -325,7 +325,7 @@ __kfifo_must_check_helper( \ #define kfifo_alloc(fifo, size, gfp_mask) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ __is_kfifo_ptr(__tmp) ? \ __kfifo_alloc(__kfifo, size, sizeof(*__tmp->type), gfp_mask) : \ @@ -339,7 +339,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_free(fifo) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ if (__is_kfifo_ptr(__tmp)) \ __kfifo_free(__kfifo); \ @@ -358,7 +358,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_init(fifo, buffer, size) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ __is_kfifo_ptr(__tmp) ? 
\ __kfifo_init(__kfifo, buffer, size, sizeof(*__tmp->type)) : \ @@ -379,8 +379,8 @@ __kfifo_must_check_helper( \ */ #define kfifo_put(fifo, val) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -421,8 +421,8 @@ __kfifo_must_check_helper( \ #define kfifo_get(fifo, val) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -462,8 +462,8 @@ __kfifo_must_check_helper( \ #define kfifo_peek(fifo, val) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -501,8 +501,8 @@ __kfifo_must_check_helper( \ */ #define kfifo_in(fifo, buf, n) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -554,8 +554,8 @@ __kfifo_must_check_helper( \ #define kfifo_out(fifo, buf, n) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -611,7 +611,7 @@ __kfifo_must_check_helper( \ #define kfifo_from_user(fifo, from, len, copied) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const void __user *__from = (from); \ unsigned int __len = (len); \ unsigned int *__copied = (copied); \ @@ -639,7 +639,7 @@ __kfifo_must_check_helper( \ #define kfifo_to_user(fifo, to, len, copied) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ void __user *__to = (to); \ unsigned int __len = (len); \ unsigned int *__copied = (copied); \ @@ -666,7 +666,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_in_prepare(fifo, sgl, nents, len) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct scatterlist *__sgl = (sgl); \ int __nents = (nents); \ unsigned int __len = (len); \ @@ -690,7 +690,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_in_finish(fifo, len) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ unsigned int __len = (len); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -717,7 +717,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_out_prepare(fifo, sgl, nents, len) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct scatterlist *__sgl = (sgl); \ int __nents = (nents); \ unsigned int __len = (len); \ @@ -741,7 +741,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_out_finish(fifo, len) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + 
typeof((fifo) + 1) __tmp = (fifo); \ unsigned int __len = (len); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -766,8 +766,8 @@ __kfifo_must_check_helper( \ #define kfifo_out_peek(fifo, buf, n) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ -- cgit v1.2.3 From 31583bb0cf6cc40f2a468a4d2f3b9cbefd24f891 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 9 Sep 2010 16:37:37 -0700 Subject: cgroups: fix API thinko Add cgroup_attach_task_all() The existing cgroup_attach_task_current_cg() API is called by a thread to attach another thread to all of its cgroups; this is unsuitable for cases where a privileged task wants to attach itself to the cgroups of a less privileged one, since the call must be made from the context of the target task. This patch adds a more generic cgroup_attach_task_all() API that allows both the source task and to-be-moved task to be specified. cgroup_attach_task_current_cg() becomes a specialization of the more generic new function. [menage@google.com: rewrote changelog] [akpm@linux-foundation.org: address reviewer comments] Signed-off-by: Michael S. Tsirkin Tested-by: Alex Williamson Acked-by: Paul Menage Cc: Li Zefan Cc: Ben Blum Cc: Sridhar Samudrala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 12 +++++++++++- kernel/cgroup.c | 13 +++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e41c6e..0c991023ee47 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -578,7 +578,12 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); -int cgroup_attach_task_current_cg(struct task_struct *); +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); + +static inline int cgroup_attach_task_current_cg(struct task_struct *tsk) +{ + return cgroup_attach_task_all(current, tsk); +} /* * CSS ID is ID for cgroup_subsys_state structs under subsys. 
This only works @@ -636,6 +641,11 @@ static inline int cgroupstats_build(struct cgroupstats *stats, } /* No cgroups - nothing to do */ +static inline int cgroup_attach_task_all(struct task_struct *from, + struct task_struct *t) +{ + return 0; +} static inline int cgroup_attach_task_current_cg(struct task_struct *t) { return 0; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 192f88c5b0f9..c9483d8f6140 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1791,19 +1791,20 @@ out: } /** - * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup + * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from' + * @from: attach to all cgroups of a given task + * @tsk: the task to be attached */ -int cgroup_attach_task_current_cg(struct task_struct *tsk) +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) { struct cgroupfs_root *root; - struct cgroup *cur_cg; int retval = 0; cgroup_lock(); for_each_active_root(root) { - cur_cg = task_cgroup_from_root(current, root); - retval = cgroup_attach_task(cur_cg, tsk); + struct cgroup *from_cg = task_cgroup_from_root(from, root); + + retval = cgroup_attach_task(from_cg, tsk); if (retval) break; } @@ -1811,7 +1812,7 @@ int cgroup_attach_task_current_cg(struct task_struct *tsk) return retval; } -EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg); +EXPORT_SYMBOL_GPL(cgroup_attach_task_all); /* * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex -- cgit v1.2.3 From 4969c1192d15afa3389e7ae3302096ff684ba655 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Sep 2010 16:37:52 -0700 Subject: mm: fix swapin race condition The pte_same check is reliable only if the swap entry remains pinned (by the page lock on swapcache). We also have to ensure the swapcache isn't removed before we take the lock, as try_to_free_swap won't care about the page pin. One of the possible impacts of this patch is that a KSM-shared page can point to the anon_vma of another process, which could exit before the page is freed. This can leave a page with a pointer to a recycled anon_vma object, or worse, a pointer to something that is no longer an anon_vma. [riel@redhat.com: changelog help] Signed-off-by: Andrea Arcangeli Acked-by: Hugh Dickins Reviewed-by: Rik van Riel Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ksm.h | 20 +++++++++----------- mm/ksm.c | 3 --- mm/memory.c | 39 ++++++++++++++++++++++++++++++++++----- 3 files changed, 43 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 74d691ee9121..3319a6967626 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -16,6 +16,9 @@ struct stable_node; struct mem_cgroup; +struct page *ksm_does_need_to_copy(struct page *page, + struct vm_area_struct *vma, unsigned long address); + #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags); @@ -70,19 +73,14 @@ static inline void set_page_stable_node(struct page *page, * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE, * but what if the vma was unmerged while the page was swapped out? 
*/ -struct page *ksm_does_need_to_copy(struct page *page, - struct vm_area_struct *vma, unsigned long address); -static inline struct page *ksm_might_need_to_copy(struct page *page, +static inline int ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address) { struct anon_vma *anon_vma = page_anon_vma(page); - if (!anon_vma || - (anon_vma->root == vma->anon_vma->root && - page->index == linear_page_index(vma, address))) - return page; - - return ksm_does_need_to_copy(page, vma, address); + return anon_vma && + (anon_vma->root != vma->anon_vma->root || + page->index != linear_page_index(vma, address)); } int page_referenced_ksm(struct page *page, @@ -115,10 +113,10 @@ static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, return 0; } -static inline struct page *ksm_might_need_to_copy(struct page *page, +static inline int ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address) { - return page; + return 0; } static inline int page_referenced_ksm(struct page *page, diff --git a/mm/ksm.c b/mm/ksm.c index e2ae00458320..b1873cf03ed9 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1504,8 +1504,6 @@ struct page *ksm_does_need_to_copy(struct page *page, { struct page *new_page; - unlock_page(page); /* any racers will COW it, not modify it */ - new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); if (new_page) { copy_user_highpage(new_page, page, address, vma); @@ -1521,7 +1519,6 @@ struct page *ksm_does_need_to_copy(struct page *page, add_page_to_unevictable_list(new_page); } - page_cache_release(page); return new_page; } diff --git a/mm/memory.c b/mm/memory.c index 6b2ab1051851..71b161b73bb5 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2623,7 +2623,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned int flags, pte_t orig_pte) { spinlock_t *ptl; - struct page *page; + struct page *page, *swapcache = NULL; swp_entry_t entry; pte_t pte; struct mem_cgroup *ptr = NULL; @@ -2679,10 +2679,23 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, lock_page(page); delayacct_clear_flag(DELAYACCT_PF_SWAPIN); - page = ksm_might_need_to_copy(page, vma, address); - if (!page) { - ret = VM_FAULT_OOM; - goto out; + /* + * Make sure try_to_free_swap didn't release the swapcache + * from under us. The page pin isn't enough to prevent that. + */ + if (unlikely(!PageSwapCache(page))) + goto out_page; + + if (ksm_might_need_to_copy(page, vma, address)) { + swapcache = page; + page = ksm_does_need_to_copy(page, vma, address); + + if (unlikely(!page)) { + ret = VM_FAULT_OOM; + page = swapcache; + swapcache = NULL; + goto out_page; + } } if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) { @@ -2735,6 +2748,18 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) try_to_free_swap(page); unlock_page(page); + if (swapcache) { + /* + * Hold the lock to avoid the swap entry to be reused + * until we take the PT lock for the pte_same() check + * (to avoid false positives from pte_same). For + * further safety release the lock after the swap_free + * so that the swap count won't change under a + * parallel locked swapcache. 
+ */ + unlock_page(swapcache); + page_cache_release(swapcache); + } if (flags & FAULT_FLAG_WRITE) { ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte); @@ -2756,6 +2781,10 @@ out_page: unlock_page(page); out_release: page_cache_release(page); + if (swapcache) { + unlock_page(swapcache); + page_cache_release(swapcache); + } return ret; } -- cgit v1.2.3 From 5affb607720d734ca572b8a77c5c7d62d3042b6f Mon Sep 17 00:00:00 2001 From: Gregory Bean Date: Thu, 9 Sep 2010 16:38:02 -0700 Subject: gpio: sx150x: correct and refine reset-on-probe behavior Remove the arbitrary software-reset call from the device-probe method, because: - It is defective. To work correctly, it should be two byte writes, not a single word write. As it stands, it does nothing. - Some devices with sx150x expanders installed have their NRESET pins ganged on the same line, so resetting one causes the others to reset - not a nice thing to do arbitrarily! - The probe, usually taking place at boot, implies a recent hard-reset, so a software reset at this point is just a waste of energy anyway. Therefore, make it optional, defaulting to off, as this will match the common case of probing at powerup and also matches the current broken no-op behavior. Signed-off-by: Gregory Bean Reviewed-by: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/sx150x.c | 26 +++++++++++++++++++++----- include/linux/i2c/sx150x.h | 4 ++++ 2 files changed, 25 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/gpio/sx150x.c b/drivers/gpio/sx150x.c index b42f42ca70c3..823559ab0e24 100644 --- a/drivers/gpio/sx150x.c +++ b/drivers/gpio/sx150x.c @@ -459,17 +459,33 @@ static int sx150x_init_io(struct sx150x_chip *chip, u8 base, u16 cfg) return err; } -static int sx150x_init_hw(struct sx150x_chip *chip, - struct sx150x_platform_data *pdata) +static int sx150x_reset(struct sx150x_chip *chip) { - int err = 0; + int err; - err = i2c_smbus_write_word_data(chip->client, + err = i2c_smbus_write_byte_data(chip->client, chip->dev_cfg->reg_reset, - 0x3412); + 0x12); if (err < 0) return err; + err = i2c_smbus_write_byte_data(chip->client, + chip->dev_cfg->reg_reset, + 0x34); + return err; +} + +static int sx150x_init_hw(struct sx150x_chip *chip, + struct sx150x_platform_data *pdata) +{ + int err = 0; + + if (pdata->reset_during_probe) { + err = sx150x_reset(chip); + if (err < 0) + return err; + } + err = sx150x_i2c_write(chip->client, chip->dev_cfg->reg_misc, 0x01); diff --git a/include/linux/i2c/sx150x.h b/include/linux/i2c/sx150x.h index ee3049cb9ba5..52baa79d69a7 100644 --- a/include/linux/i2c/sx150x.h +++ b/include/linux/i2c/sx150x.h @@ -63,6 +63,9 @@ * IRQ lines will appear. Similarly to gpio_base, the expander * will create a block of irqs beginning at this number. * This value is ignored if irq_summary is < 0. + * @reset_during_probe: If set to true, the driver will trigger a full + * reset of the chip at the beginning of the probe + * in order to place it in a known state. */ struct sx150x_platform_data { unsigned gpio_base; @@ -73,6 +76,7 @@ struct sx150x_platform_data { u16 io_polarity; int irq_summary; unsigned irq_base; + bool reset_during_probe; }; #endif /* __LINUX_I2C_SX150X_H */ -- cgit v1.2.3 From c956126c137d97acb6f4d56fa9572d0bcc84e4ed Mon Sep 17 00:00:00 2001 From: David Brownell Date: Thu, 9 Sep 2010 16:38:03 -0700 Subject: gpio: doc updates There's been some recent confusion about error checking GPIO numbers. 
Briefly, it should be handled mostly during setup, when gpio_request() is called, and NEVER by expecting gpio_is_valid to report more than never-usable GPIO numbers. [akpm@linux-foundation.org: terminate unterminated comment] Signed-off-by: David Brownell Cc: Eric Miao Cc: Ryan Mallon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/gpio.txt | 22 ++++++++++++++-------- include/asm-generic/gpio.h | 14 +++++++++++++- 2 files changed, 27 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index d96a6dba5748..9633da01ff46 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt @@ -109,17 +109,19 @@ use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders. If you want to initialize a structure with an invalid GPIO number, use some negative number (perhaps "-EINVAL"); that will never be valid. To -test if a number could reference a GPIO, you may use this predicate: +test if such number from such a structure could reference a GPIO, you +may use this predicate: int gpio_is_valid(int number); A number that's not valid will be rejected by calls which may request or free GPIOs (see below). Other numbers may also be rejected; for -example, a number might be valid but unused on a given board. - -Whether a platform supports multiple GPIO controllers is currently a -platform-specific implementation issue. +example, a number might be valid but temporarily unused on a given board. +Whether a platform supports multiple GPIO controllers is a platform-specific +implementation issue, as are whether that support can leave "holes" in the space +of GPIO numbers, and whether new controllers can be added at runtime. Such issues +can affect things including whether adjacent GPIO numbers are both valid. Using GPIOs ----------- @@ -480,12 +482,16 @@ To support this framework, a platform's Kconfig will "select" either ARCH_REQUIRE_GPIOLIB or ARCH_WANT_OPTIONAL_GPIOLIB and arrange that its <asm/gpio.h> includes and defines three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep(). -They may also want to provide a custom value for ARCH_NR_GPIOS. -ARCH_REQUIRE_GPIOLIB means that the gpio-lib code will always get compiled +It may also provide a custom value for ARCH_NR_GPIOS, so that it better +reflects the number of GPIOs in actual use on that platform, without +wasting static table space. (It should count both built-in/SoC GPIOs and +also ones on GPIO expanders. + +ARCH_REQUIRE_GPIOLIB means that the gpiolib code will always get compiled into the kernel on that architecture. -ARCH_WANT_OPTIONAL_GPIOLIB means the gpio-lib code defaults to off and the user +ARCH_WANT_OPTIONAL_GPIOLIB means the gpiolib code defaults to off and the user can enable it and build it into the kernel optionally. If neither of these options are selected, the platform does not support diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index c7376bf80b06..8ca18e26d7e3 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -16,15 +16,27 @@ * While the GPIO programming interface defines valid GPIO numbers * to be in the range 0..MAX_INT, this library restricts them to the * smaller range 0..ARCH_NR_GPIOS-1. + * + * ARCH_NR_GPIOS is somewhat arbitrary; it usually reflects the sum of + * builtin/SoC GPIOs plus a number of GPIOs on expanders; the latter is + * actually an estimate of a board-specific value. 
*/ #ifndef ARCH_NR_GPIOS #define ARCH_NR_GPIOS 256 #endif +/* + * "valid" GPIO numbers are nonnegative and may be passed to + * setup routines like gpio_request(). only some valid numbers + * can successfully be requested and used. + * + * Invalid GPIO numbers are useful for indicating no-such-GPIO in + * platform data and other tables. + */ + static inline int gpio_is_valid(int number) { - /* only some non-negative numbers are valid */ return ((unsigned)number) < ARCH_NR_GPIOS; } -- cgit v1.2.3 From 910321ea817a202ff70fac666e37e2c8e2f88823 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 9 Sep 2010 16:38:07 -0700 Subject: swap: revert special hibernation allocation Please revert 2.6.36-rc commit d2997b1042ec150616c1963b5e5e919ffd0b0ebf "hibernation: freeze swap at hibernation". It complicated matters by adding a second swap allocation path, just for hibernation; without in any way fixing the issue that it was intended to address - page reclaim after fixing the hibernation image might free swap from a page already imaged as swapcache, letting its swap be reallocated to store a different page of the image: resulting in data corruption if the imaged page were freed as clean then swapped back in. Pages freed to si->swap_map were still in danger of being reallocated by the alternative allocation path. I guess it inadvertently fixed slow SSD swap allocation for hibernation, as reported by Nigel Cunningham: by missing out the discards that occur on the usual swap allocation path; but that was unintentional, and needs a separate fix. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: "Rafael J. Wysocki" Cc: Ondrej Zary Cc: Andrea Gelmini Cc: Balbir Singh Cc: Andrea Arcangeli Cc: Nigel Cunningham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 8 +---- kernel/power/hibernate.c | 1 - kernel/power/snapshot.c | 1 - kernel/power/swap.c | 6 ++-- mm/swapfile.c | 94 ++++++++++++------------------------------------ 5 files changed, 26 insertions(+), 84 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 2fee51a11b73..bf4eb62506db 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -315,6 +315,7 @@ extern long nr_swap_pages; extern long total_swap_pages; extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); +extern swp_entry_t get_swap_page_of_type(int); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); @@ -331,13 +332,6 @@ extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; -#ifdef CONFIG_HIBERNATION -void hibernation_freeze_swap(void); -void hibernation_thaw_swap(void); -swp_entry_t get_swap_for_hibernation(int type); -void swap_free_for_hibernation(swp_entry_t val); -#endif - /* linux/mm/thrash.c */ extern struct mm_struct *swap_token_mm; extern void grab_swap_token(struct mm_struct *); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index c77963938bca..8dc31e02ae12 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -338,7 +338,6 @@ int hibernation_snapshot(int platform_mode) goto Close; suspend_console(); - hibernation_freeze_swap(); saved_mask = clear_gfp_allowed_mask(GFP_IOFS); error = dpm_suspend_start(PMSG_FREEZE); if (error) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 5e7edfb05e66..f6cd6faf84fd 100644 --- 
a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1086,7 +1086,6 @@ void swsusp_free(void) buffer = NULL; alloc_normal = 0; alloc_highmem = 0; - hibernation_thaw_swap(); } /* Helper functions used for the shrinking of memory. */ diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 5d0059eed3e4..e6a5bdf61a37 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -136,10 +136,10 @@ sector_t alloc_swapdev_block(int swap) { unsigned long offset; - offset = swp_offset(get_swap_for_hibernation(swap)); + offset = swp_offset(get_swap_page_of_type(swap)); if (offset) { if (swsusp_extents_insert(offset)) - swap_free_for_hibernation(swp_entry(swap, offset)); + swap_free(swp_entry(swap, offset)); else return swapdev_block(swap, offset); } @@ -163,7 +163,7 @@ void free_all_swap_pages(int swap) ext = container_of(node, struct swsusp_extent, node); rb_erase(node, &swsusp_extents); for (offset = ext->start; offset <= ext->end; offset++) - swap_free_for_hibernation(swp_entry(swap, offset)); + swap_free(swp_entry(swap, offset)); kfree(ext); } diff --git a/mm/swapfile.c b/mm/swapfile.c index 1f3f9c59a73a..f08d165871b3 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -47,8 +47,6 @@ long nr_swap_pages; long total_swap_pages; static int least_priority; -static bool swap_for_hibernation; - static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; @@ -453,8 +451,6 @@ swp_entry_t get_swap_page(void) spin_lock(&swap_lock); if (nr_swap_pages <= 0) goto noswap; - if (swap_for_hibernation) - goto noswap; nr_swap_pages--; for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) { @@ -487,6 +483,28 @@ noswap: return (swp_entry_t) {0}; } +/* The only caller of this function is now susupend routine */ +swp_entry_t get_swap_page_of_type(int type) +{ + struct swap_info_struct *si; + pgoff_t offset; + + spin_lock(&swap_lock); + si = swap_info[type]; + if (si && (si->flags & SWP_WRITEOK)) { + nr_swap_pages--; + /* This is called for allocating swap entry, not cache */ + offset = scan_swap_map(si, 1); + if (offset) { + spin_unlock(&swap_lock); + return swp_entry(type, offset); + } + nr_swap_pages++; + } + spin_unlock(&swap_lock); + return (swp_entry_t) {0}; +} + static struct swap_info_struct *swap_info_get(swp_entry_t entry) { struct swap_info_struct *p; @@ -746,74 +764,6 @@ int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep) #endif #ifdef CONFIG_HIBERNATION - -static pgoff_t hibernation_offset[MAX_SWAPFILES]; -/* - * Once hibernation starts to use swap, we freeze swap_map[]. Otherwise, - * saved swap_map[] image to the disk will be an incomplete because it's - * changing without synchronization with hibernation snap shot. - * At resume, we just make swap_for_hibernation=false. We can forget - * used maps easily. - */ -void hibernation_freeze_swap(void) -{ - int i; - - spin_lock(&swap_lock); - - printk(KERN_INFO "PM: Freeze Swap\n"); - swap_for_hibernation = true; - for (i = 0; i < MAX_SWAPFILES; i++) - hibernation_offset[i] = 1; - spin_unlock(&swap_lock); -} - -void hibernation_thaw_swap(void) -{ - spin_lock(&swap_lock); - if (swap_for_hibernation) { - printk(KERN_INFO "PM: Thaw Swap\n"); - swap_for_hibernation = false; - } - spin_unlock(&swap_lock); -} - -/* - * Because updateing swap_map[] can make not-saved-status-change, - * we use our own easy allocator. - * Please see kernel/power/swap.c, Used swaps are recorded into - * RB-tree. 
- */ -swp_entry_t get_swap_for_hibernation(int type) -{ - pgoff_t off; - swp_entry_t val = {0}; - struct swap_info_struct *si; - - spin_lock(&swap_lock); - - si = swap_info[type]; - if (!si || !(si->flags & SWP_WRITEOK)) - goto done; - - for (off = hibernation_offset[type]; off < si->max; ++off) { - if (!si->swap_map[off]) - break; - } - if (off < si->max) { - val = swp_entry(type, off); - hibernation_offset[type] = off + 1; - } -done: - spin_unlock(&swap_lock); - return val; -} - -void swap_free_for_hibernation(swp_entry_t ent) -{ - /* Nothing to do */ -} - /* * Find the swap type that corresponds to given device (if any). * -- cgit v1.2.3 From 3399446632739fcd05fd8b272b476a69c6e6d14a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 9 Sep 2010 16:38:11 -0700 Subject: swap: discard while swapping only if SWAP_FLAG_DISCARD Tests with recent firmware on Intel X25-M 80GB and OCZ Vertex 60GB SSDs show a shift since I last tested in December: in part because of firmware updates, in part because of the necessary move from barriers to awaiting completion at the block layer. While discard at swapon still shows as slightly beneficial on both, discarding a 1MB swap cluster when allocating is now disadvantageous: adds 25% overhead on Intel, adds 230% on OCZ (YMMV). Surrender: discard as presently implemented is more hindrance than help for swap; but might prove useful on other devices, or with improvements. So continue to do the discard at swapon, but make discard while swapping conditional on a SWAP_FLAG_DISCARD to sys_swapon() (which has been using only the lower 16 bits of int flags). We can add a --discard or -d to swapon(8), and a "discard" to swap in /etc/fstab: matching the mount option for btrfs, ext4, fat, gfs2, nilfs2. Signed-off-by: Hugh Dickins Cc: Christoph Hellwig Cc: Nigel Cunningham Cc: Tejun Heo Cc: Jens Axboe Cc: James Bottomley Cc: "Martin K. Petersen" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 ++- mm/swapfile.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index bf4eb62506db..7cdd63366f88 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -19,6 +19,7 @@ struct bio; #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 +#define SWAP_FLAG_DISCARD 0x10000 /* discard swap cluster after use */ static inline int current_is_kswapd(void) { @@ -142,7 +143,7 @@ struct swap_extent { enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? 
*/ - SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */ + SWP_DISCARDABLE = (1 << 2), /* swapon+blkdev support discard */ SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */ diff --git a/mm/swapfile.c b/mm/swapfile.c index 1894dead0b58..7c703ff2f36f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2047,7 +2047,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->flags |= SWP_SOLIDSTATE; p->cluster_next = 1 + (random32() % p->highest_bit); } - if (discard_swap(p) == 0) + if (discard_swap(p) == 0 && (swap_flags & SWAP_FLAG_DISCARD)) p->flags |= SWP_DISCARDABLE; } -- cgit v1.2.3 From aa45484031ddee09b06350ab8528bfe5b2c76d1c Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 9 Sep 2010 16:38:17 -0700 Subject: mm: page allocator: calculate a better estimate of NR_FREE_PAGES when memory is low and kswapd is awake Ordinarily watermark checks are based on the vmstat NR_FREE_PAGES as it is cheaper than scanning a number of lists. To avoid synchronization overhead, counter deltas are maintained on a per-cpu basis and drained both periodically and when the delta is above a threshold. On large CPU systems, the difference between the estimated and real value of NR_FREE_PAGES can be very high. If NR_FREE_PAGES is much higher than the number of pages actually free in the buddy lists, the VM can allocate pages below the min watermark, at worst reducing the real number of free pages to zero. Even if the OOM killer kills some victim to free memory, it may not free memory if the exit path requires a new page, resulting in livelock. This patch introduces a zone_page_state_snapshot() function (courtesy of Christoph) that takes a slightly more accurate view of an arbitrary vmstat counter. It is used to read NR_FREE_PAGES while kswapd is awake to avoid the watermark being accidentally broken. The estimate is not perfect and may result in cache line bounces but is expected to be lighter than the IPI calls necessary to continually drain the per-cpu counters while kswapd is awake. 
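To make the drift problem concrete outside the kernel, here is a minimal standalone C sketch of a cheap counter read versus a snapshot read that folds the pending per-cpu deltas back in; the structure mirrors the zone_page_state()/zone_page_state_snapshot() pair in the patch below, but every name and number here is illustrative, not kernel code:

#include <stdio.h>

#define NCPUS 4

static long vm_stat_free = 1000;   /* global NR_FREE_PAGES estimate */
static long vm_stat_diff[NCPUS] = { -300, -250, -200, -150 };
                                   /* undrained per-cpu deltas */

/* cheap read, as used for ordinary watermark checks */
static long read_free_estimate(void)
{
    return vm_stat_free;
}

/* snapshot read: fold in the pending deltas; still racy, but far closer */
static long read_free_snapshot(void)
{
    long x = vm_stat_free;
    int cpu;

    for (cpu = 0; cpu < NCPUS; cpu++)
        x += vm_stat_diff[cpu];
    return x < 0 ? 0 : x;
}

int main(void)
{
    printf("estimate %ld, snapshot %ld\n",
           read_free_estimate(), read_free_snapshot());
    return 0;
}

With the deltas above, the cheap read reports 1000 free pages while only 100 really remain; it is exactly this gap that can let allocations slip below the min watermark, which is why the patch switches to the snapshot read while kswapd is awake.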
Signed-off-by: Christoph Lameter Signed-off-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 13 +++++++++++++ include/linux/vmstat.h | 22 ++++++++++++++++++++++ mm/mmzone.c | 21 +++++++++++++++++++++ mm/page_alloc.c | 4 ++-- mm/vmstat.c | 15 ++++++++++++++- 5 files changed, 72 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6e6e62648a4d..3984c4eb41fd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -283,6 +283,13 @@ struct zone { /* zone watermarks, access with *_wmark_pages(zone) macros */ unsigned long watermark[NR_WMARK]; + /* + * When free pages are below this point, additional steps are taken + * when reading the number of free pages to avoid per-cpu counter + * drift allowing watermarks to be breached + */ + unsigned long percpu_drift_mark; + /* * We don't know if the memory that we're going to allocate will be freeable * or/and it will be released eventually, so to avoid totally wasting several @@ -441,6 +448,12 @@ static inline int zone_is_oom_locked(const struct zone *zone) return test_bit(ZONE_OOM_LOCKED, &zone->flags); } +#ifdef CONFIG_SMP +unsigned long zone_nr_free_pages(struct zone *zone); +#else +#define zone_nr_free_pages(zone) zone_page_state(zone, NR_FREE_PAGES) +#endif /* CONFIG_SMP */ + /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 7f43ccdc1d38..eaaea37b3b75 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -170,6 +170,28 @@ static inline unsigned long zone_page_state(struct zone *zone, return x; } +/* + * More accurate version that also considers the currently pending + * deltas. For that we need to loop over all cpus to find the current + * deltas. There is no synchronization so the result cannot be + * exactly accurate either. + */ +static inline unsigned long zone_page_state_snapshot(struct zone *zone, + enum zone_stat_item item) +{ + long x = atomic_long_read(&zone->vm_stat[item]); + +#ifdef CONFIG_SMP + int cpu; + for_each_online_cpu(cpu) + x += per_cpu_ptr(zone->pageset, cpu)->vm_stat_diff[item]; + + if (x < 0) + x = 0; +#endif + return x; +} + extern unsigned long global_reclaimable_pages(void); extern unsigned long zone_reclaimable_pages(struct zone *zone); diff --git a/mm/mmzone.c b/mm/mmzone.c index f5b7d1760213..e35bfb82c855 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -87,3 +87,24 @@ int memmap_valid_within(unsigned long pfn, return 1; } #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ + +#ifdef CONFIG_SMP +/* Called when a more accurate view of NR_FREE_PAGES is needed */ +unsigned long zone_nr_free_pages(struct zone *zone) +{ + unsigned long nr_free_pages = zone_page_state(zone, NR_FREE_PAGES); + + /* + * While kswapd is awake, it is considered the zone is under some + * memory pressure. Under pressure, there is a risk that + * per-cpu-counter-drift will allow the min watermark to be breached + * potentially causing a live-lock. 
While kswapd is awake and + * free pages are low, get a better estimate for free pages + */ + if (nr_free_pages < zone->percpu_drift_mark && + !waitqueue_active(&zone->zone_pgdat->kswapd_wait)) + return zone_page_state_snapshot(zone, NR_FREE_PAGES); + + return nr_free_pages; +} +#endif /* CONFIG_SMP */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 452e2ba06c7c..b2d21e06d45d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1462,7 +1462,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, { /* free_pages my go negative - that's OK */ long min = mark; - long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1; + long free_pages = zone_nr_free_pages(z) - (1 << order) + 1; int o; if (alloc_flags & ALLOC_HIGH) @@ -2424,7 +2424,7 @@ void show_free_areas(void) " all_unreclaimable? %s" "\n", zone->name, - K(zone_page_state(zone, NR_FREE_PAGES)), + K(zone_nr_free_pages(zone)), K(min_wmark_pages(zone)), K(low_wmark_pages(zone)), K(high_wmark_pages(zone)), diff --git a/mm/vmstat.c b/mm/vmstat.c index a8d6b59e609a..355a9e669aaa 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -138,11 +138,24 @@ static void refresh_zone_stat_thresholds(void) int threshold; for_each_populated_zone(zone) { + unsigned long max_drift, tolerate_drift; + threshold = calculate_threshold(zone); for_each_online_cpu(cpu) per_cpu_ptr(zone->pageset, cpu)->stat_threshold = threshold; + + /* + * Only set percpu_drift_mark if there is a danger that + * NR_FREE_PAGES reports the low watermark is ok when in fact + * the min watermark could be breached by an allocation + */ + tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone); + max_drift = num_online_cpus() * threshold; + if (max_drift > tolerate_drift) + zone->percpu_drift_mark = high_wmark_pages(zone) + + max_drift; } } @@ -813,7 +826,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n scanned %lu" "\n spanned %lu" "\n present %lu", - zone_page_state(zone, NR_FREE_PAGES), + zone_nr_free_pages(zone), min_wmark_pages(zone), low_wmark_pages(zone), high_wmark_pages(zone), -- cgit v1.2.3 From e2f3d75fc0e4a0d03c61872bad39ffa2e74a04ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 7 Sep 2010 14:05:31 +0200 Subject: libata: skip EH autopsy and recovery during suspend For some mysterious reason, certain hardware reacts badly to usual EH actions while the system is going for suspend. As the devices won't be needed until the system is resumed, ask EH to skip usual autopsy and recovery and proceed directly to suspend. Signed-off-by: Tejun Heo Tested-by: Stephan Diestelhorst Cc: stable@kernel.org Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 14 +++++++++++++- drivers/ata/libata-eh.c | 4 ++++ include/linux/libata.h | 1 + 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c035b3d041ee..932eaee50245 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5418,6 +5418,7 @@ static int ata_host_request_pm(struct ata_host *host, pm_message_t mesg, */ int ata_host_suspend(struct ata_host *host, pm_message_t mesg) { + unsigned int ehi_flags = ATA_EHI_QUIET; int rc; /* @@ -5426,7 +5427,18 @@ int ata_host_suspend(struct ata_host *host, pm_message_t mesg) */ ata_lpm_enable(host); - rc = ata_host_request_pm(host, mesg, 0, ATA_EHI_QUIET, 1); + /* + * On some hardware, device fails to respond after spun down + * for suspend. 
As the device won't be used before being + * resumed, we don't need to touch the device. Ask EH to skip + * the usual stuff and proceed directly to suspend. + * + * http://thread.gmane.org/gmane.linux.ide/46764 + */ + if (mesg.event == PM_EVENT_SUSPEND) + ehi_flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_NO_RECOVERY; + + rc = ata_host_request_pm(host, mesg, 0, ehi_flags, 1); if (rc == 0) host->dev->power.power_state = mesg; return rc; } diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index c9ae299b8342..e48302eae55f 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -3235,6 +3235,10 @@ static int ata_eh_skip_recovery(struct ata_link *link) if (link->flags & ATA_LFLAG_DISABLED) return 1; + /* skip if explicitly requested */ + if (ehc->i.flags & ATA_EHI_NO_RECOVERY) + return 1; + /* thaw frozen port and recover failed devices */ if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) return 0; diff --git a/include/linux/libata.h b/include/linux/libata.h index f010f18a0f86..7de282d8bedf 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -335,6 +335,7 @@ enum { ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */ ATA_EHI_NO_AUTOPSY = (1 << 2), /* no autopsy */ ATA_EHI_QUIET = (1 << 3), /* be quiet */ + ATA_EHI_NO_RECOVERY = (1 << 4), /* no recovery */ ATA_EHI_DID_SOFTRESET = (1 << 16), /* already soft-reset this port */ ATA_EHI_DID_HARDRESET = (1 << 17), /* already soft-reset this port */ -- cgit v1.2.3 From ea3c64506ea7965f86f030155e6fdef381de10e2 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 31 Aug 2010 16:20:36 -0700 Subject: libata-sff: Reenable Port Multiplier after libata-sff remodeling. Keep track of the link on which the current request is in progress. This allows supporting links behind a port multiplier. Not all of libata-sff is PMP compliant. The code for native BMDMA controllers does not take PMP into account. Tested on Marvell 7042 and Sil7526. 
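As a rough sketch of why the queued task must remember its link (the types below are simplified stand-ins, not the real libata structures): with a port multiplier, one port fans out to several links, so deferred PIO work queued against the port has to record which link it serves and must not be silently claimed by a second link while still pending, mirroring the WARN_ON in ata_sff_queue_pio_task() in the diff that follows:

#include <stdio.h>

struct link { int id; };

struct port {
    struct link *sff_pio_task_link; /* link the pending PIO task serves */
};

static void queue_pio_task(struct port *ap, struct link *link)
{
    /* a pending task's link must not be overwritten by another link */
    if (ap->sff_pio_task_link && ap->sff_pio_task_link != link) {
        printf("bug: task already queued for link %d\n",
               ap->sff_pio_task_link->id);
        return;
    }
    ap->sff_pio_task_link = link;
    printf("PIO task queued for link %d\n", link->id);
}

int main(void)
{
    struct link l0 = { 0 }, l1 = { 1 };
    struct port ap = { NULL };

    queue_pio_task(&ap, &l0);
    queue_pio_task(&ap, &l1); /* flagged: l0's task is still pending */
    return 0;
}

The worker then reads the recorded link instead of assuming ap->link, and clears it before moving the state machine, since completing one command may immediately queue another on a different link.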
Signed-off-by: Gwendal Grignou Signed-off-by: Jeff Garzik --- drivers/ata/libata-sff.c | 38 ++++++++++++++++++++++++++++---------- drivers/ata/sata_mv.c | 2 +- include/linux/libata.h | 3 ++- 3 files changed, 31 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index dee3c2c52562..e30c537cce32 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1045,7 +1045,8 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, u8 status, int in_wq) { - struct ata_eh_info *ehi = &ap->link.eh_info; + struct ata_link *link = qc->dev->link; + struct ata_eh_info *ehi = &link->eh_info; unsigned long flags = 0; int poll_next; @@ -1301,8 +1302,14 @@ fsm_start: } EXPORT_SYMBOL_GPL(ata_sff_hsm_move); -void ata_sff_queue_pio_task(struct ata_port *ap, unsigned long delay) +void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay) { + struct ata_port *ap = link->ap; + + WARN_ON((ap->sff_pio_task_link != NULL) && + (ap->sff_pio_task_link != link)); + ap->sff_pio_task_link = link; + /* may fail if ata_sff_flush_pio_task() in progress */ queue_delayed_work(ata_sff_wq, &ap->sff_pio_task, msecs_to_jiffies(delay)); @@ -1324,14 +1331,18 @@ static void ata_sff_pio_task(struct work_struct *work) { struct ata_port *ap = container_of(work, struct ata_port, sff_pio_task.work); + struct ata_link *link = ap->sff_pio_task_link; struct ata_queued_cmd *qc; u8 status; int poll_next; + BUG_ON(ap->sff_pio_task_link == NULL); /* qc can be NULL if timeout occurred */ - qc = ata_qc_from_tag(ap, ap->link.active_tag); - if (!qc) + qc = ata_qc_from_tag(ap, link->active_tag); + if (!qc) { + ap->sff_pio_task_link = NULL; return; + } fsm_start: WARN_ON_ONCE(ap->hsm_task_state == HSM_ST_IDLE); @@ -1348,11 +1359,16 @@ fsm_start: msleep(2); status = ata_sff_busy_wait(ap, ATA_BUSY, 10); if (status & ATA_BUSY) { - ata_sff_queue_pio_task(ap, ATA_SHORT_PAUSE); + ata_sff_queue_pio_task(link, ATA_SHORT_PAUSE); return; } } + /* + * hsm_move() may trigger another command to be processed. + * clean the link beforehand. + */ + ap->sff_pio_task_link = NULL; /* move the HSM */ poll_next = ata_sff_hsm_move(ap, qc, status, 1); @@ -1379,6 +1395,7 @@ fsm_start: unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; + struct ata_link *link = qc->dev->link; /* Use polling pio if the LLD doesn't handle * interrupt driven pio and atapi CDB interrupt. @@ -1399,7 +1416,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) ap->hsm_task_state = HSM_ST_LAST; if (qc->tf.flags & ATA_TFLAG_POLLING) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); break; @@ -1412,7 +1429,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) if (qc->tf.flags & ATA_TFLAG_WRITE) { /* PIO data out protocol */ ap->hsm_task_state = HSM_ST_FIRST; - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); /* always send first data block using the * ata_sff_pio_task() codepath. @@ -1422,7 +1439,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) ap->hsm_task_state = HSM_ST; if (qc->tf.flags & ATA_TFLAG_POLLING) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); /* if polling, ata_sff_pio_task() handles the * rest. 
otherwise, interrupt handler takes @@ -1444,7 +1461,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) /* send cdb by polling if no cdb interrupt */ if ((!(qc->dev->flags & ATA_DFLAG_CDB_INTR)) || (qc->tf.flags & ATA_TFLAG_POLLING)) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); break; default: @@ -2737,6 +2754,7 @@ EXPORT_SYMBOL_GPL(ata_bmdma_dumb_qc_prep); unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; + struct ata_link *link = qc->dev->link; /* defer PIO handling to sff_qc_issue */ if (!ata_is_dma(qc->tf.protocol)) @@ -2765,7 +2783,7 @@ unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc) /* send cdb by polling if no cdb interrupt */ if (!(qc->dev->flags & ATA_DFLAG_CDB_INTR)) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); break; default: diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 81982594a014..a9fd9709c262 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -2284,7 +2284,7 @@ static unsigned int mv_qc_issue_fis(struct ata_queued_cmd *qc) } if (qc->tf.flags & ATA_TFLAG_POLLING) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); return 0; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 7de282d8bedf..45fb2967b66d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -724,6 +724,7 @@ struct ata_port { struct ata_ioports ioaddr; /* ATA cmd/ctl/dma register blocks */ u8 ctl; /* cache of ATA control register */ u8 last_ctl; /* Cache last written value */ + struct ata_link* sff_pio_task_link; /* link currently used */ struct delayed_work sff_pio_task; #ifdef CONFIG_ATA_BMDMA struct ata_bmdma_prd *bmdma_prd; /* BMDMA SG list */ @@ -1595,7 +1596,7 @@ extern void ata_sff_irq_on(struct ata_port *ap); extern void ata_sff_irq_clear(struct ata_port *ap); extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, u8 status, int in_wq); -extern void ata_sff_queue_pio_task(struct ata_port *ap, unsigned long delay); +extern void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay); extern unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc); extern bool ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc); extern unsigned int ata_sff_port_intr(struct ata_port *ap, -- cgit v1.2.3 From 8613e4c2872a87cc309a42de2c7091744dc54d0e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 9 Sep 2010 21:54:22 -0700 Subject: Input: add support for large scancodes Several devices use a high number of bits for scancodes. One important group is the Remote Controllers. Some new protocols like RC-6 define a scancode space of 64 bits. The current EVIO[CS]GKEYCODE ioctls allow replacing the scancode/keycode translation tables, but they are limited to 32-bit scancodes. Also, if userspace wants to clean the existing table, replacing it by a new one, it needs to run a loop calling the ioctls over the entire sparse scancode space. To solve those problems, this patch extends the ioctls to allow drivers to handle scancodes up to 32 bytes long (the length could be extended in the future should such need arise) and allow userspace to query and set scancode to keycode mappings not only by scancode but also by index. Compatibility code was also added to handle the old format of EVIO[CS]GKEYCODE ioctls. Folded fixes by: - Dan Carpenter: locking fixes for the original implementation - Jarod Wilson: fix crash when setting keycode and wiring up get/set handlers in original implementation. 
- Dmitry Torokhov: rework to consolidate old and new scancode handling, provide options to act either by index or scancode. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Dan Carpenter Signed-off-by: Jarod Wilson Signed-off-by: Dmitry Torokhov --- drivers/char/keyboard.c | 31 ++++++-- drivers/input/evdev.c | 100 ++++++++++++++++++++----- drivers/input/input.c | 192 ++++++++++++++++++++++++++++++++++++------------ include/linux/input.h | 55 +++++++++++--- 4 files changed, 292 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c index a7ca75212bfe..e95d7876ca6b 100644 --- a/drivers/char/keyboard.c +++ b/drivers/char/keyboard.c @@ -175,8 +175,7 @@ EXPORT_SYMBOL_GPL(unregister_keyboard_notifier); */ struct getset_keycode_data { - unsigned int scancode; - unsigned int keycode; + struct input_keymap_entry ke; int error; }; @@ -184,32 +183,50 @@ static int getkeycode_helper(struct input_handle *handle, void *data) { struct getset_keycode_data *d = data; - d->error = input_get_keycode(handle->dev, d->scancode, &d->keycode); + d->error = input_get_keycode(handle->dev, &d->ke); return d->error == 0; /* stop as soon as we successfully get one */ } int getkeycode(unsigned int scancode) { - struct getset_keycode_data d = { scancode, 0, -ENODEV }; + struct getset_keycode_data d = { + .ke = { + .flags = 0, + .len = sizeof(scancode), + .keycode = 0, + }, + .error = -ENODEV, + }; + + memcpy(d.ke.scancode, &scancode, sizeof(scancode)); input_handler_for_each_handle(&kbd_handler, &d, getkeycode_helper); - return d.error ?: d.keycode; + return d.error ?: d.ke.keycode; } static int setkeycode_helper(struct input_handle *handle, void *data) { struct getset_keycode_data *d = data; - d->error = input_set_keycode(handle->dev, d->scancode, d->keycode); + d->error = input_set_keycode(handle->dev, &d->ke); return d->error == 0; /* stop as soon as we successfully set one */ } int setkeycode(unsigned int scancode, unsigned int keycode) { - struct getset_keycode_data d = { scancode, keycode, -ENODEV }; + struct getset_keycode_data d = { + .ke = { + .flags = 0, + .len = sizeof(scancode), + .keycode = keycode, + }, + .error = -ENODEV, + }; + + memcpy(d.ke.scancode, &scancode, sizeof(scancode)); input_handler_for_each_handle(&kbd_handler, &d, setkeycode_helper); diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index c908c5f83645..1ce9bf663206 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -534,6 +534,80 @@ static int handle_eviocgbit(struct input_dev *dev, } #undef OLD_KEY_MAX +static int evdev_handle_get_keycode(struct input_dev *dev, + void __user *p, size_t size) +{ + struct input_keymap_entry ke; + int error; + + memset(&ke, 0, sizeof(ke)); + + if (size == sizeof(unsigned int[2])) { + /* legacy case */ + int __user *ip = (int __user *)p; + + if (copy_from_user(ke.scancode, p, sizeof(unsigned int))) + return -EFAULT; + + ke.len = sizeof(unsigned int); + ke.flags = 0; + + error = input_get_keycode(dev, &ke); + if (error) + return error; + + if (put_user(ke.keycode, ip + 1)) + return -EFAULT; + + } else { + size = min(size, sizeof(ke)); + + if (copy_from_user(&ke, p, size)) + return -EFAULT; + + error = input_get_keycode(dev, &ke); + if (error) + return error; + + if (copy_to_user(p, &ke, size)) + return -EFAULT; + } + return 0; +} + +static int evdev_handle_set_keycode(struct input_dev *dev, + void __user *p, size_t size) +{ + struct input_keymap_entry ke; + + memset(&ke, 0, sizeof(ke)); + + if (size == sizeof(unsigned 
int[2])) { + /* legacy case */ + int __user *ip = (int __user *)p; + + if (copy_from_user(ke.scancode, p, sizeof(unsigned int))) + return -EFAULT; + + if (get_user(ke.keycode, ip + 1)) + return -EFAULT; + + ke.len = sizeof(unsigned int); + ke.flags = 0; + + } else { + size = min(size, sizeof(ke)); + + if (copy_from_user(&ke, p, size)) + return -EFAULT; + + if (ke.len > sizeof(ke.scancode)) + return -EINVAL; + } + + return input_set_keycode(dev, &ke); +} + static long evdev_do_ioctl(struct file *file, unsigned int cmd, void __user *p, int compat_mode) { @@ -580,25 +654,6 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, return 0; - case EVIOCGKEYCODE: - if (get_user(t, ip)) - return -EFAULT; - - error = input_get_keycode(dev, t, &v); - if (error) - return error; - - if (put_user(v, ip + 1)) - return -EFAULT; - - return 0; - - case EVIOCSKEYCODE: - if (get_user(t, ip) || get_user(v, ip + 1)) - return -EFAULT; - - return input_set_keycode(dev, t, v); - case EVIOCRMFF: return input_ff_erase(dev, (int)(unsigned long) p, file); @@ -620,7 +675,6 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, /* Now check variable-length commands */ #define EVIOC_MASK_SIZE(nr) ((nr) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) - switch (EVIOC_MASK_SIZE(cmd)) { case EVIOCGKEY(0): @@ -654,6 +708,12 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, return -EFAULT; return error; + + case EVIOC_MASK_SIZE(EVIOCGKEYCODE): + return evdev_handle_get_keycode(dev, p, size); + + case EVIOC_MASK_SIZE(EVIOCSKEYCODE): + return evdev_handle_set_keycode(dev, p, size); } /* Multi-number variable-length handlers */ diff --git a/drivers/input/input.c b/drivers/input/input.c index acb3c8095c65..832771e73663 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -634,78 +634,141 @@ static void input_disconnect_device(struct input_dev *dev) spin_unlock_irq(&dev->event_lock); } -static int input_fetch_keycode(struct input_dev *dev, int scancode) +/** + * input_scancode_to_scalar() - converts scancode in &struct input_keymap_entry + * @ke: keymap entry containing scancode to be converted. + * @scancode: pointer to the location where converted scancode should + * be stored. + * + * This function is used to convert scancode stored in &struct keymap_entry + * into scalar form understood by legacy keymap handling methods. These + * methods expect scancodes to be represented as 'unsigned int'. + */ +int input_scancode_to_scalar(const struct input_keymap_entry *ke, + unsigned int *scancode) +{ + switch (ke->len) { + case 1: + *scancode = *((u8 *)ke->scancode); + break; + + case 2: + *scancode = *((u16 *)ke->scancode); + break; + + case 4: + *scancode = *((u32 *)ke->scancode); + break; + + default: + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL(input_scancode_to_scalar); + +/* + * Those routines handle the default case where no [gs]etkeycode() is + * defined. In this case, an array indexed by the scancode is used. 
+ */ + +static unsigned int input_fetch_keycode(struct input_dev *dev, + unsigned int index) { switch (dev->keycodesize) { - case 1: - return ((u8 *)dev->keycode)[scancode]; + case 1: + return ((u8 *)dev->keycode)[index]; - case 2: - return ((u16 *)dev->keycode)[scancode]; + case 2: + return ((u16 *)dev->keycode)[index]; - default: - return ((u32 *)dev->keycode)[scancode]; + default: + return ((u32 *)dev->keycode)[index]; } } static int input_default_getkeycode(struct input_dev *dev, - unsigned int scancode, - unsigned int *keycode) + struct input_keymap_entry *ke) { + unsigned int index; + int error; + if (!dev->keycodesize) return -EINVAL; - if (scancode >= dev->keycodemax) + if (ke->flags & INPUT_KEYMAP_BY_INDEX) + index = ke->index; + else { + error = input_scancode_to_scalar(ke, &index); + if (error) + return error; + } + + if (index >= dev->keycodemax) return -EINVAL; - *keycode = input_fetch_keycode(dev, scancode); + ke->keycode = input_fetch_keycode(dev, index); + ke->index = index; + ke->len = sizeof(index); + memcpy(ke->scancode, &index, sizeof(index)); return 0; } static int input_default_setkeycode(struct input_dev *dev, - unsigned int scancode, - unsigned int keycode) + const struct input_keymap_entry *ke, + unsigned int *old_keycode) { - int old_keycode; + unsigned int index; + int error; int i; - if (scancode >= dev->keycodemax) + if (!dev->keycodesize) return -EINVAL; - if (!dev->keycodesize) + if (ke->flags & INPUT_KEYMAP_BY_INDEX) { + index = ke->index; + } else { + error = input_scancode_to_scalar(ke, &index); + if (error) + return error; + } + + if (index >= dev->keycodemax) return -EINVAL; - if (dev->keycodesize < sizeof(keycode) && (keycode >> (dev->keycodesize * 8))) + if (dev->keycodesize < sizeof(dev->keycode) && + (ke->keycode >> (dev->keycodesize * 8))) return -EINVAL; switch (dev->keycodesize) { case 1: { u8 *k = (u8 *)dev->keycode; - old_keycode = k[scancode]; - k[scancode] = keycode; + *old_keycode = k[index]; + k[index] = ke->keycode; break; } case 2: { u16 *k = (u16 *)dev->keycode; - old_keycode = k[scancode]; - k[scancode] = keycode; + *old_keycode = k[index]; + k[index] = ke->keycode; break; } default: { u32 *k = (u32 *)dev->keycode; - old_keycode = k[scancode]; - k[scancode] = keycode; + *old_keycode = k[index]; + k[index] = ke->keycode; break; } } - __clear_bit(old_keycode, dev->keybit); - __set_bit(keycode, dev->keybit); + __clear_bit(*old_keycode, dev->keybit); + __set_bit(ke->keycode, dev->keybit); for (i = 0; i < dev->keycodemax; i++) { - if (input_fetch_keycode(dev, i) == old_keycode) { - __set_bit(old_keycode, dev->keybit); + if (input_fetch_keycode(dev, i) == *old_keycode) { + __set_bit(*old_keycode, dev->keybit); break; /* Setting the bit twice is useless, so break */ } } @@ -716,53 +779,86 @@ static int input_default_setkeycode(struct input_dev *dev, /** * input_get_keycode - retrieve keycode currently mapped to a given scancode * @dev: input device which keymap is being queried - * @scancode: scancode (or its equivalent for device in question) for which - * keycode is needed - * @keycode: result + * @ke: keymap entry * * This function should be called by anyone interested in retrieving current - * keymap. Presently keyboard and evdev handlers use it. + * keymap. Presently evdev handlers use it. 
*/ -int input_get_keycode(struct input_dev *dev, - unsigned int scancode, unsigned int *keycode) +int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke) { unsigned long flags; int retval; spin_lock_irqsave(&dev->event_lock, flags); - retval = dev->getkeycode(dev, scancode, keycode); - spin_unlock_irqrestore(&dev->event_lock, flags); + if (dev->getkeycode) { + /* + * Support for legacy drivers, that don't implement the new + * ioctls + */ + u32 scancode = ke->index; + + memcpy(ke->scancode, &scancode, sizeof(scancode)); + ke->len = sizeof(scancode); + retval = dev->getkeycode(dev, scancode, &ke->keycode); + } else { + retval = dev->getkeycode_new(dev, ke); + } + + spin_unlock_irqrestore(&dev->event_lock, flags); return retval; } EXPORT_SYMBOL(input_get_keycode); /** - * input_get_keycode - assign new keycode to a given scancode + * input_set_keycode - attribute a keycode to a given scancode * @dev: input device which keymap is being updated - * @scancode: scancode (or its equivalent for device in question) - * @keycode: new keycode to be assigned to the scancode + * @ke: new keymap entry * * This function should be called by anyone needing to update current * keymap. Presently keyboard and evdev handlers use it. */ int input_set_keycode(struct input_dev *dev, - unsigned int scancode, unsigned int keycode) + const struct input_keymap_entry *ke) { unsigned long flags; unsigned int old_keycode; int retval; - if (keycode > KEY_MAX) + if (ke->keycode > KEY_MAX) return -EINVAL; spin_lock_irqsave(&dev->event_lock, flags); - retval = dev->getkeycode(dev, scancode, &old_keycode); - if (retval) - goto out; + if (dev->setkeycode) { + /* + * Support for legacy drivers, that don't implement the new + * ioctls + */ + unsigned int scancode; + + retval = input_scancode_to_scalar(ke, &scancode); + if (retval) + goto out; + + /* + * We need to know the old scancode, in order to generate a + * keyup effect, if the set operation happens successfully + */ + if (!dev->getkeycode) { + retval = -EINVAL; + goto out; + } + + retval = dev->getkeycode(dev, scancode, &old_keycode); + if (retval) + goto out; + + retval = dev->setkeycode(dev, scancode, ke->keycode); + } else { + retval = dev->setkeycode_new(dev, ke, &old_keycode); + } - retval = dev->setkeycode(dev, scancode, keycode); if (retval) goto out; @@ -1759,11 +1855,11 @@ int input_register_device(struct input_dev *dev) dev->rep[REP_PERIOD] = 33; } - if (!dev->getkeycode) - dev->getkeycode = input_default_getkeycode; + if (!dev->getkeycode && !dev->getkeycode_new) + dev->getkeycode_new = input_default_getkeycode; - if (!dev->setkeycode) - dev->setkeycode = input_default_setkeycode; + if (!dev->setkeycode && !dev->setkeycode_new) + dev->setkeycode_new = input_default_setkeycode; dev_set_name(&dev->dev, "input%ld", (unsigned long) atomic_inc_return(&input_no) - 1); diff --git a/include/linux/input.h b/include/linux/input.h index 789265123531..0057698fd975 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -34,7 +34,7 @@ struct input_event { * Protocol version. */ -#define EV_VERSION 0x010000 +#define EV_VERSION 0x010001 /* * IOCTLs (0x00 - 0x7f) @@ -56,12 +56,37 @@ struct input_absinfo { __s32 resolution; }; +/** + * struct input_keymap_entry - used by EVIOCGKEYCODE/EVIOCSKEYCODE ioctls + * @scancode: scancode represented in machine-endian form. + * @len: length of the scancode that resides in @scancode buffer. 
+ * @index: index in the keymap, may be used instead of scancode + * @flags: allows to specify how kernel should handle the request. For + * example, setting INPUT_KEYMAP_BY_INDEX flag indicates that kernel + * should perform lookup in keymap by @index instead of @scancode + * @keycode: key code assigned to this scancode + * + * The structure is used to retrieve and modify keymap data. Users have + * option of performing lookup either by @scancode itself or by @index + * in keymap entry. EVIOCGKEYCODE will also return scancode or index + * (depending on which element was used to perform lookup). + */ +struct input_keymap_entry { +#define INPUT_KEYMAP_BY_INDEX (1 << 0) + __u8 flags; + __u8 len; + __u16 index; + __u32 keycode; + __u8 scancode[32]; +}; + #define EVIOCGVERSION _IOR('E', 0x01, int) /* get driver version */ #define EVIOCGID _IOR('E', 0x02, struct input_id) /* get device ID */ #define EVIOCGREP _IOR('E', 0x03, unsigned int[2]) /* get repeat settings */ #define EVIOCSREP _IOW('E', 0x03, unsigned int[2]) /* set repeat settings */ -#define EVIOCGKEYCODE _IOR('E', 0x04, unsigned int[2]) /* get keycode */ -#define EVIOCSKEYCODE _IOW('E', 0x04, unsigned int[2]) /* set keycode */ + +#define EVIOCGKEYCODE _IOR('E', 0x04, struct input_keymap_entry) /* get keycode */ +#define EVIOCSKEYCODE _IOW('E', 0x04, struct input_keymap_entry) /* set keycode */ #define EVIOCGNAME(len) _IOC(_IOC_READ, 'E', 0x06, len) /* get device name */ #define EVIOCGPHYS(len) _IOC(_IOC_READ, 'E', 0x07, len) /* get physical location */ @@ -73,8 +98,8 @@ struct input_absinfo { #define EVIOCGSW(len) _IOC(_IOC_READ, 'E', 0x1b, len) /* get all switch states */ #define EVIOCGBIT(ev,len) _IOC(_IOC_READ, 'E', 0x20 + ev, len) /* get event bits */ -#define EVIOCGABS(abs) _IOR('E', 0x40 + abs, struct input_absinfo) /* get abs value/limits */ -#define EVIOCSABS(abs) _IOW('E', 0xc0 + abs, struct input_absinfo) /* set abs value/limits */ +#define EVIOCGABS(abs) _IOR('E', 0x40 + abs, struct input_absinfo) /* get abs value/limits */ +#define EVIOCSABS(abs) _IOW('E', 0xc0 + abs, struct input_absinfo) /* set abs value/limits */ #define EVIOCSFF _IOC(_IOC_WRITE, 'E', 0x80, sizeof(struct ff_effect)) /* send a force effect to a force feedback device */ #define EVIOCRMFF _IOW('E', 0x81, int) /* Erase a force effect */ @@ -1088,13 +1113,13 @@ struct input_mt_slot { * @keycodemax: size of keycode table * @keycodesize: size of elements in keycode table * @keycode: map of scancodes to keycodes for this device + * @getkeycode: optional legacy method to retrieve current keymap. * @setkeycode: optional method to alter current keymap, used to implement * sparse keymaps. If not supplied default mechanism will be used. * The method is being called while holding event_lock and thus must * not sleep - * @getkeycode: optional method to retrieve current keymap. If not supplied - * default mechanism will be used. 
The method is being called while - * holding event_lock and thus must not sleep + * @getkeycode_new: transition method + * @setkeycode_new: transition method * @ff: force feedback structure associated with the device if device * supports force feedback effects * @repeat_key: stores key code of the last key pressed; used to implement @@ -1168,10 +1193,16 @@ struct input_dev { unsigned int keycodemax; unsigned int keycodesize; void *keycode; + int (*setkeycode)(struct input_dev *dev, unsigned int scancode, unsigned int keycode); int (*getkeycode)(struct input_dev *dev, unsigned int scancode, unsigned int *keycode); + int (*setkeycode_new)(struct input_dev *dev, + const struct input_keymap_entry *ke, + unsigned int *old_keycode); + int (*getkeycode_new)(struct input_dev *dev, + struct input_keymap_entry *ke); struct ff_device *ff; @@ -1478,10 +1509,12 @@ INPUT_GENERATE_ABS_ACCESSORS(fuzz, fuzz) INPUT_GENERATE_ABS_ACCESSORS(flat, flat) INPUT_GENERATE_ABS_ACCESSORS(res, resolution) -int input_get_keycode(struct input_dev *dev, - unsigned int scancode, unsigned int *keycode); +int input_scancode_to_scalar(const struct input_keymap_entry *ke, + unsigned int *scancode); + +int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke); int input_set_keycode(struct input_dev *dev, - unsigned int scancode, unsigned int keycode); + const struct input_keymap_entry *ke); extern struct class input_class; -- cgit v1.2.3 From 9f470095068e415658ccc6977cf4b3f5be418526 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 9 Sep 2010 21:59:11 -0700 Subject: Input: media/IR - switch to using new keycode interface Switch the code to use the new style of getkeycode and setkeycode methods to allow retrieving and setting keycodes not only by their scancodes but also by index. Acked-by: Mauro Carvalho Chehab Signed-off-by: Dmitry Torokhov --- drivers/media/IR/ir-keytable.c | 393 +++++++++++++++++++++++++++-------------- include/media/rc-map.h | 2 +- 2 files changed, 263 insertions(+), 132 deletions(-) (limited to 'include') diff --git a/drivers/media/IR/ir-keytable.c b/drivers/media/IR/ir-keytable.c index 7e82a9df726b..5ca36a42f019 100644 --- a/drivers/media/IR/ir-keytable.c +++ b/drivers/media/IR/ir-keytable.c @@ -24,15 +24,57 @@ /* FIXME: IR_KEYPRESS_TIMEOUT should be protocol specific */ #define IR_KEYPRESS_TIMEOUT 250 +/** + * ir_create_table() - initializes a scancode table + * @rc_tab: the ir_scancode_table to initialize + * @name: name to assign to the table + * @ir_type: ir type to assign to the new table + * @size: initial size of the table + * @return: zero on success or a negative error code + * + * This routine will initialize the ir_scancode_table and will allocate + * memory to hold at least the specified number of elements. + */ +static int ir_create_table(struct ir_scancode_table *rc_tab, + const char *name, u64 ir_type, size_t size) +{ + rc_tab->name = name; + rc_tab->ir_type = ir_type; + rc_tab->alloc = roundup_pow_of_two(size * sizeof(struct ir_scancode)); + rc_tab->size = rc_tab->alloc / sizeof(struct ir_scancode); + rc_tab->scan = kmalloc(rc_tab->alloc, GFP_KERNEL); + if (!rc_tab->scan) + return -ENOMEM; + + IR_dprintk(1, "Allocated space for %u keycode entries (%u bytes)\n", + rc_tab->size, rc_tab->alloc); + return 0; +} + +/** + * ir_free_table() - frees memory allocated by a scancode table + * @rc_tab: the table whose mappings need to be freed + * + * This routine will free the memory allocated for key mappings used by the + * given scancode table.
+ */ +static void ir_free_table(struct ir_scancode_table *rc_tab) +{ + rc_tab->size = 0; + kfree(rc_tab->scan); + rc_tab->scan = NULL; +} + /** * ir_resize_table() - resizes a scancode table if necessary * @rc_tab: the ir_scancode_table to resize + * @gfp_flags: gfp flags to use when allocating memory * @return: zero on success or a negative error code * * This routine will shrink the ir_scancode_table if it has lots of * unused entries and grow it if it is full. */ -static int ir_resize_table(struct ir_scancode_table *rc_tab) +static int ir_resize_table(struct ir_scancode_table *rc_tab, gfp_t gfp_flags) { unsigned int oldalloc = rc_tab->alloc; unsigned int newalloc = oldalloc; @@ -57,7 +99,7 @@ static int ir_resize_table(struct ir_scancode_table *rc_tab) if (newalloc == oldalloc) return 0; - newscan = kmalloc(newalloc, GFP_ATOMIC); + newscan = kmalloc(newalloc, gfp_flags); if (!newscan) { IR_dprintk(1, "Failed to kmalloc %u bytes\n", newalloc); return -ENOMEM; @@ -72,26 +114,78 @@ static int ir_resize_table(struct ir_scancode_table *rc_tab) } /** - * ir_do_setkeycode() - internal function to set a keycode in the - * scancode->keycode table + * ir_update_mapping() - set a keycode in the scancode->keycode table * @dev: the struct input_dev device descriptor - * @rc_tab: the struct ir_scancode_table to set the keycode in - * @scancode: the scancode for the ir command - * @keycode: the keycode for the ir command - * @resize: whether the keytable may be shrunk - * @return: -EINVAL if the keycode could not be inserted, otherwise zero. + * @rc_tab: scancode table to be adjusted + * @index: index of the mapping that needs to be updated + * @new_keycode: the desired keycode + * @return: previous keycode assigned to the mapping + * + * This routine is used to update the scancode->keycode mapping at the given + * position. + */ +static unsigned int ir_update_mapping(struct input_dev *dev, + struct ir_scancode_table *rc_tab, + unsigned int index, + unsigned int new_keycode) +{ + int old_keycode = rc_tab->scan[index].keycode; + int i; + + /* Did the user wish to remove the mapping? */ + if (new_keycode == KEY_RESERVED || new_keycode == KEY_UNKNOWN) { + IR_dprintk(1, "#%d: Deleting scan 0x%04x\n", + index, rc_tab->scan[index].scancode); + rc_tab->len--; + memmove(&rc_tab->scan[index], &rc_tab->scan[index + 1], + (rc_tab->len - index) * sizeof(struct ir_scancode)); + } else { + IR_dprintk(1, "#%d: %s scan 0x%04x with key 0x%04x\n", + index, + old_keycode == KEY_RESERVED ? "New" : "Replacing", + rc_tab->scan[index].scancode, new_keycode); + rc_tab->scan[index].keycode = new_keycode; + __set_bit(new_keycode, dev->keybit); + } + + if (old_keycode != KEY_RESERVED) { + /* A previous mapping was updated... */ + __clear_bit(old_keycode, dev->keybit); + /* ... but another scancode might use the same keycode */ + for (i = 0; i < rc_tab->len; i++) { + if (rc_tab->scan[i].keycode == old_keycode) { + __set_bit(old_keycode, dev->keybit); + break; + } + } + + /* Possibly shrink the keytable, failure is not a problem */ + ir_resize_table(rc_tab, GFP_ATOMIC); + } + + return old_keycode; +} + +/** + * ir_establish_scancode() - locate a scancode in the scancode->keycode table + * @ir_dev: the struct ir_input_dev device descriptor + * @rc_tab: scancode table to be searched + * @scancode: the desired scancode + * @resize: controls whether we are allowed to resize the table to + * accommodate scancodes not yet present + * @return: index of the mapping containing scancode in question + * or -1U in case of failure.
* - * This routine is used internally to manipulate the scancode->keycode table. - * The caller has to hold @rc_tab->lock. + * This routine is used to locate given scancode in ir_scancode_table. + * If scancode is not yet present the routine will allocate a new slot + * for it. */ -static int ir_do_setkeycode(struct input_dev *dev, - struct ir_scancode_table *rc_tab, - unsigned scancode, unsigned keycode, - bool resize) +static unsigned int ir_establish_scancode(struct ir_input_dev *ir_dev, + struct ir_scancode_table *rc_tab, + unsigned int scancode, + bool resize) { unsigned int i; - int old_keycode = KEY_RESERVED; - struct ir_input_dev *ir_dev = input_get_drvdata(dev); /* * Unfortunately, some hardware-based IR decoders don't provide @@ -100,65 +194,34 @@ static int ir_do_setkeycode(struct input_dev *dev, * the provided IR with another one, it is needed to allow loading * IR tables from other remotes. So, */ - if (ir_dev->props && ir_dev->props->scanmask) { + if (ir_dev->props && ir_dev->props->scanmask) scancode &= ir_dev->props->scanmask; - } /* First check if we already have a mapping for this ir command */ for (i = 0; i < rc_tab->len; i++) { + if (rc_tab->scan[i].scancode == scancode) + return i; + /* Keytable is sorted from lowest to highest scancode */ - if (rc_tab->scan[i].scancode > scancode) + if (rc_tab->scan[i].scancode >= scancode) break; - else if (rc_tab->scan[i].scancode < scancode) - continue; - - old_keycode = rc_tab->scan[i].keycode; - rc_tab->scan[i].keycode = keycode; - - /* Did the user wish to remove the mapping? */ - if (keycode == KEY_RESERVED || keycode == KEY_UNKNOWN) { - IR_dprintk(1, "#%d: Deleting scan 0x%04x\n", - i, scancode); - rc_tab->len--; - memmove(&rc_tab->scan[i], &rc_tab->scan[i + 1], - (rc_tab->len - i) * sizeof(struct ir_scancode)); - } - - /* Possibly shrink the keytable, failure is not a problem */ - ir_resize_table(rc_tab); - break; } - if (old_keycode == KEY_RESERVED && keycode != KEY_RESERVED) { - /* No previous mapping found, we might need to grow the table */ - if (resize && ir_resize_table(rc_tab)) - return -ENOMEM; - - IR_dprintk(1, "#%d: New scan 0x%04x with key 0x%04x\n", - i, scancode, keycode); + /* No previous mapping found, we might need to grow the table */ + if (rc_tab->size == rc_tab->len) { + if (!resize || ir_resize_table(rc_tab, GFP_ATOMIC)) + return -1U; + } - /* i is the proper index to insert our new keycode */ + /* i is the proper index to insert our new keycode */ + if (i < rc_tab->len) memmove(&rc_tab->scan[i + 1], &rc_tab->scan[i], (rc_tab->len - i) * sizeof(struct ir_scancode)); - rc_tab->scan[i].scancode = scancode; - rc_tab->scan[i].keycode = keycode; - rc_tab->len++; - set_bit(keycode, dev->keybit); - } else { - IR_dprintk(1, "#%d: Replacing scan 0x%04x with key 0x%04x\n", - i, scancode, keycode); - /* A previous mapping was updated... */ - clear_bit(old_keycode, dev->keybit); - /* ...but another scancode might use the same keycode */ - for (i = 0; i < rc_tab->len; i++) { - if (rc_tab->scan[i].keycode == old_keycode) { - set_bit(old_keycode, dev->keybit); - break; - } - } - } + rc_tab->scan[i].scancode = scancode; + rc_tab->scan[i].keycode = KEY_RESERVED; + rc_tab->len++; - return 0; + return i; } /** @@ -171,17 +234,41 @@ static int ir_do_setkeycode(struct input_dev *dev, * This routine is used to handle evdev EVIOCSKEY ioctl. 
*/ static int ir_setkeycode(struct input_dev *dev, - unsigned int scancode, unsigned int keycode) + const struct input_keymap_entry *ke, + unsigned int *old_keycode) { - int rc; - unsigned long flags; struct ir_input_dev *ir_dev = input_get_drvdata(dev); struct ir_scancode_table *rc_tab = &ir_dev->rc_tab; + unsigned int index; + unsigned int scancode; + int retval; + unsigned long flags; spin_lock_irqsave(&rc_tab->lock, flags); - rc = ir_do_setkeycode(dev, rc_tab, scancode, keycode, true); + + if (ke->flags & INPUT_KEYMAP_BY_INDEX) { + index = ke->index; + if (index >= rc_tab->len) { + retval = -EINVAL; + goto out; + } + } else { + retval = input_scancode_to_scalar(ke, &scancode); + if (retval) + goto out; + + index = ir_establish_scancode(ir_dev, rc_tab, scancode, true); + if (index >= rc_tab->len) { + retval = -ENOMEM; + goto out; + } + } + + *old_keycode = ir_update_mapping(dev, rc_tab, index, ke->keycode); + +out: spin_unlock_irqrestore(&rc_tab->lock, flags); - return rc; + return retval; } /** @@ -189,31 +276,72 @@ static int ir_setkeycode, * @dev: the struct input_dev device descriptor * @to: the struct ir_scancode_table to copy entries to * @from: the struct ir_scancode_table to copy entries from - * @return: -EINVAL if all keycodes could not be inserted, otherwise zero. + * @return: -ENOMEM if not all keycodes could be inserted, otherwise zero. * * This routine is used to handle table initialization. */ -static int ir_setkeytable(struct input_dev *dev, - struct ir_scancode_table *to, +static int ir_setkeytable(struct ir_input_dev *ir_dev, const struct ir_scancode_table *from) { - struct ir_input_dev *ir_dev = input_get_drvdata(dev); struct ir_scancode_table *rc_tab = &ir_dev->rc_tab; - unsigned long flags; - unsigned int i; - int rc = 0; + unsigned int i, index; + int rc; + + rc = ir_create_table(&ir_dev->rc_tab, + from->name, from->ir_type, from->size); + if (rc) + return rc; + + IR_dprintk(1, "Allocated space for %u keycode entries (%u bytes)\n", + rc_tab->size, rc_tab->alloc); - spin_lock_irqsave(&rc_tab->lock, flags); for (i = 0; i < from->size; i++) { - rc = ir_do_setkeycode(dev, to, from->scan[i].scancode, - from->scan[i].keycode, false); - if (rc) + index = ir_establish_scancode(ir_dev, rc_tab, + from->scan[i].scancode, false); + if (index >= rc_tab->len) { + rc = -ENOMEM; break; + } + + ir_update_mapping(ir_dev->input_dev, rc_tab, index, + from->scan[i].keycode); } - spin_unlock_irqrestore(&rc_tab->lock, flags); + + if (rc) + ir_free_table(rc_tab); + return rc; } +/** + * ir_lookup_by_scancode() - locate mapping by scancode + * @rc_tab: the &struct ir_scancode_table to search + * @scancode: scancode to look for in the table + * @return: index in the table, -1U if not found + * + * This routine performs a binary search in the RC keymap table for + * the given scancode. + */ +static unsigned int ir_lookup_by_scancode(const struct ir_scancode_table *rc_tab, + unsigned int scancode) +{ + unsigned int start = 0; + unsigned int end = rc_tab->len - 1; + unsigned int mid; + + while (start <= end) { + mid = (start + end) / 2; + if (rc_tab->scan[mid].scancode < scancode) + start = mid + 1; + else if (rc_tab->scan[mid].scancode > scancode) + end = mid - 1; + else + return mid; + } + + return -1U; +} + /** * ir_getkeycode() - get a keycode from the scancode->keycode table * @dev: the struct input_dev device descriptor @@ -224,36 +352,46 @@ static int ir_setkeytable(struct input_dev *dev, * This routine is used to handle the evdev EVIOCGKEYCODE ioctl.
*/ static int ir_getkeycode(struct input_dev *dev, - unsigned int scancode, unsigned int *keycode) + struct input_keymap_entry *ke) { - int start, end, mid; - unsigned long flags; - int key = KEY_RESERVED; struct ir_input_dev *ir_dev = input_get_drvdata(dev); struct ir_scancode_table *rc_tab = &ir_dev->rc_tab; + struct ir_scancode *entry; + unsigned long flags; + unsigned int index; + unsigned int scancode; + int retval; spin_lock_irqsave(&rc_tab->lock, flags); - start = 0; - end = rc_tab->len - 1; - while (start <= end) { - mid = (start + end) / 2; - if (rc_tab->scan[mid].scancode < scancode) - start = mid + 1; - else if (rc_tab->scan[mid].scancode > scancode) - end = mid - 1; - else { - key = rc_tab->scan[mid].keycode; - break; - } + + if (ke->flags & INPUT_KEYMAP_BY_INDEX) { + index = ke->index; + } else { + retval = input_scancode_to_scalar(ke, &scancode); + if (retval) + goto out; + + index = ir_lookup_by_scancode(rc_tab, scancode); + } + + if (index >= rc_tab->len) { + if (!(ke->flags & INPUT_KEYMAP_BY_INDEX)) + IR_dprintk(1, "unknown key for scancode 0x%04x\n", + scancode); + retval = -EINVAL; + goto out; } - spin_unlock_irqrestore(&rc_tab->lock, flags); - if (key == KEY_RESERVED) - IR_dprintk(1, "unknown key for scancode 0x%04x\n", - scancode); + entry = &rc_tab->scan[index]; - *keycode = key; - return 0; + ke->index = index; + ke->keycode = entry->keycode; + ke->len = sizeof(entry->scancode); + memcpy(ke->scancode, &entry->scancode, sizeof(entry->scancode)); + +out: + spin_unlock_irqrestore(&rc_tab->lock, flags); + return retval; } /** @@ -268,12 +406,24 @@ static int ir_getkeycode(struct input_dev *dev, */ u32 ir_g_keycode_from_table(struct input_dev *dev, u32 scancode) { - int keycode; + struct ir_input_dev *ir_dev = input_get_drvdata(dev); + struct ir_scancode_table *rc_tab = &ir_dev->rc_tab; + unsigned int keycode; + unsigned int index; + unsigned long flags; + + spin_lock_irqsave(&rc_tab->lock, flags); + + index = ir_lookup_by_scancode(rc_tab, scancode); + keycode = index < rc_tab->len ? 
+ rc_tab->scan[index].keycode : KEY_RESERVED; + + spin_unlock_irqrestore(&rc_tab->lock, flags); - ir_getkeycode(dev, scancode, &keycode); if (keycode != KEY_RESERVED) IR_dprintk(1, "%s: scancode 0x%04x keycode 0x%02x\n", dev->name, scancode, keycode); + return keycode; } EXPORT_SYMBOL_GPL(ir_g_keycode_from_table); @@ -453,8 +603,8 @@ int __ir_input_register(struct input_dev *input_dev, goto out_dev; } - input_dev->getkeycode = ir_getkeycode; - input_dev->setkeycode = ir_setkeycode; + input_dev->getkeycode_new = ir_getkeycode; + input_dev->setkeycode_new = ir_setkeycode; input_set_drvdata(input_dev, ir_dev); ir_dev->input_dev = input_dev; @@ -462,12 +612,6 @@ int __ir_input_register(struct input_dev *input_dev, spin_lock_init(&ir_dev->keylock); setup_timer(&ir_dev->timer_keyup, ir_timer_keyup, (unsigned long)ir_dev); - ir_dev->rc_tab.name = rc_tab->name; - ir_dev->rc_tab.ir_type = rc_tab->ir_type; - ir_dev->rc_tab.alloc = roundup_pow_of_two(rc_tab->size * - sizeof(struct ir_scancode)); - ir_dev->rc_tab.scan = kmalloc(ir_dev->rc_tab.alloc, GFP_KERNEL); - ir_dev->rc_tab.size = ir_dev->rc_tab.alloc / sizeof(struct ir_scancode); if (props) { ir_dev->props = props; if (props->open) @@ -476,23 +620,14 @@ int __ir_input_register(struct input_dev *input_dev, input_dev->close = ir_close; } - if (!ir_dev->rc_tab.scan) { - rc = -ENOMEM; - goto out_name; - } - - IR_dprintk(1, "Allocated space for %u keycode entries (%u bytes)\n", - ir_dev->rc_tab.size, ir_dev->rc_tab.alloc); - set_bit(EV_KEY, input_dev->evbit); set_bit(EV_REP, input_dev->evbit); set_bit(EV_MSC, input_dev->evbit); set_bit(MSC_SCAN, input_dev->mscbit); - if (ir_setkeytable(input_dev, &ir_dev->rc_tab, rc_tab)) { - rc = -ENOMEM; - goto out_table; - } + rc = ir_setkeytable(ir_dev, rc_tab); + if (rc) + goto out_name; rc = ir_register_class(input_dev); if (rc < 0) @@ -515,7 +650,7 @@ int __ir_input_register(struct input_dev *input_dev, out_event: ir_unregister_class(input_dev); out_table: - kfree(ir_dev->rc_tab.scan); + ir_free_table(&ir_dev->rc_tab); out_name: kfree(ir_dev->driver_name); out_dev: @@ -533,7 +668,6 @@ EXPORT_SYMBOL_GPL(__ir_input_register); void ir_input_unregister(struct input_dev *input_dev) { struct ir_input_dev *ir_dev = input_get_drvdata(input_dev); - struct ir_scancode_table *rc_tab; if (!ir_dev) return; @@ -545,10 +679,7 @@ void ir_input_unregister(struct input_dev *input_dev) if (ir_dev->props->driver_type == RC_DRIVER_IR_RAW) ir_raw_event_unregister(input_dev); - rc_tab = &ir_dev->rc_tab; - rc_tab->size = 0; - kfree(rc_tab->scan); - rc_tab->scan = NULL; + ir_free_table(&ir_dev->rc_tab); ir_unregister_class(input_dev); diff --git a/include/media/rc-map.h b/include/media/rc-map.h index a9c041d49662..9b201ec8dc7f 100644 --- a/include/media/rc-map.h +++ b/include/media/rc-map.h @@ -35,7 +35,7 @@ struct ir_scancode_table { unsigned int len; /* Used number of entries */ unsigned int alloc; /* Size of *scan in bytes */ u64 ir_type; - char *name; + const char *name; spinlock_t lock; }; -- cgit v1.2.3 From db7829c6cc32f3c0c9a324118d743acb1abff081 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 9 Sep 2010 18:17:26 +0200 Subject: x86, percpu: Optimize this_cpu_ptr Allow arches to implement __this_cpu_ptr, and provide an x86 version. Before: movq $foo, %rax movq %gs:this_cpu_off, %rdx addq %rdx, %rax After: movq $foo, %rax addq %gs:this_cpu_off, %rax The benefit is doing it in one less instruction and not clobbering a temporary register. 
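A minimal usage sketch, assuming a hypothetical per-cpu counter; the patch only changes how the pointer is computed, not how callers use it:

/*
 * Hypothetical example (struct pkt_stats and pkt_stats are made-up
 * names). With the x86 __this_cpu_ptr() above, computing "s" costs a
 * single "add %gs:this_cpu_off, %rax". The caller must stay on one
 * CPU for the duration of the access, e.g. with preemption disabled.
 */
#include <linux/percpu.h>

struct pkt_stats {
	unsigned long packets;
};

static DEFINE_PER_CPU(struct pkt_stats, pkt_stats);

static void count_packet(void)
{
	struct pkt_stats *s = __this_cpu_ptr(&pkt_stats);

	s->packets++;
}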
tj: * Beefed up the comment a bit and renamed in-macro temp variable to match neighboring macros. * Folded fix for const pointer case found in linux-next. * Fixed sparse notation. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 14 ++++++++++++++ include/asm-generic/percpu.h | 9 +++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index cd28f9ad910d..f899e01a8ac9 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -47,6 +47,20 @@ #ifdef CONFIG_SMP #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x #define __my_cpu_offset percpu_read(this_cpu_off) + +/* + * Compared to the generic __my_cpu_offset version, the following + * saves one instruction and avoids clobbering a temp register. + */ +#define __this_cpu_ptr(ptr) \ +({ \ + unsigned long tcp_ptr__; \ + __verify_pcpu_ptr(ptr); \ + asm volatile("add " __percpu_arg(1) ", %0" \ + : "=r" (tcp_ptr__) \ + : "m" (this_cpu_off), "0" (ptr)); \ + (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \ +}) #else #define __percpu_arg(x) "%P" #x #endif diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 08923b684768..ec643115bb56 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -60,9 +60,14 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define __raw_get_cpu_var(var) \ (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) -#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#ifndef __this_cpu_ptr #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) - +#endif +#ifdef CONFIG_DEBUG_PREEMPT +#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#else +#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr) +#endif #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); -- cgit v1.2.3 From 677243d7494d09bfa782425f063a6013de53c35b Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 9 Sep 2010 18:17:26 +0200 Subject: percpu: Optimize __get_cpu_var() Redefine __get_cpu_var() using this_cpu_ptr() which can be arch-optimized. 
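In effect the lvalue accessor becomes sugar over this_cpu_ptr(). A short sketch of the equivalence, using a hypothetical per-cpu variable and assuming preemption is disabled around the accesses:

/*
 * Illustration only: after this patch __get_cpu_var(var) expands to
 * (*this_cpu_ptr(&(var))), so the two statements below compile to the
 * same per-cpu access.
 */
static DEFINE_PER_CPU(int, nr_events);

static void bump_events(void)
{
	__get_cpu_var(nr_events)++;		/* lvalue form */
	(*this_cpu_ptr(&nr_events))++;		/* its expansion */
}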
Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- include/asm-generic/percpu.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index ec643115bb56..d17784ea37ff 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -55,10 +55,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; */ #define per_cpu(var, cpu) \ (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) -#define __get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&(var), my_cpu_offset)) -#define __raw_get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) #ifndef __this_cpu_ptr #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) @@ -69,6 +65,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define this_cpu_ptr(ptr) __this_cpu_ptr(ptr) #endif +#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) +#define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var))) + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); #endif -- cgit v1.2.3 From 0da2f50944976e890ccc9436ab88c0da87788d02 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: ide: remove unnecessary blk_queue_flushing() test in do_ide_request() Unplugging from a request function doesn't really help much (it's already in the request_fn) and soon block layer will be updated to mix barrier sequence with other commands, so there's no need to treat queue flushing any differently. ide was the only user of blk_queue_flushing(). Remove it. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Acked-by: David S. Miller Signed-off-by: Jens Axboe --- drivers/ide/ide-io.c | 13 ------------- include/linux/blkdev.h | 1 - 2 files changed, 14 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index a381be814070..999dac054bcc 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -441,19 +441,6 @@ void do_ide_request(struct request_queue *q) struct request *rq = NULL; ide_startstop_t startstop; - /* - * drive is doing pre-flush, ordered write, post-flush sequence. even - * though that is 3 requests, it must be seen as a single transaction. 
- * we must not preempt this drive until that is complete - */ - if (blk_queue_flushing(q)) - /* - * small race where queue could get replugged during - * the 3-request flush cycle, just yank the plug since - * we want it to finish asap - */ - blk_remove_plug(q); - spin_unlock_irq(q->queue_lock); /* HLD do_request() callback might sleep, make sure it's okay */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c54906f678f..015375c7d031 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -521,7 +521,6 @@ enum { #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) -#define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) -- cgit v1.2.3 From 6958f145459ca7ad9715024de97445addacb8510 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: kill QUEUE_ORDERED_BY_TAG Nobody is making meaningful use of ORDERED_BY_TAG now and queue draining for barrier requests will be removed soon which will render the advantage of tag ordering moot. Kill ORDERED_BY_TAG. The following users are affected. * brd: converted to ORDERED_DRAIN. * virtio_blk: ORDERED_TAG path was already marked deprecated. Removed. * xen-blkfront: ORDERED_TAG case dropped. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Cc: Nick Piggin Cc: Michael S. Tsirkin Cc: Jeremy Fitzhardinge Cc: Chris Wright Signed-off-by: Jens Axboe --- block/blk-barrier.c | 35 +++++++---------------------------- drivers/block/brd.c | 2 +- drivers/block/virtio_blk.c | 9 --------- drivers/block/xen-blkfront.c | 8 +++----- drivers/scsi/sd.c | 4 +--- include/linux/blkdev.h | 17 +---------------- 6 files changed, 13 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index f0faefca032f..c807e9ca3a68 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -26,10 +26,7 @@ int blk_queue_ordered(struct request_queue *q, unsigned ordered) if (ordered != QUEUE_ORDERED_NONE && ordered != QUEUE_ORDERED_DRAIN && ordered != QUEUE_ORDERED_DRAIN_FLUSH && - ordered != QUEUE_ORDERED_DRAIN_FUA && - ordered != QUEUE_ORDERED_TAG && - ordered != QUEUE_ORDERED_TAG_FLUSH && - ordered != QUEUE_ORDERED_TAG_FUA) { + ordered != QUEUE_ORDERED_DRAIN_FUA) { printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); return -EINVAL; } @@ -155,21 +152,9 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) * For an empty barrier, there's no actual BAR request, which * in turn makes POSTFLUSH unnecessary. Mask them off. */ - if (!blk_rq_sectors(rq)) { + if (!blk_rq_sectors(rq)) q->ordered &= ~(QUEUE_ORDERED_DO_BAR | QUEUE_ORDERED_DO_POSTFLUSH); - /* - * Empty barrier on a write-through device w/ ordered - * tag has no command to issue and without any command - * to issue, ordering by tag can't be used. Drain - * instead. 
- */ - if ((q->ordered & QUEUE_ORDERED_BY_TAG) && - !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) { - q->ordered &= ~QUEUE_ORDERED_BY_TAG; - q->ordered |= QUEUE_ORDERED_BY_DRAIN; - } - } /* stash away the original request */ blk_dequeue_request(rq); @@ -210,7 +195,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) } else skip |= QUEUE_ORDSEQ_PREFLUSH; - if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q)) + if (queue_in_flight(q)) rq = NULL; else skip |= QUEUE_ORDSEQ_DRAIN; @@ -257,16 +242,10 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) rq != &q->pre_flush_rq && rq != &q->post_flush_rq) return true; - if (q->ordered & QUEUE_ORDERED_BY_TAG) { - /* Ordered by tag. Blocking the next barrier is enough. */ - if (is_barrier && rq != &q->bar_rq) - *rqp = NULL; - } else { - /* Ordered by draining. Wait for turn. */ - WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); - if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) - *rqp = NULL; - } + /* Ordered by draining. Wait for turn. */ + WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); + if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) + *rqp = NULL; return true; } diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 1c7f63792ff8..47a41272d26b 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -482,7 +482,7 @@ static struct brd_device *brd_alloc(int i) if (!brd->brd_queue) goto out_free_dev; blk_queue_make_request(brd->brd_queue, brd_make_request); - blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG); + blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_DRAIN); blk_queue_max_hw_sectors(brd->brd_queue, 1024); blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 2aafafca2b13..79652809eee8 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -395,15 +395,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) * to implement write barrier support. */ blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); - } else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER)) { - /* - * If the BARRIER feature is supported the host expects us - * to order request by tags. This implies there is not - * volatile write cache on the host, and that the host - * never re-orders outstanding I/O. This feature is not - * useful for real life scenarious and deprecated. - */ - blk_queue_ordered(q, QUEUE_ORDERED_TAG); } else { /* * If the FLUSH feature is not supported we must assume that diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ac1b682edecb..50ec6f834996 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -424,8 +424,7 @@ static int xlvbd_barrier(struct blkfront_info *info) const char *barrier; switch (info->feature_barrier) { - case QUEUE_ORDERED_DRAIN: barrier = "enabled (drain)"; break; - case QUEUE_ORDERED_TAG: barrier = "enabled (tag)"; break; + case QUEUE_ORDERED_DRAIN: barrier = "enabled"; break; case QUEUE_ORDERED_NONE: barrier = "disabled"; break; default: return -EINVAL; } @@ -1078,8 +1077,7 @@ static void blkfront_connect(struct blkfront_info *info) * we're dealing with a very old backend which writes * synchronously; draining will do what needs to get done. * - * If there are barriers, then we can do full queued writes - * with tagged barriers. + * If there are barriers, then we use flush. * * If barriers are not supported, then there's no much we can * do, so just set ordering to NONE. 
@@ -1087,7 +1085,7 @@ static void blkfront_connect(struct blkfront_info *info) if (err) info->feature_barrier = QUEUE_ORDERED_DRAIN; else if (barrier) - info->feature_barrier = QUEUE_ORDERED_TAG; + info->feature_barrier = QUEUE_ORDERED_DRAIN_FLUSH; else info->feature_barrier = QUEUE_ORDERED_NONE; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 2714becc2eaf..cdfc51ab9cf2 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2151,9 +2151,7 @@ static int sd_revalidate_disk(struct gendisk *disk) /* * We now have all cache related info, determine how we deal - * with ordered requests. Note that as the current SCSI - * dispatch function can alter request order, we cannot use - * QUEUE_ORDERED_TAG_* even when ordered tag is supported. + * with ordered requests. */ if (sdkp->WCE) ordered = sdkp->DPOFUA diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 015375c7d031..7077bc0d6138 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -470,12 +470,7 @@ enum { * DRAIN : ordering by draining is enough * DRAIN_FLUSH : ordering by draining w/ pre and post flushes * DRAIN_FUA : ordering by draining w/ pre flush and FUA write - * TAG : ordering by tag is enough - * TAG_FLUSH : ordering by tag w/ pre and post flushes - * TAG_FUA : ordering by tag w/ pre flush and FUA write */ - QUEUE_ORDERED_BY_DRAIN = 0x01, - QUEUE_ORDERED_BY_TAG = 0x02, QUEUE_ORDERED_DO_PREFLUSH = 0x10, QUEUE_ORDERED_DO_BAR = 0x20, QUEUE_ORDERED_DO_POSTFLUSH = 0x40, @@ -483,8 +478,7 @@ enum { QUEUE_ORDERED_NONE = 0x00, - QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN | - QUEUE_ORDERED_DO_BAR, + QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR, QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | QUEUE_ORDERED_DO_PREFLUSH | QUEUE_ORDERED_DO_POSTFLUSH, @@ -492,15 +486,6 @@ enum { QUEUE_ORDERED_DO_PREFLUSH | QUEUE_ORDERED_DO_FUA, - QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG | - QUEUE_ORDERED_DO_BAR, - QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH, - QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_FUA, - /* * Ordered operation sequence */ -- cgit v1.2.3 From 4913efe456c987057e5d36a3f0a55422a9072cae Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: deprecate barrier and replace blk_queue_ordered() with blk_queue_flush() Barrier is deemed too heavy and will soon be replaced by FLUSH/FUA requests. Deprecate barrier. All REQ_HARDBARRIERs are failed with -EOPNOTSUPP and blk_queue_ordered() is replaced with the simpler blk_queue_flush(). blk_queue_flush() takes combinations of REQ_FLUSH and REQ_FUA. If a device has a write cache and can flush it, it should set REQ_FLUSH. If the device can handle FUA writes, it should also set REQ_FUA. All blk_queue_ordered() users are converted. * ORDERED_DRAIN is mapped to 0 which is the default value. * ORDERED_DRAIN_FLUSH is mapped to REQ_FLUSH. * ORDERED_DRAIN_FUA is mapped to REQ_FLUSH | REQ_FUA. Signed-off-by: Tejun Heo Acked-by: Boaz Harrosh Cc: Christoph Hellwig Cc: Nick Piggin Cc: Michael S. Tsirkin Cc: Jeremy Fitzhardinge Cc: Chris Wright Cc: FUJITA Tomonori Cc: Geert Uytterhoeven Cc: David S. 
Miller Cc: Alasdair G Kergon Cc: Pierre Ossman Cc: Stefan Weinhuber Signed-off-by: Jens Axboe --- block/blk-barrier.c | 29 ----------------------------- block/blk-core.c | 6 ++++-- block/blk-settings.c | 20 ++++++++++++++++++++ drivers/block/brd.c | 1 - drivers/block/loop.c | 2 +- drivers/block/osdblk.c | 2 +- drivers/block/ps3disk.c | 2 +- drivers/block/virtio_blk.c | 25 +++++++++---------------- drivers/block/xen-blkfront.c | 43 ++++++++++++------------------------------- drivers/ide/ide-disk.c | 13 ++++++------- drivers/md/dm.c | 2 +- drivers/mmc/card/queue.c | 1 - drivers/s390/block/dasd.c | 1 - drivers/scsi/sd.c | 16 ++++++++-------- include/linux/blkdev.h | 6 ++++-- 15 files changed, 67 insertions(+), 102 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index c807e9ca3a68..ed0aba5463ab 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -9,35 +9,6 @@ #include "blk.h" -/** - * blk_queue_ordered - does this queue support ordered writes - * @q: the request queue - * @ordered: one of QUEUE_ORDERED_* - * - * Description: - * For journalled file systems, doing ordered writes on a commit - * block instead of explicitly doing wait_on_buffer (which is bad - * for performance) can be a big win. Block drivers supporting this - * feature should call this function and indicate so. - * - **/ -int blk_queue_ordered(struct request_queue *q, unsigned ordered) -{ - if (ordered != QUEUE_ORDERED_NONE && - ordered != QUEUE_ORDERED_DRAIN && - ordered != QUEUE_ORDERED_DRAIN_FLUSH && - ordered != QUEUE_ORDERED_DRAIN_FUA) { - printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); - return -EINVAL; - } - - q->ordered = ordered; - q->next_ordered = ordered; - - return 0; -} -EXPORT_SYMBOL(blk_queue_ordered); - /* * Cache flushing for ordered writes handling */ diff --git a/block/blk-core.c b/block/blk-core.c index ee1a1e7e63cc..f06354183b29 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1203,11 +1203,13 @@ static int __make_request(struct request_queue *q, struct bio *bio) const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; int rw_flags; - if ((bio->bi_rw & REQ_HARDBARRIER) && - (q->next_ordered == QUEUE_ORDERED_NONE)) { + /* REQ_HARDBARRIER is no more */ + if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER, + "block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) { bio_endio(bio, -EOPNOTSUPP); return 0; } + /* * low level driver can indicate that it wants pages above a * certain limit bounced to low memory (ie for highmem, or even diff --git a/block/blk-settings.c b/block/blk-settings.c index a234f4bf1d6f..9b18afcfe925 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -794,6 +794,26 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask) } EXPORT_SYMBOL(blk_queue_update_dma_alignment); +/** + * blk_queue_flush - configure queue's cache flush capability + * @q: the request queue for the device + * @flush: 0, REQ_FLUSH or REQ_FLUSH | REQ_FUA + * + * Tell block layer cache flush capability of @q. If it supports + * flushing, REQ_FLUSH should be set. If it supports bypassing + * write cache for individual writes, REQ_FUA should be set. 
+ */ +void blk_queue_flush(struct request_queue *q, unsigned int flush) +{ + WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA)); + + if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA))) + flush &= ~REQ_FUA; + + q->flush_flags = flush & (REQ_FLUSH | REQ_FUA); +} +EXPORT_SYMBOL_GPL(blk_queue_flush); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 47a41272d26b..fa33f97722ba 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -482,7 +482,6 @@ static struct brd_device *brd_alloc(int i) if (!brd->brd_queue) goto out_free_dev; blk_queue_make_request(brd->brd_queue, brd_make_request); - blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_DRAIN); blk_queue_max_hw_sectors(brd->brd_queue, 1024); blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c3a4a2e176da..953d1e12f4d4 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -832,7 +832,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->lo_queue->unplug_fn = loop_unplug; if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) - blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(lo->lo_queue, REQ_FLUSH); set_capacity(lo->lo_disk, size); bd_set_size(bdev, size << 9); diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 2284b4f05c62..72d62462433d 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -439,7 +439,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev) blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); blk_queue_prep_rq(q, blk_queue_start_tag); - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(q, REQ_FLUSH); disk->queue = q; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index e9da874d0419..4911f9e57bc7 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -468,7 +468,7 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev) blk_queue_dma_alignment(queue, dev->blk_size-1); blk_queue_logical_block_size(queue, dev->blk_size); - blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(queue, REQ_FLUSH); blk_queue_max_segments(queue, -1); blk_queue_max_segment_size(queue, dev->bounce_size); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 79652809eee8..d10b635b3946 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -388,22 +388,15 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) vblk->disk->driverfs_dev = &vdev->dev; index++; - if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) { - /* - * If the FLUSH feature is supported we do have support for - * flushing a volatile write cache on the host. Use that - * to implement write barrier support. - */ - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); - } else { - /* - * If the FLUSH feature is not supported we must assume that - * the host does not perform any kind of volatile write - * caching. We still need to drain the queue to provider - * proper barrier semantics. - */ - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN); - } + /* + * If the FLUSH feature is supported we do have support for + * flushing a volatile write cache on the host. Use that to + * implement write barrier support; otherwise, we must assume + * that the host does not perform any kind of volatile write + * caching. 
+ */ + if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) + blk_queue_flush(q, REQ_FLUSH); /* If disk is read-only in the host, the guest should obey */ if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 50ec6f834996..0b1eea643262 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -95,7 +95,7 @@ struct blkfront_info struct gnttab_free_callback callback; struct blk_shadow shadow[BLK_RING_SIZE]; unsigned long shadow_free; - int feature_barrier; + unsigned int feature_flush; int is_ready; }; @@ -418,25 +418,12 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) } -static int xlvbd_barrier(struct blkfront_info *info) +static void xlvbd_flush(struct blkfront_info *info) { - int err; - const char *barrier; - - switch (info->feature_barrier) { - case QUEUE_ORDERED_DRAIN: barrier = "enabled"; break; - case QUEUE_ORDERED_NONE: barrier = "disabled"; break; - default: return -EINVAL; - } - - err = blk_queue_ordered(info->rq, info->feature_barrier); - - if (err) - return err; - + blk_queue_flush(info->rq, info->feature_flush); printk(KERN_INFO "blkfront: %s: barriers %s\n", - info->gd->disk_name, barrier); - return 0; + info->gd->disk_name, + info->feature_flush ? "enabled" : "disabled"); } @@ -515,7 +502,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, info->rq = gd->queue; info->gd = gd; - xlvbd_barrier(info); + xlvbd_flush(info); if (vdisk_info & VDISK_READONLY) set_disk_ro(gd, 1); @@ -661,8 +648,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", info->gd->disk_name); error = -EOPNOTSUPP; - info->feature_barrier = QUEUE_ORDERED_NONE; - xlvbd_barrier(info); + info->feature_flush = 0; + xlvbd_flush(info); } /* fall through */ case BLKIF_OP_READ: @@ -1075,19 +1062,13 @@ static void blkfront_connect(struct blkfront_info *info) /* * If there's no "feature-barrier" defined, then it means * we're dealing with a very old backend which writes - * synchronously; draining will do what needs to get done. + * synchronously; nothing to do. * * If there are barriers, then we use flush. - * - * If barriers are not supported, then there's no much we can - * do, so just set ordering to NONE. */ - if (err) - info->feature_barrier = QUEUE_ORDERED_DRAIN; - else if (barrier) - info->feature_barrier = QUEUE_ORDERED_DRAIN_FLUSH; - else - info->feature_barrier = QUEUE_ORDERED_NONE; + info->feature_flush = 0; + if (!err && barrier) + info->feature_flush = REQ_FLUSH; err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 7433e07de30e..7c5b01ce51d2 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -516,10 +516,10 @@ static int ide_do_setfeature(ide_drive_t *drive, u8 feature, u8 nsect) return ide_no_data_taskfile(drive, &cmd); } -static void update_ordered(ide_drive_t *drive) +static void update_flush(ide_drive_t *drive) { u16 *id = drive->id; - unsigned ordered = QUEUE_ORDERED_NONE; + unsigned flush = 0; if (drive->dev_flags & IDE_DFLAG_WCACHE) { unsigned long long capacity; @@ -543,13 +543,12 @@ static void update_ordered(ide_drive_t *drive) drive->name, barrier ? 
"" : "not "); if (barrier) { - ordered = QUEUE_ORDERED_DRAIN_FLUSH; + flush = REQ_FLUSH; blk_queue_prep_rq(drive->queue, idedisk_prep_fn); } - } else - ordered = QUEUE_ORDERED_DRAIN; + } - blk_queue_ordered(drive->queue, ordered); + blk_queue_flush(drive->queue, flush); } ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE); @@ -572,7 +571,7 @@ static int set_wcache(ide_drive_t *drive, int arg) } } - update_ordered(drive); + update_flush(drive); return err; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ac384b2a6a33..b1d92be8f990 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2245,7 +2245,7 @@ static int dm_init_request_based_queue(struct mapped_device *md) blk_queue_softirq_done(md->queue, dm_softirq_done); blk_queue_prep_rq(md->queue, dm_prep_fn); blk_queue_lld_busy(md->queue, dm_lld_busy); - blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(md->queue, REQ_FLUSH); elv_register_queue(md->queue); diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index e876678176be..9c0b42bfe089 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -128,7 +128,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock mq->req = NULL; blk_queue_prep_rq(mq->queue, mmc_prep_request); - blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue); if (mmc_can_erase(card)) { queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue); diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 8373ca0de8e0..9b106d83b0cd 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2197,7 +2197,6 @@ static void dasd_setup_queue(struct dasd_block *block) */ blk_queue_max_segment_size(block->request_queue, PAGE_SIZE); blk_queue_segment_boundary(block->request_queue, PAGE_SIZE - 1); - blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN); } /* diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cdfc51ab9cf2..63bd01ae534f 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2109,7 +2109,7 @@ static int sd_revalidate_disk(struct gendisk *disk) struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdp = sdkp->device; unsigned char *buffer; - unsigned ordered; + unsigned flush = 0; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_revalidate_disk\n")); @@ -2151,15 +2151,15 @@ static int sd_revalidate_disk(struct gendisk *disk) /* * We now have all cache related info, determine how we deal - * with ordered requests. + * with flush requests. */ - if (sdkp->WCE) - ordered = sdkp->DPOFUA - ? 
QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH; - else - ordered = QUEUE_ORDERED_DRAIN; + if (sdkp->WCE) { + flush |= REQ_FLUSH; + if (sdkp->DPOFUA) + flush |= REQ_FUA; + } - blk_queue_ordered(sdkp->disk->queue, ordered); + blk_queue_flush(sdkp->disk->queue, flush); set_capacity(disk, sdkp->capacity); kfree(buffer); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7077bc0d6138..e97911d4dec3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -355,8 +355,10 @@ struct request_queue struct blk_trace *blk_trace; #endif /* - * reserved for flush operations + * for flush operations */ + unsigned int flush_flags; + unsigned int ordered, next_ordered, ordseq; int orderr, ordcolor; struct request pre_flush_rq, bar_rq, post_flush_rq; @@ -865,8 +867,8 @@ extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); +extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern int blk_queue_ordered(struct request_queue *, unsigned); extern bool blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); -- cgit v1.2.3 From 9cbbdca44ae1a6f512ea1e2be11ced8bbb9d430a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: remove spurious uses of REQ_HARDBARRIER REQ_HARDBARRIER is deprecated. Remove spurious uses in the following users. Please note that other than osdblk, all other uses were already spurious before deprecation. * osdblk: osdblk_rq_fn() won't receive any request with REQ_HARDBARRIER set. Remove the test for it. * pktcdvd: use of REQ_HARDBARRIER in pkt_generic_packet() doesn't mean anything. Removed. * aic7xxx_old: Setting MSG_ORDERED_Q_TAG on REQ_HARDBARRIER is spurious. Removed. * sas_scsi_host: Setting TASK_ATTR_ORDERED on REQ_HARDBARRIER is spurious. Removed. * scsi_tcq: The ordered tag path wasn't being used anyway. Removed. 
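For reference, the scsi_tcq.h helper touched below reduces to the following; this is reconstructed from the hunk, and the untagged tail returning 0 is unchanged context assumed from mainline:

static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg)
{
	struct request *req = cmd->request;

	if (blk_rq_tagged(req)) {
		/* ordered tags are gone; tagged commands are always simple */
		*msg++ = MSG_SIMPLE_TAG;
		*msg++ = req->tag;
		return 2;
	}

	return 0;	/* untagged path, assumed unchanged */
}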
Signed-off-by: Tejun Heo Acked-by: Boaz Harrosh Cc: James Bottomley Cc: Peter Osterlund Signed-off-by: Jens Axboe --- drivers/block/osdblk.c | 3 +-- drivers/block/pktcdvd.c | 1 - drivers/scsi/aic7xxx_old.c | 21 ++------------------- drivers/scsi/libsas/sas_scsi_host.c | 13 +------------ include/scsi/scsi_tcq.h | 6 +----- 5 files changed, 5 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 72d62462433d..87311ebac0db 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -310,8 +310,7 @@ static void osdblk_rq_fn(struct request_queue *q) break; /* filter out block requests we don't understand */ - if (rq->cmd_type != REQ_TYPE_FS && - !(rq->cmd_flags & REQ_HARDBARRIER)) { + if (rq->cmd_type != REQ_TYPE_FS) { blk_end_request_all(rq, 0); continue; } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index b1cbeb59bb76..0166ea136045 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -753,7 +753,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq->timeout = 60*HZ; rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->cmd_flags |= REQ_HARDBARRIER; if (cgc->quiet) rq->cmd_flags |= REQ_QUIET; diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c index 93984c9dfe14..e1cd6062776c 100644 --- a/drivers/scsi/aic7xxx_old.c +++ b/drivers/scsi/aic7xxx_old.c @@ -2850,12 +2850,6 @@ aic7xxx_done(struct aic7xxx_host *p, struct aic7xxx_scb *scb) aic_dev->r_total++; ptr = aic_dev->r_bins; } - if(cmd->device->simple_tags && cmd->request->cmd_flags & REQ_HARDBARRIER) - { - aic_dev->barrier_total++; - if(scb->tag_action == MSG_ORDERED_Q_TAG) - aic_dev->ordered_total++; - } x = scb->sg_length; x >>= 10; for(i=0; i<6; i++) @@ -10144,19 +10138,8 @@ static void aic7xxx_buildscb(struct aic7xxx_host *p, struct scsi_cmnd *cmd, /* We always force TEST_UNIT_READY to untagged */ if (cmd->cmnd[0] != TEST_UNIT_READY && sdptr->simple_tags) { - if (req->cmd_flags & REQ_HARDBARRIER) - { - if(sdptr->ordered_tags) - { - hscb->control |= MSG_ORDERED_Q_TAG; - scb->tag_action = MSG_ORDERED_Q_TAG; - } - } - else - { - hscb->control |= MSG_SIMPLE_Q_TAG; - scb->tag_action = MSG_SIMPLE_Q_TAG; - } + hscb->control |= MSG_SIMPLE_Q_TAG; + scb->tag_action = MSG_SIMPLE_Q_TAG; } } if ( !(aic_dev->dtr_pending) && diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index f0cfba9a1fc8..535085cd27ec 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -130,17 +130,6 @@ static void sas_scsi_task_done(struct sas_task *task) sc->scsi_done(sc); } -static enum task_attribute sas_scsi_get_task_attr(struct scsi_cmnd *cmd) -{ - enum task_attribute ta = TASK_ATTR_SIMPLE; - if (cmd->request && blk_rq_tagged(cmd->request)) { - if (cmd->device->ordered_tags && - (cmd->request->cmd_flags & REQ_HARDBARRIER)) - ta = TASK_ATTR_ORDERED; - } - return ta; -} - static struct sas_task *sas_create_task(struct scsi_cmnd *cmd, struct domain_device *dev, gfp_t gfp_flags) @@ -160,7 +149,7 @@ static struct sas_task *sas_create_task(struct scsi_cmnd *cmd, task->ssp_task.retry_count = 1; int_to_scsilun(cmd->device->lun, &lun); memcpy(task->ssp_task.LUN, &lun.scsi_lun, 8); - task->ssp_task.task_attr = sas_scsi_get_task_attr(cmd); + task->ssp_task.task_attr = TASK_ATTR_SIMPLE; memcpy(task->ssp_task.cdb, cmd->cmnd, 16); task->scatter = scsi_sglist(cmd); diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index 17231385cb37..d6e7994aa634 100644 --- 
a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -97,13 +97,9 @@ static inline void scsi_deactivate_tcq(struct scsi_device *sdev, int depth) static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg) { struct request *req = cmd->request; - struct scsi_device *sdev = cmd->device; if (blk_rq_tagged(req)) { - if (sdev->ordered_tags && req->cmd_flags & REQ_HARDBARRIER) - *msg++ = MSG_ORDERED_TAG; - else - *msg++ = MSG_SIMPLE_TAG; + *msg++ = MSG_SIMPLE_TAG; *msg++ = req->tag; return 2; } -- cgit v1.2.3 From dd831006d5be7f74c3fe7aef82380c51c3637960 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: misc cleanups in barrier code Make the following cleanups in preparation of barrier/flush update. * blk_do_ordered() declaration is moved from include/linux/blkdev.h to block/blk.h. * blk_do_ordered() now returns pointer to struct request, with %NULL meaning "try the next request" and ERR_PTR(-EAGAIN) "try again later". The third case will be dropped with further changes. * In the initialization of proxy barrier request, data direction is already set by init_request_from_bio(). Drop unnecessary explicit REQ_WRITE setting and move init_request_from_bio() above REQ_FUA flag setting. * add_request() is collapsed into __make_request(). These changes don't make any functional difference. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-barrier.c | 32 ++++++++++++++------------------ block/blk-core.c | 21 ++++----------------- block/blk.h | 7 +++++-- include/linux/blkdev.h | 1 - 4 files changed, 23 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index ed0aba5463ab..f1be85ba2bb5 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -110,9 +110,9 @@ static void queue_flush(struct request_queue *q, unsigned which) elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static inline bool start_ordered(struct request_queue *q, struct request **rqp) +static inline struct request *start_ordered(struct request_queue *q, + struct request *rq) { - struct request *rq = *rqp; unsigned skip = 0; q->orderr = 0; @@ -149,11 +149,9 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) /* initialize proxy request and queue it */ blk_rq_init(q, rq); - if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) - rq->cmd_flags |= REQ_WRITE; + init_request_from_bio(rq, q->orig_bar_rq->bio); if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; - init_request_from_bio(rq, q->orig_bar_rq->bio); rq->end_io = bar_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); @@ -171,27 +169,26 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) else skip |= QUEUE_ORDSEQ_DRAIN; - *rqp = rq; - /* * Complete skipped sequences. If whole sequence is complete, - * return false to tell elevator that this request is gone. + * return %NULL to tell elevator that this request is gone. 
*/ - return !blk_ordered_complete_seq(q, skip, 0); + if (blk_ordered_complete_seq(q, skip, 0)) + rq = NULL; + return rq; } -bool blk_do_ordered(struct request_queue *q, struct request **rqp) +struct request *blk_do_ordered(struct request_queue *q, struct request *rq) { - struct request *rq = *rqp; const int is_barrier = rq->cmd_type == REQ_TYPE_FS && (rq->cmd_flags & REQ_HARDBARRIER); if (!q->ordseq) { if (!is_barrier) - return true; + return rq; if (q->next_ordered != QUEUE_ORDERED_NONE) - return start_ordered(q, rqp); + return start_ordered(q, rq); else { /* * Queue ordering not supported. Terminate @@ -199,8 +196,7 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) */ blk_dequeue_request(rq); __blk_end_request_all(rq, -EOPNOTSUPP); - *rqp = NULL; - return false; + return NULL; } } @@ -211,14 +207,14 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) /* Special requests are not subject to ordering rules. */ if (rq->cmd_type != REQ_TYPE_FS && rq != &q->pre_flush_rq && rq != &q->post_flush_rq) - return true; + return rq; /* Ordered by draining. Wait for turn. */ WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) - *rqp = NULL; + rq = ERR_PTR(-EAGAIN); - return true; + return rq; } static void bio_end_empty_barrier(struct bio *bio, int err) diff --git a/block/blk-core.c b/block/blk-core.c index f06354183b29..f8d37a8e2c55 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1037,22 +1037,6 @@ void blk_insert_request(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL(blk_insert_request); -/* - * add-request adds a request to the linked list. - * queue lock is held and interrupts disabled, as we muck with the - * request queue list. - */ -static inline void add_request(struct request_queue *q, struct request *req) -{ - drive_stat_acct(req, 1); - - /* - * elevator indicated where it wants this request to be - * inserted at elevator_merge time - */ - __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); -} - static void part_round_stats_single(int cpu, struct hd_struct *part, unsigned long now) { @@ -1316,7 +1300,10 @@ get_rq: req->cpu = blk_cpu_to_group(smp_processor_id()); if (queue_should_plug(q) && elv_queue_empty(q)) blk_plug_device(q); - add_request(q, req); + + /* insert the request into the elevator */ + drive_stat_acct(req, 1); + __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); out: if (unplug || !queue_should_plug(q)) __generic_unplug_device(q); diff --git a/block/blk.h b/block/blk.h index 6e7dc87141e4..874eb4ea8093 100644 --- a/block/blk.h +++ b/block/blk.h @@ -51,6 +51,8 @@ static inline void blk_clear_rq_complete(struct request *rq) */ #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) +struct request *blk_do_ordered(struct request_queue *q, struct request *rq); + static inline struct request *__elv_next_request(struct request_queue *q) { struct request *rq; @@ -58,8 +60,9 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); - if (blk_do_ordered(q, &rq)) - return rq; + rq = blk_do_ordered(q, rq); + if (rq) + return !IS_ERR(rq) ? 
rq : NULL; } if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e97911d4dec3..996549d71923 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -869,7 +869,6 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern bool blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); -- cgit v1.2.3 From 28e7d1845216538303bb95d679d8fd4de50e2f1a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: drop barrier ordering by queue draining Filesystems will take all the responsibilities for ordering requests around commit writes and will only indicate how the commit writes themselves should be handled by the block layer. This patch drops barrier ordering by queue draining from the block layer. The ordering-by-draining implementation was somewhat invasive to request handling. A list of notable changes follows. * Each queue has a 1-bit color which is flipped on each barrier issue. This is used to track whether a given request was issued before the current barrier or not. The REQ_ORDERED_COLOR flag and the coloring implementation in __elv_add_request() are removed. * Requests which shouldn't be processed yet for draining were stalled by returning -EAGAIN from blk_do_ordered() according to the test result between blk_ordered_req_seq() and blk_ordered_cur_seq(). This logic is removed. * The draining completion logic in elv_completed_request() is removed. * All barrier sequence requests were queued to the request queue and then trickled to the lower layer according to progress, and thus maintaining request order during requeues was necessary. This is replaced by queueing the next request in the barrier sequence only after the current one is complete, from blk_ordered_complete_seq(), which removes the need for multiple proxy requests in struct request_queue and the request sorting logic in the ELEVATOR_INSERT_REQUEUE path of elv_insert(). * As barriers no longer have ordering constraints, there's no need to dump the whole elevator onto the dispatch queue on each barrier. Insert barriers at the front instead. * If other barrier requests come to the front of the dispatch queue while one is already in progress, they are stored in q->pending_barriers and restored to the dispatch queue one-by-one after each barrier completion from blk_ordered_complete_seq(). 
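To make the resulting flow concrete, here is a condensed editorial sketch of the dispatch-time decision after this patch. It mirrors the rewritten blk_do_ordered() in the diff below; sketch_do_ordered() and start_new_sequence() are hypothetical names (the real sequence-starting logic is open-coded in blk_do_ordered()).

	/* Condensed sketch, not part of the patch: how a request is
	 * classified at dispatch time after this change. */
	struct request *sketch_do_ordered(struct request_queue *q,
					  struct request *rq)
	{
		/* non-barrier requests pass straight through */
		if (!(rq->cmd_flags & REQ_HARDBARRIER))
			return rq;

		/* one barrier sequence at a time; park later arrivals */
		if (q->ordseq) {
			list_move_tail(&rq->queuelist, &q->pending_barriers);
			return NULL;
		}

		/* otherwise start PREFLUSH -> BAR -> POSTFLUSH sequencing */
		return start_new_sequence(q, rq);	/* hypothetical helper */
	}
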
Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-barrier.c | 220 ++++++++++++++++++---------------------------- block/blk-core.c | 11 ++- block/blk.h | 2 +- block/elevator.c | 79 ++--------------- include/linux/blk_types.h | 2 - include/linux/blkdev.h | 19 ++-- 6 files changed, 113 insertions(+), 220 deletions(-) (limited to 'include') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index f1be85ba2bb5..e8b2e5c091b1 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -9,6 +9,8 @@ #include "blk.h" +static struct request *queue_next_ordseq(struct request_queue *q); + /* * Cache flushing for ordered writes handling */ @@ -19,38 +21,10 @@ unsigned blk_ordered_cur_seq(struct request_queue *q) return 1 << ffz(q->ordseq); } -unsigned blk_ordered_req_seq(struct request *rq) -{ - struct request_queue *q = rq->q; - - BUG_ON(q->ordseq == 0); - - if (rq == &q->pre_flush_rq) - return QUEUE_ORDSEQ_PREFLUSH; - if (rq == &q->bar_rq) - return QUEUE_ORDSEQ_BAR; - if (rq == &q->post_flush_rq) - return QUEUE_ORDSEQ_POSTFLUSH; - - /* - * !fs requests don't need to follow barrier ordering. Always - * put them at the front. This fixes the following deadlock. - * - * http://thread.gmane.org/gmane.linux.kernel/537473 - */ - if (rq->cmd_type != REQ_TYPE_FS) - return QUEUE_ORDSEQ_DRAIN; - - if ((rq->cmd_flags & REQ_ORDERED_COLOR) == - (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) - return QUEUE_ORDSEQ_DRAIN; - else - return QUEUE_ORDSEQ_DONE; -} - -bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) +static struct request *blk_ordered_complete_seq(struct request_queue *q, + unsigned seq, int error) { - struct request *rq; + struct request *next_rq = NULL; if (error && !q->orderr) q->orderr = error; @@ -58,16 +32,22 @@ bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) BUG_ON(q->ordseq & seq); q->ordseq |= seq; - if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) - return false; - - /* - * Okay, sequence complete. 
- */ - q->ordseq = 0; - rq = q->orig_bar_rq; - __blk_end_request_all(rq, q->orderr); - return true; + if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) { + /* not complete yet, queue the next ordered sequence */ + next_rq = queue_next_ordseq(q); + } else { + /* complete this barrier request */ + __blk_end_request_all(q->orig_bar_rq, q->orderr); + q->orig_bar_rq = NULL; + q->ordseq = 0; + + /* dispatch the next barrier if there's one */ + if (!list_empty(&q->pending_barriers)) { + next_rq = list_entry_rq(q->pending_barriers.next); + list_move(&next_rq->queuelist, &q->queue_head); + } + } + return next_rq; } static void pre_flush_end_io(struct request *rq, int error) @@ -88,133 +68,105 @@ static void post_flush_end_io(struct request *rq, int error) blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); } -static void queue_flush(struct request_queue *q, unsigned which) +static void queue_flush(struct request_queue *q, struct request *rq, + rq_end_io_fn *end_io) { - struct request *rq; - rq_end_io_fn *end_io; - - if (which == QUEUE_ORDERED_DO_PREFLUSH) { - rq = &q->pre_flush_rq; - end_io = pre_flush_end_io; - } else { - rq = &q->post_flush_rq; - end_io = post_flush_end_io; - } - blk_rq_init(q, rq); rq->cmd_type = REQ_TYPE_FS; - rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH; + rq->cmd_flags = REQ_FLUSH; rq->rq_disk = q->orig_bar_rq->rq_disk; rq->end_io = end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static inline struct request *start_ordered(struct request_queue *q, - struct request *rq) +static struct request *queue_next_ordseq(struct request_queue *q) { - unsigned skip = 0; - - q->orderr = 0; - q->ordered = q->next_ordered; - q->ordseq |= QUEUE_ORDSEQ_STARTED; - - /* - * For an empty barrier, there's no actual BAR request, which - * in turn makes POSTFLUSH unnecessary. Mask them off. - */ - if (!blk_rq_sectors(rq)) - q->ordered &= ~(QUEUE_ORDERED_DO_BAR | - QUEUE_ORDERED_DO_POSTFLUSH); - - /* stash away the original request */ - blk_dequeue_request(rq); - q->orig_bar_rq = rq; - rq = NULL; - - /* - * Queue ordered sequence. As we stack them at the head, we - * need to queue in reverse order. Note that we rely on that - * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs - * request gets inbetween ordered sequence. - */ - if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) { - queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH); - rq = &q->post_flush_rq; - } else - skip |= QUEUE_ORDSEQ_POSTFLUSH; + struct request *rq = &q->bar_rq; - if (q->ordered & QUEUE_ORDERED_DO_BAR) { - rq = &q->bar_rq; + switch (blk_ordered_cur_seq(q)) { + case QUEUE_ORDSEQ_PREFLUSH: + queue_flush(q, rq, pre_flush_end_io); + break; + case QUEUE_ORDSEQ_BAR: /* initialize proxy request and queue it */ blk_rq_init(q, rq); init_request_from_bio(rq, q->orig_bar_rq->bio); + rq->cmd_flags &= ~REQ_HARDBARRIER; if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; rq->end_io = bar_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); - } else - skip |= QUEUE_ORDSEQ_BAR; + break; - if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) { - queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH); - rq = &q->pre_flush_rq; - } else - skip |= QUEUE_ORDSEQ_PREFLUSH; + case QUEUE_ORDSEQ_POSTFLUSH: + queue_flush(q, rq, post_flush_end_io); + break; - if (queue_in_flight(q)) - rq = NULL; - else - skip |= QUEUE_ORDSEQ_DRAIN; - - /* - * Complete skipped sequences. If whole sequence is complete, - * return %NULL to tell elevator that this request is gone. 
- */ - if (blk_ordered_complete_seq(q, skip, 0)) - rq = NULL; + default: + BUG(); + } return rq; } struct request *blk_do_ordered(struct request_queue *q, struct request *rq) { - const int is_barrier = rq->cmd_type == REQ_TYPE_FS && - (rq->cmd_flags & REQ_HARDBARRIER); - - if (!q->ordseq) { - if (!is_barrier) - return rq; - - if (q->next_ordered != QUEUE_ORDERED_NONE) - return start_ordered(q, rq); - else { - /* - * Queue ordering not supported. Terminate - * with prejudice. - */ - blk_dequeue_request(rq); - __blk_end_request_all(rq, -EOPNOTSUPP); - return NULL; - } + unsigned skip = 0; + + if (!(rq->cmd_flags & REQ_HARDBARRIER)) + return rq; + + if (q->ordseq) { + /* + * Barrier is already in progress and they can't be + * processed in parallel. Queue for later processing. + */ + list_move_tail(&rq->queuelist, &q->pending_barriers); + return NULL; + } + + if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) { + /* + * Queue ordering not supported. Terminate + * with prejudice. + */ + blk_dequeue_request(rq); + __blk_end_request_all(rq, -EOPNOTSUPP); + return NULL; } /* - * Ordered sequence in progress + * Start a new ordered sequence */ + q->orderr = 0; + q->ordered = q->next_ordered; + q->ordseq |= QUEUE_ORDSEQ_STARTED; - /* Special requests are not subject to ordering rules. */ - if (rq->cmd_type != REQ_TYPE_FS && - rq != &q->pre_flush_rq && rq != &q->post_flush_rq) - return rq; + /* + * For an empty barrier, there's no actual BAR request, which + * in turn makes POSTFLUSH unnecessary. Mask them off. + */ + if (!blk_rq_sectors(rq)) + q->ordered &= ~(QUEUE_ORDERED_DO_BAR | + QUEUE_ORDERED_DO_POSTFLUSH); - /* Ordered by draining. Wait for turn. */ - WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); - if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) - rq = ERR_PTR(-EAGAIN); + /* stash away the original request */ + blk_dequeue_request(rq); + q->orig_bar_rq = rq; - return rq; + if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) + skip |= QUEUE_ORDSEQ_PREFLUSH; + + if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) + skip |= QUEUE_ORDSEQ_BAR; + + if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) + skip |= QUEUE_ORDSEQ_POSTFLUSH; + + /* complete skipped sequences and return the first sequence */ + return blk_ordered_complete_seq(q, skip, 0); } static void bio_end_empty_barrier(struct bio *bio, int err) diff --git a/block/blk-core.c b/block/blk-core.c index f8d37a8e2c55..d316662682c8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -520,6 +520,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) init_timer(&q->unplug_timer); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); INIT_LIST_HEAD(&q->timeout_list); + INIT_LIST_HEAD(&q->pending_barriers); INIT_WORK(&q->unplug_work, blk_unplug_work); kobject_init(&q->kobj, &blk_queue_ktype); @@ -1185,6 +1186,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) const bool sync = (bio->bi_rw & REQ_SYNC); const bool unplug = (bio->bi_rw & REQ_UNPLUG); const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; + int where = ELEVATOR_INSERT_SORT; int rw_flags; /* REQ_HARDBARRIER is no more */ @@ -1203,7 +1205,12 @@ static int __make_request(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q)) + if (bio->bi_rw & REQ_HARDBARRIER) { + where = ELEVATOR_INSERT_FRONT; + goto get_rq; + } + + if (elv_queue_empty(q)) goto get_rq; el_ret = elv_merge(q, &req, bio); @@ -1303,7 +1310,7 @@ get_rq: /* insert the request 
into the elevator */ drive_stat_acct(req, 1); - __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); + __elv_add_request(q, req, where, 0); out: if (unplug || !queue_should_plug(q)) __generic_unplug_device(q); diff --git a/block/blk.h b/block/blk.h index 874eb4ea8093..08081e4b294e 100644 --- a/block/blk.h +++ b/block/blk.h @@ -62,7 +62,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) rq = list_entry_rq(q->queue_head.next); rq = blk_do_ordered(q, rq); if (rq) - return !IS_ERR(rq) ? rq : NULL; + return rq; } if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) diff --git a/block/elevator.c b/block/elevator.c index ec585c9554d3..241c69c45c5f 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -617,8 +617,6 @@ void elv_quiesce_end(struct request_queue *q) void elv_insert(struct request_queue *q, struct request *rq, int where) { - struct list_head *pos; - unsigned ordseq; int unplug_it = 1; trace_block_rq_insert(q, rq); @@ -626,9 +624,16 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) rq->q = q; switch (where) { + case ELEVATOR_INSERT_REQUEUE: + /* + * Most requeues happen because of a busy condition, + * don't force unplug of the queue for that case. + * Clear unplug_it and fall through. + */ + unplug_it = 0; + case ELEVATOR_INSERT_FRONT: rq->cmd_flags |= REQ_SOFTBARRIER; - list_add(&rq->queuelist, &q->queue_head); break; @@ -668,36 +673,6 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) q->elevator->ops->elevator_add_req_fn(q, rq); break; - case ELEVATOR_INSERT_REQUEUE: - /* - * If ordered flush isn't in progress, we do front - * insertion; otherwise, requests should be requeued - * in ordseq order. - */ - rq->cmd_flags |= REQ_SOFTBARRIER; - - /* - * Most requeues happen because of a busy condition, - * don't force unplug of the queue for that case. - */ - unplug_it = 0; - - if (q->ordseq == 0) { - list_add(&rq->queuelist, &q->queue_head); - break; - } - - ordseq = blk_ordered_req_seq(rq); - - list_for_each(pos, &q->queue_head) { - struct request *pos_rq = list_entry_rq(pos); - if (ordseq <= blk_ordered_req_seq(pos_rq)) - break; - } - - list_add_tail(&rq->queuelist, pos); - break; - default: printk(KERN_ERR "%s: bad insertion point %d\n", __func__, where); @@ -716,26 +691,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) void __elv_add_request(struct request_queue *q, struct request *rq, int where, int plug) { - if (q->ordcolor) - rq->cmd_flags |= REQ_ORDERED_COLOR; - if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { - /* - * toggle ordered color - */ - if (rq->cmd_flags & REQ_HARDBARRIER) - q->ordcolor ^= 1; - - /* - * barriers implicitly indicate back insertion - */ - if (where == ELEVATOR_INSERT_SORT) - where = ELEVATOR_INSERT_BACK; - - /* - * this request is scheduling boundary, update - * end_sector - */ + /* barriers are scheduling boundary, update end_sector */ if (rq->cmd_type == REQ_TYPE_FS || (rq->cmd_flags & REQ_DISCARD)) { q->end_sector = rq_end_sector(rq); @@ -855,24 +812,6 @@ void elv_completed_request(struct request_queue *q, struct request *rq) e->ops->elevator_completed_req_fn) e->ops->elevator_completed_req_fn(q, rq); } - - /* - * Check if the queue is waiting for fs requests to be - * drained for flush sequence. 
- */ - if (unlikely(q->ordseq)) { - struct request *next = NULL; - - if (!list_empty(&q->queue_head)) - next = list_entry_rq(q->queue_head.next); - - if (!queue_in_flight(q) && - blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && - (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) { - blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); - __blk_run_queue(q); - } - } } #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ca83a97c9715..9192282b4259 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -143,7 +143,6 @@ enum rq_flag_bits { __REQ_FAILED, /* set if the request failed */ __REQ_QUIET, /* don't worry about errors */ __REQ_PREEMPT, /* set for "ide_preempt" requests */ - __REQ_ORDERED_COLOR, /* is before or after barrier */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_COPY_USER, /* contains copies of user pages */ __REQ_INTEGRITY, /* integrity metadata has been remapped */ @@ -184,7 +183,6 @@ enum rq_flag_bits { #define REQ_FAILED (1 << __REQ_FAILED) #define REQ_QUIET (1 << __REQ_QUIET) #define REQ_PREEMPT (1 << __REQ_PREEMPT) -#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) #define REQ_ALLOCED (1 << __REQ_ALLOCED) #define REQ_COPY_USER (1 << __REQ_COPY_USER) #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 996549d71923..20a3710a481b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -360,9 +360,10 @@ struct request_queue unsigned int flush_flags; unsigned int ordered, next_ordered, ordseq; - int orderr, ordcolor; - struct request pre_flush_rq, bar_rq, post_flush_rq; + int orderr; + struct request bar_rq; struct request *orig_bar_rq; + struct list_head pending_barriers; struct mutex sysfs_lock; @@ -491,12 +492,11 @@ enum { /* * Ordered operation sequence */ - QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ - QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ - QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ - QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ - QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ - QUEUE_ORDSEQ_DONE = 0x20, + QUEUE_ORDSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_ORDSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_ORDSEQ_BAR = (1 << 2), /* barrier write in progress */ + QUEUE_ORDSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_ORDSEQ_DONE = (1 << 4), }; #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) @@ -869,9 +869,6 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern unsigned blk_ordered_cur_seq(struct request_queue *); -extern unsigned blk_ordered_req_seq(struct request *); -extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); -- cgit v1.2.3 From dd4c133f387c48f526022860ad70354637a80f4c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: rename barrier/ordered to flush With ordering requirements dropped, barrier and ordered are 
misnomers. Now all block layer does is sequencing FLUSH and FUA. Rename them to flush. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 21 ++++++----- block/blk-flush.c | 98 +++++++++++++++++++++++++------------------------- block/blk.h | 4 +-- include/linux/blkdev.h | 24 ++++++------- 4 files changed, 72 insertions(+), 75 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index d316662682c8..8870ae40179d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -136,7 +136,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, { struct request_queue *q = rq->q; - if (&q->bar_rq != rq) { + if (&q->flush_rq != rq) { if (error) clear_bit(BIO_UPTODATE, &bio->bi_flags); else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) @@ -160,13 +160,12 @@ static void req_bio_endio(struct request *rq, struct bio *bio, if (bio->bi_size == 0) bio_endio(bio, error); } else { - /* - * Okay, this is the barrier request in progress, just - * record the error; + * Okay, this is the sequenced flush request in + * progress, just record the error; */ - if (error && !q->orderr) - q->orderr = error; + if (error && !q->flush_err) + q->flush_err = error; } } @@ -520,7 +519,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) init_timer(&q->unplug_timer); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); INIT_LIST_HEAD(&q->timeout_list); - INIT_LIST_HEAD(&q->pending_barriers); + INIT_LIST_HEAD(&q->pending_flushes); INIT_WORK(&q->unplug_work, blk_unplug_work); kobject_init(&q->kobj, &blk_queue_ktype); @@ -1764,11 +1763,11 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) static void blk_account_io_done(struct request *req) { /* - * Account IO completion. bar_rq isn't accounted as a normal - * IO on queueing nor completion. Accounting the containing - * request is enough. + * Account IO completion. flush_rq isn't accounted as a + * normal IO on queueing nor completion. Accounting the + * containing request is enough. 
*/ - if (blk_do_io_stat(req) && req != &req->q->bar_rq) { + if (blk_do_io_stat(req) && req != &req->q->flush_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); struct hd_struct *part; diff --git a/block/blk-flush.c b/block/blk-flush.c index e8b2e5c091b1..dd873225da97 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -9,41 +9,38 @@ #include "blk.h" -static struct request *queue_next_ordseq(struct request_queue *q); +static struct request *queue_next_fseq(struct request_queue *q); -/* - * Cache flushing for ordered writes handling - */ -unsigned blk_ordered_cur_seq(struct request_queue *q) +unsigned blk_flush_cur_seq(struct request_queue *q) { - if (!q->ordseq) + if (!q->flush_seq) return 0; - return 1 << ffz(q->ordseq); + return 1 << ffz(q->flush_seq); } -static struct request *blk_ordered_complete_seq(struct request_queue *q, - unsigned seq, int error) +static struct request *blk_flush_complete_seq(struct request_queue *q, + unsigned seq, int error) { struct request *next_rq = NULL; - if (error && !q->orderr) - q->orderr = error; + if (error && !q->flush_err) + q->flush_err = error; - BUG_ON(q->ordseq & seq); - q->ordseq |= seq; + BUG_ON(q->flush_seq & seq); + q->flush_seq |= seq; - if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) { - /* not complete yet, queue the next ordered sequence */ - next_rq = queue_next_ordseq(q); + if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) { + /* not complete yet, queue the next flush sequence */ + next_rq = queue_next_fseq(q); } else { - /* complete this barrier request */ - __blk_end_request_all(q->orig_bar_rq, q->orderr); - q->orig_bar_rq = NULL; - q->ordseq = 0; - - /* dispatch the next barrier if there's one */ - if (!list_empty(&q->pending_barriers)) { - next_rq = list_entry_rq(q->pending_barriers.next); + /* complete this flush request */ + __blk_end_request_all(q->orig_flush_rq, q->flush_err); + q->orig_flush_rq = NULL; + q->flush_seq = 0; + + /* dispatch the next flush if there's one */ + if (!list_empty(&q->pending_flushes)) { + next_rq = list_entry_rq(q->pending_flushes.next); list_move(&next_rq->queuelist, &q->queue_head); } } @@ -53,19 +50,19 @@ static struct request *blk_ordered_complete_seq(struct request_queue *q, static void pre_flush_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); + blk_flush_complete_seq(rq->q, QUEUE_FSEQ_PREFLUSH, error); } -static void bar_end_io(struct request *rq, int error) +static void flush_data_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); + blk_flush_complete_seq(rq->q, QUEUE_FSEQ_DATA, error); } static void post_flush_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); + blk_flush_complete_seq(rq->q, QUEUE_FSEQ_POSTFLUSH, error); } static void queue_flush(struct request_queue *q, struct request *rq, @@ -74,34 +71,34 @@ static void queue_flush(struct request_queue *q, struct request *rq, blk_rq_init(q, rq); rq->cmd_type = REQ_TYPE_FS; rq->cmd_flags = REQ_FLUSH; - rq->rq_disk = q->orig_bar_rq->rq_disk; + rq->rq_disk = q->orig_flush_rq->rq_disk; rq->end_io = end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static struct request *queue_next_ordseq(struct request_queue *q) +static struct request *queue_next_fseq(struct request_queue *q) { - struct request *rq = &q->bar_rq; + struct request *rq = 
&q->flush_rq; - switch (blk_ordered_cur_seq(q)) { - case QUEUE_ORDSEQ_PREFLUSH: + switch (blk_flush_cur_seq(q)) { + case QUEUE_FSEQ_PREFLUSH: queue_flush(q, rq, pre_flush_end_io); break; - case QUEUE_ORDSEQ_BAR: + case QUEUE_FSEQ_DATA: /* initialize proxy request and queue it */ blk_rq_init(q, rq); - init_request_from_bio(rq, q->orig_bar_rq->bio); + init_request_from_bio(rq, q->orig_flush_rq->bio); rq->cmd_flags &= ~REQ_HARDBARRIER; if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; - rq->end_io = bar_end_io; + rq->end_io = flush_data_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); break; - case QUEUE_ORDSEQ_POSTFLUSH: + case QUEUE_FSEQ_POSTFLUSH: queue_flush(q, rq, post_flush_end_io); break; @@ -111,19 +108,20 @@ static struct request *queue_next_ordseq(struct request_queue *q) return rq; } -struct request *blk_do_ordered(struct request_queue *q, struct request *rq) +struct request *blk_do_flush(struct request_queue *q, struct request *rq) { unsigned skip = 0; if (!(rq->cmd_flags & REQ_HARDBARRIER)) return rq; - if (q->ordseq) { + if (q->flush_seq) { /* - * Barrier is already in progress and they can't be - * processed in parallel. Queue for later processing. + * Sequenced flush is already in progress and they + * can't be processed in parallel. Queue for later + * processing. */ - list_move_tail(&rq->queuelist, &q->pending_barriers); + list_move_tail(&rq->queuelist, &q->pending_flushes); return NULL; } @@ -138,11 +136,11 @@ struct request *blk_do_ordered(struct request_queue *q, struct request *rq) } /* - * Start a new ordered sequence + * Start a new flush sequence */ - q->orderr = 0; + q->flush_err = 0; q->ordered = q->next_ordered; - q->ordseq |= QUEUE_ORDSEQ_STARTED; + q->flush_seq |= QUEUE_FSEQ_STARTED; /* * For an empty barrier, there's no actual BAR request, which @@ -154,19 +152,19 @@ struct request *blk_do_ordered(struct request_queue *q, struct request *rq) /* stash away the original request */ blk_dequeue_request(rq); - q->orig_bar_rq = rq; + q->orig_flush_rq = rq; if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) - skip |= QUEUE_ORDSEQ_PREFLUSH; + skip |= QUEUE_FSEQ_PREFLUSH; if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) - skip |= QUEUE_ORDSEQ_BAR; + skip |= QUEUE_FSEQ_DATA; if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) - skip |= QUEUE_ORDSEQ_POSTFLUSH; + skip |= QUEUE_FSEQ_POSTFLUSH; /* complete skipped sequences and return the first sequence */ - return blk_ordered_complete_seq(q, skip, 0); + return blk_flush_complete_seq(q, skip, 0); } static void bio_end_empty_barrier(struct bio *bio, int err) diff --git a/block/blk.h b/block/blk.h index 08081e4b294e..24b92bd78f37 100644 --- a/block/blk.h +++ b/block/blk.h @@ -51,7 +51,7 @@ static inline void blk_clear_rq_complete(struct request *rq) */ #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) -struct request *blk_do_ordered(struct request_queue *q, struct request *rq); +struct request *blk_do_flush(struct request_queue *q, struct request *rq); static inline struct request *__elv_next_request(struct request_queue *q) { @@ -60,7 +60,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); - rq = blk_do_ordered(q, rq); + rq = blk_do_flush(q, rq); if (rq) return rq; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 20a3710a481b..1cd83ec077db 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -357,13 +357,13 @@ struct request_queue /* * for flush operations */ + unsigned 
int ordered, next_ordered; unsigned int flush_flags; - - unsigned int ordered, next_ordered, ordseq; - int orderr; - struct request bar_rq; - struct request *orig_bar_rq; - struct list_head pending_barriers; + unsigned int flush_seq; + int flush_err; + struct request flush_rq; + struct request *orig_flush_rq; + struct list_head pending_flushes; struct mutex sysfs_lock; @@ -490,13 +490,13 @@ enum { QUEUE_ORDERED_DO_FUA, /* - * Ordered operation sequence + * FLUSH/FUA sequences. */ - QUEUE_ORDSEQ_STARTED = (1 << 0), /* flushing in progress */ - QUEUE_ORDSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ - QUEUE_ORDSEQ_BAR = (1 << 2), /* barrier write in progress */ - QUEUE_ORDSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ - QUEUE_ORDSEQ_DONE = (1 << 4), + QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ + QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_FSEQ_DONE = (1 << 4), }; #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) -- cgit v1.2.3 From 4fed947cb311e5aa51781d316cefca836352f6ce Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: implement REQ_FLUSH/FUA based interface for FLUSH/FUA requests Now that the backend conversion is complete, export sequenced FLUSH/FUA capability through REQ_FLUSH/FUA flags. REQ_FLUSH means the device cache should be flushed before executing the request. REQ_FUA means that the data in the request should be on non-volatile media on completion. The block layer will choose the correct way of implementing the semantics and execute it. The request may be passed to the device directly if the device can handle it; otherwise, it will be sequenced using one or more proxy requests. Devices will never see REQ_FLUSH and/or REQ_FUA flags that they don't support. Also, unlike the original REQ_HARDBARRIER, REQ_FLUSH/FUA requests are never failed with -EOPNOTSUPP. If the underlying device doesn't support FLUSH/FUA, the block layer simply turns them into no-ops. IOW, it no longer distinguishes between a writeback cache that doesn't support cache flush and a writethrough/no-cache device. Devices which have a WB cache w/o flush are very difficult to come by these days, and there's not much we can do anyway, so it doesn't make sense to require everyone to implement -EOPNOTSUPP handling. This will simplify filesystems and block drivers as they can drop -EOPNOTSUPP retry logic for barriers. * QUEUE_ORDERED_* are removed and QUEUE_FSEQ_* are moved into blk-flush.c. * REQ_FLUSH w/o data can also be directly passed to drivers without sequencing, but some drivers assume that zero-length requests don't have an rq->bio, which isn't true for these requests; this requires the use of proxy requests. * REQ_COMMON_MASK now includes REQ_FLUSH | REQ_FUA so that they are copied from bio to request. * WRITE_BARRIER is marked deprecated and WRITE_FLUSH, WRITE_FUA and WRITE_FLUSH_FUA are added. 
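As a rough usage sketch (editorial, not from the patch): with the new flags a filesystem commit write becomes just a tagged buffer write, and the block layer does whatever sequencing the device needs. commit_write() is a hypothetical helper built on the existing buffer-head API.

	/* Hypothetical journal-commit helper.  WRITE_FLUSH_FUA adds
	 * REQ_FLUSH | REQ_FUA to a synchronous write; the block layer
	 * expands it into preflush + data write (+ postflush) as the
	 * device requires and never fails it with -EOPNOTSUPP. */
	static int commit_write(struct buffer_head *bh)
	{
		lock_buffer(bh);
		get_bh(bh);
		bh->b_end_io = end_buffer_write_sync;
		submit_bh(WRITE_FLUSH_FUA, bh);
		wait_on_buffer(bh);
		return buffer_uptodate(bh) ? 0 : -EIO;
	}
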
Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-flush.c | 85 ++++++++++++++++++++++++--------------------- block/blk.h | 3 ++ include/linux/blk_types.h | 2 +- include/linux/blkdev.h | 38 ++------------------ include/linux/buffer_head.h | 2 +- include/linux/fs.h | 19 ++++++---- 7 files changed, 67 insertions(+), 84 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 8870ae40179d..18455c4f618a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1204,7 +1204,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - if (bio->bi_rw & REQ_HARDBARRIER) { + if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { where = ELEVATOR_INSERT_FRONT; goto get_rq; } diff --git a/block/blk-flush.c b/block/blk-flush.c index dd873225da97..452c552e9ead 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -1,5 +1,5 @@ /* - * Functions related to barrier IO handling + * Functions to sequence FLUSH and FUA writes. */ #include #include @@ -9,6 +9,15 @@ #include "blk.h" +/* FLUSH/FUA sequences */ +enum { + QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ + QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_FSEQ_DONE = (1 << 4), +}; + static struct request *queue_next_fseq(struct request_queue *q); unsigned blk_flush_cur_seq(struct request_queue *q) @@ -79,6 +88,7 @@ static void queue_flush(struct request_queue *q, struct request *rq, static struct request *queue_next_fseq(struct request_queue *q) { + struct request *orig_rq = q->orig_flush_rq; struct request *rq = &q->flush_rq; switch (blk_flush_cur_seq(q)) { @@ -87,12 +97,11 @@ static struct request *queue_next_fseq(struct request_queue *q) break; case QUEUE_FSEQ_DATA: - /* initialize proxy request and queue it */ + /* initialize proxy request, inherit FLUSH/FUA and queue it */ blk_rq_init(q, rq); - init_request_from_bio(rq, q->orig_flush_rq->bio); - rq->cmd_flags &= ~REQ_HARDBARRIER; - if (q->ordered & QUEUE_ORDERED_DO_FUA) - rq->cmd_flags |= REQ_FUA; + init_request_from_bio(rq, orig_rq->bio); + rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); + rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); rq->end_io = flush_data_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); @@ -110,60 +119,58 @@ static struct request *queue_next_fseq(struct request_queue *q) struct request *blk_do_flush(struct request_queue *q, struct request *rq) { + unsigned int fflags = q->flush_flags; /* may change, cache it */ + bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; + bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); + bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); unsigned skip = 0; - if (!(rq->cmd_flags & REQ_HARDBARRIER)) + /* + * Special case. If there's data but flush is not necessary, + * the request can be issued directly. + * + * Flush w/o data should be able to be issued directly too but + * currently some drivers assume that rq->bio contains + * non-zero data if it isn't NULL and empty FLUSH requests + * getting here usually have bio's without data. + */ + if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { + rq->cmd_flags &= ~REQ_FLUSH; + if (!has_fua) + rq->cmd_flags &= ~REQ_FUA; return rq; + } + /* + * Sequenced flushes can't be processed in parallel. 
If + * another one is already in progress, queue for later + * processing. + */ if (q->flush_seq) { - /* - * Sequenced flush is already in progress and they - * can't be processed in parallel. Queue for later - * processing. - */ list_move_tail(&rq->queuelist, &q->pending_flushes); return NULL; } - if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) { - /* - * Queue ordering not supported. Terminate - * with prejudice. - */ - blk_dequeue_request(rq); - __blk_end_request_all(rq, -EOPNOTSUPP); - return NULL; - } - /* * Start a new flush sequence */ q->flush_err = 0; - q->ordered = q->next_ordered; q->flush_seq |= QUEUE_FSEQ_STARTED; - /* - * For an empty barrier, there's no actual BAR request, which - * in turn makes POSTFLUSH unnecessary. Mask them off. - */ - if (!blk_rq_sectors(rq)) - q->ordered &= ~(QUEUE_ORDERED_DO_BAR | - QUEUE_ORDERED_DO_POSTFLUSH); - - /* stash away the original request */ + /* adjust FLUSH/FUA of the original request and stash it away */ + rq->cmd_flags &= ~REQ_FLUSH; + if (!has_fua) + rq->cmd_flags &= ~REQ_FUA; blk_dequeue_request(rq); q->orig_flush_rq = rq; - if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) + /* skip unneded sequences and return the first one */ + if (!do_preflush) skip |= QUEUE_FSEQ_PREFLUSH; - - if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) + if (!blk_rq_sectors(rq)) skip |= QUEUE_FSEQ_DATA; - - if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) + if (!do_postflush) skip |= QUEUE_FSEQ_POSTFLUSH; - - /* complete skipped sequences and return the first sequence */ return blk_flush_complete_seq(q, skip, 0); } diff --git a/block/blk.h b/block/blk.h index 24b92bd78f37..a09c18b19116 100644 --- a/block/blk.h +++ b/block/blk.h @@ -60,6 +60,9 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); + if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) || + rq == &q->flush_rq) + return rq; rq = blk_do_flush(q, rq); if (rq) return rq; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 9192282b4259..179799479e6f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -167,7 +167,7 @@ enum rq_flag_bits { (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ - REQ_META| REQ_DISCARD | REQ_NOIDLE) + REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) #define REQ_UNPLUG (1 << __REQ_UNPLUG) #define REQ_RAHEAD (1 << __REQ_RAHEAD) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1cd83ec077db..8ef705f800ab 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -357,7 +357,6 @@ struct request_queue /* * for flush operations */ - unsigned int ordered, next_ordered; unsigned int flush_flags; unsigned int flush_seq; int flush_err; @@ -465,40 +464,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) __clear_bit(flag, &q->queue_flags); } -enum { - /* - * Hardbarrier is supported with one of the following methods. 
- * - * NONE : hardbarrier unsupported - * DRAIN : ordering by draining is enough - * DRAIN_FLUSH : ordering by draining w/ pre and post flushes - * DRAIN_FUA : ordering by draining w/ pre flush and FUA write - */ - QUEUE_ORDERED_DO_PREFLUSH = 0x10, - QUEUE_ORDERED_DO_BAR = 0x20, - QUEUE_ORDERED_DO_POSTFLUSH = 0x40, - QUEUE_ORDERED_DO_FUA = 0x80, - - QUEUE_ORDERED_NONE = 0x00, - - QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR, - QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH, - QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_FUA, - - /* - * FLUSH/FUA sequences. - */ - QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ - QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ - QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ - QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ - QUEUE_FSEQ_DONE = (1 << 4), -}; - #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) @@ -578,7 +543,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync) * it already be started by driver. */ #define RQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) + (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \ + REQ_FLUSH | REQ_FUA) #define rq_mergeable(rq) \ (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ (((rq)->cmd_flags & REQ_DISCARD) || \ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index ec94c12f21da..fc999f583fda 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,7 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Eopnotsupp, /* operation not supported (barrier) */ + BH_Eopnotsupp, /* DEPRECATED: operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b614758..352c48627381 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -135,12 +135,13 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all - * previously submitted writes must be safely on storage - * before this one is started. Also guarantees that when - * this write is complete, it itself is also safely on - * storage. Prevents reordering of writes on both sides - * of this IO. + * WRITE_BARRIER DEPRECATED. Always fails. Use FLUSH/FUA instead. + * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. + * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on + * non-volatile media on completion. + * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded + * by a cache flush and data is guaranteed to be on + * non-volatile media on completion. 
* */ #define RW_MASK REQ_WRITE @@ -158,6 +159,12 @@ struct inodes_stat_t { #define WRITE_META (WRITE | REQ_META) #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_HARDBARRIER) +#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FLUSH) +#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FUA) +#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FLUSH | REQ_FUA) /* * These aren't really reads or writes, they pass down information about -- cgit v1.2.3 From 3a2edd0d6ddbd5fa3b389ea6db811285415ce6c8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:18 +0200 Subject: block: make __blk_rq_prep_clone() copy most command flags Currently __blk_rq_prep_clone() copies only REQ_WRITE and REQ_DISCARD. There's no reason to omit other command flags, and REQ_FUA needs to be copied to implement FUA support in request-based dm. REQ_COMMON_MASK, which specifies the flags to be copied from bio to request, already identifies all the command flags. Define REQ_CLONE_MASK to be the same as REQ_COMMON_MASK for clarity and make __blk_rq_prep_clone() copy all flags in the mask. Signed-off-by: Tejun Heo Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 4 +--- include/linux/blk_types.h | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 495bdc4a23da..2a5b19204546 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2505,9 +2505,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); static void __blk_rq_prep_clone(struct request *dst, struct request *src) { dst->cpu = src->cpu; - dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE); - if (src->cmd_flags & REQ_DISCARD) - dst->cmd_flags |= REQ_DISCARD; + dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; dst->cmd_type = src->cmd_type; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 179799479e6f..36edadf5b41a 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -168,6 +168,7 @@ enum rq_flag_bits { #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) +#define REQ_CLONE_MASK REQ_COMMON_MASK #define REQ_UNPLUG (1 << __REQ_UNPLUG) #define REQ_RAHEAD (1 << __REQ_RAHEAD) -- cgit v1.2.3 From 2cf6d26a354ab6362e301b5a323832b02867df47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:10 -0400 Subject: block: pass gfp_mask and flags to sb_issue_discard We'll need to get rid of the BLKDEV_IFL_BARRIER flag, and to facilitate that, and to make the interface less confusing, pass all flags explicitly. 
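A call-site sketch of the widened interface (editorial; the flag values mirror the ext4 conversion in the diff below, and example_issue_discard() is a hypothetical wrapper):

	/* Example caller after this change: the GFP mask and the
	 * wait/barrier behaviour are now chosen by the filesystem
	 * rather than baked into sb_issue_discard() itself. */
	static void example_issue_discard(struct super_block *sb,
					  sector_t block, sector_t nr_blocks)
	{
		int err = sb_issue_discard(sb, block, nr_blocks, GFP_NOFS,
					   BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
		if (err)
			printk(KERN_WARNING "discard failed: %d\n", err);
	}
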
Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/ext4/mballoc.c | 3 ++- fs/fat/fatent.c | 4 +++- include/linux/blkdev.h | 11 +++++------ 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4b4ad4b7ce57..df44b345f662 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2566,7 +2566,8 @@ static inline void ext4_issue_discard(struct super_block *sb, discard_block = block + ext4_group_first_block_no(sb, block_group); trace_ext4_discard_blocks(sb, (unsigned long long) discard_block, count); - ret = sb_issue_discard(sb, discard_block, count); + ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, + BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); if (ret == EOPNOTSUPP) { ext4_warning(sb, "discard not supported, disabling"); clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 81184d3b75a3..3a56a82f5658 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -577,7 +577,9 @@ int fat_free_clusters(struct inode *inode, int cluster) sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), - nr_clus * sbi->sec_per_clus); + nr_clus * sbi->sec_per_clus, + GFP_NOFS, + BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); first_cl = cluster; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8ef705f800ab..6b305eb4a343 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -881,13 +881,12 @@ extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); -static inline int sb_issue_discard(struct super_block *sb, - sector_t block, sector_t nr_blocks) +static inline int sb_issue_discard(struct super_block *sb, sector_t block, + sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { - block <<= (sb->s_blocksize_bits - 9); - nr_blocks <<= (sb->s_blocksize_bits - 9); - return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, - BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); + return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9), + nr_blocks << (sb->s_blocksize_bits - 9), + gfp_mask, flags); } extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); -- cgit v1.2.3 From 31725e65c7214b52b607eba705fc4f306be4d5a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:21 -0400 Subject: block: remove the WRITE_BARRIER flag It's unused now. Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 352c48627381..d6add69bc170 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -135,7 +135,6 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * WRITE_BARRIER DEPRECATED. Always fails. Use FLUSH/FUA instead. * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on * non-volatile media on completion. 
@@ -157,8 +156,6 @@ struct inodes_stat_t { #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) #define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC) #define WRITE_META (WRITE | REQ_META) -#define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ - REQ_HARDBARRIER) #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_FLUSH) #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ -- cgit v1.2.3 From 8c5553678237b7121355108e03c36086037d8975 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:22 -0400 Subject: block: remove the BLKDEV_IFL_BARRIER flag Remove support for barriers on discards, which is unused now. Also remove the DISCARD_NOBARRIER I/O type in favour of just setting the rw flags up locally in blkdev_issue_discard. tj: Also remove DISCARD_SECURE and use REQ_SECURE directly. Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-lib.c | 18 ++---------------- include/linux/blkdev.h | 2 -- include/linux/fs.h | 8 -------- 3 files changed, 2 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/block/blk-lib.c b/block/blk-lib.c index c392029a104e..fe2e6ed0f510 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -39,8 +39,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, { DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q = bdev_get_queue(bdev); - int type = flags & BLKDEV_IFL_BARRIER ? - DISCARD_BARRIER : DISCARD_NOBARRIER; + int type = REQ_WRITE | REQ_DISCARD; unsigned int max_discard_sectors; struct bio *bio; int ret = 0; @@ -65,7 +64,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, if (flags & BLKDEV_IFL_SECURE) { if (!blk_queue_secdiscard(q)) return -EOPNOTSUPP; - type |= DISCARD_SECURE; + type |= REQ_SECURE; } while (nr_sects && !ret) { @@ -162,12 +161,6 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, bb.wait = &wait; bb.end_io = NULL; - if (flags & BLKDEV_IFL_BARRIER) { - /* issue async barrier before the data */ - ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0); - if (ret) - return ret; - } submit: ret = 0; while (nr_sects != 0) { @@ -199,13 +192,6 @@ submit: issued++; submit_bio(WRITE, bio); } - /* - * When all data bios are in flight. Send final barrier if requeted. 
- */ - if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER) - ret = blkdev_issue_flush(bdev, gfp_mask, NULL, - flags & BLKDEV_IFL_WAIT); - if (flags & BLKDEV_IFL_WAIT) /* Wait for bios in-flight */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6b305eb4a343..cfcb3a610605 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -869,11 +869,9 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, } enum{ BLKDEV_WAIT, /* wait for completion */ - BLKDEV_BARRIER, /* issue request with barrier */ BLKDEV_SECURE, /* secure discard */ }; #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) -#define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER) #define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE) extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, unsigned long); diff --git a/include/linux/fs.h b/include/linux/fs.h index d6add69bc170..6b0f6e9993a3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -163,14 +163,6 @@ struct inodes_stat_t { #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_FLUSH | REQ_FUA) -/* - * These aren't really reads or writes, they pass down information about - * parts of device that are now unused by the file system. - */ -#define DISCARD_NOBARRIER (WRITE | REQ_DISCARD) -#define DISCARD_BARRIER (WRITE | REQ_DISCARD | REQ_HARDBARRIER) -#define DISCARD_SECURE (DISCARD_NOBARRIER | REQ_SECURE) - #define SEL_IN 1 #define SEL_OUT 2 #define SEL_EX 4 -- cgit v1.2.3 From 0edd55faea7c8081bc826234b917501738a6218f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:23 -0400 Subject: block: remove the BH_Eopnotsupp flag This flag was only set for barrier buffers, which we don't submit anymore. Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/buffer.c | 7 +------ fs/fat/misc.c | 5 +---- include/linux/buffer_head.h | 2 -- 3 files changed, 2 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/fs/buffer.c b/fs/buffer.c index 3e7dca279d1c..7f0b9b083f77 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -156,7 +156,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) if (uptodate) { set_buffer_uptodate(bh); } else { - if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) { + if (!quiet_error(bh)) { buffer_io_error(bh); printk(KERN_WARNING "lost page write due to " "I/O error on %s\n", @@ -2891,7 +2891,6 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) if (err == -EOPNOTSUPP) { set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - set_bit(BH_Eopnotsupp, &bh->b_state); } if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) @@ -3031,10 +3030,6 @@ int __sync_dirty_buffer(struct buffer_head *bh, int rw) bh->b_end_io = end_buffer_write_sync; ret = submit_bh(rw, bh); wait_on_buffer(bh); - if (buffer_eopnotsupp(bh)) { - clear_buffer_eopnotsupp(bh); - ret = -EOPNOTSUPP; - } if (!ret && !buffer_uptodate(bh)) ret = -EIO; } else { diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1736f2356388..970e682ea754 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -255,10 +255,7 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) for (i = 0; i < nr_bhs; i++) { wait_on_buffer(bhs[i]); - if (buffer_eopnotsupp(bhs[i])) { - clear_buffer_eopnotsupp(bhs[i]); - err = -EOPNOTSUPP; - } else if (!err && !buffer_uptodate(bhs[i])) + if (!err && !buffer_uptodate(bhs[i])) err = -EIO; } return err; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index fc999f583fda..dd1b25b2641c 100644 --- a/include/linux/buffer_head.h +++ 
b/include/linux/buffer_head.h @@ -32,7 +32,6 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Eopnotsupp, /* DEPRECATED: operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ @@ -124,7 +123,6 @@ BUFFER_FNS(Async_Write, async_write) BUFFER_FNS(Delay, delay) BUFFER_FNS(Boundary, boundary) BUFFER_FNS(Write_EIO, write_io_error) -BUFFER_FNS(Eopnotsupp, eopnotsupp) BUFFER_FNS(Unwritten, unwritten) #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) -- cgit v1.2.3 From 30ca22c70e3ef0a96ff84de69cd7e8561b416cb2 Mon Sep 17 00:00:00 2001 From: "Patrick J. LoPresti" Date: Thu, 22 Jul 2010 15:03:41 -0700 Subject: ext3/ext4: Factor out disk addressability check As part of adding support for OCFS2 to mount huge volumes, we need to check that the sector_t and page cache of the system are capable of addressing the entire volume. An identical check already appears in ext3 and ext4. This patch moves the addressability check into its own function in fs/libfs.c and modifies ext3 and ext4 to invoke it. [Edited to -EINVAL instead of BUG_ON() for bad blocksize_bits -- Joel] Signed-off-by: Patrick LoPresti Cc: linux-ext4@vger.kernel.org Acked-by: Andreas Dilger Signed-off-by: Joel Becker --- fs/ext3/super.c | 4 ++-- fs/ext4/super.c | 8 +++----- fs/libfs.c | 29 +++++++++++++++++++++++++++++ include/linux/fs.h | 2 ++ 4 files changed, 36 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 5dbf4dba03c4..a367dd044280 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1849,8 +1849,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount; } - if (le32_to_cpu(es->s_blocks_count) > - (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { + if (generic_check_addressable(sb->s_blocksize_bits, + le32_to_cpu(es->s_blocks_count))) { ext3_msg(sb, KERN_ERR, "error: filesystem is too large to mount safely"); if (sizeof(sector_t) < 8) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 26147746c272..7f47c366bf15 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2831,15 +2831,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * Test whether we have more sectors than will fit in sector_t, * and whether the max offset is addressable by the page cache. 
*/ - if ((ext4_blocks_count(es) > - (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || - (ext4_blocks_count(es) > - (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { + ret = generic_check_addressable(sb->s_blocksize_bits, + ext4_blocks_count(es)); + if (ret) { ext4_msg(sb, KERN_ERR, "filesystem" " too large to mount safely on this system"); if (sizeof(sector_t) < 8) ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); - ret = -EFBIG; goto failed_mount; } diff --git a/fs/libfs.c b/fs/libfs.c index 0a9da95317f7..8debe7b33769 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -913,6 +913,35 @@ int generic_file_fsync(struct file *file, int datasync) } EXPORT_SYMBOL(generic_file_fsync); +/** + * generic_check_addressable - Check addressability of file system + * @blocksize_bits: log of file system block size + * @num_blocks: number of blocks in file system + * + * Determine whether a file system with @num_blocks blocks (and a + * block size of 2**@blocksize_bits) is addressable by the sector_t + * and page cache of the system. Return 0 if so and -EFBIG otherwise. + */ +int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) +{ + u64 last_fs_block = num_blocks - 1; + + if (unlikely(num_blocks == 0)) + return 0; + + if ((blocksize_bits < 9) || (blocksize_bits > PAGE_CACHE_SHIFT)) + return -EINVAL; + + if ((last_fs_block > + (sector_t)(~0ULL) >> (blocksize_bits - 9)) || + (last_fs_block > + (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - blocksize_bits))) { + return -EFBIG; + } + return 0; +} +EXPORT_SYMBOL(generic_check_addressable); + /* * No-op implementation of ->fsync for in-memory filesystems. */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b614758..1a759f40ab9e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2374,6 +2374,8 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, extern int generic_file_fsync(struct file *, int); +extern int generic_check_addressable(unsigned, u64); + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *); -- cgit v1.2.3 From c8bf1336824ebd698d37b71763e1c43190f2229a Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 10 Sep 2010 20:07:38 +0200 Subject: Consolidate min_not_zero We have several users of min_not_zero, each of them using their own definition. Move the define to kernel.h. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 5 ----- drivers/block/drbd/drbd_receiver.c | 1 - drivers/md/dm-snap.c | 2 -- drivers/md/dm-table.c | 5 ----- include/linux/kernel.h | 10 ++++++++++ 5 files changed, 10 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index a234f4bf1d6f..8d592b559bd3 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -455,11 +455,6 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt) } EXPORT_SYMBOL(blk_queue_io_opt); -/* - * Returns the minimum that is _not_ zero, unless both are zero. - */ -#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? 
l : min(l, r)) - /** * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers * @t: the stacking driver (top) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 081522d3c742..484ecbb6b772 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2972,7 +2972,6 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) * we still need to figure out whether we accept that. */ mdev->p_size = p_size; -#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) if (get_ldev(mdev)) { warn_if_differ_considerably(mdev, "lower level device sizes", p_size, drbd_get_max_capacity(mdev->ldev)); diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 5974d3094d97..f30f6e8d594e 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -706,8 +706,6 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) return 0; } -#define min_not_zero(l, r) (((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r))) - /* * Return a minimum chunk size of all snapshots that have the specified origin. * Return zero if the origin has no snapshots. diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index f9fc07d7a4b9..90267f8d64ee 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -486,11 +486,6 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti, return 0; } -/* - * Returns the minimum that is _not_ zero, unless both are zero. - */ -#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) - int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b0a35e6bc69..f5df2f4acb0d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -640,6 +640,16 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } (void) (&_max1 == &_max2); \ _max1 > _max2 ? _max1 : _max2; }) +/** + * min_not_zero - return the minimum that is _not_ zero, unless both are zero + * @x: value1 + * @y: value2 + */ +#define min_not_zero(x, y) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) + /** * clamp - return a value clamped to a given range with strict typechecking * @val: current value -- cgit v1.2.3 From 13f05c8d8e98bbdce89158bfdb2e380940695a88 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 10 Sep 2010 20:50:10 +0200 Subject: block/scsi: Provide a limit on the number of integrity segments Some controllers have a hardware limit on the number of protection information scatter-gather list segments they can handle. Introduce a max_integrity_segments limit in the block layer and provide a new scsi_host_template setting that allows HBA drivers to provide a value suitable for the hardware. Add support for honoring the integrity segment limit when merging both bios and requests. Signed-off-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- block/blk-integrity.c | 93 ++++++++++++++++++++++++++++++++++++----------- block/blk-merge.c | 23 +++++++----- block/blk-settings.c | 3 ++ block/blk-sysfs.c | 11 ++++++ block/blk.h | 8 ---- drivers/scsi/hosts.c | 1 + drivers/scsi/scsi_lib.c | 26 +++++++++---- drivers/scsi/scsi_sysfs.c | 2 + include/linux/bio.h | 4 ++ include/linux/blkdev.h | 33 +++++++++++++++-- include/scsi/scsi.h | 6 +++ include/scsi/scsi_host.h | 7 ++++ 12 files changed, 167 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index edce1ef7933d..885cbb59967e 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -32,24 +32,37 @@ static struct kmem_cache *integrity_cachep; /** * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements - * @rq: request with integrity metadata attached + * @q: request queue + * @bio: bio with integrity metadata attached * * Description: Returns the number of elements required in a - * scatterlist corresponding to the integrity metadata in a request. + * scatterlist corresponding to the integrity metadata in a bio. */ -int blk_rq_count_integrity_sg(struct request *rq) +int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio) { - struct bio_vec *iv, *ivprv; - struct req_iterator iter; - unsigned int segments; + struct bio_vec *iv, *ivprv = NULL; + unsigned int segments = 0; + unsigned int seg_size = 0; + unsigned int i = 0; - ivprv = NULL; - segments = 0; + bio_for_each_integrity_vec(iv, bio, i) { - rq_for_each_integrity_segment(iv, rq, iter) { + if (ivprv) { + if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv)) + goto new_segment; + + if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv)) + goto new_segment; + + if (seg_size + iv->bv_len > queue_max_segment_size(q)) + goto new_segment; - if (!ivprv || !BIOVEC_PHYS_MERGEABLE(ivprv, iv)) + seg_size += iv->bv_len; + } else { +new_segment: segments++; + seg_size = iv->bv_len; + } ivprv = iv; } @@ -60,30 +73,34 @@ EXPORT_SYMBOL(blk_rq_count_integrity_sg); /** * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist - * @rq: request with integrity metadata attached + * @q: request queue + * @bio: bio with integrity metadata attached * @sglist: target scatterlist * * Description: Map the integrity vectors in request into a * scatterlist. The scatterlist must be big enough to hold all * elements. I.e. sized using blk_rq_count_integrity_sg(). 
*/ -int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist) +int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio, + struct scatterlist *sglist) { - struct bio_vec *iv, *ivprv; - struct req_iterator iter; - struct scatterlist *sg; - unsigned int segments; + struct bio_vec *iv, *ivprv = NULL; + struct scatterlist *sg = NULL; + unsigned int segments = 0; + unsigned int i = 0; - ivprv = NULL; - sg = NULL; - segments = 0; - - rq_for_each_integrity_segment(iv, rq, iter) { + bio_for_each_integrity_vec(iv, bio, i) { if (ivprv) { if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv)) goto new_segment; + if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv)) + goto new_segment; + + if (sg->length + iv->bv_len > queue_max_segment_size(q)) + goto new_segment; + sg->length += iv->bv_len; } else { new_segment: @@ -162,6 +179,40 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) } EXPORT_SYMBOL(blk_integrity_compare); +int blk_integrity_merge_rq(struct request_queue *q, struct request *req, + struct request *next) +{ + if (blk_integrity_rq(req) != blk_integrity_rq(next)) + return -1; + + if (req->nr_integrity_segments + next->nr_integrity_segments > + q->limits.max_integrity_segments) + return -1; + + return 0; +} +EXPORT_SYMBOL(blk_integrity_merge_rq); + +int blk_integrity_merge_bio(struct request_queue *q, struct request *req, + struct bio *bio) +{ + int nr_integrity_segs; + struct bio *next = bio->bi_next; + + bio->bi_next = NULL; + nr_integrity_segs = blk_rq_count_integrity_sg(q, bio); + bio->bi_next = next; + + if (req->nr_integrity_segments + nr_integrity_segs > + q->limits.max_integrity_segments) + return -1; + + req->nr_integrity_segments += nr_integrity_segs; + + return 0; +} +EXPORT_SYMBOL(blk_integrity_merge_bio); + struct integrity_sysfs_entry { struct attribute attr; ssize_t (*show)(struct blk_integrity *, char *); diff --git a/block/blk-merge.c b/block/blk-merge.c index 3b0cd4249671..6a725461654d 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -205,12 +205,11 @@ static inline int ll_new_hw_segment(struct request_queue *q, { int nr_phys_segs = bio_phys_segments(q, bio); - if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) { - req->cmd_flags |= REQ_NOMERGE; - if (req == q->last_merge) - q->last_merge = NULL; - return 0; - } + if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) + goto no_merge; + + if (bio_integrity(bio) && blk_integrity_merge_bio(q, req, bio)) + goto no_merge; /* * This will form the start of a new hw segment. Bump both @@ -218,6 +217,12 @@ static inline int ll_new_hw_segment(struct request_queue *q, */ req->nr_phys_segments += nr_phys_segs; return 1; + +no_merge: + req->cmd_flags |= REQ_NOMERGE; + if (req == q->last_merge) + q->last_merge = NULL; + return 0; } int ll_back_merge_fn(struct request_queue *q, struct request *req, @@ -301,6 +306,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (total_phys_segments > queue_max_segments(q)) return 0; + if (blk_integrity_rq(req) && blk_integrity_merge_rq(q, req, next)) + return 0; + /* Merge is OK... */ req->nr_phys_segments = total_phys_segments; return 1; @@ -372,9 +380,6 @@ static int attempt_merge(struct request_queue *q, struct request *req, || next->special) return 0; - if (blk_integrity_rq(req) != blk_integrity_rq(next)) - return 0; - /* * If we are allowed to merge, then append bio list * from next to rq and release next. 
merge_requests_fn diff --git a/block/blk-settings.c b/block/blk-settings.c index 8d592b559bd3..f8f2ddf20613 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -111,6 +111,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy); void blk_set_default_limits(struct queue_limits *lim) { lim->max_segments = BLK_MAX_SEGMENTS; + lim->max_integrity_segments = 0; lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; lim->max_sectors = BLK_DEF_MAX_SECTORS; @@ -509,6 +510,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->seg_boundary_mask); t->max_segments = min_not_zero(t->max_segments, b->max_segments); + t->max_integrity_segments = min_not_zero(t->max_integrity_segments, + b->max_integrity_segments); t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 001ab18078f5..b014f7739e98 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -112,6 +112,11 @@ static ssize_t queue_max_segments_show(struct request_queue *q, char *page) return queue_var_show(queue_max_segments(q), (page)); } +static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *page) +{ + return queue_var_show(q->limits.max_integrity_segments, (page)); +} + static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page) { if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) @@ -288,6 +293,11 @@ static struct queue_sysfs_entry queue_max_segments_entry = { .show = queue_max_segments_show, }; +static struct queue_sysfs_entry queue_max_integrity_segments_entry = { + .attr = {.name = "max_integrity_segments", .mode = S_IRUGO }, + .show = queue_max_integrity_segments_show, +}; + static struct queue_sysfs_entry queue_max_segment_size_entry = { .attr = {.name = "max_segment_size", .mode = S_IRUGO }, .show = queue_max_segment_size_show, @@ -375,6 +385,7 @@ static struct attribute *default_attrs[] = { &queue_max_hw_sectors_entry.attr, &queue_max_sectors_entry.attr, &queue_max_segments_entry.attr, + &queue_max_integrity_segments_entry.attr, &queue_max_segment_size_entry.attr, &queue_iosched_entry.attr, &queue_hw_sector_size_entry.attr, diff --git a/block/blk.h b/block/blk.h index 6e7dc87141e4..6738831ba447 100644 --- a/block/blk.h +++ b/block/blk.h @@ -132,14 +132,6 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) return q->nr_congestion_off; } -#if defined(CONFIG_BLK_DEV_INTEGRITY) - -#define rq_for_each_integrity_segment(bvl, _rq, _iter) \ - __rq_for_each_bio(_iter.bio, _rq) \ - bip_for_each_vec(bvl, _iter.bio->bi_integrity, _iter.i) - -#endif /* BLK_DEV_INTEGRITY */ - static inline int blk_cpu_to_group(int cpu) { #ifdef CONFIG_SCHED_MC diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 8a8f803439e1..10478153641b 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -376,6 +376,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) shost->this_id = sht->this_id; shost->can_queue = sht->can_queue; shost->sg_tablesize = sht->sg_tablesize; + shost->sg_prot_tablesize = sht->sg_prot_tablesize; shost->cmd_per_lun = sht->cmd_per_lun; shost->unchecked_isa_dma = sht->unchecked_isa_dma; shost->use_clustering = sht->use_clustering; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 9ade720422c6..861c0b937ac9 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -968,11 +968,13 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb, */ 
int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask) { - int error = scsi_init_sgtable(cmd->request, &cmd->sdb, gfp_mask); + struct request *rq = cmd->request; + + int error = scsi_init_sgtable(rq, &cmd->sdb, gfp_mask); if (error) goto err_exit; - if (blk_bidi_rq(cmd->request)) { + if (blk_bidi_rq(rq)) { struct scsi_data_buffer *bidi_sdb = kmem_cache_zalloc( scsi_sdb_cache, GFP_ATOMIC); if (!bidi_sdb) { @@ -980,28 +982,28 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask) goto err_exit; } - cmd->request->next_rq->special = bidi_sdb; - error = scsi_init_sgtable(cmd->request->next_rq, bidi_sdb, - GFP_ATOMIC); + rq->next_rq->special = bidi_sdb; + error = scsi_init_sgtable(rq->next_rq, bidi_sdb, GFP_ATOMIC); if (error) goto err_exit; } - if (blk_integrity_rq(cmd->request)) { + if (blk_integrity_rq(rq)) { struct scsi_data_buffer *prot_sdb = cmd->prot_sdb; int ivecs, count; BUG_ON(prot_sdb == NULL); - ivecs = blk_rq_count_integrity_sg(cmd->request); + ivecs = blk_rq_count_integrity_sg(rq->q, rq->bio); if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) { error = BLKPREP_DEFER; goto err_exit; } - count = blk_rq_map_integrity_sg(cmd->request, + count = blk_rq_map_integrity_sg(rq->q, rq->bio, prot_sdb->table.sgl); BUG_ON(unlikely(count > ivecs)); + BUG_ON(unlikely(count > queue_max_integrity_segments(rq->q))); cmd->prot_sdb = prot_sdb; cmd->prot_sdb->table.nents = count; @@ -1625,6 +1627,14 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, blk_queue_max_segments(q, min_t(unsigned short, shost->sg_tablesize, SCSI_MAX_SG_CHAIN_SEGMENTS)); + if (scsi_host_prot_dma(shost)) { + shost->sg_prot_tablesize = + min_not_zero(shost->sg_prot_tablesize, + (unsigned short)SCSI_MAX_PROT_SG_SEGMENTS); + BUG_ON(shost->sg_prot_tablesize < shost->sg_tablesize); + blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize); + } + blk_queue_max_hw_sectors(q, shost->max_sectors); blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); blk_queue_segment_boundary(q, shost->dma_boundary); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index c3f67373a4f8..20ad59dff730 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -251,6 +251,7 @@ shost_rd_attr(host_busy, "%hu\n"); shost_rd_attr(cmd_per_lun, "%hd\n"); shost_rd_attr(can_queue, "%hd\n"); shost_rd_attr(sg_tablesize, "%hu\n"); +shost_rd_attr(sg_prot_tablesize, "%hu\n"); shost_rd_attr(unchecked_isa_dma, "%d\n"); shost_rd_attr(prot_capabilities, "%u\n"); shost_rd_attr(prot_guard_type, "%hd\n"); @@ -262,6 +263,7 @@ static struct attribute *scsi_sysfs_shost_attrs[] = { &dev_attr_cmd_per_lun.attr, &dev_attr_can_queue.attr, &dev_attr_sg_tablesize.attr, + &dev_attr_sg_prot_tablesize.attr, &dev_attr_unchecked_isa_dma.attr, &dev_attr_proc_name.attr, &dev_attr_scan.attr, diff --git a/include/linux/bio.h b/include/linux/bio.h index 5274103434ad..2c3fd7421607 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -496,6 +496,10 @@ static inline struct bio *bio_list_get(struct bio_list *bl) #define bip_for_each_vec(bvl, bip, i) \ __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) +#define bio_for_each_integrity_vec(_bvl, _bio, _iter) \ + for_each_bio(_bio) \ + bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) + #define bio_integrity(bio) (bio->bi_integrity != NULL) extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c54906f678f..7e661106270a 100644 --- 
a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -124,6 +124,9 @@ struct request { * physical address coalescing is performed. */ unsigned short nr_phys_segments; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + unsigned short nr_integrity_segments; +#endif unsigned short ioprio; @@ -243,6 +246,7 @@ struct queue_limits { unsigned short logical_block_size; unsigned short max_segments; + unsigned short max_integrity_segments; unsigned char misaligned; unsigned char discard_misaligned; @@ -1213,8 +1217,13 @@ struct blk_integrity { extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); extern void blk_integrity_unregister(struct gendisk *); extern int blk_integrity_compare(struct gendisk *, struct gendisk *); -extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); -extern int blk_rq_count_integrity_sg(struct request *); +extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, + struct scatterlist *); +extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); +extern int blk_integrity_merge_rq(struct request_queue *, struct request *, + struct request *); +extern int blk_integrity_merge_bio(struct request_queue *, struct request *, + struct bio *); static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) @@ -1235,16 +1244,32 @@ static inline int blk_integrity_rq(struct request *rq) return bio_integrity(rq->bio); } +static inline void blk_queue_max_integrity_segments(struct request_queue *q, + unsigned int segs) +{ + q->limits.max_integrity_segments = segs; +} + +static inline unsigned short +queue_max_integrity_segments(struct request_queue *q) +{ + return q->limits.max_integrity_segments; +} + #else /* CONFIG_BLK_DEV_INTEGRITY */ #define blk_integrity_rq(rq) (0) -#define blk_rq_count_integrity_sg(a) (0) -#define blk_rq_map_integrity_sg(a, b) (0) +#define blk_rq_count_integrity_sg(a, b) (0) +#define blk_rq_map_integrity_sg(a, b, c) (0) #define bdev_get_integrity(a) (0) #define blk_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) #define blk_integrity_unregister(a) do { } while (0); +#define blk_queue_max_integrity_segments(a, b) do { } while (0); +#define queue_max_integrity_segments(a) (0) +#define blk_integrity_merge_rq(a, b, c) (0) +#define blk_integrity_merge_bio(a, b, c) (0) #endif /* CONFIG_BLK_DEV_INTEGRITY */ diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 8fcb6e0e9e72..d63533a4a59e 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -31,6 +31,12 @@ struct scsi_cmnd; #define SCSI_MAX_SG_CHAIN_SEGMENTS SCSI_MAX_SG_SEGMENTS #endif +/* + * DIX-capable adapters effectively support infinite chaining for the + * protection information scatterlist + */ +#define SCSI_MAX_PROT_SG_SEGMENTS 0xFFFF + /* * Special value for scanning to specify scanning or rescanning of all * possible channels, (target) ids, or luns on a given shost. diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index b7bdecb7b76e..d0a6a845f204 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -388,6 +388,7 @@ struct scsi_host_template { * of scatter-gather. */ unsigned short sg_tablesize; + unsigned short sg_prot_tablesize; /* * Set this if the host adapter has limitations beside segment count. 
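
A minimal sketch, not part of this patch, of how a hypothetical DIX-capable HBA driver ("foo"; all foo_* names are illustrative) might advertise its hardware limit through the new template field:

        static struct scsi_host_template foo_template = {
                .name                   = "foo",
                .queuecommand           = foo_queuecommand,     /* hypothetical */
                .this_id                = -1,
                .sg_tablesize           = 128,
                /* HW limit on protection information SG segments */
                .sg_prot_tablesize      = 128,
        };

Per the scsi_lib.c hunk above, __scsi_alloc_queue() then applies min_not_zero() against SCSI_MAX_PROT_SG_SEGMENTS (so a zero here means "no driver limit") and hands the result to the block layer via blk_queue_max_integrity_segments().
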
@@ -599,6 +600,7 @@ struct Scsi_Host { int can_queue; short cmd_per_lun; short unsigned int sg_tablesize; + short unsigned int sg_prot_tablesize; short unsigned int max_sectors; unsigned long dma_boundary; /* @@ -823,6 +825,11 @@ static inline unsigned int scsi_host_get_prot(struct Scsi_Host *shost) return shost->prot_capabilities; } +static inline int scsi_host_prot_dma(struct Scsi_Host *shost) +{ + return shost->prot_capabilities >= SHOST_DIX_TYPE0_PROTECTION; +} + static inline unsigned int scsi_host_dif_capable(struct Scsi_Host *shost, unsigned int target_type) { static unsigned char cap[] = { 0, -- cgit v1.2.3 From 006abe887c5e637d059c44310de6c92f36aded3b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 12 Sep 2010 19:55:25 -0400 Subject: SUNRPC: Fix a race in rpc_info_open There is a race between rpc_info_open and rpc_release_client() in that nothing stops a process from opening the file after the clnt->cl_kref goes to zero. Fix this by using atomic_inc_not_zero()... Reported-by: J. Bruce Fields Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- include/linux/sunrpc/clnt.h | 2 +- net/sunrpc/clnt.c | 26 ++++++++++++-------------- net/sunrpc/rpc_pipe.c | 14 ++++++++------ 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 569dc722a600..85f38a63f098 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -30,7 +30,7 @@ struct rpc_inode; * The high-level client handle */ struct rpc_clnt { - struct kref cl_kref; /* Number of references */ + atomic_t cl_count; /* Number of references */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 657aac630fc9..3a8f53e7ba07 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -226,7 +226,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru goto out_no_principal; } - kref_init(&clnt->cl_kref); + atomic_set(&clnt->cl_count, 1); err = rpc_setup_pipedir(clnt, program->pipe_dir_name); if (err < 0) @@ -390,14 +390,14 @@ rpc_clone_client(struct rpc_clnt *clnt) if (new->cl_principal == NULL) goto out_no_principal; } - kref_init(&new->cl_kref); + atomic_set(&new->cl_count, 1); err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); if (err != 0) goto out_no_path; if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); xprt_get(clnt->cl_xprt); - kref_get(&clnt->cl_kref); + atomic_inc(&clnt->cl_count); rpc_register_client(new); rpciod_up(); return new; @@ -465,10 +465,8 @@ EXPORT_SYMBOL_GPL(rpc_shutdown_client); * Free an RPC client */ static void -rpc_free_client(struct kref *kref) +rpc_free_client(struct rpc_clnt *clnt) { - struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); - dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); if (!IS_ERR(clnt->cl_path.dentry)) { @@ -495,12 +493,10 @@ out_free: * Free an RPC client */ static void -rpc_free_auth(struct kref *kref) +rpc_free_auth(struct rpc_clnt *clnt) { - struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); - if (clnt->cl_auth == NULL) { - rpc_free_client(kref); + rpc_free_client(clnt); return; } @@ -509,10 +505,11 @@ rpc_free_auth(struct rpc_clnt *clnt) * release remaining GSS contexts. This mechanism ensures * that it can do so safely.
*/ - kref_init(kref); + atomic_inc(&clnt->cl_count); rpcauth_release(clnt->cl_auth); clnt->cl_auth = NULL; - kref_put(kref, rpc_free_client); + if (atomic_dec_and_test(&clnt->cl_count)) + rpc_free_client(clnt); } /* @@ -525,7 +522,8 @@ rpc_release_client(struct rpc_clnt *clnt) if (list_empty(&clnt->cl_tasks)) wake_up(&destroy_wait); - kref_put(&clnt->cl_kref, rpc_free_auth); + if (atomic_dec_and_test(&clnt->cl_count)) + rpc_free_auth(clnt); } /** @@ -588,7 +586,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) if (clnt != NULL) { rpc_task_release_client(task); task->tk_client = clnt; - kref_get(&clnt->cl_kref); + atomic_inc(&clnt->cl_count); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; /* Add to the client's list of all tasks */ diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 41a762f82630..8c8eef2b8f26 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -371,21 +371,23 @@ rpc_show_info(struct seq_file *m, void *v) static int rpc_info_open(struct inode *inode, struct file *file) { - struct rpc_clnt *clnt; + struct rpc_clnt *clnt = NULL; int ret = single_open(file, rpc_show_info, NULL); if (!ret) { struct seq_file *m = file->private_data; - mutex_lock(&inode->i_mutex); - clnt = RPC_I(inode)->private; - if (clnt) { - kref_get(&clnt->cl_kref); + + spin_lock(&file->f_path.dentry->d_lock); + if (!d_unhashed(file->f_path.dentry)) + clnt = RPC_I(inode)->private; + if (clnt != NULL && atomic_inc_not_zero(&clnt->cl_count)) { + spin_unlock(&file->f_path.dentry->d_lock); m->private = clnt; } else { + spin_unlock(&file->f_path.dentry->d_lock); single_release(inode, file); ret = -EINVAL; } - mutex_unlock(&inode->i_mutex); } return ret; } -- cgit v1.2.3 From c54fce6eff197d9c57c97afbf6c9722ce434fc8f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 10 Sep 2010 16:51:36 +0200 Subject: workqueue: add documentation Update copyright notice and add Documentation/workqueue.txt. Randy Dunlap, Dave Chinner: misc fixes. Signed-off-by: Tejun Heo Reviewed-By: Florian Mickler Cc: Ingo Molnar Cc: Christoph Lameter Cc: Randy Dunlap Cc: Dave Chinner --- Documentation/workqueue.txt | 380 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/workqueue.h | 4 + kernel/workqueue.c | 27 ++-- 3 files changed, 401 insertions(+), 10 deletions(-) create mode 100644 Documentation/workqueue.txt (limited to 'include') diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt new file mode 100644 index 000000000000..e4498a2872c3 --- /dev/null +++ b/Documentation/workqueue.txt @@ -0,0 +1,380 @@ + +Concurrency Managed Workqueue (cmwq) + +September, 2010 Tejun Heo + Florian Mickler + +CONTENTS + +1. Introduction +2. Why cmwq? +3. The Design +4. Application Programming Interface (API) +5. Example Execution Scenarios +6. Guidelines + + +1. Introduction + +There are many cases where an asynchronous process execution context +is needed and the workqueue (wq) API is the most commonly used +mechanism for such cases. + +When such an asynchronous execution context is needed, a work item +describing which function to execute is put on a queue. An +independent thread serves as the asynchronous execution context. The +queue is called workqueue and the thread is called worker. + +While there are work items on the workqueue the worker executes the +functions associated with the work items one after the other. When +there is no work item left on the workqueue the worker becomes idle. +When a new work item gets queued, the worker begins executing again. + + +2. 
Why cmwq? + +In the original wq implementation, a multi threaded (MT) wq had one +worker thread per CPU and a single threaded (ST) wq had one worker +thread system-wide. A single MT wq needed to keep around the same +number of workers as the number of CPUs. The kernel grew a lot of MT +wq users over the years and with the number of CPU cores continuously +rising, some systems saturated the default 32k PID space just booting +up. + +Although MT wq wasted a lot of resource, the level of concurrency +provided was unsatisfactory. The limitation was common to both ST and +MT wq albeit less severe on MT. Each wq maintained its own separate +worker pool. A MT wq could provide only one execution context per CPU +while a ST wq one for the whole system. Work items had to compete for +those very limited execution contexts leading to various problems +including proneness to deadlocks around the single execution context. + +The tension between the provided level of concurrency and resource +usage also forced its users to make unnecessary tradeoffs like libata +choosing to use ST wq for polling PIOs and accepting an unnecessary +limitation that no two polling PIOs can progress at the same time. As +MT wq don't provide much better concurrency, users which require +higher level of concurrency, like async or fscache, had to implement +their own thread pool. + +Concurrency Managed Workqueue (cmwq) is a reimplementation of wq with +focus on the following goals. + +* Maintain compatibility with the original workqueue API. + +* Use per-CPU unified worker pools shared by all wq to provide + flexible level of concurrency on demand without wasting a lot of + resource. + +* Automatically regulate worker pool and level of concurrency so that + the API users don't need to worry about such details. + + +3. The Design + +In order to ease the asynchronous execution of functions a new +abstraction, the work item, is introduced. + +A work item is a simple struct that holds a pointer to the function +that is to be executed asynchronously. Whenever a driver or subsystem +wants a function to be executed asynchronously it has to set up a work +item pointing to that function and queue that work item on a +workqueue. + +Special purpose threads, called worker threads, execute the functions +off of the queue, one after the other. If no work is queued, the +worker threads become idle. These worker threads are managed in so +called thread-pools. + +The cmwq design differentiates between the user-facing workqueues that +subsystems and drivers queue work items on and the backend mechanism +which manages thread-pool and processes the queued work items. + +The backend is called gcwq. There is one gcwq for each possible CPU +and one gcwq to serve work items queued on unbound workqueues. + +Subsystems and drivers can create and queue work items through special +workqueue API functions as they see fit. They can influence some +aspects of the way the work items are executed by setting flags on the +workqueue they are putting the work item on. These flags include +things like CPU locality, reentrancy, concurrency limits and more. To +get a detailed overview refer to the API description of +alloc_workqueue() below. + +When a work item is queued to a workqueue, the target gcwq is +determined according to the queue parameters and workqueue attributes +and appended on the shared worklist of the gcwq. 
For example, unless +specifically overridden, a work item of a bound workqueue will be +queued on the worklist of exactly that gcwq that is associated to the +CPU the issuer is running on. + +For any worker pool implementation, managing the concurrency level +(how many execution contexts are active) is an important issue. cmwq +tries to keep the concurrency at a minimal but sufficient level. +Minimal to save resources and sufficient in that the system is used at +its full capacity. + +Each gcwq bound to an actual CPU implements concurrency management by +hooking into the scheduler. The gcwq is notified whenever an active +worker wakes up or sleeps and keeps track of the number of the +currently runnable workers. Generally, work items are not expected to +hog a CPU and consume many cycles. That means maintaining just enough +concurrency to prevent work processing from stalling should be +optimal. As long as there are one or more runnable workers on the +CPU, the gcwq doesn't start execution of a new work, but, when the +last running worker goes to sleep, it immediately schedules a new +worker so that the CPU doesn't sit idle while there are pending work +items. This allows using a minimal number of workers without losing +execution bandwidth. + +Keeping idle workers around doesn't cost other than the memory space +for kthreads, so cmwq holds onto idle ones for a while before killing +them. + +For an unbound wq, the above concurrency management doesn't apply and +the gcwq for the pseudo unbound CPU tries to start executing all work +items as soon as possible. The responsibility of regulating +concurrency level is on the users. There is also a flag to mark a +bound wq to ignore the concurrency management. Please refer to the +API section for details. + +Forward progress guarantee relies on that workers can be created when +more execution contexts are necessary, which in turn is guaranteed +through the use of rescue workers. All work items which might be used +on code paths that handle memory reclaim are required to be queued on +wq's that have a rescue-worker reserved for execution under memory +pressure. Else it is possible that the thread-pool deadlocks waiting +for execution contexts to free up. + + +4. Application Programming Interface (API) + +alloc_workqueue() allocates a wq. The original create_*workqueue() +functions are deprecated and scheduled for removal. alloc_workqueue() +takes three arguments - @name, @flags and @max_active. @name is the +name of the wq and also used as the name of the rescuer thread if +there is one. + +A wq no longer manages execution resources but serves as a domain for +forward progress guarantee, flush and work item attributes. @flags +and @max_active control how work items are assigned execution +resources, scheduled and executed. + +@flags: + + WQ_NON_REENTRANT + + By default, a wq guarantees non-reentrance only on the same + CPU. A work item may not be executed concurrently on the same + CPU by multiple workers but is allowed to be executed + concurrently on multiple CPUs. This flag makes sure + non-reentrance is enforced across all CPUs. Work items queued + to a non-reentrant wq are guaranteed to be executed by at most + one worker system-wide at any given time. + + WQ_UNBOUND + + Work items queued to an unbound wq are served by a special + gcwq which hosts workers which are not bound to any specific + CPU. This makes the wq behave as a simple execution context + provider without concurrency management. 
The unbound gcwq + tries to start execution of work items as soon as possible. + Unbound wq sacrifices locality but is useful for the following + cases. + + * Wide fluctuation in the concurrency level requirement is + expected and using bound wq may end up creating large number + of mostly unused workers across different CPUs as the issuer + hops through different CPUs. + + * Long running CPU intensive workloads which can be better + managed by the system scheduler. + + WQ_FREEZEABLE + + A freezeable wq participates in the freeze phase of the system + suspend operations. Work items on the wq are drained and no + new work item starts execution until thawed. + + WQ_RESCUER + + All wq which might be used in the memory reclaim paths _MUST_ + have this flag set. This reserves one worker exclusively for + the execution of this wq under memory pressure. + + WQ_HIGHPRI + + Work items of a highpri wq are queued at the head of the + worklist of the target gcwq and start execution regardless of + the current concurrency level. In other words, highpri work + items will always start execution as soon as execution + resource is available. + + Ordering among highpri work items is preserved - a highpri + work item queued after another highpri work item will start + execution after the earlier highpri work item starts. + + Although highpri work items are not held back by other + runnable work items, they still contribute to the concurrency + level. Highpri work items in runnable state will prevent + non-highpri work items from starting execution. + + This flag is meaningless for unbound wq. + + WQ_CPU_INTENSIVE + + Work items of a CPU intensive wq do not contribute to the + concurrency level. In other words, runnable CPU intensive + work items will not prevent other work items from starting + execution. This is useful for bound work items which are + expected to hog CPU cycles so that their execution is + regulated by the system scheduler. + + Although CPU intensive work items don't contribute to the + concurrency level, start of their executions is still + regulated by the concurrency management and runnable + non-CPU-intensive work items can delay execution of CPU + intensive work items. + + This flag is meaningless for unbound wq. + + WQ_HIGHPRI | WQ_CPU_INTENSIVE + + This combination makes the wq avoid interaction with + concurrency management completely and behave as a simple + per-CPU execution context provider. Work items queued on a + highpri CPU-intensive wq start execution as soon as resources + are available and don't affect execution of other work items. + +@max_active: + +@max_active determines the maximum number of execution contexts per +CPU which can be assigned to the work items of a wq. For example, +with @max_active of 16, at most 16 work items of the wq can be +executing at the same time per CPU. + +Currently, for a bound wq, the maximum limit for @max_active is 512 +and the default value used when 0 is specified is 256. For an unbound +wq, the limit is higher of 512 and 4 * num_possible_cpus(). These +values are chosen sufficiently high such that they are not the +limiting factor while providing protection in runaway cases. + +The number of active work items of a wq is usually regulated by the +users of the wq, more specifically, by how many work items the users +may queue at the same time. Unless there is a specific need for +throttling the number of active work items, specifying '0' is +recommended. + +Some users depend on the strict execution ordering of ST wq. 
The +combination of @max_active of 1 and WQ_UNBOUND is used to achieve this +behavior. Work items on such wq are always queued to the unbound gcwq +and only one work item can be active at any given time thus achieving +the same ordering property as ST wq. + + +5. Example Execution Scenarios + +The following example execution scenarios try to illustrate how cmwq +behave under different configurations. + + Work items w0, w1, w2 are queued to a bound wq q0 on the same CPU. + w0 burns CPU for 5ms then sleeps for 10ms then burns CPU for 5ms + again before finishing. w1 and w2 burn CPU for 5ms then sleep for + 10ms. + +Ignoring all other tasks, works and processing overhead, and assuming +simple FIFO scheduling, the following is one highly simplified version +of possible sequences of events with the original wq. + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 starts and burns CPU + 25 w1 sleeps + 35 w1 wakes up and finishes + 35 w2 starts and burns CPU + 40 w2 sleeps + 50 w2 wakes up and finishes + +And with cmwq with @max_active >= 3, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 starts and burns CPU + 10 w1 sleeps + 10 w2 starts and burns CPU + 15 w2 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 25 w2 wakes up and finishes + +If @max_active == 2, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 starts and burns CPU + 10 w1 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 20 w2 starts and burns CPU + 25 w2 sleeps + 35 w2 wakes up and finishes + +Now, let's assume w1 and w2 are queued to a different wq q1 which has +WQ_HIGHPRI set, + + TIME IN MSECS EVENT + 0 w1 and w2 start and burn CPU + 5 w1 sleeps + 10 w2 sleeps + 10 w0 starts and burns CPU + 15 w0 sleeps + 15 w1 wakes up and finishes + 20 w2 wakes up and finishes + 25 w0 wakes up and burns CPU + 30 w0 finishes + +If q1 has WQ_CPU_INTENSIVE set, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 and w2 start and burn CPU + 10 w1 sleeps + 15 w2 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 25 w2 wakes up and finishes + + +6. Guidelines + +* Do not forget to use WQ_RESCUER if a wq may process work items which + are used during memory reclaim. Each wq with WQ_RESCUER set has one + rescuer thread reserved for it. If there is dependency among + multiple work items used during memory reclaim, they should be + queued to separate wq each with WQ_RESCUER. + +* Unless strict ordering is required, there is no need to use ST wq. + +* Unless there is a specific need, using 0 for @max_active is + recommended. In most use cases, concurrency level usually stays + well under the default limit. + +* A wq serves as a domain for forward progress guarantee (WQ_RESCUER), + flush and work item attributes. Work items which are not involved + in memory reclaim and don't need to be flushed as a part of a group + of work items, and don't require any special attribute, can use one + of the system wq. There is no difference in execution + characteristics between using a dedicated wq and a system wq. + +* Unless work items are expected to consume a huge amount of CPU + cycles, using a bound wq is usually beneficial due to the increased + level of locality in wq operations and work item execution. 
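

As a concrete illustration of the API section above, a minimal usage
sketch; alloc_workqueue(), DECLARE_WORK(), queue_work(),
flush_workqueue() and destroy_workqueue() are the real interfaces,
while the frob_* names are made up for the example:

        static struct workqueue_struct *frob_wq;

        static void frob_fn(struct work_struct *work)
        {
                /* runs in process context on the shared worker pool */
        }
        static DECLARE_WORK(frob_work, frob_fn);

        static int __init frob_init(void)
        {
                /* WQ_UNBOUND with @max_active == 1: ST-like strict ordering */
                frob_wq = alloc_workqueue("frob", WQ_UNBOUND, 1);
                if (!frob_wq)
                        return -ENOMEM;

                queue_work(frob_wq, &frob_work);
                return 0;
        }

        static void __exit frob_exit(void)
        {
                flush_workqueue(frob_wq);
                destroy_workqueue(frob_wq);
        }

A wq used on a memory reclaim path would additionally pass WQ_RESCUER,
as the guidelines above require.
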
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f11100f96482..25e02c941bac 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -235,6 +235,10 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define work_clear_pending(work) \ clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) +/* + * Workqueue flags and constants. For details, please refer to + * Documentation/workqueue.txt. + */ enum { WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 727f24e563ae..f77afd939229 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1,19 +1,26 @@ /* - * linux/kernel/workqueue.c + * kernel/workqueue.c - generic async execution with shared worker pool * - * Generic mechanism for defining kernel helper threads for running - * arbitrary tasks in process context. + * Copyright (C) 2002 Ingo Molnar * - * Started by Ingo Molnar, Copyright (C) 2002 + * Derived from the taskqueue/keventd code by: + * David Woodhouse + * Andrew Morton + * Kai Petzke + * Theodore Ts'o * - * Derived from the taskqueue/keventd code by: + * Made to use alloc_percpu by Christoph Lameter. * - * David Woodhouse - * Andrew Morton - * Kai Petzke - * Theodore Ts'o + * Copyright (C) 2010 SUSE Linux Products GmbH + * Copyright (C) 2010 Tejun Heo * - * Made to use alloc_percpu by Christoph Lameter. + * This is the generic async execution mechanism. Work items as are + * executed in process context. The worker pool is shared and + * automatically managed. There is one worker pool for each CPU and + * one extra for works which are better served by workers which are + * not bound to any specific CPU. + * + * Please read Documentation/workqueue.txt for details. */ #include -- cgit v1.2.3 From 7b334fcb45b757ffb093696ca3de1b0c8b4a33f1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Sep 2010 23:51:02 +0100 Subject: drm: Use a nondestructive mode for output detect when polling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Destructive load-detection is very expensive and due to failings elsewhere can trigger system wide stalls of up to 600ms. A simple first step to correcting this is not to invoke such an expensive and destructive load-detection operation automatically. 
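
The pattern every converted driver below adopts, in miniature; the
foo_* helpers are hypothetical stand-ins for a driver's cheap DDC probe
and its expensive load-detect path:

        static enum drm_connector_status
        foo_detect(struct drm_connector *connector, bool nondestructive)
        {
                if (foo_ddc_probe(connector))           /* cheap EDID/DDC probe */
                        return connector_status_connected;

                if (nondestructive)
                        /* polled path: report last known state, don't steal a pipe */
                        return connector->status;

                return foo_load_detect(connector);      /* expensive, may stall */
        }

In the hunks below, drm_helper_probe_single_connector_modes() passes
false (a user-initiated probe may load-detect), while the periodic
output poll and the sysfs status file pass true.
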
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=29536 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=16265 Reported-by: Bruno Prémont Tested-by: Sitsofe Wheeler Signed-off-by: Chris Wilson Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 4 ++-- drivers/gpu/drm/drm_sysfs.c | 2 +- drivers/gpu/drm/i915/intel_crt.c | 7 ++++++- drivers/gpu/drm/i915/intel_dp.c | 3 ++- drivers/gpu/drm/i915/intel_dvo.c | 4 +++- drivers/gpu/drm/i915/intel_hdmi.c | 3 ++- drivers/gpu/drm/i915/intel_lvds.c | 8 ++++++-- drivers/gpu/drm/i915/intel_sdvo.c | 6 ++++-- drivers/gpu/drm/i915/intel_tv.c | 12 ++++++------ drivers/gpu/drm/nouveau/nouveau_connector.c | 8 +++++--- drivers/gpu/drm/radeon/radeon_connectors.c | 20 +++++++++++++++----- drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c | 3 ++- include/drm/drm_crtc.h | 3 ++- 13 files changed, 56 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index de152a58967d..fb6b70fc6572 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -103,7 +103,7 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector, if (connector->funcs->force) connector->funcs->force(connector); } else { - connector->status = connector->funcs->detect(connector); + connector->status = connector->funcs->detect(connector, false); drm_kms_helper_poll_enable(dev); } @@ -866,7 +866,7 @@ static void output_poll_execute(struct work_struct *work) !(connector->polled & DRM_CONNECTOR_POLL_HPD)) continue; - status = connector->funcs->detect(connector); + status = connector->funcs->detect(connector, true); if (old_status != status) changed = true; } diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 86118a742231..85da4c40694c 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -159,7 +159,7 @@ static ssize_t status_show(struct device *device, struct drm_connector *connector = to_drm_connector(device); enum drm_connector_status status; - status = connector->funcs->detect(connector); + status = connector->funcs->detect(connector, true); return snprintf(buf, PAGE_SIZE, "%s\n", drm_get_connector_status_name(status)); } diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 4b7735196cd5..0350e5d711f8 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -400,7 +400,9 @@ intel_crt_load_detect(struct drm_crtc *crtc, struct intel_encoder *intel_encoder return status; } -static enum drm_connector_status intel_crt_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_crt_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_device *dev = connector->dev; struct drm_encoder *encoder = intel_attached_encoder(connector); @@ -419,6 +421,9 @@ static enum drm_connector_status intel_crt_detect(struct drm_connector *connecto if (intel_crt_detect_ddc(encoder)) return connector_status_connected; + if (nondestructive) + return connector->status; + /* for pre-945g platforms use load detect */ if (encoder->crtc && encoder->crtc->enabled) { status = intel_crt_load_detect(encoder->crtc, intel_encoder); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 51d142939a26..e1a2a05fb838 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1386,7 +1386,8 @@ ironlake_dp_detect(struct drm_connector *connector) * \return false if DP port 
is disconnected. */ static enum drm_connector_status -intel_dp_detect(struct drm_connector *connector) +intel_dp_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index a399f4b2c1c5..f0de1addf8a4 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -221,7 +221,9 @@ static void intel_dvo_mode_set(struct drm_encoder *encoder, * * Unimplemented. */ -static enum drm_connector_status intel_dvo_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_dvo_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_dvo *intel_dvo = enc_to_intel_dvo(encoder); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index ccd4c97e6524..2ea123d8d22b 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -139,7 +139,8 @@ static bool intel_hdmi_mode_fixup(struct drm_encoder *encoder, } static enum drm_connector_status -intel_hdmi_detect(struct drm_connector *connector) +intel_hdmi_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 4fbb0165b26f..fb1bed8f4071 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -445,7 +445,9 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder, * connected and closed means disconnected. We also send hotplug events as * needed, using lid status notification from the input layer. */ -static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_lvds_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_device *dev = connector->dev; enum drm_connector_status status = connector_status_connected; @@ -540,7 +542,9 @@ static int intel_lid_notify(struct notifier_block *nb, unsigned long val, * the LID nofication event. 
*/ if (connector) - connector->status = connector->funcs->detect(connector); + connector->status = connector->funcs->detect(connector, + true); + /* Don't force modeset on machines where it causes a GPU lockup */ if (dmi_check_system(intel_no_modeset_on_lid)) return NOTIFY_OK; diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index e3b7a7ee39cb..db6b6d4b8fae 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1417,7 +1417,7 @@ intel_analog_is_connected(struct drm_device *dev) if (!analog_connector) return false; - if (analog_connector->funcs->detect(analog_connector) == + if (analog_connector->funcs->detect(analog_connector, true) == connector_status_disconnected) return false; @@ -1486,7 +1486,9 @@ intel_sdvo_hdmi_sink_detect(struct drm_connector *connector) return status; } -static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_sdvo_detect(struct drm_connector *connector, + bool nondestructive) { uint16_t response; struct drm_encoder *encoder = intel_attached_encoder(connector); diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index c671f60ce80b..d20b550c0f55 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1341,7 +1341,8 @@ static void intel_tv_find_better_format(struct drm_connector *connector) * we have a pipe programmed in order to probe the TV. */ static enum drm_connector_status -intel_tv_detect(struct drm_connector *connector) +intel_tv_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_display_mode mode; struct drm_encoder *encoder = intel_attached_encoder(connector); @@ -1353,7 +1354,7 @@ intel_tv_detect(struct drm_connector *connector) if (encoder->crtc && encoder->crtc->enabled) { type = intel_tv_detect_type(intel_tv); - } else { + } else if (nondestructive) { struct drm_crtc *crtc; int dpms_mode; @@ -1364,10 +1365,9 @@ intel_tv_detect(struct drm_connector *connector) intel_release_load_detect_pipe(&intel_tv->base, connector, dpms_mode); } else - type = -1; - } - - intel_tv->type = type; + return connector_status_unknown; + } else + return connector->status; if (type < 0) return connector_status_disconnected; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index a1473fff06ac..67d515cb67e0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -168,7 +168,8 @@ nouveau_connector_set_encoder(struct drm_connector *connector, } static enum drm_connector_status -nouveau_connector_detect(struct drm_connector *connector) +nouveau_connector_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_device *dev = connector->dev; struct nouveau_connector *nv_connector = nouveau_connector(connector); @@ -246,7 +247,8 @@ detect_analog: } static enum drm_connector_status -nouveau_connector_detect_lvds(struct drm_connector *connector) +nouveau_connector_detect_lvds(struct drm_connector *connector, + bool nondestructive) { struct drm_device *dev = connector->dev; struct drm_nouveau_private *dev_priv = dev->dev_private; @@ -267,7 +269,7 @@ nouveau_connector_detect_lvds(struct drm_connector *connector) /* Try retrieving EDID via DDC */ if (!dev_priv->vbios.fp_no_ddc) { - status = nouveau_connector_detect(connector); + status = nouveau_connector_detect(connector, nondestructive); if (status == connector_status_connected) goto out; } diff 
--git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index a9dd7847d96e..31d309a8e75b 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -481,7 +481,9 @@ static int radeon_lvds_mode_valid(struct drm_connector *connector, return MODE_OK; } -static enum drm_connector_status radeon_lvds_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_lvds_detect(struct drm_connector *connector, + bool nondestructive) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder = radeon_best_single_encoder(connector); @@ -594,7 +596,9 @@ static int radeon_vga_mode_valid(struct drm_connector *connector, return MODE_OK; } -static enum drm_connector_status radeon_vga_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_vga_detect(struct drm_connector *connector, + bool nondestructive) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder; @@ -691,7 +695,9 @@ static int radeon_tv_mode_valid(struct drm_connector *connector, return MODE_OK; } -static enum drm_connector_status radeon_tv_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_tv_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_encoder *encoder; struct drm_encoder_helper_funcs *encoder_funcs; @@ -748,7 +754,9 @@ static int radeon_dvi_get_modes(struct drm_connector *connector) * we have to check if this analog encoder is shared with anyone else (TV) * if its shared we have to set the other connector to disconnected. */ -static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_dvi_detect(struct drm_connector *connector, + bool nondestructive) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder = NULL; @@ -972,7 +980,9 @@ static int radeon_dp_get_modes(struct drm_connector *connector) return ret; } -static enum drm_connector_status radeon_dp_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_dp_detect(struct drm_connector *connector, + bool nondestructive) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); enum drm_connector_status ret = connector_status_disconnected; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 2ff5cf78235f..a527c91c0ba6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -335,7 +335,8 @@ static void vmw_ldu_connector_restore(struct drm_connector *connector) } static enum drm_connector_status - vmw_ldu_connector_detect(struct drm_connector *connector) + vmw_ldu_connector_detect(struct drm_connector *connector, + bool nondestructive) { if (vmw_connector_to_ldu(connector)->pref_active) return connector_status_connected; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index c9f3cc5949a8..5536223fbac8 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -386,7 +386,8 @@ struct drm_connector_funcs { void (*dpms)(struct drm_connector *connector, int mode); void (*save)(struct drm_connector *connector); void (*restore)(struct drm_connector *connector); - enum drm_connector_status (*detect)(struct drm_connector *connector); + enum drm_connector_status (*detect)(struct drm_connector *connector, + bool nondestructive); int (*fill_modes)(struct 
drm_connector *connector, uint32_t max_width, uint32_t max_height); int (*set_property)(struct drm_connector *connector, struct drm_property *property, uint64_t val); -- cgit v1.2.3 From 637bbdc5b83615ef9f45f50399d1c7f27473c713 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Mon, 13 Sep 2010 20:19:03 +0800 Subject: sched: Remove unused PF_ALIGNWARN flag PF_ALIGNWARN is not implemented and it is for 486 as the comment. It is not likely someone will implement this flag feature. So here remove this flag and leave the valuable 0x00000001 for future use. Signed-off-by: Dave Young Cc: Peter Zijlstra Cc: Linus Torvalds LKML-Reference: <20100913121903.GB22238@darkstar> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b51c53c285b8..cdf56693ecbf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1682,8 +1682,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * /* * Per process flags */ -#define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */ - /* Not implemented yet, only for 486*/ #define PF_STARTING 0x00000002 /* being created */ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ -- cgit v1.2.3 From ceee42714cf382e9bb9ab71b846ad49497b29d6c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 13 Sep 2010 23:53:55 -0700 Subject: Input: serio_driver - mark id_table and description as const Memory pointed to by these fields is not supposed to change. Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/serio.h b/include/linux/serio.h index b5552568178d..a31c95a3171e 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -55,9 +55,9 @@ struct serio { struct serio_driver { void *private; - char *description; + const char *description; - struct serio_device_id *id_table; + const struct serio_device_id *id_table; bool manual_bind; void (*write_wakeup)(struct serio *); -- cgit v1.2.3 From 7fc49c498c18795d35864bee433caf419bd013b2 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 13 Sep 2010 23:53:55 -0700 Subject: Input: serio_driver - drop private pointer Nobody uses it anymore. Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/serio.h b/include/linux/serio.h index a31c95a3171e..111ad501b054 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -54,7 +54,6 @@ struct serio { #define to_serio_port(d) container_of(d, struct serio, dev) struct serio_driver { - void *private; const char *description; const struct serio_device_id *id_table; -- cgit v1.2.3 From 37b0bb112b7e3ffa5015c4305a934e861b4e2e53 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 13 Sep 2010 23:53:55 -0700 Subject: Input: gameport_driver - mark description as const pointer Memory pointed to by the pointer should not change. 
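
Taken together with the serio changes above, the driver-visible pattern
is the same for both buses; a minimal serio sketch, with hypothetical
foo_* callbacks, showing the now-const fields:

        static const struct serio_device_id foo_serio_ids[] = {
                {
                        .type   = SERIO_RS232,
                        .proto  = SERIO_ANY,
                        .id     = SERIO_ANY,
                        .extra  = SERIO_ANY,
                },
                { 0 }
        };

        static struct serio_driver foo_drv = {
                .driver         = {
                        .name   = "foo",
                },
                .description    = "Example foo driver",  /* const char * now */
                .id_table       = foo_serio_ids,         /* const table now */
                .connect        = foo_connect,           /* hypothetical */
                .disconnect     = foo_disconnect,        /* hypothetical */
        };
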
Signed-off-by: Dmitry Torokhov --- include/linux/gameport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 361d1cc288d0..632d1265fbe0 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -55,7 +55,7 @@ struct gameport { struct gameport_driver { void *private; - char *description; + const char *description; int (*connect)(struct gameport *, struct gameport_driver *drv); int (*reconnect)(struct gameport *); -- cgit v1.2.3 From 528487081aad32da85bf99802bdb7af32f4922b9 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 13 Sep 2010 23:53:55 -0700 Subject: Input: gameport_driver - drop private pointer Nobody uses it anymore. Signed-off-by: Dmitry Torokhov --- include/linux/gameport.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 632d1265fbe0..b65a6f472775 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -53,8 +53,6 @@ struct gameport { #define to_gameport_port(d) container_of(d, struct gameport, dev) struct gameport_driver { - - void *private; const char *description; int (*connect)(struct gameport *, struct gameport_driver *drv); -- cgit v1.2.3 From 44432407d9f5e4b2e56c7eccb65d98cad4bba191 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 3 Sep 2010 07:20:04 +0000 Subject: fbdev: sh_mobile_lcdcfb: Support multiple video modes in platform data This is a preparation for HDMI hotplug support. This patch just moves all platform-defined video modes for the sh_mobile_lcdcfb driver into separate arrays and switches all users to element 0 of that array; it introduces no functional changes and as such should not cause any regressions.
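Concretely, the platform-data shape changes from one embedded struct fb_videomode to a pointer/count pair; a condensed sketch with invented names is below (the real board files in the diff show the full timings):

#include <linux/kernel.h>
#include <linux/fb.h>
#include <video/sh_mobile_lcdc.h>

/* Hypothetical board code: timings abbreviated for the sketch. */
static const struct fb_videomode example_lcdc_modes[] = {
	{
		.name = "Example WVGA",
		.xres = 800,
		.yres = 480,
		/* margins, sync lengths etc. go here as before */
	},
};

static struct sh_mobile_lcdc_info example_lcdc_info = {
	.ch[0] = {
		.chan	 = LCDC_CHAN_MAINLCD,
		.bpp	 = 16,
		/* previously: one struct fb_videomode embedded here */
		.lcd_cfg = example_lcdc_modes,
		.num_cfg = ARRAY_SIZE(example_lcdc_modes),
	},
};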
Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ap4evb.c | 98 ++++++++++++++++++++--------------- arch/sh/boards/mach-ap325rxa/setup.c | 29 ++++++----- arch/sh/boards/mach-ecovec24/setup.c | 60 ++++++++++++--------- arch/sh/boards/mach-kfr2r09/setup.c | 29 ++++++----- arch/sh/boards/mach-migor/setup.c | 58 +++++++++++---------- arch/sh/boards/mach-se/7724/setup.c | 54 ++++++++++++------- drivers/video/sh_mipi_dsi.c | 32 +++++++----- drivers/video/sh_mobile_hdmi.c | 3 +- drivers/video/sh_mobile_lcdcfb.c | 10 ++-- include/video/sh_mobile_lcdc.h | 3 +- 10 files changed, 219 insertions(+), 157 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 31e5b28ab9b4..4e883e0fb01b 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -375,10 +375,40 @@ static struct platform_device usb1_host_device = { .resource = usb1_host_resources, }; +const static struct fb_videomode ap4evb_lcdc_modes[] = { + { +#ifdef CONFIG_AP4EVB_QHD + .name = "R63302(QHD)", + .xres = 544, + .yres = 961, + .left_margin = 72, + .right_margin = 600, + .hsync_len = 16, + .upper_margin = 8, + .lower_margin = 8, + .vsync_len = 2, + .sync = FB_SYNC_VERT_HIGH_ACT | FB_SYNC_HOR_HIGH_ACT, +#else + .name = "WVGA Panel", + .xres = 800, + .yres = 480, + .left_margin = 220, + .right_margin = 110, + .hsync_len = 70, + .upper_margin = 20, + .lower_margin = 5, + .vsync_len = 5, + .sync = 0, +#endif + }, +}; + static struct sh_mobile_lcdc_info lcdc_info = { .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, + .lcd_cfg = ap4evb_lcdc_modes, + .num_cfg = ARRAY_SIZE(ap4evb_lcdc_modes), } }; @@ -569,6 +599,31 @@ static struct platform_device fsi_device = { }, }; +const static struct fb_videomode ap4evb_hdmi_modes[] = { + { + .name = "HDMI 720p", + .xres = 1280, + .yres = 720, + + /* + * If left and right margins are not multiples of 8, + * LDHAJR will be adjusted accordingly by the LCDC + * driver. Until we start using EDID, these values + * might have to be adjusted for different monitors. + */ + .left_margin = 200, + .right_margin = 88, + .hsync_len = 48, + + .upper_margin = 20, + .lower_margin = 5, + .vsync_len = 5, + + .pixclock = 13468, + .sync = FB_SYNC_VERT_HIGH_ACT | FB_SYNC_HOR_HIGH_ACT, + }, +}; + static struct sh_mobile_lcdc_info sh_mobile_lcdc1_info = { .clock_source = LCDC_CLK_EXTERNAL, .ch[0] = { @@ -577,26 +632,8 @@ static struct sh_mobile_lcdc_info sh_mobile_lcdc1_info = { .interface_type = RGB24, .clock_divider = 1, .flags = LCDC_FLAGS_DWPOL, - .lcd_cfg = { - .name = "HDMI", - /* So far only 720p is supported */ - .xres = 1280, - .yres = 720, - /* - * If left and right margins are not multiples of 8, - * LDHAJR will be adjusted accordingly by the LCDC - * driver. Until we start using EDID, these values - * might have to be adjusted for different monitors. 
- */ - .left_margin = 200, - .right_margin = 88, - .hsync_len = 48, - .upper_margin = 20, - .lower_margin = 5, - .vsync_len = 5, - .pixclock = 13468, - .sync = FB_SYNC_VERT_HIGH_ACT | FB_SYNC_HOR_HIGH_ACT, - }, + .lcd_cfg = ap4evb_hdmi_modes, + .num_cfg = ARRAY_SIZE(ap4evb_hdmi_modes), } }; @@ -960,17 +997,6 @@ static void __init ap4evb_init(void) lcdc_info.ch[0].interface_type = RGB24; lcdc_info.ch[0].clock_divider = 1; lcdc_info.ch[0].flags = LCDC_FLAGS_DWPOL; - lcdc_info.ch[0].lcd_cfg.name = "R63302(QHD)"; - lcdc_info.ch[0].lcd_cfg.xres = 544; - lcdc_info.ch[0].lcd_cfg.yres = 961; - lcdc_info.ch[0].lcd_cfg.left_margin = 72; - lcdc_info.ch[0].lcd_cfg.right_margin = 600; - lcdc_info.ch[0].lcd_cfg.hsync_len = 16; - lcdc_info.ch[0].lcd_cfg.upper_margin = 8; - lcdc_info.ch[0].lcd_cfg.lower_margin = 8; - lcdc_info.ch[0].lcd_cfg.vsync_len = 2; - lcdc_info.ch[0].lcd_cfg.sync = FB_SYNC_VERT_HIGH_ACT | - FB_SYNC_HOR_HIGH_ACT; lcdc_info.ch[0].lcd_size_cfg.width = 44; lcdc_info.ch[0].lcd_size_cfg.height = 79; @@ -1013,16 +1039,6 @@ static void __init ap4evb_init(void) lcdc_info.ch[0].interface_type = RGB18; lcdc_info.ch[0].clock_divider = 2; lcdc_info.ch[0].flags = 0; - lcdc_info.ch[0].lcd_cfg.name = "WVGA Panel"; - lcdc_info.ch[0].lcd_cfg.xres = 800; - lcdc_info.ch[0].lcd_cfg.yres = 480; - lcdc_info.ch[0].lcd_cfg.left_margin = 220; - lcdc_info.ch[0].lcd_cfg.right_margin = 110; - lcdc_info.ch[0].lcd_cfg.hsync_len = 70; - lcdc_info.ch[0].lcd_cfg.upper_margin = 20; - lcdc_info.ch[0].lcd_cfg.lower_margin = 5; - lcdc_info.ch[0].lcd_cfg.vsync_len = 5; - lcdc_info.ch[0].lcd_cfg.sync = 0; lcdc_info.ch[0].lcd_size_cfg.width = 152; lcdc_info.ch[0].lcd_size_cfg.height = 91; diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c index 3da116f47f01..00553233a7c5 100644 --- a/arch/sh/boards/mach-ap325rxa/setup.c +++ b/arch/sh/boards/mach-ap325rxa/setup.c @@ -176,6 +176,21 @@ static void ap320_wvga_power_off(void *board_data) __raw_writew(0, FPGA_LCDREG); } +const static struct fb_videomode ap325rxa_lcdc_modes[] = { + { + .name = "LB070WV1", + .xres = 800, + .yres = 480, + .left_margin = 32, + .right_margin = 160, + .hsync_len = 8, + .upper_margin = 63, + .lower_margin = 80, + .vsync_len = 1, + .sync = 0, /* hsync and vsync are active low */ + }, +}; + static struct sh_mobile_lcdc_info lcdc_info = { .clock_source = LCDC_CLK_EXTERNAL, .ch[0] = { @@ -183,18 +198,8 @@ static struct sh_mobile_lcdc_info lcdc_info = { .bpp = 16, .interface_type = RGB18, .clock_divider = 1, - .lcd_cfg = { - .name = "LB070WV1", - .xres = 800, - .yres = 480, - .left_margin = 32, - .right_margin = 160, - .hsync_len = 8, - .upper_margin = 63, - .lower_margin = 80, - .vsync_len = 1, - .sync = 0, /* hsync and vsync are active low */ - }, + .lcd_cfg = ap325rxa_lcdc_modes, + .num_cfg = ARRAY_SIZE(ap325rxa_lcdc_modes), .lcd_size_cfg = { /* 7.0 inch */ .width = 152, .height = 91, diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index 1d7b495a7db4..feadf5dada77 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -231,14 +231,41 @@ static struct platform_device usb1_common_device = { }; /* LCDC */ +const static struct fb_videomode ecovec_lcd_modes[] = { + { + .name = "Panel", + .xres = 800, + .yres = 480, + .left_margin = 220, + .right_margin = 110, + .hsync_len = 70, + .upper_margin = 20, + .lower_margin = 5, + .vsync_len = 5, + .sync = 0, /* hsync and vsync are active low */ + }, +}; + +const static struct fb_videomode 
ecovec_dvi_modes[] = { + { + .name = "DVI", + .xres = 1280, + .yres = 720, + .left_margin = 220, + .right_margin = 110, + .hsync_len = 40, + .upper_margin = 20, + .lower_margin = 5, + .vsync_len = 5, + .sync = 0, /* hsync and vsync are active low */ + }, +}; + static struct sh_mobile_lcdc_info lcdc_info = { .ch[0] = { .interface_type = RGB18, .chan = LCDC_CHAN_MAINLCD, .bpp = 16, - .lcd_cfg = { - .sync = 0, /* hsync and vsync are active low */ - }, .lcd_size_cfg = { /* 7.0 inch */ .width = 152, .height = 91, @@ -1079,33 +1106,18 @@ static int __init arch_setup(void) if (gpio_get_value(GPIO_PTE6)) { /* DVI */ lcdc_info.clock_source = LCDC_CLK_EXTERNAL; - lcdc_info.ch[0].clock_divider = 1, - lcdc_info.ch[0].lcd_cfg.name = "DVI"; - lcdc_info.ch[0].lcd_cfg.xres = 1280; - lcdc_info.ch[0].lcd_cfg.yres = 720; - lcdc_info.ch[0].lcd_cfg.left_margin = 220; - lcdc_info.ch[0].lcd_cfg.right_margin = 110; - lcdc_info.ch[0].lcd_cfg.hsync_len = 40; - lcdc_info.ch[0].lcd_cfg.upper_margin = 20; - lcdc_info.ch[0].lcd_cfg.lower_margin = 5; - lcdc_info.ch[0].lcd_cfg.vsync_len = 5; + lcdc_info.ch[0].clock_divider = 1; + lcdc_info.ch[0].lcd_cfg = ecovec_dvi_modes; + lcdc_info.ch[0].num_cfg = ARRAY_SIZE(ecovec_dvi_modes); gpio_set_value(GPIO_PTA2, 1); gpio_set_value(GPIO_PTU1, 1); } else { /* Panel */ - lcdc_info.clock_source = LCDC_CLK_PERIPHERAL; - lcdc_info.ch[0].clock_divider = 2, - lcdc_info.ch[0].lcd_cfg.name = "Panel"; - lcdc_info.ch[0].lcd_cfg.xres = 800; - lcdc_info.ch[0].lcd_cfg.yres = 480; - lcdc_info.ch[0].lcd_cfg.left_margin = 220; - lcdc_info.ch[0].lcd_cfg.right_margin = 110; - lcdc_info.ch[0].lcd_cfg.hsync_len = 70; - lcdc_info.ch[0].lcd_cfg.upper_margin = 20; - lcdc_info.ch[0].lcd_cfg.lower_margin = 5; - lcdc_info.ch[0].lcd_cfg.vsync_len = 5; + lcdc_info.ch[0].clock_divider = 2; + lcdc_info.ch[0].lcd_cfg = ecovec_lcd_modes; + lcdc_info.ch[0].num_cfg = ARRAY_SIZE(ecovec_lcd_modes); gpio_set_value(GPIO_PTR1, 1); diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c index 68994a163f6c..87d4b90e368c 100644 --- a/arch/sh/boards/mach-kfr2r09/setup.c +++ b/arch/sh/boards/mach-kfr2r09/setup.c @@ -126,6 +126,21 @@ static struct platform_device kfr2r09_sh_keysc_device = { }, }; +const static struct fb_videomode kfr2r09_lcdc_modes[] = { + { + .name = "TX07D34VM0AAA", + .xres = 240, + .yres = 400, + .left_margin = 0, + .right_margin = 16, + .hsync_len = 8, + .upper_margin = 0, + .lower_margin = 1, + .vsync_len = 1, + .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, + }, +}; + static struct sh_mobile_lcdc_info kfr2r09_sh_lcdc_info = { .clock_source = LCDC_CLK_BUS, .ch[0] = { @@ -134,18 +149,8 @@ static struct sh_mobile_lcdc_info kfr2r09_sh_lcdc_info = { .interface_type = SYS18, .clock_divider = 6, .flags = LCDC_FLAGS_DWPOL, - .lcd_cfg = { - .name = "TX07D34VM0AAA", - .xres = 240, - .yres = 400, - .left_margin = 0, - .right_margin = 16, - .hsync_len = 8, - .upper_margin = 0, - .lower_margin = 1, - .vsync_len = 1, - .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, - }, + .lcd_cfg = kfr2r09_lcdc_modes, + .num_cfg = ARRAY_SIZE(kfr2r09_lcdc_modes), .lcd_size_cfg = { .width = 35, .height = 58, diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c index 662debe4ead2..9204cbb87147 100644 --- a/arch/sh/boards/mach-migor/setup.c +++ b/arch/sh/boards/mach-migor/setup.c @@ -213,51 +213,55 @@ static struct platform_device migor_nand_flash_device = { } }; +const static struct fb_videomode migor_lcd_modes[] = { + { +#if defined(CONFIG_SH_MIGOR_RTA_WVGA) 
+ .name = "LB070WV1", + .xres = 800, + .yres = 480, + .left_margin = 64, + .right_margin = 16, + .hsync_len = 120, + .sync = 0, +#elif defined(CONFIG_SH_MIGOR_QVGA) + .name = "PH240320T", + .xres = 320, + .yres = 240, + .left_margin = 0, + .right_margin = 16, + .hsync_len = 8, + .sync = FB_SYNC_HOR_HIGH_ACT, +#endif + .upper_margin = 1, + .lower_margin = 17, + .vsync_len = 2, + }, +}; + static struct sh_mobile_lcdc_info sh_mobile_lcdc_info = { -#ifdef CONFIG_SH_MIGOR_RTA_WVGA +#if defined(CONFIG_SH_MIGOR_RTA_WVGA) .clock_source = LCDC_CLK_BUS, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, .interface_type = RGB16, .clock_divider = 2, - .lcd_cfg = { - .name = "LB070WV1", - .xres = 800, - .yres = 480, - .left_margin = 64, - .right_margin = 16, - .hsync_len = 120, - .upper_margin = 1, - .lower_margin = 17, - .vsync_len = 2, - .sync = 0, - }, + .lcd_cfg = migor_lcd_modes, + .num_cfg = ARRAY_SIZE(migor_lcd_modes), .lcd_size_cfg = { /* 7.0 inch */ .width = 152, .height = 91, }, } -#endif -#ifdef CONFIG_SH_MIGOR_QVGA +#elif defined(CONFIG_SH_MIGOR_QVGA) .clock_source = LCDC_CLK_PERIPHERAL, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, .interface_type = SYS16A, .clock_divider = 10, - .lcd_cfg = { - .name = "PH240320T", - .xres = 320, - .yres = 240, - .left_margin = 0, - .right_margin = 16, - .hsync_len = 8, - .upper_margin = 1, - .lower_margin = 17, - .vsync_len = 2, - .sync = FB_SYNC_HOR_HIGH_ACT, - }, + .lcd_cfg = migor_lcd_modes, + .num_cfg = ARRAY_SIZE(migor_lcd_modes), .lcd_size_cfg = { /* 2.4 inch */ .width = 49, .height = 37, diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index 552ebd9ba82b..3099c36759ad 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -144,16 +144,42 @@ static struct platform_device nor_flash_device = { }; /* LCDC */ +const static struct fb_videomode lcdc_720p_modes[] = { + { + .name = "LB070WV1", + .sync = 0, /* hsync and vsync are active low */ + .xres = 1280; + .yres = 720; + .left_margin = 220; + .right_margin = 110; + .hsync_len = 40; + .upper_margin = 20; + .lower_margin = 5; + .vsync_len = 5; + }, +}; + +const static struct fb_videomode lcdc_vga_modes[] = { + { + .name = "LB070WV1", + .sync = 0, /* hsync and vsync are active low */ + .xres = 640; + .yres = 480; + .left_margin = 105; + .right_margin = 50; + .hsync_len = 96; + .upper_margin = 33; + .lower_margin = 10; + .vsync_len = 2; + }, +}; + static struct sh_mobile_lcdc_info lcdc_info = { .clock_source = LCDC_CLK_EXTERNAL, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, .clock_divider = 1, - .lcd_cfg = { - .name = "LB070WV1", - .sync = 0, /* hsync and vsync are active low */ - }, .lcd_size_cfg = { /* 7.0 inch */ .width = 152, .height = 91, @@ -909,24 +935,12 @@ static int __init devices_setup(void) if (sw & SW41_B) { /* 720p */ - lcdc_info.ch[0].lcd_cfg.xres = 1280; - lcdc_info.ch[0].lcd_cfg.yres = 720; - lcdc_info.ch[0].lcd_cfg.left_margin = 220; - lcdc_info.ch[0].lcd_cfg.right_margin = 110; - lcdc_info.ch[0].lcd_cfg.hsync_len = 40; - lcdc_info.ch[0].lcd_cfg.upper_margin = 20; - lcdc_info.ch[0].lcd_cfg.lower_margin = 5; - lcdc_info.ch[0].lcd_cfg.vsync_len = 5; + lcdc_info.ch[0].lcd_cfg = lcdc_720p_modes; + lcdc_info.ch[0].num_cfg = ARRAY_SIZE(lcdc_720p_modes); } else { /* VGA */ - lcdc_info.ch[0].lcd_cfg.xres = 640; - lcdc_info.ch[0].lcd_cfg.yres = 480; - lcdc_info.ch[0].lcd_cfg.left_margin = 105; - lcdc_info.ch[0].lcd_cfg.right_margin = 50; - lcdc_info.ch[0].lcd_cfg.hsync_len = 96; - lcdc_info.ch[0].lcd_cfg.upper_margin = 
33; - lcdc_info.ch[0].lcd_cfg.lower_margin = 10; - lcdc_info.ch[0].lcd_cfg.vsync_len = 2; + lcdc_info.ch[0].lcd_cfg = lcdc_vga_modes; + lcdc_info.ch[0].num_cfg = ARRAY_SIZE(lcdc_vga_modes); } if (sw & SW41_A) { diff --git a/drivers/video/sh_mipi_dsi.c b/drivers/video/sh_mipi_dsi.c index 5699ce0c1780..3f3d431033ca 100644 --- a/drivers/video/sh_mipi_dsi.c +++ b/drivers/video/sh_mipi_dsi.c @@ -123,83 +123,87 @@ static int __init sh_mipi_setup(struct sh_mipi *mipi, u32 linelength; bool yuv; - /* Select data format */ + /* + * Select data format. MIPI DSI is not hot-pluggable, so, we just use + * the default videomode. If this ever becomes a problem, We'll have to + * move this to mipi_display_on() above and use info->var.xres + */ switch (pdata->data_format) { case MIPI_RGB888: pctype = 0; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_24; pixfmt = MIPI_DCS_PIXEL_FMT_24BIT; - linelength = ch->lcd_cfg.xres * 3; + linelength = ch->lcd_cfg[0].xres * 3; yuv = false; break; case MIPI_RGB565: pctype = 1; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_16; pixfmt = MIPI_DCS_PIXEL_FMT_16BIT; - linelength = ch->lcd_cfg.xres * 2; + linelength = ch->lcd_cfg[0].xres * 2; yuv = false; break; case MIPI_RGB666_LP: pctype = 2; datatype = MIPI_DSI_PIXEL_STREAM_3BYTE_18; pixfmt = MIPI_DCS_PIXEL_FMT_24BIT; - linelength = ch->lcd_cfg.xres * 3; + linelength = ch->lcd_cfg[0].xres * 3; yuv = false; break; case MIPI_RGB666: pctype = 3; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_18; pixfmt = MIPI_DCS_PIXEL_FMT_18BIT; - linelength = (ch->lcd_cfg.xres * 18 + 7) / 8; + linelength = (ch->lcd_cfg[0].xres * 18 + 7) / 8; yuv = false; break; case MIPI_BGR888: pctype = 8; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_24; pixfmt = MIPI_DCS_PIXEL_FMT_24BIT; - linelength = ch->lcd_cfg.xres * 3; + linelength = ch->lcd_cfg[0].xres * 3; yuv = false; break; case MIPI_BGR565: pctype = 9; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_16; pixfmt = MIPI_DCS_PIXEL_FMT_16BIT; - linelength = ch->lcd_cfg.xres * 2; + linelength = ch->lcd_cfg[0].xres * 2; yuv = false; break; case MIPI_BGR666_LP: pctype = 0xa; datatype = MIPI_DSI_PIXEL_STREAM_3BYTE_18; pixfmt = MIPI_DCS_PIXEL_FMT_24BIT; - linelength = ch->lcd_cfg.xres * 3; + linelength = ch->lcd_cfg[0].xres * 3; yuv = false; break; case MIPI_BGR666: pctype = 0xb; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_18; pixfmt = MIPI_DCS_PIXEL_FMT_18BIT; - linelength = (ch->lcd_cfg.xres * 18 + 7) / 8; + linelength = (ch->lcd_cfg[0].xres * 18 + 7) / 8; yuv = false; break; case MIPI_YUYV: pctype = 4; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR16; pixfmt = MIPI_DCS_PIXEL_FMT_16BIT; - linelength = ch->lcd_cfg.xres * 2; + linelength = ch->lcd_cfg[0].xres * 2; yuv = true; break; case MIPI_UYVY: pctype = 5; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR16; pixfmt = MIPI_DCS_PIXEL_FMT_16BIT; - linelength = ch->lcd_cfg.xres * 2; + linelength = ch->lcd_cfg[0].xres * 2; yuv = true; break; case MIPI_YUV420_L: pctype = 6; datatype = MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR12; pixfmt = MIPI_DCS_PIXEL_FMT_12BIT; - linelength = (ch->lcd_cfg.xres * 12 + 7) / 8; + linelength = (ch->lcd_cfg[0].xres * 12 + 7) / 8; yuv = true; break; case MIPI_YUV420: @@ -207,7 +211,7 @@ static int __init sh_mipi_setup(struct sh_mipi *mipi, datatype = MIPI_DSI_PACKED_PIXEL_STREAM_YCBCR12; pixfmt = MIPI_DCS_PIXEL_FMT_12BIT; /* Length of U/V line */ - linelength = (ch->lcd_cfg.xres + 1) / 2; + linelength = (ch->lcd_cfg[0].xres + 1) / 2; yuv = true; break; default: @@ -281,7 +285,7 @@ static int __init sh_mipi_setup(struct sh_mipi *mipi, iowrite32(0x00e00000, base + 0x8024); /* 
VMCTR2 */ /* * 0x660 = 1632 bytes per line (RGB24, 544 pixels: see - * sh_mobile_lcdc_info.ch[0].lcd_cfg.xres), HSALEN = 1 - default + * sh_mobile_lcdc_info.ch[0].lcd_cfg[0].xres), HSALEN = 1 - default * (unused, since VMCTR2[HSABM] = 0) */ iowrite32(1 | (linelength << 16), base + 0x8028); /* VMLEN1 */ diff --git a/drivers/video/sh_mobile_hdmi.c b/drivers/video/sh_mobile_hdmi.c index a8117c33e75f..6608429c0fbb 100644 --- a/drivers/video/sh_mobile_hdmi.c +++ b/drivers/video/sh_mobile_hdmi.c @@ -777,7 +777,8 @@ static int __init sh_hdmi_probe(struct platform_device *pdev) goto egetclk; } - rate = PICOS2KHZ(pdata->lcd_chan->lcd_cfg.pixclock) * 1000; + /* TODO: reconfigure the clock on monitor plug in */ + rate = PICOS2KHZ(pdata->lcd_chan->lcd_cfg[0].pixclock) * 1000; rate = clk_round_rate(hdmi->hdmi_clk, rate); if (rate < 0) { diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c index e4f6012fe4e3..ce3ed4bc991f 100644 --- a/drivers/video/sh_mobile_lcdcfb.c +++ b/drivers/video/sh_mobile_lcdcfb.c @@ -989,8 +989,8 @@ static int sh_mobile_lcdc_notify(struct notifier_block *nb, int ret; /* Can we handle this display? */ - if (var->xres > ch->cfg.lcd_cfg.xres || - var->yres > ch->cfg.lcd_cfg.yres) + if (var->xres > ch->cfg.lcd_cfg[0].xres || + var->yres > ch->cfg.lcd_cfg[0].yres) return -ENOMEM; /* Add to the modelist */ @@ -1115,7 +1115,7 @@ static int __devinit sh_mobile_lcdc_probe(struct platform_device *pdev) info = ch->info; var = &info->var; - lcd_cfg = &cfg->lcd_cfg; + lcd_cfg = &cfg->lcd_cfg[0]; info->fbops = &sh_mobile_lcdc_ops; fb_videomode_to_var(var, lcd_cfg); /* Default Y virtual resolution is 2x panel size */ @@ -1187,8 +1187,8 @@ static int __devinit sh_mobile_lcdc_probe(struct platform_device *pdev) pdev->name, (ch->cfg.chan == LCDC_CHAN_MAINLCD) ? "mainlcd" : "sublcd", - (int) ch->cfg.lcd_cfg.xres, - (int) ch->cfg.lcd_cfg.yres, + (int) ch->cfg.lcd_cfg[0].xres, + (int) ch->cfg.lcd_cfg[0].yres, ch->cfg.bpp); /* deferred io mode: disable clock to save power */ diff --git a/include/video/sh_mobile_lcdc.h b/include/video/sh_mobile_lcdc.h index 55d700e8566e..19c69d788f3b 100644 --- a/include/video/sh_mobile_lcdc.h +++ b/include/video/sh_mobile_lcdc.h @@ -70,7 +70,8 @@ struct sh_mobile_lcdc_chan_cfg { int interface_type; /* selects RGBn or SYSn I/F, see above */ int clock_divider; unsigned long flags; /* LCDC_FLAGS_... */ - struct fb_videomode lcd_cfg; + const struct fb_videomode *lcd_cfg; + int num_cfg; struct sh_mobile_lcdc_lcd_size_cfg lcd_size_cfg; struct sh_mobile_lcdc_board_cfg board_cfg; struct sh_mobile_lcdc_sys_bus_cfg sys_bus_cfg; /* only for SYSn I/F */ -- cgit v1.2.3 From 6de9edd5bde0cdfea12e9948690e53ec669c3018 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 3 Sep 2010 07:20:23 +0000 Subject: fbdev: sh_mobile_hdmi: implement locking The SH-Mobile HDMI driver runs in several contexts: ISR, delayed work-queue, and task context (when called from the sh_mobile_lcdc framebuffer driver). This creates ample race possibilities. Even though most of these races are purely theoretical, it is better to close them. To track fb_info validity we install a notification callback in the HDMI driver; the only way for that callback to reach driver-internal data is via struct sh_mobile_lcdc_chan, so that structure had to be extracted into a separate common header.
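The closing idea, sketched schematically below with invented names (this is not the actual driver code), is a single mutex guarding the state that all of these contexts touch, with the fb_info pointer cleared under that mutex when the framebuffer goes away:

#include <linux/fb.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>

struct example_hdmi {
	struct mutex		mutex;		/* protects the fields below */
	struct fb_info		*info;		/* NULL once the fb is gone */
	struct delayed_work	edid_work;
};

static void example_edid_work_fn(struct work_struct *work)
{
	struct example_hdmi *hdmi =
		container_of(work, struct example_hdmi, edid_work.work);

	mutex_lock(&hdmi->mutex);
	if (hdmi->info) {
		/* safe to touch hdmi->info: it cannot vanish under us */
	}
	mutex_unlock(&hdmi->mutex);
}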
Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- drivers/video/sh_mobile_hdmi.c | 103 +++++++++++++++++++++++++++++++++------ drivers/video/sh_mobile_lcdcfb.c | 46 ++++++----------- drivers/video/sh_mobile_lcdcfb.h | 37 ++++++++++++++ include/video/sh_mobile_lcdc.h | 2 + 4 files changed, 141 insertions(+), 47 deletions(-) create mode 100644 drivers/video/sh_mobile_lcdcfb.h (limited to 'include') diff --git a/drivers/video/sh_mobile_hdmi.c b/drivers/video/sh_mobile_hdmi.c index a8cf9a510f30..56e44fd0a3af 100644 --- a/drivers/video/sh_mobile_hdmi.c +++ b/drivers/video/sh_mobile_hdmi.c @@ -26,6 +26,8 @@ #include diff --git a/include/linux/idr.h b/include/linux/idr.h index 928ae712709f..13a801f3d028 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -81,6 +81,7 @@ struct idr { #define _idr_rc_to_errno(rc) ((rc) == -1 ? -EAGAIN : -ENOSPC) /** + * DOC: idr sync * idr synchronization (stolen from radix-tree.h) * * idr_find() is able to be called locklessly, using RCU. The caller must diff --git a/lib/idr.c b/lib/idr.c index e35850d3004a..e15502e8b21e 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -106,7 +106,7 @@ static void idr_mark_full(struct idr_layer **pa, int id) } /** - * idr_pre_get - reserver resources for idr allocation + * idr_pre_get - reserve resources for idr allocation * @idp: idr handle * @gfp_mask: memory allocation flags * @@ -115,8 +115,8 @@ static void idr_mark_full(struct idr_layer **pa, int id) * caller should pass in GFP_KERNEL if possible. This of course requires that * no spinning locks be held. * - * If the system is REALLY out of memory this function returns 0, - * otherwise 1. + * If the system is REALLY out of memory this function returns %0, + * otherwise %1. */ int idr_pre_get(struct idr *idp, gfp_t gfp_mask) { @@ -292,12 +292,12 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id) * required locks. * * If allocation from IDR's private freelist fails, idr_get_new_above() will - * return -EAGAIN. The caller should retry the idr_pre_get() call to refill + * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill * IDR's preallocation and then retry the idr_get_new_above() call. * - * If the idr is full idr_get_new_above() will return -ENOSPC. + * If the idr is full idr_get_new_above() will return %-ENOSPC. * - * @id returns a value in the range @starting_id ... 0x7fffffff + * @id returns a value in the range @starting_id ... %0x7fffffff */ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) { @@ -322,12 +322,12 @@ EXPORT_SYMBOL(idr_get_new_above); * @id: pointer to the allocated handle * * If allocation from IDR's private freelist fails, idr_get_new_above() will - * return -EAGAIN. The caller should retry the idr_pre_get() call to refill + * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill * IDR's preallocation and then retry the idr_get_new_above() call. * - * If the idr is full idr_get_new_above() will return -ENOSPC. + * If the idr is full idr_get_new_above() will return %-ENOSPC. * - * @id returns a value in the range 0 ... 0x7fffffff + * @id returns a value in the range %0 ... 
%0x7fffffff */ int idr_get_new(struct idr *idp, void *ptr, int *id) { @@ -390,7 +390,7 @@ static void sub_remove(struct idr *idp, int shift, int id) } /** - * idr_remove - remove the given id and free it's slot + * idr_remove - remove the given id and free its slot * @idp: idr handle * @id: unique key */ @@ -439,7 +439,7 @@ EXPORT_SYMBOL(idr_remove); * function will remove all id mappings and leave all idp_layers * unused. * - * A typical clean-up sequence for objects stored in an idr tree, will + * A typical clean-up sequence for objects stored in an idr tree will * use idr_for_each() to free all objects, if necessay, then * idr_remove_all() to remove all ids, and idr_destroy() to free * up the cached idr_layers. @@ -544,7 +544,7 @@ EXPORT_SYMBOL(idr_find); * not allowed. * * We check the return of @fn each time. If it returns anything other - * than 0, we break out and return that value. + * than %0, we break out and return that value. * * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove(). */ @@ -639,8 +639,8 @@ EXPORT_SYMBOL(idr_get_next); * @id: lookup key * * Replace the pointer registered with an id and return the old value. - * A -ENOENT return indicates that @id was not found. - * A -EINVAL return indicates that @id was not within valid constraints. + * A %-ENOENT return indicates that @id was not found. + * A %-EINVAL return indicates that @id was not within valid constraints. * * The caller must serialize with writers. */ @@ -698,10 +698,11 @@ void idr_init(struct idr *idp) EXPORT_SYMBOL(idr_init); -/* +/** + * DOC: IDA description * IDA - IDR based ID allocator * - * this is id allocator without id -> pointer translation. Memory + * This is id allocator without id -> pointer translation. Memory * usage is much lower than full blown idr because each id only * occupies a bit. ida uses a custom leaf node which contains * IDA_BITMAP_BITS slots. @@ -734,8 +735,8 @@ static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap) * following function. It preallocates enough memory to satisfy the * worst possible allocation. * - * If the system is REALLY out of memory this function returns 0, - * otherwise 1. + * If the system is REALLY out of memory this function returns %0, + * otherwise %1. */ int ida_pre_get(struct ida *ida, gfp_t gfp_mask) { @@ -767,11 +768,11 @@ EXPORT_SYMBOL(ida_pre_get); * Allocate new ID above or equal to @ida. It should be called with * any required locks. * - * If memory is required, it will return -EAGAIN, you should unlock + * If memory is required, it will return %-EAGAIN, you should unlock * and go back to the ida_pre_get() call. If the ida is full, it will - * return -ENOSPC. + * return %-ENOSPC. * - * @p_id returns a value in the range @starting_id ... 0x7fffffff. + * @p_id returns a value in the range @starting_id ... %0x7fffffff. */ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) { @@ -853,11 +854,11 @@ EXPORT_SYMBOL(ida_get_new_above); * * Allocate new ID. It should be called with any required locks. * - * If memory is required, it will return -EAGAIN, you should unlock + * If memory is required, it will return %-EAGAIN, you should unlock * and go back to the idr_pre_get() call. If the idr is full, it will - * return -ENOSPC. + * return %-ENOSPC. * - * @id returns a value in the range 0 ... 0x7fffffff. + * @id returns a value in the range %0 ... %0x7fffffff. 
*/ int ida_get_new(struct ida *ida, int *p_id) { -- cgit v1.2.3 From 47f19a0814e80e1d4e5c17d61b70fca85ea09162 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 Oct 2010 17:41:17 +0200 Subject: percpu: Remove the multi-page alignment facility [DECLARE|DEFINE]_PER_CPU_MULTIPAGE_ALIGNED never really worked because the head percpu section was only page aligned. Now that the last user is gone (32-bit IRQ stacks), remove the generic percpu facility. Cc: Brian Gerst Acked-by: Tejun Heo Acked-by: Linus Torvalds LKML-Reference: <1288158182-1753-1-git-send-email-brgerst@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/percpu-defs.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 018db9a62ffe..27ef6b190ea6 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -147,18 +147,6 @@ #define DEFINE_PER_CPU_READ_MOSTLY(type, name) \ DEFINE_PER_CPU_SECTION(type, name, "..readmostly") -/* - * Declaration/definition used for large per-CPU variables that must be - * aligned to something larger than the pagesize. - */ -#define DECLARE_PER_CPU_MULTIPAGE_ALIGNED(type, name, size) \ - DECLARE_PER_CPU_SECTION(type, name, "..page_aligned") \ - __aligned(size) - -#define DEFINE_PER_CPU_MULTIPAGE_ALIGNED(type, name, size) \ - DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \ - __aligned(size) - /* * Intermodule exports for per-CPU variables. sparse forgets about * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to -- cgit v1.2.3 From 3a5f65df5a0fcbaa35e5417c0420d691fee4ac56 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 Oct 2010 17:28:36 +0100 Subject: Typedef SMP call function pointer Typedef the pointer to the function to be called by smp_call_function() and friends: typedef void (*smp_call_func_t)(void *info); as it is used in a fair number of places. 
Signed-off-by: David Howells cc: linux-arch@vger.kernel.org --- include/linux/smp.h | 19 ++++++++++--------- kernel/smp.c | 8 ++++---- 2 files changed, 14 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index cfa2d20e35f1..6dc95cac6b3d 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -13,9 +13,10 @@ extern void cpu_idle(void); +typedef void (*smp_call_func_t)(void *info); struct call_single_data { struct list_head list; - void (*func) (void *info); + smp_call_func_t func; void *info; u16 flags; u16 priv; @@ -24,8 +25,8 @@ struct call_single_data { /* total number of cpus in this system (may exceed NR_CPUS) */ extern unsigned int total_cpus; -int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, - int wait); +int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, + int wait); #ifdef CONFIG_SMP @@ -69,15 +70,15 @@ extern void smp_cpus_done(unsigned int max_cpus); /* * Call a function on all other processors */ -int smp_call_function(void(*func)(void *info), void *info, int wait); +int smp_call_function(smp_call_func_t func, void *info, int wait); void smp_call_function_many(const struct cpumask *mask, - void (*func)(void *info), void *info, bool wait); + smp_call_func_t func, void *info, bool wait); void __smp_call_function_single(int cpuid, struct call_single_data *data, int wait); int smp_call_function_any(const struct cpumask *mask, - void (*func)(void *info), void *info, int wait); + smp_call_func_t func, void *info, int wait); /* * Generic and arch helpers @@ -94,7 +95,7 @@ void ipi_call_unlock_irq(void); /* * Call a function on all processors */ -int on_each_cpu(void (*func) (void *info), void *info, int wait); +int on_each_cpu(smp_call_func_t func, void *info, int wait); #define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ #define MSG_ALL 0x8001 @@ -122,7 +123,7 @@ static inline void smp_send_stop(void) { } * These macros fold the SMP functionality into a single CPU system */ #define raw_smp_processor_id() 0 -static inline int up_smp_call_function(void (*func)(void *), void *info) +static inline int up_smp_call_function(smp_call_func_t func, void *info) { return 0; } @@ -143,7 +144,7 @@ static inline void smp_send_reschedule(int cpu) { } static inline void init_call_single_data(void) { } static inline int -smp_call_function_any(const struct cpumask *mask, void (*func)(void *info), +smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, void *info, int wait) { return smp_call_function_single(0, func, info, wait); diff --git a/kernel/smp.c b/kernel/smp.c index ed6aacfcb7ef..12ed8b013e2d 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -267,7 +267,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); * * Returns 0 on success, else a negative status code. */ -int smp_call_function_single(int cpu, void (*func) (void *info), void *info, +int smp_call_function_single(int cpu, smp_call_func_t func, void *info, int wait) { struct call_single_data d = { @@ -336,7 +336,7 @@ EXPORT_SYMBOL(smp_call_function_single); * 3) any other online cpu in @mask */ int smp_call_function_any(const struct cpumask *mask, - void (*func)(void *info), void *info, int wait) + smp_call_func_t func, void *info, int wait) { unsigned int cpu; const struct cpumask *nodemask; @@ -416,7 +416,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, * must be disabled when calling this function. 
*/ void smp_call_function_many(const struct cpumask *mask, - void (*func)(void *), void *info, bool wait) + smp_call_func_t func, void *info, bool wait) { struct call_function_data *data; unsigned long flags; @@ -500,7 +500,7 @@ EXPORT_SYMBOL(smp_call_function_many); * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. */ -int smp_call_function(void (*func)(void *), void *info, int wait) +int smp_call_function(smp_call_func_t func, void *info, int wait) { preempt_disable(); smp_call_function_many(cpu_online_mask, func, info, wait); -- cgit v1.2.3 From e28c31a96b1570f17731b18e8efabb7308d0c22c Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Wed, 27 Oct 2010 17:55:04 +0100 Subject: xen: register xen pci notifier Register a pci notifier to add (or remove) pci devices to Xen via hypercalls. Xen needs to know the pci devices present in the system to handle pci passthrough and even MSI remapping in the initial domain. Signed-off-by: Weidong Han Signed-off-by: Qing He Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Stefano Stabellini --- drivers/xen/Makefile | 1 + drivers/xen/pci.c | 117 ++++++++++++++++++++++++++++++++++++++++ include/xen/interface/physdev.h | 21 ++++++++ 3 files changed, 139 insertions(+) create mode 100644 drivers/xen/pci.c (limited to 'include') diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index b97864551718..eb8a78d77d9d 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -13,3 +13,4 @@ obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o +obj-$(CONFIG_XEN_DOM0) += pci.o diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c new file mode 100644 index 000000000000..cef4bafc07dc --- /dev/null +++ b/drivers/xen/pci.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2009, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Author: Weidong Han + */ + +#include +#include +#include +#include + +#include +#include +#include "../pci/pci.h" + +static int xen_add_device(struct device *dev) +{ + int r; + struct pci_dev *pci_dev = to_pci_dev(dev); + +#ifdef CONFIG_PCI_IOV + if (pci_dev->is_virtfn) { + struct physdev_manage_pci_ext manage_pci_ext = { + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn, + .is_virtfn = 1, + .physfn.bus = pci_dev->physfn->bus->number, + .physfn.devfn = pci_dev->physfn->devfn, + }; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, + &manage_pci_ext); + } else +#endif + if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) { + struct physdev_manage_pci_ext manage_pci_ext = { + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn, + .is_extfn = 1, + }; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, + &manage_pci_ext); + } else { + struct physdev_manage_pci manage_pci = { + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn, + }; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, + &manage_pci); + } + + return r; +} + +static int xen_remove_device(struct device *dev) +{ + int r; + struct pci_dev *pci_dev = to_pci_dev(dev); + struct physdev_manage_pci manage_pci; + + manage_pci.bus = pci_dev->bus->number; + manage_pci.devfn = pci_dev->devfn; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, + &manage_pci); + + return r; +} + +static int xen_pci_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + int r = 0; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + r = xen_add_device(dev); + break; + case BUS_NOTIFY_DEL_DEVICE: + r = xen_remove_device(dev); + break; + default: + break; + } + + return r; +} + +struct notifier_block device_nb = { + .notifier_call = xen_pci_notifier, +}; + +static int __init register_xen_pci_notifier(void) +{ + if (!xen_initial_domain()) + return 0; + + return bus_register_notifier(&pci_bus_type, &device_nb); +} + +arch_initcall(register_xen_pci_notifier); diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index a85d76c2e360..2b2c66c3df00 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -136,6 +136,27 @@ struct physdev_unmap_pirq { int pirq; }; +#define PHYSDEVOP_manage_pci_add 15 +#define PHYSDEVOP_manage_pci_remove 16 +struct physdev_manage_pci { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; + +#define PHYSDEVOP_manage_pci_add_ext 20 +struct physdev_manage_pci_ext { + /* IN */ + uint8_t bus; + uint8_t devfn; + unsigned is_extfn; + unsigned is_virtfn; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +}; + /* * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() * hypercall since 0x00030202. -- cgit v1.2.3 From 1c31720a74e19bb57f301350a3b03210fa2ba9e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2010 21:02:07 +0000 Subject: ipv4: add __rcu annotations to routes.c Add __rcu annotations to : (struct dst_entry)->rt_next (struct rt_hash_bucket)->chain And use appropriate rcu primitives to reduce sparse warnings if CONFIG_SPARSE_RCU_POINTER=y Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/dst.h | 2 +- net/ipv4/route.c | 75 ++++++++++++++++++++++++++++++++++--------------------- 2 files changed, 47 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index a217c838ec0d..ffe9cb719c0e 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -95,7 +95,7 @@ struct dst_entry { unsigned long lastuse; union { struct dst_entry *next; - struct rtable *rt_next; + struct rtable __rcu *rt_next; struct rt6_info *rt6_next; struct dn_route *dn_next; }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d6cb2bfcd8e1..987bf9adb318 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -198,7 +198,7 @@ const __u8 ip_tos2prio[16] = { */ struct rt_hash_bucket { - struct rtable *chain; + struct rtable __rcu *chain; }; #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ @@ -280,7 +280,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) struct rtable *r = NULL; for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { - if (!rt_hash_table[st->bucket].chain) + if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain)) continue; rcu_read_lock_bh(); r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); @@ -300,17 +300,17 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, { struct rt_cache_iter_state *st = seq->private; - r = r->dst.rt_next; + r = rcu_dereference_bh(r->dst.rt_next); while (!r) { rcu_read_unlock_bh(); do { if (--st->bucket < 0) return NULL; - } while (!rt_hash_table[st->bucket].chain); + } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain)); rcu_read_lock_bh(); - r = rt_hash_table[st->bucket].chain; + r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); } - return rcu_dereference_bh(r); + return r; } static struct rtable *rt_cache_get_next(struct seq_file *seq, @@ -721,19 +721,23 @@ static void rt_do_flush(int process_context) for (i = 0; i <= rt_hash_mask; i++) { if (process_context && need_resched()) cond_resched(); - rth = rt_hash_table[i].chain; + rth = rcu_dereference_raw(rt_hash_table[i].chain); if (!rth) continue; spin_lock_bh(rt_hash_lock_addr(i)); #ifdef CONFIG_NET_NS { - struct rtable ** prev, * p; + struct rtable __rcu **prev; + struct rtable *p; - rth = rt_hash_table[i].chain; + rth = rcu_dereference_protected(rt_hash_table[i].chain, + lockdep_is_held(rt_hash_lock_addr(i))); /* defer releasing the head of the list after spin_unlock */ - for (tail = rth; tail; tail = tail->dst.rt_next) + for (tail = rth; tail; + tail = rcu_dereference_protected(tail->dst.rt_next, + lockdep_is_held(rt_hash_lock_addr(i)))) if (!rt_is_expired(tail)) break; if (rth != tail) @@ -741,8 +745,12 @@ static void rt_do_flush(int process_context) /* call rt_free on entries after the tail requiring flush */ prev = &rt_hash_table[i].chain; - for (p = *prev; p; p = next) { - next = p->dst.rt_next; + for (p = rcu_dereference_protected(*prev, + lockdep_is_held(rt_hash_lock_addr(i))); + p != NULL; + p = next) { + next = rcu_dereference_protected(p->dst.rt_next, + lockdep_is_held(rt_hash_lock_addr(i))); if (!rt_is_expired(p)) { prev = &p->dst.rt_next; } else { @@ -752,14 +760,15 @@ static void rt_do_flush(int process_context) } } #else - rth = rt_hash_table[i].chain; - rt_hash_table[i].chain = NULL; + rth = rcu_dereference_protected(rt_hash_table[i].chain, + lockdep_is_held(rt_hash_lock_addr(i))); + rcu_assign_pointer(rt_hash_table[i].chain, NULL); tail = NULL; #endif spin_unlock_bh(rt_hash_lock_addr(i)); for (; rth != tail; rth = next) { - next = rth->dst.rt_next; 
+ next = rcu_dereference_protected(rth->dst.rt_next, 1); rt_free(rth); } } @@ -790,7 +799,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) while (aux != rth) { if (compare_hash_inputs(&aux->fl, &rth->fl)) return 0; - aux = aux->dst.rt_next; + aux = rcu_dereference_protected(aux->dst.rt_next, 1); } return ONE; } @@ -799,7 +808,8 @@ static void rt_check_expire(void) { static unsigned int rover; unsigned int i = rover, goal; - struct rtable *rth, **rthp; + struct rtable *rth; + struct rtable __rcu **rthp; unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; unsigned long delta; @@ -825,11 +835,12 @@ static void rt_check_expire(void) samples++; - if (*rthp == NULL) + if (rcu_dereference_raw(*rthp) == NULL) continue; length = 0; spin_lock_bh(rt_hash_lock_addr(i)); - while ((rth = *rthp) != NULL) { + while ((rth = rcu_dereference_protected(*rthp, + lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { prefetch(rth->dst.rt_next); if (rt_is_expired(rth)) { *rthp = rth->dst.rt_next; @@ -941,7 +952,8 @@ static int rt_garbage_collect(struct dst_ops *ops) static unsigned long last_gc; static int rover; static int equilibrium; - struct rtable *rth, **rthp; + struct rtable *rth; + struct rtable __rcu **rthp; unsigned long now = jiffies; int goal; int entries = dst_entries_get_fast(&ipv4_dst_ops); @@ -995,7 +1007,8 @@ static int rt_garbage_collect(struct dst_ops *ops) k = (k + 1) & rt_hash_mask; rthp = &rt_hash_table[k].chain; spin_lock_bh(rt_hash_lock_addr(k)); - while ((rth = *rthp) != NULL) { + while ((rth = rcu_dereference_protected(*rthp, + lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) { if (!rt_is_expired(rth) && !rt_may_expire(rth, tmo, expire)) { tmo >>= 1; @@ -1071,7 +1084,7 @@ static int slow_chain_length(const struct rtable *head) while (rth) { length += has_noalias(head, rth); - rth = rth->dst.rt_next; + rth = rcu_dereference_protected(rth->dst.rt_next, 1); } return length >> FRACT_BITS; } @@ -1079,9 +1092,9 @@ static int slow_chain_length(const struct rtable *head) static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp, struct sk_buff *skb, int ifindex) { - struct rtable *rth, **rthp; + struct rtable *rth, *cand; + struct rtable __rcu **rthp, **candp; unsigned long now; - struct rtable *cand, **candp; u32 min_score; int chain_length; int attempts = !in_softirq(); @@ -1128,7 +1141,8 @@ restart: rthp = &rt_hash_table[hash].chain; spin_lock_bh(rt_hash_lock_addr(hash)); - while ((rth = *rthp) != NULL) { + while ((rth = rcu_dereference_protected(*rthp, + lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) { if (rt_is_expired(rth)) { *rthp = rth->dst.rt_next; rt_free(rth); @@ -1324,12 +1338,14 @@ EXPORT_SYMBOL(__ip_select_ident); static void rt_del(unsigned hash, struct rtable *rt) { - struct rtable **rthp, *aux; + struct rtable __rcu **rthp; + struct rtable *aux; rthp = &rt_hash_table[hash].chain; spin_lock_bh(rt_hash_lock_addr(hash)); ip_rt_put(rt); - while ((aux = *rthp) != NULL) { + while ((aux = rcu_dereference_protected(*rthp, + lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) { if (aux == rt || rt_is_expired(aux)) { *rthp = aux->dst.rt_next; rt_free(aux); @@ -1346,7 +1362,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, { int i, k; struct in_device *in_dev = __in_dev_get_rcu(dev); - struct rtable *rth, **rthp; + struct rtable *rth; + struct rtable __rcu **rthp; __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; struct netevent_redirect netevent; @@ -1379,7 +1396,7 @@ void 
ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], rt_genid(net)); - rthp=&rt_hash_table[hash].chain; + rthp = &rt_hash_table[hash].chain; while ((rth = rcu_dereference(*rthp)) != NULL) { struct rtable *rt; -- cgit v1.2.3 From e0ad61ec867fdd262804afa7a68e11fc9930c2b9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2010 21:02:28 +0000 Subject: net: add __rcu annotations to protocol Add __rcu annotations to : struct net_protocol *inet_protos struct net_protocol *inet6_protos And use appropriate casts to reduce sparse warnings if CONFIG_SPARSE_RCU_POINTER=y Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/protocol.h | 4 ++-- net/ipv4/protocol.c | 8 +++++--- net/ipv6/protocol.c | 8 +++++--- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/protocol.h b/include/net/protocol.h index f1effdd3c265..dc07495bce4c 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -89,10 +89,10 @@ struct inet_protosw { #define INET_PROTOSW_PERMANENT 0x02 /* Permanent protocols are unremovable. */ #define INET_PROTOSW_ICSK 0x04 /* Is this an inet_connection_sock? */ -extern const struct net_protocol *inet_protos[MAX_INET_PROTOS]; +extern const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS]; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) -extern const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS]; +extern const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS]; #endif extern int inet_add_protocol(const struct net_protocol *prot, unsigned char num); diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 65699c24411c..9ae5c01cd0b2 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -28,7 +28,7 @@ #include #include -const struct net_protocol *inet_protos[MAX_INET_PROTOS] __read_mostly; +const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; /* * Add a protocol handler to the hash tables @@ -38,7 +38,8 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) { int hash = protocol & (MAX_INET_PROTOS - 1); - return !cmpxchg(&inet_protos[hash], NULL, prot) ? 0 : -1; + return !cmpxchg((const struct net_protocol **)&inet_protos[hash], + NULL, prot) ? 0 : -1; } EXPORT_SYMBOL(inet_add_protocol); @@ -50,7 +51,8 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) { int ret, hash = protocol & (MAX_INET_PROTOS - 1); - ret = (cmpxchg(&inet_protos[hash], prot, NULL) == prot) ? 0 : -1; + ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash], + prot, NULL) == prot) ? 0 : -1; synchronize_net(); diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 9bb936ae2452..9a7978fdc02a 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -25,13 +25,14 @@ #include #include -const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS] __read_mostly; +const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) { int hash = protocol & (MAX_INET_PROTOS - 1); - return !cmpxchg(&inet6_protos[hash], NULL, prot) ? 0 : -1; + return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash], + NULL, prot) ? 
0 : -1; } EXPORT_SYMBOL(inet6_add_protocol); @@ -43,7 +44,8 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol { int ret, hash = protocol & (MAX_INET_PROTOS - 1); - ret = (cmpxchg(&inet6_protos[hash], prot, NULL) == prot) ? 0 : -1; + ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash], + prot, NULL) == prot) ? 0 : -1; synchronize_net(); -- cgit v1.2.3 From b33eab08445d86c3d0dec3111ce10df561328705 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2010 21:01:26 +0000 Subject: tunnels: add __rcu annotations Add __rcu annotations to : (struct ip_tunnel)->prl (struct ip_tunnel_prl_entry)->next (struct xfrm_tunnel)->next struct xfrm_tunnel *tunnel4_handlers struct xfrm_tunnel *tunnel64_handlers And use appropriate rcu primitives to reduce sparse warnings if CONFIG_SPARSE_RCU_POINTER=y Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ipip.h | 4 ++-- include/net/xfrm.h | 2 +- net/ipv4/tunnel4.c | 29 +++++++++++++++++++---------- 3 files changed, 22 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/ipip.h b/include/net/ipip.h index 0403fe4c4519..a32654d52730 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -34,12 +34,12 @@ struct ip_tunnel { #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd_parm ip6rd; #endif - struct ip_tunnel_prl_entry *prl; /* potential router list */ + struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ unsigned int prl_count; /* # of entries in PRL */ }; struct ip_tunnel_prl_entry { - struct ip_tunnel_prl_entry *next; + struct ip_tunnel_prl_entry __rcu *next; __be32 addr; u16 flags; struct rcu_head rcu_head; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ffcd47820a5b..bcfb6b24b019 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1264,7 +1264,7 @@ struct xfrm_tunnel { int (*handler)(struct sk_buff *skb); int (*err_handler)(struct sk_buff *skb, u32 info); - struct xfrm_tunnel *next; + struct xfrm_tunnel __rcu *next; int priority; }; diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index 9a17bd2a0a37..ac3b3ee4b07c 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -14,27 +14,32 @@ #include #include -static struct xfrm_tunnel *tunnel4_handlers __read_mostly; -static struct xfrm_tunnel *tunnel64_handlers __read_mostly; +static struct xfrm_tunnel __rcu *tunnel4_handlers __read_mostly; +static struct xfrm_tunnel __rcu *tunnel64_handlers __read_mostly; static DEFINE_MUTEX(tunnel4_mutex); -static inline struct xfrm_tunnel **fam_handlers(unsigned short family) +static inline struct xfrm_tunnel __rcu **fam_handlers(unsigned short family) { return (family == AF_INET) ? 
&tunnel4_handlers : &tunnel64_handlers; } int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family) { - struct xfrm_tunnel **pprev; + struct xfrm_tunnel __rcu **pprev; + struct xfrm_tunnel *t; + int ret = -EEXIST; int priority = handler->priority; mutex_lock(&tunnel4_mutex); - for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) { - if ((*pprev)->priority > priority) + for (pprev = fam_handlers(family); + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&tunnel4_mutex))) != NULL; + pprev = &t->next) { + if (t->priority > priority) break; - if ((*pprev)->priority == priority) + if (t->priority == priority) goto err; } @@ -52,13 +57,17 @@ EXPORT_SYMBOL(xfrm4_tunnel_register); int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) { - struct xfrm_tunnel **pprev; + struct xfrm_tunnel __rcu **pprev; + struct xfrm_tunnel *t; int ret = -ENOENT; mutex_lock(&tunnel4_mutex); - for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) { - if (*pprev == handler) { + for (pprev = fam_handlers(family); + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&tunnel4_mutex))) != NULL; + pprev = &t->next) { + if (t == handler) { *pprev = handler->next; ret = 0; break; -- cgit v1.2.3 From 7a2b03c5175e9ddcc2a2d48ca86dea8a88b68383 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Oct 2010 09:24:55 +0000 Subject: fib_rules: __rcu annotates ctarget Adds __rcu annotation to (struct fib_rule)->ctarget Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/fib_rules.h | 2 +- net/core/fib_rules.c | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 106f3097d384..075f1e3a0fed 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -20,7 +20,7 @@ struct fib_rule { u32 table; u8 action; u32 target; - struct fib_rule * ctarget; + struct fib_rule __rcu *ctarget; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; struct rcu_head rcu; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 12b43cc2f889..82a4369ae150 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -351,12 +351,12 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) list_for_each_entry(r, &ops->rules_list, list) { if (r->pref == rule->target) { - rule->ctarget = r; + RCU_INIT_POINTER(rule->ctarget, r); break; } } - if (rule->ctarget == NULL) + if (rcu_dereference_protected(rule->ctarget, 1) == NULL) unresolved = 1; } else if (rule->action == FR_ACT_GOTO) goto errout_free; @@ -386,7 +386,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) list_for_each_entry(r, &ops->rules_list, list) { if (r->action == FR_ACT_GOTO && r->target == rule->pref) { - BUG_ON(r->ctarget != NULL); + BUG_ON(rtnl_dereference(r->ctarget) != NULL); rcu_assign_pointer(r->ctarget, rule); if (--ops->unresolved_rules == 0) break; @@ -487,7 +487,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) */ if (ops->nr_goto_rules > 0) { list_for_each_entry(tmp, &ops->rules_list, list) { - if (tmp->ctarget == rule) { + if (rtnl_dereference(tmp->ctarget) == rule) { rcu_assign_pointer(tmp->ctarget, NULL); ops->unresolved_rules++; } @@ -545,7 +545,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh->action = rule->action; frh->flags = rule->flags; - if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL) + if (rule->action == 
FR_ACT_GOTO && + rcu_dereference_raw(rule->ctarget) == NULL) frh->flags |= FIB_RULE_UNRESOLVED; if (rule->iifname[0]) { -- cgit v1.2.3 From b914c4ea929a4ba6fb97967800dc473c31552b98 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Oct 2010 23:55:38 +0000 Subject: inetpeer: __rcu annotations Adds __rcu annotations to inetpeer (struct inet_peer)->avl_left (struct inet_peer)->avl_right This is a tedious cleanup, but removes one smp_wmb() from link_to_pool() since we now use more self documenting rcu_assign_pointer(). Note the use of RCU_INIT_POINTER() instead of rcu_assign_pointer() in all cases we dont need a memory barrier. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inetpeer.h | 2 +- net/ipv4/inetpeer.c | 138 ++++++++++++++++++++++++++++--------------------- 2 files changed, 81 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 417d0c894f29..fe239bfe5f7f 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -15,7 +15,7 @@ struct inet_peer { /* group together avl_left,avl_right,v4daddr to speedup lookups */ - struct inet_peer *avl_left, *avl_right; + struct inet_peer __rcu *avl_left, *avl_right; __be32 v4daddr; /* peer's address */ __u32 avl_height; struct list_head unused; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 9ffa24b9a804..9e94d7cf4f8a 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -72,18 +72,19 @@ static struct kmem_cache *peer_cachep __read_mostly; #define node_height(x) x->avl_height #define peer_avl_empty ((struct inet_peer *)&peer_fake_node) +#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node) static const struct inet_peer peer_fake_node = { - .avl_left = peer_avl_empty, - .avl_right = peer_avl_empty, + .avl_left = peer_avl_empty_rcu, + .avl_right = peer_avl_empty_rcu, .avl_height = 0 }; static struct { - struct inet_peer *root; + struct inet_peer __rcu *root; spinlock_t lock; int total; } peers = { - .root = peer_avl_empty, + .root = peer_avl_empty_rcu, .lock = __SPIN_LOCK_UNLOCKED(peers.lock), .total = 0, }; @@ -156,11 +157,14 @@ static void unlink_from_unused(struct inet_peer *p) */ #define lookup(_daddr, _stack) \ ({ \ - struct inet_peer *u, **v; \ + struct inet_peer *u; \ + struct inet_peer __rcu **v; \ \ stackptr = _stack; \ *stackptr++ = &peers.root; \ - for (u = peers.root; u != peer_avl_empty; ) { \ + for (u = rcu_dereference_protected(peers.root, \ + lockdep_is_held(&peers.lock)); \ + u != peer_avl_empty; ) { \ if (_daddr == u->v4daddr) \ break; \ if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ @@ -168,7 +172,8 @@ static void unlink_from_unused(struct inet_peer *p) else \ v = &u->avl_right; \ *stackptr++ = v; \ - u = *v; \ + u = rcu_dereference_protected(*v, \ + lockdep_is_held(&peers.lock)); \ } \ u; \ }) @@ -209,13 +214,17 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr) /* Called with local BH disabled and the pool lock held. 
*/ #define lookup_rightempty(start) \ ({ \ - struct inet_peer *u, **v; \ + struct inet_peer *u; \ + struct inet_peer __rcu **v; \ *stackptr++ = &start->avl_left; \ v = &start->avl_left; \ - for (u = *v; u->avl_right != peer_avl_empty; ) { \ + for (u = rcu_dereference_protected(*v, \ + lockdep_is_held(&peers.lock)); \ + u->avl_right != peer_avl_empty_rcu; ) { \ v = &u->avl_right; \ *stackptr++ = v; \ - u = *v; \ + u = rcu_dereference_protected(*v, \ + lockdep_is_held(&peers.lock)); \ } \ u; \ }) @@ -224,74 +233,86 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr) * Variable names are the proof of operation correctness. * Look into mm/map_avl.c for more detail description of the ideas. */ -static void peer_avl_rebalance(struct inet_peer **stack[], - struct inet_peer ***stackend) +static void peer_avl_rebalance(struct inet_peer __rcu **stack[], + struct inet_peer __rcu ***stackend) { - struct inet_peer **nodep, *node, *l, *r; + struct inet_peer __rcu **nodep; + struct inet_peer *node, *l, *r; int lh, rh; while (stackend > stack) { nodep = *--stackend; - node = *nodep; - l = node->avl_left; - r = node->avl_right; + node = rcu_dereference_protected(*nodep, + lockdep_is_held(&peers.lock)); + l = rcu_dereference_protected(node->avl_left, + lockdep_is_held(&peers.lock)); + r = rcu_dereference_protected(node->avl_right, + lockdep_is_held(&peers.lock)); lh = node_height(l); rh = node_height(r); if (lh > rh + 1) { /* l: RH+2 */ struct inet_peer *ll, *lr, *lrl, *lrr; int lrh; - ll = l->avl_left; - lr = l->avl_right; + ll = rcu_dereference_protected(l->avl_left, + lockdep_is_held(&peers.lock)); + lr = rcu_dereference_protected(l->avl_right, + lockdep_is_held(&peers.lock)); lrh = node_height(lr); if (lrh <= node_height(ll)) { /* ll: RH+1 */ - node->avl_left = lr; /* lr: RH or RH+1 */ - node->avl_right = r; /* r: RH */ + RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ + RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ node->avl_height = lrh + 1; /* RH+1 or RH+2 */ - l->avl_left = ll; /* ll: RH+1 */ - l->avl_right = node; /* node: RH+1 or RH+2 */ + RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */ + RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */ l->avl_height = node->avl_height + 1; - *nodep = l; + RCU_INIT_POINTER(*nodep, l); } else { /* ll: RH, lr: RH+1 */ - lrl = lr->avl_left; /* lrl: RH or RH-1 */ - lrr = lr->avl_right; /* lrr: RH or RH-1 */ - node->avl_left = lrr; /* lrr: RH or RH-1 */ - node->avl_right = r; /* r: RH */ + lrl = rcu_dereference_protected(lr->avl_left, + lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */ + lrr = rcu_dereference_protected(lr->avl_right, + lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */ + RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ + RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ node->avl_height = rh + 1; /* node: RH+1 */ - l->avl_left = ll; /* ll: RH */ - l->avl_right = lrl; /* lrl: RH or RH-1 */ + RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */ + RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */ l->avl_height = rh + 1; /* l: RH+1 */ - lr->avl_left = l; /* l: RH+1 */ - lr->avl_right = node; /* node: RH+1 */ + RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */ + RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */ lr->avl_height = rh + 2; - *nodep = lr; + RCU_INIT_POINTER(*nodep, lr); } } else if (rh > lh + 1) { /* r: LH+2 */ struct inet_peer *rr, *rl, *rlr, *rll; int rlh; - rr = r->avl_right; - rl = r->avl_left; + rr = rcu_dereference_protected(r->avl_right, + lockdep_is_held(&peers.lock)); + rl = 
rcu_dereference_protected(r->avl_left, + lockdep_is_held(&peers.lock)); rlh = node_height(rl); if (rlh <= node_height(rr)) { /* rr: LH+1 */ - node->avl_right = rl; /* rl: LH or LH+1 */ - node->avl_left = l; /* l: LH */ + RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ + RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ node->avl_height = rlh + 1; /* LH+1 or LH+2 */ - r->avl_right = rr; /* rr: LH+1 */ - r->avl_left = node; /* node: LH+1 or LH+2 */ + RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */ + RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */ r->avl_height = node->avl_height + 1; - *nodep = r; + RCU_INIT_POINTER(*nodep, r); } else { /* rr: RH, rl: RH+1 */ - rlr = rl->avl_right; /* rlr: LH or LH-1 */ - rll = rl->avl_left; /* rll: LH or LH-1 */ - node->avl_right = rll; /* rll: LH or LH-1 */ - node->avl_left = l; /* l: LH */ + rlr = rcu_dereference_protected(rl->avl_right, + lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */ + rll = rcu_dereference_protected(rl->avl_left, + lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */ + RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ + RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ node->avl_height = lh + 1; /* node: LH+1 */ - r->avl_right = rr; /* rr: LH */ - r->avl_left = rlr; /* rlr: LH or LH-1 */ + RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */ + RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */ r->avl_height = lh + 1; /* r: LH+1 */ - rl->avl_right = r; /* r: LH+1 */ - rl->avl_left = node; /* node: LH+1 */ + RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */ + RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */ rl->avl_height = lh + 2; - *nodep = rl; + RCU_INIT_POINTER(*nodep, rl); } } else { node->avl_height = (lh > rh ? lh : rh) + 1; @@ -303,10 +324,10 @@ static void peer_avl_rebalance(struct inet_peer **stack[], #define link_to_pool(n) \ do { \ n->avl_height = 1; \ - n->avl_left = peer_avl_empty; \ - n->avl_right = peer_avl_empty; \ - smp_wmb(); /* lockless readers can catch us now */ \ - **--stackptr = n; \ + n->avl_left = peer_avl_empty_rcu; \ + n->avl_right = peer_avl_empty_rcu; \ + /* lockless readers can catch us now */ \ + rcu_assign_pointer(**--stackptr, n); \ peer_avl_rebalance(stack, stackptr); \ } while (0) @@ -330,24 +351,25 @@ static void unlink_from_pool(struct inet_peer *p) * We use refcnt=-1 to alert lockless readers this entry is deleted. */ if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { - struct inet_peer **stack[PEER_MAXDEPTH]; - struct inet_peer ***stackptr, ***delp; + struct inet_peer __rcu **stack[PEER_MAXDEPTH]; + struct inet_peer __rcu ***stackptr, ***delp; if (lookup(p->v4daddr, stack) != p) BUG(); delp = stackptr - 1; /* *delp[0] == p */ - if (p->avl_left == peer_avl_empty) { + if (p->avl_left == peer_avl_empty_rcu) { *delp[0] = p->avl_right; --stackptr; } else { /* look for a node to insert instead of p */ struct inet_peer *t; t = lookup_rightempty(p); - BUG_ON(*stackptr[-1] != t); + BUG_ON(rcu_dereference_protected(*stackptr[-1], + lockdep_is_held(&peers.lock)) != t); **--stackptr = t->avl_left; /* t is removed, t->v4daddr > x->v4daddr for any * x in p->avl_left subtree. * Put t in the old place of p. 
*/ - *delp[0] = t; + RCU_INIT_POINTER(*delp[0], t); t->avl_left = p->avl_left; t->avl_right = p->avl_right; t->avl_height = p->avl_height; @@ -414,7 +436,7 @@ static int cleanup_once(unsigned long ttl) struct inet_peer *inet_getpeer(__be32 daddr, int create) { struct inet_peer *p; - struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; + struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; /* Look up for the address quickly, lockless. * Because of a concurrent writer, we might not find an existing entry. -- cgit v1.2.3 From a10c02036f82b1fa30d69a62f7c7d9a927e8adbc Mon Sep 17 00:00:00 2001 From: Amarnath Revanna Date: Wed, 27 Oct 2010 08:34:39 +0000 Subject: caif-u5500: Adding shared memory include Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- include/net/caif/caif_shm.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 include/net/caif/caif_shm.h (limited to 'include') diff --git a/include/net/caif/caif_shm.h b/include/net/caif/caif_shm.h new file mode 100644 index 000000000000..5bcce55438cf --- /dev/null +++ b/include/net/caif/caif_shm.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) ST-Ericsson AB 2010 + * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com + * Author: Amarnath Revanna / amarnath.bangalore.revanna@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + */ + +#ifndef CAIF_SHM_H_ +#define CAIF_SHM_H_ + +struct shmdev_layer { + u32 shm_base_addr; + u32 shm_total_sz; + u32 shm_id; + u32 shm_loopback; + void *hmbx; + int (*pshmdev_mbxsend) (u32 shm_id, u32 mbx_msg); + int (*pshmdev_mbxsetup) (void *pshmdrv_cb, + struct shmdev_layer *pshm_dev, void *pshm_drv); + struct net_device *pshm_netdev; +}; + +extern int caif_shmcore_probe(struct shmdev_layer *pshm_dev); +extern void caif_shmcore_remove(struct net_device *pshm_netdev); + +#endif -- cgit v1.2.3 From c5b1f0d92c36851aca09ac6c7c0c4f9690ac14f3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 27 Oct 2010 15:46:08 +0200 Subject: locks/nfsd: allocate file lock outside of spinlock As suggested by Christoph Hellwig, this moves allocation of new file locks out of generic_setlease into the callers, nfs4_open_delegation and fcntl_setlease in order to allow GFP_KERNEL allocations when lock_flocks has become a spinlock. Signed-off-by: Arnd Bergmann Acked-by: J. Bruce Fields --- fs/locks.c | 36 ++++++++++++------------------------ fs/nfsd/nfs4state.c | 26 +++++++++++++++----------- include/linux/fs.h | 1 + 3 files changed, 28 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/fs/locks.c b/fs/locks.c index 8b2b6ad56a09..0391d2ff5a4e 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -162,10 +162,11 @@ EXPORT_SYMBOL_GPL(unlock_flocks); static struct kmem_cache *filelock_cache __read_mostly; /* Allocate an empty lock structure. 
*/ -static struct file_lock *locks_alloc_lock(void) +struct file_lock *locks_alloc_lock(void) { return kmem_cache_alloc(filelock_cache, GFP_KERNEL); } +EXPORT_SYMBOL_GPL(locks_alloc_lock); void locks_release_private(struct file_lock *fl) { @@ -1365,7 +1366,6 @@ int fcntl_getlease(struct file *filp) int generic_setlease(struct file *filp, long arg, struct file_lock **flp) { struct file_lock *fl, **before, **my_before = NULL, *lease; - struct file_lock *new_fl = NULL; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; int error, rdlease_count = 0, wrlease_count = 0; @@ -1385,11 +1385,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) lease = *flp; if (arg != F_UNLCK) { - error = -ENOMEM; - new_fl = locks_alloc_lock(); - if (new_fl == NULL) - goto out; - error = -EAGAIN; if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) goto out; @@ -1434,7 +1429,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) goto out; } - error = 0; if (arg == F_UNLCK) goto out; @@ -1442,15 +1436,11 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) if (!leases_enable) goto out; - locks_copy_lock(new_fl, lease); - locks_insert_lock(before, new_fl); - - *flp = new_fl; + locks_insert_lock(before, lease); return 0; out: - if (new_fl != NULL) - locks_free_lock(new_fl); + locks_free_lock(lease); return error; } EXPORT_SYMBOL(generic_setlease); @@ -1514,26 +1504,24 @@ EXPORT_SYMBOL_GPL(vfs_setlease); */ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) { - struct file_lock fl, *flp = &fl; + struct file_lock *fl; struct inode *inode = filp->f_path.dentry->d_inode; int error; - locks_init_lock(&fl); - error = lease_init(filp, arg, &fl); - if (error) - return error; + fl = lease_alloc(filp, arg); + if (IS_ERR(fl)) + return PTR_ERR(fl); lock_flocks(); - - error = __vfs_setlease(filp, arg, &flp); + error = __vfs_setlease(filp, arg, &fl); if (error || arg == F_UNLCK) goto out_unlock; - error = fasync_helper(fd, filp, 1, &flp->fl_fasync); + error = fasync_helper(fd, filp, 1, &fl->fl_fasync); if (error < 0) { /* remove lease just inserted by setlease */ - flp->fl_type = F_UNLCK | F_INPROGRESS; - flp->fl_break_time = jiffies - 10; + fl->fl_type = F_UNLCK | F_INPROGRESS; + fl->fl_break_time = jiffies - 10; time_out_leases(inode); goto out_unlock; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9019e8ec9dc8..56347e0ac88d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2614,7 +2614,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta struct nfs4_delegation *dp; struct nfs4_stateowner *sop = stp->st_stateowner; int cb_up = atomic_read(&sop->so_client->cl_cb_set); - struct file_lock fl, *flp = &fl; + struct file_lock *fl; int status, flag = 0; flag = NFS4_OPEN_DELEGATE_NONE; @@ -2648,20 +2648,24 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta flag = NFS4_OPEN_DELEGATE_NONE; goto out; } - locks_init_lock(&fl); - fl.fl_lmops = &nfsd_lease_mng_ops; - fl.fl_flags = FL_LEASE; - fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; - fl.fl_end = OFFSET_MAX; - fl.fl_owner = (fl_owner_t)dp; - fl.fl_file = find_readable_file(stp->st_file); - BUG_ON(!fl.fl_file); - fl.fl_pid = current->tgid; + status = -ENOMEM; + fl = locks_alloc_lock(); + if (!fl) + goto out; + locks_init_lock(fl); + fl->fl_lmops = &nfsd_lease_mng_ops; + fl->fl_flags = FL_LEASE; + fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? 
F_RDLCK: F_WRLCK; + fl->fl_end = OFFSET_MAX; + fl->fl_owner = (fl_owner_t)dp; + fl->fl_file = find_readable_file(stp->st_file); + BUG_ON(!fl->fl_file); + fl->fl_pid = current->tgid; /* vfs_setlease checks to see if delegation should be handed out. * the lock_manager callbacks fl_mylease and fl_change are used */ - if ((status = vfs_setlease(fl.fl_file, fl.fl_type, &flp))) { + if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) { dprintk("NFSD: setlease failed [%d], no delegation\n", status); unhash_delegation(dp); flag = NFS4_OPEN_DELEGATE_NONE; diff --git a/include/linux/fs.h b/include/linux/fs.h index bb20373d0b46..8d7de08ab546 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1113,6 +1113,7 @@ extern int fcntl_getlease(struct file *filp); /* fs/locks.c */ extern void locks_init_lock(struct file_lock *); +extern struct file_lock * locks_alloc_lock(void); extern void locks_copy_lock(struct file_lock *, struct file_lock *); extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); -- cgit v1.2.3 From f7347ce4ee7c65415f84be915c018473e7076f31 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 27 Oct 2010 12:38:12 -0400 Subject: fasync: re-organize fasync entry insertion to allow it under a spinlock You currently cannot use "fasync_helper()" in an atomic environment to insert a new fasync entry, because it will need to allocate the new "struct fasync_struct". Yet fcntl_setlease() wants to call this under lock_flocks(), which is in the process of being converted from the BKL to a spinlock. In order to fix this, this abstracts out the actual fasync list insertion and the fasync allocations into functions of their own, and teaches fs/locks.c to pre-allocate the fasync_struct entry. That way the actual list insertion can happen while holding the required spinlock. Signed-off-by: Linus Torvalds [bfields@redhat.com: rebase on top of my changes to Arnd's patch] Tested-by: J. Bruce Fields Signed-off-by: Arnd Bergmann --- fs/fcntl.c | 66 +++++++++++++++++++++++++++++++++++++++++------------- fs/locks.c | 18 ++++++++++++++- include/linux/fs.h | 5 +++++ 3 files changed, 72 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/fcntl.c b/fs/fcntl.c index f8cc34f542c3..dcdbc6f5c33b 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -640,7 +640,7 @@ static void fasync_free_rcu(struct rcu_head *head) * match the state "is the filp on a fasync list". * */ -static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) +int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) { struct fasync_struct *fa, **fp; int result = 0; @@ -666,21 +666,28 @@ static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) return result; } +struct fasync_struct *fasync_alloc(void) +{ + return kmem_cache_alloc(fasync_cache, GFP_KERNEL); +} + /* - * Add a fasync entry. Return negative on error, positive if - * added, and zero if did nothing but change an existing one. - * - * NOTE! It is very important that the FASYNC flag always - * match the state "is the filp on a fasync list". + * NOTE! This can be used only for unused fasync entries: + * entries that actually got inserted on the fasync list + * need to be released by rcu - see fasync_remove_entry. 
*/ -static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) +void fasync_free(struct fasync_struct *new) { - struct fasync_struct *new, *fa, **fp; - int result = 0; + kmem_cache_free(fasync_cache, new); +} - new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); - if (!new) - return -ENOMEM; +/* + * Insert a new entry into the fasync list. Return the pointer to the + * old one if we didn't use the new one. + */ +struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new) +{ + struct fasync_struct *fa, **fp; spin_lock(&filp->f_lock); spin_lock(&fasync_lock); @@ -691,8 +698,6 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa spin_lock_irq(&fa->fa_lock); fa->fa_fd = fd; spin_unlock_irq(&fa->fa_lock); - - kmem_cache_free(fasync_cache, new); goto out; } @@ -702,13 +707,42 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa new->fa_fd = fd; new->fa_next = *fapp; rcu_assign_pointer(*fapp, new); - result = 1; filp->f_flags |= FASYNC; out: spin_unlock(&fasync_lock); spin_unlock(&filp->f_lock); - return result; + return fa; +} + +/* + * Add a fasync entry. Return negative on error, positive if + * added, and zero if did nothing but change an existing one. + * + * NOTE! It is very important that the FASYNC flag always + * match the state "is the filp on a fasync list". + */ +static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) +{ + struct fasync_struct *new; + + new = fasync_alloc(); + if (!new) + return -ENOMEM; + + /* + * fasync_insert_entry() returns the old (update) entry if + * it existed. + * + * So free the (unused) new entry and return 0 to let the + * caller know that we didn't add any new fasync entries. + */ + if (fasync_insert_entry(fd, filp, fapp, new)) { + fasync_free(new); + return 0; + } + + return 1; } /* diff --git a/fs/locks.c b/fs/locks.c index 0391d2ff5a4e..85fd9ce1abae 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1505,6 +1505,7 @@ EXPORT_SYMBOL_GPL(vfs_setlease); int fcntl_setlease(unsigned int fd, struct file *filp, long arg) { struct file_lock *fl; + struct fasync_struct *new; struct inode *inode = filp->f_path.dentry->d_inode; int error; @@ -1512,12 +1513,25 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) if (IS_ERR(fl)) return PTR_ERR(fl); + new = fasync_alloc(); + if (!new) { + locks_free_lock(fl); + return -ENOMEM; + } lock_flocks(); error = __vfs_setlease(filp, arg, &fl); if (error || arg == F_UNLCK) goto out_unlock; - error = fasync_helper(fd, filp, 1, &fl->fl_fasync); + /* + * fasync_insert_entry() returns the old entry if any. + * If there was no old entry, then it used 'new' and + * inserted it into the fasync list. Clear new so that + * we don't release it here. 
+ */ + if (!fasync_insert_entry(fd, filp, &fl->fl_fasync, new)) + new = NULL; + if (error < 0) { /* remove lease just inserted by setlease */ fl->fl_type = F_UNLCK | F_INPROGRESS; @@ -1529,6 +1543,8 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); out_unlock: unlock_flocks(); + if (new) + fasync_free(new); return error; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 8d7de08ab546..56285e5e1de4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1302,6 +1302,11 @@ struct fasync_struct { /* SMP safe fasync helpers: */ extern int fasync_helper(int, struct file *, int, struct fasync_struct **); +extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *); +extern int fasync_remove_entry(struct file *, struct fasync_struct **); +extern struct fasync_struct *fasync_alloc(void); +extern void fasync_free(struct fasync_struct *); + /* can be called from interrupts */ extern void kill_fasync(struct fasync_struct **, int, int); -- cgit v1.2.3 From a8e23a291852cd7c4fb5ca696dbb93912185ad10 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Oct 2010 15:32:57 -0700 Subject: mm,x86: fix kmap_atomic_push vs ioremap_32.c It appears i386 uses kmap_atomic infrastructure regardless of CONFIG_HIGHMEM which results in a compile error when highmem is disabled. Cure this by providing the needed few bits for both CONFIG_HIGHMEM and CONFIG_X86_32. Signed-off-by: Peter Zijlstra Reported-by: Chris Wilson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 46 +++++++++++++++++++++++++--------------------- mm/highmem.c | 6 +++++- 2 files changed, 30 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 8a85ec109a3a..102f76be90da 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -37,27 +37,6 @@ extern unsigned long totalhigh_pages; void kmap_flush_unused(void); -DECLARE_PER_CPU(int, __kmap_atomic_idx); - -static inline int kmap_atomic_idx_push(void) -{ - int idx = __get_cpu_var(__kmap_atomic_idx)++; -#ifdef CONFIG_DEBUG_HIGHMEM - WARN_ON_ONCE(in_irq() && !irqs_disabled()); - BUG_ON(idx > KM_TYPE_NR); -#endif - return idx; -} - -static inline int kmap_atomic_idx_pop(void) -{ - int idx = --__get_cpu_var(__kmap_atomic_idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(idx < 0); -#endif - return idx; -} - #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } @@ -95,6 +74,31 @@ static inline void __kunmap_atomic(void *addr) #endif /* CONFIG_HIGHMEM */ +#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) + +DECLARE_PER_CPU(int, __kmap_atomic_idx); + +static inline int kmap_atomic_idx_push(void) +{ + int idx = __get_cpu_var(__kmap_atomic_idx)++; +#ifdef CONFIG_DEBUG_HIGHMEM + WARN_ON_ONCE(in_irq() && !irqs_disabled()); + BUG_ON(idx > KM_TYPE_NR); +#endif + return idx; +} + +static inline int kmap_atomic_idx_pop(void) +{ + int idx = --__get_cpu_var(__kmap_atomic_idx); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(idx < 0); +#endif + return idx; +} + +#endif + /* * Make both: kmap_atomic(page, idx) and kmap_atomic(page) work. 
*/ diff --git a/mm/highmem.c b/mm/highmem.c index 781e754a75ac..693394daa2ed 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -29,6 +29,11 @@ #include #include + +#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) +DEFINE_PER_CPU(int, __kmap_atomic_idx); +#endif + /* * Virtual_count is not a pure "count". * 0 means that it is not mapped, and has not been mapped @@ -43,7 +48,6 @@ unsigned long totalhigh_pages __read_mostly; EXPORT_SYMBOL(totalhigh_pages); -DEFINE_PER_CPU(int, __kmap_atomic_idx); EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx); unsigned int nr_free_highpages (void) -- cgit v1.2.3 From 20273941f2129aa5a432796d98a276ed73d60782 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Oct 2010 15:32:58 -0700 Subject: mm: fix race in kunmap_atomic() Christoph reported a nice splat which illustrated a race in the new stack based kmap_atomic implementation. The problem is that we pop our stack slot before we're completely done resetting its state -- in particular clearing the PTE (sometimes that's CONFIG_DEBUG_HIGHMEM). If an interrupt happens before we actually clear the PTE used for the last slot, that interrupt can reuse the slot in a dirty state, which triggers a BUG in kmap_atomic(). Fix this by introducing kmap_atomic_idx() which reports the current slot index without actually releasing it and use that to find the PTE and delay the _pop() until after we're completely done. Signed-off-by: Peter Zijlstra Reported-by: Christoph Hellwig Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/highmem.c | 3 ++- arch/frv/mm/highmem.c | 3 ++- arch/mips/mm/highmem.c | 3 ++- arch/mn10300/include/asm/highmem.h | 4 +++- arch/powerpc/mm/highmem.c | 4 +++- arch/sparc/mm/highmem.c | 4 +++- arch/tile/mm/highmem.c | 3 ++- arch/x86/mm/highmem_32.c | 3 ++- arch/x86/mm/iomap_32.c | 3 ++- include/linux/highmem.h | 5 +++++ 10 files changed, 26 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index c00f119babbf..c435fd9e1da9 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -89,7 +89,7 @@ void __kunmap_atomic(void *kvaddr) int idx, type; if (kvaddr >= (void *)FIXADDR_START) { - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); idx = type + KM_TYPE_NR * smp_processor_id(); if (cache_is_vivt()) @@ -101,6 +101,7 @@ void __kunmap_atomic(void *kvaddr) #else (void) idx; /* to kill a warning */ #endif + kmap_atomic_idx_pop(); } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { /* this address was obtained through kmap_high_get() */ kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); diff --git a/arch/frv/mm/highmem.c b/arch/frv/mm/highmem.c index 61088dcc1594..fd7fcd4c2e33 100644 --- a/arch/frv/mm/highmem.c +++ b/arch/frv/mm/highmem.c @@ -68,7 +68,7 @@ EXPORT_SYMBOL(__kmap_atomic); void __kunmap_atomic(void *kvaddr) { - int type = kmap_atomic_idx_pop(); + int type = kmap_atomic_idx(); switch (type) { case 0: __kunmap_atomic_primary(4, 6); break; case 1: __kunmap_atomic_primary(5, 7); break; @@ -83,6 +83,7 @@ void __kunmap_atomic(void *kvaddr) default: BUG(); } + kmap_atomic_idx_pop(); pagefault_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index 1e69b1fb4b85..3634c7ea06ac 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -74,7 +74,7 @@ void __kunmap_atomic(void *kvaddr) return; } - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); #ifdef CONFIG_DEBUG_HIGHMEM { int idx = 
type + KM_TYPE_NR * smp_processor_id(); @@ -89,6 +89,7 @@ void __kunmap_atomic(void *kvaddr) local_flush_tlb_one(vaddr); } #endif + kmap_atomic_idx_pop(); pagefault_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/mn10300/include/asm/highmem.h b/arch/mn10300/include/asm/highmem.h index f577ba2268ca..e2155e686451 100644 --- a/arch/mn10300/include/asm/highmem.h +++ b/arch/mn10300/include/asm/highmem.h @@ -101,7 +101,7 @@ static inline void __kunmap_atomic(unsigned long vaddr) return; } - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); #if HIGHMEM_DEBUG { @@ -119,6 +119,8 @@ static inline void __kunmap_atomic(unsigned long vaddr) __flush_tlb_one(vaddr); } #endif + + kmap_atomic_idx_pop(); pagefault_enable(); } #endif /* __KERNEL__ */ diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index b0848b462bbc..e7450bdbe83a 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c @@ -62,7 +62,7 @@ void __kunmap_atomic(void *kvaddr) return; } - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); #ifdef CONFIG_DEBUG_HIGHMEM { @@ -79,6 +79,8 @@ void __kunmap_atomic(void *kvaddr) local_flush_tlb_page(NULL, vaddr); } #endif + + kmap_atomic_idx_pop(); pagefault_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c index 5e50c09b7dce..4730eac0747b 100644 --- a/arch/sparc/mm/highmem.c +++ b/arch/sparc/mm/highmem.c @@ -75,7 +75,7 @@ void __kunmap_atomic(void *kvaddr) return; } - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); #ifdef CONFIG_DEBUG_HIGHMEM { @@ -104,6 +104,8 @@ void __kunmap_atomic(void *kvaddr) #endif } #endif + + kmap_atomic_idx_pop(); pagefault_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c index 8ef6595e162c..abb57331cf6e 100644 --- a/arch/tile/mm/highmem.c +++ b/arch/tile/mm/highmem.c @@ -241,7 +241,7 @@ void __kunmap_atomic(void *kvaddr) pte_t pteval = *pte; int idx, type; - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); idx = type + KM_TYPE_NR*smp_processor_id(); /* @@ -252,6 +252,7 @@ void __kunmap_atomic(void *kvaddr) BUG_ON(!pte_present(pteval) && !pte_migrating(pteval)); kmap_atomic_unregister(pte_page(pteval), vaddr); kpte_clear_flush(pte, vaddr); + kmap_atomic_idx_pop(); } else { /* Must be a lowmem page */ BUG_ON(vaddr < PAGE_OFFSET); diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index d723e369003c..b49962662101 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -74,7 +74,7 @@ void __kunmap_atomic(void *kvaddr) vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { int idx, type; - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); idx = type + KM_TYPE_NR * smp_processor_id(); #ifdef CONFIG_DEBUG_HIGHMEM @@ -87,6 +87,7 @@ void __kunmap_atomic(void *kvaddr) * attributes or becomes a protected page in a hypervisor. */ kpte_clear_flush(kmap_pte-idx, vaddr); + kmap_atomic_idx_pop(); } #ifdef CONFIG_DEBUG_HIGHMEM else { diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 75a3d7f24a2c..7b179b499fa3 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -98,7 +98,7 @@ iounmap_atomic(void __iomem *kvaddr) vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { int idx, type; - type = kmap_atomic_idx_pop(); + type = kmap_atomic_idx(); idx = type + KM_TYPE_NR * smp_processor_id(); #ifdef CONFIG_DEBUG_HIGHMEM @@ -111,6 +111,7 @@ iounmap_atomic(void __iomem *kvaddr) * attributes or becomes a protected page in a hypervisor. 
*/ kpte_clear_flush(kmap_pte-idx, vaddr); + kmap_atomic_idx_pop(); } pagefault_enable(); diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 102f76be90da..e9138198e823 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -88,6 +88,11 @@ static inline int kmap_atomic_idx_push(void) return idx; } +static inline int kmap_atomic_idx(void) +{ + return __get_cpu_var(__kmap_atomic_idx) - 1; +} + static inline int kmap_atomic_idx_pop(void) { int idx = --__get_cpu_var(__kmap_atomic_idx); -- cgit v1.2.3 From aeec56e331c6d2750de02ef34b305338305ca690 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 27 Oct 2010 15:33:15 -0700 Subject: gpio: add driver for basic memory-mapped GPIO controllers The basic GPIO controllers may be found in various on-board FPGA and ASIC solutions that are used to control board's switches, LEDs, chip-selects, Ethernet/USB PHY power, etc. These controllers may not provide any means of pin setup (in/out/open drain). The driver supports: - 8/16/32/64 bits registers; - GPIO controllers with clear/set registers; - GPIO controllers with a single "data" register; - Big endian bits/GPIOs ordering (mostly used on PowerPC). Signed-off-by: Anton Vorontsov Reviewed-by: Mark Brown Cc: David Brownell Cc: Samuel Ortiz , Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/Kconfig | 5 + drivers/gpio/Makefile | 1 + drivers/gpio/basic_mmio_gpio.c | 297 ++++++++++++++++++++++++++++++++++++++++ include/linux/basic_mmio_gpio.h | 20 +++ 4 files changed, 323 insertions(+) create mode 100644 drivers/gpio/basic_mmio_gpio.c create mode 100644 include/linux/basic_mmio_gpio.h (limited to 'include') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 510aa2054544..e47ef94be379 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -70,6 +70,11 @@ config GPIO_MAX730X comment "Memory mapped GPIO expanders:" +config GPIO_BASIC_MMIO + tristate "Basic memory-mapped GPIO controllers support" + help + Say yes here to support basic memory-mapped GPIO controllers. + config GPIO_IT8761E tristate "IT8761E GPIO support" depends on GPIOLIB diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index fc6019d93720..3ff0651fd173 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_GPIOLIB) += gpiolib.o obj-$(CONFIG_GPIO_ADP5520) += adp5520-gpio.o obj-$(CONFIG_GPIO_ADP5588) += adp5588-gpio.o +obj-$(CONFIG_GPIO_BASIC_MMIO) += basic_mmio_gpio.o obj-$(CONFIG_GPIO_LANGWELL) += langwell_gpio.o obj-$(CONFIG_GPIO_MAX730X) += max730x.o obj-$(CONFIG_GPIO_MAX7300) += max7300.o diff --git a/drivers/gpio/basic_mmio_gpio.c b/drivers/gpio/basic_mmio_gpio.c new file mode 100644 index 000000000000..3addea65894e --- /dev/null +++ b/drivers/gpio/basic_mmio_gpio.c @@ -0,0 +1,297 @@ +/* + * Driver for basic memory-mapped GPIO controllers. + * + * Copyright 2008 MontaVista Software, Inc. + * Copyright 2008,2010 Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * ....``.```~~~~````.`.`.`.`.```````'',,,.........`````......`....... + * ...`` ```````.. + * ..The simplest form of a GPIO controller that the driver supports is`` + * `.just a single "data" register, where GPIO state can be read and/or ` + * `,..written. 
,,..``~~~~ .....``.`.`.~~.```.`.........``````.``````` + * ````````` + ___ +_/~~|___/~| . ```~~~~~~ ___/___\___ ,~.`.`.`.`````.~~...,,,,... +__________|~$@~~~ %~ /o*o*o*o*o*o\ .. Implementing such a GPIO . +o ` ~~~~\___/~~~~ ` controller in FPGA is ,.` + `....trivial..'~`.```.``` + * ``````` + * .```````~~~~`..`.``.``. + * . The driver supports `... ,..```.`~~~```````````````....````.``,, + * . big-endian notation, just`. .. A bit more sophisticated controllers , + * . register the device with -be`. .with a pair of set/clear-bit registers , + * `.. suffix. ```~~`````....`.` . affecting the data register and the .` + * ``.`.``...``` ```.. output pins are also supported.` + * ^^ `````.`````````.,``~``~``~~`````` + * . ^^ + * ,..`.`.`...````````````......`.`.`.`.`.`..`.`.`.. + * .. The expectation is that in at least some cases . ,-~~~-, + * .this will be used with roll-your-own ASIC/FPGA .` \ / + * .logic in Verilog or VHDL. ~~~`````````..`````~~` \ / + * ..````````......``````````` \o_ + * | + * ^^ / \ + * + * ...`````~~`.....``.`..........``````.`.``.```........``. + * ` 8, 16, 32 and 64 bits registers are supported, and``. + * . the number of GPIOs is determined by the width of ~ + * .. the registers. ,............```.`.`..`.`.~~~.`.`.`~ + * `.......````.``` + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct bgpio_chip { + struct gpio_chip gc; + void __iomem *reg_dat; + void __iomem *reg_set; + void __iomem *reg_clr; + + /* Number of bits (GPIOs): * 8. */ + int bits; + + /* + * Some GPIO controllers work with the big-endian bits notation, + * e.g. in a 8-bits register, GPIO7 is the least significant bit. + */ + int big_endian_bits; + + /* + * Used to lock bgpio_chip->data. Also, this is needed to keep + * shadowed and real data registers writes together. + */ + spinlock_t lock; + + /* Shadowed data register to clear/set bits safely. 
*/ + unsigned long data; +}; + +static struct bgpio_chip *to_bgpio_chip(struct gpio_chip *gc) +{ + return container_of(gc, struct bgpio_chip, gc); +} + +static unsigned long bgpio_in(struct bgpio_chip *bgc) +{ + switch (bgc->bits) { + case 8: + return __raw_readb(bgc->reg_dat); + case 16: + return __raw_readw(bgc->reg_dat); + case 32: + return __raw_readl(bgc->reg_dat); +#if BITS_PER_LONG >= 64 + case 64: + return __raw_readq(bgc->reg_dat); +#endif + } + return -EINVAL; +} + +static void bgpio_out(struct bgpio_chip *bgc, void __iomem *reg, + unsigned long data) +{ + switch (bgc->bits) { + case 8: + __raw_writeb(data, reg); + return; + case 16: + __raw_writew(data, reg); + return; + case 32: + __raw_writel(data, reg); + return; +#if BITS_PER_LONG >= 64 + case 64: + __raw_writeq(data, reg); + return; +#endif + } +} + +static unsigned long bgpio_pin2mask(struct bgpio_chip *bgc, unsigned int pin) +{ + if (bgc->big_endian_bits) + return 1 << (bgc->bits - 1 - pin); + else + return 1 << pin; +} + +static int bgpio_get(struct gpio_chip *gc, unsigned int gpio) +{ + struct bgpio_chip *bgc = to_bgpio_chip(gc); + + return bgpio_in(bgc) & bgpio_pin2mask(bgc, gpio); +} + +static void bgpio_set(struct gpio_chip *gc, unsigned int gpio, int val) +{ + struct bgpio_chip *bgc = to_bgpio_chip(gc); + unsigned long mask = bgpio_pin2mask(bgc, gpio); + unsigned long flags; + + if (bgc->reg_set) { + if (val) + bgpio_out(bgc, bgc->reg_set, mask); + else + bgpio_out(bgc, bgc->reg_clr, mask); + return; + } + + spin_lock_irqsave(&bgc->lock, flags); + + if (val) + bgc->data |= mask; + else + bgc->data &= ~mask; + + bgpio_out(bgc, bgc->reg_dat, bgc->data); + + spin_unlock_irqrestore(&bgc->lock, flags); +} + +static int bgpio_dir_in(struct gpio_chip *gc, unsigned int gpio) +{ + return 0; +} + +static int bgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) +{ + bgpio_set(gc, gpio, val); + return 0; +} + +static int __devinit bgpio_probe(struct platform_device *pdev) +{ + const struct platform_device_id *platid = platform_get_device_id(pdev); + struct device *dev = &pdev->dev; + struct bgpio_pdata *pdata = dev_get_platdata(dev); + struct bgpio_chip *bgc; + struct resource *res_dat; + struct resource *res_set; + struct resource *res_clr; + resource_size_t dat_sz; + int bits; + int ret; + + res_dat = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dat"); + if (!res_dat) + return -EINVAL; + + dat_sz = resource_size(res_dat); + if (!is_power_of_2(dat_sz)) + return -EINVAL; + + bits = dat_sz * 8; + if (bits > BITS_PER_LONG) + return -EINVAL; + + bgc = devm_kzalloc(dev, sizeof(*bgc), GFP_KERNEL); + if (!bgc) + return -ENOMEM; + + bgc->reg_dat = devm_ioremap(dev, res_dat->start, dat_sz); + if (!bgc->reg_dat) + return -ENOMEM; + + res_set = platform_get_resource_byname(pdev, IORESOURCE_MEM, "set"); + res_clr = platform_get_resource_byname(pdev, IORESOURCE_MEM, "clr"); + if (res_set && res_clr) { + if (resource_size(res_set) != resource_size(res_clr) || + resource_size(res_set) != dat_sz) + return -EINVAL; + + bgc->reg_set = devm_ioremap(dev, res_set->start, dat_sz); + bgc->reg_clr = devm_ioremap(dev, res_clr->start, dat_sz); + if (!bgc->reg_set || !bgc->reg_clr) + return -ENOMEM; + } else if (res_set || res_clr) { + return -EINVAL; + } + + spin_lock_init(&bgc->lock); + + bgc->bits = bits; + bgc->big_endian_bits = !strcmp(platid->name, "basic-mmio-gpio-be"); + bgc->data = bgpio_in(bgc); + + bgc->gc.ngpio = bits; + bgc->gc.direction_input = bgpio_dir_in; + bgc->gc.direction_output = bgpio_dir_out; + bgc->gc.get = 
bgpio_get; + bgc->gc.set = bgpio_set; + bgc->gc.dev = dev; + bgc->gc.label = dev_name(dev); + + if (pdata) + bgc->gc.base = pdata->base; + else + bgc->gc.base = -1; + + dev_set_drvdata(dev, bgc); + + ret = gpiochip_add(&bgc->gc); + if (ret) + dev_err(dev, "gpiochip_add() failed: %d\n", ret); + + return ret; +} + +static int __devexit bgpio_remove(struct platform_device *pdev) +{ + struct bgpio_chip *bgc = dev_get_drvdata(&pdev->dev); + + return gpiochip_remove(&bgc->gc); +} + +static const struct platform_device_id bgpio_id_table[] = { + { "basic-mmio-gpio", }, + { "basic-mmio-gpio-be", }, + {}, +}; +MODULE_DEVICE_TABLE(platform, bgpio_id_table); + +static struct platform_driver bgpio_driver = { + .driver = { + .name = "basic-mmio-gpio", + }, + .id_table = bgpio_id_table, + .probe = bgpio_probe, + .remove = __devexit_p(bgpio_remove), +}; + +static int __init bgpio_init(void) +{ + return platform_driver_register(&bgpio_driver); +} +module_init(bgpio_init); + +static void __exit bgpio_exit(void) +{ + platform_driver_unregister(&bgpio_driver); +} +module_exit(bgpio_exit); + +MODULE_DESCRIPTION("Driver for basic memory-mapped GPIO controllers"); +MODULE_AUTHOR("Anton Vorontsov "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/basic_mmio_gpio.h b/include/linux/basic_mmio_gpio.h new file mode 100644 index 000000000000..198087a16fc4 --- /dev/null +++ b/include/linux/basic_mmio_gpio.h @@ -0,0 +1,20 @@ +/* + * Basic memory-mapped GPIO controllers. + * + * Copyright 2008 MontaVista Software, Inc. + * Copyright 2008,2010 Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __BASIC_MMIO_GPIO_H +#define __BASIC_MMIO_GPIO_H + +struct bgpio_pdata { + int base; +}; + +#endif /* __BASIC_MMIO_GPIO_H */ -- cgit v1.2.3 From 09cd9527d621640d4dd231dff77b681e711d8e4b Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 27 Oct 2010 15:33:16 -0700 Subject: gpiolib: fix HAVE_GPIO_LIB leftovers in asm-generic/gpio.h commit 7444a72effa632fcd8edc566f88 ("gpiolib: allow user-selection") removed HAVE_GPIO_LIB Kconfig symbol, but the header file still uses the name [to confuse readers wrt #ifdef/#else/#endif location]. The real Kconfig symbol nowadays is CONFIG_GPIOLIB. Signed-off-by: Anton Vorontsov Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/gpio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 8ca18e26d7e3..ff5c66080c8c 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -210,7 +210,7 @@ extern void gpio_unexport(unsigned gpio); #endif /* CONFIG_GPIO_SYSFS */ -#else /* !CONFIG_HAVE_GPIO_LIB */ +#else /* !CONFIG_GPIOLIB */ static inline int gpio_is_valid(int number) { @@ -239,7 +239,7 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value) gpio_set_value(gpio, value); } -#endif /* !CONFIG_HAVE_GPIO_LIB */ +#endif /* !CONFIG_GPIOLIB */ #ifndef CONFIG_GPIO_SYSFS -- cgit v1.2.3 From ead6db084392349ad33323b1bb2916058dd7e82b Mon Sep 17 00:00:00 2001 From: Miguel Gaio Date: Wed, 27 Oct 2010 15:33:18 -0700 Subject: gpio: add support for 74x164 serial-in/parallel-out 8-bit shift register Add support for generic 74x164 serial-in/parallel-out 8-bits shift register. 
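For illustration only, not part of this patch: the driver below binds by name, so a board file would typically describe the part via SPI board info and hand over its platform data, roughly as in the sketch after this note. The bus number, chip select, speed and GPIO base are hypothetical values.

#include <linux/spi/spi.h>
#include <linux/spi/74x164.h>

static struct gen_74x164_chip_platform_data board_74x164_pdata = {
	.base		= 128,	/* hypothetical number for the first GPIO */
};

static struct spi_board_info board_spi_devs[] __initdata = {
	{
		.modalias	= "74x164",	/* GEN_74X164_DRIVER_NAME */
		.max_speed_hz	= 1000000,	/* hypothetical bus speed */
		.bus_num	= 1,		/* hypothetical SPI bus */
		.chip_select	= 0,
		.platform_data	= &board_74x164_pdata,
	},
};

static int __init board_register_74x164(void)
{
	/* called once from the board's init code */
	return spi_register_board_info(board_spi_devs,
				       ARRAY_SIZE(board_spi_devs));
}
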
This driver can be used as a GPIO output expander. [akpm@linux-foundation.org: remove unused local `refresh'] Signed-off-by: Miguel Gaio Signed-off-by: Juhos Gabor Signed-off-by: Florian Fainelli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/74x164.c | 182 +++++++++++++++++++++++++++++++++++++++++++++ drivers/gpio/Kconfig | 8 ++ drivers/gpio/Makefile | 1 + include/linux/spi/74x164.h | 11 +++ 4 files changed, 202 insertions(+) create mode 100644 drivers/gpio/74x164.c create mode 100644 include/linux/spi/74x164.h (limited to 'include') diff --git a/drivers/gpio/74x164.c b/drivers/gpio/74x164.c new file mode 100644 index 000000000000..d91ff4c282e9 --- /dev/null +++ b/drivers/gpio/74x164.c @@ -0,0 +1,182 @@ +/* + * 74Hx164 - Generic serial-in/parallel-out 8-bits shift register GPIO driver + * + * Copyright (C) 2010 Gabor Juhos + * Copyright (C) 2010 Miguel Gaio + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#define GEN_74X164_GPIO_COUNT 8 + + +struct gen_74x164_chip { + struct spi_device *spi; + struct gpio_chip gpio_chip; + struct mutex lock; + u8 port_config; +}; + +static void gen_74x164_set_value(struct gpio_chip *, unsigned, int); + +static struct gen_74x164_chip *gpio_to_chip(struct gpio_chip *gc) +{ + return container_of(gc, struct gen_74x164_chip, gpio_chip); +} + +static int __gen_74x164_write_config(struct gen_74x164_chip *chip) +{ + return spi_write(chip->spi, + &chip->port_config, sizeof(chip->port_config)); +} + +static int gen_74x164_direction_output(struct gpio_chip *gc, + unsigned offset, int val) +{ + gen_74x164_set_value(gc, offset, val); + return 0; +} + +static int gen_74x164_get_value(struct gpio_chip *gc, unsigned offset) +{ + struct gen_74x164_chip *chip = gpio_to_chip(gc); + int ret; + + mutex_lock(&chip->lock); + ret = (chip->port_config >> offset) & 0x1; + mutex_unlock(&chip->lock); + + return ret; +} + +static void gen_74x164_set_value(struct gpio_chip *gc, + unsigned offset, int val) +{ + struct gen_74x164_chip *chip = gpio_to_chip(gc); + + mutex_lock(&chip->lock); + if (val) + chip->port_config |= (1 << offset); + else + chip->port_config &= ~(1 << offset); + + __gen_74x164_write_config(chip); + mutex_unlock(&chip->lock); +} + +static int __devinit gen_74x164_probe(struct spi_device *spi) +{ + struct gen_74x164_chip *chip; + struct gen_74x164_chip_platform_data *pdata; + int ret; + + pdata = spi->dev.platform_data; + if (!pdata || !pdata->base) { + dev_dbg(&spi->dev, "incorrect or missing platform data\n"); + return -EINVAL; + } + + /* + * bits_per_word cannot be configured in platform data + */ + spi->bits_per_word = 8; + + ret = spi_setup(spi); + if (ret < 0) + return ret; + + chip = kzalloc(sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + mutex_init(&chip->lock); + + dev_set_drvdata(&spi->dev, chip); + + chip->spi = spi; + + chip->gpio_chip.label = GEN_74X164_DRIVER_NAME, + chip->gpio_chip.direction_output = gen_74x164_direction_output; + chip->gpio_chip.get = gen_74x164_get_value; + chip->gpio_chip.set = gen_74x164_set_value; + chip->gpio_chip.base = pdata->base; + chip->gpio_chip.ngpio = GEN_74X164_GPIO_COUNT; + chip->gpio_chip.can_sleep = 1; + chip->gpio_chip.dev = &spi->dev; + chip->gpio_chip.owner = THIS_MODULE; + + ret = __gen_74x164_write_config(chip); + if (ret) { + dev_err(&spi->dev, "Failed 
writing: %d\n", ret); + goto exit_destroy; + } + + ret = gpiochip_add(&chip->gpio_chip); + if (ret) + goto exit_destroy; + + return ret; + +exit_destroy: + dev_set_drvdata(&spi->dev, NULL); + mutex_destroy(&chip->lock); + kfree(chip); + return ret; +} + +static int gen_74x164_remove(struct spi_device *spi) +{ + struct gen_74x164_chip *chip; + int ret; + + chip = dev_get_drvdata(&spi->dev); + if (chip == NULL) + return -ENODEV; + + dev_set_drvdata(&spi->dev, NULL); + + ret = gpiochip_remove(&chip->gpio_chip); + if (!ret) { + mutex_destroy(&chip->lock); + kfree(chip); + } else + dev_err(&spi->dev, "Failed to remove the GPIO controller: %d\n", + ret); + + return ret; +} + +static struct spi_driver gen_74x164_driver = { + .driver = { + .name = GEN_74X164_DRIVER_NAME, + .owner = THIS_MODULE, + }, + .probe = gen_74x164_probe, + .remove = __devexit_p(gen_74x164_remove), +}; + +static int __init gen_74x164_init(void) +{ + return spi_register_driver(&gen_74x164_driver); +} +subsys_initcall(gen_74x164_init); + +static void __exit gen_74x164_exit(void) +{ + spi_unregister_driver(&gen_74x164_driver); +} +module_exit(gen_74x164_exit); + +MODULE_AUTHOR("Gabor Juhos "); +MODULE_AUTHOR("Miguel Gaio "); +MODULE_DESCRIPTION("GPIO expander driver for 74X164 8-bits shift register"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index e47ef94be379..bc7b0fca6415 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -344,6 +344,14 @@ config GPIO_MC33880 SPI driver for Freescale MC33880 high-side/low-side switch. This provides GPIO interface supporting inputs and outputs. +config GPIO_74X164 + tristate "74x164 serial-in/parallel-out 8-bits shift register" + depends on SPI_MASTER + help + Platform driver for 74x164 compatible serial-in/parallel-out + 8-outputs shift registers. This driver can be used to provide access + to more gpio outputs. + comment "AC97 GPIO expanders:" config GPIO_UCB1400 diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 3ff0651fd173..0c23a4dd45e5 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_GPIO_MAX7301) += max7301.o obj-$(CONFIG_GPIO_MAX732X) += max732x.o obj-$(CONFIG_GPIO_MC33880) += mc33880.o obj-$(CONFIG_GPIO_MCP23S08) += mcp23s08.o +obj-$(CONFIG_GPIO_74X164) += 74x164.o obj-$(CONFIG_GPIO_PCA953X) += pca953x.o obj-$(CONFIG_GPIO_PCF857X) += pcf857x.o obj-$(CONFIG_GPIO_PL061) += pl061.o diff --git a/include/linux/spi/74x164.h b/include/linux/spi/74x164.h new file mode 100644 index 000000000000..d85c52f294a0 --- /dev/null +++ b/include/linux/spi/74x164.h @@ -0,0 +1,11 @@ +#ifndef LINUX_SPI_74X164_H +#define LINUX_SPI_74X164_H + +#define GEN_74X164_DRIVER_NAME "74x164" + +struct gen_74x164_chip_platform_data { + /* number assigned to the first GPIO */ + unsigned base; +}; + +#endif -- cgit v1.2.3 From 459773ae8dbbd480886d186181c6bc2e8556025f Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Wed, 27 Oct 2010 15:33:19 -0700 Subject: gpio: adp5588-gpio: support interrupt controller Implement irq_chip functionality on ADP5588/5587 GPIO expanders. Only level sensitive interrupts are supported. Interrupts provided by this irq_chip must be requested using request_threaded_irq(). 
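For illustration only, not part of this patch: a rough sketch of how a consumer might hook up one expander pin as a level interrupt. The GPIO number is hypothetical, and a valid irq_base in the platform data is assumed.

#include <linux/gpio.h>
#include <linux/interrupt.h>

#define EXPANDER_GPIO	200	/* hypothetical pin on the ADP5588 */

static irqreturn_t expander_pin_thread(int irq, void *dev_id)
{
	/* Runs in thread context, so sleeping I2C accesses are fine. */
	return IRQ_HANDLED;
}

static int expander_pin_init(void)
{
	int irq, err;

	err = gpio_request(EXPANDER_GPIO, "expander-pin");
	if (err)
		return err;

	err = gpio_direction_input(EXPANDER_GPIO);
	if (err)
		goto err_free;

	irq = gpio_to_irq(EXPANDER_GPIO);	/* via adp5588_gpio_to_irq() */
	if (irq < 0) {
		err = irq;
		goto err_free;
	}

	/* NULL hard handler: all work happens in the threaded handler,
	 * since acknowledging the expander requires I2C transfers. Only
	 * level types are accepted by this irq_chip. */
	err = request_threaded_irq(irq, NULL, expander_pin_thread,
				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
				   "expander-pin", NULL);
	if (err)
		goto err_free;

	return 0;

err_free:
	gpio_free(EXPANDER_GPIO);
	return err;
}
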
Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/Kconfig | 7 ++ drivers/gpio/adp5588-gpio.c | 277 ++++++++++++++++++++++++++++++++++++++++---- include/linux/i2c/adp5588.h | 15 +++ 3 files changed, 278 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index bc7b0fca6415..3f3181dac8d7 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -272,6 +272,13 @@ config GPIO_ADP5588 To compile this driver as a module, choose M here: the module will be called adp5588-gpio. +config GPIO_ADP5588_IRQ + bool "Interrupt controller support for ADP5588" + depends on GPIO_ADP5588=y + help + Say yes here to enable the adp5588 to be used as an interrupt + controller. It requires the driver to be built in the kernel. + comment "PCI GPIO expanders:" config GPIO_CS5535 diff --git a/drivers/gpio/adp5588-gpio.c b/drivers/gpio/adp5588-gpio.c index 2e8e9e24f887..0871f78af593 100644 --- a/drivers/gpio/adp5588-gpio.c +++ b/drivers/gpio/adp5588-gpio.c @@ -1,8 +1,8 @@ /* * GPIO Chip driver for Analog Devices - * ADP5588 I/O Expander and QWERTY Keypad Controller + * ADP5588/ADP5587 I/O Expander and QWERTY Keypad Controller * - * Copyright 2009 Analog Devices Inc. + * Copyright 2009-2010 Analog Devices Inc. * * Licensed under the GPL-2 or later. */ @@ -13,21 +13,34 @@ #include #include #include +#include +#include #include -#define DRV_NAME "adp5588-gpio" -#define MAXGPIO 18 -#define ADP_BANK(offs) ((offs) >> 3) -#define ADP_BIT(offs) (1u << ((offs) & 0x7)) +#define DRV_NAME "adp5588-gpio" + +/* + * Early pre 4.0 Silicon required to delay readout by at least 25ms, + * since the Event Counter Register updated 25ms after the interrupt + * asserted. 
+ */ +#define WA_DELAYED_READOUT_REVID(rev) ((rev) < 4) struct adp5588_gpio { struct i2c_client *client; struct gpio_chip gpio_chip; struct mutex lock; /* protect cached dir, dat_out */ + /* protect serialized access to the interrupt controller bus */ + struct mutex irq_lock; unsigned gpio_start; + unsigned irq_base; uint8_t dat_out[3]; uint8_t dir[3]; + uint8_t int_lvl[3]; + uint8_t int_en[3]; + uint8_t irq_mask[3]; + uint8_t irq_stat[3]; }; static int adp5588_gpio_read(struct i2c_client *client, u8 reg) @@ -55,8 +68,8 @@ static int adp5588_gpio_get_value(struct gpio_chip *chip, unsigned off) struct adp5588_gpio *dev = container_of(chip, struct adp5588_gpio, gpio_chip); - return !!(adp5588_gpio_read(dev->client, GPIO_DAT_STAT1 + ADP_BANK(off)) - & ADP_BIT(off)); + return !!(adp5588_gpio_read(dev->client, + GPIO_DAT_STAT1 + ADP5588_BANK(off)) & ADP5588_BIT(off)); } static void adp5588_gpio_set_value(struct gpio_chip *chip, @@ -66,8 +79,8 @@ static void adp5588_gpio_set_value(struct gpio_chip *chip, struct adp5588_gpio *dev = container_of(chip, struct adp5588_gpio, gpio_chip); - bank = ADP_BANK(off); - bit = ADP_BIT(off); + bank = ADP5588_BANK(off); + bit = ADP5588_BIT(off); mutex_lock(&dev->lock); if (val) @@ -87,10 +100,10 @@ static int adp5588_gpio_direction_input(struct gpio_chip *chip, unsigned off) struct adp5588_gpio *dev = container_of(chip, struct adp5588_gpio, gpio_chip); - bank = ADP_BANK(off); + bank = ADP5588_BANK(off); mutex_lock(&dev->lock); - dev->dir[bank] &= ~ADP_BIT(off); + dev->dir[bank] &= ~ADP5588_BIT(off); ret = adp5588_gpio_write(dev->client, GPIO_DIR1 + bank, dev->dir[bank]); mutex_unlock(&dev->lock); @@ -105,8 +118,8 @@ static int adp5588_gpio_direction_output(struct gpio_chip *chip, struct adp5588_gpio *dev = container_of(chip, struct adp5588_gpio, gpio_chip); - bank = ADP_BANK(off); - bit = ADP_BIT(off); + bank = ADP5588_BANK(off); + bit = ADP5588_BIT(off); mutex_lock(&dev->lock); dev->dir[bank] |= bit; @@ -125,6 +138,213 @@ static int adp5588_gpio_direction_output(struct gpio_chip *chip, return ret; } +#ifdef CONFIG_GPIO_ADP5588_IRQ +static int adp5588_gpio_to_irq(struct gpio_chip *chip, unsigned off) +{ + struct adp5588_gpio *dev = + container_of(chip, struct adp5588_gpio, gpio_chip); + return dev->irq_base + off; +} + +static void adp5588_irq_bus_lock(unsigned int irq) +{ + struct adp5588_gpio *dev = get_irq_chip_data(irq); + mutex_lock(&dev->irq_lock); +} + + /* + * genirq core code can issue chip->mask/unmask from atomic context. + * This doesn't work for slow busses where an access needs to sleep. + * bus_sync_unlock() is therefore called outside the atomic context, + * syncs the current irq mask state with the slow external controller + * and unlocks the bus. 
+ */ + +static void adp5588_irq_bus_sync_unlock(unsigned int irq) +{ + struct adp5588_gpio *dev = get_irq_chip_data(irq); + int i; + + for (i = 0; i <= ADP5588_BANK(ADP5588_MAXGPIO); i++) + if (dev->int_en[i] ^ dev->irq_mask[i]) { + dev->int_en[i] = dev->irq_mask[i]; + adp5588_gpio_write(dev->client, GPIO_INT_EN1 + i, + dev->int_en[i]); + } + + mutex_unlock(&dev->irq_lock); +} + +static void adp5588_irq_mask(unsigned int irq) +{ + struct adp5588_gpio *dev = get_irq_chip_data(irq); + unsigned gpio = irq - dev->irq_base; + + dev->irq_mask[ADP5588_BANK(gpio)] &= ~ADP5588_BIT(gpio); +} + +static void adp5588_irq_unmask(unsigned int irq) +{ + struct adp5588_gpio *dev = get_irq_chip_data(irq); + unsigned gpio = irq - dev->irq_base; + + dev->irq_mask[ADP5588_BANK(gpio)] |= ADP5588_BIT(gpio); +} + +static int adp5588_irq_set_type(unsigned int irq, unsigned int type) +{ + struct adp5588_gpio *dev = get_irq_chip_data(irq); + uint16_t gpio = irq - dev->irq_base; + unsigned bank, bit; + + if ((type & IRQ_TYPE_EDGE_BOTH)) { + dev_err(&dev->client->dev, "irq %d: unsupported type %d\n", + irq, type); + return -EINVAL; + } + + bank = ADP5588_BANK(gpio); + bit = ADP5588_BIT(gpio); + + if (type & IRQ_TYPE_LEVEL_HIGH) + dev->int_lvl[bank] |= bit; + else if (type & IRQ_TYPE_LEVEL_LOW) + dev->int_lvl[bank] &= ~bit; + else + return -EINVAL; + + adp5588_gpio_direction_input(&dev->gpio_chip, gpio); + adp5588_gpio_write(dev->client, GPIO_INT_LVL1 + bank, + dev->int_lvl[bank]); + + return 0; +} + +static struct irq_chip adp5588_irq_chip = { + .name = "adp5588", + .mask = adp5588_irq_mask, + .unmask = adp5588_irq_unmask, + .bus_lock = adp5588_irq_bus_lock, + .bus_sync_unlock = adp5588_irq_bus_sync_unlock, + .set_type = adp5588_irq_set_type, +}; + +static int adp5588_gpio_read_intstat(struct i2c_client *client, u8 *buf) +{ + int ret = i2c_smbus_read_i2c_block_data(client, GPIO_INT_STAT1, 3, buf); + + if (ret < 0) + dev_err(&client->dev, "Read INT_STAT Error\n"); + + return ret; +} + +static irqreturn_t adp5588_irq_handler(int irq, void *devid) +{ + struct adp5588_gpio *dev = devid; + unsigned status, bank, bit, pending; + int ret; + status = adp5588_gpio_read(dev->client, INT_STAT); + + if (status & ADP5588_GPI_INT) { + ret = adp5588_gpio_read_intstat(dev->client, dev->irq_stat); + if (ret < 0) + memset(dev->irq_stat, 0, ARRAY_SIZE(dev->irq_stat)); + + for (bank = 0; bank <= ADP5588_BANK(ADP5588_MAXGPIO); + bank++, bit = 0) { + pending = dev->irq_stat[bank] & dev->irq_mask[bank]; + + while (pending) { + if (pending & (1 << bit)) { + handle_nested_irq(dev->irq_base + + (bank << 3) + bit); + pending &= ~(1 << bit); + + } + bit++; + } + } + } + + adp5588_gpio_write(dev->client, INT_STAT, status); /* Status is W1C */ + + return IRQ_HANDLED; +} + +static int adp5588_irq_setup(struct adp5588_gpio *dev) +{ + struct i2c_client *client = dev->client; + struct adp5588_gpio_platform_data *pdata = client->dev.platform_data; + unsigned gpio; + int ret; + + adp5588_gpio_write(client, CFG, ADP5588_AUTO_INC); + adp5588_gpio_write(client, INT_STAT, -1); /* status is W1C */ + adp5588_gpio_read_intstat(client, dev->irq_stat); /* read to clear */ + + dev->irq_base = pdata->irq_base; + mutex_init(&dev->irq_lock); + + for (gpio = 0; gpio < dev->gpio_chip.ngpio; gpio++) { + int irq = gpio + dev->irq_base; + set_irq_chip_data(irq, dev); + set_irq_chip_and_handler(irq, &adp5588_irq_chip, + handle_level_irq); + set_irq_nested_thread(irq, 1); +#ifdef CONFIG_ARM + /* + * ARM needs us to explicitly flag the IRQ as VALID, + * once we do so, it 
will also set the noprobe. + */ + set_irq_flags(irq, IRQF_VALID); +#else + set_irq_noprobe(irq); +#endif + } + + ret = request_threaded_irq(client->irq, + NULL, + adp5588_irq_handler, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + dev_name(&client->dev), dev); + if (ret) { + dev_err(&client->dev, "failed to request irq %d\n", + client->irq); + goto out; + } + + dev->gpio_chip.to_irq = adp5588_gpio_to_irq; + adp5588_gpio_write(client, CFG, + ADP5588_AUTO_INC | ADP5588_INT_CFG | ADP5588_GPI_INT); + + return 0; + +out: + dev->irq_base = 0; + return ret; +} + +static void adp5588_irq_teardown(struct adp5588_gpio *dev) +{ + if (dev->irq_base) + free_irq(dev->client->irq, dev); +} + +#else +static int adp5588_irq_setup(struct adp5588_gpio *dev) +{ + struct i2c_client *client = dev->client; + dev_warn(&client->dev, "interrupt support not compiled in\n"); + + return 0; +} + +static void adp5588_irq_teardown(struct adp5588_gpio *dev) +{ +} +#endif /* CONFIG_GPIO_ADP5588_IRQ */ + static int __devinit adp5588_gpio_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -160,37 +380,46 @@ static int __devinit adp5588_gpio_probe(struct i2c_client *client, gc->can_sleep = 1; gc->base = pdata->gpio_start; - gc->ngpio = MAXGPIO; + gc->ngpio = ADP5588_MAXGPIO; gc->label = client->name; gc->owner = THIS_MODULE; mutex_init(&dev->lock); - ret = adp5588_gpio_read(dev->client, DEV_ID); if (ret < 0) goto err; revid = ret & ADP5588_DEVICE_ID_MASK; - for (i = 0, ret = 0; i <= ADP_BANK(MAXGPIO); i++) { + for (i = 0, ret = 0; i <= ADP5588_BANK(ADP5588_MAXGPIO); i++) { dev->dat_out[i] = adp5588_gpio_read(client, GPIO_DAT_OUT1 + i); dev->dir[i] = adp5588_gpio_read(client, GPIO_DIR1 + i); ret |= adp5588_gpio_write(client, KP_GPIO1 + i, 0); ret |= adp5588_gpio_write(client, GPIO_PULL1 + i, (pdata->pullup_dis_mask >> (8 * i)) & 0xFF); - + ret |= adp5588_gpio_write(client, GPIO_INT_EN1 + i, 0); if (ret) goto err; } + if (pdata->irq_base) { + if (WA_DELAYED_READOUT_REVID(revid)) { + dev_warn(&client->dev, "GPIO int not supported\n"); + } else { + ret = adp5588_irq_setup(dev); + if (ret) + goto err; + } + } + ret = gpiochip_add(&dev->gpio_chip); if (ret) - goto err; + goto err_irq; - dev_info(&client->dev, "gpios %d..%d on a %s Rev. %d\n", + dev_info(&client->dev, "gpios %d..%d (IRQ Base %d) on a %s Rev. 
%d\n", gc->base, gc->base + gc->ngpio - 1, - client->name, revid); + pdata->irq_base, client->name, revid); if (pdata->setup) { ret = pdata->setup(client, gc->base, gc->ngpio, pdata->context); @@ -199,8 +428,11 @@ static int __devinit adp5588_gpio_probe(struct i2c_client *client, } i2c_set_clientdata(client, dev); + return 0; +err_irq: + adp5588_irq_teardown(dev); err: kfree(dev); return ret; @@ -222,6 +454,9 @@ static int __devexit adp5588_gpio_remove(struct i2c_client *client) } } + if (dev->irq_base) + free_irq(dev->client->irq, dev); + ret = gpiochip_remove(&dev->gpio_chip); if (ret) { dev_err(&client->dev, "gpiochip_remove failed %d\n", ret); diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h index 269181b8f623..531376b77773 100644 --- a/include/linux/i2c/adp5588.h +++ b/include/linux/i2c/adp5588.h @@ -74,6 +74,20 @@ #define ADP5588_DEVICE_ID_MASK 0xF + /* Configuration Register1 */ +#define ADP5588_AUTO_INC (1 << 7) +#define ADP5588_GPIEM_CFG (1 << 6) +#define ADP5588_INT_CFG (1 << 4) +#define ADP5588_GPI_IEN (1 << 1) + +/* Interrupt Status Register */ +#define ADP5588_GPI_INT (1 << 1) +#define ADP5588_KE_INT (1 << 0) + +#define ADP5588_MAXGPIO 18 +#define ADP5588_BANK(offs) ((offs) >> 3) +#define ADP5588_BIT(offs) (1u << ((offs) & 0x7)) + /* Put one of these structures in i2c_board_info platform_data */ #define ADP5588_KEYMAPSIZE 80 @@ -128,6 +142,7 @@ struct adp5588_kpad_platform_data { struct adp5588_gpio_platform_data { unsigned gpio_start; /* GPIO Chip base # */ + unsigned irq_base; /* interrupt base # */ unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ int (*setup)(struct i2c_client *client, int gpio, unsigned ngpio, -- cgit v1.2.3 From 9ef8c8c51a7d76bae73e0259c356b24533b6b7c0 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Wed, 27 Oct 2010 15:33:20 -0700 Subject: gpio: adp5588-gpio: gpio_start must be signed Common code interprets this as a signed value (a negative value is used to request dynamic ID allocation), so make sure the platform data has proper types to support that. Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c/adp5588.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h index 531376b77773..bec05ed21766 100644 --- a/include/linux/i2c/adp5588.h +++ b/include/linux/i2c/adp5588.h @@ -141,9 +141,9 @@ struct adp5588_kpad_platform_data { }; struct adp5588_gpio_platform_data { - unsigned gpio_start; /* GPIO Chip base # */ - unsigned irq_base; /* interrupt base # */ - unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ + int gpio_start; /* GPIO Chip base # */ + unsigned irq_base; /* interrupt base # */ + unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ int (*setup)(struct i2c_client *client, int gpio, unsigned ngpio, void *context); -- cgit v1.2.3 From dc5ae4f2f58cfa98b67d2be379fc99080a8967af Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Wed, 27 Oct 2010 15:33:20 -0700 Subject: gpio: adp5588-gpio: add i2c forward declaration Some ADP5588 functions take a pointer to an i2c_client, but if the i2c header doesn't happen to be included first, we hit the standard "struct declared inside parameter list" warnings from gcc. So add a simple forward decl of the i2c_client struct. 
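For illustration, a minimal sketch of the pattern (a hypothetical header, not part of this patch): because such a header only ever passes pointers to struct i2c_client around, an incomplete type is enough and no extra include is needed:

struct i2c_client;	/* forward declaration: only pointers are used below */

struct example_platform_data {
	int gpio_start;
	/* the compiler never needs the full definition of i2c_client
	 * here, since only a pointer to it crosses this interface */
	int (*setup)(struct i2c_client *client, int gpio,
		     unsigned ngpio, void *context);
};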
Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c/adp5588.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h index bec05ed21766..3c5d6b6e765c 100644 --- a/include/linux/i2c/adp5588.h +++ b/include/linux/i2c/adp5588.h @@ -140,6 +140,8 @@ struct adp5588_kpad_platform_data { const struct adp5588_gpio_platform_data *gpio_data; }; +struct i2c_client; /* forward declaration */ + struct adp5588_gpio_platform_data { int gpio_start; /* GPIO Chip base # */ unsigned irq_base; /* interrupt base # */ -- cgit v1.2.3 From f11b478d461b7113eb4603b3914aaf15b7788e87 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 27 Oct 2010 15:33:28 -0700 Subject: fbmem: fix fb_read, fb_write unaligned accesses fb_{read,write} access the framebuffer using lots of fb_{read,write}l's but don't check that the file position is aligned which can cause problems on some architectures which do not support unaligned accesses. Since the operations are essentially memcpy_{from,to}io, new fb_memcpy_{from,to}fb macros have been defined and these are used instead. For Sparc, fb_{read,write} macros use sbus_{read,write}, so this defines new sbus_memcpy_{from,to}io functions the same as memcpy_{from,to}io but using sbus_{read,write}b instead of {read,write}b. Signed-off-by: James Hogan Acked-by: David S. Miller Acked-by: Florian Tobias Schandinat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/io_32.h | 31 ++++++++++++++++++++++++++++ arch/sparc/include/asm/io_64.h | 31 ++++++++++++++++++++++++++++ drivers/video/fbmem.c | 46 +++++++++++++----------------------------- include/linux/fb.h | 6 ++++++ 4 files changed, 82 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/arch/sparc/include/asm/io_32.h b/arch/sparc/include/asm/io_32.h index 2889574608db..c2ced21c9dc1 100644 --- a/arch/sparc/include/asm/io_32.h +++ b/arch/sparc/include/asm/io_32.h @@ -207,6 +207,21 @@ _memset_io(volatile void __iomem *dst, int c, __kernel_size_t n) #define memset_io(d,c,sz) _memset_io(d,c,sz) +static inline void +_sbus_memcpy_fromio(void *dst, const volatile void __iomem *src, + __kernel_size_t n) +{ + char *d = dst; + + while (n--) { + char tmp = sbus_readb(src); + *d++ = tmp; + src++; + } +} + +#define sbus_memcpy_fromio(d, s, sz) _sbus_memcpy_fromio(d, s, sz) + static inline void _memcpy_fromio(void *dst, const volatile void __iomem *src, __kernel_size_t n) { @@ -221,6 +236,22 @@ _memcpy_fromio(void *dst, const volatile void __iomem *src, __kernel_size_t n) #define memcpy_fromio(d,s,sz) _memcpy_fromio(d,s,sz) +static inline void +_sbus_memcpy_toio(volatile void __iomem *dst, const void *src, + __kernel_size_t n) +{ + const char *s = src; + volatile void __iomem *d = dst; + + while (n--) { + char tmp = *s++; + sbus_writeb(tmp, d); + d++; + } +} + +#define sbus_memcpy_toio(d, s, sz) _sbus_memcpy_toio(d, s, sz) + static inline void _memcpy_toio(volatile void __iomem *dst, const void *src, __kernel_size_t n) { diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h index 9517d063c79c..9c8965415f0a 100644 --- a/arch/sparc/include/asm/io_64.h +++ b/arch/sparc/include/asm/io_64.h @@ -418,6 +418,21 @@ _memset_io(volatile void __iomem *dst, int c, __kernel_size_t n) #define memset_io(d,c,sz) _memset_io(d,c,sz) +static inline void +_sbus_memcpy_fromio(void *dst, const volatile void __iomem *src, + __kernel_size_t n) 
+{ + char *d = dst; + + while (n--) { + char tmp = sbus_readb(src); + *d++ = tmp; + src++; + } +} + +#define sbus_memcpy_fromio(d, s, sz) _sbus_memcpy_fromio(d, s, sz) + static inline void _memcpy_fromio(void *dst, const volatile void __iomem *src, __kernel_size_t n) { @@ -432,6 +447,22 @@ _memcpy_fromio(void *dst, const volatile void __iomem *src, __kernel_size_t n) #define memcpy_fromio(d,s,sz) _memcpy_fromio(d,s,sz) +static inline void +_sbus_memcpy_toio(volatile void __iomem *dst, const void *src, + __kernel_size_t n) +{ + const char *s = src; + volatile void __iomem *d = dst; + + while (n--) { + char tmp = *s++; + sbus_writeb(tmp, d); + d++; + } +} + +#define sbus_memcpy_toio(d, s, sz) _sbus_memcpy_toio(d, s, sz) + static inline void _memcpy_toio(volatile void __iomem *dst, const void *src, __kernel_size_t n) { diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index a43442341ddd..0e6aa3d96a42 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -697,9 +697,9 @@ fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) struct inode *inode = file->f_path.dentry->d_inode; int fbidx = iminor(inode); struct fb_info *info = registered_fb[fbidx]; - u32 *buffer, *dst; - u32 __iomem *src; - int c, i, cnt = 0, err = 0; + u8 *buffer, *dst; + u8 __iomem *src; + int c, cnt = 0, err = 0; unsigned long total_size; if (!info || ! info->screen_base) @@ -730,7 +730,7 @@ fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) if (!buffer) return -ENOMEM; - src = (u32 __iomem *) (info->screen_base + p); + src = (u8 __iomem *) (info->screen_base + p); if (info->fbops->fb_sync) info->fbops->fb_sync(info); @@ -738,17 +738,9 @@ fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) while (count) { c = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; dst = buffer; - for (i = c >> 2; i--; ) - *dst++ = fb_readl(src++); - if (c & 3) { - u8 *dst8 = (u8 *) dst; - u8 __iomem *src8 = (u8 __iomem *) src; - - for (i = c & 3; i--;) - *dst8++ = fb_readb(src8++); - - src = (u32 __iomem *) src8; - } + fb_memcpy_fromfb(dst, src, c); + dst += c; + src += c; if (copy_to_user(buf, buffer, c)) { err = -EFAULT; @@ -772,9 +764,9 @@ fb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) struct inode *inode = file->f_path.dentry->d_inode; int fbidx = iminor(inode); struct fb_info *info = registered_fb[fbidx]; - u32 *buffer, *src; - u32 __iomem *dst; - int c, i, cnt = 0, err = 0; + u8 *buffer, *src; + u8 __iomem *dst; + int c, cnt = 0, err = 0; unsigned long total_size; if (!info || !info->screen_base) @@ -811,7 +803,7 @@ fb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) if (!buffer) return -ENOMEM; - dst = (u32 __iomem *) (info->screen_base + p); + dst = (u8 __iomem *) (info->screen_base + p); if (info->fbops->fb_sync) info->fbops->fb_sync(info); @@ -825,19 +817,9 @@ fb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) break; } - for (i = c >> 2; i--; ) - fb_writel(*src++, dst++); - - if (c & 3) { - u8 *src8 = (u8 *) src; - u8 __iomem *dst8 = (u8 __iomem *) dst; - - for (i = c & 3; i--; ) - fb_writeb(*src8++, dst8++); - - dst = (u32 __iomem *) dst8; - } - + fb_memcpy_tofb(dst, src, c); + dst += c; + src += c; *ppos += c; buf += c; cnt += c; diff --git a/include/linux/fb.h b/include/linux/fb.h index f0268deca658..7fca3dc4e475 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -931,6 +931,8 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { #define fb_writel sbus_writel #define fb_writeq sbus_writeq #define fb_memset sbus_memset_io +#define fb_memcpy_fromfb sbus_memcpy_fromio +#define fb_memcpy_tofb sbus_memcpy_toio #elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || defined(__avr32__) || defined(__bfin__) @@ -943,6 +945,8 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { #define fb_writel __raw_writel #define fb_writeq __raw_writeq #define fb_memset memset_io +#define fb_memcpy_fromfb memcpy_fromio +#define fb_memcpy_tofb memcpy_toio #else @@ -955,6 +959,8 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { #define fb_writel(b,addr) (*(volatile u32 *) (addr) = (b)) #define fb_writeq(b,addr) (*(volatile u64 *) (addr) = (b)) #define fb_memset memset +#define fb_memcpy_fromfb memcpy +#define fb_memcpy_tofb memcpy #endif -- cgit v1.2.3 From 97978e6d1f2da0073416870410459694fbdbfd9b Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 27 Oct 2010 15:33:35 -0700 Subject: cgroup: add clone_children control file The ns_cgroup is a control group interacting with the namespaces. When a new namespace is created, a corresponding cgroup is automatically created too. The cgroup name is the pid of the process who did 'unshare' or the child of 'clone'. This cgroup is tied with the namespace because it prevents a process to escape the control group and use the post_clone callback, so the child cgroup inherits the values of the parent cgroup. 
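As a rough sketch of the inheritance mechanism described above (simplified, with a hypothetical copy helper; cpuset is the one subsystem that really implements the callback):

/*
 * Sketch only: a post_clone implementation copies per-subsystem
 * state from the parent so the new child starts out as a clone.
 */
static void example_post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
	struct cgroup *parent = cgrp->parent;

	if (!parent)
		return;
	/* copy per-subsystem defaults (masks, limits, ...) */
	example_copy_state(cgrp, parent);	/* hypothetical helper */
}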
Unfortunately, the more we use this cgroup, the more problems we face with it:

(1) when a process unshares, the cgroup name may conflict with a previous cgroup with the same pid, so unshare or clone return -EEXIST

(2) cgroup creation is out of control, because an application may create several namespaces, and the system will automatically create several cgroups behind its back and leave them on the cgroupfs (eg. a vrf based on the network namespace).

(3) the combination of (1) and (2) forces an administrator to regularly check and clean up these cgroups.

This patchset removes the ns_cgroup by adding a new flag to the cgroup and a cgroupfs mount option. The flag enables copying the parent cgroup's values when a child cgroup is created, so we can then safely remove the ns_cgroup, as the flag provides the same behaviour. Tasks now have to be placed in a cgroup manually, which is consistent with the cgroup framework.

This patch was sent as an answer to a previous thread around the ns_cgroup:

https://lists.linux-foundation.org/pipermail/containers/2009-June/018627.html

It adds a control file 'clone_children' for a cgroup. This control file is a boolean specifying whether a child cgroup should be a clone of the parent cgroup. The default value is 'false'. When the flag is set, the child cgroup calls the post_clone callback of each subsystem that provides one. At present, cpuset is the only subsystem that implements post_clone.

The option can be set at mount time by specifying the 'clone_children' mount option.

Signed-off-by: Daniel Lezcano
Signed-off-by: Serge E. Hallyn
Cc: Eric W. Biederman
Acked-by: Paul Menage
Reviewed-by: Li Zefan
Cc: Jamal Hadi Salim
Cc: Matt Helsley
Acked-by: Balbir Singh
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 Documentation/cgroups/cgroups.txt | 14 ++++++++++++--
 include/linux/cgroup.h | 4 ++++
 kernel/cgroup.c | 39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index b34823ff1646..190018b0c649 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -18,7 +18,8 @@ CONTENTS:
   1.2 Why are cgroups needed ?
   1.3 How are cgroups implemented ?
   1.4 What does notify_on_release do ?
-  1.5 How do I use cgroups ?
+  1.5 What does clone_children do ?
+  1.6 How do I use cgroups ?
 2. Usage Examples and Syntax
   2.1 Basic Usage
   2.2 Attaching processes
@@ -293,7 +294,16 @@ notify_on_release in the root cgroup at system boot is disabled
 value of their parents notify_on_release setting. The default value of
 a cgroup hierarchy's release_agent path is empty.

-1.5 How do I use cgroups ?
+1.5 What does clone_children do ?
+---------------------------------
+
+If the clone_children flag is enabled (1) in a cgroup, then all
+cgroups created beneath will call the post_clone callbacks for each
+subsystem of the newly created cgroup. Usually when this callback is
+implemented for a subsystem, it copies the values of the parent
+subsystem, this is the case for the cpuset.
+
+1.6 How do I use cgroups ?
 --------------------------

 To start a new job that is to be contained within a cgroup, using
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 709dfb901d11..ed4ba111bc8d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -154,6 +154,10 @@ enum {
	 * A thread in rmdir() is wating for this cgroup.
*/ CGRP_WAIT_ON_RMDIR, + /* + * Clone cgroup values when creating a new child cgroup + */ + CGRP_CLONE_CHILDREN, }; /* which pidlist file are we talking about? */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9270d532ec3c..4b218a46ddd3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -243,6 +243,11 @@ static int notify_on_release(const struct cgroup *cgrp) return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); } +static int clone_children(const struct cgroup *cgrp) +{ + return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); +} + /* * for_each_subsys() allows you to iterate on each subsystem attached to * an active hierarchy @@ -1040,6 +1045,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",noprefix"); if (strlen(root->release_agent_path)) seq_printf(seq, ",release_agent=%s", root->release_agent_path); + if (clone_children(&root->top_cgroup)) + seq_puts(seq, ",clone_children"); if (strlen(root->name)) seq_printf(seq, ",name=%s", root->name); mutex_unlock(&cgroup_mutex); @@ -1050,6 +1057,7 @@ struct cgroup_sb_opts { unsigned long subsys_bits; unsigned long flags; char *release_agent; + bool clone_children; char *name; /* User explicitly requested empty subsystem */ bool none; @@ -1097,6 +1105,8 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) opts->none = true; } else if (!strcmp(token, "noprefix")) { set_bit(ROOT_NOPREFIX, &opts->flags); + } else if (!strcmp(token, "clone_children")) { + opts->clone_children = true; } else if (!strncmp(token, "release_agent=", 14)) { /* Specifying two release agents is forbidden */ if (opts->release_agent) @@ -1355,6 +1365,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) strcpy(root->release_agent_path, opts->release_agent); if (opts->name) strcpy(root->name, opts->name); + if (opts->clone_children) + set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags); return root; } @@ -3173,6 +3185,23 @@ fail: return ret; } +static u64 cgroup_clone_children_read(struct cgroup *cgrp, + struct cftype *cft) +{ + return clone_children(cgrp); +} + +static int cgroup_clone_children_write(struct cgroup *cgrp, + struct cftype *cft, + u64 val) +{ + if (val) + set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + else + clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + return 0; +} + /* * for the common functions, 'private' gives the type of file */ @@ -3203,6 +3232,11 @@ static struct cftype files[] = { .write_string = cgroup_write_event_control, .mode = S_IWUGO, }, + { + .name = "cgroup.clone_children", + .read_u64 = cgroup_clone_children_read, + .write_u64 = cgroup_clone_children_write, + }, }; static struct cftype cft_release_agent = { @@ -3332,6 +3366,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, if (notify_on_release(parent)) set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); + if (clone_children(parent)) + set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); + for_each_subsys(root, ss) { struct cgroup_subsys_state *css = ss->create(ss, cgrp); @@ -3346,6 +3383,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, goto err_destroy; } /* At error, ->destroy() callback has to free assigned ID. 
*/
+		if (clone_children(parent) && ss->post_clone)
+			ss->post_clone(ss, cgrp);
 	}

 	cgroup_lock_hierarchy(root);
-- cgit v1.2.3

From 4abf986960ecda6a87fc2f795aacf888a2f0127e Mon Sep 17 00:00:00 2001
From: Namhyung Kim
Date: Wed, 27 Oct 2010 15:33:45 -0700
Subject: ptrace: change signature of sys_ptrace() and friends

Since the userspace API of the ptrace syscall defines @addr and @data as void pointers, it is more appropriate to define them as unsigned long in the kernel; the related functions are changed accordingly. 'unsigned long' is typically used elsewhere in the kernel as an opaque data type, and using it here cleans up a lot of sparse warnings.

Suggested-by: Arnd Bergmann
Signed-off-by: Namhyung Kim
Acked-by: Arnd Bergmann
Acked-by: Roland McGrath
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/ptrace.h | 9 ++++++---
 include/linux/syscalls.h | 3 ++-
 kernel/ptrace.c | 16 ++++++++++------
 3 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 4272521e29e9..67a4cd77c352 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -108,7 +108,8 @@ extern int ptrace_attach(struct task_struct *tsk);
 extern int ptrace_detach(struct task_struct *, unsigned int);
 extern void ptrace_disable(struct task_struct *);
 extern int ptrace_check_attach(struct task_struct *task, int kill);
-extern int ptrace_request(struct task_struct *child, long request, long addr, long data);
+extern int ptrace_request(struct task_struct *child, long request,
+			  unsigned long addr, unsigned long data);
 extern void ptrace_notify(int exit_code);
 extern void __ptrace_link(struct task_struct *child,
			   struct task_struct *new_parent);
@@ -132,8 +133,10 @@ static inline void ptrace_unlink(struct task_struct *child)
 	__ptrace_unlink(child);
 }

-int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data);
-int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data);
+int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
+			    unsigned long data);
+int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
+			    unsigned long data);

 /**
  * task_ptrace - return %PT_* flags that apply to a task
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index e6319d18a55d..cacc27a0e285 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -701,7 +701,8 @@ asmlinkage long sys_nfsservctl(int cmd,
 asmlinkage long sys_syslog(int type, char __user *buf, int len);
 asmlinkage long sys_uselib(const char __user *library);
 asmlinkage long sys_ni_syscall(void);
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data);
+asmlinkage long sys_ptrace(long request, long pid, unsigned long addr,
+			   unsigned long data);

 asmlinkage long sys_add_key(const char __user *_type,
			    const char __user *_description,
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 4afd9b86cc0b..06981a8b271b 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -404,7 +404,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
 	return copied;
 }

-static int ptrace_setoptions(struct task_struct *child, long data)
+static int ptrace_setoptions(struct task_struct *child, unsigned long data)
 {
 	child->ptrace &= ~PT_TRACE_MASK;

@@ -483,7 +483,8 @@ static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info)
 #define is_sysemu_singlestep(request)	0
 #endif

-static int ptrace_resume(struct task_struct *child, long request,
long data) +static int ptrace_resume(struct task_struct *child, long request, + unsigned long data) { if (!valid_signal(data)) return -EIO; @@ -560,7 +561,7 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type, #endif int ptrace_request(struct task_struct *child, long request, - long addr, long data) + unsigned long addr, unsigned long data) { int ret = -EIO; siginfo_t siginfo; @@ -693,7 +694,8 @@ static struct task_struct *ptrace_get_task_struct(pid_t pid) #define arch_ptrace_attach(child) do { } while (0) #endif -SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) +SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, + unsigned long, data) { struct task_struct *child; long ret; @@ -734,7 +736,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) return ret; } -int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) +int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, + unsigned long data) { unsigned long tmp; int copied; @@ -745,7 +748,8 @@ int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) return put_user(tmp, (unsigned long __user *)data); } -int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) +int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, + unsigned long data) { int copied; -- cgit v1.2.3 From 9b05a69e0534ec70bc94921936ffa05b330507cb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:47 -0700 Subject: ptrace: change signature of arch_ptrace() Fix up the arguments to arch_ptrace() to take account of the fact that @addr and @data are now unsigned long rather than long as of a preceding patch in this series. Signed-off-by: Namhyung Kim Cc: Acked-by: Roland McGrath Acked-by: David Howells Acked-by: Geert Uytterhoeven Acked-by: David S. 
Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/ptrace.c | 7 ++++--- arch/arm/kernel/ptrace.c | 3 ++- arch/avr32/kernel/ptrace.c | 3 ++- arch/blackfin/kernel/ptrace.c | 3 ++- arch/cris/arch-v10/kernel/ptrace.c | 7 ++++--- arch/cris/arch-v32/kernel/ptrace.c | 3 ++- arch/frv/kernel/ptrace.c | 3 ++- arch/h8300/kernel/ptrace.c | 7 ++++--- arch/ia64/kernel/ptrace.c | 3 ++- arch/m32r/kernel/ptrace.c | 3 ++- arch/m68k/kernel/ptrace.c | 9 +++++---- arch/m68knommu/kernel/ptrace.c | 7 ++++--- arch/microblaze/kernel/ptrace.c | 3 ++- arch/mips/kernel/ptrace.c | 3 ++- arch/mn10300/kernel/ptrace.c | 3 ++- arch/parisc/kernel/ptrace.c | 11 ++++++----- arch/powerpc/kernel/ptrace.c | 15 ++++++++------- arch/s390/kernel/ptrace.c | 3 ++- arch/score/kernel/ptrace.c | 3 ++- arch/sh/kernel/ptrace_32.c | 21 +++++++++++---------- arch/sh/kernel/ptrace_64.c | 6 ++++-- arch/sparc/kernel/ptrace_32.c | 3 ++- arch/sparc/kernel/ptrace_64.c | 7 ++++--- arch/tile/kernel/ptrace.c | 9 ++++++--- arch/um/kernel/ptrace.c | 5 +++-- arch/um/sys-i386/ptrace.c | 4 ++-- arch/um/sys-x86_64/ptrace.c | 4 ++-- arch/x86/kernel/ptrace.c | 7 ++++--- arch/xtensa/kernel/ptrace.c | 3 ++- include/linux/ptrace.h | 3 ++- 30 files changed, 101 insertions(+), 70 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index baa903602f6a..e2af5eb59bb4 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -269,7 +269,8 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long tmp; size_t copied; @@ -292,7 +293,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) case PTRACE_PEEKUSR: force_successful_syscall_return(); ret = get_reg(child, addr); - DBG(DBG_MEM, ("peek $%ld->%#lx\n", addr, ret)); + DBG(DBG_MEM, ("peek $%lu->%#lx\n", addr, ret)); break; /* When I and D space are separate, this will have to be fixed. 
*/ @@ -302,7 +303,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_POKEUSR: /* write the specified register */ - DBG(DBG_MEM, ("poke $%ld<-%#lx\n", addr, data)); + DBG(DBG_MEM, ("poke $%lu<-%#lx\n", addr, data)); ret = put_reg(child, addr, data); break; default: diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index e0cb6370ed14..9bca6165459e 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -1075,7 +1075,8 @@ out: } #endif -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c index 5e73c25f8f85..ecea9b6bfab4 100644 --- a/arch/avr32/kernel/ptrace.c +++ b/arch/avr32/kernel/ptrace.c @@ -146,7 +146,8 @@ static int ptrace_setregs(struct task_struct *tsk, const void __user *uregs) return ret; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index b35839354130..8e3083ccd88a 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -240,7 +240,8 @@ void user_disable_single_step(struct task_struct *child) clear_tsk_thread_flag(child, TIF_SINGLESTEP); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (unsigned long __user *)data; diff --git a/arch/cris/arch-v10/kernel/ptrace.c b/arch/cris/arch-v10/kernel/ptrace.c index e70c804e9377..d411e024e05f 100644 --- a/arch/cris/arch-v10/kernel/ptrace.c +++ b/arch/cris/arch-v10/kernel/ptrace.c @@ -76,7 +76,8 @@ ptrace_disable(struct task_struct *child) * (in user space) where the result of the ptrace call is written (instead of * being returned). 
*/ -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (unsigned long __user *)data; @@ -141,7 +142,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } - data += sizeof(long); + data += sizeof(unsigned long); } break; @@ -165,7 +166,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) } put_reg(child, i, tmp); - data += sizeof(long); + data += sizeof(unsigned long); } break; diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index f4ebd1e7d0f5..3e058a121753 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -126,7 +126,8 @@ ptrace_disable(struct task_struct *child) } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (unsigned long __user *)data; diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c index fac028936a04..e9dbfad93589 100644 --- a/arch/frv/kernel/ptrace.c +++ b/arch/frv/kernel/ptrace.c @@ -254,7 +254,8 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long tmp; int ret; diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c index df114122ebdf..ef1aa0b8b20f 100644 --- a/arch/h8300/kernel/ptrace.c +++ b/arch/h8300/kernel/ptrace.c @@ -50,7 +50,8 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; @@ -120,7 +121,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = -EFAULT; break; } - data += sizeof(long); + data += sizeof(unsigned long); } ret = 0; break; @@ -135,7 +136,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } h8300_put_reg(child, i, tmp); - data += sizeof(long); + data += sizeof(unsigned long); } ret = 0; break; diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 7c7909f9bc93..8848f43d819e 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1177,7 +1177,8 @@ ptrace_disable (struct task_struct *child) } long -arch_ptrace (struct task_struct *child, long request, long addr, long data) +arch_ptrace (struct task_struct *child, long request, + unsigned long addr, unsigned long data) { switch (request) { case PTRACE_PEEKTEXT: diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index 0021ade4cba8..5e00e0a41fff 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -622,7 +622,8 @@ void ptrace_disable(struct task_struct *child) } long -arch_ptrace(struct task_struct *child, long request, long addr, long data) +arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c index 616e59752c29..583f59fcc363 100644 --- a/arch/m68k/kernel/ptrace.c +++ 
b/arch/m68k/kernel/ptrace.c @@ -156,7 +156,8 @@ void user_disable_single_step(struct task_struct *child) singlestep_disable(child); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long tmp; int i, ret = 0; @@ -200,7 +201,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) * into internal fpu reg representation */ if (FPU_IS_EMU && (addr < 45) && !(addr % 3)) { - data = (unsigned long)data << 15; + data <<= 15; data = (data & 0xffff0000) | ((data & 0x0000ffff) >> 1); } @@ -215,7 +216,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = put_user(tmp, (unsigned long *)data); if (ret) break; - data += sizeof(long); + data += sizeof(unsigned long); } break; @@ -229,7 +230,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) tmp |= get_reg(child, PT_SR) & ~SR_MASK; } put_reg(child, i, tmp); - data += sizeof(long); + data += sizeof(unsigned long); } break; diff --git a/arch/m68knommu/kernel/ptrace.c b/arch/m68knommu/kernel/ptrace.c index 6fe7c38cd556..7dbb08f5534e 100644 --- a/arch/m68knommu/kernel/ptrace.c +++ b/arch/m68knommu/kernel/ptrace.c @@ -112,7 +112,8 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; @@ -184,7 +185,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = -EFAULT; break; } - data += sizeof(long); + data += sizeof(unsigned long); } ret = 0; break; @@ -204,7 +205,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) tmp |= get_reg(child, PT_SR) & ~(SR_MASK << 16); } put_reg(child, i, tmp); - data += sizeof(long); + data += sizeof(unsigned long); } ret = 0; break; diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index dc03ffc8174a..3544bc157406 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -73,7 +73,8 @@ static microblaze_reg_t *reg_save_addr(unsigned reg_offs, return (microblaze_reg_t *)((char *)regs + reg_offs); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int rval; unsigned long val = 0; diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index c8777333e198..95c3ae8b198c 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -255,7 +255,8 @@ int ptrace_set_watch_regs(struct task_struct *child, return 0; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c index cf847dabc1bd..ec4b41439e99 100644 --- a/arch/mn10300/kernel/ptrace.c +++ b/arch/mn10300/kernel/ptrace.c @@ -295,7 +295,8 @@ void ptrace_disable(struct task_struct *child) /* * handle the arch-specific side of process tracing */ -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long tmp; int 
ret; diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index c4f49e45129d..03920db4af45 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -110,7 +110,8 @@ void user_enable_block_step(struct task_struct *task) pa_psw(task)->l = 0; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long tmp; long ret = -EIO; @@ -120,8 +121,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) /* Read the word at location addr in the USER area. For ptraced processes, the kernel saves all regs on a syscall. */ case PTRACE_PEEKUSR: - if ((addr & (sizeof(long)-1)) || - (unsigned long) addr >= sizeof(struct pt_regs)) + if ((addr & (sizeof(unsigned long)-1)) || + addr >= sizeof(struct pt_regs)) break; tmp = *(unsigned long *) ((char *) task_regs(child) + addr); ret = put_user(tmp, (unsigned long *) data); @@ -151,8 +152,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } - if ((addr & (sizeof(long)-1)) || - (unsigned long) addr >= sizeof(struct pt_regs)) + if ((addr & (sizeof(unsigned long)-1)) || + addr >= sizeof(struct pt_regs)) break; if ((addr >= PT_GR1 && addr <= PT_GR31) || addr == PT_IAOQ0 || addr == PT_IAOQ1 || diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 286d9783d93f..136763568a7b 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1406,8 +1406,8 @@ static long ppc_del_hwdebug(struct task_struct *child, long addr, long data) * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls, * we mark them as obsolete now, they will be removed in a future version */ -static long arch_ptrace_old(struct task_struct *child, long request, long addr, - long data) +static long arch_ptrace_old(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { switch (request) { case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. 
*/ @@ -1434,7 +1434,8 @@ static long arch_ptrace_old(struct task_struct *child, long request, long addr, return -EPERM; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret = -EPERM; @@ -1446,11 +1447,11 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = -EIO; /* convert to index and check */ #ifdef CONFIG_PPC32 - index = (unsigned long) addr >> 2; + index = addr >> 2; if ((addr & 3) || (index > PT_FPSCR) || (child->thread.regs == NULL)) #else - index = (unsigned long) addr >> 3; + index = addr >> 3; if ((addr & 7) || (index > PT_FPSCR)) #endif break; @@ -1474,11 +1475,11 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = -EIO; /* convert to index and check */ #ifdef CONFIG_PPC32 - index = (unsigned long) addr >> 2; + index = addr >> 2; if ((addr & 3) || (index > PT_FPSCR) || (child->thread.regs == NULL)) #else - index = (unsigned long) addr >> 3; + index = addr >> 3; if ((addr & 7) || (index > PT_FPSCR)) #endif break; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 83339d33c4b1..019bb714db49 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -343,7 +343,8 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data) return __poke_user(child, addr, data); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { ptrace_area parea; int copied, ret; diff --git a/arch/score/kernel/ptrace.c b/arch/score/kernel/ptrace.c index 174c6422b096..894dcbf72099 100644 --- a/arch/score/kernel/ptrace.c +++ b/arch/score/kernel/ptrace.c @@ -325,7 +325,8 @@ void ptrace_disable(struct task_struct *child) } long -arch_ptrace(struct task_struct *child, long request, long addr, long data) +arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (void __user *)data; diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 2cd42b58cb20..34bf03745e86 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -365,7 +365,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) return &user_sh_native_view; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { struct user * dummy = NULL; unsigned long __user *datap = (unsigned long __user *)data; @@ -383,17 +384,17 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) if (addr < sizeof(struct pt_regs)) tmp = get_stack_long(child, addr); - else if (addr >= (long) &dummy->fpu && - addr < (long) &dummy->u_fpvalid) { + else if (addr >= (unsigned long) &dummy->fpu && + addr < (unsigned long) &dummy->u_fpvalid) { if (!tsk_used_math(child)) { - if (addr == (long)&dummy->fpu.fpscr) + if (addr == (unsigned long)&dummy->fpu.fpscr) tmp = FPSCR_INIT; else tmp = 0; } else - tmp = ((long *)child->thread.xstate) + tmp = ((unsigned long *)child->thread.xstate) [(addr - (long)&dummy->fpu) >> 2]; - } else if (addr == (long) &dummy->u_fpvalid) + } else if (addr == (unsigned long) &dummy->u_fpvalid) tmp = !!tsk_used_math(child); else if (addr == PT_TEXT_ADDR) tmp = child->mm->start_code; @@ -417,13 
+418,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) if (addr < sizeof(struct pt_regs)) ret = put_stack_long(child, addr, data); - else if (addr >= (long) &dummy->fpu && - addr < (long) &dummy->u_fpvalid) { + else if (addr >= (unsigned long) &dummy->fpu && + addr < (unsigned long) &dummy->u_fpvalid) { set_stopped_child_used_math(child); - ((long *)child->thread.xstate) + ((unsigned long *)child->thread.xstate) [(addr - (long)&dummy->fpu) >> 2] = data; ret = 0; - } else if (addr == (long) &dummy->u_fpvalid) { + } else if (addr == (unsigned long) &dummy->u_fpvalid) { conditional_stopped_child_used_math(data, child); ret = 0; } diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c index e0fb065914aa..4840716c196a 100644 --- a/arch/sh/kernel/ptrace_64.c +++ b/arch/sh/kernel/ptrace_64.c @@ -383,7 +383,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) return &user_sh64_native_view; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret; @@ -471,7 +472,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) return ret; } -asmlinkage int sh64_ptrace(long request, long pid, long addr, long data) +asmlinkage int sh64_ptrace(long request, long pid, + unsigned long addr, unsigned long data) { #define WPC_DBRMODE 0x0d104008 static unsigned long first_call; diff --git a/arch/sparc/kernel/ptrace_32.c b/arch/sparc/kernel/ptrace_32.c index e608f397e11f..e08ba4a46acd 100644 --- a/arch/sparc/kernel/ptrace_32.c +++ b/arch/sparc/kernel/ptrace_32.c @@ -323,7 +323,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) return &user_sparc32_view; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long addr2 = current->thread.kregs->u_regs[UREG_I4]; const struct user_regset_view *view; diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index aa90da08bf61..d9db5a4dfef9 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -969,7 +969,8 @@ struct fps { unsigned long fsr; }; -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { const struct user_regset_view *view = task_user_regset_view(current); unsigned long addr2 = task_pt_regs(current)->u_regs[UREG_I4]; @@ -977,8 +978,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) struct fps __user *fps; int ret; - pregs = (struct pt_regs __user *) (unsigned long) addr; - fps = (struct fps __user *) (unsigned long) addr; + pregs = (struct pt_regs __user *) addr; + fps = (struct fps __user *) addr; switch (request) { case PTRACE_PEEKUSR: diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index 5b20c2874d51..f634729211d1 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -45,7 +45,8 @@ void ptrace_disable(struct task_struct *child) clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long __user *datap = (long __user 
__force *)data; unsigned long tmp; @@ -98,7 +99,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) if (!access_ok(VERIFY_WRITE, datap, PTREGS_SIZE)) break; childregs = (long *)task_pt_regs(child); - for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) { + for (i = 0; i < sizeof(struct pt_regs)/sizeof(unsigned long); + ++i) { ret = __put_user(childregs[i], &datap[i]); if (ret != 0) break; @@ -109,7 +111,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) if (!access_ok(VERIFY_READ, datap, PTREGS_SIZE)) break; childregs = (long *)task_pt_regs(child); - for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) { + for (i = 0; i < sizeof(struct pt_regs)/sizeof(unsigned long); + ++i) { ret = __get_user(childregs[i], &datap[i]); if (ret != 0) break; diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index e0510496596c..963d82bdec06 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -42,10 +42,11 @@ void ptrace_disable(struct task_struct *child) extern int peek_user(struct task_struct * child, long addr, long data); extern int poke_user(struct task_struct * child, long addr, long data); -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int i, ret; - unsigned long __user *p = (void __user *)(unsigned long)data; + unsigned long __user *p = (void __user *)data; switch (request) { /* read word at location addr. */ diff --git a/arch/um/sys-i386/ptrace.c b/arch/um/sys-i386/ptrace.c index c9b176534d65..d23b2d3ea384 100644 --- a/arch/um/sys-i386/ptrace.c +++ b/arch/um/sys-i386/ptrace.c @@ -203,8 +203,8 @@ int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) (unsigned long *) &fpregs); } -long subarch_ptrace(struct task_struct *child, long request, long addr, - long data) +long subarch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { return -EIO; } diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/um/sys-x86_64/ptrace.c index f3458d7d1c5a..67e63680df28 100644 --- a/arch/um/sys-x86_64/ptrace.c +++ b/arch/um/sys-x86_64/ptrace.c @@ -175,8 +175,8 @@ int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) return restore_fp_registers(userspace_pid[cpu], fpregs); } -long subarch_ptrace(struct task_struct *child, long request, long addr, - long data) +long subarch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret = -EIO; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 70c4872cd8aa..1a7ca045920d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -801,7 +801,8 @@ void ptrace_disable(struct task_struct *child) static const struct user_regset_view user_x86_32_view; /* Initialized below. 
*/
 #endif

-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
 {
 	int ret;
 	unsigned long __user *datap = (unsigned long __user *)data;

@@ -888,14 +889,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)

 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
 	case PTRACE_GET_THREAD_AREA:
-		if (addr < 0)
+		if ((int) addr < 0)
 			return -EIO;
 		ret = do_get_thread_area(child, addr,
					 (struct user_desc __user *) data);
 		break;

 	case PTRACE_SET_THREAD_AREA:
-		if (addr < 0)
+		if ((int) addr < 0)
 			return -EIO;
 		ret = do_set_thread_area(child, addr,
					 (struct user_desc __user *) data, 0);
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index 9d4e1ceb3f09..af9ba80f254c 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -256,7 +256,8 @@ int ptrace_pokeusr(struct task_struct *child, long regno, long val)
 	return 0;
 }

-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
 {
 	int ret = -EPERM;

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 67a4cd77c352..092a04f874a8 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -100,7 +100,8 @@
 #include <linux/sched.h>	/* For struct task_struct. */

-extern long arch_ptrace(struct task_struct *child, long request, long addr, long data);
+extern long arch_ptrace(struct task_struct *child, long request,
+			unsigned long addr, unsigned long data);
 extern int ptrace_traceme(void);
 extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
 extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
-- cgit v1.2.3

From b8ed374e202e23caaf9bd77dcadc9de6447faaa8 Mon Sep 17 00:00:00 2001
From: Namhyung Kim
Date: Wed, 27 Oct 2010 15:34:06 -0700
Subject: signals: annotate lock_task_sighand()

lock_task_sighand() grabs sighand->siglock when it returns non-NULL, but unlock_task_sighand() releases the lock unconditionally. This leads sparse to complain about a lock context imbalance. Rename lock_task_sighand() and wrap it with the __cond_lock() macro to make sparse happy.
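The pattern, sketched for a hypothetical try-lock helper (the actual lock_task_sighand() wrapper is in the diff below): under sparse, __cond_lock() tells the checker that the lock is acquired exactly when the wrapped expression evaluates to non-NULL, so the conditional acquire and the unconditional release balance out:

/* Sketch, assuming a struct foo with an embedded lock.  The
 * out-of-line function takes f->lock only on success and returns
 * f or NULL. */
struct foo *__foo_trylock(struct foo *f);

#define foo_trylock(f)						\
({	struct foo *__ret;					\
	__cond_lock(&(f)->lock, (__ret = __foo_trylock(f)));	\
	__ret;							\
})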
Suggested-by: Eric Dumazet Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 9 ++++++++- kernel/signal.c | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 393ce94e54b7..3ff5c8519abd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2236,9 +2236,16 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } -extern struct sighand_struct *lock_task_sighand(struct task_struct *tsk, +extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, unsigned long *flags); +#define lock_task_sighand(tsk, flags) \ +({ struct sighand_struct *__ss; \ + __cond_lock(&(tsk)->sighand->siglock, \ + (__ss = __lock_task_sighand(tsk, flags))); \ + __ss; \ +}) \ + static inline void unlock_task_sighand(struct task_struct *tsk, unsigned long *flags) { diff --git a/kernel/signal.c b/kernel/signal.c index 919562c3d6b7..e921409b85a9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1105,7 +1105,8 @@ int zap_other_threads(struct task_struct *p) return count; } -struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) +struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, + unsigned long *flags) { struct sighand_struct *sighand; -- cgit v1.2.3 From 9b1bf12d5d51bca178dea21b04a0805e29d60cf1 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 27 Oct 2010 15:34:08 -0700 Subject: signals: move cred_guard_mutex from task_struct to signal_struct Oleg Nesterov pointed out we have to prevent multiple-threads-inside-exec itself and we can reuse ->cred_guard_mutex for it. Yes, concurrent execve() has no worth. Let's move ->cred_guard_mutex from task_struct to signal_struct. It naturally prevent multiple-threads-inside-exec. Signed-off-by: KOSAKI Motohiro Reviewed-by: Oleg Nesterov Acked-by: Roland McGrath Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 10 +++++----- fs/proc/base.c | 8 ++++---- include/linux/init_task.h | 4 ++-- include/linux/sched.h | 7 ++++--- include/linux/tracehook.h | 2 +- kernel/cred.c | 4 +--- kernel/fork.c | 2 ++ kernel/ptrace.c | 4 ++-- 8 files changed, 21 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/fs/exec.c b/fs/exec.c index 3aa75b8888a1..9722909c4d88 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1083,14 +1083,14 @@ EXPORT_SYMBOL(setup_new_exec); */ int prepare_bprm_creds(struct linux_binprm *bprm) { - if (mutex_lock_interruptible(¤t->cred_guard_mutex)) + if (mutex_lock_interruptible(¤t->signal->cred_guard_mutex)) return -ERESTARTNOINTR; bprm->cred = prepare_exec_creds(); if (likely(bprm->cred)) return 0; - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); return -ENOMEM; } @@ -1098,7 +1098,7 @@ void free_bprm(struct linux_binprm *bprm) { free_arg_pages(bprm); if (bprm->cred) { - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } kfree(bprm); @@ -1119,13 +1119,13 @@ void install_exec_creds(struct linux_binprm *bprm) * credentials; any time after this it may be unlocked. 
*/ security_bprm_committed_creds(bprm); - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); } EXPORT_SYMBOL(install_exec_creds); /* * determine how safe it is to execute the proposed program - * - the caller must hold current->cred_guard_mutex to protect against + * - the caller must hold ->cred_guard_mutex to protect against * PTRACE_ATTACH */ int check_unsafe_exec(struct linux_binprm *bprm) diff --git a/fs/proc/base.c b/fs/proc/base.c index 9b094c1c8465..f3d02ca461ec 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -226,7 +226,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task) { struct mm_struct *mm; - if (mutex_lock_killable(&task->cred_guard_mutex)) + if (mutex_lock_killable(&task->signal->cred_guard_mutex)) return NULL; mm = get_task_mm(task); @@ -235,7 +235,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task) mmput(mm); mm = NULL; } - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); return mm; } @@ -2354,14 +2354,14 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, goto out_free; /* Guard against adverse ptrace interaction */ - length = mutex_lock_interruptible(&task->cred_guard_mutex); + length = mutex_lock_interruptible(&task->signal->cred_guard_mutex); if (length < 0) goto out_free; length = security_setprocattr(task, (char*)file->f_path.dentry->d_name.name, (void*)page, count); - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); out_free: free_page((unsigned long) page); out: diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2fea6c8ef6ba..1f8c06ce0fa6 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -29,6 +29,8 @@ extern struct fs_struct init_fs; .running = 0, \ .lock = __SPIN_LOCK_UNLOCKED(sig.cputimer.lock), \ }, \ + .cred_guard_mutex = \ + __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ } extern struct nsproxy init_nsproxy; @@ -145,8 +147,6 @@ extern struct cred init_cred; .group_leader = &tsk, \ RCU_INIT_POINTER(.real_cred, &init_cred), \ RCU_INIT_POINTER(.cred, &init_cred), \ - .cred_guard_mutex = \ - __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3ff5c8519abd..be7adb7588e5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -626,6 +626,10 @@ struct signal_struct { int oom_adj; /* OOM kill score adjustment (bit shift) */ int oom_score_adj; /* OOM kill score adjustment */ + + struct mutex cred_guard_mutex; /* guard against foreign influences on + * credential calculations + * (notably. ptrace) */ }; /* Context switch must be unlocked if interrupts are to be enabled */ @@ -1305,9 +1309,6 @@ struct task_struct { * credentials (COW) */ const struct cred __rcu *cred; /* effective (overridable) subjective task * credentials (COW) */ - struct mutex cred_guard_mutex; /* guard against foreign influences on - * credential calculations - * (notably. ptrace) */ struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */ char comm[TASK_COMM_LEN]; /* executable name excluding path diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 10db0102a890..3a2e66d88a32 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -150,7 +150,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) * * Return %LSM_UNSAFE_* bits applied to an exec because of tracing. 
* - * @task->cred_guard_mutex is held by the caller through the do_execve(). + * @task->signal->cred_guard_mutex is held by the caller through the do_execve(). */ static inline int tracehook_unsafe_exec(struct task_struct *task) { diff --git a/kernel/cred.c b/kernel/cred.c index 9a3e22641fe7..6a1aa004e376 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -325,7 +325,7 @@ EXPORT_SYMBOL(prepare_creds); /* * Prepare credentials for current to perform an execve() - * - The caller must hold current->cred_guard_mutex + * - The caller must hold ->cred_guard_mutex */ struct cred *prepare_exec_creds(void) { @@ -384,8 +384,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) struct cred *new; int ret; - mutex_init(&p->cred_guard_mutex); - if ( #ifdef CONFIG_KEYS !p->cred->thread_keyring && diff --git a/kernel/fork.c b/kernel/fork.c index e87aaaaf5131..3b159c5991b7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -908,6 +908,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; + mutex_init(&sig->cred_guard_mutex); + return 0; } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index ea7ce0215cd1..99bbaa3e5b0d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -181,7 +181,7 @@ int ptrace_attach(struct task_struct *task) * under ptrace. */ retval = -ERESTARTNOINTR; - if (mutex_lock_interruptible(&task->cred_guard_mutex)) + if (mutex_lock_interruptible(&task->signal->cred_guard_mutex)) goto out; task_lock(task); @@ -208,7 +208,7 @@ int ptrace_attach(struct task_struct *task) unlock_tasklist: write_unlock_irq(&tasklist_lock); unlock_creds: - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); out: return retval; } -- cgit v1.2.3 From f2c66cd8eeddedb440f33bc0f5cec1ed7ae376cb Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 27 Oct 2010 15:34:13 -0700 Subject: /proc/stat: scalability of irq num per cpu /proc/stat shows the total number of all interrupts to each cpu. But when the number of IRQs is very large, it takes a very long time, and 'cat /proc/stat' can take more than 10 seconds. This is because the sum of all irq events is computed every time /proc/stat is read. This patch adds a per-cpu "sum of all irq" counter and reduces read costs. The cost of reading /proc/stat is important because it's used by major applications such as 'top', 'ps', 'w', etc.
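To make the bookkeeping concrete, here is a minimal user-space sketch of the idea (an editorial illustration, not kernel code; all names and sizes are invented): each interrupt bumps a per-cpu running total alongside the detailed per-irq count, so a reader touches one counter per cpu instead of one counter per (cpu, irq) pair.

	#include <stdio.h>

	#define NCPUS 4
	#define NIRQS 8

	static unsigned int irq_count[NCPUS][NIRQS];	/* per-cpu, per-irq counts */
	static unsigned long irqs_sum[NCPUS];		/* per-cpu running totals */

	static void account_irq(int cpu, int irq)
	{
		irq_count[cpu][irq]++;	/* still kept for the per-irq columns */
		irqs_sum[cpu]++;	/* cheap total for the "intr" summary */
	}

	/* Before: every read walks the whole NCPUS x NIRQS table. */
	static unsigned long total_irqs_slow(void)
	{
		unsigned long sum = 0;
		int cpu, irq;

		for (cpu = 0; cpu < NCPUS; cpu++)
			for (irq = 0; irq < NIRQS; irq++)
				sum += irq_count[cpu][irq];
		return sum;
	}

	/* After: one counter per cpu, independent of the number of irqs. */
	static unsigned long total_irqs_fast(void)
	{
		unsigned long sum = 0;
		int cpu;

		for (cpu = 0; cpu < NCPUS; cpu++)
			sum += irqs_sum[cpu];
		return sum;
	}

	int main(void)
	{
		account_irq(0, 3);
		account_irq(1, 5);
		printf("slow=%lu fast=%lu\n", total_irqs_slow(), total_irqs_fast());
		return 0;
	}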
A test on a machine (4096 cpus, 256 nodes, 4592 irqs) shows %time cat /proc/stat > /dev/null Before Patch: 12.627 sec After Patch: 2.459 sec Signed-off-by: KAMEZAWA Hiroyuki Tested-by: Jack Steiner Acked-by: Jack Steiner Cc: Yinghai Lu Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/stat.c | 4 +--- include/linux/kernel_stat.h | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/proc/stat.c b/fs/proc/stat.c index bf31b03fc275..b80c620565bf 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -52,9 +52,7 @@ static int show_stat(struct seq_file *p, void *v) guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); guest_nice = cputime64_add(guest_nice, kstat_cpu(i).cpustat.guest_nice); - for_each_irq_nr(j) { - sum += kstat_irqs_cpu(j, i); - } + sum += kstat_cpu_irqs_sum(i); sum += arch_irq_stat_cpu(i); for (j = 0; j < NR_SOFTIRQS; j++) { diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index c059044bc6dc..8b9b89085530 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -33,6 +33,7 @@ struct kernel_stat { #ifndef CONFIG_GENERIC_HARDIRQS unsigned int irqs[NR_IRQS]; #endif + unsigned long irqs_sum; unsigned int softirqs[NR_SOFTIRQS]; }; @@ -54,6 +55,7 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *desc) { kstat_this_cpu.irqs[irq]++; + kstat_this_cpu.irqs_sum++; } static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) @@ -65,8 +67,9 @@ static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); #define kstat_irqs_this_cpu(DESC) \ ((DESC)->kstat_irqs[smp_processor_id()]) -#define kstat_incr_irqs_this_cpu(irqno, DESC) \ - ((DESC)->kstat_irqs[smp_processor_id()]++) +#define kstat_incr_irqs_this_cpu(irqno, DESC) do {\ + ((DESC)->kstat_irqs[smp_processor_id()]++);\ + kstat_this_cpu.irqs_sum++; } while (0) #endif @@ -94,6 +97,13 @@ static inline unsigned int kstat_irqs(unsigned int irq) return sum; } +/* + * Number of interrupts per cpu, since bootup + */ +static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) +{ + return kstat_cpu(cpu).irqs_sum; +} /* * Lock/unlock the current runqueue - to extract task statistics: -- cgit v1.2.3 From 478735e38887077ac77a9756121b6ce0cb956e2f Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 27 Oct 2010 15:34:15 -0700 Subject: /proc/stat: fix scalability of irq sum of all cpu In /proc/stat, the per-IRQ event counts are shown by summing each irq's events over all cpus. But we can make use of kstat_irqs(), which does the same calculation. If !CONFIG_GENERIC_HARDIRQS, it's not a big cost. (Both the number of cpus and the number of irqs are small.) If a system is very big and CONFIG_GENERIC_HARDIRQS is set, it does for_each_irq() for_each_cpu() - look up a radix tree - read desc->irq_stat[cpu] This is not efficient. This patch adds kstat_irqs() for CONFIG_GENERIC_HARDIRQS and changes the calculation to for_each_irq() look up radix tree for_each_cpu() - read desc->irq_stat[cpu] This reduces the cost.
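A minimal sketch of that loop inversion (again an editorial illustration under assumptions: find_desc() stands in for the kernel's radix-tree lookup, and the table sizes are shrunk so the program runs):

	#include <stdio.h>

	#define NR_CPUS 8	/* shrunk from 4096 for the sketch */
	#define NR_IRQS 16	/* shrunk from 4592 */

	struct irq_desc {
		unsigned int kstat_irqs[NR_CPUS];
	};

	static struct irq_desc descs[NR_IRQS];

	/* Stand-in for irq_to_desc(): pretend this is a costly radix-tree walk. */
	static struct irq_desc *find_desc(unsigned int irq)
	{
		return irq < NR_IRQS ? &descs[irq] : NULL;
	}

	/* Before: the lookup runs once per (irq, cpu) pair. */
	static unsigned long sum_lookup_per_pair(void)
	{
		unsigned long sum = 0;
		unsigned int irq;
		int cpu;

		for (irq = 0; irq < NR_IRQS; irq++)
			for (cpu = 0; cpu < NR_CPUS; cpu++) {
				struct irq_desc *desc = find_desc(irq);

				if (desc)
					sum += desc->kstat_irqs[cpu];
			}
		return sum;
	}

	/* After: one lookup per irq, then a plain walk over the per-cpu array. */
	static unsigned long sum_lookup_per_irq(void)
	{
		unsigned long sum = 0;
		unsigned int irq;
		int cpu;

		for (irq = 0; irq < NR_IRQS; irq++) {
			struct irq_desc *desc = find_desc(irq);

			if (!desc)
				continue;
			for (cpu = 0; cpu < NR_CPUS; cpu++)
				sum += desc->kstat_irqs[cpu];
		}
		return sum;
	}

	int main(void)
	{
		descs[2].kstat_irqs[1] = 7;
		printf("%lu %lu\n", sum_lookup_per_pair(), sum_lookup_per_irq());
		return 0;
	}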
A test on a (4096 cpus, 256 nodes, 4592 irqs) host (by Jack Steiner): %time cat /proc/stat > /dev/null Before Patch: 2.459 sec After Patch: 0.561 sec [akpm@linux-foundation.org: unexport kstat_irqs, coding-style tweaks] [akpm@linux-foundation.org: fix unused variable 'per_irq_sum'] Signed-off-by: KAMEZAWA Hiroyuki Tested-by: Jack Steiner Acked-by: Jack Steiner Cc: Yinghai Lu Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/stat.c | 10 ++-------- include/linux/kernel_stat.h | 4 ++++ kernel/irq/irqdesc.c | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/proc/stat.c b/fs/proc/stat.c index b80c620565bf..e15a19c93bae 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -31,7 +31,6 @@ static int show_stat(struct seq_file *p, void *v) u64 sum_softirq = 0; unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; struct timespec boottime; - unsigned int per_irq_sum; user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; @@ -108,13 +107,8 @@ static int show_stat(struct seq_file *p, void *v) seq_printf(p, "intr %llu", (unsigned long long)sum); /* sum again ? it could be updated? */ - for_each_irq_nr(j) { - per_irq_sum = 0; - for_each_possible_cpu(i) - per_irq_sum += kstat_irqs_cpu(j, i); - - seq_printf(p, " %u", per_irq_sum); - } + for_each_irq_nr(j) + seq_printf(p, " %u", kstat_irqs(j)); seq_printf(p, "\nctxt %llu\n" diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 8b9b89085530..ad54c846911b 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -86,6 +86,7 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) /* * Number of interrupts per specific IRQ source, since bootup */ +#ifndef CONFIG_GENERIC_HARDIRQS static inline unsigned int kstat_irqs(unsigned int irq) { unsigned int sum = 0; @@ -96,6 +97,9 @@ static inline unsigned int kstat_irqs(unsigned int irq) return sum; } +#else +extern unsigned int kstat_irqs(unsigned int irq); +#endif /* * Number of interrupts per cpu, since bootup diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 9d917ff72675..9988d03797f5 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -393,3 +393,18 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) struct irq_desc *desc = irq_to_desc(irq); return desc ? desc->kstat_irqs[cpu] : 0; } + +#ifdef CONFIG_GENERIC_HARDIRQS +unsigned int kstat_irqs(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + int cpu; + int sum = 0; + + if (!desc) + return 0; + for_each_possible_cpu(cpu) + sum += desc->kstat_irqs[cpu]; + return sum; +} +#endif /* CONFIG_GENERIC_HARDIRQS */ -- cgit v1.2.3 From 9807224f1dce5fb746ee33fb67ea2e38dafe3e9c Mon Sep 17 00:00:00 2001 From: Paul Fulghum Date: Wed, 27 Oct 2010 15:34:22 -0700 Subject: drivers/char/synclink_gt.c: add extended sync feature Add support for the hardware's extended byte synchronous mode feature.
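As a rough orientation, a hypothetical user-space sketch of driving the new interface (the device node, sync pattern, and error handling are invented; MGSL_IOCSXSYNC, MGSL_IOCSXCTRL, and the xctrl bit layout are the ones added by this patch, and the operating mode would additionally be selected by setting params.mode = MGSL_MODE_XSYNC through the pre-existing MGSL_IOCSPARAMS ioctl):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/synclink.h>

	int main(void)
	{
		/* Hypothetical device node; adjust for the actual adapter. */
		int fd = open("/dev/ttySLG0", O_RDWR | O_NOCTTY);

		if (fd < 0) {
			perror("open");
			return 1;
		}

		/*
		 * Load a 2-byte sync pattern, 0x7e96; the most significant
		 * byte of the pattern is sent/detected first.
		 */
		if (ioctl(fd, MGSL_IOCSXSYNC, 0x7e96) < 0)
			perror("MGSL_IOCSXSYNC");

		/*
		 * xctrl[18:17] = 01 selects a 2-byte pattern; terminal count
		 * enable (bit 16) and the count field [15:0] stay zero.
		 */
		if (ioctl(fd, MGSL_IOCSXCTRL, 1 << 17) < 0)
			perror("MGSL_IOCSXCTRL");

		return 0;
	}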
Signed-off-by: Paul Fulghum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/synclink_gt.c | 111 +++++++++++++++++++++++++++++++++++++++++++-- include/linux/synclink.h | 5 ++ 2 files changed, 113 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 9f7fc71474b4..d01fffeac951 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -301,6 +301,8 @@ struct slgt_info { unsigned int rx_pio; unsigned int if_mode; unsigned int base_clock; + unsigned int xsync; + unsigned int xctrl; /* device status */ @@ -405,6 +407,8 @@ static MGSL_PARAMS default_params = { #define TDCSR 0x94 /* tx DMA control/status */ #define RDDAR 0x98 /* rx DMA descriptor address */ #define TDDAR 0x9c /* tx DMA descriptor address */ +#define XSR 0x40 /* extended sync pattern */ +#define XCR 0x44 /* extended control */ #define RXIDLE BIT14 #define RXBREAK BIT14 @@ -517,6 +521,10 @@ static int set_interface(struct slgt_info *info, int if_mode); static int set_gpio(struct slgt_info *info, struct gpio_desc __user *gpio); static int get_gpio(struct slgt_info *info, struct gpio_desc __user *gpio); static int wait_gpio(struct slgt_info *info, struct gpio_desc __user *gpio); +static int get_xsync(struct slgt_info *info, int __user *if_mode); +static int set_xsync(struct slgt_info *info, int if_mode); +static int get_xctrl(struct slgt_info *info, int __user *if_mode); +static int set_xctrl(struct slgt_info *info, int if_mode); /* * driver functions @@ -1056,6 +1064,14 @@ static int ioctl(struct tty_struct *tty, struct file *file, return get_gpio(info, argp); case MGSL_IOCWAITGPIO: return wait_gpio(info, argp); + case MGSL_IOCGXSYNC: + return get_xsync(info, argp); + case MGSL_IOCSXSYNC: + return set_xsync(info, (int)arg); + case MGSL_IOCGXCTRL: + return get_xctrl(info, argp); + case MGSL_IOCSXCTRL: + return set_xctrl(info, (int)arg); } mutex_lock(&info->port.mutex); switch (cmd) { @@ -1213,12 +1229,16 @@ static long slgt_compat_ioctl(struct tty_struct *tty, struct file *file, case MGSL_IOCSGPIO: case MGSL_IOCGGPIO: case MGSL_IOCWAITGPIO: + case MGSL_IOCGXSYNC: + case MGSL_IOCGXCTRL: case MGSL_IOCSTXIDLE: case MGSL_IOCTXENABLE: case MGSL_IOCRXENABLE: case MGSL_IOCTXABORT: case TIOCMIWAIT: case MGSL_IOCSIF: + case MGSL_IOCSXSYNC: + case MGSL_IOCSXCTRL: rc = ioctl(tty, file, cmd, arg); break; } @@ -1961,6 +1981,7 @@ static void bh_handler(struct work_struct *work) case MGSL_MODE_RAW: case MGSL_MODE_MONOSYNC: case MGSL_MODE_BISYNC: + case MGSL_MODE_XSYNC: while(rx_get_buf(info)); break; } @@ -2889,6 +2910,69 @@ static int set_interface(struct slgt_info *info, int if_mode) return 0; } +static int get_xsync(struct slgt_info *info, int __user *xsync) +{ + DBGINFO(("%s get_xsync=%x\n", info->device_name, info->xsync)); + if (put_user(info->xsync, xsync)) + return -EFAULT; + return 0; +} + +/* + * set extended sync pattern (1 to 4 bytes) for extended sync mode + * + * sync pattern is contained in least significant bytes of value + * most significant byte of sync pattern is oldest (1st sent/detected) + */ +static int set_xsync(struct slgt_info *info, int xsync) +{ + unsigned long flags; + + DBGINFO(("%s set_xsync=%x)\n", info->device_name, xsync)); + spin_lock_irqsave(&info->lock, flags); + info->xsync = xsync; + wr_reg32(info, XSR, xsync); + spin_unlock_irqrestore(&info->lock, flags); + return 0; +} + +static int get_xctrl(struct slgt_info *info, int __user *xctrl) +{ + DBGINFO(("%s get_xctrl=%x\n", info->device_name, 
info->xctrl)); + if (put_user(info->xctrl, xctrl)) + return -EFAULT; + return 0; +} + +/* + * set extended control options + * + * xctrl[31:19] reserved, must be zero + * xctrl[18:17] extended sync pattern length in bytes + * 00 = 1 byte in xsr[7:0] + * 01 = 2 bytes in xsr[15:0] + * 10 = 3 bytes in xsr[23:0] + * 11 = 4 bytes in xsr[31:0] + * xctrl[16] 1 = enable terminal count, 0=disabled + * xctrl[15:0] receive terminal count for fixed length packets + * value is count minus one (0 = 1 byte packet) + * when terminal count is reached, receiver + * automatically returns to hunt mode and receive + * FIFO contents are flushed to DMA buffers with + * end of frame (EOF) status + */ +static int set_xctrl(struct slgt_info *info, int xctrl) +{ + unsigned long flags; + + DBGINFO(("%s set_xctrl=%x)\n", info->device_name, xctrl)); + spin_lock_irqsave(&info->lock, flags); + info->xctrl = xctrl; + wr_reg32(info, XCR, xctrl); + spin_unlock_irqrestore(&info->lock, flags); + return 0; +} + /* * set general purpose IO pin state and direction * @@ -3768,7 +3852,9 @@ module_exit(slgt_exit); #define CALC_REGADDR() \ unsigned long reg_addr = ((unsigned long)info->reg_addr) + addr; \ if (addr >= 0x80) \ - reg_addr += (info->port_num) * 32; + reg_addr += (info->port_num) * 32; \ + else if (addr >= 0x40) \ + reg_addr += (info->port_num) * 16; static __u8 rd_reg8(struct slgt_info *info, unsigned int addr) { @@ -4187,7 +4273,13 @@ static void sync_mode(struct slgt_info *info) /* TCR (tx control) * - * 15..13 mode, 000=HDLC 001=raw 010=async 011=monosync 100=bisync + * 15..13 mode + * 000=HDLC/SDLC + * 001=raw bit synchronous + * 010=asynchronous/isochronous + * 011=monosync byte synchronous + * 100=bisync byte synchronous + * 101=xsync byte synchronous * 12..10 encoding * 09 CRC enable * 08 CRC32 @@ -4202,6 +4294,9 @@ static void sync_mode(struct slgt_info *info) val = BIT2; switch(info->params.mode) { + case MGSL_MODE_XSYNC: + val |= BIT15 + BIT13; + break; case MGSL_MODE_MONOSYNC: val |= BIT14 + BIT13; break; case MGSL_MODE_BISYNC: val |= BIT15; break; case MGSL_MODE_RAW: val |= BIT13; break; @@ -4256,7 +4351,13 @@ static void sync_mode(struct slgt_info *info) /* RCR (rx control) * - * 15..13 mode, 000=HDLC 001=raw 010=async 011=monosync 100=bisync + * 15..13 mode + * 000=HDLC/SDLC + * 001=raw bit synchronous + * 010=asynchronous/isochronous + * 011=monosync byte synchronous + * 100=bisync byte synchronous + * 101=xsync byte synchronous * 12..10 encoding * 09 CRC enable * 08 CRC32 @@ -4268,6 +4369,9 @@ static void sync_mode(struct slgt_info *info) val = 0; switch(info->params.mode) { + case MGSL_MODE_XSYNC: + val |= BIT15 + BIT13; + break; case MGSL_MODE_MONOSYNC: val |= BIT14 + BIT13; break; case MGSL_MODE_BISYNC: val |= BIT15; break; case MGSL_MODE_RAW: val |= BIT13; break; @@ -4684,6 +4788,7 @@ static bool rx_get_buf(struct slgt_info *info) switch(info->params.mode) { case MGSL_MODE_MONOSYNC: case MGSL_MODE_BISYNC: + case MGSL_MODE_XSYNC: /* ignore residue in byte synchronous modes */ if (desc_residue(info->rbufs[i])) count--; diff --git a/include/linux/synclink.h b/include/linux/synclink.h index 0ff2779c44d0..2e7d81c4e5ad 100644 --- a/include/linux/synclink.h +++ b/include/linux/synclink.h @@ -126,6 +126,7 @@ #define MGSL_MODE_BISYNC 4 #define MGSL_MODE_RAW 6 #define MGSL_MODE_BASE_CLOCK 7 +#define MGSL_MODE_XSYNC 8 #define MGSL_BUS_TYPE_ISA 1 #define MGSL_BUS_TYPE_EISA 2 @@ -290,6 +291,10 @@ struct gpio_desc { #define MGSL_IOCSGPIO _IOW(MGSL_MAGIC_IOC,16,struct gpio_desc) #define MGSL_IOCGGPIO 
_IOR(MGSL_MAGIC_IOC,17,struct gpio_desc) #define MGSL_IOCWAITGPIO _IOWR(MGSL_MAGIC_IOC,18,struct gpio_desc) +#define MGSL_IOCSXSYNC _IO(MGSL_MAGIC_IOC, 19) +#define MGSL_IOCGXSYNC _IO(MGSL_MAGIC_IOC, 20) +#define MGSL_IOCSXCTRL _IO(MGSL_MAGIC_IOC, 21) +#define MGSL_IOCGXCTRL _IO(MGSL_MAGIC_IOC, 22) #ifdef __KERNEL__ /* provide 32 bit ioctl compatibility on 64 bit systems */ -- cgit v1.2.3 From 2c70f022e2e1b1493e157dbc3796b1f70a3ff162 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:26 -0700 Subject: rapidio: fix RapidIO sysfs hierarchy This set of RapidIO patches extends support for standard error recovery mechanism and adds new IDT Gen2 sRIO switch devices - CPS-1848 and CPS-1616. Implementation of the standard error-stopped state recovery mechanism (as defined by the RapidIO specification) is required for the new switches. Version 2 of this set of patches addresses received comments and fixes an error notification setup issue found in the idt_gen2.c after the first version was released. This patch: Make RapidIO devices appear in /sys/devices/rapidio directory instead of top of /sys/devices directory. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-driver.c | 2 +- drivers/rapidio/rio-scan.c | 1 + include/linux/rio.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c index 3222fa3c808c..0f4a53bdaa3c 100644 --- a/drivers/rapidio/rio-driver.c +++ b/drivers/rapidio/rio-driver.c @@ -192,7 +192,7 @@ static int rio_match_bus(struct device *dev, struct device_driver *drv) out:return 0; } -static struct device rio_bus = { +struct device rio_bus = { .init_name = "rapidio", }; diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index 8070e074c739..1123be8f4b18 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -478,6 +478,7 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, } rdev->dev.bus = &rio_bus_type; + rdev->dev.parent = &rio_bus; device_initialize(&rdev->dev); rdev->dev.release = rio_release_dev; diff --git a/include/linux/rio.h b/include/linux/rio.h index bd6eb0ed34a7..84c9f8c5fb23 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -67,6 +67,7 @@ #define RIO_PW_MSG_SIZE 64 extern struct bus_type rio_bus_type; +extern struct device rio_bus; extern struct list_head rio_devices; /* list of all devices */ struct rio_mport; -- cgit v1.2.3 From ae05cbd5adef897d405ce8f90484c1239f79e086 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:29 -0700 Subject: rapidio: use stored ingress port number instead of register read The switch port information is obtained and stored during RIO device setup. Therefore repeated reads from Switch Port Information CAR may be removed. 
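A small sketch of what the cached lookup amounts to (user-space illustration; the masks and macros mirror the RIO_SWP_INFO_* definitions in rio_regs.h touched below, while the example register value is invented):

	#include <stdio.h>

	#define RIO_SWP_INFO_PORT_TOTAL_MASK	0x0000ff00
	#define RIO_SWP_INFO_PORT_NUM_MASK	0x000000ff
	#define RIO_GET_TOTAL_PORTS(x)	(((x) & RIO_SWP_INFO_PORT_TOTAL_MASK) >> 8)
	#define RIO_GET_PORT_NUM(x)	((x) & RIO_SWP_INFO_PORT_NUM_MASK)

	int main(void)
	{
		/*
		 * swpinfo is read once from the Switch Port Information CAR
		 * at device setup and cached in struct rio_dev; afterwards
		 * the ingress port comes from a bit test instead of a
		 * maintenance transaction on the RapidIO fabric.
		 */
		unsigned int swpinfo = 0x00001203;	/* invented: 18 ports, ingress port 3 */

		printf("total ports: %u, ingress port: %u\n",
		       RIO_GET_TOTAL_PORTS(swpinfo), RIO_GET_PORT_NUM(swpinfo));
		return 0;
	}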
Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-scan.c | 36 +++++++++--------------------------- include/linux/rio.h | 4 +++- include/linux/rio_regs.h | 2 ++ 3 files changed, 14 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index 1123be8f4b18..d09c359844f3 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -420,6 +420,11 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, hopcount, RIO_EFB_ERR_MGMNT); } + if (rdev->pef & (RIO_PEF_SWITCH | RIO_PEF_MULTIPORT)) { + rio_mport_read_config_32(port, destid, hopcount, + RIO_SWP_INFO_CAR, &rdev->swpinfo); + } + rio_mport_read_config_32(port, destid, hopcount, RIO_SRC_OPS_CAR, &rdev->src_ops); rio_mport_read_config_32(port, destid, hopcount, RIO_DST_OPS_CAR, @@ -439,8 +444,6 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, /* If a PE has both switch and other functions, show it as a switch */ if (rio_is_switch(rdev)) { - rio_mport_read_config_32(port, destid, hopcount, - RIO_SWP_INFO_CAR, &rdev->swpinfo); rswitch = kzalloc(sizeof(struct rio_switch), GFP_KERNEL); if (!rswitch) goto cleanup; @@ -458,6 +461,7 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, rdid++) rswitch->route_table[rdid] = RIO_INVALID_ROUTE; rdev->rswitch = rswitch; + rswitch->rdev = rdev; dev_set_name(&rdev->dev, "%02x:s:%04x", rdev->net->id, rdev->rswitch->switchid); rio_switch_init(rdev, do_enum); @@ -718,25 +722,6 @@ static u16 rio_get_host_deviceid_lock(struct rio_mport *port, u8 hopcount) return (u16) (result & 0xffff); } -/** - * rio_get_swpinfo_inport- Gets the ingress port number - * @mport: Master port to send transaction - * @destid: Destination ID associated with the switch - * @hopcount: Number of hops to the device - * - * Returns port number being used to access the switch device. 
- */ -static u8 -rio_get_swpinfo_inport(struct rio_mport *mport, u16 destid, u8 hopcount) -{ - u32 result; - - rio_mport_read_config_32(mport, destid, hopcount, RIO_SWP_INFO_CAR, - &result); - - return (u8) (result & 0xff); -} - /** * rio_get_swpinfo_tports- Gets total number of ports on the switch * @mport: Master port to send transaction @@ -834,8 +819,7 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, if (rio_is_switch(rdev)) { next_switchid++; - sw_inport = rio_get_swpinfo_inport(port, - RIO_ANY_DESTID(port->sys_size), hopcount); + sw_inport = RIO_GET_PORT_NUM(rdev->swpinfo); rio_route_add_entry(port, rdev->rswitch, RIO_GLOBAL_TABLE, port->host_deviceid, sw_inport, 0); rdev->rswitch->route_table[port->host_deviceid] = sw_inport; @@ -989,8 +973,7 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, "RIO: found %s (vid %4.4x did %4.4x) with %d ports\n", rio_name(rdev), rdev->vid, rdev->did, num_ports); for (port_num = 0; port_num < num_ports; port_num++) { - if (rio_get_swpinfo_inport(port, destid, hopcount) == - port_num) + if (RIO_GET_PORT_NUM(rdev->swpinfo) == port_num) continue; if (rio_sport_is_active @@ -1109,8 +1092,7 @@ static void rio_update_route_tables(struct rio_mport *port) if (rswitch->destid == destid) continue; - sport = rio_get_swpinfo_inport(port, - rswitch->destid, rswitch->hopcount); + sport = RIO_GET_PORT_NUM(rswitch->rdev->swpinfo); if (rswitch->add_entry) { rio_route_add_entry(port, rswitch, diff --git a/include/linux/rio.h b/include/linux/rio.h index 84c9f8c5fb23..ffdfe5ad43bf 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -112,7 +112,7 @@ struct rio_dev { u16 asm_rev; u16 efptr; u32 pef; - u32 swpinfo; /* Only used for switches */ + u32 swpinfo; u32 src_ops; u32 dst_ops; u32 comp_tag; @@ -219,6 +219,7 @@ struct rio_net { /** * struct rio_switch - RIO switch info * @node: Node in global list of switches + * @rdev: Associated RIO device structure * @switchid: Switch ID that is unique across a network * @hopcount: Hopcount to this switch * @destid: Associated destid in the path @@ -234,6 +235,7 @@ struct rio_net { */ struct rio_switch { struct list_head node; + struct rio_dev *rdev; u16 switchid; u16 hopcount; u16 destid; diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h index aedee0489fb4..be80b1b21815 100644 --- a/include/linux/rio_regs.h +++ b/include/linux/rio_regs.h @@ -33,6 +33,7 @@ #define RIO_PEF_MEMORY 0x40000000 /* [I] MMIO */ #define RIO_PEF_PROCESSOR 0x20000000 /* [I] Processor */ #define RIO_PEF_SWITCH 0x10000000 /* [I] Switch */ +#define RIO_PEF_MULTIPORT 0x08000000 /* [VI, 2.1] Multiport */ #define RIO_PEF_INB_MBOX 0x00f00000 /* [II] Mailboxes */ #define RIO_PEF_INB_MBOX0 0x00800000 /* [II] Mailbox 0 */ #define RIO_PEF_INB_MBOX1 0x00400000 /* [II] Mailbox 1 */ @@ -51,6 +52,7 @@ #define RIO_SWP_INFO_PORT_TOTAL_MASK 0x0000ff00 /* [I] Total number of ports */ #define RIO_SWP_INFO_PORT_NUM_MASK 0x000000ff /* [I] Maintenance transaction port number */ #define RIO_GET_TOTAL_PORTS(x) ((x & RIO_SWP_INFO_PORT_TOTAL_MASK) >> 8) +#define RIO_GET_PORT_NUM(x) (x & RIO_SWP_INFO_PORT_NUM_MASK) #define RIO_SRC_OPS_CAR 0x18 /* [I] Source Operations CAR */ #define RIO_SRC_OPS_READ 0x00008000 /* [I] Read op */ -- cgit v1.2.3 From 68fe4df5d21294401959fa61d5a7094705ed8f6f Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:29 -0700 Subject: rapidio: add relation links between RIO device structures Create back and forward links between RIO devices. 
These links are intended for use by error management and hot-plug extensions. Links for redundant RIO connections between switches are not set (will be fixed in a separate patch). Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-scan.c | 56 +++++++++++++++++++--------------------------- include/linux/rio.h | 4 ++++ 2 files changed, 27 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index d09c359844f3..d2ea01872d6a 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -444,7 +444,10 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, /* If a PE has both switch and other functions, show it as a switch */ if (rio_is_switch(rdev)) { - rswitch = kzalloc(sizeof(struct rio_switch), GFP_KERNEL); + rswitch = kzalloc(sizeof(*rswitch) + + RIO_GET_TOTAL_PORTS(rdev->swpinfo) * + sizeof(rswitch->nextdev[0]), + GFP_KERNEL); if (!rswitch) goto cleanup; rswitch->switchid = next_switchid; @@ -722,25 +725,6 @@ static u16 rio_get_host_deviceid_lock(struct rio_mport *port, u8 hopcount) return (u16) (result & 0xffff); } -/** - * rio_get_swpinfo_tports- Gets total number of ports on the switch - * @mport: Master port to send transaction - * @destid: Destination ID associated with the switch - * @hopcount: Number of hops to the device - * - * Returns total numbers of ports implemented by the switch device. - */ -static u8 rio_get_swpinfo_tports(struct rio_mport *mport, u16 destid, - u8 hopcount) -{ - u32 result; - - rio_mport_read_config_32(mport, destid, hopcount, RIO_SWP_INFO_CAR, - &result); - - return RIO_GET_TOTAL_PORTS(result); -} - /** * rio_net_add_mport- Add a master port to a RIO network * @net: RIO network @@ -761,15 +745,16 @@ static void rio_net_add_mport(struct rio_net *net, struct rio_mport *port) * @net: RIO network being enumerated * @port: Master port to send transactions * @hopcount: Number of hops into the network + * @prev: Previous RIO device connected to the enumerated one + * @prev_port: Port on previous RIO device * * Recursively enumerates a RIO network. Transactions are sent via the * master port passed in @port. */ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, - u8 hopcount) + u8 hopcount, struct rio_dev *prev, int prev_port) { int port_num; - int num_ports; int cur_destid; int sw_destid; int sw_inport; @@ -814,6 +799,9 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, if (rdev) { /* Add device to the global and bus/net specific list. 
*/ list_add_tail(&rdev->net_list, &net->devices); + rdev->prev = prev; + if (prev && rio_is_switch(prev)) + prev->rswitch->nextdev[prev_port] = rdev; } else return -1; @@ -832,14 +820,14 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, rdev->rswitch->route_table[destid] = sw_inport; } - num_ports = - rio_get_swpinfo_tports(port, RIO_ANY_DESTID(port->sys_size), - hopcount); pr_debug( "RIO: found %s (vid %4.4x did %4.4x) with %d ports\n", - rio_name(rdev), rdev->vid, rdev->did, num_ports); + rio_name(rdev), rdev->vid, rdev->did, + RIO_GET_TOTAL_PORTS(rdev->swpinfo)); sw_destid = next_destid; - for (port_num = 0; port_num < num_ports; port_num++) { + for (port_num = 0; + port_num < RIO_GET_TOTAL_PORTS(rdev->swpinfo); + port_num++) { /*Enable Input Output Port (transmitter reviever)*/ rio_enable_rx_tx_port(port, 0, RIO_ANY_DESTID(port->sys_size), @@ -864,7 +852,8 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, RIO_ANY_DESTID(port->sys_size), port_num, 0); - if (rio_enum_peer(net, port, hopcount + 1) < 0) + if (rio_enum_peer(net, port, hopcount + 1, + rdev, port_num) < 0) return -1; /* Update routing tables */ @@ -951,7 +940,6 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, u8 hopcount) { u8 port_num, route_port; - int num_ports; struct rio_dev *rdev; u16 ndestid; @@ -968,11 +956,13 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, /* Associated destid is how we accessed this switch */ rdev->rswitch->destid = destid; - num_ports = rio_get_swpinfo_tports(port, destid, hopcount); pr_debug( "RIO: found %s (vid %4.4x did %4.4x) with %d ports\n", - rio_name(rdev), rdev->vid, rdev->did, num_ports); - for (port_num = 0; port_num < num_ports; port_num++) { + rio_name(rdev), rdev->vid, rdev->did, + RIO_GET_TOTAL_PORTS(rdev->swpinfo)); + for (port_num = 0; + port_num < RIO_GET_TOTAL_PORTS(rdev->swpinfo); + port_num++) { if (RIO_GET_PORT_NUM(rdev->swpinfo) == port_num) continue; @@ -1167,7 +1157,7 @@ int __devinit rio_enum_mport(struct rio_mport *mport) /* Enable Input Output Port (transmitter reviever) */ rio_enable_rx_tx_port(mport, 1, 0, 0, 0); - if (rio_enum_peer(net, mport, 0) < 0) { + if (rio_enum_peer(net, mport, 0, NULL, 0) < 0) { /* A higher priority host won enumeration, bail. 
*/ printk(KERN_INFO "RIO: master port %d device has lost enumeration to a remote host\n", diff --git a/include/linux/rio.h b/include/linux/rio.h index ffdfe5ad43bf..8d9e66dc7969 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -99,6 +99,7 @@ union rio_pw_msg; * @riores: RIO resources this device owns * @pwcback: port-write callback function for this device * @destid: Network destination ID + * @prev: Previous RIO device connected to the current one */ struct rio_dev { struct list_head global_list; /* node in list of all RIO devices */ @@ -125,6 +126,7 @@ struct rio_dev { struct resource riores[RIO_MAX_DEV_RESOURCES]; int (*pwcback) (struct rio_dev *rdev, union rio_pw_msg *msg, int step); u16 destid; + struct rio_dev *prev; }; #define rio_dev_g(n) list_entry(n, struct rio_dev, global_list) @@ -232,6 +234,7 @@ struct rio_net { * @get_domain: Callback for switch-specific domain get function * @em_init: Callback for switch-specific error management initialization function * @em_handle: Callback for switch-specific error management handler function + * @nextdev: Array of per-port pointers to the next attached device */ struct rio_switch { struct list_head node; @@ -253,6 +256,7 @@ struct rio_switch { u8 *sw_domain); int (*em_init) (struct rio_dev *dev); int (*em_handle) (struct rio_dev *dev, u8 swport); + struct rio_dev *nextdev[0]; }; /* Low-level architecture-dependent routines */ -- cgit v1.2.3 From dd5648c9f53b5cbd9f948d752624400545f979fb Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:30 -0700 Subject: rapidio: add default handler for error-stopped state The default error-stopped state handler provides recovery mechanism as defined by RIO specification. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 218 ++++++++++++++++++++++++++++++++------ drivers/rapidio/switches/idtcps.c | 10 ++ drivers/rapidio/switches/tsi57x.c | 4 + include/linux/rio_regs.h | 8 +- 4 files changed, 206 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 74e9d22d95fb..77bd4165238f 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -494,6 +494,148 @@ int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock) return 0; } +/** + * rio_get_input_status - Sends a Link-Request/Input-Status control symbol and + * returns link-response (if requested). 
+ * @rdev: RIO devive to issue Input-status command + * @pnum: Device port number to issue the command + * @lnkresp: Response from a link partner + */ +static int +rio_get_input_status(struct rio_dev *rdev, int pnum, u32 *lnkresp) +{ + struct rio_mport *mport = rdev->net->hport; + u16 destid = rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + u32 regval; + int checkcount; + + if (lnkresp) { + /* Read from link maintenance response register + * to clear valid bit */ + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_MNT_RSP_CSR(pnum), + ®val); + udelay(50); + } + + /* Issue Input-status command */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_MNT_REQ_CSR(pnum), + RIO_MNT_REQ_CMD_IS); + + /* Exit if the response is not expected */ + if (lnkresp == NULL) + return 0; + + checkcount = 3; + while (checkcount--) { + udelay(50); + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_MNT_RSP_CSR(pnum), + ®val); + if (regval & RIO_PORT_N_MNT_RSP_RVAL) { + *lnkresp = regval; + return 0; + } + } + + return -EIO; +} + +/** + * rio_clr_err_stopped - Clears port Error-stopped states. + * @rdev: Pointer to RIO device control structure + * @pnum: Switch port number to clear errors + * @err_status: port error status (if 0 reads register from device) + */ +static int rio_clr_err_stopped(struct rio_dev *rdev, u32 pnum, u32 err_status) +{ + struct rio_mport *mport = rdev->net->hport; + u16 destid = rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + struct rio_dev *nextdev = rdev->rswitch->nextdev[pnum]; + u32 regval; + u32 far_ackid, far_linkstat, near_ackid; + + if (err_status == 0) + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(pnum), + &err_status); + + if (err_status & RIO_PORT_N_ERR_STS_PW_OUT_ES) { + pr_debug("RIO_EM: servicing Output Error-Stopped state\n"); + /* + * Send a Link-Request/Input-Status control symbol + */ + if (rio_get_input_status(rdev, pnum, ®val)) { + pr_debug("RIO_EM: Input-status response timeout\n"); + goto rd_err; + } + + pr_debug("RIO_EM: SP%d Input-status response=0x%08x\n", + pnum, regval); + far_ackid = (regval & RIO_PORT_N_MNT_RSP_ASTAT) >> 5; + far_linkstat = regval & RIO_PORT_N_MNT_RSP_LSTAT; + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_ACK_STS_CSR(pnum), + ®val); + pr_debug("RIO_EM: SP%d_ACK_STS_CSR=0x%08x\n", pnum, regval); + near_ackid = (regval & RIO_PORT_N_ACK_INBOUND) >> 24; + pr_debug("RIO_EM: SP%d far_ackID=0x%02x far_linkstat=0x%02x" \ + " near_ackID=0x%02x\n", + pnum, far_ackid, far_linkstat, near_ackid); + + /* + * If required, synchronize ackIDs of near and + * far sides. + */ + if ((far_ackid != ((regval & RIO_PORT_N_ACK_OUTSTAND) >> 8)) || + (far_ackid != (regval & RIO_PORT_N_ACK_OUTBOUND))) { + /* Align near outstanding/outbound ackIDs with + * far inbound. + */ + rio_mport_write_config_32(mport, destid, + hopcount, rdev->phys_efptr + + RIO_PORT_N_ACK_STS_CSR(pnum), + (near_ackid << 24) | + (far_ackid << 8) | far_ackid); + /* Align far outstanding/outbound ackIDs with + * near inbound. 
+ */ + far_ackid++; + if (nextdev) + rio_write_config_32(nextdev, + nextdev->phys_efptr + + RIO_PORT_N_ACK_STS_CSR(RIO_GET_PORT_NUM(nextdev->swpinfo)), + (far_ackid << 24) | + (near_ackid << 8) | near_ackid); + else + pr_debug("RIO_EM: Invalid nextdev pointer (NULL)\n"); + } +rd_err: + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(pnum), + &err_status); + pr_debug("RIO_EM: SP%d_ERR_STS_CSR=0x%08x\n", pnum, err_status); + } + + if ((err_status & RIO_PORT_N_ERR_STS_PW_INP_ES) && nextdev) { + pr_debug("RIO_EM: servicing Input Error-Stopped state\n"); + rio_get_input_status(nextdev, + RIO_GET_PORT_NUM(nextdev->swpinfo), NULL); + udelay(50); + + rio_mport_read_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(pnum), + &err_status); + pr_debug("RIO_EM: SP%d_ERR_STS_CSR=0x%08x\n", pnum, err_status); + } + + return (err_status & (RIO_PORT_N_ERR_STS_PW_OUT_ES | + RIO_PORT_N_ERR_STS_PW_INP_ES)) ? 1 : 0; +} + /** * rio_inb_pwrite_handler - process inbound port-write message * @pw_msg: pointer to inbound port-write message @@ -507,7 +649,7 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) struct rio_mport *mport; u8 hopcount; u16 destid; - u32 err_status; + u32 err_status, em_perrdet, em_ltlerrdet; int rc, portnum; rdev = rio_get_comptag(pw_msg->em.comptag, NULL); @@ -524,12 +666,11 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) { u32 i; for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32);) { - pr_debug("0x%02x: %08x %08x %08x %08x", + pr_debug("0x%02x: %08x %08x %08x %08x\n", i*4, pw_msg->raw[i], pw_msg->raw[i + 1], pw_msg->raw[i + 2], pw_msg->raw[i + 3]); i += 4; } - pr_debug("\n"); } #endif @@ -573,29 +714,28 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) &err_status); pr_debug("RIO_PW: SP%d_ERR_STS_CSR=0x%08x\n", portnum, err_status); - if (pw_msg->em.errdetect) { - pr_debug("RIO_PW: RIO_EM_P%d_ERR_DETECT=0x%08x\n", - portnum, pw_msg->em.errdetect); - /* Clear EM Port N Error Detect CSR */ - rio_mport_write_config_32(mport, destid, hopcount, - rdev->em_efptr + RIO_EM_PN_ERR_DETECT(portnum), 0); - } + if (err_status & RIO_PORT_N_ERR_STS_PORT_OK) { - if (pw_msg->em.ltlerrdet) { - pr_debug("RIO_PW: RIO_EM_LTL_ERR_DETECT=0x%08x\n", - pw_msg->em.ltlerrdet); - /* Clear EM L/T Layer Error Detect CSR */ - rio_mport_write_config_32(mport, destid, hopcount, - rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, 0); - } + if (!(rdev->rswitch->port_ok & (1 << portnum))) { + rdev->rswitch->port_ok |= (1 << portnum); + rio_set_port_lockout(rdev, portnum, 0); + /* Schedule Insertion Service */ + pr_debug("RIO_PW: Device Insertion on [%s]-P%d\n", + rio_name(rdev), portnum); + } - /* Clear Port Errors */ - rio_mport_write_config_32(mport, destid, hopcount, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), - err_status & RIO_PORT_N_ERR_STS_CLR_MASK); + /* Clear error-stopped states (if reported). + * Depending on the link partner state, two attempts + * may be needed for successful recovery. 
+ */ + if (err_status & (RIO_PORT_N_ERR_STS_PW_OUT_ES | + RIO_PORT_N_ERR_STS_PW_INP_ES)) { + if (rio_clr_err_stopped(rdev, portnum, err_status)) + rio_clr_err_stopped(rdev, portnum, 0); + } + } else { /* if (err_status & RIO_PORT_N_ERR_STS_PORT_UNINIT) */ - if (rdev->rswitch->port_ok & (1 << portnum)) { - if (err_status & RIO_PORT_N_ERR_STS_PORT_UNINIT) { + if (rdev->rswitch->port_ok & (1 << portnum)) { rdev->rswitch->port_ok &= ~(1 << portnum); rio_set_port_lockout(rdev, portnum, 1); @@ -608,17 +748,33 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) pr_debug("RIO_PW: Device Extraction on [%s]-P%d\n", rio_name(rdev), portnum); } - } else { - if (err_status & RIO_PORT_N_ERR_STS_PORT_OK) { - rdev->rswitch->port_ok |= (1 << portnum); - rio_set_port_lockout(rdev, portnum, 0); + } - /* Schedule Insertion Service */ - pr_debug("RIO_PW: Device Insertion on [%s]-P%d\n", - rio_name(rdev), portnum); - } + rio_mport_read_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_PN_ERR_DETECT(portnum), &em_perrdet); + if (em_perrdet) { + pr_debug("RIO_PW: RIO_EM_P%d_ERR_DETECT=0x%08x\n", + portnum, em_perrdet); + /* Clear EM Port N Error Detect CSR */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_PN_ERR_DETECT(portnum), 0); } + rio_mport_read_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, &em_ltlerrdet); + if (em_ltlerrdet) { + pr_debug("RIO_PW: RIO_EM_LTL_ERR_DETECT=0x%08x\n", + em_ltlerrdet); + /* Clear EM L/T Layer Error Detect CSR */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, 0); + } + + /* Clear remaining error bits */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), + err_status & RIO_PORT_N_ERR_STS_CLR_MASK); + /* Clear Port-Write Pending bit */ rio_mport_write_config_32(mport, destid, hopcount, rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), diff --git a/drivers/rapidio/switches/idtcps.c b/drivers/rapidio/switches/idtcps.c index 2c790c144f89..fc9f6374f759 100644 --- a/drivers/rapidio/switches/idtcps.c +++ b/drivers/rapidio/switches/idtcps.c @@ -117,6 +117,10 @@ idtcps_get_domain(struct rio_mport *mport, u16 destid, u8 hopcount, static int idtcps_switch_init(struct rio_dev *rdev, int do_enum) { + struct rio_mport *mport = rdev->net->hport; + u16 destid = rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); rdev->rswitch->add_entry = idtcps_route_add_entry; rdev->rswitch->get_entry = idtcps_route_get_entry; @@ -126,6 +130,12 @@ static int idtcps_switch_init(struct rio_dev *rdev, int do_enum) rdev->rswitch->em_init = NULL; rdev->rswitch->em_handle = NULL; + if (do_enum) { + /* set TVAL = ~50us */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x8e << 8); + } + return 0; } diff --git a/drivers/rapidio/switches/tsi57x.c b/drivers/rapidio/switches/tsi57x.c index d34df722d95f..d9e94920e8b0 100644 --- a/drivers/rapidio/switches/tsi57x.c +++ b/drivers/rapidio/switches/tsi57x.c @@ -205,6 +205,10 @@ tsi57x_em_init(struct rio_dev *rdev) portnum++; } + /* set TVAL = ~50us */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x9a << 8); + return 0; } diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h index be80b1b21815..daa269d18e07 100644 --- a/include/linux/rio_regs.h +++ b/include/linux/rio_regs.h @@ -224,15 +224,17 @@ #define 
RIO_PORT_GEN_MASTER 0x40000000 #define RIO_PORT_GEN_DISCOVERED 0x20000000 #define RIO_PORT_N_MNT_REQ_CSR(x) (0x0040 + x*0x20) /* 0x0002 */ +#define RIO_MNT_REQ_CMD_RD 0x03 /* Reset-device command */ +#define RIO_MNT_REQ_CMD_IS 0x04 /* Input-status command */ #define RIO_PORT_N_MNT_RSP_CSR(x) (0x0044 + x*0x20) /* 0x0002 */ #define RIO_PORT_N_MNT_RSP_RVAL 0x80000000 /* Response Valid */ #define RIO_PORT_N_MNT_RSP_ASTAT 0x000003e0 /* ackID Status */ #define RIO_PORT_N_MNT_RSP_LSTAT 0x0000001f /* Link Status */ #define RIO_PORT_N_ACK_STS_CSR(x) (0x0048 + x*0x20) /* 0x0002 */ #define RIO_PORT_N_ACK_CLEAR 0x80000000 -#define RIO_PORT_N_ACK_INBOUND 0x1f000000 -#define RIO_PORT_N_ACK_OUTSTAND 0x00001f00 -#define RIO_PORT_N_ACK_OUTBOUND 0x0000001f +#define RIO_PORT_N_ACK_INBOUND 0x3f000000 +#define RIO_PORT_N_ACK_OUTSTAND 0x00003f00 +#define RIO_PORT_N_ACK_OUTBOUND 0x0000003f #define RIO_PORT_N_ERR_STS_CSR(x) (0x0058 + x*0x20) #define RIO_PORT_N_ERR_STS_PW_OUT_ES 0x00010000 /* Output Error-stopped */ #define RIO_PORT_N_ERR_STS_PW_INP_ES 0x00000100 /* Input Error-stopped */ -- cgit v1.2.3 From ac38d7232dfa3c71b129bab3318ba327bbcf8405 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:31 -0700 Subject: rapidio: modify sysfs initialization for switches 1. Change to create attribute "routes" only for switches. 2. Add a switch-specific callback to create/remove proprietary attributes. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio-sysfs.c | 26 +++++++++++++++++++------- include/linux/rio.h | 6 ++++++ 2 files changed, 25 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index 00b475658356..137ed93ee33f 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -40,9 +40,6 @@ static ssize_t routes_show(struct device *dev, struct device_attribute *attr, ch char *str = buf; int i; - if (!rdev->rswitch) - goto out; - for (i = 0; i < RIO_MAX_ROUTE_ENTRIES(rdev->net->hport->sys_size); i++) { if (rdev->rswitch->route_table[i] == RIO_INVALID_ROUTE) @@ -52,7 +49,6 @@ static ssize_t routes_show(struct device *dev, struct device_attribute *attr, ch rdev->rswitch->route_table[i]); } - out: return (str - buf); } @@ -63,10 +59,11 @@ struct device_attribute rio_dev_attrs[] = { __ATTR_RO(asm_did), __ATTR_RO(asm_vid), __ATTR_RO(asm_rev), - __ATTR_RO(routes), __ATTR_NULL, }; +static DEVICE_ATTR(routes, S_IRUGO, routes_show, NULL); + static ssize_t rio_read_config(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -218,7 +215,17 @@ int rio_create_sysfs_dev_files(struct rio_dev *rdev) { int err = 0; - err = sysfs_create_bin_file(&rdev->dev.kobj, &rio_config_attr); + err = device_create_bin_file(&rdev->dev, &rio_config_attr); + + if (!err && rdev->rswitch) { + err = device_create_file(&rdev->dev, &dev_attr_routes); + if (!err && rdev->rswitch->sw_sysfs) + err = rdev->rswitch->sw_sysfs(rdev, RIO_SW_SYSFS_CREATE); + } + + if (err) + pr_warning("RIO: Failed to create attribute file(s) for %s\n", + rio_name(rdev)); return err; } @@ -231,5 +238,10 @@ int rio_create_sysfs_dev_files(struct rio_dev *rdev) */ void rio_remove_sysfs_dev_files(struct rio_dev *rdev) { - sysfs_remove_bin_file(&rdev->dev.kobj, &rio_config_attr); + device_remove_bin_file(&rdev->dev, &rio_config_attr); + if (rdev->rswitch) { + device_remove_file(&rdev->dev, 
&dev_attr_routes); + if (rdev->rswitch->sw_sysfs) + rdev->rswitch->sw_sysfs(rdev, RIO_SW_SYSFS_REMOVE); + } } diff --git a/include/linux/rio.h b/include/linux/rio.h index 8d9e66dc7969..4fa5e3d2b117 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -218,6 +218,10 @@ struct rio_net { unsigned char id; /* RIO network ID */ }; +/* Definitions used by switch sysfs initialization callback */ +#define RIO_SW_SYSFS_CREATE 1 /* Create switch attributes */ +#define RIO_SW_SYSFS_REMOVE 0 /* Remove switch attributes */ + /** * struct rio_switch - RIO switch info * @node: Node in global list of switches @@ -234,6 +238,7 @@ struct rio_net { * @get_domain: Callback for switch-specific domain get function * @em_init: Callback for switch-specific error management initialization function * @em_handle: Callback for switch-specific error management handler function + * @sw_sysfs: Callback that initializes switch-specific sysfs attributes * @nextdev: Array of per-port pointers to the next attached device */ struct rio_switch { @@ -256,6 +261,7 @@ struct rio_switch { u8 *sw_domain); int (*em_init) (struct rio_dev *dev); int (*em_handle) (struct rio_dev *dev, u8 swport); + int (*sw_sysfs) (struct rio_dev *dev, int create); struct rio_dev *nextdev[0]; }; -- cgit v1.2.3 From a3725c45c114bd06e091802f90533332d1e93819 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:33 -0700 Subject: rapidio: add support for IDT CPS Gen2 switches Add the RIO switch driver and definitions for IDT CPS-1848 and CPS-1616 Gen2 devices. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/switches/Kconfig | 7 + drivers/rapidio/switches/Makefile | 1 + drivers/rapidio/switches/idt_gen2.c | 447 ++++++++++++++++++++++++++++++++++++ include/linux/rio_ids.h | 2 + include/linux/rio_regs.h | 5 + 5 files changed, 462 insertions(+) create mode 100644 drivers/rapidio/switches/idt_gen2.c (limited to 'include') diff --git a/drivers/rapidio/switches/Kconfig b/drivers/rapidio/switches/Kconfig index 2b4e9b2b6631..f47fee5d4563 100644 --- a/drivers/rapidio/switches/Kconfig +++ b/drivers/rapidio/switches/Kconfig @@ -20,6 +20,13 @@ config RAPIDIO_TSI568 ---help--- Includes support for IDT Tsi568 serial RapidIO switch. +config RAPIDIO_CPS_GEN2 + bool "IDT CPS Gen.2 SRIO switch support" + depends on RAPIDIO + default n + ---help--- + Includes support for ITD CPS Gen.2 serial RapidIO switches. + config RAPIDIO_TSI500 bool "Tsi500 Parallel RapidIO switch support" depends on RAPIDIO diff --git a/drivers/rapidio/switches/Makefile b/drivers/rapidio/switches/Makefile index fe4adc3e8d5f..48d67a6b98c8 100644 --- a/drivers/rapidio/switches/Makefile +++ b/drivers/rapidio/switches/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_RAPIDIO_TSI57X) += tsi57x.o obj-$(CONFIG_RAPIDIO_CPS_XX) += idtcps.o obj-$(CONFIG_RAPIDIO_TSI568) += tsi568.o obj-$(CONFIG_RAPIDIO_TSI500) += tsi500.o +obj-$(CONFIG_RAPIDIO_CPS_GEN2) += idt_gen2.o ifeq ($(CONFIG_RAPIDIO_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/drivers/rapidio/switches/idt_gen2.c b/drivers/rapidio/switches/idt_gen2.c new file mode 100644 index 000000000000..0bb871cb5c40 --- /dev/null +++ b/drivers/rapidio/switches/idt_gen2.c @@ -0,0 +1,447 @@ +/* + * IDT CPS Gen.2 Serial RapidIO switch family support + * + * Copyright 2010 Integrated Device Technology, Inc. 
+ * Alexandre Bounine + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include "../rio.h" + +#define LOCAL_RTE_CONF_DESTID_SEL 0x010070 +#define LOCAL_RTE_CONF_DESTID_SEL_PSEL 0x0000001f + +#define IDT_LT_ERR_REPORT_EN 0x03100c + +#define IDT_PORT_ERR_REPORT_EN(n) (0x031044 + (n)*0x40) +#define IDT_PORT_ERR_REPORT_EN_BC 0x03ff04 + +#define IDT_PORT_ISERR_REPORT_EN(n) (0x03104C + (n)*0x40) +#define IDT_PORT_ISERR_REPORT_EN_BC 0x03ff0c +#define IDT_PORT_INIT_TX_ACQUIRED 0x00000020 + +#define IDT_LANE_ERR_REPORT_EN(n) (0x038010 + (n)*0x100) +#define IDT_LANE_ERR_REPORT_EN_BC 0x03ff10 + +#define IDT_DEV_CTRL_1 0xf2000c +#define IDT_DEV_CTRL_1_GENPW 0x02000000 +#define IDT_DEV_CTRL_1_PRSTBEH 0x00000001 + +#define IDT_CFGBLK_ERR_CAPTURE_EN 0x020008 +#define IDT_CFGBLK_ERR_REPORT 0xf20014 +#define IDT_CFGBLK_ERR_REPORT_GENPW 0x00000002 + +#define IDT_AUX_PORT_ERR_CAP_EN 0x020000 +#define IDT_AUX_ERR_REPORT_EN 0xf20018 +#define IDT_AUX_PORT_ERR_LOG_I2C 0x00000002 +#define IDT_AUX_PORT_ERR_LOG_JTAG 0x00000001 + +#define IDT_ISLTL_ADDRESS_CAP 0x021014 + +#define IDT_RIO_DOMAIN 0xf20020 +#define IDT_RIO_DOMAIN_MASK 0x000000ff + +#define IDT_PW_INFO_CSR 0xf20024 + +#define IDT_SOFT_RESET 0xf20040 +#define IDT_SOFT_RESET_REQ 0x00030097 + +#define IDT_I2C_MCTRL 0xf20050 +#define IDT_I2C_MCTRL_GENPW 0x04000000 + +#define IDT_JTAG_CTRL 0xf2005c +#define IDT_JTAG_CTRL_GENPW 0x00000002 + +#define IDT_LANE_CTRL(n) (0xff8000 + (n)*0x100) +#define IDT_LANE_CTRL_BC 0xffff00 +#define IDT_LANE_CTRL_GENPW 0x00200000 +#define IDT_LANE_DFE_1_BC 0xffff18 +#define IDT_LANE_DFE_2_BC 0xffff1c + +#define IDT_PORT_OPS(n) (0xf40004 + (n)*0x100) +#define IDT_PORT_OPS_GENPW 0x08000000 +#define IDT_PORT_OPS_PL_ELOG 0x00000040 +#define IDT_PORT_OPS_LL_ELOG 0x00000020 +#define IDT_PORT_OPS_LT_ELOG 0x00000010 +#define IDT_PORT_OPS_BC 0xf4ff04 + +#define IDT_PORT_ISERR_DET(n) (0xf40008 + (n)*0x100) + +#define IDT_ERR_CAP 0xfd0000 +#define IDT_ERR_CAP_LOG_OVERWR 0x00000004 + +#define IDT_ERR_RD 0xfd0004 + +#define IDT_DEFAULT_ROUTE 0xde +#define IDT_NO_ROUTE 0xdf + +static int +idtg2_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table, u16 route_destid, u8 route_port) +{ + /* + * Select routing table to update + */ + if (table == RIO_GLOBAL_TABLE) + table = 0; + else + table++; + + rio_mport_write_config_32(mport, destid, hopcount, + LOCAL_RTE_CONF_DESTID_SEL, table); + + /* + * Program destination port for the specified destID + */ + rio_mport_write_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_DESTID_SEL_CSR, + (u32)route_destid); + + rio_mport_write_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_PORT_SEL_CSR, + (u32)route_port); + udelay(10); + + return 0; +} + +static int +idtg2_route_get_entry(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table, u16 route_destid, u8 *route_port) +{ + u32 result; + + /* + * Select routing table to read + */ + if (table == RIO_GLOBAL_TABLE) + table = 0; + else + table++; + + rio_mport_write_config_32(mport, destid, hopcount, + LOCAL_RTE_CONF_DESTID_SEL, table); + + rio_mport_write_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_DESTID_SEL_CSR, + route_destid); + + rio_mport_read_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_PORT_SEL_CSR, &result); + + if (IDT_DEFAULT_ROUTE == 
(u8)result || IDT_NO_ROUTE == (u8)result) + *route_port = RIO_INVALID_ROUTE; + else + *route_port = (u8)result; + + return 0; +} + +static int +idtg2_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table) +{ + u32 i; + + /* + * Select routing table to read + */ + if (table == RIO_GLOBAL_TABLE) + table = 0; + else + table++; + + rio_mport_write_config_32(mport, destid, hopcount, + LOCAL_RTE_CONF_DESTID_SEL, table); + + for (i = RIO_STD_RTE_CONF_EXTCFGEN; + i <= (RIO_STD_RTE_CONF_EXTCFGEN | 0xff);) { + rio_mport_write_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_DESTID_SEL_CSR, i); + rio_mport_write_config_32(mport, destid, hopcount, + RIO_STD_RTE_CONF_PORT_SEL_CSR, + (IDT_DEFAULT_ROUTE << 24) | (IDT_DEFAULT_ROUTE << 16) | + (IDT_DEFAULT_ROUTE << 8) | IDT_DEFAULT_ROUTE); + i += 4; + } + + return 0; +} + + +static int +idtg2_set_domain(struct rio_mport *mport, u16 destid, u8 hopcount, + u8 sw_domain) +{ + /* + * Switch domain configuration operates only at global level + */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_RIO_DOMAIN, (u32)sw_domain); + return 0; +} + +static int +idtg2_get_domain(struct rio_mport *mport, u16 destid, u8 hopcount, + u8 *sw_domain) +{ + u32 regval; + + /* + * Switch domain configuration operates only at global level + */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_RIO_DOMAIN, ®val); + + *sw_domain = (u8)(regval & 0xff); + + return 0; +} + +static int +idtg2_em_init(struct rio_dev *rdev) +{ + struct rio_mport *mport = rdev->net->hport; + u16 destid = rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + u32 regval; + int i, tmp; + + /* + * This routine performs device-specific initialization only. + * All standard EM configuration should be performed at upper level. + */ + + pr_debug("RIO: %s [%d:%d]\n", __func__, destid, hopcount); + + /* Set Port-Write info CSR: PRIO=3 and CRF=1 */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_PW_INFO_CSR, 0x0000e000); + + /* + * Configure LT LAYER error reporting. + */ + + /* Enable standard (RIO.p8) error reporting */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_LT_ERR_REPORT_EN, + REM_LTL_ERR_ILLTRAN | REM_LTL_ERR_UNSOLR | + REM_LTL_ERR_UNSUPTR); + + /* Use Port-Writes for LT layer error reporting. + * Enable per-port reset + */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_DEV_CTRL_1, ®val); + rio_mport_write_config_32(mport, destid, hopcount, + IDT_DEV_CTRL_1, + regval | IDT_DEV_CTRL_1_GENPW | IDT_DEV_CTRL_1_PRSTBEH); + + /* + * Configure PORT error reporting. + */ + + /* Report all RIO.p8 errors supported by device */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_PORT_ERR_REPORT_EN_BC, 0x807e8037); + + /* Configure reporting of implementation specific errors/events */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_PORT_ISERR_REPORT_EN_BC, IDT_PORT_INIT_TX_ACQUIRED); + + /* Use Port-Writes for port error reporting and enable error logging */ + tmp = RIO_GET_TOTAL_PORTS(rdev->swpinfo); + for (i = 0; i < tmp; i++) { + rio_mport_read_config_32(mport, destid, hopcount, + IDT_PORT_OPS(i), ®val); + rio_mport_write_config_32(mport, destid, hopcount, + IDT_PORT_OPS(i), regval | IDT_PORT_OPS_GENPW | + IDT_PORT_OPS_PL_ELOG | + IDT_PORT_OPS_LL_ELOG | + IDT_PORT_OPS_LT_ELOG); + } + /* Overwrite error log if full */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_ERR_CAP, IDT_ERR_CAP_LOG_OVERWR); + + /* + * Configure LANE error reporting. 
+ */ + + /* Disable line error reporting */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_LANE_ERR_REPORT_EN_BC, 0); + + /* Use Port-Writes for lane error reporting (when enabled) + * (do per-lane update because lanes may have different configuration) + */ + tmp = (rdev->did == RIO_DID_IDTCPS1848) ? 48 : 16; + for (i = 0; i < tmp; i++) { + rio_mport_read_config_32(mport, destid, hopcount, + IDT_LANE_CTRL(i), ®val); + rio_mport_write_config_32(mport, destid, hopcount, + IDT_LANE_CTRL(i), regval | IDT_LANE_CTRL_GENPW); + } + + /* + * Configure AUX error reporting. + */ + + /* Disable JTAG and I2C Error capture */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_AUX_PORT_ERR_CAP_EN, 0); + + /* Disable JTAG and I2C Error reporting/logging */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_AUX_ERR_REPORT_EN, 0); + + /* Disable Port-Write notification from JTAG */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_JTAG_CTRL, 0); + + /* Disable Port-Write notification from I2C */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_I2C_MCTRL, ®val); + rio_mport_write_config_32(mport, destid, hopcount, + IDT_I2C_MCTRL, + regval & ~IDT_I2C_MCTRL_GENPW); + + /* + * Configure CFG_BLK error reporting. + */ + + /* Disable Configuration Block error capture */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_CFGBLK_ERR_CAPTURE_EN, 0); + + /* Disable Port-Writes for Configuration Block error reporting */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_CFGBLK_ERR_REPORT, ®val); + rio_mport_write_config_32(mport, destid, hopcount, + IDT_CFGBLK_ERR_REPORT, + regval & ~IDT_CFGBLK_ERR_REPORT_GENPW); + + /* set TVAL = ~50us */ + rio_mport_write_config_32(mport, destid, hopcount, + rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x8e << 8); + + return 0; +} + +static int +idtg2_em_handler(struct rio_dev *rdev, u8 portnum) +{ + struct rio_mport *mport = rdev->net->hport; + u16 destid = rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + u32 regval, em_perrdet, em_ltlerrdet; + + rio_mport_read_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, &em_ltlerrdet); + if (em_ltlerrdet) { + /* Service Logical/Transport Layer Error(s) */ + if (em_ltlerrdet & REM_LTL_ERR_IMPSPEC) { + /* Implementation specific error reported */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_ISLTL_ADDRESS_CAP, ®val); + + pr_debug("RIO: %s Implementation Specific LTL errors" \ + " 0x%x @(0x%x)\n", + rio_name(rdev), em_ltlerrdet, regval); + + /* Clear implementation specific address capture CSR */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_ISLTL_ADDRESS_CAP, 0); + + } + } + + rio_mport_read_config_32(mport, destid, hopcount, + rdev->em_efptr + RIO_EM_PN_ERR_DETECT(portnum), &em_perrdet); + if (em_perrdet) { + /* Service Port-Level Error(s) */ + if (em_perrdet & REM_PED_IMPL_SPEC) { + /* Implementation Specific port error reported */ + + /* Get IS errors reported */ + rio_mport_read_config_32(mport, destid, hopcount, + IDT_PORT_ISERR_DET(portnum), ®val); + + pr_debug("RIO: %s Implementation Specific Port" \ + " errors 0x%x\n", rio_name(rdev), regval); + + /* Clear all implementation specific events */ + rio_mport_write_config_32(mport, destid, hopcount, + IDT_PORT_ISERR_DET(portnum), 0); + } + } + + return 0; +} + +static ssize_t +idtg2_show_errlog(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct rio_dev *rdev = to_rio_dev(dev); + struct rio_mport *mport = rdev->net->hport; + u16 destid = 
rdev->rswitch->destid; + u8 hopcount = rdev->rswitch->hopcount; + ssize_t len = 0; + u32 regval; + + while (!rio_mport_read_config_32(mport, destid, hopcount, + IDT_ERR_RD, &regval)) { + if (!regval) /* 0 = end of log */ + break; + len += snprintf(buf + len, PAGE_SIZE - len, + "%08x\n", regval); + if (len >= (PAGE_SIZE - 10)) + break; + } + + return len; +} + +static DEVICE_ATTR(errlog, S_IRUGO, idtg2_show_errlog, NULL); + +static int idtg2_sysfs(struct rio_dev *rdev, int create) +{ + struct device *dev = &rdev->dev; + int err = 0; + + if (create == RIO_SW_SYSFS_CREATE) { + /* Initialize sysfs entries */ + err = device_create_file(dev, &dev_attr_errlog); + if (err) + dev_err(dev, "Unable to create sysfs errlog file\n"); + } else + device_remove_file(dev, &dev_attr_errlog); + + return err; +} + +static int idtg2_switch_init(struct rio_dev *rdev, int do_enum) +{ + pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); + rdev->rswitch->add_entry = idtg2_route_add_entry; + rdev->rswitch->get_entry = idtg2_route_get_entry; + rdev->rswitch->clr_table = idtg2_route_clr_table; + rdev->rswitch->set_domain = idtg2_set_domain; + rdev->rswitch->get_domain = idtg2_get_domain; + rdev->rswitch->em_init = idtg2_em_init; + rdev->rswitch->em_handle = idtg2_em_handler; + rdev->rswitch->sw_sysfs = idtg2_sysfs; + + return 0; +} + +DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1848, idtg2_switch_init); +DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS1616, idtg2_switch_init); diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h index db50e1c288b7..ee7b6ada188f 100644 --- a/include/linux/rio_ids.h +++ b/include/linux/rio_ids.h @@ -34,5 +34,7 @@ #define RIO_DID_IDTCPS16 0x035b #define RIO_DID_IDTCPS6Q 0x035f #define RIO_DID_IDTCPS10Q 0x035e +#define RIO_DID_IDTCPS1848 0x0374 +#define RIO_DID_IDTCPS1616 0x0379 #endif /* LINUX_RIO_IDS_H */ diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h index daa269d18e07..a18b2e22aa1d 100644 --- a/include/linux/rio_regs.h +++ b/include/linux/rio_regs.h @@ -161,6 +161,7 @@ #define RIO_COMPONENT_TAG_CSR 0x6c /* [III] Component Tag CSR */ #define RIO_STD_RTE_CONF_DESTID_SEL_CSR 0x70 +#define RIO_STD_RTE_CONF_EXTCFGEN 0x80000000 #define RIO_STD_RTE_CONF_PORT_SEL_CSR 0x74 #define RIO_STD_RTE_DEFAULT_PORT 0x78 @@ -265,6 +266,10 @@ #define RIO_EM_EFB_HEADER 0x000 /* Error Management Extensions Block Header */ #define RIO_EM_LTL_ERR_DETECT 0x008 /* Logical/Transport Layer Error Detect CSR */ #define RIO_EM_LTL_ERR_EN 0x00c /* Logical/Transport Layer Error Enable CSR */ +#define REM_LTL_ERR_ILLTRAN 0x08000000 /* Illegal Transaction decode */ +#define REM_LTL_ERR_UNSOLR 0x00800000 /* Unsolicited Response */ +#define REM_LTL_ERR_UNSUPTR 0x00400000 /* Unsupported Transaction */ +#define REM_LTL_ERR_IMPSPEC 0x000000ff /* Implementation Specific */ #define RIO_EM_LTL_HIADDR_CAP 0x010 /* Logical/Transport Layer High Address Capture CSR */ #define RIO_EM_LTL_ADDR_CAP 0x014 /* Logical/Transport Layer Address Capture CSR */ #define RIO_EM_LTL_DEVID_CAP 0x018 /* Logical/Transport Layer Device ID Capture CSR */ -- cgit v1.2.3 From af84ca38aff94061dd0711edbb99b0900a9c28fd Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:34 -0700 Subject: rapidio: add handling of redundant routes Detects a RIO link to an already enumerated device and properly sets links between device objects. Changes to the enumeration/discovery logic: 1.
Use the Master Enable bit to signal the end of enumeration - agents may start their discovery process as soon as they see this bit set (the Component Tag register was used before for this purpose). 2. Enumerator sets Component Tag (!= 0) immediately during device setup. This makes it possible to identify the device if a redundant route exists in a RIO system. Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/sysdev/fsl_rio.c | 8 ++++ drivers/rapidio/rio-scan.c | 88 ++++++++++++++++++++++--------------------- drivers/rapidio/rio.c | 16 ++++---- drivers/rapidio/rio.h | 1 + include/linux/rio.h | 2 + 5 files changed, 64 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index ed2ec7154917..9725369d432a 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -50,6 +50,7 @@ #define RIO_ATMU_REGS_OFFSET 0x10c00 #define RIO_P_MSG_REGS_OFFSET 0x11000 #define RIO_S_MSG_REGS_OFFSET 0x13000 +#define RIO_GCCSR 0x13c #define RIO_ESCSR 0x158 #define RIO_CCSR 0x15c #define RIO_LTLEDCSR 0x0608 @@ -1471,6 +1472,7 @@ int fsl_rio_setup(struct platform_device *dev) port->host_deviceid = fsl_rio_get_hdid(port->id); port->priv = priv; + port->phys_efptr = 0x100; rio_register_mport(port); priv->regs_win = ioremap(regs.start, regs.end - regs.start + 1); @@ -1518,6 +1520,12 @@ int fsl_rio_setup(struct platform_device *dev) dev_info(&dev->dev, "RapidIO Common Transport System size: %d\n", port->sys_size ? 65536 : 256); + if (port->host_deviceid >= 0) + out_be32(priv->regs_win + RIO_GCCSR, RIO_PORT_GEN_HOST | + RIO_PORT_GEN_MASTER | RIO_PORT_GEN_DISCOVERED); + else + out_be32(priv->regs_win + RIO_GCCSR, 0x00000000); + priv->atmu_regs = (struct rio_atmu_regs *)(priv->regs_win + RIO_ATMU_REGS_OFFSET); priv->maint_atmu_regs = priv->atmu_regs + 1; diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index e3efdf93df5a..1eb82c4c712e 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -48,7 +48,7 @@ DEFINE_SPINLOCK(rio_global_list_lock); static int next_destid = 0; static int next_switchid = 0; static int next_net = 0; -static int next_comptag; +static int next_comptag = 1; static struct timer_list rio_enum_timer = TIMER_INITIALIZER(rio_enum_timeout, 0, 0); @@ -121,27 +121,6 @@ static int rio_clear_locks(struct rio_mport *port) u32 result; int ret = 0; - /* Assign component tag to all devices */ - next_comptag = 1; - rio_local_write_config_32(port, RIO_COMPONENT_TAG_CSR, next_comptag++); - - list_for_each_entry(rdev, &rio_devices, global_list) { - /* Mark device as discovered */ - rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_GEN_CTL_CSR, - &result); - rio_write_config_32(rdev, - rdev->phys_efptr + RIO_PORT_GEN_CTL_CSR, - result | RIO_PORT_GEN_DISCOVERED); - - rio_write_config_32(rdev, RIO_COMPONENT_TAG_CSR, next_comptag); - rdev->comp_tag = next_comptag++; - if (next_comptag >= 0x10000) { - pr_err("RIO: Component Tag Counter Overflow\n"); - break; - } - } - /* Release host device id locks */ rio_local_write_config_32(port, RIO_HOST_DID_LOCK_CSR, port->host_deviceid); @@ -162,6 +141,15 @@ rdev->vid, rdev->did); ret = -EINVAL; } + + /* Mark device as discovered and enable master */ + rio_read_config_32(rdev, + rdev->phys_efptr + RIO_PORT_GEN_CTL_CSR, + &result); + result |= RIO_PORT_GEN_DISCOVERED |
RIO_PORT_GEN_MASTER; + rio_write_config_32(rdev, + rdev->phys_efptr + RIO_PORT_GEN_CTL_CSR, + result); } return ret; @@ -430,6 +418,17 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, rio_mport_read_config_32(port, destid, hopcount, RIO_DST_OPS_CAR, &rdev->dst_ops); + if (do_enum) { + /* Assign component tag to device */ + if (next_comptag >= 0x10000) { + pr_err("RIO: Component Tag Counter Overflow\n"); + goto cleanup; + } + rio_mport_write_config_32(port, destid, hopcount, + RIO_COMPONENT_TAG_CSR, next_comptag); + rdev->comp_tag = next_comptag++; + } + if (rio_device_has_destid(port, rdev->src_ops, rdev->dst_ops)) { if (do_enum) { rio_set_device_id(port, destid, hopcount, next_destid); @@ -725,21 +724,6 @@ static u16 rio_get_host_deviceid_lock(struct rio_mport *port, u8 hopcount) return (u16) (result & 0xffff); } -/** - * rio_net_add_mport- Add a master port to a RIO network - * @net: RIO network - * @port: Master port to add - * - * Adds a master port to the network list of associated master - * ports.. - */ -static void rio_net_add_mport(struct rio_net *net, struct rio_mport *port) -{ - spin_lock(&rio_global_list_lock); - list_add_tail(&port->nnode, &net->mports); - spin_unlock(&rio_global_list_lock); -} - /** * rio_enum_peer- Recursively enumerate a RIO network through a master port * @net: RIO network being enumerated @@ -760,6 +744,7 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, int sw_inport; struct rio_dev *rdev; u16 destid; + u32 regval; int tmp; if (rio_mport_chk_dev_access(port, @@ -772,9 +757,21 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, pr_debug("RIO: PE already discovered by this host\n"); /* * Already discovered by this host. Add it as another - * master port for the current network. + * link to the existing device. */ - rio_net_add_mport(net, port); + rio_mport_read_config_32(port, RIO_ANY_DESTID(port->sys_size), + hopcount, RIO_COMPONENT_TAG_CSR, &regval); + + if (regval) { + rdev = rio_get_comptag((regval & 0xffff), NULL); + + if (rdev && prev && rio_is_switch(prev)) { + pr_debug("RIO: redundant path to %s\n", + rio_name(rdev)); + prev->rswitch->nextdev[prev_port] = rdev; + } + } + return 0; } @@ -925,10 +922,11 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port, */ static int rio_enum_complete(struct rio_mport *port) { - u32 tag_csr; + u32 regval; - rio_local_read_config_32(port, RIO_COMPONENT_TAG_CSR, &tag_csr); - return (tag_csr & 0xffff) ? 1 : 0; + rio_local_read_config_32(port, port->phys_efptr + RIO_PORT_GEN_CTL_CSR, + &regval); + return (regval & RIO_PORT_GEN_MASTER) ? 1 : 0; } /** @@ -991,6 +989,8 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, break; } + if (ndestid == RIO_ANY_DESTID(port->sys_size)) + continue; rio_unlock_device(port, destid, hopcount); if (rio_disc_peer (net, port, ndestid, hopcount + 1) < 0) @@ -1163,6 +1163,10 @@ int __devinit rio_enum_mport(struct rio_mport *mport) /* Enable Input Output Port (transmitter receiver) */ rio_enable_rx_tx_port(mport, 1, 0, 0, 0); + /* Set component tag for host */ + rio_local_write_config_32(mport, RIO_COMPONENT_TAG_CSR, + next_comptag++); + if (rio_enum_peer(net, mport, 0, NULL, 0) < 0) { /* A higher priority host won enumeration, bail.
*/ printk(KERN_INFO diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index fa5e3cbe4c83..7f18a65c4ed0 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -443,7 +443,7 @@ rio_mport_get_physefb(struct rio_mport *port, int local, * @from is not %NULL, searches continue from next device on the global * list. */ -static struct rio_dev *rio_get_comptag(u32 comp_tag, struct rio_dev *from) +struct rio_dev *rio_get_comptag(u32 comp_tag, struct rio_dev *from) { struct list_head *n; struct rio_dev *rdev; @@ -507,7 +507,7 @@ static int rio_chk_dev_route(struct rio_dev *rdev, struct rio_dev **nrdev, int *npnum) { u32 result; - int p_port, rc = -EIO; + int p_port, dstid, rc = -EIO; struct rio_dev *prev = NULL; /* Find switch with failed RIO link */ @@ -522,20 +522,18 @@ rio_chk_dev_route(struct rio_dev *rdev, struct rio_dev **nrdev, int *npnum) if (prev == NULL) goto err_out; - /* Find port with failed RIO link */ - for (p_port = 0; - p_port < RIO_GET_TOTAL_PORTS(prev->swpinfo); p_port++) - if (prev->rswitch->nextdev[p_port] == rdev) - break; + dstid = (rdev->pef & RIO_PEF_SWITCH) ? + rdev->rswitch->destid : rdev->destid; + p_port = prev->rswitch->route_table[dstid]; - if (p_port < RIO_GET_TOTAL_PORTS(prev->swpinfo)) { + if (p_port != RIO_INVALID_ROUTE) { pr_debug("RIO: link failed on [%s]-P%d\n", rio_name(prev), p_port); *nrdev = prev; *npnum = p_port; rc = 0; } else - pr_debug("RIO: failed to trace route to %s\n", rio_name(prev)); + pr_debug("RIO: failed to trace route to %s\n", rio_name(rdev)); err_out: return rc; } diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h index d249a1205c7d..b1af414f15e6 100644 --- a/drivers/rapidio/rio.h +++ b/drivers/rapidio/rio.h @@ -38,6 +38,7 @@ extern int rio_std_route_get_entry(struct rio_mport *mport, u16 destid, extern int rio_std_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount, u16 table); extern int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock); +extern struct rio_dev *rio_get_comptag(u32 comp_tag, struct rio_dev *from); /* Structures internal to the RIO core code */ extern struct device_attribute rio_dev_attrs[]; diff --git a/include/linux/rio.h b/include/linux/rio.h index 4fa5e3d2b117..0bed941f9b13 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -177,6 +177,7 @@ enum rio_phy_type { * @index: Port index, unique among all port interfaces of the same type * @sys_size: RapidIO common transport system size * @phy_type: RapidIO phy type + * @phys_efptr: RIO port extended features pointer * @name: Port name string * @priv: Master port private data */ @@ -198,6 +199,7 @@ struct rio_mport { * 1 - Large size, 65536 devices. */ enum rio_phy_type phy_type; /* RapidIO phy type */ + u32 phys_efptr; unsigned char name[40]; void *priv; /* Master port private data */ }; -- cgit v1.2.3 From 388c45ccfaeec68e334ad79edeb0b5b0a43197ff Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Wed, 27 Oct 2010 15:34:35 -0700 Subject: rapidio: fix IDLE2 bits corruption RapidIO spec v.2.1 adds Idle Sequence 2 into LP-Serial Physical Layer. The fix ensures that corresponding bits are not corrupted during error handling. 
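For readers unfamiliar with the register semantics, here is a hedged sketch of the idea behind the hunk that follows (names are taken from the patch; the surrounding error-handling code is elided):

	u32 err_status;

	/* Read the per-port Error and Status CSR. */
	rio_mport_read_config_32(mport, destid, hopcount,
		rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum),
		&err_status);

	/*
	 * The CSR is write-1-to-clear: writing back exactly the value
	 * that was read clears only the bits that were set (including
	 * the Port-Write Pending bit), while the IDLE2 bits added by
	 * RapidIO v2.1 are left untouched. The old fixed clear mask
	 * (RIO_PORT_N_ERR_STS_CLR_MASK) could write into those bits.
	 */
	rio_mport_write_config_32(mport, destid, hopcount,
		rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum),
		err_status);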
Signed-off-by: Alexandre Bounine Cc: Thomas Moll Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Micha Nelissen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 9 ++------- include/linux/rio_regs.h | 3 +-- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 7f18a65c4ed0..68cf0c99138a 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -871,15 +871,10 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg) rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, 0); } - /* Clear remaining error bits */ + /* Clear remaining error bits and Port-Write Pending bit */ rio_mport_write_config_32(mport, destid, hopcount, rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), - err_status & RIO_PORT_N_ERR_STS_CLR_MASK); - - /* Clear Port-Write Pending bit */ - rio_mport_write_config_32(mport, destid, hopcount, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), - RIO_PORT_N_ERR_STS_PW_PEND); + err_status); return 0; } diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h index a18b2e22aa1d..d63dcbaea169 100644 --- a/include/linux/rio_regs.h +++ b/include/linux/rio_regs.h @@ -229,7 +229,7 @@ #define RIO_MNT_REQ_CMD_IS 0x04 /* Input-status command */ #define RIO_PORT_N_MNT_RSP_CSR(x) (0x0044 + x*0x20) /* 0x0002 */ #define RIO_PORT_N_MNT_RSP_RVAL 0x80000000 /* Response Valid */ -#define RIO_PORT_N_MNT_RSP_ASTAT 0x000003e0 /* ackID Status */ +#define RIO_PORT_N_MNT_RSP_ASTAT 0x000007e0 /* ackID Status */ #define RIO_PORT_N_MNT_RSP_LSTAT 0x0000001f /* Link Status */ #define RIO_PORT_N_ACK_STS_CSR(x) (0x0048 + x*0x20) /* 0x0002 */ #define RIO_PORT_N_ACK_CLEAR 0x80000000 @@ -243,7 +243,6 @@ #define RIO_PORT_N_ERR_STS_PORT_ERR 0x00000004 #define RIO_PORT_N_ERR_STS_PORT_OK 0x00000002 #define RIO_PORT_N_ERR_STS_PORT_UNINIT 0x00000001 -#define RIO_PORT_N_ERR_STS_CLR_MASK 0x07120204 #define RIO_PORT_N_CTL_CSR(x) (0x005c + x*0x20) #define RIO_PORT_N_CTL_PWIDTH 0xc0000000 #define RIO_PORT_N_CTL_PWIDTH_1 0x00000000 -- cgit v1.2.3 From d57af9b2142f31a39dcfdeb30776baadfc802827 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 27 Oct 2010 15:34:45 -0700 Subject: taskstats: use real microsecond granularity for CPU times The taskstats interface uses microsecond granularity for the user and system time values. The conversion from cputime to the taskstats values uses the cputime_to_msecs primitive which effectively limits the granularity to milliseconds. Add the cputime_to_usecs primitive for architectures that have better, more precise CPU time values. Remove cputime_to_msecs primitive because there are no more users left. 
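A worked example of the precision gain, assuming the ia64 nanosecond cputime representation shown in the diff below (the numbers themselves are illustrative):

	/*
	 * Take ct = 1234567 ns of CPU time:
	 *
	 *   before: cputime_to_msecs(ct) * USEC_PER_MSEC
	 *           = (1234567 / NSEC_PER_MSEC) * 1000 = 1000 us
	 *
	 *   after:  cputime_to_usecs(ct)
	 *           = 1234567 / NSEC_PER_USEC = 1234 us
	 *
	 * The sub-millisecond remainder is no longer rounded away
	 * before it reaches the taskstats ac_utime/ac_stime fields.
	 */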
Signed-off-by: Michael Holzheu Acked-by: Balbir Singh Cc: Luck Tony Cc: Shailabh Nagar Cc: Martin Schwidefsky Cc: Oleg Nesterov Cc: Benjamin Herrenschmidt Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Shailabh Nagar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/cputime.h | 6 +++--- arch/powerpc/include/asm/cputime.h | 12 ++++++------ arch/s390/include/asm/cputime.h | 10 +++++----- include/asm-generic/cputime.h | 6 +++--- kernel/tsacct.c | 10 ++++------ 5 files changed, 21 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index 7fa8a8594660..6073b187528a 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -56,10 +56,10 @@ typedef u64 cputime64_t; #define jiffies64_to_cputime64(__jif) ((__jif) * (NSEC_PER_SEC / HZ)) /* - * Convert cputime <-> milliseconds + * Convert cputime <-> microseconds */ -#define cputime_to_msecs(__ct) ((__ct) / NSEC_PER_MSEC) -#define msecs_to_cputime(__msecs) ((__msecs) * NSEC_PER_MSEC) +#define cputime_to_usecs(__ct) ((__ct) / NSEC_PER_USEC) +#define usecs_to_cputime(__usecs) ((__usecs) * NSEC_PER_USEC) /* * Convert cputime <-> seconds diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 8bdc6a9e5773..1cf20bdfbeca 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -124,23 +124,23 @@ static inline u64 cputime64_to_jiffies64(const cputime_t ct) } /* - * Convert cputime <-> milliseconds + * Convert cputime <-> microseconds */ extern u64 __cputime_msec_factor; -static inline unsigned long cputime_to_msecs(const cputime_t ct) +static inline unsigned long cputime_to_usecs(const cputime_t ct) { - return mulhdu(ct, __cputime_msec_factor); + return mulhdu(ct, __cputime_msec_factor) * USEC_PER_MSEC; } -static inline cputime_t msecs_to_cputime(const unsigned long ms) +static inline cputime_t usecs_to_cputime(const unsigned long us) { cputime_t ct; unsigned long sec; /* have to be a little careful about overflow */ - ct = ms % 1000; - sec = ms / 1000; + ct = us % 1000000; + sec = us / 1000000; if (ct) { ct *= tb_ticks_per_sec; do_div(ct, 1000); diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 8b1a52a137c5..40e2ab0fa3f0 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -73,18 +73,18 @@ cputime64_to_jiffies64(cputime64_t cputime) } /* - * Convert cputime to milliseconds and back. + * Convert cputime to microseconds and back. */ static inline unsigned int -cputime_to_msecs(const cputime_t cputime) +cputime_to_usecs(const cputime_t cputime) { - return cputime_div(cputime, 4096000); + return cputime_div(cputime, 4096); } static inline cputime_t -msecs_to_cputime(const unsigned int m) +usecs_to_cputime(const unsigned int m) { - return (cputime_t) m * 4096000; + return (cputime_t) m * 4096; } /* diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index ca0f239f0e13..2bcc5c7c22a6 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -33,10 +33,10 @@ typedef u64 cputime64_t; /* - * Convert cputime to milliseconds and back. + * Convert cputime to microseconds and back. 
*/ -#define cputime_to_msecs(__ct) jiffies_to_msecs(__ct) -#define msecs_to_cputime(__msecs) msecs_to_jiffies(__msecs) +#define cputime_to_usecs(__ct) jiffies_to_usecs(__ct); +#define usecs_to_cputime(__msecs) usecs_to_jiffies(__msecs); /* * Convert cputime to seconds and back. diff --git a/kernel/tsacct.c index 0a67e041edf8..24dc60d9fa1f 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -63,12 +63,10 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) stats->ac_ppid = pid_alive(tsk) ? rcu_dereference(tsk->real_parent)->tgid : 0; rcu_read_unlock(); - stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC; - stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; - stats->ac_utimescaled = - cputime_to_msecs(tsk->utimescaled) * USEC_PER_MSEC; - stats->ac_stimescaled = - cputime_to_msecs(tsk->stimescaled) * USEC_PER_MSEC; + stats->ac_utime = cputime_to_usecs(tsk->utime); + stats->ac_stime = cputime_to_usecs(tsk->stime); + stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled); + stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled); stats->ac_minflt = tsk->min_flt; stats->ac_majflt = tsk->maj_flt; -- cgit v1.2.3 From 144ecf310eb52d9df607b9b7eeb096743e232a96 Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Wed, 27 Oct 2010 15:34:50 -0700 Subject: kfifo: fix kfifo_alloc() to return a signed int value Add a new __kfifo_int_must_check_helper() helper function, which is needed for kfifo_alloc() to return the right signed integer value. The original __kfifo_must_check_helper() helper was renamed to __kfifo_uint_must_check_helper() to show the signedness that is expected and returned. (And revert the temporary disabling of __kfifo_must_check_helper()) Signed-off-by: Stefani Seibold Acked-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index c238ad2f82ea..10308c6a3d1c 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -171,8 +171,17 @@ struct kfifo_rec_ptr_2 __STRUCT_KFIFO_PTR(unsigned char, 2, void); } -/* __kfifo_must_check_helper() is temporarily disabled because it was faulty */ -#define __kfifo_must_check_helper(x) (x) +static inline unsigned int __must_check +__kfifo_uint_must_check_helper(unsigned int val) +{ + return val; +} + +static inline int __must_check +__kfifo_int_must_check_helper(int val) +{ + return val; +} /** * kfifo_initialized - Check if the fifo is initialized @@ -264,7 +273,7 @@ struct kfifo_rec_ptr_2 __STRUCT_KFIFO_PTR(unsigned char, 2, void); * @fifo: address of the fifo to be used */ #define kfifo_avail(fifo) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmpq = (fifo); \ const size_t __recsize = sizeof(*__tmpq->rectype); \ @@ -297,7 +306,7 @@ * This function returns the size of the next fifo record in number of bytes. */ #define kfifo_peek_len(fifo) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ const size_t __recsize = sizeof(*__tmp->rectype); \ @@ -320,7 +329,7 @@ * Return 0 if no error, otherwise an error code.
*/ #define kfifo_alloc(fifo, size, gfp_mask) \ -__kfifo_must_check_helper( \ +__kfifo_int_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -416,7 +425,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macros. */ #define kfifo_get(fifo, val) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ typeof((val) + 1) __val = (val); \ @@ -457,7 +466,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macros. */ #define kfifo_peek(fifo, val) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ typeof((val) + 1) __val = (val); \ @@ -549,7 +558,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macros. */ #define kfifo_out(fifo, buf, n) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ typeof((buf) + 1) __buf = (buf); \ @@ -577,7 +586,7 @@ __kfifo_must_check_helper( \ * copied. */ #define kfifo_out_spinlocked(fifo, buf, n, lock) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ unsigned long __flags; \ unsigned int __ret; \ @@ -606,7 +615,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macros. */ #define kfifo_from_user(fifo, from, len, copied) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ const void __user *__from = (from); \ @@ -634,7 +643,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macros. */ #define kfifo_to_user(fifo, to, len, copied) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ void __user *__to = (to); \ @@ -761,7 +770,7 @@ __kfifo_must_check_helper( \ * writer, you don't need extra locking to use these macros. */ #define kfifo_out_peek(fifo, buf, n) \ -__kfifo_must_check_helper( \ +__kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ typeof((buf) + 1) __buf = (buf); \ -- cgit v1.2.3 From c3b92ce9e75f6353104fc7f8e32fb9fdb2550ad0 Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Wed, 27 Oct 2010 15:34:52 -0700 Subject: ramoops: use the platform data structure instead of module params As each board and system has different memory for ramoops, it's better to define the platform data instead of module params.
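With this in place, a board file can describe its ramoops region at build time. A hedged sketch follows (the "ramoops" device name matches the driver below; the size and reserved address are made-up values for illustration):

	#include <linux/platform_device.h>
	#include <linux/ramoops.h>

	/* hypothetical reserved RAM window for this board */
	static struct ramoops_platform_data board_ramoops_data = {
		.mem_size	= 128 * 1024,
		.mem_address	= 0x8f000000,
	};

	static struct platform_device board_ramoops_device = {
		.name	= "ramoops",
		.id	= -1,
		.dev	= {
			.platform_data = &board_ramoops_data,
		},
	};

	/* called from the board's init code */
	platform_device_register(&board_ramoops_device);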
[akpm@linux-foundation.org: fix ramoops_remove() return type] Signed-off-by: Kyungmin Park Cc: Marco Stornelli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ramoops.c | 30 ++++++++++++++++++++++++++++-- include/linux/ramoops.h | 15 +++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 include/linux/ramoops.h (limited to 'include') diff --git a/drivers/char/ramoops.c b/drivers/char/ramoops.c index 74f00b5ffa36..73dcb0ee41fd 100644 --- a/drivers/char/ramoops.c +++ b/drivers/char/ramoops.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #define RAMOOPS_KERNMSG_HDR "====" #define RAMOOPS_HEADER_SIZE (5 + sizeof(struct timeval)) @@ -91,11 +93,17 @@ static void ramoops_do_dump(struct kmsg_dumper *dumper, cxt->count = (cxt->count + 1) % cxt->max_count; } -static int __init ramoops_init(void) +static int __init ramoops_probe(struct platform_device *pdev) { + struct ramoops_platform_data *pdata = pdev->dev.platform_data; struct ramoops_context *cxt = &oops_cxt; int err = -EINVAL; + if (pdata) { + mem_size = pdata->mem_size; + mem_address = pdata->mem_address; + } + if (!mem_size) { printk(KERN_ERR "ramoops: invalid size specification"); goto fail3; @@ -142,7 +150,7 @@ fail3: return err; } -static void __exit ramoops_exit(void) +static int __exit ramoops_remove(struct platform_device *pdev) { struct ramoops_context *cxt = &oops_cxt; @@ -151,8 +159,26 @@ static void __exit ramoops_exit(void) iounmap(cxt->virt_addr); release_mem_region(cxt->phys_addr, cxt->size); + return 0; } +static struct platform_driver ramoops_driver = { + .remove = __exit_p(ramoops_remove), + .driver = { + .name = "ramoops", + .owner = THIS_MODULE, + }, +}; + +static int __init ramoops_init(void) +{ + return platform_driver_probe(&ramoops_driver, ramoops_probe); +} + +static void __exit ramoops_exit(void) +{ + platform_driver_unregister(&ramoops_driver); +} module_init(ramoops_init); module_exit(ramoops_exit); diff --git a/include/linux/ramoops.h b/include/linux/ramoops.h new file mode 100644 index 000000000000..0ae68a2c1212 --- /dev/null +++ b/include/linux/ramoops.h @@ -0,0 +1,15 @@ +#ifndef __RAMOOPS_H +#define __RAMOOPS_H + +/* + * Ramoops platform data + * @mem_size memory size for ramoops + * @mem_address physical memory address to contain ramoops + */ + +struct ramoops_platform_data { + unsigned long mem_size; + unsigned long mem_address; +}; + +#endif -- cgit v1.2.3 From 61d8e11e519ee7912ab59610fba1aaf08e3c1d84 Mon Sep 17 00:00:00 2001 From: Zimny Lech Date: Wed, 27 Oct 2010 15:34:53 -0700 Subject: Remove duplicate includes from many files Signed-off-by: Zimny Lech Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-tegra/timer.c | 1 - arch/arm/plat-nomadik/include/plat/ste_dma40.h | 1 - arch/tile/kernel/setup.c | 2 -- arch/x86/mm/init_64.c | 1 - arch/x86/xen/enlighten.c | 1 - drivers/media/IR/lirc_dev.c | 1 - drivers/platform/x86/intel_pmic_gpio.c | 1 - include/linux/virtio_9p.h | 1 - kernel/trace/trace_kprobe.c | 1 - 9 files changed, 10 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-tegra/timer.c b/arch/arm/mach-tegra/timer.c index 2f420210d406..9057d6fd1d31 100644 --- a/arch/arm/mach-tegra/timer.c +++ b/arch/arm/mach-tegra/timer.c @@ -27,7 +27,6 @@ #include #include -#include #include #include diff --git a/arch/arm/plat-nomadik/include/plat/ste_dma40.h b/arch/arm/plat-nomadik/include/plat/ste_dma40.h index 5fbde4b8dc12..93a812672d9a 100644 --- a/arch/arm/plat-nomadik/include/plat/ste_dma40.h +++ 
b/arch/arm/plat-nomadik/include/plat/ste_dma40.h @@ -14,7 +14,6 @@ #include #include #include -#include /* dev types for memcpy */ #define STEDMA40_DEV_DST_MEMORY (-1) diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index f3a50e74f9a4..ae51cad12da0 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -30,8 +30,6 @@ #include #include #include -#include -#include #include #include #include diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 84346200e783..71a59296af80 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -51,7 +51,6 @@ #include #include #include -#include static int __init parse_direct_gbpages_off(char *arg) { diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 44ab12dc2a12..0cd12db0b142 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include diff --git a/drivers/media/IR/lirc_dev.c b/drivers/media/IR/lirc_dev.c index 0acf6396e068..202581808bdc 100644 --- a/drivers/media/IR/lirc_dev.c +++ b/drivers/media/IR/lirc_dev.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/platform/x86/intel_pmic_gpio.c b/drivers/platform/x86/intel_pmic_gpio.c index f540ff96c53f..e61db9dfebef 100644 --- a/drivers/platform/x86/intel_pmic_gpio.c +++ b/drivers/platform/x86/intel_pmic_gpio.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index 1faa80d92f05..e68b439b2860 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -5,7 +5,6 @@ #include #include #include -#include /* The feature bitmap for virtio 9P */ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index b8d2852baa4a..2dec9bcde8b4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include "trace.h" -- cgit v1.2.3 From 95aac7b1cd224f568fb83937044cd303ff11b029 Mon Sep 17 00:00:00 2001 From: Shawn Bohrer Date: Wed, 27 Oct 2010 15:34:54 -0700 Subject: epoll: make epoll_wait() use the hrtimer range feature This makes epoll use hrtimers for the timeout value, which prevents epoll_wait() from timing out up to a millisecond early. This mirrors the behavior of select() and poll().
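A hedged user-space sketch (not part of the patch) that demonstrates the behavior being fixed; with no file descriptors registered, epoll_wait() always runs to its timeout:

	#include <stdio.h>
	#include <sys/epoll.h>
	#include <time.h>

	int main(void)
	{
		int epfd = epoll_create(1);
		struct epoll_event ev;
		struct timespec t0, t1;

		clock_gettime(CLOCK_MONOTONIC, &t0);
		epoll_wait(epfd, &ev, 1, 10);	/* 10 ms timeout */
		clock_gettime(CLOCK_MONOTONIC, &t1);

		/*
		 * With hrtimers the elapsed time is at least the requested
		 * 10 ms (plus the estimated slack); with the old jiffies
		 * conversion it could return up to one tick early.
		 */
		printf("slept %ld ns\n",
		       (t1.tv_sec - t0.tv_sec) * 1000000000L +
		       (t1.tv_nsec - t0.tv_nsec));
		return 0;
	}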
Signed-off-by: Shawn Bohrer Cc: Al Viro Acked-by: Davide Libenzi Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 35 +++++++++++++++++++---------------- fs/select.c | 2 +- include/linux/poll.h | 2 ++ 3 files changed, 22 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 256bb7bb102a..8cf07242067d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -77,9 +77,6 @@ /* Maximum number of nesting allowed inside epoll sets */ #define EP_MAX_NESTS 4 -/* Maximum msec timeout value storeable in a long int */ -#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) - #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) #define EP_UNACTIVE_PTR ((void *) -1L) @@ -1117,18 +1114,22 @@ static int ep_send_events(struct eventpoll *ep, static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, long timeout) { - int res, eavail; + int res, eavail, timed_out = 0; unsigned long flags; - long jtimeout; + long slack; wait_queue_t wait; - - /* - * Calculate the timeout by checking for the "infinite" value (-1) - * and the overflow condition. The passed timeout is in milliseconds, - * that why (t * HZ) / 1000. - */ - jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ? - MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; + struct timespec end_time; + ktime_t expires, *to = NULL; + + if (timeout > 0) { + ktime_get_ts(&end_time); + timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC); + slack = select_estimate_accuracy(&end_time); + to = &expires; + *to = timespec_to_ktime(end_time); + } else if (timeout == 0) { + timed_out = 1; + } retry: spin_lock_irqsave(&ep->lock, flags); @@ -1150,7 +1151,7 @@ retry: * to TASK_INTERRUPTIBLE before doing the checks. */ set_current_state(TASK_INTERRUPTIBLE); - if (!list_empty(&ep->rdllist) || !jtimeout) + if (!list_empty(&ep->rdllist) || timed_out) break; if (signal_pending(current)) { res = -EINTR; @@ -1158,7 +1159,9 @@ retry: } spin_unlock_irqrestore(&ep->lock, flags); - jtimeout = schedule_timeout(jtimeout); + if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + timed_out = 1; + spin_lock_irqsave(&ep->lock, flags); } __remove_wait_queue(&ep->wq, &wait); @@ -1176,7 +1179,7 @@ retry: * more luck. 
*/ if (!res && eavail && - !(res = ep_send_events(ep, events, maxevents)) && jtimeout) + !(res = ep_send_events(ep, events, maxevents)) && !timed_out) goto retry; return res; diff --git a/fs/select.c b/fs/select.c index 5f023f911202..b7b10aa30861 100644 --- a/fs/select.c +++ b/fs/select.c @@ -67,7 +67,7 @@ static long __estimate_accuracy(struct timespec *tv) return slack; } -static long select_estimate_accuracy(struct timespec *tv) +long select_estimate_accuracy(struct timespec *tv) { unsigned long ret; struct timespec now; diff --git a/include/linux/poll.h b/include/linux/poll.h index 600cc1fde64d..56e76af78102 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -73,6 +73,8 @@ extern void poll_initwait(struct poll_wqueues *pwq); extern void poll_freewait(struct poll_wqueues *pwq); extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state, ktime_t *expires, unsigned long slack); +extern long select_estimate_accuracy(struct timespec *tv); + static inline int poll_schedule(struct poll_wqueues *pwq, int state) { -- cgit v1.2.3 From 39e3ac2599a5f9aba499b5f8af809108e70a6163 Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 27 Oct 2010 21:25:12 -0400 Subject: jbd2: Fix I/O hang in jbd2_journal_release_jbd_inode This fixes a hang seen in jbd2_journal_release_jbd_inode on a lot of Power 6 systems running with ext4. When we get in the hung state, all I/O to the disk in question gets blocked where we stay indefinitely. Looking at the task list, I can see we are stuck in jbd2_journal_release_jbd_inode waiting on a wake up. I added some debug code to detect this scenario and dump additional data if we were stuck in jbd2_journal_release_jbd_inode for longer than 30 minutes. When it hit, I was able to see that i_flags was 0, suggesting we missed the wake up. This patch changes i_flags to be an unsigned long, uses bit operators to access it, and adds barriers around the accesses. Prior to applying this patch, we were regularly hitting this hang on numerous systems in our test environment. After applying the patch, the hangs no longer occur. Signed-off-by: Brian King Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 12 ++++++++---- fs/jbd2/journal.c | 4 +++- include/linux/jbd2.h | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7c068c189d80..6494c81e3b0a 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -26,7 +26,9 @@ #include #include #include +#include #include +#include /* * Default IO end handler for temporary BJ_IO buffer_heads. @@ -236,7 +238,7 @@ static int journal_submit_data_buffers(journal_t *journal, spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { mapping = jinode->i_vfs_inode->i_mapping; - jinode->i_flags |= JI_COMMIT_RUNNING; + set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); spin_unlock(&journal->j_list_lock); /* * submit the inode data buffers. 
We use writepage @@ -251,7 +253,8 @@ static int journal_submit_data_buffers(journal_t *journal, spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); commit_transaction->t_flushed_data_blocks = 1; - jinode->i_flags &= ~JI_COMMIT_RUNNING; + clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); + smp_mb__after_clear_bit(); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); } spin_unlock(&journal->j_list_lock); @@ -272,7 +275,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal, /* For locking, see the comment in journal_submit_data_buffers() */ spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { - jinode->i_flags |= JI_COMMIT_RUNNING; + set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); spin_unlock(&journal->j_list_lock); err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); if (err) { @@ -288,7 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal, ret = err; } spin_lock(&journal->j_list_lock); - jinode->i_flags &= ~JI_COMMIT_RUNNING; + clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); + smp_mb__after_clear_bit(); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0e8014ea6b94..75e1b5a0bc2d 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -42,12 +42,14 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include #include #include +#include EXPORT_SYMBOL(jbd2_journal_extend); EXPORT_SYMBOL(jbd2_journal_stop); @@ -2206,7 +2208,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal, restart: spin_lock(&journal->j_list_lock); /* Is commit writing out inode - we have to wait */ - if (jinode->i_flags & JI_COMMIT_RUNNING) { + if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) { wait_queue_head_t *wq; DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 0b52924a0cb6..2ae86aa21fce 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -395,7 +395,7 @@ struct jbd2_inode { struct inode *i_vfs_inode; /* Flags of inode [j_list_lock] */ - unsigned int i_flags; + unsigned long i_flags; }; struct jbd2_revoke_table_s; -- cgit v1.2.3 From b853fd364810a241050778124842a8c415c72a69 Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Wed, 27 Oct 2010 21:27:12 -0400 Subject: ext4: avoid null dereference in trace_ext4_mballoc_discard ac->inode is set to null in function ext4_mb_release_group_pa(), and then trace_ext4_mballoc_discard(ac) is called, the kernel will panic. BUG: unable to handle kernel NULL pointer dereference at 000000a4 IP: [] ftrace_raw_event_ext4__mballoc+0x54/0xc0 [ext4] *pdpt = 0000000000abd001 *pde = 0000000000000000 Oops: 0000 [#1] SMP Pid: 550, comm: flush-8:16 Not tainted 2.6.36-rc1 #1 SE7320EP2/Altos G530 EIP: 0060:[] EFLAGS: 00010206 CPU: 1 EIP is at ftrace_raw_event_ext4__mballoc+0x54/0xc0 [ext4] EAX: f32ac840 EBX: f3f1cf88 ECX: f32ac840 EDX: 00000000 ESI: f32ac83c EDI: f880b9d8 EBP: 00000000 ESP: f4b77ae4 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process flush-8:16 (pid: 550, ti=f4b76000 task=f613e540 task.ti=f4b76000) Call Trace: [] ? ext4_mb_release_group_pa+0x121/0x150 [ext4] [] ? ext4_mb_discard_group_preallocations+0x336/0x400 [ext4] [] ? ext4_mb_new_blocks+0x3d1/0x4f0 [ext4] [] ? __make_request+0x10b/0x440 [] ? ext4_ext_map_blocks+0x1334/0x1980 [ext4] [] ? rb_reserve_next_event+0xaa/0x3b0 [] ? ext4_map_blocks+0xd6/0x1d0 [ext4] [] ? 
mpage_da_map_blocks+0xc7/0x8a0 [ext4] [] ? find_get_pages_tag+0x38/0x110 [] ? __pagevec_release+0x15/0x20 [] ? ext4_da_writepages+0x2b5/0x5d0 [ext4] [] ? __writepage+0x0/0x30 [] ? do_writepages+0x14/0x30 [] ? writeback_single_inode+0xa0/0x240 [] ? writeback_sb_inodes+0xc1/0x180 [] ? writeback_inodes_wb+0x88/0x140 [] ? wb_writeback+0x20b/0x320 [] ? lock_timer_base+0x27/0x50 [] ? wb_do_writeback+0x150/0x190 [] ? bdi_writeback_thread+0x88/0x1f0 [] ? complete+0x40/0x60 [] ? bdi_writeback_thread+0x0/0x1f0 [] ? kthread+0x74/0x80 [] ? kthread+0x0/0x80 [] ? kernel_thread_helper+0x6/0x10 Signed-off-by: Wen Congyang Acked-by: Steven Rostedt Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 01e9e0076a92..6a1fcff95f7c 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -796,8 +796,9 @@ DECLARE_EVENT_CLASS(ext4__mballoc, ), TP_fast_assign( - __entry->dev = ac->ac_inode->i_sb->s_dev; - __entry->ino = ac->ac_inode->i_ino; + __entry->dev = ac->ac_sb->s_dev; + __entry->ino = ac->ac_inode ? + ac->ac_inode->i_ino : 0; __entry->result_logical = ac->ac_b_ex.fe_logical; __entry->result_start = ac->ac_b_ex.fe_start; __entry->result_group = ac->ac_b_ex.fe_group; -- cgit v1.2.3 From e6fa0be699449d28a20e815bfe9ce26725ec4962 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2010 21:30:04 -0400 Subject: Add helper function for blkdev_issue_zeroout (sb_issue_discard) This is done the same way as helper sb_issue_discard for blkdev_issue_discard. Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- include/linux/blkdev.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c54906f678f..e5cb4d029689 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -941,6 +941,14 @@ static inline int sb_issue_discard(struct super_block *sb, return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); } +static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, + sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) +{ + return blkdev_issue_zeroout(sb->s_bdev, + block << (sb->s_blocksize_bits - 9), + nr_blocks << (sb->s_blocksize_bits - 9), + gfp_mask, flags); +} extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); -- cgit v1.2.3 From 4d5476164a052e80d4ef430e368e76dbde96801f Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 27 Oct 2010 21:30:07 -0400 Subject: ext4: fix oops in trace_ext4_mb_release_group_pa Our QA reported an oops in the ext4_mb_release_group_pa tracing, and Josef Bacik pointed out that it was because we may have a non-null but uninitialized ac_inode in the allocation context. I can reproduce it when running xfstests with ext4 tracepoints on, on a CONFIG_SLAB_DEBUG kernel. We call trace_ext4_mb_release_group_pa from 2 places, ext4_mb_discard_group_preallocations and ext4_mb_discard_lg_preallocations In both cases we allocate an ac as a container just for tracing (!) and never fill in the ac_inode. There's no reason to be assigning, testing, or printing it as far as I can see, so just remove it from the tracepoint. 
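Taken together with the previous tracepoint fix, these two commits illustrate a general rule: a TP_fast_assign() body must treat caller-supplied pointers as untrusted, guarding against NULL, and when the structure may be entirely uninitialized (as here) the only safe fix is to stop reading it. A hedged distillation of the guarded form used by the earlier fix:

	TP_fast_assign(
		__entry->dev	= sb->s_dev;
		/* group PAs carry no inode; never dereference blindly */
		__entry->ino	= inode ? inode->i_ino : 0;
	),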
Signed-off-by: Eric Sandeen Reviewed-by: Josef Bacik Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 6a1fcff95f7c..fbdfa3a6bbd8 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -432,7 +432,6 @@ TRACE_EVENT(ext4_mb_release_group_pa, TP_STRUCT__entry( __field( dev_t, dev ) - __field( ino_t, ino ) __field( __u64, pa_pstart ) __field( __u32, pa_len ) @@ -440,8 +439,6 @@ TRACE_EVENT(ext4_mb_release_group_pa, TP_fast_assign( __entry->dev = sb->s_dev; - __entry->ino = (ac && ac->ac_inode) ? - ac->ac_inode->i_ino : 0; __entry->pa_pstart = pa->pa_pstart; __entry->pa_len = pa->pa_len; ), -- cgit v1.2.3 From 3e1e5f501632460184a98237d5460c521510535e Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 27 Oct 2010 21:30:07 -0400 Subject: ext4: don't use ext4_allocation_contexts for tracing Many tracepoints were populating an ext4_allocation_context to pass in, but this requires a slab allocation even when tracepoints are off. In fact, 4 of 5 of these allocations were only for tracing. In addition, we were only using a small fraction of the 144 bytes of this structure for this purpose. We can do away with all these alloc/frees of the ac and simply pass in the bits we care about, instead. I tested this by turning on tracing and running through xfstests on x86_64. I did not actually do anything with the trace output, however. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 81 +++++++-------------------------------------- include/trace/events/ext4.h | 51 ++++++++++++++++------------ 2 files changed, 41 insertions(+), 91 deletions(-) (limited to 'include') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index ccdfec6acb75..3f62df50f483 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3591,8 +3591,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac) */ static noinline_for_stack int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, - struct ext4_prealloc_space *pa, - struct ext4_allocation_context *ac) + struct ext4_prealloc_space *pa) { struct super_block *sb = e4b->bd_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -3610,11 +3609,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, BUG_ON(group != e4b->bd_group && pa->pa_len != 0); end = bit + pa->pa_len; - if (ac) { - ac->ac_sb = sb; - ac->ac_inode = pa->pa_inode; - } - while (bit < end) { bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); if (bit >= end) @@ -3625,16 +3619,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, (unsigned) next - bit, (unsigned) group); free += next - bit; - if (ac) { - ac->ac_b_ex.fe_group = group; - ac->ac_b_ex.fe_start = bit; - ac->ac_b_ex.fe_len = next - bit; - ac->ac_b_ex.fe_logical = 0; - trace_ext4_mballoc_discard(ac); - } - - trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit, - next - bit); + trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); + trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa, + grp_blk_start + bit, next - bit); mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); bit = next + 1; } @@ -3657,29 +3644,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, static noinline_for_stack int ext4_mb_release_group_pa(struct ext4_buddy *e4b, - struct ext4_prealloc_space *pa, - struct 
ext4_allocation_context *ac) + struct ext4_prealloc_space *pa) { struct super_block *sb = e4b->bd_sb; ext4_group_t group; ext4_grpblk_t bit; - trace_ext4_mb_release_group_pa(sb, ac, pa); + trace_ext4_mb_release_group_pa(sb, pa); BUG_ON(pa->pa_deleted == 0); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); BUG_ON(group != e4b->bd_group && pa->pa_len != 0); mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); - - if (ac) { - ac->ac_sb = sb; - ac->ac_inode = NULL; - ac->ac_b_ex.fe_group = group; - ac->ac_b_ex.fe_start = bit; - ac->ac_b_ex.fe_len = pa->pa_len; - ac->ac_b_ex.fe_logical = 0; - trace_ext4_mballoc_discard(ac); - } + trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); return 0; } @@ -3700,7 +3677,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, struct ext4_group_info *grp = ext4_get_group_info(sb, group); struct buffer_head *bitmap_bh = NULL; struct ext4_prealloc_space *pa, *tmp; - struct ext4_allocation_context *ac; struct list_head list; struct ext4_buddy e4b; int err; @@ -3729,9 +3705,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; INIT_LIST_HEAD(&list); - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); - if (ac) - ac->ac_sb = sb; repeat: ext4_lock_group(sb, group); list_for_each_entry_safe(pa, tmp, @@ -3786,9 +3759,9 @@ repeat: spin_unlock(pa->pa_obj_lock); if (pa->pa_type == MB_GROUP_PA) - ext4_mb_release_group_pa(&e4b, pa, ac); + ext4_mb_release_group_pa(&e4b, pa); else - ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); + ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); list_del(&pa->u.pa_tmp_list); call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); @@ -3796,8 +3769,6 @@ repeat: out: ext4_unlock_group(sb, group); - if (ac) - kmem_cache_free(ext4_ac_cachep, ac); ext4_mb_unload_buddy(&e4b); put_bh(bitmap_bh); return free; @@ -3818,7 +3789,6 @@ void ext4_discard_preallocations(struct inode *inode) struct super_block *sb = inode->i_sb; struct buffer_head *bitmap_bh = NULL; struct ext4_prealloc_space *pa, *tmp; - struct ext4_allocation_context *ac; ext4_group_t group = 0; struct list_head list; struct ext4_buddy e4b; @@ -3834,11 +3804,6 @@ void ext4_discard_preallocations(struct inode *inode) INIT_LIST_HEAD(&list); - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); - if (ac) { - ac->ac_sb = sb; - ac->ac_inode = inode; - } repeat: /* first, collect all pa's in the inode */ spin_lock(&ei->i_prealloc_lock); @@ -3908,7 +3873,7 @@ repeat: ext4_lock_group(sb, group); list_del(&pa->pa_group_list); - ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); + ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); @@ -3917,8 +3882,6 @@ repeat: list_del(&pa->u.pa_tmp_list); call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); } - if (ac) - kmem_cache_free(ext4_ac_cachep, ac); } /* @@ -4116,14 +4079,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, struct ext4_buddy e4b; struct list_head discard_list; struct ext4_prealloc_space *pa, *tmp; - struct ext4_allocation_context *ac; mb_debug(1, "discard locality group preallocation\n"); INIT_LIST_HEAD(&discard_list); - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); - if (ac) - ac->ac_sb = sb; spin_lock(&lg->lg_prealloc_lock); list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], @@ -4175,15 +4134,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, } ext4_lock_group(sb, group); list_del(&pa->pa_group_list); - 
ext4_mb_release_group_pa(&e4b, pa, ac); + ext4_mb_release_group_pa(&e4b, pa); ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); list_del(&pa->u.pa_tmp_list); call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); } - if (ac) - kmem_cache_free(ext4_ac_cachep, ac); } /* @@ -4547,7 +4504,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, { struct buffer_head *bitmap_bh = NULL; struct super_block *sb = inode->i_sb; - struct ext4_allocation_context *ac = NULL; struct ext4_group_desc *gdp; unsigned long freed = 0; unsigned int overflow; @@ -4602,12 +4558,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, if (!ext4_should_writeback_data(inode)) flags |= EXT4_FREE_BLOCKS_METADATA; - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); - if (ac) { - ac->ac_inode = inode; - ac->ac_sb = sb; - } - do_more: overflow = 0; ext4_get_group_no_and_offset(sb, block, &block_group, &bit); @@ -4665,12 +4615,7 @@ do_more: BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); } #endif - if (ac) { - ac->ac_b_ex.fe_group = block_group; - ac->ac_b_ex.fe_start = bit; - ac->ac_b_ex.fe_len = count; - trace_ext4_mballoc_free(ac); - } + trace_ext4_mballoc_free(sb, inode, block_group, bit, count); err = ext4_mb_load_buddy(sb, block_group, &e4b); if (err) @@ -4741,7 +4686,5 @@ error_return: dquot_free_block(inode, freed); brelse(bitmap_bh); ext4_std_error(sb, err); - if (ac) - kmem_cache_free(ext4_ac_cachep, ac); return; } diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index fbdfa3a6bbd8..b5f4938d612d 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -396,11 +396,11 @@ DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa, TRACE_EVENT(ext4_mb_release_inode_pa, TP_PROTO(struct super_block *sb, - struct ext4_allocation_context *ac, + struct inode *inode, struct ext4_prealloc_space *pa, unsigned long long block, unsigned int count), - TP_ARGS(sb, ac, pa, block, count), + TP_ARGS(sb, inode, pa, block, count), TP_STRUCT__entry( __field( dev_t, dev ) @@ -412,8 +412,7 @@ TRACE_EVENT(ext4_mb_release_inode_pa, TP_fast_assign( __entry->dev = sb->s_dev; - __entry->ino = (ac && ac->ac_inode) ? - ac->ac_inode->i_ino : 0; + __entry->ino = inode->i_ino; __entry->block = block; __entry->count = count; ), @@ -425,10 +424,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa, TRACE_EVENT(ext4_mb_release_group_pa, TP_PROTO(struct super_block *sb, - struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa), - TP_ARGS(sb, ac, pa), + TP_ARGS(sb, pa), TP_STRUCT__entry( __field( dev_t, dev ) @@ -779,47 +777,56 @@ TRACE_EVENT(ext4_mballoc_prealloc, ); DECLARE_EVENT_CLASS(ext4__mballoc, - TP_PROTO(struct ext4_allocation_context *ac), + TP_PROTO(struct super_block *sb, + struct inode *inode, + ext4_group_t group, + ext4_grpblk_t start, + ext4_grpblk_t len), - TP_ARGS(ac), + TP_ARGS(sb, inode, group, start, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( __u32, result_logical ) __field( int, result_start ) __field( __u32, result_group ) __field( int, result_len ) ), TP_fast_assign( - __entry->dev = ac->ac_sb->s_dev; - __entry->ino = ac->ac_inode ? - ac->ac_inode->i_ino : 0; - __entry->result_logical = ac->ac_b_ex.fe_logical; - __entry->result_start = ac->ac_b_ex.fe_start; - __entry->result_group = ac->ac_b_ex.fe_group; - __entry->result_len = ac->ac_b_ex.fe_len; + __entry->dev = sb->s_dev; + __entry->ino = inode ? 
inode->i_ino : 0; + __entry->result_start = start; + __entry->result_group = group; + __entry->result_len = len; ), - TP_printk("dev %s inode %lu extent %u/%d/%u@%u ", + TP_printk("dev %s inode %lu extent %u/%d/%u ", jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, __entry->result_group, __entry->result_start, - __entry->result_len, __entry->result_logical) + __entry->result_len) ); DEFINE_EVENT(ext4__mballoc, ext4_mballoc_discard, - TP_PROTO(struct ext4_allocation_context *ac), + TP_PROTO(struct super_block *sb, + struct inode *inode, + ext4_group_t group, + ext4_grpblk_t start, + ext4_grpblk_t len), - TP_ARGS(ac) + TP_ARGS(sb, inode, group, start, len) ); DEFINE_EVENT(ext4__mballoc, ext4_mballoc_free, - TP_PROTO(struct ext4_allocation_context *ac), + TP_PROTO(struct super_block *sb, + struct inode *inode, + ext4_group_t group, + ext4_grpblk_t start, + ext4_grpblk_t len), - TP_ARGS(ac) + TP_ARGS(sb, inode, group, start, len) ); TRACE_EVENT(ext4_forget, -- cgit v1.2.3 From 367a51a339020ba4d9edb0ce0f21d65bd50b00c9 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2010 21:30:11 -0400 Subject: fs: Add FITRIM ioctl Adds a filesystem-independent ioctl to allow implementation of file system batched discard support. It takes an fstrim_range structure as an argument. fstrim_range is defined in include/linux/fs.h and its definition is as follows. struct fstrim_range { start; len; minlen; } start - first Byte to trim len - number of Bytes to trim from start minlen - minimum extent length to trim, free extents shorter than this number of Bytes will be ignored. This will be rounded up to fs block size. It is also possible to specify NULL as an argument. In this case the arguments will be set as follows: start = 0; len = ULLONG_MAX; minlen = 0; So it will trim the whole file system in one run. After the FITRIM is done, the number of actually discarded Bytes is stored in fstrim_range.len to give the user better insight into how much storage space has really been released for wear-leveling. Signed-off-by: Lukas Czerner Reviewed-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ioctl.c | 39 +++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 8 ++++++++ 2 files changed, 47 insertions(+) (limited to 'include') diff --git a/fs/ioctl.c b/fs/ioctl.c index f855ea4fc888..e92fdbb3bc3a 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -530,6 +530,41 @@ static int ioctl_fsthaw(struct file *filp) return thaw_super(sb); } +static int ioctl_fstrim(struct file *filp, void __user *argp) +{ + struct super_block *sb = filp->f_path.dentry->d_inode->i_sb; + struct fstrim_range range; + int ret = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* If filesystem doesn't support trim feature, return. */ + if (sb->s_op->trim_fs == NULL) + return -EOPNOTSUPP; + + /* If a blockdevice-backed filesystem isn't specified, return EINVAL. */ + if (sb->s_bdev == NULL) + return -EINVAL; + + if (argp == NULL) { + range.start = 0; + range.len = ULLONG_MAX; + range.minlen = 0; + } else if (copy_from_user(&range, argp, sizeof(range))) + return -EFAULT; + + ret = sb->s_op->trim_fs(sb, &range); + if (ret < 0) + return ret; + + if ((argp != NULL) && + (copy_to_user(argp, &range, sizeof(range)))) + return -EFAULT; + + return 0; +} + /* * When you add any new common ioctls to the switches above and below * please update compat_sys_ioctl() too.
@@ -580,6 +615,10 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, error = ioctl_fsthaw(filp); break; + case FITRIM: + error = ioctl_fstrim(filp, argp); + break; + case FS_IOC_FIEMAP: return ioctl_fiemap(filp, arg); diff --git a/include/linux/fs.h b/include/linux/fs.h index 63d069bd80b7..7008268e9b5a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -32,6 +32,12 @@ #define SEEK_END 2 /* seek relative to end of file */ #define SEEK_MAX SEEK_END +struct fstrim_range { + uint64_t start; + uint64_t len; + uint64_t minlen; +}; + /* And dynamically-tunable limits and defaults: */ struct files_stat_struct { int nr_files; /* read only */ @@ -316,6 +322,7 @@ struct inodes_stat_t { #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ #define FIFREEZE _IOWR('X', 119, int) /* Freeze */ #define FITHAW _IOWR('X', 120, int) /* Thaw */ +#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ #define FS_IOC_GETFLAGS _IOR('f', 1, long) #define FS_IOC_SETFLAGS _IOW('f', 2, long) @@ -1581,6 +1588,7 @@ struct super_operations { ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); #endif int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); + int (*trim_fs) (struct super_block *, struct fstrim_range *); }; /* -- cgit v1.2.3 From 5b41d92437f1ae19b3f3ffa3b16589fd5df50ac0 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 27 Oct 2010 21:30:13 -0400 Subject: ext4: implement writeback livelock avoidance using page tagging This is analogous to Jan Kara's commit, f446daaea9d4a420d16c606f755f3689dcb2d0ce mm: implement writeback livelock avoidance using page tagging but since we forked write_cache_pages, we need to reimplement it there (and in ext4_da_writepages, since range_cyclic handling was moved to there) If you start a large buffered IO to a file, and then set fsync after it, you'll find that fsync does not complete until the other IO stops. If you continue re-dirtying the file (say, putting dd with conv=notrunc in a loop), when fsync finally completes (after all IO is done), it reports via tracing that it has written many more pages than the file contains; in other words it has synced and re-synced pages in the file multiple times. This then leads to problems with our writeback_index update, since it advances it by pages written, and essentially sets writeback_index off the end of the file... With the following patch, we only sync as much as was dirty at the time of the sync. 
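The core of the avoidance is a two-pass scheme: for data-integrity writeback, tag every page that is dirty now, then write back only tagged pages, leaving anything dirtied later for the next pass. A minimal sketch of the pattern the hunks below implement (surrounding ext4 code omitted):

	int tag;

	/* Snapshot the currently-dirty pages for WB_SYNC_ALL by tagging
	 * them TOWRITE; pages dirtied after this point carry only the
	 * DIRTY tag, so a continuously re-dirtied file can no longer
	 * livelock fsync. */
	if (wbc->sync_mode == WB_SYNC_ALL) {
		tag_pages_for_writeback(mapping, index, end);
		tag = PAGECACHE_TAG_TOWRITE;
	} else {
		tag = PAGECACHE_TAG_DIRTY;
	}

	while (!done && (index <= end)) {
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
				min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);
		if (nr_pages == 0)
			break;
		/* ... lock, write out and release each page ... */
	}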
Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 18 +++++++++++++++--- include/linux/writeback.h | 2 ++ 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6671fcbb5293..c9ea95ba5fde 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2809,16 +2809,21 @@ static int write_cache_pages_da(struct address_space *mapping, pgoff_t index; pgoff_t end; /* Inclusive */ long nr_to_write = wbc->nr_to_write; + int tag; pagevec_init(&pvec, 0); index = wbc->range_start >> PAGE_CACHE_SHIFT; end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->sync_mode == WB_SYNC_ALL) + tag = PAGECACHE_TAG_TOWRITE; + else + tag = PAGECACHE_TAG_DIRTY; + while (!done && (index <= end)) { int i; - nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); if (nr_pages == 0) break; @@ -2923,6 +2928,7 @@ static int ext4_da_writepages(struct address_space *mapping, long desired_nr_to_write, nr_to_writebump = 0; loff_t range_start = wbc->range_start; struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); + pgoff_t end; trace_ext4_da_writepages(inode, wbc); @@ -2958,8 +2964,11 @@ static int ext4_da_writepages(struct address_space *mapping, wbc->range_start = index << PAGE_CACHE_SHIFT; wbc->range_end = LLONG_MAX; wbc->range_cyclic = 0; - } else + end = -1; + } else { index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + } /* * This works around two forms of stupidity. The first is in @@ -3000,6 +3009,9 @@ static int ext4_da_writepages(struct address_space *mapping, pages_skipped = wbc->pages_skipped; retry: + if (wbc->sync_mode == WB_SYNC_ALL) + tag_pages_for_writeback(mapping, index, end); + while (!ret && wbc->nr_to_write > 0) { /* diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..3d132bfb4f3d 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -143,6 +143,8 @@ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, int generic_writepages(struct address_space *mapping, struct writeback_control *wbc); +void tag_pages_for_writeback(struct address_space *mapping, + pgoff_t start, pgoff_t end); int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data); -- cgit v1.2.3 From 7f93cff90fa9be6ed45f6189e136153d1d8631b0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 27 Oct 2010 21:30:13 -0400 Subject: ext4: fix kernel oops if the journal superblock has a non-zero j_errno Commit 84061e0 fixed an accounting bug only to introduce the possibility of a kernel OOPS if the journal has a non-zero j_errno field indicating that the file system had detected a fs inconsistency. After the journal replay, if the journal superblock indicates that the file system has an error, this indication is transferred to the file system and then ext4_commit_super() is called to write this to the disk. But since the percpu counters are now initialized after the journal replay, the call to ext4_commit_super() will cause a kernel oops since it needs to use the percpu counters in the ext4 superblock structure. The fix is to skip setting the ext4 free block and free inode fields if the percpu counter has not been set. Thanks to Ken Sumrall for reporting and analyzing the root causes of this bug.
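The failing order and the guard, schematically (a sketch assembled from the change below; call sites abbreviated):

	/* With a journal whose j_errno is set:
	 *
	 *   journal replay        -> error is transferred to the superblock
	 *   ext4_commit_super()   -> percpu_counter_sum_positive() dereferences
	 *                            fbc->counters, which is still NULL
	 *   percpu counter init   -> only happens later during mount
	 *
	 * The guarded form is safe in either order: */
	if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter))
		ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
				&EXT4_SB(sb)->s_freeblocks_counter));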
Addresses-Google-Bug: #3054080 Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 7 +++++-- include/linux/percpu_counter.h | 10 ++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9ce3b67b7269..c9e06c647ce8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3964,9 +3964,12 @@ static int ext4_commit_super(struct super_block *sb, int sync) else es->s_kbytes_written = cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); - ext4_free_blocks_count_set(es, percpu_counter_sum_positive( + if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter)) + ext4_free_blocks_count_set(es, percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeblocks_counter)); - es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( + if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter)) + es->s_free_inodes_count = + cpu_to_le32(percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeinodes_counter)); sb->s_dirt = 0; BUFFER_TRACE(sbh, "marking dirty"); diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 8a7d510ffa9c..46f6ba56fa91 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -78,6 +78,11 @@ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) return 1; } +static inline int percpu_counter_initialized(struct percpu_counter *fbc) +{ + return (fbc->counters != NULL); +} + #else struct percpu_counter { @@ -143,6 +148,11 @@ static inline s64 percpu_counter_sum(struct percpu_counter *fbc) return percpu_counter_read(fbc); } +static inline int percpu_counter_initialized(struct percpu_counter *fbc) +{ + return 1; +} + #endif /* CONFIG_SMP */ static inline void percpu_counter_inc(struct percpu_counter *fbc) -- cgit v1.2.3 From a269029d0e2192046be4c07ed78a45022469ee4c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 27 Oct 2010 22:08:50 -0400 Subject: ext4,jbd2: convert tracepoints to use major/minor numbers Unfortunately perf can't deal with anything other than direct structure accesses in the TP_printk() section. It will drop dead when it sees jbd2_dev_to_name() in the "print fmt" section of the tracepoint. 
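Every converted event follows the same mechanical pattern; in sketch form (event-specific fields elided):

	TP_STRUCT__entry(
		__field( int,	dev_major	)
		__field( int,	dev_minor	)
	),

	TP_fast_assign(
		/* split the dev_t once, at trace time */
		__entry->dev_major = MAJOR(sb->s_dev);
		__entry->dev_minor = MINOR(sb->s_dev);
	),

	/* the "print fmt" is now plain integer formatting, which perf
	 * can evaluate, instead of a jbd2_dev_to_name() function call */
	TP_printk("dev %d,%d ...", __entry->dev_major, __entry->dev_minor)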
Addresses-Google-Bug: 3138508 Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 329 +++++++++++++++++++++++++++----------------- include/trace/events/jbd2.h | 78 ++++++----- 2 files changed, 251 insertions(+), 156 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index b5f4938d612d..8f59db107bbb 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -21,7 +21,8 @@ TRACE_EVENT(ext4_free_inode, TP_ARGS(inode), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( umode_t, mode ) __field( uid_t, uid ) @@ -30,7 +31,8 @@ TRACE_EVENT(ext4_free_inode, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->mode = inode->i_mode; __entry->uid = inode->i_uid; @@ -38,9 +40,10 @@ TRACE_EVENT(ext4_free_inode, __entry->blocks = inode->i_blocks; ), - TP_printk("dev %s ino %lu mode 0%o uid %u gid %u blocks %llu", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->mode, __entry->uid, __entry->gid, + TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->mode, + __entry->uid, __entry->gid, (unsigned long long) __entry->blocks) ); @@ -50,20 +53,22 @@ TRACE_EVENT(ext4_request_inode, TP_ARGS(dir, mode), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, dir ) __field( umode_t, mode ) ), TP_fast_assign( - __entry->dev = dir->i_sb->s_dev; + __entry->dev_major = MAJOR(dir->i_sb->s_dev); + __entry->dev_minor = MINOR(dir->i_sb->s_dev); __entry->dir = dir->i_ino; __entry->mode = mode; ), - TP_printk("dev %s dir %lu mode 0%o", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->dir, - __entry->mode) + TP_printk("dev %d,%d dir %lu mode 0%o", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->dir, __entry->mode) ); TRACE_EVENT(ext4_allocate_inode, @@ -72,21 +77,24 @@ TRACE_EVENT(ext4_allocate_inode, TP_ARGS(inode, dir, mode), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( ino_t, dir ) __field( umode_t, mode ) ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->dir = dir->i_ino; __entry->mode = mode; ), - TP_printk("dev %s ino %lu dir %lu mode 0%o", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %lu dir %lu mode 0%o", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, (unsigned long) __entry->dir, __entry->mode) ); @@ -98,7 +106,8 @@ DECLARE_EVENT_CLASS(ext4__write_begin, TP_ARGS(inode, pos, len, flags), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned int, len ) @@ -106,15 +115,17 @@ DECLARE_EVENT_CLASS(ext4__write_begin, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->pos = pos; __entry->len = len; __entry->flags = flags; ), - TP_printk("dev %s ino %lu pos %llu len %u flags 
%u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->pos, __entry->len, __entry->flags) ); @@ -141,7 +152,8 @@ DECLARE_EVENT_CLASS(ext4__write_end, TP_ARGS(inode, pos, len, copied), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( loff_t, pos ) __field( unsigned int, len ) @@ -149,16 +161,18 @@ DECLARE_EVENT_CLASS(ext4__write_end, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->pos = pos; __entry->len = len; __entry->copied = copied; ), - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) + TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->pos, + __entry->len, __entry->copied) ); DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end, @@ -199,21 +213,23 @@ TRACE_EVENT(ext4_writepage, TP_ARGS(inode, page), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( pgoff_t, index ) ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->index = page->index; ), - TP_printk("dev %s ino %lu page_index %lu", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->index) + TP_printk("dev %d,%d ino %lu page_index %lu", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->index) ); TRACE_EVENT(ext4_da_writepages, @@ -222,7 +238,8 @@ TRACE_EVENT(ext4_da_writepages, TP_ARGS(inode, wbc), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( long, nr_to_write ) __field( long, pages_skipped ) @@ -236,7 +253,8 @@ TRACE_EVENT(ext4_da_writepages, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; @@ -249,8 +267,8 @@ TRACE_EVENT(ext4_da_writepages, __entry->writeback_index = inode->i_mapping->writeback_index; ), - TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d writeback_index %lu", - jbd2_dev_to_name(__entry->dev), + TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d writeback_index %lu", + __entry->dev_major, __entry->dev_minor, (unsigned long) __entry->ino, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, __entry->nonblocking, @@ -265,7 +283,8 @@ TRACE_EVENT(ext4_da_write_pages, TP_ARGS(inode, mpd), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( __u64, b_blocknr ) __field( __u32, b_size ) @@ -276,7 +295,8 @@ TRACE_EVENT(ext4_da_write_pages, ), TP_fast_assign( - 
__entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->b_blocknr = mpd->b_blocknr; __entry->b_size = mpd->b_size; @@ -286,8 +306,9 @@ TRACE_EVENT(ext4_da_write_pages, __entry->pages_written = mpd->pages_written; ), - TP_printk("dev %s ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->b_blocknr, __entry->b_size, __entry->b_state, __entry->first_page, __entry->io_done, __entry->pages_written) @@ -300,7 +321,8 @@ TRACE_EVENT(ext4_da_writepages_result, TP_ARGS(inode, wbc, ret, pages_written), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( int, ret ) __field( int, pages_written ) @@ -310,7 +332,8 @@ TRACE_EVENT(ext4_da_writepages_result, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->ret = ret; __entry->pages_written = pages_written; @@ -319,8 +342,8 @@ TRACE_EVENT(ext4_da_writepages_result, __entry->writeback_index = inode->i_mapping->writeback_index; ), - TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld more_io %d writeback_index %lu", - jbd2_dev_to_name(__entry->dev), + TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld more_io %d writeback_index %lu", + __entry->dev_major, __entry->dev_minor, (unsigned long) __entry->ino, __entry->ret, __entry->pages_written, __entry->pages_skipped, __entry->more_io, @@ -334,20 +357,23 @@ TRACE_EVENT(ext4_discard_blocks, TP_ARGS(sb, blk, count), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( __u64, blk ) __field( __u64, count ) ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); __entry->blk = blk; __entry->count = count; ), - TP_printk("dev %s blk %llu count %llu", - jbd2_dev_to_name(__entry->dev), __entry->blk, __entry->count) + TP_printk("dev %d,%d blk %llu count %llu", + __entry->dev_major, __entry->dev_minor, + __entry->blk, __entry->count) ); DECLARE_EVENT_CLASS(ext4__mb_new_pa, @@ -357,7 +383,8 @@ DECLARE_EVENT_CLASS(ext4__mb_new_pa, TP_ARGS(ac, pa), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( __u64, pa_pstart ) __field( __u32, pa_len ) @@ -366,16 +393,18 @@ DECLARE_EVENT_CLASS(ext4__mb_new_pa, ), TP_fast_assign( - __entry->dev = ac->ac_sb->s_dev; + __entry->dev_major = MAJOR(ac->ac_sb->s_dev); + __entry->dev_minor = MINOR(ac->ac_sb->s_dev); __entry->ino = ac->ac_inode->i_ino; __entry->pa_pstart = pa->pa_pstart; __entry->pa_len = pa->pa_len; __entry->pa_lstart = pa->pa_lstart; ), - TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart) + TP_printk("dev %d,%d ino %lu pstart %llu len %u lstart %llu", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->pa_pstart, + __entry->pa_len, __entry->pa_lstart) 
); DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_inode_pa, @@ -403,7 +432,8 @@ TRACE_EVENT(ext4_mb_release_inode_pa, TP_ARGS(sb, inode, pa, block, count), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( __u64, block ) __field( __u32, count ) @@ -411,15 +441,16 @@ TRACE_EVENT(ext4_mb_release_inode_pa, ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); __entry->ino = inode->i_ino; __entry->block = block; __entry->count = count; ), - TP_printk("dev %s ino %lu block %llu count %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->block, __entry->count) + TP_printk("dev %d,%d ino %lu block %llu count %u", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->block, __entry->count) ); TRACE_EVENT(ext4_mb_release_group_pa, @@ -429,20 +460,23 @@ TRACE_EVENT(ext4_mb_release_group_pa, TP_ARGS(sb, pa), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( __u64, pa_pstart ) __field( __u32, pa_len ) ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); __entry->pa_pstart = pa->pa_pstart; __entry->pa_len = pa->pa_len; ), - TP_printk("dev %s pstart %llu len %u", - jbd2_dev_to_name(__entry->dev), __entry->pa_pstart, __entry->pa_len) + TP_printk("dev %d,%d pstart %llu len %u", + __entry->dev_major, __entry->dev_minor, + __entry->pa_pstart, __entry->pa_len) ); TRACE_EVENT(ext4_discard_preallocations, @@ -451,18 +485,21 @@ TRACE_EVENT(ext4_discard_preallocations, TP_ARGS(inode), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; ), - TP_printk("dev %s ino %lu", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) + TP_printk("dev %d,%d ino %lu", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino) ); TRACE_EVENT(ext4_mb_discard_preallocations, @@ -471,18 +508,20 @@ TRACE_EVENT(ext4_mb_discard_preallocations, TP_ARGS(sb, needed), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( int, needed ) ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); __entry->needed = needed; ), - TP_printk("dev %s needed %d", - jbd2_dev_to_name(__entry->dev), __entry->needed) + TP_printk("dev %d,%d needed %d", + __entry->dev_major, __entry->dev_minor, __entry->needed) ); TRACE_EVENT(ext4_request_blocks, @@ -491,7 +530,8 @@ TRACE_EVENT(ext4_request_blocks, TP_ARGS(ar), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( unsigned int, flags ) __field( unsigned int, len ) @@ -504,7 +544,8 @@ TRACE_EVENT(ext4_request_blocks, ), TP_fast_assign( - __entry->dev = ar->inode->i_sb->s_dev; + __entry->dev_major = MAJOR(ar->inode->i_sb->s_dev); + __entry->dev_minor = MINOR(ar->inode->i_sb->s_dev); __entry->ino = ar->inode->i_ino; __entry->flags = ar->flags; __entry->len = ar->len; @@ -516,8 +557,9 @@ TRACE_EVENT(ext4_request_blocks, __entry->pright = ar->pright; ), - TP_printk("dev %s ino %lu flags %u len %u lblk %llu goal 
%llu lleft %llu lright %llu pleft %llu pright %llu ", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->flags, __entry->len, (unsigned long long) __entry->logical, (unsigned long long) __entry->goal, @@ -533,7 +575,8 @@ TRACE_EVENT(ext4_allocate_blocks, TP_ARGS(ar, block), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( __u64, block ) __field( unsigned int, flags ) @@ -547,7 +590,8 @@ TRACE_EVENT(ext4_allocate_blocks, ), TP_fast_assign( - __entry->dev = ar->inode->i_sb->s_dev; + __entry->dev_major = MAJOR(ar->inode->i_sb->s_dev); + __entry->dev_minor = MINOR(ar->inode->i_sb->s_dev); __entry->ino = ar->inode->i_ino; __entry->block = block; __entry->flags = ar->flags; @@ -560,9 +604,10 @@ TRACE_EVENT(ext4_allocate_blocks, __entry->pright = ar->pright; ), - TP_printk("dev %s ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->flags, __entry->len, __entry->block, + TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->flags, + __entry->len, __entry->block, (unsigned long long) __entry->logical, (unsigned long long) __entry->goal, (unsigned long long) __entry->lleft, @@ -578,7 +623,8 @@ TRACE_EVENT(ext4_free_blocks, TP_ARGS(inode, block, count, flags), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( umode_t, mode ) __field( __u64, block ) @@ -587,7 +633,8 @@ TRACE_EVENT(ext4_free_blocks, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->mode = inode->i_mode; __entry->block = block; @@ -595,8 +642,9 @@ TRACE_EVENT(ext4_free_blocks, __entry->flags = flags; ), - TP_printk("dev %s ino %lu mode 0%o block %llu count %lu flags %d", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %lu mode 0%o block %llu count %lu flags %d", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->mode, __entry->block, __entry->count, __entry->flags) ); @@ -607,7 +655,8 @@ TRACE_EVENT(ext4_sync_file, TP_ARGS(file, datasync), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( ino_t, parent ) __field( int, datasync ) @@ -616,14 +665,16 @@ TRACE_EVENT(ext4_sync_file, TP_fast_assign( struct dentry *dentry = file->f_path.dentry; - __entry->dev = dentry->d_inode->i_sb->s_dev; + __entry->dev_major = MAJOR(dentry->d_inode->i_sb->s_dev); + __entry->dev_minor = MINOR(dentry->d_inode->i_sb->s_dev); __entry->ino = dentry->d_inode->i_ino; __entry->datasync = datasync; __entry->parent = dentry->d_parent->d_inode->i_ino; ), - TP_printk("dev %s ino %ld parent %ld datasync %d ", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %ld parent %ld datasync %d ", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, (unsigned long) __entry->parent, 
__entry->datasync) ); @@ -633,18 +684,20 @@ TRACE_EVENT(ext4_sync_fs, TP_ARGS(sb, wait), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( int, wait ) ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); __entry->wait = wait; ), - TP_printk("dev %s wait %d", jbd2_dev_to_name(__entry->dev), - __entry->wait) + TP_printk("dev %d,%d wait %d", __entry->dev_major, + __entry->dev_minor, __entry->wait) ); TRACE_EVENT(ext4_alloc_da_blocks, @@ -653,21 +706,24 @@ TRACE_EVENT(ext4_alloc_da_blocks, TP_ARGS(inode), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( unsigned int, data_blocks ) __field( unsigned int, meta_blocks ) ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks; __entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; ), - TP_printk("dev %s ino %lu data_blocks %u meta_blocks %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %lu data_blocks %u meta_blocks %u", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->data_blocks, __entry->meta_blocks) ); @@ -677,7 +733,8 @@ TRACE_EVENT(ext4_mballoc_alloc, TP_ARGS(ac), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( __u16, found ) __field( __u16, groups ) @@ -700,7 +757,8 @@ TRACE_EVENT(ext4_mballoc_alloc, ), TP_fast_assign( - __entry->dev = ac->ac_inode->i_sb->s_dev; + __entry->dev_major = MAJOR(ac->ac_inode->i_sb->s_dev); + __entry->dev_minor = MINOR(ac->ac_inode->i_sb->s_dev); __entry->ino = ac->ac_inode->i_ino; __entry->found = ac->ac_found; __entry->flags = ac->ac_flags; @@ -722,10 +780,11 @@ TRACE_EVENT(ext4_mballoc_alloc, __entry->result_len = ac->ac_f_ex.fe_len; ), - TP_printk("dev %s inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u " + TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u " "result %u/%d/%u@%u blks %u grps %u cr %u flags 0x%04x " "tail %u broken %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->orig_group, __entry->orig_start, __entry->orig_len, __entry->orig_logical, __entry->goal_group, __entry->goal_start, @@ -743,7 +802,8 @@ TRACE_EVENT(ext4_mballoc_prealloc, TP_ARGS(ac), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( __u32, orig_logical ) __field( int, orig_start ) @@ -756,7 +816,8 @@ TRACE_EVENT(ext4_mballoc_prealloc, ), TP_fast_assign( - __entry->dev = ac->ac_inode->i_sb->s_dev; + __entry->dev_major = MAJOR(ac->ac_inode->i_sb->s_dev); + __entry->dev_minor = MINOR(ac->ac_inode->i_sb->s_dev); __entry->ino = ac->ac_inode->i_ino; __entry->orig_logical = ac->ac_o_ex.fe_logical; __entry->orig_start = ac->ac_o_ex.fe_start; @@ -768,8 +829,9 @@ TRACE_EVENT(ext4_mballoc_prealloc, __entry->result_len = ac->ac_b_ex.fe_len; ), - TP_printk("dev %s inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u", + __entry->dev_major, __entry->dev_minor, + (unsigned 
long) __entry->ino, __entry->orig_group, __entry->orig_start, __entry->orig_len, __entry->orig_logical, __entry->result_group, __entry->result_start, @@ -786,7 +848,8 @@ DECLARE_EVENT_CLASS(ext4__mballoc, TP_ARGS(sb, inode, group, start, len), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( int, result_start ) __field( __u32, result_group ) @@ -794,15 +857,17 @@ DECLARE_EVENT_CLASS(ext4__mballoc, ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); __entry->ino = inode ? inode->i_ino : 0; __entry->result_start = start; __entry->result_group = group; __entry->result_len = len; ), - TP_printk("dev %s inode %lu extent %u/%d/%u ", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d inode %lu extent %u/%d/%u ", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->result_group, __entry->result_start, __entry->result_len) ); @@ -835,7 +900,8 @@ TRACE_EVENT(ext4_forget, TP_ARGS(inode, is_metadata, block), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( umode_t, mode ) __field( int, is_metadata ) @@ -843,16 +909,18 @@ TRACE_EVENT(ext4_forget, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->mode = inode->i_mode; __entry->is_metadata = is_metadata; __entry->block = block; ), - TP_printk("dev %s ino %lu mode 0%o is_metadata %d block %llu", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->mode, __entry->is_metadata, __entry->block) + TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %llu", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->mode, + __entry->is_metadata, __entry->block) ); TRACE_EVENT(ext4_da_update_reserve_space, @@ -861,7 +929,8 @@ TRACE_EVENT(ext4_da_update_reserve_space, TP_ARGS(inode, used_blocks), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( umode_t, mode ) __field( __u64, i_blocks ) @@ -872,7 +941,8 @@ TRACE_EVENT(ext4_da_update_reserve_space, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->mode = inode->i_mode; __entry->i_blocks = inode->i_blocks; @@ -882,9 +952,10 @@ TRACE_EVENT(ext4_da_update_reserve_space, __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks; ), - TP_printk("dev %s ino %lu mode 0%o i_blocks %llu used_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->mode, (unsigned long long) __entry->i_blocks, + TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->mode, + (unsigned long long) __entry->i_blocks, __entry->used_blocks, __entry->reserved_data_blocks, __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) ); @@ -895,7 +966,8 @@ TRACE_EVENT(ext4_da_reserve_space, TP_ARGS(inode, md_needed), TP_STRUCT__entry( - __field( dev_t, dev ) 
+ __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( umode_t, mode ) __field( __u64, i_blocks ) @@ -905,7 +977,8 @@ TRACE_EVENT(ext4_da_reserve_space, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->mode = inode->i_mode; __entry->i_blocks = inode->i_blocks; @@ -914,8 +987,9 @@ TRACE_EVENT(ext4_da_reserve_space, __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; ), - TP_printk("dev %s ino %lu mode 0%o i_blocks %llu md_needed %d reserved_data_blocks %d reserved_meta_blocks %d", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d reserved_data_blocks %d reserved_meta_blocks %d", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->mode, (unsigned long long) __entry->i_blocks, __entry->md_needed, __entry->reserved_data_blocks, __entry->reserved_meta_blocks) @@ -927,7 +1001,8 @@ TRACE_EVENT(ext4_da_release_space, TP_ARGS(inode, freed_blocks), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) __field( umode_t, mode ) __field( __u64, i_blocks ) @@ -938,7 +1013,8 @@ TRACE_EVENT(ext4_da_release_space, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; __entry->mode = inode->i_mode; __entry->i_blocks = inode->i_blocks; @@ -948,8 +1024,9 @@ TRACE_EVENT(ext4_da_release_space, __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks; ), - TP_printk("dev %s ino %lu mode 0%o i_blocks %llu freed_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino, __entry->mode, (unsigned long long) __entry->i_blocks, __entry->freed_blocks, __entry->reserved_data_blocks, __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) @@ -961,18 +1038,20 @@ DECLARE_EVENT_CLASS(ext4__bitmap_load, TP_ARGS(sb, group), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( __u32, group ) ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); __entry->group = group; ), - TP_printk("dev %s group %u", - jbd2_dev_to_name(__entry->dev), __entry->group) + TP_printk("dev %d,%d group %u", + __entry->dev_major, __entry->dev_minor, __entry->group) ); DEFINE_EVENT(ext4__bitmap_load, ext4_mb_bitmap_load, diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index bf16545cc977..7447ea9305b5 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -17,17 +17,19 @@ TRACE_EVENT(jbd2_checkpoint, TP_ARGS(journal, result), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( int, result ) ), TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; + __entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev); + __entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev); __entry->result = result; ), - 
TP_printk("dev %s result %d", - jbd2_dev_to_name(__entry->dev), __entry->result) + TP_printk("dev %d,%d result %d", + __entry->dev_major, __entry->dev_minor, __entry->result) ); DECLARE_EVENT_CLASS(jbd2_commit, @@ -37,20 +39,22 @@ DECLARE_EVENT_CLASS(jbd2_commit, TP_ARGS(journal, commit_transaction), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( char, sync_commit ) __field( int, transaction ) ), TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; + __entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev); + __entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev); __entry->sync_commit = commit_transaction->t_synchronous_commit; __entry->transaction = commit_transaction->t_tid; ), - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_printk("dev %d,%d transaction %d sync %d", + __entry->dev_major, __entry->dev_minor, + __entry->transaction, __entry->sync_commit) ); DEFINE_EVENT(jbd2_commit, jbd2_start_commit, @@ -87,22 +91,24 @@ TRACE_EVENT(jbd2_end_commit, TP_ARGS(journal, commit_transaction), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( char, sync_commit ) __field( int, transaction ) __field( int, head ) ), TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; + __entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev); + __entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev); __entry->sync_commit = commit_transaction->t_synchronous_commit; __entry->transaction = commit_transaction->t_tid; __entry->head = journal->j_tail_sequence; ), - TP_printk("dev %s transaction %d sync %d head %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit, __entry->head) + TP_printk("dev %d,%d transaction %d sync %d head %d", + __entry->dev_major, __entry->dev_minor, + __entry->transaction, __entry->sync_commit, __entry->head) ); TRACE_EVENT(jbd2_submit_inode_data, @@ -111,17 +117,20 @@ TRACE_EVENT(jbd2_submit_inode_data, TP_ARGS(inode), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( ino_t, ino ) ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev_major = MAJOR(inode->i_sb->s_dev); + __entry->dev_minor = MINOR(inode->i_sb->s_dev); __entry->ino = inode->i_ino; ), - TP_printk("dev %s ino %lu", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) + TP_printk("dev %d,%d ino %lu", + __entry->dev_major, __entry->dev_minor, + (unsigned long) __entry->ino) ); TRACE_EVENT(jbd2_run_stats, @@ -131,7 +140,8 @@ TRACE_EVENT(jbd2_run_stats, TP_ARGS(dev, tid, stats), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( unsigned long, tid ) __field( unsigned long, wait ) __field( unsigned long, running ) @@ -144,7 +154,8 @@ TRACE_EVENT(jbd2_run_stats, ), TP_fast_assign( - __entry->dev = dev; + __entry->dev_major = MAJOR(dev); + __entry->dev_minor = MINOR(dev); __entry->tid = tid; __entry->wait = stats->rs_wait; __entry->running = stats->rs_running; @@ -156,9 +167,9 @@ TRACE_EVENT(jbd2_run_stats, __entry->blocks_logged = stats->rs_blocks_logged; ), - TP_printk("dev %s tid %lu wait %u running %u locked %u flushing %u " + TP_printk("dev %d,%d tid %lu wait %u running %u locked %u flushing %u " "logging %u handle_count %u blocks %u blocks_logged %u", - jbd2_dev_to_name(__entry->dev), __entry->tid, + __entry->dev_major, __entry->dev_minor, __entry->tid, 
jiffies_to_msecs(__entry->wait), jiffies_to_msecs(__entry->running), jiffies_to_msecs(__entry->locked), @@ -175,7 +186,8 @@ TRACE_EVENT(jbd2_checkpoint_stats, TP_ARGS(dev, tid, stats), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( unsigned long, tid ) __field( unsigned long, chp_time ) __field( __u32, forced_to_close ) @@ -184,7 +196,8 @@ TRACE_EVENT(jbd2_checkpoint_stats, ), TP_fast_assign( - __entry->dev = dev; + __entry->dev_major = MAJOR(dev); + __entry->dev_minor = MINOR(dev); __entry->tid = tid; __entry->chp_time = stats->cs_chp_time; __entry->forced_to_close= stats->cs_forced_to_close; @@ -192,9 +205,9 @@ TRACE_EVENT(jbd2_checkpoint_stats, __entry->dropped = stats->cs_dropped; ), - TP_printk("dev %s tid %lu chp_time %u forced_to_close %u " + TP_printk("dev %d,%d tid %lu chp_time %u forced_to_close %u " "written %u dropped %u", - jbd2_dev_to_name(__entry->dev), __entry->tid, + __entry->dev_major, __entry->dev_minor, __entry->tid, jiffies_to_msecs(__entry->chp_time), __entry->forced_to_close, __entry->written, __entry->dropped) ); @@ -207,7 +220,8 @@ TRACE_EVENT(jbd2_cleanup_journal_tail, TP_ARGS(journal, first_tid, block_nr, freed), TP_STRUCT__entry( - __field( dev_t, dev ) + __field( int, dev_major ) + __field( int, dev_minor ) __field( tid_t, tail_sequence ) __field( tid_t, first_tid ) __field(unsigned long, block_nr ) @@ -215,16 +229,18 @@ TRACE_EVENT(jbd2_cleanup_journal_tail, ), TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; + __entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev); + __entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev); __entry->tail_sequence = journal->j_tail_sequence; __entry->first_tid = first_tid; __entry->block_nr = block_nr; __entry->freed = freed; ), - TP_printk("dev %s from %u to %u offset %lu freed %lu", - jbd2_dev_to_name(__entry->dev), __entry->tail_sequence, - __entry->first_tid, __entry->block_nr, __entry->freed) + TP_printk("dev %d,%d from %u to %u offset %lu freed %lu", + __entry->dev_major, __entry->dev_minor, + __entry->tail_sequence, __entry->first_tid, + __entry->block_nr, __entry->freed) ); #endif /* _TRACE_JBD2_H */ -- cgit v1.2.3 From 4600d7c493f354a3e338a35bcf8a3bfbe815776a Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 28 Oct 2010 11:30:31 +0900 Subject: genirq: Fix up irq_node() for irq_data changes. Now that the node ID is tracked in the irq_data structure, update the irq_node() definition accordingly. This fixes up irq_node() usage under GENERIC_HARDIRQS_NO_DEPRECATED && SMP. Signed-off-by: Paul Mundt LKML-Reference: <20101028023031.GB10365@linux-sh.org> Signed-off-by: Thomas Gleixner --- include/linux/irqnr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 05aa8c23483f..3bc4dcab6e82 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -43,7 +43,7 @@ unsigned int irq_get_next_irq(unsigned int offset); else #ifdef CONFIG_SMP -#define irq_node(irq) (irq_to_desc(irq)->node) +#define irq_node(irq) (irq_get_irq_data(irq)->node) #else #define irq_node(irq) 0 #endif -- cgit v1.2.3 From 91bad2f8d3057482b9afb599f14421b007136960 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 1 Oct 2010 17:23:48 -0400 Subject: jump label: Fix deadlock b/w jump_label_mutex vs. text_mutex register_kprobe() downs the 'text_mutex' and then calls jump_label_text_reserved(), which downs the 'jump_label_mutex'. However, the jump label code takes those mutexes in the reverse order. 
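The inverted ordering, schematically (a sketch of the two paths, not verbatim kernel code):

	/* Path A: register_kprobe()         Path B: jump label update
	 *
	 *   mutex_lock(&text_mutex);          mutex_lock(&jump_label_mutex);
	 *   jump_label_text_reserved()        ...
	 *     mutex_lock(&jump_label_mutex);    mutex_lock(&text_mutex);
	 *
	 * A holds text_mutex and waits on jump_label_mutex while B holds
	 * jump_label_mutex and waits on text_mutex: an AB-BA deadlock. */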
Fix by requiring the caller of jump_label_text_reserved() to do the jump label locking via the newly added: jump_label_lock(), jump_label_unlock(). Currently, kprobes is the only user of jump_label_text_reserved(). Reported-by: Ingo Molnar Acked-by: Masami Hiramatsu Signed-off-by: Jason Baron LKML-Reference: <759032c48d5e30c27f0bba003d09bffa8e9f28bb.1285965957.git.jbaron@redhat.com> Signed-off-by: Steven Rostedt --- include/linux/jump_label.h | 5 +++++ kernel/jump_label.c | 33 +++++++++++++++++++++------------ kernel/kprobes.c | 6 ++++++ 3 files changed, 32 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index b67cb180e6e9..1947a1212678 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -18,6 +18,8 @@ struct module; extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; +extern void jump_label_lock(void); +extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_text_poke_early(jump_label_t addr); @@ -59,6 +61,9 @@ static inline int jump_label_text_reserved(void *start, void *end) return 0; } +static inline void jump_label_lock(void) {} +static inline void jump_label_unlock(void) {} + #endif #define COND_STMT(key, stmt) \ diff --git a/kernel/jump_label.c b/kernel/jump_label.c index be9e105345eb..12cce78e9568 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -39,6 +39,16 @@ struct jump_label_module_entry { struct module *mod; }; +void jump_label_lock(void) +{ + mutex_lock(&jump_label_mutex); +} + +void jump_label_unlock(void) +{ + mutex_unlock(&jump_label_mutex); +} + static int jump_label_cmp(const void *a, const void *b) { const struct jump_entry *jea = a; @@ -152,7 +162,7 @@ void jump_label_update(unsigned long key, enum jump_label_type type) struct jump_label_module_entry *e_module; int count; - mutex_lock(&jump_label_mutex); + jump_label_lock(); entry = get_jump_label_entry((jump_label_t)key); if (entry) { count = entry->nr_entries; @@ -175,7 +185,7 @@ void jump_label_update(unsigned long key, enum jump_label_type type) } } } - mutex_unlock(&jump_label_mutex); + jump_label_unlock(); } static int addr_conflict(struct jump_entry *entry, void *start, void *end) @@ -232,6 +242,7 @@ out: * overlaps with any of the jump label patch addresses. Code * that wants to modify kernel text should first verify that * it does not overlap with any of the jump label addresses. + * Caller must hold jump_label_mutex. 
* * returns 1 if there is an overlap, 0 otherwise */ @@ -242,7 +253,6 @@ int jump_label_text_reserved(void *start, void *end) struct jump_entry *iter_stop = __start___jump_table; int conflict = 0; - mutex_lock(&jump_label_mutex); iter = iter_start; while (iter < iter_stop) { if (addr_conflict(iter, start, end)) { @@ -257,7 +267,6 @@ int jump_label_text_reserved(void *start, void *end) conflict = module_conflict(start, end); #endif out: - mutex_unlock(&jump_label_mutex); return conflict; } @@ -268,7 +277,7 @@ static __init int init_jump_label(void) struct jump_entry *iter_stop = __stop___jump_table; struct jump_entry *iter; - mutex_lock(&jump_label_mutex); + jump_label_lock(); ret = build_jump_label_hashtable(__start___jump_table, __stop___jump_table); iter = iter_start; @@ -276,7 +285,7 @@ static __init int init_jump_label(void) arch_jump_label_text_poke_early(iter->code); iter++; } - mutex_unlock(&jump_label_mutex); + jump_label_unlock(); return ret; } early_initcall(init_jump_label); @@ -409,21 +418,21 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val, switch (val) { case MODULE_STATE_COMING: - mutex_lock(&jump_label_mutex); + jump_label_lock(); ret = add_jump_label_module(mod); if (ret) remove_jump_label_module(mod); - mutex_unlock(&jump_label_mutex); + jump_label_unlock(); break; case MODULE_STATE_GOING: - mutex_lock(&jump_label_mutex); + jump_label_lock(); remove_jump_label_module(mod); - mutex_unlock(&jump_label_mutex); + jump_label_unlock(); break; case MODULE_STATE_LIVE: - mutex_lock(&jump_label_mutex); + jump_label_lock(); remove_jump_label_module_init(mod); - mutex_unlock(&jump_label_mutex); + jump_label_unlock(); break; } return ret; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 99865c33a60d..9437e14f36bd 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1146,13 +1146,16 @@ int __kprobes register_kprobe(struct kprobe *p) return ret; preempt_disable(); + jump_label_lock(); if (!kernel_text_address((unsigned long) p->addr) || in_kprobes_functions((unsigned long) p->addr) || ftrace_text_reserved(p->addr, p->addr) || jump_label_text_reserved(p->addr, p->addr)) { preempt_enable(); + jump_label_unlock(); return -EINVAL; } + jump_label_unlock(); /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ p->flags &= KPROBE_FLAG_DISABLED; @@ -1187,6 +1190,8 @@ int __kprobes register_kprobe(struct kprobe *p) INIT_LIST_HEAD(&p->list); mutex_lock(&kprobe_mutex); + jump_label_lock(); /* needed to call jump_label_text_reserved() */ + get_online_cpus(); /* For avoiding text_mutex deadlock. */ mutex_lock(&text_mutex); @@ -1214,6 +1219,7 @@ int __kprobes register_kprobe(struct kprobe *p) out: mutex_unlock(&text_mutex); put_online_cpus(); + jump_label_unlock(); mutex_unlock(&kprobe_mutex); if (probed_mod) -- cgit v1.2.3 From 4f7ebe807242898ee08ed732d56982874442c304 Mon Sep 17 00:00:00 2001 From: Arun R Bharadwaj Date: Wed, 28 Jul 2010 14:17:26 +0530 Subject: net/9p: This patch implements TLERROR/RLERROR on the 9P client. 
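With 9P2000.L the server answers a failed request with Rlerror, which carries a numeric errno rather than the ename string plus code of 9P2000.u. Decoding it reduces to the following (a sketch of the dotl branch added below; the error check on the read is an addition for clarity):

	int ecode;

	/* "d" pulls a single 4-byte numeric field out of the reply;
	 * no ename parsing or p9_errstr2errno() mapping is needed */
	err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
	if (!err)
		err = -ecode;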
Signed-off-by: Arun R Bharadwaj Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- include/net/9p/9p.h | 4 ++++ net/9p/client.c | 43 +++++++++++++++++++++++++++---------------- 2 files changed, 31 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index a8de812ccbc8..a4a1b043a8c4 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -86,6 +86,8 @@ do { \ /** * enum p9_msg_t - 9P message types + * @P9_TLERROR: not used + * @P9_RLERROR: response for any failed request for 9P2000.L * @P9_TSTATFS: file system status request * @P9_RSTATFS: file system status response * @P9_TSYMLINK: make symlink request @@ -137,6 +139,8 @@ do { \ */ enum p9_msg_t { + P9_TLERROR = 6, + P9_RLERROR, P9_TSTATFS = 8, P9_RSTATFS, P9_TLOPEN = 12, diff --git a/net/9p/client.c b/net/9p/client.c index 83bf0541d66f..c155cc45eff9 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -450,32 +450,43 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) return err; } - if (type == P9_RERROR) { + if (type == P9_RERROR || type == P9_RLERROR) { int ecode; - char *ename; - err = p9pdu_readf(req->rc, c->proto_version, "s?d", - &ename, &ecode); - if (err) { - P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", - err); - return err; - } + if (!p9_is_proto_dotl(c)) { + char *ename; - if (p9_is_proto_dotu(c) || - p9_is_proto_dotl(c)) - err = -ecode; + err = p9pdu_readf(req->rc, c->proto_version, "s?d", + &ename, &ecode); + if (err) + goto out_err; - if (!err || !IS_ERR_VALUE(err)) - err = p9_errstr2errno(ename, strlen(ename)); + if (p9_is_proto_dotu(c)) + err = -ecode; - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); + if (!err || !IS_ERR_VALUE(err)) { + err = p9_errstr2errno(ename, strlen(ename)); + + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); + + kfree(ename); + } + } else { + err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = -ecode; + + P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + } - kfree(ename); } else err = 0; return err; + +out_err: + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); + + return err; } /** -- cgit v1.2.3 From 920e65dc6911da28a58e17f4b683302636fc6d8e Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Wed, 22 Sep 2010 17:19:19 -0700 Subject: [9p] Introduce client side TFSYNC/RFSYNC for dotl. SYNOPSIS size[4] Tfsync tag[2] fid[4] size[4] Rfsync tag[2] DESCRIPTION The Tfsync transaction transfers ("flushes") all modified in-core data of file identified by fid to the disk device (or other permanent storage device) where that file resides. 
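On the client side the transaction is a single RPC keyed by the fid; a sketch of the round trip, following the p9_client_fsync() added below:

	struct p9_req_t *req;

	/* Tfsync carries only the fid ("d" = one 4-byte field) and
	 * Rfsync has no payload, so a non-error reply means success */
	req = p9_client_rpc(clnt, P9_TFSYNC, "d", fid->fid);
	if (IS_ERR(req))
		return PTR_ERR(req);
	p9_free_req(clnt, req);
	return 0;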
Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_file.c | 18 ++++++++++++++++-- include/net/9p/9p.h | 2 ++ include/net/9p/client.h | 1 + net/9p/client.c | 25 +++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 28db7fb1d96e..fdf303207c72 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -290,6 +290,20 @@ static int v9fs_file_fsync(struct file *filp, int datasync) return retval; } +static int v9fs_file_fsync_dotl(struct file *filp, int datasync) +{ + struct p9_fid *fid; + int retval; + + P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n", + filp, datasync); + + fid = filp->private_data; + + retval = p9_client_fsync(fid); + return retval; +} + static const struct file_operations v9fs_cached_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -311,7 +325,7 @@ static const struct file_operations v9fs_cached_file_operations_dotl = { .release = v9fs_dir_release, .lock = v9fs_file_lock, .mmap = generic_file_readonly_mmap, - .fsync = v9fs_file_fsync, + .fsync = v9fs_file_fsync_dotl, }; const struct file_operations v9fs_file_operations = { @@ -333,5 +347,5 @@ const struct file_operations v9fs_file_operations_dotl = { .release = v9fs_dir_release, .lock = v9fs_file_lock, .mmap = generic_file_readonly_mmap, - .fsync = v9fs_file_fsync, + .fsync = v9fs_file_fsync_dotl, }; diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index a4a1b043a8c4..55e96057f47f 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -163,6 +163,8 @@ enum p9_msg_t { P9_RXATTRCREATE, P9_TREADDIR = 40, P9_RREADDIR, + P9_TFSYNC = 50, + P9_RFSYNC, P9_TLINK = 70, P9_RLINK, P9_TMKDIR = 72, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 7f63d5ab7b44..8744e3ad4a07 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -229,6 +229,7 @@ int p9_client_symlink(struct p9_fid *fid, char *name, char *symname, gid_t gid, int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, gid_t gid, struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); +int p9_client_fsync(struct p9_fid *fid); int p9_client_remove(struct p9_fid *fid); int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, u32 count); diff --git a/net/9p/client.c b/net/9p/client.c index e50ec802937a..30c4a1b224fb 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1162,6 +1162,31 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) } EXPORT_SYMBOL(p9_client_link); +int p9_client_fsync(struct p9_fid *fid) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TFSYNC fid %d\n", fid->fid); + err = 0; + clnt = fid->clnt; + + req = p9_client_rpc(clnt, P9_TFSYNC, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid); + + p9_free_req(clnt, req); + +error: + return err; +} +EXPORT_SYMBOL(p9_client_fsync); + int p9_client_clunk(struct p9_fid *fid) { int err; -- cgit v1.2.3 From a099027c779068b834f335cfdc3f2bf10f531dd9 Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Mon, 27 Sep 2010 11:34:24 +0530 Subject: 9p: Implement TLOCK Synopsis size[4] TLock tag[2] fid[4] flock[n] size[4] RLock tag[2] status[1] Description Tlock is used to acquire/release byte range posix locks on a file identified by given fid. 
The reply contains the status of the lock request. flock structure: type[1] - Type of lock: F_RDLCK, F_WRLCK, F_UNLCK flags[4] - Flags could be either of P9_LOCK_FLAGS_BLOCK - Blocked lock request; if a conflicting lock exists, wait for that lock to be released. P9_LOCK_FLAGS_RECLAIM - Reclaim lock request, used when the client is trying to reclaim a lock after a server restart (due to a crash) start[8] - Starting offset for lock length[8] - Number of bytes to lock If length is 0, lock all bytes starting at the location 'start' through to the end of file pid[4] - PID of the process that wants to take lock client_id[4] - Unique client id status[1] - Status of the lock request, can be P9_LOCK_SUCCESS(0), P9_LOCK_BLOCKED(1), P9_LOCK_ERROR(2) or P9_LOCK_GRACE(3) P9_LOCK_SUCCESS - Request was successful P9_LOCK_BLOCKED - A conflicting lock is held by another process P9_LOCK_ERROR - Error while processing the lock request P9_LOCK_GRACE - Server is in grace period; it can't accept new lock requests in this period (except locks with P9_LOCK_FLAGS_RECLAIM flag set) Signed-off-by: M. Mohan Kumar Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs_vfs.h | 2 + fs/9p/vfs_file.c | 160 +++++++++++++++++++++++++++++++++++++++++++++++- include/net/9p/9p.h | 28 +++++++++ include/net/9p/client.h | 1 + net/9p/client.c | 33 ++++++++++ 5 files changed, 222 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 09861295eef9..d26db1932f7a 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -65,3 +65,5 @@ int v9fs_uflags2omode(int uflags, int extended); ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); void v9fs_blank_wstat(struct p9_wstat *wstat); int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *); + +#define P9_LOCK_TIMEOUT (30*HZ) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index fdf303207c72..6f77abd23184 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -133,6 +134,159 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) return res; } +static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + struct p9_flock flock; + struct p9_fid *fid; + uint8_t status; + int res = 0; + unsigned char fl_type; + + fid = filp->private_data; + BUG_ON(fid == NULL); + + if ((fl->fl_flags & FL_POSIX) != FL_POSIX) + BUG(); + + res = posix_lock_file_wait(filp, fl); + if (res < 0) + goto out; + + /* convert posix lock to p9 tlock args */ + memset(&flock, 0, sizeof(flock)); + flock.type = fl->fl_type; + flock.start = fl->fl_start; + if (fl->fl_end == OFFSET_MAX) + flock.length = 0; + else + flock.length = fl->fl_end - fl->fl_start + 1; + flock.proc_id = fl->fl_pid; + flock.client_id = utsname()->nodename; + if (IS_SETLKW(cmd)) + flock.flags = P9_LOCK_FLAGS_BLOCK; + + /* + * if its a blocked request and we get P9_LOCK_BLOCKED as the status + * for lock request, keep on trying + */ + for (;;) { + res = p9_client_lock_dotl(fid, &flock, &status); + if (res < 0) + break; + + if (status != P9_LOCK_BLOCKED) + break; + if (status == P9_LOCK_BLOCKED && !IS_SETLKW(cmd)) + break; + schedule_timeout_interruptible(P9_LOCK_TIMEOUT); + } + + /* map 9p status to VFS status */ + switch (status) { + case P9_LOCK_SUCCESS: + res = 0; + break; + case P9_LOCK_BLOCKED: + res = -EAGAIN; + break; + case P9_LOCK_ERROR: + case P9_LOCK_GRACE: + res = -ENOLCK; +
break; + default: + BUG(); + } + + /* + * in case the server returned an error for the lock request, revert + * it locally + */ + if (res < 0 && fl->fl_type != F_UNLCK) { + fl_type = fl->fl_type; + fl->fl_type = F_UNLCK; + res = posix_lock_file_wait(filp, fl); + fl->fl_type = fl_type; + } +out: + return res; +} + +/** + * v9fs_file_lock_dotl - lock a file (or directory) + * @filp: file to be locked + * @cmd: lock command + * @fl: file lock structure + * + */ + +static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + int ret = -ENOLCK; + + P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp, + cmd, fl, filp->f_path.dentry->d_name.name); + + /* No mandatory locks */ + if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) + goto out_err; + + if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { + filemap_write_and_wait(inode->i_mapping); + invalidate_mapping_pages(&inode->i_data, 0, -1); + } + + if (IS_SETLK(cmd) || IS_SETLKW(cmd)) + ret = v9fs_file_do_lock(filp, cmd, fl); + else + ret = -EINVAL; +out_err: + return ret; +} + +/** + * v9fs_file_flock_dotl - lock a file + * @filp: file to be locked + * @cmd: lock command + * @fl: file lock structure + * + */ + +static int v9fs_file_flock_dotl(struct file *filp, int cmd, + struct file_lock *fl) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + int ret = -ENOLCK; + + P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp, + cmd, fl, filp->f_path.dentry->d_name.name); + + /* No mandatory locks */ + if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) + goto out_err; + + if (!(fl->fl_flags & FL_FLOCK)) + goto out_err; + + if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { + filemap_write_and_wait(inode->i_mapping); + invalidate_mapping_pages(&inode->i_data, 0, -1); + } + /* Convert flock to posix lock */ + fl->fl_owner = (fl_owner_t)filp; + fl->fl_start = 0; + fl->fl_end = OFFSET_MAX; + fl->fl_flags |= FL_POSIX; + fl->fl_flags ^= FL_FLOCK; + + if (IS_SETLK(cmd) || IS_SETLKW(cmd)) + ret = v9fs_file_do_lock(filp, cmd, fl); + else + ret = -EINVAL; +out_err: + return ret; +} + /** * v9fs_file_readn - read from a file * @filp: file pointer to read @@ -323,7 +477,8 @@ static const struct file_operations v9fs_cached_file_operations_dotl = { .write = v9fs_file_write, .open = v9fs_file_open, .release = v9fs_dir_release, - .lock = v9fs_file_lock, + .lock = v9fs_file_lock_dotl, + .flock = v9fs_file_flock_dotl, .mmap = generic_file_readonly_mmap, .fsync = v9fs_file_fsync_dotl, }; @@ -345,7 +500,8 @@ const struct file_operations v9fs_file_operations_dotl = { .write = v9fs_file_write, .open = v9fs_file_open, .release = v9fs_dir_release, - .lock = v9fs_file_lock, + .lock = v9fs_file_lock_dotl, + .flock = v9fs_file_flock_dotl, .mmap = generic_file_readonly_mmap, .fsync = v9fs_file_fsync_dotl, };
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 55e96057f47f..1859a2560cc5 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -165,6 +165,8 @@ enum p9_msg_t { P9_RREADDIR, P9_TFSYNC = 50, P9_RFSYNC, + P9_TLOCK = 52, + P9_RLOCK, P9_TLINK = 70, P9_RLINK, P9_TMKDIR = 72, @@ -464,6 +466,32 @@ struct p9_iattr_dotl { u64 mtime_nsec; }; +#define P9_LOCK_SUCCESS 0 +#define P9_LOCK_BLOCKED 1 +#define P9_LOCK_ERROR 2 +#define P9_LOCK_GRACE 3 + +#define P9_LOCK_FLAGS_BLOCK 1 +#define P9_LOCK_FLAGS_RECLAIM 2 + +/* struct p9_flock: POSIX lock structure * @type - type of lock * @flags - lock flags * @start - starting
offset of the lock + @length - number of bytes + @proc_id - process id which wants to take lock + @client_id - client id + */ + +struct p9_flock { + u8 type; + u32 flags; + u64 start; + u64 length; + u32 proc_id; + char *client_id; +}; + /* Structures for Protocol Operations */ struct p9_tstatfs { u32 fid;
diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 8744e3ad4a07..d7dcb142e3bb 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -249,6 +249,7 @@ int p9_client_mknod_dotl(struct p9_fid *oldfid, char *name, int mode, dev_t rdev, gid_t gid, struct p9_qid *); int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, gid_t gid, struct p9_qid *); +int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); struct p9_req_t *p9_tag_lookup(struct p9_client *, u16); void p9_client_cb(struct p9_client *c, struct p9_req_t *req);
diff --git a/net/9p/client.c b/net/9p/client.c index 30c4a1b224fb..fbd2b195801c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1803,3 +1803,36 @@ error: } EXPORT_SYMBOL(p9_client_mkdir_dotl); + +int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d " + "start %lld length %lld proc_id %d client_id %s\n", + fid->fid, flock->type, flock->flags, flock->start, + flock->length, flock->proc_id, flock->client_id); + + req = p9_client_rpc(clnt, P9_TLOCK, "dbdqqds", fid->fid, flock->type, + flock->flags, flock->start, flock->length, + flock->proc_id, flock->client_id); + + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "b", status); + if (err) { + p9pdu_dump(1, req->rc); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); +error: + p9_free_req(clnt, req); + return err; + +} +EXPORT_SYMBOL(p9_client_lock_dotl); -- cgit v1.2.3
From 1d769cd192fc8c4097b1e2cd41fdee6ba3d1b2af Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Mon, 27 Sep 2010 12:22:13 +0530 Subject: 9p: Implement TGETLOCK Synopsis size[4] TGetlock tag[2] fid[4] getlock[n] size[4] RGetlock tag[2] getlock[n] Description TGetlock is used to test for the existence of byte range posix locks on a file identified by given fid. The reply contains a getlock structure. If the lock could be placed, it returns F_UNLCK in the type field of the getlock structure. Otherwise it returns the details of the conflicting locks in the getlock structure. getlock structure: type[1] - Type of lock: F_RDLCK, F_WRLCK start[8] - Starting offset for lock length[8] - Number of bytes to check for the lock If length is 0, check for lock in all bytes starting at the location 'start' through to the end of file pid[4] - PID of the process that wants to take the lock, or that owns the conflicting lock in the reply client[4] - Client id of the system that owns the process which has the conflicting lock Signed-off-by: M.
Mohan Kumar Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_file.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ include/net/9p/9p.h | 18 ++++++++++++++++++ include/net/9p/client.h | 1 + net/9p/client.c | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+) (limited to 'include') diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 6f77abd23184..3a4352f23a98 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -211,6 +211,51 @@ out: return res; } +static int v9fs_file_getlock(struct file *filp, struct file_lock *fl) +{ + struct p9_getlock glock; + struct p9_fid *fid; + int res = 0; + + fid = filp->private_data; + BUG_ON(fid == NULL); + + posix_test_lock(filp, fl); + /* + * if we have a conflicting lock locally, no need to validate + * with server + */ + if (fl->fl_type != F_UNLCK) + return res; + + /* convert posix lock to p9 tgetlock args */ + memset(&glock, 0, sizeof(glock)); + glock.type = fl->fl_type; + glock.start = fl->fl_start; + if (fl->fl_end == OFFSET_MAX) + glock.length = 0; + else + glock.length = fl->fl_end - fl->fl_start + 1; + glock.proc_id = fl->fl_pid; + glock.client_id = utsname()->nodename; + + res = p9_client_getlock_dotl(fid, &glock); + if (res < 0) + return res; + if (glock.type != F_UNLCK) { + fl->fl_type = glock.type; + fl->fl_start = glock.start; + if (glock.length == 0) + fl->fl_end = OFFSET_MAX; + else + fl->fl_end = glock.start + glock.length - 1; + fl->fl_pid = glock.proc_id; + } else + fl->fl_type = F_UNLCK; + + return res; +} + /** * v9fs_file_lock_dotl - lock a file (or directory) * @filp: file to be locked @@ -238,6 +283,8 @@ static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl) if (IS_SETLK(cmd) || IS_SETLKW(cmd)) ret = v9fs_file_do_lock(filp, cmd, fl); + else if (IS_GETLK(cmd)) + ret = v9fs_file_getlock(filp, fl); else ret = -EINVAL; out_err: diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 1859a2560cc5..6367a71d84fc 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -167,6 +167,8 @@ enum p9_msg_t { P9_RFSYNC, P9_TLOCK = 52, P9_RLOCK, + P9_TGETLOCK = 54, + P9_RGETLOCK, P9_TLINK = 70, P9_RLINK, P9_TMKDIR = 72, @@ -492,6 +494,22 @@ struct p9_flock { char *client_id; }; +/* struct p9_getlock: getlock structure + * @type - type of lock + * @start - starting offset of the lock + * @length - number of bytes + * @proc_id - process id which wants to take lock + * @client_id - client id + */ + +struct p9_getlock { + u8 type; + u64 start; + u64 length; + u32 proc_id; + char *client_id; +}; + /* Structures for Protocol Operations */ struct p9_tstatfs { u32 fid; diff --git a/include/net/9p/client.h b/include/net/9p/client.h index d7dcb142e3bb..127c9f2a9cb8 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -250,6 +250,7 @@ int p9_client_mknod_dotl(struct p9_fid *oldfid, char *name, int mode, int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, gid_t gid, struct p9_qid *); int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); +int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl); struct p9_req_t *p9_tag_lookup(struct p9_client *, u16); void p9_client_cb(struct p9_client *c, struct p9_req_t *req); diff --git a/net/9p/client.c b/net/9p/client.c index fbd2b195801c..fc1b0579016a 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1836,3 +1836,37 @@ error: } EXPORT_SYMBOL(p9_client_lock_dotl); + +int p9_client_getlock_dotl(struct p9_fid *fid, 
struct p9_getlock *glock) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld " + "length %lld proc_id %d client_id %s\n", fid->fid, glock->type, + glock->start, glock->length, glock->proc_id, glock->client_id); + + req = p9_client_rpc(clnt, P9_TGETLOCK, "dbqqds", fid->fid, glock->type, + glock->start, glock->length, glock->proc_id, glock->client_id); + + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "bqqds", &glock->type, + &glock->start, &glock->length, &glock->proc_id, + &glock->client_id); + if (err) { + p9pdu_dump(1, req->rc); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld " + "proc_id %d client_id %s\n", glock->type, glock->start, + glock->length, glock->proc_id, glock->client_id); +error: + p9_free_req(clnt, req); + return err; +} +EXPORT_SYMBOL(p9_client_getlock_dotl); -- cgit v1.2.3
From 368c09d2a303c39e9f37193b23e945e6754cf0a7 Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Mon, 27 Sep 2010 14:17:24 +0530 Subject: 9p: Use V9FS_MAGIC in statfs Use V9FS_MAGIC as the file system type while filling the kernel statfs structure instead of using the host file system's magic number. Also move the definition of V9FS_MAGIC from v9fs.h to the standard magic.h file. Signed-off-by: M. Mohan Kumar Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs.h | 2 -- fs/9p/vfs_super.c | 3 ++- include/linux/magic.h | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include')
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 8bb7792afe2e..cb6396855e2d 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -117,8 +117,6 @@ void v9fs_session_close(struct v9fs_session_info *v9ses); void v9fs_session_cancel(struct v9fs_session_info *v9ses); void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses); -#define V9FS_MAGIC 0x01021997 - /* other default globals */ #define V9FS_PORT 564 #define V9FS_DEFUSER "nobody"
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 174643f4f901..48d4215c60a8 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -256,7 +257,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf) if (v9fs_proto_dotl(v9ses)) { res = p9_client_statfs(fid, &rs); if (res == 0) { - buf->f_type = rs.type; + buf->f_type = V9FS_MAGIC; buf->f_bsize = rs.bsize; buf->f_blocks = rs.blocks; buf->f_bfree = rs.bfree;
diff --git a/include/linux/magic.h b/include/linux/magic.h index eb9800f05782..ff690d05f129 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -57,5 +57,6 @@ #define DEVPTS_SUPER_MAGIC 0x1cd1 #define SOCKFS_MAGIC 0x534F434B +#define V9FS_MAGIC 0x01021997 #endif /* __LINUX_MAGIC_H__ */ -- cgit v1.2.3
From 329176cc2c50e63c580ddaabb385876db5af1360 Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Tue, 28 Sep 2010 19:59:25 +0530 Subject: 9p: Implement TREADLINK operation for 9p2000.L Synopsis size[4] TReadlink tag[2] fid[4] size[4] RReadlink tag[2] target[s] Description Readlink is used to return the contents of the symbolic link referred to by fid. The contents of the symbolic link are returned as a response. target[s] - Contents of the symbolic link referred to by fid. Signed-off-by: M.
Mohan Kumar Reviewed-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_inode.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++--- include/net/9p/9p.h | 2 ++ include/net/9p/client.h | 1 + net/9p/client.c | 26 +++++++++++++++++++++ 4 files changed, 86 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 68f02973c338..88419073c654 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1527,7 +1527,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) if (IS_ERR(fid)) return PTR_ERR(fid); - if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) + if (!v9fs_proto_dotu(v9ses)) return -EBADF; st = p9_client_stat(fid); @@ -1995,6 +1995,60 @@ error: return err; } +static int +v9fs_vfs_readlink_dotl(struct dentry *dentry, char *buffer, int buflen) +{ + int retval; + struct p9_fid *fid; + char *target = NULL; + + P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); + retval = -EPERM; + fid = v9fs_fid_lookup(dentry); + if (IS_ERR(fid)) + return PTR_ERR(fid); + + retval = p9_client_readlink(fid, &target); + if (retval < 0) + return retval; + + strncpy(buffer, target, buflen); + P9_DPRINTK(P9_DEBUG_VFS, "%s -> %s\n", dentry->d_name.name, buffer); + + retval = strnlen(buffer, buflen); + return retval; +} + +/** + * v9fs_vfs_follow_link_dotl - follow a symlink path + * @dentry: dentry for symlink + * @nd: nameidata + * + */ + +static void * +v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd) +{ + int len = 0; + char *link = __getname(); + + P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name); + + if (!link) + link = ERR_PTR(-ENOMEM); + else { + len = v9fs_vfs_readlink_dotl(dentry, link, PATH_MAX); + if (len < 0) { + __putname(link); + link = ERR_PTR(len); + } else + link[min(len, PATH_MAX-1)] = 0; + } + nd_set_link(nd, link); + + return NULL; +} + static const struct inode_operations v9fs_dir_inode_operations_dotu = { .create = v9fs_vfs_create, .lookup = v9fs_vfs_lookup, @@ -2064,8 +2118,8 @@ static const struct inode_operations v9fs_symlink_inode_operations = { }; static const struct inode_operations v9fs_symlink_inode_operations_dotl = { - .readlink = generic_readlink, - .follow_link = v9fs_vfs_follow_link, + .readlink = v9fs_vfs_readlink_dotl, + .follow_link = v9fs_vfs_follow_link_dotl, .put_link = v9fs_vfs_put_link, .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr_dotl, diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 6367a71d84fc..071fd7a8d781 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -153,6 +153,8 @@ enum p9_msg_t { P9_RMKNOD, P9_TRENAME = 20, P9_RRENAME, + P9_TREADLINK = 22, + P9_RREADLINK, P9_TGETATTR = 24, P9_RGETATTR, P9_TSETATTR = 26, diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 127c9f2a9cb8..335b088e7672 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -262,5 +262,6 @@ int p9_is_proto_dotu(struct p9_client *clnt); int p9_is_proto_dotl(struct p9_client *clnt); struct p9_fid *p9_client_xattrwalk(struct p9_fid *, const char *, u64 *); int p9_client_xattrcreate(struct p9_fid *, const char *, u64, int); +int p9_client_readlink(struct p9_fid *fid, char **target); #endif /* NET_9P_CLIENT_H */ diff --git a/net/9p/client.c b/net/9p/client.c index fc1b0579016a..2bc99e9031e7 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1870,3 +1870,29 @@ error: return err; } EXPORT_SYMBOL(p9_client_getlock_dotl); + +int p9_client_readlink(struct 
p9_fid *fid, char **target) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid); + + req = p9_client_rpc(clnt, P9_TREADLINK, "d", fid->fid); + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "s", target); + if (err) { + p9pdu_dump(1, req->rc); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); +error: + p9_free_req(clnt, req); + return err; +} +EXPORT_SYMBOL(p9_client_readlink); -- cgit v1.2.3
From b165d60145b717261a0234f989c442c2b68b6ec0 Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Fri, 22 Oct 2010 10:13:12 -0700 Subject: 9p: Add datasync to client side TFSYNC/RFSYNC for dotl SYNOPSIS size[4] Tfsync tag[2] fid[4] datasync[4] size[4] Rfsync tag[2] DESCRIPTION The Tfsync transaction transfers ("flushes") all modified in-core data of the file identified by fid to the disk device (or other permanent storage device) where that file resides. If the datasync flag is specified, data will be flushed, but modified metadata will not be flushed unless it is needed in order to allow a subsequent data retrieval to be handled correctly. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs_vfs.h | 1 + fs/9p/vfs_dir.c | 1 + fs/9p/vfs_file.c | 4 ++-- include/net/9p/client.h | 2 +- net/9p/client.c | 7 ++++--- 5 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include')
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index d26db1932f7a..bab0eac873f4 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -65,5 +65,6 @@ int v9fs_uflags2omode(int uflags, int extended); ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); void v9fs_blank_wstat(struct p9_wstat *wstat); int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *); +int v9fs_file_fsync_dotl(struct file *filp, int datasync); #define P9_LOCK_TIMEOUT (30*HZ)
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 170f5bb8ebe0..b84ebe8cefed 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -315,4 +315,5 @@ const struct file_operations v9fs_dir_operations_dotl = { .readdir = v9fs_dir_readdir_dotl, .open = v9fs_file_open, .release = v9fs_dir_release, + .fsync = v9fs_file_fsync_dotl, };
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 3a4352f23a98..240c30674396 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -491,7 +491,7 @@ static int v9fs_file_fsync(struct file *filp, int datasync) return retval; } -static int v9fs_file_fsync_dotl(struct file *filp, int datasync) +int v9fs_file_fsync_dotl(struct file *filp, int datasync) { struct p9_fid *fid; int retval; @@ -501,7 +501,7 @@ static int v9fs_file_fsync_dotl(struct file *filp, int datasync) fid = filp->private_data; - retval = p9_client_fsync(fid); + retval = p9_client_fsync(fid, datasync); return retval; }
diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 335b088e7672..83ba6a4d58a3 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -229,7 +229,7 @@ int p9_client_symlink(struct p9_fid *fid, char *name, char *symname, gid_t gid, int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, gid_t gid, struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); -int p9_client_fsync(struct p9_fid *fid); +int p9_client_fsync(struct p9_fid *fid, int datasync); int p9_client_remove(struct p9_fid *fid); int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64
offset, u32 count);
diff --git a/net/9p/client.c b/net/9p/client.c index e3cfdff37327..8df80fb86f23 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1165,17 +1165,18 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) } EXPORT_SYMBOL(p9_client_link); -int p9_client_fsync(struct p9_fid *fid) +int p9_client_fsync(struct p9_fid *fid, int datasync) { int err; struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TFSYNC fid %d\n", fid->fid); + P9_DPRINTK(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n", + fid->fid, datasync); err = 0; clnt = fid->clnt; - req = p9_client_rpc(clnt, P9_TFSYNC, "d", fid->fid); + req = p9_client_rpc(clnt, P9_TFSYNC, "dd", fid->fid, datasync); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; -- cgit v1.2.3
From b31d42a5af1818bdf31a5f023abe4d8b212542f2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 28 Oct 2010 16:39:24 +0200 Subject: Fix compile breakage with !CONFIG_BLOCK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Today's git tree fails to build on !CONFIG_BLOCK, due to upstream commit 367a51a33902 ("fs: Add FITRIM ioctl"): include/linux/fs.h:36: error: expected specifier-qualifier-list before ‘uint64_t’ include/linux/fs.h:36: error: expected specifier-qualifier-list before ‘uint64_t’ include/linux/fs.h:36: error: expected specifier-qualifier-list before ‘uint64_t’ The commit adds uint64_t type usage to fs.h, but linux/types.h is not included explicitly - it's only included implicitly via linux/blk_types.h, and there only if CONFIG_BLOCK is enabled. Add the explicit #include to fix this. Signed-off-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include')
diff --git a/include/linux/fs.h b/include/linux/fs.h index 6ed7ace74b7c..1c73b50e81ff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -9,6 +9,7 @@ #include #include #include +#include <linux/types.h> /* * It's silly to have NR_OPEN bigger than NR_FILE, but you can change -- cgit v1.2.3
From e732ff707743e5ceba6ae2bfc7e799a0bac30ffa Mon Sep 17 00:00:00 2001 From: "Figo.zhang" Date: Tue, 26 Oct 2010 21:01:47 +0800 Subject: mmu_notifier.h: fix comment spelling Signed-off-by: Figo.zhang Signed-off-by: Linus Torvalds --- include/linux/mmu_notifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include')
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 4e02ee2b071e..43dcfbdc39de 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -227,7 +227,7 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) /* * These two macros will sometime replace ptep_clear_flush. - * ptep_clear_flush is impleemnted as macro itself, so this also is + * ptep_clear_flush is implemented as macro itself, so this also is * implemented as a macro until ptep_clear_flush will converted to an * inline function, to diminish the risk of compilation failure. The * invalidate_page method over time can be moved outside the PT lock -- cgit v1.2.3
From dc841e30eaea9f9f83c9ab1ee0b3ef9e5c95ce8a Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 27 Oct 2010 19:16:26 +0000 Subject: dccp: Extend CCID packet dequeueing interface This extends the packet dequeueing interface of dccp_write_xmit() to allow 1. CCIDs to take care of timing when the next packet may be sent; 2. delayed sending (as before, with an inter-packet gap up to 65.535 seconds).
The main purpose is to take CCID-2 out of its polling mode (when it is network-limited, it tries every millisecond to send, without interruption). The mode of operation for (2) is as follows: * new packet is enqueued via dccp_sendmsg() => dccp_write_xmit(), * ccid_hc_tx_send_packet() detects that it may not send (e.g. window full), * it signals this condition via `CCID_PACKET_WILL_DEQUEUE_LATER', * dccp_write_xmit() returns without further action; * after some time the wait-condition for CCID becomes true, * that CCID schedules the tasklet, * tasklet function calls ccid_hc_tx_send_packet() via dccp_write_xmit(), * since the wait-condition is now true, ccid_hc_tx_packet() returns "send now", * packet is sent, and possibly more (since dccp_write_xmit() loops). Code reuse: the tasklet function calls dccp_write_xmit(), the timer function reduces to a wrapper around the same code. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/linux/dccp.h | 4 +- net/dccp/output.c | 118 +++++++++++++++++++++++++++++++-------------------- net/dccp/timer.c | 25 ++++++----- 3 files changed, 89 insertions(+), 58 deletions(-) (limited to 'include')
diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 7187bd8a75f6..749f01ccd26e 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -462,7 +462,8 @@ struct dccp_ackvec; * @dccps_hc_rx_insert_options - receiver wants to add options when acking * @dccps_hc_tx_insert_options - sender wants to add options when sending * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) - * @dccps_xmit_timer - timer for when CCID is not ready to send + * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets + * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) */ struct dccp_sock { @@ -502,6 +503,7 @@ struct dccp_sock { __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; __u8 dccps_server_timewait:1; + struct tasklet_struct dccps_xmitlet; struct timer_list dccps_xmit_timer; };
diff --git a/net/dccp/output.c b/net/dccp/output.c index a988fe9ffcba..11418a9a389d 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -254,63 +254,89 @@ do_interrupted: goto out; } +/** + * dccp_xmit_packet - Send data packet under control of CCID + * Transmits next-queued payload and informs CCID to account for the packet. + */ +static void dccp_xmit_packet(struct sock *sk) +{ + int err, len; + struct dccp_sock *dp = dccp_sk(sk); + struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); + + if (unlikely(skb == NULL)) + return; + len = skb->len; + + if (sk->sk_state == DCCP_PARTOPEN) { + const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; + /* + * See 8.1.5 - Handshake Completion. + * + * For robustness we resend Confirm options until the client has + * entered OPEN. During the initial feature negotiation, the MPS + * is smaller than usual, reduced by the Change/Confirm options.
+ */ + if (!list_empty(&dp->dccps_featneg) && len > cur_mps) { + DCCP_WARN("Payload too large (%d) for featneg.\n", len); + dccp_send_ack(sk); + dccp_feat_list_purge(&dp->dccps_featneg); + } + + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + inet_csk(sk)->icsk_rto, + DCCP_RTO_MAX); + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK; + } else if (dccp_ack_pending(sk)) { + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK; + } else { + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA; + } + + err = dccp_transmit_skb(sk, skb); + if (err) + dccp_pr_debug("transmit_skb() returned err=%d\n", err); + /* + * Register this one as sent even if an error occurred. To the remote + * end a local packet drop is indistinguishable from network loss, i.e. + * any local drop will eventually be reported via receiver feedback. + */ + ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); +} + void dccp_write_xmit(struct sock *sk, int block) { struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; while ((skb = skb_peek(&sk->sk_write_queue))) { - int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); + int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); - if (err > 0) { + switch (ccid_packet_dequeue_eval(rc)) { + case CCID_PACKET_WILL_DEQUEUE_LATER: + return; + case CCID_PACKET_DELAY: if (!block) { sk_reset_timer(sk, &dp->dccps_xmit_timer, - msecs_to_jiffies(err)+jiffies); + msecs_to_jiffies(rc)+jiffies); + return; + } + rc = dccp_wait_for_ccid(sk, skb, rc); + if (rc && rc != -EINTR) { + DCCP_BUG("err=%d after dccp_wait_for_ccid", rc); + skb_dequeue(&sk->sk_write_queue); + kfree_skb(skb); break; - } else - err = dccp_wait_for_ccid(sk, skb, err); - if (err && err != -EINTR) - DCCP_BUG("err=%d after dccp_wait_for_ccid", err); - } - - skb_dequeue(&sk->sk_write_queue); - if (err == 0) { - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - const int len = skb->len; - - if (sk->sk_state == DCCP_PARTOPEN) { - const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; - /* - * See 8.1.5 - Handshake Completion. - * - * For robustness we resend Confirm options until the client has - * entered OPEN. During the initial feature negotiation, the MPS - * is smaller than usual, reduced by the Change/Confirm options. 
- */ - if (!list_empty(&dp->dccps_featneg) && len > cur_mps) { - DCCP_WARN("Payload too large (%d) for featneg.\n", len); - dccp_send_ack(sk); - dccp_feat_list_purge(&dp->dccps_featneg); - } - - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - inet_csk(sk)->icsk_rto, - DCCP_RTO_MAX); - dcb->dccpd_type = DCCP_PKT_DATAACK; - } else if (dccp_ack_pending(sk)) - dcb->dccpd_type = DCCP_PKT_DATAACK; - else - dcb->dccpd_type = DCCP_PKT_DATA; - - err = dccp_transmit_skb(sk, skb); - ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); - if (err) - DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", - err); - } else { - dccp_pr_debug("packet discarded due to err=%d\n", err); + } + /* fall through */ + case CCID_PACKET_SEND_AT_ONCE: + dccp_xmit_packet(sk); + break; + case CCID_PACKET_ERR: + skb_dequeue(&sk->sk_write_queue); kfree_skb(skb); + dccp_pr_debug("packet discarded due to err=%d\n", rc); } } }
diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 1a9aa05d4dc4..916f9d1dab36 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -237,32 +237,35 @@ out: sock_put(sk); } -/* Transmit-delay timer: used by the CCIDs to delay actual send time */ -static void dccp_write_xmit_timer(unsigned long data) +/** + * dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface + * See the comments above %ccid_dequeueing_decision for supported modes. + */ +static void dccp_write_xmitlet(unsigned long data) { struct sock *sk = (struct sock *)data; - struct dccp_sock *dp = dccp_sk(sk); bh_lock_sock(sk); if (sock_owned_by_user(sk)) - sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1); + sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1); else dccp_write_xmit(sk, 0); bh_unlock_sock(sk); - sock_put(sk); } -static void dccp_init_write_xmit_timer(struct sock *sk) +static void dccp_write_xmit_timer(unsigned long data) { - struct dccp_sock *dp = dccp_sk(sk); - - setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, - (unsigned long)sk); + dccp_write_xmitlet(data); + sock_put((struct sock *)data); } void dccp_init_xmit_timers(struct sock *sk) { - dccp_init_write_xmit_timer(sk); + struct dccp_sock *dp = dccp_sk(sk); + + tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk); + setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, + (unsigned long)sk); inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, &dccp_keepalive_timer); } -- cgit v1.2.3
From 4aa2c466a7733af093a526e9d1cdd0b3b90d47e9 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 28 Oct 2010 02:00:43 +0000 Subject: fib: Fix fib zone and its hash leak on namespace stop When we stop a namespace we flush the table and free it, but the added fn_zone-s (and their hashes, if grown) are leaked and need to be freed. The trie releases all its stuff in its flushing code. Shame on us - this bug exists since the very first make-fib-per-net patches in 2.6.27 :( Signed-off-by: Pavel Emelyanov Signed-off-by: David S.
Miller --- include/net/ip_fib.h | 2 ++ net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_hash.c | 18 ++++++++++++++++++ net/ipv4/fib_trie.c | 5 +++++ 4 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include')
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ba3666d31766..07bdb5e9e8ac 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -158,6 +158,8 @@ extern int fib_table_flush(struct fib_table *table); extern void fib_table_select_default(struct fib_table *table, const struct flowi *flp, struct fib_result *res); +extern void fib_free_table(struct fib_table *tb); + #ifndef CONFIG_IP_MULTIPLE_TABLES
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 36e27c2107de..eb6f69a8f27a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1052,7 +1052,7 @@ static void ip_fib_net_exit(struct net *net) hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { hlist_del(node); fib_table_flush(tb); - kfree(tb); + fib_free_table(tb); } } kfree(net->ipv4.fib_table_hash);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index b232375a0b75..b3acb0417b21 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -716,6 +716,24 @@ int fib_table_flush(struct fib_table *tb) return found; } +void fib_free_table(struct fib_table *tb) +{ + struct fn_hash *table = (struct fn_hash *) tb->tb_data; + struct fn_zone *fz, *next; + + next = table->fn_zone_list; + while (next != NULL) { + fz = next; + next = fz->fz_next; + + if (fz->fz_hash != fz->fz_embedded_hash) + fz_hash_free(fz->fz_hash, fz->fz_divisor); + + kfree(fz); + } + + kfree(tb); +} static inline int fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index b14450895102..200eb538fbb3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1797,6 +1797,11 @@ int fib_table_flush(struct fib_table *tb) return found; } +void fib_free_table(struct fib_table *tb) +{ + kfree(tb); +} + void fib_table_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) -- cgit v1.2.3
From 8acfe468b0384e834a303f08ebc4953d72fb690a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Oct 2010 11:41:55 -0700 Subject: net: Limit socket I/O iovec total length to INT_MAX. This helps protect us from overflow issues down in the individual protocol sendmsg/recvmsg handlers. Once we hit INT_MAX we truncate out the rest of the iovec by setting the iov_len members to zero. This works because: 1) For SOCK_STREAM and SOCK_SEQPACKET sockets, partial writes are allowed and the application will just continue with another write to send the rest of the data. 2) For datagram oriented sockets, where there must be a one-to-one correspondence between write() calls and packets on the wire, INT_MAX is going to be far larger than the packet size limit the protocol is going to check for and signal with -EMSGSIZE. Based upon a patch by Linus Torvalds. Signed-off-by: David S.
Miller --- include/linux/socket.h | 2 +- net/compat.c | 10 ++++++---- net/core/iovec.c | 20 +++++++++----------- 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index 5146b50202ce..86b652fabf6e 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -322,7 +322,7 @@ extern int csum_partial_copy_fromiovecend(unsigned char *kdata, int offset, unsigned int len, __wsum *csump); -extern long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode); +extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode); extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len); extern int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata, int offset, int len); diff --git a/net/compat.c b/net/compat.c index 63d260e81472..3649d5895361 100644 --- a/net/compat.c +++ b/net/compat.c @@ -41,10 +41,12 @@ static inline int iov_from_user_compat_to_kern(struct iovec *kiov, compat_size_t len; if (get_user(len, &uiov32->iov_len) || - get_user(buf, &uiov32->iov_base)) { - tot_len = -EFAULT; - break; - } + get_user(buf, &uiov32->iov_base)) + return -EFAULT; + + if (len > INT_MAX - tot_len) + len = INT_MAX - tot_len; + tot_len += len; kiov->iov_base = compat_ptr(buf); kiov->iov_len = (__kernel_size_t) len; diff --git a/net/core/iovec.c b/net/core/iovec.c index 72aceb1fe4fa..c40f27e7d208 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -35,10 +35,9 @@ * in any case. */ -long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) +int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) { - int size, ct; - long err; + int size, ct, err; if (m->msg_namelen) { if (mode == VERIFY_READ) { @@ -62,14 +61,13 @@ long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, err = 0; for (ct = 0; ct < m->msg_iovlen; ct++) { - err += iov[ct].iov_len; - /* - * Goal is not to verify user data, but to prevent returning - * negative value, which is interpreted as errno. - * Overflow is still possible, but it is harmless. - */ - if (err < 0) - return -EMSGSIZE; + size_t len = iov[ct].iov_len; + + if (len > INT_MAX - err) { + len = INT_MAX - err; + iov[ct].iov_len = len; + } + err += len; } return err; -- cgit v1.2.3 From 9343919c1495b085a4a1cf4cbada8d7888daf099 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fanotify: allow fanotify to be built We disabled the ability to build fanotify in commit 7c5347733dcc4ba0ba. This reverts that commit and allows people to build fanotify. 
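As an illustration of the interface this revert re-enables (not part of the patch; it assumes a libc exposing the fanotify_init()/fanotify_mark() wrappers, otherwise syscall(2) stubs are needed, and the watched path and event mask below are arbitrary example choices):

/* Minimal fanotify listener -- illustrative sketch only. */
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/fanotify.h>

int main(void)
{
	char buf[4096];
	ssize_t len;
	/* Create a notification group; this requires CAP_SYS_ADMIN. */
	int fan_fd = fanotify_init(FAN_CLOEXEC, O_RDONLY | O_LARGEFILE);

	if (fan_fd < 0)
		return 1;
	/* Ask for open and close-after-write events below /tmp. */
	if (fanotify_mark(fan_fd, FAN_MARK_ADD, FAN_OPEN | FAN_CLOSE_WRITE,
			  AT_FDCWD, "/tmp") < 0)
		return 1;
	while ((len = read(fan_fd, buf, sizeof(buf))) > 0) {
		struct fanotify_event_metadata *md = (void *)buf;

		while (FAN_EVENT_OK(md, len)) {
			printf("mask 0x%llx pid %d fd %d\n",
			       (unsigned long long)md->mask, md->pid, md->fd);
			if (md->fd >= 0)
				close(md->fd); /* each event carries an open fd */
			md = FAN_EVENT_NEXT(md, len);
		}
	}
	return 0;
}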
Signed-off-by: Eric Paris --- fs/notify/Kconfig | 2 +- include/linux/Kbuild | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig index b388443c3a09..22c629eedd82 100644 --- a/fs/notify/Kconfig +++ b/fs/notify/Kconfig @@ -3,4 +3,4 @@ config FSNOTIFY source "fs/notify/dnotify/Kconfig" source "fs/notify/inotify/Kconfig" -#source "fs/notify/fanotify/Kconfig" +source "fs/notify/fanotify/Kconfig" diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 90e3ed3a3144..97319a8fc1e0 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -118,6 +118,7 @@ header-y += eventpoll.h header-y += ext2_fs.h header-y += fadvise.h header-y += falloc.h +header-y += fanotify.h header-y += fb.h header-y += fcntl.h header-y += fd.h -- cgit v1.2.3 From 6ad2d4e3e97ee4bfde0b45e8dfe37911330fc4aa Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fsnotify: implement ordering between notifiers fanotify needs to be able to specify that some groups get events before others. They use this idea to make sure that a hierarchical storage manager gets access to files before programs which actually use them. This is purely infrastructure. Everything will have a priority of 0, but the infrastructure will exist for it to be non-zero. Signed-off-by: Eric Paris --- fs/notify/inode_mark.c | 9 +++++++-- fs/notify/vfsmount_mark.c | 6 +++++- include/linux/fsnotify_backend.h | 8 ++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 21ed10660b80..4c29fcf557d1 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -177,7 +177,8 @@ void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark, * Attach an initialized mark to a given inode. * These marks may be used for the fsnotify backend to determine which * event types should be delivered to which group and for which inodes. These - * marks are ordered according to the group's location in memory. + * marks are ordered according to priority, highest number first, and then by + * the group's location in memory. 
*/ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, @@ -211,7 +212,11 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, goto out; } - if (mark->group < lmark->group) + if (mark->group->priority < lmark->group->priority) + continue; + + if ((mark->group->priority == lmark->group->priority) && + (mark->group < lmark->group)) continue; hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index 56772b578fbd..85eebff6d0d7 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -169,7 +169,11 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, goto out; } - if (mark->group < lmark->group) + if (mark->group->priority < lmark->group->priority) + continue; + + if ((mark->group->priority == lmark->group->priority) && + (mark->group < lmark->group)) continue; hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index e40190d16878..825329534162 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -129,6 +129,14 @@ struct fsnotify_group { wait_queue_head_t notification_waitq; /* read() on the notification file blocks on this waitq */ unsigned int q_len; /* events on the queue */ unsigned int max_events; /* maximum events allowed on the list */ + /* + * Valid fsnotify group priorities. Events are sent in order from highest + * priority to lowest priority. We default to the lowest priority. + */ + #define FS_PRIO_0 0 /* normal notifiers, no permissions */ + #define FS_PRIO_1 1 /* fanotify content based access control */ + #define FS_PRIO_2 2 /* fanotify pre-content access */ + unsigned int priority; /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ spinlock_t mark_lock; /* protect marks_list */ -- cgit v1.2.3
From 4231a23530a30e86eb32fbe869bbef1b3e54d5aa Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fanotify: implement fanotify listener ordering fanotify listeners need to be able to specify what types of operations they are going to perform so they can be ordered appropriately among other listeners doing other types of operations. They need this to be able to make sure that things like hierarchical storage managers will get access to inodes before processes which need the data. This patch defines 3 possible uses which groups must indicate in the fanotify_init() flags. FAN_CLASS_PRE_CONTENT FAN_CLASS_CONTENT FAN_CLASS_NOTIF Groups will receive notification in that order. The order between 2 groups in the same class is nondeterministic. FAN_CLASS_PRE_CONTENT is intended to be used by listeners which need access to the inode before they are certain that the inode contains its final data. A hierarchical storage manager should choose to use this class. FAN_CLASS_CONTENT is intended to be used by listeners which need access to the inode after it contains its intended contents. This would be the appropriate level for an AV solution or document control system. FAN_CLASS_NOTIF is intended for normal async notification about access, much the same as inotify and dnotify. Synchronous permission events are not permitted at this class.
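To make the class semantics concrete, here is a hedged sketch (not from this patch; it presumes CONFIG_FANOTIFY_ACCESS_PERMISSIONS and the fanotify_response interface from earlier in this series, and the watched path is an arbitrary example) of a FAN_CLASS_CONTENT listener answering open-permission events:

/* Sketch of a content-class listener answering FAN_OPEN_PERM events.
 * Illustrative only; error handling elided. */
#include <unistd.h>
#include <fcntl.h>
#include <sys/fanotify.h>

static void handle_events(int fan_fd)
{
	char buf[4096];
	ssize_t len = read(fan_fd, buf, sizeof(buf));
	struct fanotify_event_metadata *md = (void *)buf;

	while (FAN_EVENT_OK(md, len)) {
		if (md->mask & FAN_OPEN_PERM) {
			struct fanotify_response resp = {
				.fd = md->fd,
				.response = FAN_ALLOW, /* or FAN_DENY after scanning */
			};
			write(fan_fd, &resp, sizeof(resp));
		}
		if (md->fd >= 0)
			close(md->fd);
		md = FAN_EVENT_NEXT(md, len);
	}
}

int main(void)
{
	/* FAN_CLASS_CONTENT orders this group after pre-content listeners
	 * but before plain FAN_CLASS_NOTIF notification groups. */
	int fan_fd = fanotify_init(FAN_CLOEXEC | FAN_CLASS_CONTENT, O_RDONLY);

	if (fan_fd < 0)
		return 1;
	if (fanotify_mark(fan_fd, FAN_MARK_ADD, FAN_OPEN_PERM,
			  AT_FDCWD, "/tmp") < 0)
		return 1;
	for (;;)
		handle_events(fan_fd);
}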
Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 25 ++++++++++++++++++++++++- include/linux/fanotify.h | 11 ++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) (limited to 'include')
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index bbcb98e7fcc6..1c09e6321c5e 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -664,6 +664,20 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); #endif + switch (flags & FAN_ALL_CLASS_BITS) { + case FAN_CLASS_NOTIF: + group->priority = FS_PRIO_0; + break; + case FAN_CLASS_CONTENT: + group->priority = FS_PRIO_1; + break; + case FAN_CLASS_PRE_CONTENT: + group->priority = FS_PRIO_2; + break; + default: + fd = -EINVAL; + goto out_put_group; + } fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) @@ -719,6 +733,16 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, ret = -EINVAL; if (unlikely(filp->f_op != &fanotify_fops)) goto fput_and_out; + group = filp->private_data; + + /* + * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not + * allowed to set permissions events. + */ + ret = -EINVAL; + if (mask & FAN_ALL_PERM_EVENTS && + group->priority == FS_PRIO_0) + goto fput_and_out; ret = fanotify_find_path(dfd, pathname, &path, flags); if (ret) @@ -729,7 +753,6 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, inode = path.dentry->d_inode; else mnt = path.mnt; - group = filp->private_data; /* create/update an inode mark */ switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 63531a6b4d2a..2c89ce7b644e 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -25,7 +25,16 @@ #define FAN_CLOEXEC 0x00000001 #define FAN_NONBLOCK 0x00000002 -#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK) +/* These are NOT bitwise flags. Both bits are used together. */ +#define FAN_CLASS_NOTIF 0x00000000 +#define FAN_CLASS_CONTENT 0x00000004 +#define FAN_CLASS_PRE_CONTENT 0x00000008 + +#define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ + FAN_CLASS_PRE_CONTENT) + +#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ + FAN_ALL_CLASS_BITS) /* flags used for fanotify_modify_mark() */ #define FAN_MARK_ADD 0x00000001 -- cgit v1.2.3
From 2868201965419b9011f3f07fd80e765181343cb1 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fanotify: use __aligned_u64 in fanotify userspace metadata Currently the userspace struct exposed by fanotify uses __attribute__((packed)) to make sure that alignment works on multiarch platforms.
Since this causes a severe performance penalty on some platforms we are going to switch to using explicit alignment notation on the 64bit values so we don't have to use 'packed' Signed-off-by: Eric Paris --- include/linux/fanotify.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 2c89ce7b644e..8a621c1a0991 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -79,10 +79,10 @@ struct fanotify_event_metadata { __u32 event_len; __u32 vers; - __u64 mask; + __aligned_u64 mask; __s32 fd; __s32 pid; -} __attribute__ ((packed)); +}; struct fanotify_response { __s32 fd; -- cgit v1.2.3 From ff8bcbd03da881bf1171910c6c07d44bd3c0a234 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fsnotify: correctly handle return codes from listeners When fsnotify groups return errors they are ignored. For permissions events these should be passed back up the stack, but for most events these should continue to be ignored. Signed-off-by: Eric Paris --- fs/notify/fsnotify.c | 20 ++++++++++++-------- include/linux/fsnotify_backend.h | 2 ++ 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 4498a208df94..57ecadd85abf 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -252,20 +252,23 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, if (inode_group > vfsmount_group) { /* handle inode */ - send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, - data_is, cookie, file_name, &event); + ret = send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, + data_is, cookie, file_name, &event); /* we didn't use the vfsmount_mark */ vfsmount_group = NULL; } else if (vfsmount_group > inode_group) { - send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, - data_is, cookie, file_name, &event); + ret = send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, + data_is, cookie, file_name, &event); inode_group = NULL; } else { - send_to_group(to_tell, mnt, inode_mark, vfsmount_mark, - mask, data, data_is, cookie, file_name, - &event); + ret = send_to_group(to_tell, mnt, inode_mark, vfsmount_mark, + mask, data, data_is, cookie, file_name, + &event); } + if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) + goto out; + if (inode_group) inode_node = srcu_dereference(inode_node->next, &fsnotify_mark_srcu); @@ -273,7 +276,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, vfsmount_node = srcu_dereference(vfsmount_node->next, &fsnotify_mark_srcu); } - + ret = 0; +out: srcu_read_unlock(&fsnotify_mark_srcu, idx); /* * fsnotify_create_event() took a reference so the event can't be cleaned diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 825329534162..026892187c83 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -64,6 +64,8 @@ #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) +#define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM) + #define ALL_FSNOTIFY_EVENTS (FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN | \ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE | \ -- cgit v1.2.3 From 52420392c81c8712f555e6bcd116d8bd214ce43a Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:56 -0400 Subject: fsnotify: call fsnotify_parent in perm events fsnotify perm events do not call fsnotify parent. 
That means you cannot register a perm event on a directory and enforce permissions on all inodes in that directory. This patch fixes that situation. Signed-off-by: Eric Paris --- fs/notify/fsnotify.c | 15 +++++++++------ include/linux/fsnotify.h | 9 +++++++-- include/linux/fsnotify_backend.h | 8 +++++--- 3 files changed, 21 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 57ecadd85abf..20dc218707ca 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -84,16 +84,17 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) } /* Notify this dentry's parent about a child's events. */ -void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) +int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) { struct dentry *parent; struct inode *p_inode; + int ret = 0; if (!dentry) dentry = path->dentry; if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) - return; + return 0; parent = dget_parent(dentry); p_inode = parent->d_inode; @@ -106,14 +107,16 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) mask |= FS_EVENT_ON_CHILD; if (path) - fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH, - dentry->d_name.name, 0); + ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH, + dentry->d_name.name, 0); else - fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, - dentry->d_name.name, 0); + ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, + dentry->d_name.name, 0); } dput(parent); + + return ret; } EXPORT_SYMBOL_GPL(__fsnotify_parent); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 59d0df43ff9d..5059faacceab 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -26,12 +26,12 @@ static inline void fsnotify_d_instantiate(struct dentry *dentry, } /* Notify this dentry's parent about a child's events. 
*/ -static inline void fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) +static inline int fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) { if (!dentry) dentry = path->dentry; - __fsnotify_parent(path, dentry, mask); + return __fsnotify_parent(path, dentry, mask); } /* simple call site for access decisions */ @@ -40,6 +40,7 @@ static inline int fsnotify_perm(struct file *file, int mask) struct path *path = &file->f_path; struct inode *inode = path->dentry->d_inode; __u32 fsnotify_mask = 0; + int ret; if (file->f_mode & FMODE_NONOTIFY) return 0; @@ -52,6 +53,10 @@ static inline int fsnotify_perm(struct file *file, int mask) else BUG(); + ret = fsnotify_parent(path, NULL, fsnotify_mask); + if (ret) + return ret; + return fsnotify(inode, fsnotify_mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); }
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 026892187c83..b37f3a71a9dc 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -304,7 +304,7 @@ struct fsnotify_mark { /* main fsnotify call to send events */ extern int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const unsigned char *name, u32 cookie); -extern void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask); +extern int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask); extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern u32 fsnotify_get_cookie(void); @@ -433,8 +433,10 @@ static inline int fsnotify(struct inode *to_tell, __u32 mask, void *data, int da return 0; } -static inline void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) -{} +static inline int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) +{ + return 0; +} static inline void __fsnotify_inode_delete(struct inode *inode) {} -- cgit v1.2.3
From bbf2aba50f6ed7c8dd53623fa1437b539928ac39 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:57 -0400 Subject: fanotify: allow userspace to flush all marks fanotify is supposed to be able to flush all marks. This is mostly useful for the AV community to flush all cached decisions on a security policy change. This functionality has existed in the kernel but wasn't correctly exposed to userspace. Signed-off-by: Eric Paris --- include/linux/fanotify.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include')
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 8a621c1a0991..a97c96d28c07 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -52,7 +52,8 @@ FAN_MARK_ONLYDIR |\ FAN_MARK_MOUNT |\ FAN_MARK_IGNORED_MASK |\ - FAN_MARK_IGNORED_SURV_MODIFY) + FAN_MARK_IGNORED_SURV_MODIFY |\ + FAN_MARK_FLUSH) /* * All of the events - we build the list by hand so that we can add flags in -- cgit v1.2.3
From 2529a0df0f64dab1f60ae08e038b89c53a6b4c02 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:57 -0400 Subject: fsnotify: implement a default maximum queue depth Currently fanotify has no maximum queue depth. Since fanotify is CAP_SYS_ADMIN only this does not pose a normal user DoS issue, but it certainly is possible that a fanotify listener which can't keep up could OOM the box. This patch implements a default 16k depth.
This is the same default depth used by inotify, but given fanotify's better queue merging, in many situations this queue will contain many additional useful events by comparison. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 4 ++++ include/linux/fanotify.h | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include')
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index b265936e92d6..04f2fe47b66a 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -16,6 +16,8 @@ #include +#define FANOTIFY_DEFAULT_MAX_EVENTS 16384 + extern const struct fsnotify_ops fanotify_fsnotify_ops; static struct kmem_cache *fanotify_mark_cache __read_mostly; @@ -689,6 +691,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) goto out_put_group; } + group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; + fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) goto out_put_group;
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index a97c96d28c07..ed479b6fef7b 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -12,7 +12,6 @@ #define FAN_EVENT_ON_CHILD 0x08000000 /* interested in child events */ -/* FIXME currently Q's have no limit.... */ #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ -- cgit v1.2.3
From 5dd03f55fd2f21916ce248bb2e68bbfb39d94fe5 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:57 -0400 Subject: fanotify: allow userspace to override max queue depth fanotify has a default max queue depth. This patch allows processes which explicitly request it to have an 'unlimited' queue depth. These processes need to be very careful to make sure they cannot fall far enough behind that they OOM the box. Thus this flag is gated on CAP_SYS_ADMIN.
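In practice a listener might opt in to the deeper queue and degrade gracefully when the flag is refused; a sketch under the assumption that unprivileged fanotify_init() eventually becomes possible, as the changelog above anticipates:

/* Sketch: request an unlimited event queue, falling back to the
 * bounded default queue when the kernel refuses. Illustrative only. */
#include <errno.h>
#include <fcntl.h>
#include <sys/fanotify.h>

static int open_fanotify_group(void)
{
	int fd = fanotify_init(FAN_CLOEXEC | FAN_CLASS_NOTIF |
			       FAN_UNLIMITED_QUEUE, O_RDONLY);

	/* Without CAP_SYS_ADMIN the kernel rejects FAN_UNLIMITED_QUEUE
	 * with EPERM; retry with the default (16384-event) queue. */
	if (fd < 0 && errno == EPERM)
		fd = fanotify_init(FAN_CLOEXEC | FAN_CLASS_NOTIF, O_RDONLY);
	return fd;
}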
Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 9 ++++++++- include/linux/fanotify.h | 5 +++-- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 04f2fe47b66a..43d66d9b2eff 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -691,7 +691,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) goto out_put_group; } - group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; + if (flags & FAN_UNLIMITED_QUEUE) { + fd = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out_put_group; + group->max_events = UINT_MAX; + } else { + group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; + } fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index ed479b6fef7b..e37f559c95e1 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -28,12 +28,13 @@ #define FAN_CLASS_NOTIF 0x00000000 #define FAN_CLASS_CONTENT 0x00000004 #define FAN_CLASS_PRE_CONTENT 0x00000008 - #define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ FAN_CLASS_PRE_CONTENT) +#define FAN_UNLIMITED_QUEUE 0x00000010 + #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ - FAN_ALL_CLASS_BITS) + FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE) /* flags used for fanotify_modify_mark() */ #define FAN_MARK_ADD 0x00000001 -- cgit v1.2.3 From e7099d8a5a34d2876908a9fab4952dabdcfc5909 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:57 -0400 Subject: fanotify: limit the number of marks in a single fanotify group There is currently no limit on the number of marks a given fanotify group can have. Since fanotify is gated on CAP_SYS_ADMIN this was not seen as a serious DoS threat. This patch implements a default of 8192, the same as inotify to work towards removing the CAP_SYS_ADMIN gating and eliminating the default DoS'able status. 
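
With the limit in place, fanotify_mark() can fail with -ENOSPC once a group already holds 8192 marks; callers may want to report that case distinctly (a sketch; watch_path() is a hypothetical helper, not part of this patch):

	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/fanotify.h>

	static int watch_path(int fanotify_fd, const char *path)
	{
		int ret = fanotify_mark(fanotify_fd, FAN_MARK_ADD,
					FAN_OPEN | FAN_CLOSE, AT_FDCWD, path);

		if (ret < 0 && errno == ENOSPC)
			fprintf(stderr, "mark limit reached at %s\n", path);
		return ret;
	}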
Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 9 +++++++++ include/linux/fsnotify_backend.h | 1 + 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 43d66d9b2eff..1d33d7db277a 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -17,6 +17,7 @@ #include #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 +#define FANOTIFY_DEFAULT_MAX_MARKS 8192 extern const struct fsnotify_ops fanotify_fsnotify_ops; @@ -584,6 +585,9 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, if (!fsn_mark) { int ret; + if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) + return -ENOSPC; + fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); if (!fsn_mark) return -ENOMEM; @@ -626,6 +630,9 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, if (!fsn_mark) { int ret; + if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) + return -ENOSPC; + fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); if (!fsn_mark) return -ENOMEM; @@ -700,6 +707,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; } + group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS; + fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) goto out_put_group; diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index b37f3a71a9dc..49ceed6e92b1 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -169,6 +169,7 @@ struct fsnotify_group { bool bypass_perm; /* protected by access_mutex */ #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; + unsigned int max_marks; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; -- cgit v1.2.3 From ac7e22dcfafd04c842a02057afd6541c1d613ef9 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:58 -0400 Subject: fanotify: allow userspace to override max marks Some fanotify groups, especially those like AV scanners, will need to place lots of marks, particularly ignore marks. Since ignore marks do not pin inodes in cache and are cleared if the inode is removed from core (usually under memory pressure), we expose an interface for listeners, with CAP_SYS_ADMIN, to override the maximum number of marks and be allowed to set an 'unlimited' number of marks. Programs which make use of this feature will be able to OOM a machine.
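
A cache-heavy listener such as an AV scanner might then combine the new init flag with ignore marks, roughly as below (a sketch only; the fd is assumed to come from fanotify_init(FAN_CLASS_CONTENT | FAN_UNLIMITED_MARKS, O_RDONLY), and the helper name is hypothetical):

	#include <fcntl.h>
	#include <sys/fanotify.h>

	/* Cache a "clean" verdict by ignoring further open-perm events on
	 * one file.  Ignore marks are dropped when the inode falls out of
	 * core, which is why such listeners can need far more than the
	 * default 8192 marks. */
	static int cache_clean_verdict(int fanotify_fd, const char *path)
	{
		return fanotify_mark(fanotify_fd,
				     FAN_MARK_ADD | FAN_MARK_IGNORED_MASK |
				     FAN_MARK_IGNORED_SURV_MODIFY,
				     FAN_OPEN_PERM, AT_FDCWD, path);
	}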
Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 9 ++++++++- include/linux/fanotify.h | 4 +++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 1d33d7db277a..f9216102b426 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -707,7 +707,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; } - group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS; + if (flags & FAN_UNLIMITED_MARKS) { + fd = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out_put_group; + group->fanotify_data.max_marks = UINT_MAX; + } else { + group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS; + } fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); if (fd < 0) diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index e37f559c95e1..7592a366a57b 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -32,9 +32,11 @@ FAN_CLASS_PRE_CONTENT) #define FAN_UNLIMITED_QUEUE 0x00000010 +#define FAN_UNLIMITED_MARKS 0x00000020 #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ - FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE) + FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\ + FAN_UNLIMITED_MARKS) /* flags used for fanotify_modify_mark() */ #define FAN_MARK_ADD 0x00000001 -- cgit v1.2.3 From 4afeff8505cb8a38e36c1ef2bd3447c4b8f87367 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:58 -0400 Subject: fanotify: limit number of listeners per user fanotify currently has no limit on the number of listeners a given user can have open. This patch limits the total number of listeners per user to 128. This is the same as the inotify default limit. 
Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.c | 11 ++++++++++- fs/notify/fanotify/fanotify_user.c | 11 +++++++++++ include/linux/fsnotify_backend.h | 1 + include/linux/sched.h | 3 +++ 4 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 85366c78cc37..60c11c306fd9 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -200,10 +200,19 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, return false; } +static void fanotify_free_group_priv(struct fsnotify_group *group) +{ + struct user_struct *user; + + user = group->fanotify_data.user; + atomic_dec(&user->fanotify_listeners); + free_uid(user); +} + const struct fsnotify_ops fanotify_fsnotify_ops = { .handle_event = fanotify_handle_event, .should_send_event = fanotify_should_send_event, - .free_group_priv = NULL, + .free_group_priv = fanotify_free_group_priv, .free_event_priv = NULL, .freeing_mark = NULL, }; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index f9216102b426..a7d9369482d5 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -18,6 +18,7 @@ #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 #define FANOTIFY_DEFAULT_MAX_MARKS 8192 +#define FANOTIFY_DEFAULT_MAX_LISTENERS 128 extern const struct fsnotify_ops fanotify_fsnotify_ops; @@ -656,6 +657,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) { struct fsnotify_group *group; int f_flags, fd; + struct user_struct *user; pr_debug("%s: flags=%d event_f_flags=%d\n", __func__, flags, event_f_flags); @@ -666,6 +668,12 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if (flags & ~FAN_ALL_INIT_FLAGS) return -EINVAL; + user = get_current_user(); + if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) { + free_uid(user); + return -EMFILE; + } + f_flags = O_RDWR | FMODE_NONOTIFY; if (flags & FAN_CLOEXEC) f_flags |= O_CLOEXEC; @@ -677,6 +685,9 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if (IS_ERR(group)) return PTR_ERR(group); + group->fanotify_data.user = user; + atomic_inc(&user->fanotify_listeners); + group->fanotify_data.f_flags = event_f_flags; #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS mutex_init(&group->fanotify_data.access_mutex); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 49ceed6e92b1..4366f458a86a 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -170,6 +170,7 @@ struct fsnotify_group { #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; unsigned int max_marks; + struct user_struct *user; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; diff --git a/include/linux/sched.h b/include/linux/sched.h index be7adb7588e5..6f420baf37ca 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -672,6 +672,9 @@ struct user_struct { atomic_t inotify_watches; /* How many inotify watches does this user have? */ atomic_t inotify_devs; /* How many inotify devs does this user have opened? 
*/ #endif +#ifdef CONFIG_FANOTIFY + atomic_t fanotify_listeners; +#endif #ifdef CONFIG_EPOLL atomic_t epoll_watches; /* The number of file descriptors currently watched */ #endif -- cgit v1.2.3 From b29866aab8489487f11cc4506590ac31bdbae22a Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:58 -0400 Subject: fsnotify: rename FS_IN_ISDIR to FS_ISDIR The _IN_ in the naming is reserved for flags only used by inotify. Since I am about to use this flag for fanotify rename it to be generic like the rest. Signed-off-by: Eric Paris --- fs/notify/inotify/inotify_user.c | 2 +- include/linux/fsnotify.h | 20 ++++++++++---------- include/linux/fsnotify_backend.h | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 24edc1185d53..444c305a468c 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -862,7 +862,7 @@ static int __init inotify_user_setup(void) BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK); - BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR); + BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 5059faacceab..ecb43b33d181 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -98,8 +98,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, old_dir_mask |= FS_DN_RENAME; if (isdir) { - old_dir_mask |= FS_IN_ISDIR; - new_dir_mask |= FS_IN_ISDIR; + old_dir_mask |= FS_ISDIR; + new_dir_mask |= FS_ISDIR; } fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name, fs_cookie); @@ -137,7 +137,7 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) __u32 mask = FS_DELETE; if (isdir) - mask |= FS_IN_ISDIR; + mask |= FS_ISDIR; fsnotify_parent(NULL, dentry, mask); } @@ -179,7 +179,7 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct */ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) { - __u32 mask = (FS_CREATE | FS_IN_ISDIR); + __u32 mask = (FS_CREATE | FS_ISDIR); struct inode *d_inode = dentry->d_inode; audit_inode_child(dentry, inode); @@ -197,7 +197,7 @@ static inline void fsnotify_access(struct file *file) __u32 mask = FS_ACCESS; if (S_ISDIR(inode->i_mode)) - mask |= FS_IN_ISDIR; + mask |= FS_ISDIR; if (!(file->f_mode & FMODE_NONOTIFY)) { fsnotify_parent(path, NULL, mask); @@ -215,7 +215,7 @@ static inline void fsnotify_modify(struct file *file) __u32 mask = FS_MODIFY; if (S_ISDIR(inode->i_mode)) - mask |= FS_IN_ISDIR; + mask |= FS_ISDIR; if (!(file->f_mode & FMODE_NONOTIFY)) { fsnotify_parent(path, NULL, mask); @@ -233,7 +233,7 @@ static inline void fsnotify_open(struct file *file) __u32 mask = FS_OPEN; if (S_ISDIR(inode->i_mode)) - mask |= FS_IN_ISDIR; + mask |= FS_ISDIR; if (!(file->f_mode & FMODE_NONOTIFY)) { fsnotify_parent(path, NULL, mask); @@ -252,7 +252,7 @@ static inline void fsnotify_close(struct file *file) __u32 mask = (mode & FMODE_WRITE) ? 
FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; if (S_ISDIR(inode->i_mode)) - mask |= FS_IN_ISDIR; + mask |= FS_ISDIR; if (!(file->f_mode & FMODE_NONOTIFY)) { fsnotify_parent(path, NULL, mask); @@ -269,7 +269,7 @@ static inline void fsnotify_xattr(struct dentry *dentry) __u32 mask = FS_ATTRIB; if (S_ISDIR(inode->i_mode)) - mask |= FS_IN_ISDIR; + mask |= FS_ISDIR; fsnotify_parent(NULL, dentry, mask); fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); @@ -304,7 +304,7 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) if (mask) { if (S_ISDIR(inode->i_mode)) - mask |= FS_IN_ISDIR; + mask |= FS_ISDIR; fsnotify_parent(NULL, dentry, mask); fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 4366f458a86a..b36041e9cd34 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -45,7 +45,7 @@ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */ -#define FS_IN_ISDIR 0x40000000 /* event occurred against dir */ +#define FS_ISDIR 0x40000000 /* event occurred against dir */ #define FS_IN_ONESHOT 0x80000000 /* only send event once */ #define FS_DN_RENAME 0x10000000 /* file renamed */ @@ -72,7 +72,7 @@ FS_DELETE | FS_DELETE_SELF | FS_MOVE_SELF | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ FS_OPEN_PERM | FS_ACCESS_PERM | FS_EXCL_UNLINK | \ - FS_IN_ISDIR | FS_IN_ONESHOT | FS_DN_RENAME | \ + FS_ISDIR | FS_IN_ONESHOT | FS_DN_RENAME | \ FS_DN_MULTISHOT | FS_EVENT_ON_CHILD) struct fsnotify_group; -- cgit v1.2.3 From 8fcd65280abc4699510f1853ede31f43e8a3783a Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:59 -0400 Subject: fanotify: ignore events on directories unless specifically requested fanotify has a very limited number of events it sends on directories. The usefulness of these events is yet to be seen, and still we send them. This is particularly painful for mount marks where one might receive many of these useless events. As such, this patch will drop events on S_ISDIR() inodes unless they were explicitly requested with FAN_ONDIR.
This means that a mark on a directory without FAN_EVENT_ON_CHILD or FAN_ONDIR is meaningless and will result in no events ever (although it will still be allowed since detecting it is hard). Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.c | 5 +++++ fs/notify/fanotify/fanotify_user.c | 12 ++++++++++++ include/linux/fanotify.h | 10 ++++++++-- 3 files changed, 25 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 8d98e1f5817b..b04f88eed09e 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -131,6 +131,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW); BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); + BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); pr_debug("%s: group=%p event=%p\n", __func__, group, event); @@ -195,6 +196,10 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, BUG(); } + if (S_ISDIR(path->dentry->d_inode->i_mode) && + (marks_ignored_mask & FS_ISDIR)) + return false; + if (event_mask & marks_mask & ~marks_ignored_mask) return true; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index a7d9369482d5..ff1a908c9708 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -570,6 +570,12 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, if (flags & FAN_MARK_IGNORED_SURV_MODIFY) fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; } + + if (!(flags & FAN_MARK_ONDIR)) { + __u32 tmask = fsn_mark->ignored_mask | FAN_ONDIR; + fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); + } + spin_unlock(&fsn_mark->lock); return mask & ~oldmask; @@ -766,6 +772,12 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, default: return -EINVAL; } + + if (mask & FAN_ONDIR) { + flags |= FAN_MARK_ONDIR; + mask &= ~FAN_ONDIR; + } + #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD)) #else diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 7592a366a57b..5e0400a80c33 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -10,13 +10,15 @@ #define FAN_CLOSE_NOWRITE 0x00000010 /* Writtable file closed */ #define FAN_OPEN 0x00000020 /* File was opened */ -#define FAN_EVENT_ON_CHILD 0x08000000 /* interested in child events */ - #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ +#define FAN_ONDIR 0x40000000 /* event occurred against dir */ + +#define FAN_EVENT_ON_CHILD 0x08000000 /* interested in child events */ + /* helper events */ #define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ @@ -47,6 +49,10 @@ #define FAN_MARK_IGNORED_MASK 0x00000020 #define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 #define FAN_MARK_FLUSH 0x00000080 +#ifdef __KERNEL__ +/* not valid from userspace, only kernel internal */ +#define FAN_MARK_ONDIR 0x00000100 +#endif #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ FAN_MARK_REMOVE |\ -- cgit v1.2.3 From 50e4a98914de13c6f38f50fd1afa06e2c18b3cf7 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 28 Oct 2010 17:21:59 -0400 Subject: fanotify: Fix FAN_CLOSE comments The comments for FAN_CLOSE_WRITE and FAN_CLOSE_NOWRITE do not match FS_CLOSE_WRITE and FS_CLOSE_NOWRITE,
respectively. WRITE is for writable files while NOWRITE is for non-writable files. Signed-off-by: Stefan Hajnoczi Signed-off-by: Eric Paris --- include/linux/fanotify.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 5e0400a80c33..0f0121467fc4 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -6,8 +6,8 @@ /* the following events that user-space can register for */ #define FAN_ACCESS 0x00000001 /* File was accessed */ #define FAN_MODIFY 0x00000002 /* File was modified */ -#define FAN_CLOSE_WRITE 0x00000008 /* Unwrittable file closed */ -#define FAN_CLOSE_NOWRITE 0x00000010 /* Writtable file closed */ +#define FAN_CLOSE_WRITE 0x00000008 /* Writtable file closed */ +#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ #define FAN_OPEN 0x00000020 /* File was opened */ #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ -- cgit v1.2.3 From d8c0fca68da25ca3df534dfb12ce628675c828e4 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Thu, 28 Oct 2010 17:21:59 -0400 Subject: fsnotify: remove alignment padding from fsnotify_mark on 64 bit builds Reorder struct fsnotify_mark to remove 8 bytes of alignment padding on 64 bit builds. Shrinks fsnotify_mark to 128 bytes allowing more objects per slab in its kmem_cache and reduces the number of cachelines needed for each structure. Signed-off-by: Richard Kennedy Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index b36041e9cd34..0a68f924f06f 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -287,8 +287,8 @@ struct fsnotify_mark { struct fsnotify_inode_mark i; struct fsnotify_vfsmount_mark m; }; - __u32 ignored_mask; /* events types to ignore */ struct list_head free_g_list; /* tmp list used when freeing this mark */ + __u32 ignored_mask; /* events types to ignore */ #define FSNOTIFY_MARK_FLAG_INODE 0x01 #define FSNOTIFY_MARK_FLAG_VFSMOUNT 0x02 #define FSNOTIFY_MARK_FLAG_OBJECT_PINNED 0x04 -- cgit v1.2.3 From 202f4f53e503ae09b431459131b5b3a99fa6d839 Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Fri, 20 Aug 2010 14:43:56 +0900 Subject: MAX8952 PMIC Driver Initial Release MAX8952 PMIC is used to provide voltage output between 770mV and 1400mV with DVS support. In this initial release, users can set voltages for four DVS modes, RAMP delay values, and SYNC frequency. Controlling FPWM/SYNC_MODE/Pull-Down/Ramp Modes and reading CHIP_ID is not supported in this release. If GPIO of EN is not valid in platform data, the driver assumes that it is always-on. If GPIO of VID0 or VID1 is invalid, the driver pulls down VID0 and VID1 to fix DVS mode as 0 and disables DVS support. We assume that V_OUT is capable of providing every voltage from 770mV to 1.40V in 10mV steps although the data sheet has some ambiguity about it.
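
A board file would then describe the chip roughly as follows (a sketch only: the GPIO numbers, I2C address, rail name, and DVS voltages are placeholders taken from no real board; the array would be handed to i2c_register_board_info() at machine init):

	#include <linux/i2c.h>
	#include <linux/init.h>
	#include <linux/regulator/machine.h>
	#include <linux/regulator/max8952.h>

	static struct max8952_platform_data board_max8952_pdata = {
		.gpio_en	= 14,	/* placeholder GPIO numbers */
		.gpio_vid0	= 15,
		.gpio_vid1	= 16,
		.default_mode	= MAX8952_DVS_MODE0,
		.dvs_mode	= {
			[MAX8952_DVS_MODE0] = MAX8952_DVS_1100mV,
			[MAX8952_DVS_MODE1] = MAX8952_DVS_1200mV,
			[MAX8952_DVS_MODE2] = MAX8952_DVS_1000mV,
			[MAX8952_DVS_MODE3] = MAX8952_DVS_950mV,
		},
		.sync_freq	= MAX8952_SYNC_FREQ_26MHZ,
		.ramp_speed	= MAX8952_RAMP_32mV_us,
		.reg_data	= {
			.constraints = {
				.name		= "VDD_ARM",
				.min_uV		= 770000,
				.max_uV		= 1400000,
				.valid_ops_mask	= REGULATOR_CHANGE_VOLTAGE,
				.boot_on	= 1,
			},
		},
	};

	static struct i2c_board_info board_pmic_devs[] __initdata = {
		{
			I2C_BOARD_INFO("max8952", 0x60),
			.platform_data = &board_max8952_pdata,
		},
	};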
Signed-off-by: MyungJoo Ham Signed-off-by: Kyungmin Park Acked-by: Mark Brown -- v2: - Style correction - Can accept platform_data with invalid GPIOs - Removed unnecessary features - Improved error handling Signed-off-by: Liam Girdwood --- drivers/regulator/Kconfig | 8 + drivers/regulator/Makefile | 1 + drivers/regulator/max8952.c | 360 ++++++++++++++++++++++++++++++++++++++ include/linux/regulator/max8952.h | 135 ++++++++++++++ 4 files changed, 504 insertions(+) create mode 100644 drivers/regulator/max8952.c create mode 100644 include/linux/regulator/max8952.h (limited to 'include') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 172951bf23a4..4889caabc632 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -100,6 +100,14 @@ config REGULATOR_MAX8925 help Say y here to support the voltage regulaltor of Maxim MAX8925 PMIC. +config REGULATOR_MAX8952 + tristate "Maxim MAX8952 Power Management IC" + depends on I2C + help + This driver controls a Maxim 8952 voltage output regulator + via I2C bus. Maxim 8952 has one voltage output and supports 4 DVS + modes ranging from 0.77V to 1.40V by 0.01V steps. + config REGULATOR_MAX8998 tristate "Maxim 8998 voltage regulator" depends on MFD_MAX8998 diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 8285fd832e16..beff6da6eff6 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_REGULATOR_TWL4030) += twl-regulator.o obj-$(CONFIG_REGULATOR_MAX8649) += max8649.o obj-$(CONFIG_REGULATOR_MAX8660) += max8660.o obj-$(CONFIG_REGULATOR_MAX8925) += max8925-regulator.o +obj-$(CONFIG_REGULATOR_MAX8952) += max8952.o obj-$(CONFIG_REGULATOR_MAX8998) += max8998.o obj-$(CONFIG_REGULATOR_WM831X) += wm831x-dcdc.o obj-$(CONFIG_REGULATOR_WM831X) += wm831x-isink.o diff --git a/drivers/regulator/max8952.c b/drivers/regulator/max8952.c new file mode 100644 index 000000000000..f2af0b1c3925 --- /dev/null +++ b/drivers/regulator/max8952.c @@ -0,0 +1,360 @@ +/* + * max8952.c - Voltage and current regulation for the Maxim 8952 + * + * Copyright (C) 2010 Samsung Electronics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Registers */ +enum { + MAX8952_REG_MODE0, + MAX8952_REG_MODE1, + MAX8952_REG_MODE2, + MAX8952_REG_MODE3, + MAX8952_REG_CONTROL, + MAX8952_REG_SYNC, + MAX8952_REG_RAMP, + MAX8952_REG_CHIP_ID1, + MAX8952_REG_CHIP_ID2, +}; + +struct max8952_data { + struct i2c_client *client; + struct device *dev; + struct mutex mutex; + struct max8952_platform_data *pdata; + struct regulator_dev *rdev; + + bool vid0; + bool vid1; + bool en; +}; + +static int max8952_read_reg(struct max8952_data *max8952, u8 reg) +{ + int ret = i2c_smbus_read_byte_data(max8952->client, reg); + if (ret > 0) + ret &= 0xff; + + return ret; +} + +static int max8952_write_reg(struct max8952_data *max8952, + u8 reg, u8 value) +{ + return i2c_smbus_write_byte_data(max8952->client, reg, value); +} + +static int max8952_voltage(struct max8952_data *max8952, u8 mode) +{ + return (max8952->pdata->dvs_mode[mode] * 10 + 770) * 1000; +} + +static int max8952_list_voltage(struct regulator_dev *rdev, + unsigned int selector) +{ + struct max8952_data *max8952 = rdev_get_drvdata(rdev); + + if (rdev_get_id(rdev) != 0) + return -EINVAL; + + return max8952_voltage(max8952, selector); +} + +static int max8952_is_enabled(struct regulator_dev *rdev) +{ + struct max8952_data *max8952 = rdev_get_drvdata(rdev); + return max8952->en; +} + +static int max8952_enable(struct regulator_dev *rdev) +{ + struct max8952_data *max8952 = rdev_get_drvdata(rdev); + + /* If not valid, assume "ALWAYS_HIGH" */ + if (gpio_is_valid(max8952->pdata->gpio_en)) + gpio_set_value(max8952->pdata->gpio_en, 1); + + max8952->en = true; + return 0; +} + +static int max8952_disable(struct regulator_dev *rdev) +{ + struct max8952_data *max8952 = rdev_get_drvdata(rdev); + + /* If not valid, assume "ALWAYS_HIGH" -> not permitted */ + if (gpio_is_valid(max8952->pdata->gpio_en)) + gpio_set_value(max8952->pdata->gpio_en, 0); + else + return -EPERM; + + max8952->en = false; + return 0; +} + +static int max8952_get_voltage(struct regulator_dev *rdev) +{ + struct max8952_data *max8952 = rdev_get_drvdata(rdev); + u8 vid = 0; + + if (max8952->vid0) + vid += 1; + if (max8952->vid1) + vid += 2; + + return max8952_voltage(max8952, vid); +} + +static int max8952_set_voltage(struct regulator_dev *rdev, + int min_uV, int max_uV) +{ + struct max8952_data *max8952 = rdev_get_drvdata(rdev); + u8 vid = -1, i; + + if (!gpio_is_valid(max8952->pdata->gpio_vid0) || + !gpio_is_valid(max8952->pdata->gpio_vid0)) { + /* DVS not supported */ + return -EPERM; + } + + for (i = 0; i < MAX8952_NUM_DVS_MODE; i++) { + int volt = max8952_voltage(max8952, i); + + /* Set the voltage as low as possible within the range */ + if (volt <= max_uV && volt >= min_uV) + if (vid == -1 || max8952_voltage(max8952, vid) > volt) + vid = i; + } + + if (vid >= 0 && vid < MAX8952_NUM_DVS_MODE) { + max8952->vid0 = (vid % 2 == 1); + max8952->vid1 = (((vid >> 1) % 2) == 1); + gpio_set_value(max8952->pdata->gpio_vid0, max8952->vid0); + gpio_set_value(max8952->pdata->gpio_vid1, max8952->vid1); + } else + return -EINVAL; + + return 0; +} + +static struct regulator_ops max8952_ops = { + .list_voltage = max8952_list_voltage, + .is_enabled = max8952_is_enabled, + .enable = max8952_enable, + .disable = 
max8952_disable, + .get_voltage = max8952_get_voltage, + .set_voltage = max8952_set_voltage, + .set_suspend_disable = max8952_disable, +}; + +static struct regulator_desc regulator = { + .name = "MAX8952_VOUT", + .id = 0, + .n_voltages = MAX8952_NUM_DVS_MODE, + .ops = &max8952_ops, + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, +}; + +static int __devinit max8952_pmic_probe(struct i2c_client *client, + const struct i2c_device_id *i2c_id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct max8952_platform_data *pdata = client->dev.platform_data; + struct max8952_data *max8952; + + int ret = 0, err = 0; + + if (!pdata) { + dev_err(&client->dev, "Require the platform data\n"); + return -EINVAL; + } + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) + return -EIO; + + max8952 = kzalloc(sizeof(struct max8952_data), GFP_KERNEL); + if (!max8952) + return -ENOMEM; + + max8952->client = client; + max8952->dev = &client->dev; + max8952->pdata = pdata; + mutex_init(&max8952->mutex); + + max8952->rdev = regulator_register(®ulator, max8952->dev, + &pdata->reg_data, max8952); + + ret = IS_ERR(max8952->rdev); + if (ret) + dev_err(max8952->dev, "regulator init failed (%d)\n", ret); + + max8952->en = !!(pdata->reg_data.constraints.boot_on); + max8952->vid0 = (pdata->default_mode % 2) == 1; + max8952->vid1 = ((pdata->default_mode >> 1) % 2) == 1; + + if (gpio_is_valid(pdata->gpio_en)) { + if (!gpio_request(pdata->gpio_en, "MAX8952 EN")) + gpio_direction_output(pdata->gpio_en, max8952->en); + else + err = 1; + } else + err = 2; + + if (err) { + dev_info(max8952->dev, "EN gpio invalid: assume that EN" + "is always High\n"); + max8952->en = 1; + pdata->gpio_en = -1; /* Mark invalid */ + } + + err = 0; + + if (gpio_is_valid(pdata->gpio_vid0) && + gpio_is_valid(pdata->gpio_vid1)) { + if (!gpio_request(pdata->gpio_vid0, "MAX8952 VID0")) + gpio_direction_output(pdata->gpio_vid0, + (pdata->default_mode) % 2); + else + err = 1; + + if (!gpio_request(pdata->gpio_vid1, "MAX8952 VID1")) + gpio_direction_output(pdata->gpio_vid1, + (pdata->default_mode >> 1) % 2); + else { + if (!err) + gpio_free(pdata->gpio_vid0); + err = 2; + } + + } else + err = 3; + + if (err) { + dev_warn(max8952->dev, "VID0/1 gpio invalid: " + "DVS not avilable.\n"); + max8952->vid0 = 0; + max8952->vid1 = 0; + /* Mark invalid */ + pdata->gpio_vid0 = -1; + pdata->gpio_vid1 = -1; + + /* Disable Pulldown of EN only */ + max8952_write_reg(max8952, MAX8952_REG_CONTROL, 0x60); + + dev_err(max8952->dev, "DVS modes disabled because VID0 and VID1" + " do not have proper controls.\n"); + } else { + /* + * Disable Pulldown on EN, VID0, VID1 to reduce + * leakage current of MAX8952 assuming that MAX8952 + * is turned on (EN==1). Note that without having VID0/1 + * properly connected, turning pulldown off can be + * problematic. Thus, turn this off only when they are + * controllable by GPIO. 
+ */ + max8952_write_reg(max8952, MAX8952_REG_CONTROL, 0x0); + } + + max8952_write_reg(max8952, MAX8952_REG_MODE0, + (max8952_read_reg(max8952, + MAX8952_REG_MODE0) & 0xC0) | + (pdata->dvs_mode[0] & 0x3F)); + max8952_write_reg(max8952, MAX8952_REG_MODE1, + (max8952_read_reg(max8952, + MAX8952_REG_MODE1) & 0xC0) | + (pdata->dvs_mode[1] & 0x3F)); + max8952_write_reg(max8952, MAX8952_REG_MODE2, + (max8952_read_reg(max8952, + MAX8952_REG_MODE2) & 0xC0) | + (pdata->dvs_mode[2] & 0x3F)); + max8952_write_reg(max8952, MAX8952_REG_MODE3, + (max8952_read_reg(max8952, + MAX8952_REG_MODE3) & 0xC0) | + (pdata->dvs_mode[3] & 0x3F)); + + max8952_write_reg(max8952, MAX8952_REG_SYNC, + (max8952_read_reg(max8952, MAX8952_REG_SYNC) & 0x3F) | + ((pdata->sync_freq & 0x3) << 6)); + max8952_write_reg(max8952, MAX8952_REG_RAMP, + (max8952_read_reg(max8952, MAX8952_REG_RAMP) & 0x1F) | + ((pdata->ramp_speed & 0x7) << 5)); + + i2c_set_clientdata(client, max8952); + + return ret; +} + +static int __devexit max8952_pmic_remove(struct i2c_client *client) +{ + struct max8952_data *max8952 = i2c_get_clientdata(client); + struct max8952_platform_data *pdata = max8952->pdata; + struct regulator_dev *rdev = max8952->rdev; + + regulator_unregister(rdev); + + gpio_free(pdata->gpio_vid0); + gpio_free(pdata->gpio_vid1); + gpio_free(pdata->gpio_en); + + kfree(max8952); + return 0; +} + +static const struct i2c_device_id max8952_ids[] = { + { "max8952", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(i2c, max8952_ids); + +static struct i2c_driver max8952_pmic_driver = { + .probe = max8952_pmic_probe, + .remove = __devexit_p(max8952_pmic_remove), + .driver = { + .name = "max8952", + }, + .id_table = max8952_ids, +}; + +static int __init max8952_pmic_init(void) +{ + return i2c_add_driver(&max8952_pmic_driver); +} +subsys_initcall(max8952_pmic_init); + +static void __exit max8952_pmic_exit(void) +{ + i2c_del_driver(&max8952_pmic_driver); +} +module_exit(max8952_pmic_exit); + +MODULE_DESCRIPTION("MAXIM 8952 voltage regulator driver"); +MODULE_AUTHOR("MyungJoo Ham "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/regulator/max8952.h b/include/linux/regulator/max8952.h new file mode 100644 index 000000000000..45e42855ad05 --- /dev/null +++ b/include/linux/regulator/max8952.h @@ -0,0 +1,135 @@ +/* + * max8952.h - Voltage regulation for the Maxim 8952 + * + * Copyright (C) 2010 Samsung Electrnoics + * MyungJoo Ham + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef REGULATOR_MAX8952 +#define REGULATOR_MAX8952 + +#include + +enum { + MAX8952_DVS_MODE0, + MAX8952_DVS_MODE1, + MAX8952_DVS_MODE2, + MAX8952_DVS_MODE3, +}; + +enum { + MAX8952_DVS_770mV = 0, + MAX8952_DVS_780mV, + MAX8952_DVS_790mV, + MAX8952_DVS_800mV, + MAX8952_DVS_810mV, + MAX8952_DVS_820mV, + MAX8952_DVS_830mV, + MAX8952_DVS_840mV, + MAX8952_DVS_850mV, + MAX8952_DVS_860mV, + MAX8952_DVS_870mV, + MAX8952_DVS_880mV, + MAX8952_DVS_890mV, + MAX8952_DVS_900mV, + MAX8952_DVS_910mV, + MAX8952_DVS_920mV, + MAX8952_DVS_930mV, + MAX8952_DVS_940mV, + MAX8952_DVS_950mV, + MAX8952_DVS_960mV, + MAX8952_DVS_970mV, + MAX8952_DVS_980mV, + MAX8952_DVS_990mV, + MAX8952_DVS_1000mV, + MAX8952_DVS_1010mV, + MAX8952_DVS_1020mV, + MAX8952_DVS_1030mV, + MAX8952_DVS_1040mV, + MAX8952_DVS_1050mV, + MAX8952_DVS_1060mV, + MAX8952_DVS_1070mV, + MAX8952_DVS_1080mV, + MAX8952_DVS_1090mV, + MAX8952_DVS_1100mV, + MAX8952_DVS_1110mV, + MAX8952_DVS_1120mV, + MAX8952_DVS_1130mV, + MAX8952_DVS_1140mV, + MAX8952_DVS_1150mV, + MAX8952_DVS_1160mV, + MAX8952_DVS_1170mV, + MAX8952_DVS_1180mV, + MAX8952_DVS_1190mV, + MAX8952_DVS_1200mV, + MAX8952_DVS_1210mV, + MAX8952_DVS_1220mV, + MAX8952_DVS_1230mV, + MAX8952_DVS_1240mV, + MAX8952_DVS_1250mV, + MAX8952_DVS_1260mV, + MAX8952_DVS_1270mV, + MAX8952_DVS_1280mV, + MAX8952_DVS_1290mV, + MAX8952_DVS_1300mV, + MAX8952_DVS_1310mV, + MAX8952_DVS_1320mV, + MAX8952_DVS_1330mV, + MAX8952_DVS_1340mV, + MAX8952_DVS_1350mV, + MAX8952_DVS_1360mV, + MAX8952_DVS_1370mV, + MAX8952_DVS_1380mV, + MAX8952_DVS_1390mV, + MAX8952_DVS_1400mV, +}; + +enum { + MAX8952_SYNC_FREQ_26MHZ, /* Default */ + MAX8952_SYNC_FREQ_13MHZ, + MAX8952_SYNC_FREQ_19_2MHZ, +}; + +enum { + MAX8952_RAMP_32mV_us = 0, /* Default */ + MAX8952_RAMP_16mV_us, + MAX8952_RAMP_8mV_us, + MAX8952_RAMP_4mV_us, + MAX8952_RAMP_2mV_us, + MAX8952_RAMP_1mV_us, + MAX8952_RAMP_0_5mV_us, + MAX8952_RAMP_0_25mV_us, +}; + +#define MAX8952_NUM_DVS_MODE 4 + +struct max8952_platform_data { + int gpio_vid0; + int gpio_vid1; + int gpio_en; + + u8 default_mode; + u8 dvs_mode[MAX8952_NUM_DVS_MODE]; /* MAX8952_DVS_MODEx_XXXXmV */ + + u8 sync_freq; + u8 ramp_speed; + + struct regulator_init_data reg_data; +}; + + +#endif /* REGULATOR_MAX8952 */ -- cgit v1.2.3 From 5976f0959d5251ae5b4db848eaa2f42a19e98652 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 16 Sep 2010 16:48:49 +0800 Subject: Regulator: LP3972 PMIC regulator driver This patch adds regulator drivers for National Semiconductors LP3972 PMIC. This LP3972 PMIC controller has 3 DC/DC voltage converters and 5 low drop-out (LDO) regulators. LP3972 PMIC controller uses I2C interface. 
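
Wiring it up from a board file follows the usual subdev pattern (a sketch; the rail name, constraints, and I2C address are placeholders, and LDO1's range matches the driver's ldo1_voltage_map):

	#include <linux/i2c.h>
	#include <linux/init.h>
	#include <linux/kernel.h>
	#include <linux/regulator/machine.h>
	#include <linux/regulator/lp3972.h>

	static struct regulator_init_data board_lp3972_ldo1 = {
		.constraints = {
			.name		= "VDD_LDO1",	/* placeholder rail */
			.min_uV		= 1700000,
			.max_uV		= 2000000,
			.valid_ops_mask	= REGULATOR_CHANGE_VOLTAGE,
		},
	};

	static struct lp3972_regulator_subdev board_lp3972_regs[] = {
		{ .id = LP3972_LDO1, .initdata = &board_lp3972_ldo1 },
	};

	static struct lp3972_platform_data board_lp3972_pdata = {
		.num_regulators	= ARRAY_SIZE(board_lp3972_regs),
		.regulators	= board_lp3972_regs,
	};

	static struct i2c_board_info board_pmic_i2c[] __initdata = {
		{
			I2C_BOARD_INFO("lp3972", 0x34),	/* placeholder address */
			.platform_data = &board_lp3972_pdata,
		},
	};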
Signed-off-by: Axel Lin Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/Kconfig | 7 + drivers/regulator/Makefile | 1 + drivers/regulator/lp3972.c | 649 +++++++++++++++++++++++++++++++++++++++ include/linux/regulator/lp3972.h | 48 +++ 4 files changed, 705 insertions(+) create mode 100644 drivers/regulator/lp3972.c create mode 100644 include/linux/regulator/lp3972.h (limited to 'include') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 4889caabc632..dd30e883d4a7 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -172,6 +172,13 @@ config REGULATOR_LP3971 Say Y here to support the voltage regulators and convertors on National Semiconductors LP3971 PMIC +config REGULATOR_LP3972 + tristate "National Semiconductors LP3972 PMIC regulator driver" + depends on I2C + help + Say Y here to support the voltage regulators and convertors + on National Semiconductors LP3972 PMIC + config REGULATOR_PCAP tristate "PCAP2 regulator driver" depends on EZX_PCAP diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index beff6da6eff6..6cae6419b8b1 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_REGULATOR_AD5398) += ad5398.o obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o obj-$(CONFIG_REGULATOR_DUMMY) += dummy.o obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o +obj-$(CONFIG_REGULATOR_LP3972) += lp3972.o obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o obj-$(CONFIG_REGULATOR_TWL4030) += twl-regulator.o obj-$(CONFIG_REGULATOR_MAX8649) += max8649.o diff --git a/drivers/regulator/lp3972.c b/drivers/regulator/lp3972.c new file mode 100644 index 000000000000..0d9b47569c1b --- /dev/null +++ b/drivers/regulator/lp3972.c @@ -0,0 +1,649 @@ +/* + * Regulator driver for National Semiconductors LP3972 PMIC chip + * + * Based on lp3971.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct lp3972 { + struct device *dev; + struct mutex io_lock; + struct i2c_client *i2c; + int num_regulators; + struct regulator_dev **rdev; +}; + +/* LP3972 Control Registers */ +#define LP3972_SCR_REG 0x07 +#define LP3972_OVER1_REG 0x10 +#define LP3972_OVSR1_REG 0x11 +#define LP3972_OVER2_REG 0x12 +#define LP3972_OVSR2_REG 0x13 +#define LP3972_VCC1_REG 0x20 +#define LP3972_ADTV1_REG 0x23 +#define LP3972_ADTV2_REG 0x24 +#define LP3972_AVRC_REG 0x25 +#define LP3972_CDTC1_REG 0x26 +#define LP3972_CDTC2_REG 0x27 +#define LP3972_SDTV1_REG 0x29 +#define LP3972_SDTV2_REG 0x2A +#define LP3972_MDTV1_REG 0x32 +#define LP3972_MDTV2_REG 0x33 +#define LP3972_L2VCR_REG 0x39 +#define LP3972_L34VCR_REG 0x3A +#define LP3972_SCR1_REG 0x80 +#define LP3972_SCR2_REG 0x81 +#define LP3972_OEN3_REG 0x82 +#define LP3972_OSR3_REG 0x83 +#define LP3972_LOER4_REG 0x84 +#define LP3972_B2TV_REG 0x85 +#define LP3972_B3TV_REG 0x86 +#define LP3972_B32RC_REG 0x87 +#define LP3972_ISRA_REG 0x88 +#define LP3972_BCCR_REG 0x89 +#define LP3972_II1RR_REG 0x8E +#define LP3972_II2RR_REG 0x8F + +#define LP3972_SYS_CONTROL1_REG LP3972_SCR1_REG +/* System control register 1 initial value, + * bits 5, 6 and 7 are EPROM programmable */ +#define SYS_CONTROL1_INIT_VAL 0x02 +#define SYS_CONTROL1_INIT_MASK 0x1F + +#define LP3972_VOL_CHANGE_REG LP3972_VCC1_REG +#define LP3972_VOL_CHANGE_FLAG_GO 0x01 +#define LP3972_VOL_CHANGE_FLAG_MASK 0x03 + +/* LDO output enable mask */ +#define LP3972_OEN3_L1EN BIT(0) +#define LP3972_OVER2_LDO2_EN BIT(2) +#define LP3972_OVER2_LDO3_EN BIT(3) +#define LP3972_OVER2_LDO4_EN BIT(4) +#define LP3972_OVER1_S_EN BIT(2) + +static const int ldo1_voltage_map[] = { + 1700, 1725, 1750, 1775, 1800, 1825, 1850, 1875, + 1900, 1925, 1950, 1975, 2000, +}; + +static const int ldo23_voltage_map[] = { + 1800, 1900, 2000, 2100, 2200, 2300, 2400, 2500, + 2600, 2700, 2800, 2900, 3000, 3100, 3200, 3300, +}; + +static const int ldo4_voltage_map[] = { + 1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350, + 1400, 1500, 1800, 1900, 2500, 2800, 3000, 3300, +}; + +static const int ldo5_voltage_map[] = { + 0, 0, 0, 0, 0, 850, 875, 900, + 925, 950, 975, 1000, 1025, 1050, 1075, 1100, + 1125, 1150, 1175, 1200, 1225, 1250, 1275, 1300, + 1325, 1350, 1375, 1400, 1425, 1450, 1475, 1500, +}; + +static const int buck1_voltage_map[] = { + 725, 750, 775, 800, 825, 850, 875, 900, + 925, 950, 975, 1000, 1025, 1050, 1075, 1100, + 1125, 1150, 1175, 1200, 1225, 1250, 1275, 1300, + 1325, 1350, 1375, 1400, 1425, 1450, 1475, 1500, +}; + +static const int buck23_voltage_map[] = { + 0, 800, 850, 900, 950, 1000, 1050, 1100, + 1150, 1200, 1250, 1300, 1350, 1400, 1450, 1500, + 1550, 1600, 1650, 1700, 1800, 1900, 2500, 2800, + 3000, 3300, +}; + +static const int *ldo_voltage_map[] = { + ldo1_voltage_map, + ldo23_voltage_map, + ldo23_voltage_map, + ldo4_voltage_map, + ldo5_voltage_map, +}; + +static const int *buck_voltage_map[] = { + buck1_voltage_map, + buck23_voltage_map, + buck23_voltage_map, +}; + +static const int ldo_output_enable_mask[] = { + LP3972_OEN3_L1EN, + LP3972_OVER2_LDO2_EN, + LP3972_OVER2_LDO3_EN, + LP3972_OVER2_LDO4_EN, + LP3972_OVER1_S_EN, +}; + +static const int ldo_output_enable_addr[] = { + LP3972_OEN3_REG, + LP3972_OVER2_REG, + LP3972_OVER2_REG, + LP3972_OVER2_REG, + LP3972_OVER1_REG, +}; + +static const int ldo_vol_ctl_addr[] = { + LP3972_MDTV1_REG, + LP3972_L2VCR_REG, + LP3972_L34VCR_REG, + LP3972_L34VCR_REG, + LP3972_SDTV1_REG, +}; + +static const int 
buck_vol_enable_addr[] = { + LP3972_OVER1_REG, + LP3972_OEN3_REG, + LP3972_OEN3_REG, +}; + +static const int buck_base_addr[] = { + LP3972_ADTV1_REG, + LP3972_B2TV_REG, + LP3972_B3TV_REG, +}; + +#define LP3972_LDO_VOL_VALUE_MAP(x) (ldo_voltage_map[x]) +#define LP3972_LDO_OUTPUT_ENABLE_MASK(x) (ldo_output_enable_mask[x]) +#define LP3972_LDO_OUTPUT_ENABLE_REG(x) (ldo_output_enable_addr[x]) + +/* LDO voltage control registers shift: + LP3972_LDO1 -> 0, LP3972_LDO2 -> 4 + LP3972_LDO3 -> 0, LP3972_LDO4 -> 4 + LP3972_LDO5 -> 0 +*/ +#define LP3972_LDO_VOL_CONTR_SHIFT(x) (((x) & 1) << 2) +#define LP3972_LDO_VOL_CONTR_REG(x) (ldo_vol_ctl_addr[x]) +#define LP3972_LDO_VOL_CHANGE_SHIFT(x) ((x) ? 4 : 6) + +#define LP3972_LDO_VOL_MASK(x) (((x) % 4) ? 0x0f : 0x1f) +#define LP3972_LDO_VOL_MIN_IDX(x) (((x) == 4) ? 0x05 : 0x00) +#define LP3972_LDO_VOL_MAX_IDX(x) ((x) ? (((x) == 4) ? 0x1f : 0x0f) : 0x0c) + +#define LP3972_BUCK_VOL_VALUE_MAP(x) (buck_voltage_map[x]) +#define LP3972_BUCK_VOL_ENABLE_REG(x) (buck_vol_enable_addr[x]) +#define LP3972_BUCK_VOL1_REG(x) (buck_base_addr[x]) +#define LP3972_BUCK_VOL_MASK 0x1f +#define LP3972_BUCK_VOL_MIN_IDX(x) ((x) ? 0x01 : 0x00) +#define LP3972_BUCK_VOL_MAX_IDX(x) ((x) ? 0x19 : 0x1f) + +static int lp3972_i2c_read(struct i2c_client *i2c, char reg, int count, + u16 *dest) +{ + int ret; + + if (count != 1) + return -EIO; + ret = i2c_smbus_read_byte_data(i2c, reg); + if (ret < 0) + return -EIO; + + *dest = ret; + return 0; +} + +static int lp3972_i2c_write(struct i2c_client *i2c, char reg, int count, + const u16 *src) +{ + if (count != 1) + return -EIO; + return i2c_smbus_write_byte_data(i2c, reg, *src); +} + +static u8 lp3972_reg_read(struct lp3972 *lp3972, u8 reg) +{ + u16 val = 0; + + mutex_lock(&lp3972->io_lock); + + lp3972_i2c_read(lp3972->i2c, reg, 1, &val); + + dev_dbg(lp3972->dev, "reg read 0x%02x -> 0x%02x\n", (int)reg, + (unsigned)val&0xff); + + mutex_unlock(&lp3972->io_lock); + + return val & 0xff; +} + +static int lp3972_set_bits(struct lp3972 *lp3972, u8 reg, u16 mask, u16 val) +{ + u16 tmp; + int ret; + + mutex_lock(&lp3972->io_lock); + + ret = lp3972_i2c_read(lp3972->i2c, reg, 1, &tmp); + tmp = (tmp & ~mask) | val; + if (ret == 0) { + ret = lp3972_i2c_write(lp3972->i2c, reg, 1, &tmp); + dev_dbg(lp3972->dev, "reg write 0x%02x -> 0x%02x\n", (int)reg, + (unsigned)val&0xff); + } + mutex_unlock(&lp3972->io_lock); + + return ret; +} + +static int lp3972_ldo_list_voltage(struct regulator_dev *dev, unsigned index) +{ + int ldo = rdev_get_id(dev) - LP3972_LDO1; + return 1000 * LP3972_LDO_VOL_VALUE_MAP(ldo)[index]; +} + +static int lp3972_ldo_is_enabled(struct regulator_dev *dev) +{ + struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int ldo = rdev_get_id(dev) - LP3972_LDO1; + u16 mask = LP3972_LDO_OUTPUT_ENABLE_MASK(ldo); + u16 val; + + val = lp3972_reg_read(lp3972, LP3972_LDO_OUTPUT_ENABLE_REG(ldo)); + return !!(val & mask); +} + +static int lp3972_ldo_enable(struct regulator_dev *dev) +{ + struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int ldo = rdev_get_id(dev) - LP3972_LDO1; + u16 mask = LP3972_LDO_OUTPUT_ENABLE_MASK(ldo); + + return lp3972_set_bits(lp3972, LP3972_LDO_OUTPUT_ENABLE_REG(ldo), + mask, mask); +} + +static int lp3972_ldo_disable(struct regulator_dev *dev) +{ + struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int ldo = rdev_get_id(dev) - LP3972_LDO1; + u16 mask = LP3972_LDO_OUTPUT_ENABLE_MASK(ldo); + + return lp3972_set_bits(lp3972, LP3972_LDO_OUTPUT_ENABLE_REG(ldo), + mask, 0); +} + +static int lp3972_ldo_get_voltage(struct regulator_dev *dev) +{ 
+ struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int ldo = rdev_get_id(dev) - LP3972_LDO1; + u16 mask = LP3972_LDO_VOL_MASK(ldo); + u16 val, reg; + + reg = lp3972_reg_read(lp3972, LP3972_LDO_VOL_CONTR_REG(ldo)); + val = (reg >> LP3972_LDO_VOL_CONTR_SHIFT(ldo)) & mask; + + return 1000 * LP3972_LDO_VOL_VALUE_MAP(ldo)[val]; +} + +static int lp3972_ldo_set_voltage(struct regulator_dev *dev, + int min_uV, int max_uV) +{ + struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int ldo = rdev_get_id(dev) - LP3972_LDO1; + int min_vol = min_uV / 1000, max_vol = max_uV / 1000; + const int *vol_map = LP3972_LDO_VOL_VALUE_MAP(ldo); + u16 val; + int shift, ret; + + if (min_vol < vol_map[LP3972_LDO_VOL_MIN_IDX(ldo)] || + min_vol > vol_map[LP3972_LDO_VOL_MAX_IDX(ldo)]) + return -EINVAL; + + for (val = LP3972_LDO_VOL_MIN_IDX(ldo); + val <= LP3972_LDO_VOL_MAX_IDX(ldo); val++) + if (vol_map[val] >= min_vol) + break; + + if (val > LP3972_LDO_VOL_MAX_IDX(ldo) || vol_map[val] > max_vol) + return -EINVAL; + + shift = LP3972_LDO_VOL_CONTR_SHIFT(ldo); + ret = lp3972_set_bits(lp3972, LP3972_LDO_VOL_CONTR_REG(ldo), + LP3972_LDO_VOL_MASK(ldo) << shift, val << shift); + + if (ret) + return ret; + + switch (ldo) { + case LP3972_LDO1: + case LP3972_LDO5: + shift = LP3972_LDO_VOL_CHANGE_SHIFT(ldo); + ret = lp3972_set_bits(lp3972, LP3972_VOL_CHANGE_REG, + LP3972_VOL_CHANGE_FLAG_MASK << shift, + LP3972_VOL_CHANGE_FLAG_GO << shift); + if (ret) + return ret; + + ret = lp3972_set_bits(lp3972, LP3972_VOL_CHANGE_REG, + LP3972_VOL_CHANGE_FLAG_MASK << shift, 0); + break; + } + + return ret; +} + +static struct regulator_ops lp3972_ldo_ops = { + .list_voltage = lp3972_ldo_list_voltage, + .is_enabled = lp3972_ldo_is_enabled, + .enable = lp3972_ldo_enable, + .disable = lp3972_ldo_disable, + .get_voltage = lp3972_ldo_get_voltage, + .set_voltage = lp3972_ldo_set_voltage, +}; + +static int lp3972_dcdc_list_voltage(struct regulator_dev *dev, unsigned index) +{ + int buck = rdev_get_id(dev) - LP3972_DCDC1; + return 1000 * buck_voltage_map[buck][index]; +} + +static int lp3972_dcdc_is_enabled(struct regulator_dev *dev) +{ + struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int buck = rdev_get_id(dev) - LP3972_DCDC1; + u16 mask = 1 << (buck * 2); + u16 val; + + val = lp3972_reg_read(lp3972, LP3972_BUCK_VOL_ENABLE_REG(buck)); + return !!(val & mask); +} + +static int lp3972_dcdc_enable(struct regulator_dev *dev) +{ + struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int buck = rdev_get_id(dev) - LP3972_DCDC1; + u16 mask = 1 << (buck * 2); + u16 val; + + val = lp3972_set_bits(lp3972, LP3972_BUCK_VOL_ENABLE_REG(buck), + mask, mask); + return val; +} + +static int lp3972_dcdc_disable(struct regulator_dev *dev) +{ + struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int buck = rdev_get_id(dev) - LP3972_DCDC1; + u16 mask = 1 << (buck * 2); + u16 val; + + val = lp3972_set_bits(lp3972, LP3972_BUCK_VOL_ENABLE_REG(buck), + mask, 0); + return val; +} + +static int lp3972_dcdc_get_voltage(struct regulator_dev *dev) +{ + struct lp3972 *lp3972 = rdev_get_drvdata(dev); + int buck = rdev_get_id(dev) - LP3972_DCDC1; + u16 reg; + int val; + + reg = lp3972_reg_read(lp3972, LP3972_BUCK_VOL1_REG(buck)); + reg &= LP3972_BUCK_VOL_MASK; + if (reg <= LP3972_BUCK_VOL_MAX_IDX(buck)) + val = 1000 * buck_voltage_map[buck][reg]; + else { + val = 0; + dev_warn(&dev->dev, "chip reported incorrect voltage value.\n"); + } + + return val; +} + +static int lp3972_dcdc_set_voltage(struct regulator_dev *dev, + int min_uV, int max_uV) +{ + struct lp3972 *lp3972 = 
rdev_get_drvdata(dev); + int buck = rdev_get_id(dev) - LP3972_DCDC1; + int min_vol = min_uV / 1000, max_vol = max_uV / 1000; + const int *vol_map = buck_voltage_map[buck]; + u16 val; + int ret; + + if (min_vol < vol_map[LP3972_BUCK_VOL_MIN_IDX(buck)] || + min_vol > vol_map[LP3972_BUCK_VOL_MAX_IDX(buck)]) + return -EINVAL; + + for (val = LP3972_BUCK_VOL_MIN_IDX(buck); + val <= LP3972_BUCK_VOL_MAX_IDX(buck); val++) + if (vol_map[val] >= min_vol) + break; + + if (val > LP3972_BUCK_VOL_MAX_IDX(buck) || + vol_map[val] > max_vol) + return -EINVAL; + + ret = lp3972_set_bits(lp3972, LP3972_BUCK_VOL1_REG(buck), + LP3972_BUCK_VOL_MASK, val); + if (ret) + return ret; + + if (buck != 0) + return ret; + + ret = lp3972_set_bits(lp3972, LP3972_VOL_CHANGE_REG, + LP3972_VOL_CHANGE_FLAG_MASK, LP3972_VOL_CHANGE_FLAG_GO); + if (ret) + return ret; + + return lp3972_set_bits(lp3972, LP3972_VOL_CHANGE_REG, + LP3972_VOL_CHANGE_FLAG_MASK, 0); +} + +static struct regulator_ops lp3972_dcdc_ops = { + .list_voltage = lp3972_dcdc_list_voltage, + .is_enabled = lp3972_dcdc_is_enabled, + .enable = lp3972_dcdc_enable, + .disable = lp3972_dcdc_disable, + .get_voltage = lp3972_dcdc_get_voltage, + .set_voltage = lp3972_dcdc_set_voltage, +}; + +static struct regulator_desc regulators[] = { + { + .name = "LDO1", + .id = LP3972_LDO1, + .ops = &lp3972_ldo_ops, + .n_voltages = ARRAY_SIZE(ldo1_voltage_map), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + { + .name = "LDO2", + .id = LP3972_LDO2, + .ops = &lp3972_ldo_ops, + .n_voltages = ARRAY_SIZE(ldo23_voltage_map), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + { + .name = "LDO3", + .id = LP3972_LDO3, + .ops = &lp3972_ldo_ops, + .n_voltages = ARRAY_SIZE(ldo23_voltage_map), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + { + .name = "LDO4", + .id = LP3972_LDO4, + .ops = &lp3972_ldo_ops, + .n_voltages = ARRAY_SIZE(ldo4_voltage_map), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + { + .name = "LDO5", + .id = LP3972_LDO5, + .ops = &lp3972_ldo_ops, + .n_voltages = ARRAY_SIZE(ldo5_voltage_map), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + { + .name = "DCDC1", + .id = LP3972_DCDC1, + .ops = &lp3972_dcdc_ops, + .n_voltages = ARRAY_SIZE(buck1_voltage_map), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + { + .name = "DCDC2", + .id = LP3972_DCDC2, + .ops = &lp3972_dcdc_ops, + .n_voltages = ARRAY_SIZE(buck23_voltage_map), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, + { + .name = "DCDC3", + .id = LP3972_DCDC3, + .ops = &lp3972_dcdc_ops, + .n_voltages = ARRAY_SIZE(buck23_voltage_map), + .type = REGULATOR_VOLTAGE, + .owner = THIS_MODULE, + }, +}; + +static int setup_regulators(struct lp3972 *lp3972, + struct lp3972_platform_data *pdata) +{ + int i, err; + + lp3972->num_regulators = pdata->num_regulators; + lp3972->rdev = kcalloc(pdata->num_regulators, + sizeof(struct regulator_dev *), GFP_KERNEL); + if (!lp3972->rdev) { + err = -ENOMEM; + goto err_nomem; + } + + /* Instantiate the regulators */ + for (i = 0; i < pdata->num_regulators; i++) { + struct lp3972_regulator_subdev *reg = &pdata->regulators[i]; + lp3972->rdev[i] = regulator_register(®ulators[reg->id], + lp3972->dev, reg->initdata, lp3972); + + if (IS_ERR(lp3972->rdev[i])) { + err = PTR_ERR(lp3972->rdev[i]); + dev_err(lp3972->dev, "regulator init failed: %d\n", + err); + goto error; + } + } + + return 0; +error: + while (--i >= 0) + regulator_unregister(lp3972->rdev[i]); + kfree(lp3972->rdev); + lp3972->rdev = NULL; +err_nomem: + return err; +} + 
+static int __devinit lp3972_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct lp3972 *lp3972; + struct lp3972_platform_data *pdata = i2c->dev.platform_data; + int ret; + u16 val; + + if (!pdata) { + dev_dbg(&i2c->dev, "No platform init data supplied\n"); + return -ENODEV; + } + + lp3972 = kzalloc(sizeof(struct lp3972), GFP_KERNEL); + if (!lp3972) + return -ENOMEM; + + lp3972->i2c = i2c; + lp3972->dev = &i2c->dev; + + mutex_init(&lp3972->io_lock); + + /* Detect LP3972 */ + ret = lp3972_i2c_read(i2c, LP3972_SYS_CONTROL1_REG, 1, &val); + if (ret == 0 && (val & SYS_CONTROL1_INIT_MASK) != SYS_CONTROL1_INIT_VAL) + ret = -ENODEV; + if (ret < 0) { + dev_err(&i2c->dev, "failed to detect device\n"); + goto err_detect; + } + + ret = setup_regulators(lp3972, pdata); + if (ret < 0) + goto err_detect; + + i2c_set_clientdata(i2c, lp3972); + return 0; + +err_detect: + kfree(lp3972); + return ret; +} + +static int __devexit lp3972_i2c_remove(struct i2c_client *i2c) +{ + struct lp3972 *lp3972 = i2c_get_clientdata(i2c); + int i; + + for (i = 0; i < lp3972->num_regulators; i++) + regulator_unregister(lp3972->rdev[i]); + kfree(lp3972->rdev); + kfree(lp3972); + + return 0; +} + +static const struct i2c_device_id lp3972_i2c_id[] = { + { "lp3972", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, lp3972_i2c_id); + +static struct i2c_driver lp3972_i2c_driver = { + .driver = { + .name = "lp3972", + .owner = THIS_MODULE, + }, + .probe = lp3972_i2c_probe, + .remove = __devexit_p(lp3972_i2c_remove), + .id_table = lp3972_i2c_id, +}; + +static int __init lp3972_module_init(void) +{ + return i2c_add_driver(&lp3972_i2c_driver); +} +subsys_initcall(lp3972_module_init); + +static void __exit lp3972_module_exit(void) +{ + i2c_del_driver(&lp3972_i2c_driver); +} +module_exit(lp3972_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Axel Lin "); +MODULE_DESCRIPTION("LP3972 PMIC driver"); diff --git a/include/linux/regulator/lp3972.h b/include/linux/regulator/lp3972.h new file mode 100644 index 000000000000..9bb7389b7a1e --- /dev/null +++ b/include/linux/regulator/lp3972.h @@ -0,0 +1,48 @@ +/* + * National Semiconductors LP3972 PMIC chip client interface + * + * Based on lp3971.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef __LINUX_REGULATOR_LP3972_H +#define __LINUX_REGULATOR_LP3972_H + +#include + +#define LP3972_LDO1 0 +#define LP3972_LDO2 1 +#define LP3972_LDO3 2 +#define LP3972_LDO4 3 +#define LP3972_LDO5 4 + +#define LP3972_DCDC1 5 +#define LP3972_DCDC2 6 +#define LP3972_DCDC3 7 + +#define LP3972_NUM_REGULATORS 8 + +struct lp3972_regulator_subdev { + int id; + struct regulator_init_data *initdata; +}; + +struct lp3972_platform_data { + int num_regulators; + struct lp3972_regulator_subdev *regulators; +}; + +#endif -- cgit v1.2.3 From 688fe99a439f7c9dfcc52fbf7cb347f140a2dc8b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Oct 2010 19:18:32 -0700 Subject: regulator: Add option for machine drivers to enable the dummy regulator Allow machine drivers to explicitly enable the use of the dummy regulator, enabling simpler support for systems with only a few specific supplies visible to software. It is strongly recommended that this is not used on systems with substantial software control over their PMICs; for maximum functionality, constraints should be as fully specified as possible. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/Makefile | 3 +-- drivers/regulator/core.c | 22 ++++++++++++++++++++++ include/linux/regulator/machine.h | 5 +++++ 3 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 6cae6419b8b1..bff815736780 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -3,14 +3,13 @@ # -obj-$(CONFIG_REGULATOR) += core.o +obj-$(CONFIG_REGULATOR) += core.o dummy.o obj-$(CONFIG_REGULATOR_FIXED_VOLTAGE) += fixed.o obj-$(CONFIG_REGULATOR_VIRTUAL_CONSUMER) += virtual.o obj-$(CONFIG_REGULATOR_USERSPACE_CONSUMER) += userspace-consumer.o obj-$(CONFIG_REGULATOR_AD5398) += ad5398.o obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o -obj-$(CONFIG_REGULATOR_DUMMY) += dummy.o obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o obj-$(CONFIG_REGULATOR_LP3972) += lp3972.o obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index cc8b337b9119..4fa08c85f3ce 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -33,6 +33,7 @@ static DEFINE_MUTEX(regulator_list_mutex); static LIST_HEAD(regulator_list); static LIST_HEAD(regulator_map_list); static int has_full_constraints; +static bool board_wants_dummy_regulator; /* * struct regulator_map @@ -1108,6 +1109,11 @@ static struct regulator *_regulator_get(struct device *dev, const char *id, } } + if (board_wants_dummy_regulator) { + rdev = dummy_regulator_rdev; + goto found; + } + #ifdef CONFIG_REGULATOR_DUMMY if (!devname) devname = "deviceless"; @@ -2462,6 +2468,22 @@ void regulator_has_full_constraints(void) } EXPORT_SYMBOL_GPL(regulator_has_full_constraints); +/** + * regulator_use_dummy_regulator - Provide a dummy regulator when none is found + * + * Calling this function will cause the regulator API to provide a + * dummy regulator to consumers if no physical regulator is found, + * allowing most consumers to proceed as though a regulator were + * configured. This allows systems such as those with software + * controllable regulators for the CPU core only to be brought up more + * readily.
+ */ +void regulator_use_dummy_regulator(void) +{ + board_wants_dummy_regulator = true; +} +EXPORT_SYMBOL_GPL(regulator_use_dummy_regulator); + /** * rdev_get_drvdata - get rdev regulator driver data * @rdev: regulator diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index e2980287245e..761c745b9c24 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -189,10 +189,15 @@ int regulator_suspend_prepare(suspend_state_t state); #ifdef CONFIG_REGULATOR void regulator_has_full_constraints(void); +void regulator_use_dummy_regulator(void); #else static inline void regulator_has_full_constraints(void) { } + +static inline void regulator_use_dummy_regulator(void) +{ +} #endif #endif -- cgit v1.2.3
From f337134ff0cfe60fb1e347bc45b8e7190ef90a82 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 17 Aug 2010 13:13:36 +0100 Subject: mfd: Move PCF50633 IRQ prototypes where the definitions can see them Fixed warnings about unprototyped global functions. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/pcf50633-core.c | 7 ------- include/linux/mfd/pcf50633/core.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/mfd/pcf50633-core.c b/drivers/mfd/pcf50633-core.c index 23e585527285..6d4233f0d0d3 100644 --- a/drivers/mfd/pcf50633-core.c +++ b/drivers/mfd/pcf50633-core.c @@ -25,13 +25,6 @@ #include -int pcf50633_irq_init(struct pcf50633 *pcf, int irq); -void pcf50633_irq_free(struct pcf50633 *pcf); -#ifdef CONFIG_PM -int pcf50633_irq_suspend(struct pcf50633 *pcf); -int pcf50633_irq_resume(struct pcf50633 *pcf); -#endif - static int __pcf50633_read(struct pcf50633 *pcf, u8 reg, int num, u8 *data) { int ret; diff --git a/include/linux/mfd/pcf50633/core.h b/include/linux/mfd/pcf50633/core.h index ad411a78870c..50d4a047118d 100644 --- a/include/linux/mfd/pcf50633/core.h +++ b/include/linux/mfd/pcf50633/core.h @@ -227,4 +227,11 @@ static inline struct pcf50633 *dev_to_pcf50633(struct device *dev) { return dev_get_drvdata(dev); } +int pcf50633_irq_init(struct pcf50633 *pcf, int irq); +void pcf50633_irq_free(struct pcf50633 *pcf); +#ifdef CONFIG_PM +int pcf50633_irq_suspend(struct pcf50633 *pcf); +int pcf50633_irq_resume(struct pcf50633 *pcf); +#endif + #endif -- cgit v1.2.3
From b8e9cf0b28173fc25dae9f3ac44de6fc4e9fc385 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 16 Aug 2010 17:14:44 +0200 Subject: gpio: Add bitmask to block requests to unavailable stmpe GPIOs GPIOs on these controllers are multi-functional. If you decide to use some of them, e.g. as input channels for the ADC, you surely don't want those pins to be reassigned as simple GPIOs (which could even be triggered from userspace via 'export'). The same applies to the touchscreen controller pins. Since knowledge about the hardware is needed to decide which GPIOs to reserve, let this bitmask live inside platform_data and provide some defines to assist potential users.
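By way of illustration, a board that dedicates the STMPE811 touch pins to the touchscreen could reserve them at registration time roughly like this (a minimal sketch against the platform_data added by this patch; the variable name is invented):

	/* Sketch: keep the STMPE811 touch pins away from gpiolib (and from
	 * userspace 'export') by masking them out in the platform data. */
	static struct stmpe_gpio_platform_data board_stmpe_gpio_data = {
		.gpio_base	= -1,	/* let gpiolib assign a base */
		.norequest_mask	= STMPE_GPIO_NOREQ_811_TOUCH,
	};

With this in place, any gpio_request() on a masked pin fails with -EINVAL in stmpe_gpio_request() below.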
Signed-off-by: Wolfram Sang Acked-by: Rabin Vincent Cc: Linus Walleij Signed-off-by: Samuel Ortiz --- drivers/gpio/stmpe-gpio.c | 5 +++++ include/linux/mfd/stmpe.h | 6 ++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/drivers/gpio/stmpe-gpio.c b/drivers/gpio/stmpe-gpio.c index 4e1f1b9d5e67..65b996083918 100644 --- a/drivers/gpio/stmpe-gpio.c +++ b/drivers/gpio/stmpe-gpio.c @@ -30,6 +30,7 @@ struct stmpe_gpio { struct mutex irq_lock; int irq_base; + unsigned norequest_mask; /* Caches of interrupt control registers for bus_lock */ u8 regs[CACHE_NR_REGS][CACHE_NR_BANKS]; @@ -103,6 +104,9 @@ static int stmpe_gpio_request(struct gpio_chip *chip, unsigned offset) struct stmpe_gpio *stmpe_gpio = to_stmpe_gpio(chip); struct stmpe *stmpe = stmpe_gpio->stmpe; + if (stmpe_gpio->norequest_mask & (1 << offset)) + return -EINVAL; + return stmpe_set_altfunc(stmpe, 1 << offset, STMPE_BLOCK_GPIO); } @@ -302,6 +306,7 @@ static int __devinit stmpe_gpio_probe(struct platform_device *pdev) stmpe_gpio->dev = &pdev->dev; stmpe_gpio->stmpe = stmpe; + stmpe_gpio->norequest_mask = pdata ? pdata->norequest_mask : 0; stmpe_gpio->chip = template_chip; stmpe_gpio->chip.ngpio = stmpe->num_gpios; diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h index 39ca7588659b..e762c270d8d4 100644 --- a/include/linux/mfd/stmpe.h +++ b/include/linux/mfd/stmpe.h @@ -112,13 +112,19 @@ struct stmpe_keypad_platform_data { bool no_autorepeat; }; +#define STMPE_GPIO_NOREQ_811_TOUCH (0xf0) + /** * struct stmpe_gpio_platform_data - STMPE GPIO platform data * @gpio_base: first gpio number assigned. A maximum of * %STMPE_NR_GPIOS GPIOs will be allocated. + * @norequest_mask: bitmask specifying which GPIOs should _not_ be + * requestable due to different usage (e.g. touch, keypad) + * STMPE_GPIO_NOREQ_* macros can be used here. */ struct stmpe_gpio_platform_data { int gpio_base; + unsigned norequest_mask; void (*setup)(struct stmpe *stmpe, unsigned gpio_base); void (*remove)(struct stmpe *stmpe, unsigned gpio_base); }; -- cgit v1.2.3
From 89712059c09ff12f1e60e444d05d2ca257dd00ef Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 10 Sep 2010 17:10:21 +0200 Subject: i2c: twl: add register defines for pm master module Some modules already need to talk to at least the PROTECT_KEY register; while at it, add defines for the entire register space.
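For example, with the defines added below, the usual PROTECT_KEY unlock/lock handshake reduces to something like this (a sketch only; error handling is elided, and note that twl_i2c_write_u8() takes the value before the register offset):

	/* Sketch: unlock the PM master registers, update one, lock again. */
	twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, TWL4030_PM_MASTER_KEY_CFG1,
			 TWL4030_PM_MASTER_PROTECT_KEY);
	twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, TWL4030_PM_MASTER_KEY_CFG2,
			 TWL4030_PM_MASTER_PROTECT_KEY);
	/* ... write the protected register(s), e.g. CFG_BOOT, here ... */
	twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0,
			 TWL4030_PM_MASTER_PROTECT_KEY);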
Signed-off-by: Felipe Balbi Signed-off-by: Samuel Ortiz --- include/linux/i2c/twl.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include') diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index 4793d8a7f480..53089516c17a 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -357,6 +357,52 @@ int twl6030_interrupt_mask(u8 bit_mask, u8 offset); /*----------------------------------------------------------------------*/ +/* + * PM Master module register offsets (use TWL4030_MODULE_PM_MASTER) + */ + +#define TWL4030_PM_MASTER_CFG_P1_TRANSITION 0x00 +#define TWL4030_PM_MASTER_CFG_P2_TRANSITION 0x01 +#define TWL4030_PM_MASTER_CFG_P3_TRANSITION 0x02 +#define TWL4030_PM_MASTER_CFG_P123_TRANSITION 0x03 +#define TWL4030_PM_MASTER_STS_BOOT 0x04 +#define TWL4030_PM_MASTER_CFG_BOOT 0x05 +#define TWL4030_PM_MASTER_SHUNDAN 0x06 +#define TWL4030_PM_MASTER_BOOT_BCI 0x07 +#define TWL4030_PM_MASTER_CFG_PWRANA1 0x08 +#define TWL4030_PM_MASTER_CFG_PWRANA2 0x09 +#define TWL4030_PM_MASTER_BACKUP_MISC_STS 0x0b +#define TWL4030_PM_MASTER_BACKUP_MISC_CFG 0x0c +#define TWL4030_PM_MASTER_BACKUP_MISC_TST 0x0d +#define TWL4030_PM_MASTER_PROTECT_KEY 0x0e +#define TWL4030_PM_MASTER_STS_HW_CONDITIONS 0x0f +#define TWL4030_PM_MASTER_P1_SW_EVENTS 0x10 +#define TWL4030_PM_MASTER_P2_SW_EVENTS 0x11 +#define TWL4030_PM_MASTER_P3_SW_EVENTS 0x12 +#define TWL4030_PM_MASTER_STS_P123_STATE 0x13 +#define TWL4030_PM_MASTER_PB_CFG 0x14 +#define TWL4030_PM_MASTER_PB_WORD_MSB 0x15 +#define TWL4030_PM_MASTER_PB_WORD_LSB 0x16 +#define TWL4030_PM_MASTER_SEQ_ADD_W2P 0x1c +#define TWL4030_PM_MASTER_SEQ_ADD_P2A 0x1d +#define TWL4030_PM_MASTER_SEQ_ADD_A2W 0x1e +#define TWL4030_PM_MASTER_SEQ_ADD_A2S 0x1f +#define TWL4030_PM_MASTER_SEQ_ADD_S2A12 0x20 +#define TWL4030_PM_MASTER_SEQ_ADD_S2A3 0x21 +#define TWL4030_PM_MASTER_SEQ_ADD_WARM 0x22 +#define TWL4030_PM_MASTER_MEMORY_ADDRESS 0x23 +#define TWL4030_PM_MASTER_MEMORY_DATA 0x24 + +#define TWL4030_PM_MASTER_KEY_CFG1 0xc0 +#define TWL4030_PM_MASTER_KEY_CFG2 0x0c + +#define TWL4030_PM_MASTER_KEY_TST1 0xe0 +#define TWL4030_PM_MASTER_KEY_TST2 0x0e + +#define TWL4030_PM_MASTER_GLOBAL_TST 0xb6 + +/*----------------------------------------------------------------------*/ + /* Power bus message definitions */ /* The TWL4030/5030 splits its power-management resources (the various -- cgit v1.2.3
From 676e02d7a2ed9bb02994670a07df533a29a99de6 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Fri, 6 Aug 2010 11:28:06 +0900 Subject: mfd: Use i2c_client as an argument on MAX8998 i2c routines The MAX8998 chip has regulator and RTC features. The regulator and the RTC use different I2C slave addresses, so each I2C operation function needs to be passed the right i2c client. Also, this patch exports the I2C operation functions instead of using callbacks, to make the code easier to read.
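After this change a sub-driver only needs the right i2c_client to do register I/O, for instance (sketch; the register and bit chosen here are arbitrary):

	/* Sketch: read-modify-write from a MAX8998 platform sub-device. */
	struct max8998_dev *iodev = dev_get_drvdata(pdev->dev.parent);
	u8 val;
	int ret;

	ret = max8998_read_reg(iodev->i2c, MAX8998_REG_ONOFF1, &val);
	if (!ret)
		ret = max8998_update_reg(iodev->i2c, MAX8998_REG_ONOFF1,
					 1 << 7, 1 << 7);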
Signed-off-by: Joonyoung Shim Signed-off-by: Kyungmin Park Signed-off-by: Samuel Ortiz --- drivers/mfd/max8998.c | 29 ++++++++++++++--------------- drivers/regulator/max8998.c | 17 +++++++++++------ include/linux/mfd/max8998-private.h | 30 +++++------------------------- 3 files changed, 30 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/drivers/mfd/max8998.c b/drivers/mfd/max8998.c index f1c928915880..3b8a5076d351 100644 --- a/drivers/mfd/max8998.c +++ b/drivers/mfd/max8998.c @@ -36,13 +36,13 @@ static struct mfd_cell max8998_devs[] = { } }; -static int max8998_i2c_device_read(struct max8998_dev *max8998, u8 reg, u8 *dest) +int max8998_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest) { - struct i2c_client *client = max8998->i2c_client; + struct max8998_dev *max8998 = i2c_get_clientdata(i2c); int ret; mutex_lock(&max8998->iolock); - ret = i2c_smbus_read_byte_data(client, reg); + ret = i2c_smbus_read_byte_data(i2c, reg); mutex_unlock(&max8998->iolock); if (ret < 0) return ret; @@ -51,36 +51,38 @@ static int max8998_i2c_device_read(struct max8998_dev *max8998, u8 reg, u8 *dest *dest = ret; return 0; } +EXPORT_SYMBOL(max8998_read_reg); -static int max8998_i2c_device_write(struct max8998_dev *max8998, u8 reg, u8 value) +int max8998_write_reg(struct i2c_client *i2c, u8 reg, u8 value) { - struct i2c_client *client = max8998->i2c_client; + struct max8998_dev *max8998 = i2c_get_clientdata(i2c); int ret; mutex_lock(&max8998->iolock); - ret = i2c_smbus_write_byte_data(client, reg, value); + ret = i2c_smbus_write_byte_data(i2c, reg, value); mutex_unlock(&max8998->iolock); return ret; } +EXPORT_SYMBOL(max8998_write_reg); -static int max8998_i2c_device_update(struct max8998_dev *max8998, u8 reg, - u8 val, u8 mask) +int max8998_update_reg(struct i2c_client *i2c, u8 reg, u8 val, u8 mask) { - struct i2c_client *client = max8998->i2c_client; + struct max8998_dev *max8998 = i2c_get_clientdata(i2c); int ret; mutex_lock(&max8998->iolock); - ret = i2c_smbus_read_byte_data(client, reg); + ret = i2c_smbus_read_byte_data(i2c, reg); if (ret >= 0) { u8 old_val = ret & 0xff; u8 new_val = (val & mask) | (old_val & (~mask)); - ret = i2c_smbus_write_byte_data(client, reg, new_val); + ret = i2c_smbus_write_byte_data(i2c, reg, new_val); if (ret >= 0) ret = 0; } mutex_unlock(&max8998->iolock); return ret; } +EXPORT_SYMBOL(max8998_update_reg); static int max8998_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) @@ -94,10 +96,7 @@ static int max8998_i2c_probe(struct i2c_client *i2c, i2c_set_clientdata(i2c, max8998); max8998->dev = &i2c->dev; - max8998->i2c_client = i2c; - max8998->dev_read = max8998_i2c_device_read; - max8998->dev_write = max8998_i2c_device_write; - max8998->dev_update = max8998_i2c_device_update; + max8998->i2c = i2c; mutex_init(&max8998->iolock); ret = mfd_add_devices(max8998->dev, -1, diff --git a/drivers/regulator/max8998.c b/drivers/regulator/max8998.c index a1baf1fbe004..7f5fe6f198cf 100644 --- a/drivers/regulator/max8998.c +++ b/drivers/regulator/max8998.c @@ -173,6 +173,7 @@ static int max8998_get_enable_register(struct regulator_dev *rdev, static int max8998_ldo_is_enabled(struct regulator_dev *rdev) { struct max8998_data *max8998 = rdev_get_drvdata(rdev); + struct i2c_client *i2c = max8998->iodev->i2c; int ret, reg, shift = 8; u8 val; @@ -180,7 +181,7 @@ static int max8998_ldo_is_enabled(struct regulator_dev *rdev) if (ret) return ret; - ret = max8998_read_reg(max8998->iodev, reg, &val); + ret = max8998_read_reg(i2c, reg, &val); if (ret) return ret; @@ -190,25 
+191,27 @@ static int max8998_ldo_is_enabled(struct regulator_dev *rdev) static int max8998_ldo_enable(struct regulator_dev *rdev) { struct max8998_data *max8998 = rdev_get_drvdata(rdev); + struct i2c_client *i2c = max8998->iodev->i2c; int reg, shift = 8, ret; ret = max8998_get_enable_register(rdev, ®, &shift); if (ret) return ret; - return max8998_update_reg(max8998->iodev, reg, 1<iodev->i2c; int reg, shift = 8, ret; ret = max8998_get_enable_register(rdev, ®, &shift); if (ret) return ret; - return max8998_update_reg(max8998->iodev, reg, 0, 1<iodev->i2c; int reg, shift = 0, mask, ret; u8 val; @@ -283,7 +287,7 @@ static int max8998_get_voltage(struct regulator_dev *rdev) if (ret) return ret; - ret = max8998_read_reg(max8998->iodev, reg, &val); + ret = max8998_read_reg(i2c, reg, &val); if (ret) return ret; @@ -297,6 +301,7 @@ static int max8998_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV) { struct max8998_data *max8998 = rdev_get_drvdata(rdev); + struct i2c_client *i2c = max8998->iodev->i2c; int min_vol = min_uV / 1000, max_vol = max_uV / 1000; int previous_vol = 0; const struct voltage_map_desc *desc; @@ -330,14 +335,14 @@ static int max8998_set_voltage(struct regulator_dev *rdev, /* wait for RAMP_UP_DELAY if rdev is BUCK1/2 and * ENRAMP is ON */ if (ldo == MAX8998_BUCK1 || ldo == MAX8998_BUCK2) { - max8998_read_reg(max8998->iodev, MAX8998_REG_ONOFF4, &val); + max8998_read_reg(i2c, MAX8998_REG_ONOFF4, &val); if (val & (1 << 4)) { en_ramp = true; previous_vol = max8998_get_voltage(rdev); } } - ret = max8998_update_reg(max8998->iodev, reg, i<min + desc->step*i - previous_vol/1000; diff --git a/include/linux/mfd/max8998-private.h b/include/linux/mfd/max8998-private.h index 6dc75b3e2d33..f0a20cdc288c 100644 --- a/include/linux/mfd/max8998-private.h +++ b/include/linux/mfd/max8998-private.h @@ -75,38 +75,18 @@ enum { /** * struct max8998_dev - max8998 master device for sub-drivers * @dev: master device of the chip (can be used to access platform data) - * @i2c_client: i2c client private data - * @dev_read(): chip register read function - * @dev_write(): chip register write function - * @dev_update(): chip register update function + * @i2c: i2c client private data * @iolock: mutex for serializing io access */ struct max8998_dev { struct device *dev; - struct i2c_client *i2c_client; - int (*dev_read)(struct max8998_dev *max8998, u8 reg, u8 *dest); - int (*dev_write)(struct max8998_dev *max8998, u8 reg, u8 val); - int (*dev_update)(struct max8998_dev *max8998, u8 reg, u8 val, u8 mask); + struct i2c_client *i2c; struct mutex iolock; }; -static inline int max8998_read_reg(struct max8998_dev *max8998, u8 reg, - u8 *value) -{ - return max8998->dev_read(max8998, reg, value); -} - -static inline int max8998_write_reg(struct max8998_dev *max8998, u8 reg, - u8 value) -{ - return max8998->dev_write(max8998, reg, value); -} - -static inline int max8998_update_reg(struct max8998_dev *max8998, u8 reg, - u8 value, u8 mask) -{ - return max8998->dev_update(max8998, reg, value, mask); -} +extern int max8998_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest); +extern int max8998_write_reg(struct i2c_client *i2c, u8 reg, u8 value); +extern int max8998_update_reg(struct i2c_client *i2c, u8 reg, u8 val, u8 mask); #endif /* __LINUX_MFD_MAX8998_PRIV_H */ -- cgit v1.2.3 From 2c7e6f5797140b33ec2b967ff28941e1c7eff4b2 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Fri, 10 Sep 2010 18:36:39 +0200 Subject: mfd: Add MAX8998 interrupts support Use genirq and provide seperated file for interrupts support. 
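Wired up from a board file, the result looks roughly like this (sketch; the IRQ numbers are invented and the handler is hypothetical, following the same request_threaded_irq() pattern the rtc driver later in this series uses):

	/* Sketch: board code reserves an IRQ window for the PMIC ... */
	static struct max8998_platform_data board_max8998_data = {
		.irq_base = BOARD_IRQ_BASE,	/* hypothetical base */
		.ono	  = BOARD_ONO_IRQ,	/* hypothetical ONO line */
	};

	/* ... and a sub-driver claims one of the demuxed sources. */
	ret = request_threaded_irq(max8998->irq_base + MAX8998_IRQ_LOBAT1,
				   NULL, lobat_irq_handler, 0,
				   "max8998-lobat1", info);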
Signed-off-by: Joonyoung Shim Signed-off-by: Kyungmin Park Reviewed-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 2 +- drivers/mfd/Makefile | 2 +- drivers/mfd/max8998-irq.c | 255 ++++++++++++++++++++++++++++++++++++ drivers/mfd/max8998.c | 25 +++- include/linux/mfd/max8998-private.h | 72 +++++++++- include/linux/mfd/max8998.h | 11 +- 6 files changed, 358 insertions(+), 9 deletions(-) create mode 100644 drivers/mfd/max8998-irq.c (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 02cd6c0e59cb..8a463e6c22a0 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -295,7 +295,7 @@ config MFD_MAX8925 config MFD_MAX8998 bool "Maxim Semiconductor MAX8998/National LP3974 PMIC Support" - depends on I2C=y + depends on I2C=y && GENERIC_HARDIRQS select MFD_CORE help Say yes here to support for Maxim Semiconductor MAX8998 and diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index feaeeaeeddb7..b1ad74da2351 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -58,7 +58,7 @@ obj-$(CONFIG_UCB1400_CORE) += ucb1400_core.o obj-$(CONFIG_PMIC_DA903X) += da903x.o max8925-objs := max8925-core.o max8925-i2c.o obj-$(CONFIG_MFD_MAX8925) += max8925.o -obj-$(CONFIG_MFD_MAX8998) += max8998.o +obj-$(CONFIG_MFD_MAX8998) += max8998.o max8998-irq.o pcf50633-objs := pcf50633-core.o pcf50633-irq.o obj-$(CONFIG_MFD_PCF50633) += pcf50633.o diff --git a/drivers/mfd/max8998-irq.c b/drivers/mfd/max8998-irq.c new file mode 100644 index 000000000000..346fd296cf7d --- /dev/null +++ b/drivers/mfd/max8998-irq.c @@ -0,0 +1,255 @@ +/* + * Interrupt controller support for MAX8998 + * + * Copyright (C) 2010 Samsung Electronics Co.Ltd + * Author: Joonyoung Shim + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ + +#include +#include +#include +#include + +struct max8998_irq_data { + int reg; + int mask; +}; + +static struct max8998_irq_data max8998_irqs[] = { + [MAX8998_IRQ_DCINF] = { + .reg = 1, + .mask = MAX8998_IRQ_DCINF_MASK, + }, + [MAX8998_IRQ_DCINR] = { + .reg = 1, + .mask = MAX8998_IRQ_DCINR_MASK, + }, + [MAX8998_IRQ_JIGF] = { + .reg = 1, + .mask = MAX8998_IRQ_JIGF_MASK, + }, + [MAX8998_IRQ_JIGR] = { + .reg = 1, + .mask = MAX8998_IRQ_JIGR_MASK, + }, + [MAX8998_IRQ_PWRONF] = { + .reg = 1, + .mask = MAX8998_IRQ_PWRONF_MASK, + }, + [MAX8998_IRQ_PWRONR] = { + .reg = 1, + .mask = MAX8998_IRQ_PWRONR_MASK, + }, + [MAX8998_IRQ_WTSREVNT] = { + .reg = 2, + .mask = MAX8998_IRQ_WTSREVNT_MASK, + }, + [MAX8998_IRQ_SMPLEVNT] = { + .reg = 2, + .mask = MAX8998_IRQ_SMPLEVNT_MASK, + }, + [MAX8998_IRQ_ALARM1] = { + .reg = 2, + .mask = MAX8998_IRQ_ALARM1_MASK, + }, + [MAX8998_IRQ_ALARM0] = { + .reg = 2, + .mask = MAX8998_IRQ_ALARM0_MASK, + }, + [MAX8998_IRQ_ONKEY1S] = { + .reg = 3, + .mask = MAX8998_IRQ_ONKEY1S_MASK, + }, + [MAX8998_IRQ_TOPOFFR] = { + .reg = 3, + .mask = MAX8998_IRQ_TOPOFFR_MASK, + }, + [MAX8998_IRQ_DCINOVPR] = { + .reg = 3, + .mask = MAX8998_IRQ_DCINOVPR_MASK, + }, + [MAX8998_IRQ_CHGRSTF] = { + .reg = 3, + .mask = MAX8998_IRQ_CHGRSTF_MASK, + }, + [MAX8998_IRQ_DONER] = { + .reg = 3, + .mask = MAX8998_IRQ_DONER_MASK, + }, + [MAX8998_IRQ_CHGFAULT] = { + .reg = 3, + .mask = MAX8998_IRQ_CHGFAULT_MASK, + }, + [MAX8998_IRQ_LOBAT1] = { + .reg = 4, + .mask = MAX8998_IRQ_LOBAT1_MASK, + }, + [MAX8998_IRQ_LOBAT2] = { + .reg = 4, + .mask = MAX8998_IRQ_LOBAT2_MASK, + }, +}; + +static inline struct max8998_irq_data * +irq_to_max8998_irq(struct max8998_dev *max8998, int irq) +{ + return &max8998_irqs[irq - max8998->irq_base]; +} + +static void max8998_irq_lock(unsigned int irq) +{ + struct max8998_dev *max8998 = get_irq_chip_data(irq); + + mutex_lock(&max8998->irqlock); +} + +static void max8998_irq_sync_unlock(unsigned int irq) +{ + struct max8998_dev *max8998 = get_irq_chip_data(irq); + int i; + + for (i = 0; i < ARRAY_SIZE(max8998->irq_masks_cur); i++) { + /* + * If there's been a change in the mask write it back + * to the hardware. 
+ */ + if (max8998->irq_masks_cur[i] != max8998->irq_masks_cache[i]) { + max8998->irq_masks_cache[i] = max8998->irq_masks_cur[i]; + max8998_write_reg(max8998->i2c, MAX8998_REG_IRQM1 + i, + max8998->irq_masks_cur[i]); + } + } + + mutex_unlock(&max8998->irqlock); +} + +static void max8998_irq_unmask(unsigned int irq) +{ + struct max8998_dev *max8998 = get_irq_chip_data(irq); + struct max8998_irq_data *irq_data = irq_to_max8998_irq(max8998, irq); + + max8998->irq_masks_cur[irq_data->reg - 1] &= ~irq_data->mask; +} + +static void max8998_irq_mask(unsigned int irq) +{ + struct max8998_dev *max8998 = get_irq_chip_data(irq); + struct max8998_irq_data *irq_data = irq_to_max8998_irq(max8998, irq); + + max8998->irq_masks_cur[irq_data->reg - 1] |= irq_data->mask; +} + +static struct irq_chip max8998_irq_chip = { + .name = "max8998", + .bus_lock = max8998_irq_lock, + .bus_sync_unlock = max8998_irq_sync_unlock, + .mask = max8998_irq_mask, + .unmask = max8998_irq_unmask, +}; + +static irqreturn_t max8998_irq_thread(int irq, void *data) +{ + struct max8998_dev *max8998 = data; + u8 irq_reg[MAX8998_NUM_IRQ_REGS]; + int ret; + int i; + + ret = max8998_bulk_read(max8998->i2c, MAX8998_REG_IRQ1, + MAX8998_NUM_IRQ_REGS, irq_reg); + if (ret < 0) { + dev_err(max8998->dev, "Failed to read interrupt register: %d\n", + ret); + return IRQ_NONE; + } + + /* Apply masking */ + for (i = 0; i < MAX8998_NUM_IRQ_REGS; i++) + irq_reg[i] &= ~max8998->irq_masks_cur[i]; + + /* Report */ + for (i = 0; i < MAX8998_IRQ_NR; i++) { + if (irq_reg[max8998_irqs[i].reg - 1] & max8998_irqs[i].mask) + handle_nested_irq(max8998->irq_base + i); + } + + return IRQ_HANDLED; +} + +int max8998_irq_init(struct max8998_dev *max8998) +{ + int i; + int cur_irq; + int ret; + + if (!max8998->irq) { + dev_warn(max8998->dev, + "No interrupt specified, no interrupts\n"); + max8998->irq_base = 0; + return 0; + } + + if (!max8998->irq_base) { + dev_err(max8998->dev, + "No interrupt base specified, no interrupts\n"); + return 0; + } + + mutex_init(&max8998->irqlock); + + /* Mask the individual interrupt sources */ + for (i = 0; i < MAX8998_NUM_IRQ_REGS; i++) { + max8998->irq_masks_cur[i] = 0xff; + max8998->irq_masks_cache[i] = 0xff; + max8998_write_reg(max8998->i2c, MAX8998_REG_IRQM1 + i, 0xff); + } + + max8998_write_reg(max8998->i2c, MAX8998_REG_STATUSM1, 0xff); + max8998_write_reg(max8998->i2c, MAX8998_REG_STATUSM2, 0xff); + + /* register with genirq */ + for (i = 0; i < MAX8998_IRQ_NR; i++) { + cur_irq = i + max8998->irq_base; + set_irq_chip_data(cur_irq, max8998); + set_irq_chip_and_handler(cur_irq, &max8998_irq_chip, + handle_edge_irq); + set_irq_nested_thread(cur_irq, 1); +#ifdef CONFIG_ARM + set_irq_flags(cur_irq, IRQF_VALID); +#else + set_irq_noprobe(cur_irq); +#endif + } + + ret = request_threaded_irq(max8998->irq, NULL, max8998_irq_thread, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + "max8998-irq", max8998); + if (ret) { + dev_err(max8998->dev, "Failed to request IRQ %d: %d\n", + max8998->irq, ret); + return ret; + } + + if (!max8998->ono) + return 0; + + ret = request_threaded_irq(max8998->ono, NULL, max8998_irq_thread, + IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING | + IRQF_ONESHOT, "max8998-ono", max8998); + if (ret) + dev_err(max8998->dev, "Failed to request IRQ %d: %d\n", + max8998->ono, ret); + + return 0; +} + +void max8998_irq_exit(struct max8998_dev *max8998) +{ + if (max8998->irq) + free_irq(max8998->irq, max8998); +} diff --git a/drivers/mfd/max8998.c b/drivers/mfd/max8998.c index 3b8a5076d351..49c140a78ba9 100644 --- 
a/drivers/mfd/max8998.c +++ b/drivers/mfd/max8998.c @@ -1,5 +1,5 @@ /* - * max8698.c - mfd core driver for the Maxim 8998 + * max8998.c - mfd core driver for the Maxim 8998 * * Copyright (C) 2009-2010 Samsung Electronics * Kyungmin Park @@ -53,6 +53,21 @@ int max8998_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest) } EXPORT_SYMBOL(max8998_read_reg); +int max8998_bulk_read(struct i2c_client *i2c, u8 reg, int count, u8 *buf) +{ + struct max8998_dev *max8998 = i2c_get_clientdata(i2c); + int ret; + + mutex_lock(&max8998->iolock); + ret = i2c_smbus_read_i2c_block_data(i2c, reg, count, buf); + mutex_unlock(&max8998->iolock); + if (ret < 0) + return ret; + + return 0; +} +EXPORT_SYMBOL(max8998_bulk_read); + int max8998_write_reg(struct i2c_client *i2c, u8 reg, u8 value) { struct max8998_dev *max8998 = i2c_get_clientdata(i2c); @@ -87,6 +102,7 @@ EXPORT_SYMBOL(max8998_update_reg); static int max8998_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { + struct max8998_platform_data *pdata = i2c->dev.platform_data; struct max8998_dev *max8998; int ret = 0; @@ -97,8 +113,15 @@ static int max8998_i2c_probe(struct i2c_client *i2c, i2c_set_clientdata(i2c, max8998); max8998->dev = &i2c->dev; max8998->i2c = i2c; + max8998->irq = i2c->irq; + if (pdata) { + max8998->ono = pdata->ono; + max8998->irq_base = pdata->irq_base; + } mutex_init(&max8998->iolock); + max8998_irq_init(max8998); + ret = mfd_add_devices(max8998->dev, -1, max8998_devs, ARRAY_SIZE(max8998_devs), NULL, 0); diff --git a/include/linux/mfd/max8998-private.h b/include/linux/mfd/max8998-private.h index f0a20cdc288c..3bd2371a05f7 100644 --- a/include/linux/mfd/max8998-private.h +++ b/include/linux/mfd/max8998-private.h @@ -1,5 +1,5 @@ /* - * max8698.h - Voltage regulator driver for the Maxim 8998 + * max8998.h - Voltage regulator driver for the Maxim 8998 * * Copyright (C) 2009-2010 Samsung Electrnoics * Kyungmin Park @@ -23,6 +23,8 @@ #ifndef __LINUX_MFD_MAX8998_PRIV_H #define __LINUX_MFD_MAX8998_PRIV_H +#define MAX8998_NUM_IRQ_REGS 4 + /* MAX 8998 registers */ enum { MAX8998_REG_IRQ1, @@ -72,20 +74,86 @@ enum { MAX8998_REG_LBCNFG2, }; +/* IRQ definitions */ +enum { + MAX8998_IRQ_DCINF, + MAX8998_IRQ_DCINR, + MAX8998_IRQ_JIGF, + MAX8998_IRQ_JIGR, + MAX8998_IRQ_PWRONF, + MAX8998_IRQ_PWRONR, + + MAX8998_IRQ_WTSREVNT, + MAX8998_IRQ_SMPLEVNT, + MAX8998_IRQ_ALARM1, + MAX8998_IRQ_ALARM0, + + MAX8998_IRQ_ONKEY1S, + MAX8998_IRQ_TOPOFFR, + MAX8998_IRQ_DCINOVPR, + MAX8998_IRQ_CHGRSTF, + MAX8998_IRQ_DONER, + MAX8998_IRQ_CHGFAULT, + + MAX8998_IRQ_LOBAT1, + MAX8998_IRQ_LOBAT2, + + MAX8998_IRQ_NR, +}; + +#define MAX8998_IRQ_DCINF_MASK (1 << 2) +#define MAX8998_IRQ_DCINR_MASK (1 << 3) +#define MAX8998_IRQ_JIGF_MASK (1 << 4) +#define MAX8998_IRQ_JIGR_MASK (1 << 5) +#define MAX8998_IRQ_PWRONF_MASK (1 << 6) +#define MAX8998_IRQ_PWRONR_MASK (1 << 7) + +#define MAX8998_IRQ_WTSREVNT_MASK (1 << 0) +#define MAX8998_IRQ_SMPLEVNT_MASK (1 << 1) +#define MAX8998_IRQ_ALARM1_MASK (1 << 2) +#define MAX8998_IRQ_ALARM0_MASK (1 << 3) + +#define MAX8998_IRQ_ONKEY1S_MASK (1 << 0) +#define MAX8998_IRQ_TOPOFFR_MASK (1 << 2) +#define MAX8998_IRQ_DCINOVPR_MASK (1 << 3) +#define MAX8998_IRQ_CHGRSTF_MASK (1 << 4) +#define MAX8998_IRQ_DONER_MASK (1 << 5) +#define MAX8998_IRQ_CHGFAULT_MASK (1 << 7) + +#define MAX8998_IRQ_LOBAT1_MASK (1 << 0) +#define MAX8998_IRQ_LOBAT2_MASK (1 << 1) + /** * struct max8998_dev - max8998 master device for sub-drivers * @dev: master device of the chip (can be used to access platform data) * @i2c: i2c client private data * @iolock: mutex 
for serializing io access + * @irqlock: mutex for buslock + * @irq_base: base IRQ number for max8998, required for IRQs + * @irq: generic IRQ number for max8998 + * @ono: power onoff IRQ number for max8998 + * @irq_masks_cur: currently active value + * @irq_masks_cache: cached hardware value */ - struct max8998_dev { struct device *dev; struct i2c_client *i2c; struct mutex iolock; + struct mutex irqlock; + + int irq_base; + int irq; + int ono; + u8 irq_masks_cur[MAX8998_NUM_IRQ_REGS]; + u8 irq_masks_cache[MAX8998_NUM_IRQ_REGS]; }; +int max8998_irq_init(struct max8998_dev *max8998); +void max8998_irq_exit(struct max8998_dev *max8998); + extern int max8998_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest); +extern int max8998_bulk_read(struct i2c_client *i2c, u8 reg, int count, + u8 *buf); extern int max8998_write_reg(struct i2c_client *i2c, u8 reg, u8 value); extern int max8998_update_reg(struct i2c_client *i2c, u8 reg, u8 val, u8 mask); diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h index 1d3601a2d853..d47ed4c190fe 100644 --- a/include/linux/mfd/max8998.h +++ b/include/linux/mfd/max8998.h @@ -1,5 +1,5 @@ /* - * max8698.h - Voltage regulator driver for the Maxim 8998 + * max8998.h - Voltage regulator driver for the Maxim 8998 * * Copyright (C) 2009-2010 Samsung Electrnoics * Kyungmin Park @@ -66,13 +66,16 @@ struct max8998_regulator_data { /** * struct max8998_board - packages regulator init data - * @num_regulators: number of regultors used * @regulators: array of defined regulators + * @num_regulators: number of regultors used + * @irq_base: base IRQ number for max8998, required for IRQs + * @ono: power onoff IRQ number for max8998 */ - struct max8998_platform_data { - int num_regulators; struct max8998_regulator_data *regulators; + int num_regulators; + int irq_base; + int ono; }; #endif /* __LINUX_MFD_MAX8998_H */ -- cgit v1.2.3
From 9b16c0a43b74393cc18666a7748293812c61af1f Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Fri, 6 Aug 2010 11:28:08 +0900 Subject: rtc: Add MAX8998 rtc driver This adds support for the RTC provided by the Maxim 8998 chip. This driver was tested on a GONI board by using the rtc-test application from Documentation/rtc.txt.
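Once bound, the device shows up as a regular /dev/rtcN node, so an rtc-test style smoke test boils down to (userspace sketch, assuming the chip is rtc0):

	/* Userspace sketch: read the wall clock via the RTC ioctl ABI. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/rtc.h>

	int main(void)
	{
		struct rtc_time tm;
		int fd = open("/dev/rtc0", O_RDONLY);

		if (fd < 0 || ioctl(fd, RTC_RD_TIME, &tm) < 0)
			return 1;
		printf("%04d-%02d-%02d %02d:%02d:%02d\n",
		       tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
		       tm.tm_hour, tm.tm_min, tm.tm_sec);
		return 0;
	}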
Signed-off-by: Joonyoung Shim Signed-off-by: Kyungmin Park Acked-by: Alessandro Zummo Signed-off-by: Samuel Ortiz --- drivers/mfd/max8998.c | 24 ++- drivers/rtc/Kconfig | 10 ++ drivers/rtc/Makefile | 1 + drivers/rtc/rtc-max8998.c | 300 ++++++++++++++++++++++++++++++++++++ include/linux/mfd/max8998-private.h | 6 +- 5 files changed, 339 insertions(+), 2 deletions(-) create mode 100644 drivers/rtc/rtc-max8998.c (limited to 'include') diff --git a/drivers/mfd/max8998.c b/drivers/mfd/max8998.c index 49c140a78ba9..310fd8054f35 100644 --- a/drivers/mfd/max8998.c +++ b/drivers/mfd/max8998.c @@ -30,10 +30,14 @@ #include #include +#define RTC_I2C_ADDR (0x0c >> 1) + static struct mfd_cell max8998_devs[] = { { .name = "max8998-pmic", - } + }, { + .name = "max8998-rtc", + }, }; int max8998_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest) @@ -80,6 +84,21 @@ int max8998_write_reg(struct i2c_client *i2c, u8 reg, u8 value) } EXPORT_SYMBOL(max8998_write_reg); +int max8998_bulk_write(struct i2c_client *i2c, u8 reg, int count, u8 *buf) +{ + struct max8998_dev *max8998 = i2c_get_clientdata(i2c); + int ret; + + mutex_lock(&max8998->iolock); + ret = i2c_smbus_write_i2c_block_data(i2c, reg, count, buf); + mutex_unlock(&max8998->iolock); + if (ret < 0) + return ret; + + return 0; +} +EXPORT_SYMBOL(max8998_bulk_write); + int max8998_update_reg(struct i2c_client *i2c, u8 reg, u8 val, u8 mask) { struct max8998_dev *max8998 = i2c_get_clientdata(i2c); @@ -120,6 +139,9 @@ static int max8998_i2c_probe(struct i2c_client *i2c, } mutex_init(&max8998->iolock); + max8998->rtc = i2c_new_dummy(i2c->adapter, RTC_I2C_ADDR); + i2c_set_clientdata(max8998->rtc, max8998); + max8998_irq_init(max8998); ret = mfd_add_devices(max8998->dev, -1, diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 6a77437d4f5a..23579d6e4a6e 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -196,6 +196,16 @@ config RTC_DRV_MAX8925 This driver can also be built as a module. If so, the module will be called rtc-max8925. +config RTC_DRV_MAX8998 + tristate "Maxim MAX8998" + depends on MFD_MAX8998 + help + If you say yes here you will get support for the + RTC of Maxim MAX8998 PMIC. + + This driver can also be built as a module. If so, the module + will be called rtc-max8998. + config RTC_DRV_RS5C372 tristate "Ricoh R2025S/D, RS5C372A/B, RV5C386, RV5C387A" help diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 7a7cb3228a1d..c3f3c2d9e1ed 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -60,6 +60,7 @@ obj-$(CONFIG_RTC_DRV_M48T86) += rtc-m48t86.o obj-$(CONFIG_RTC_MXC) += rtc-mxc.o obj-$(CONFIG_RTC_DRV_MAX6900) += rtc-max6900.o obj-$(CONFIG_RTC_DRV_MAX8925) += rtc-max8925.o +obj-$(CONFIG_RTC_DRV_MAX8998) += rtc-max8998.o obj-$(CONFIG_RTC_DRV_MAX6902) += rtc-max6902.o obj-$(CONFIG_RTC_DRV_MC13783) += rtc-mc13783.o obj-$(CONFIG_RTC_DRV_MSM6242) += rtc-msm6242.o diff --git a/drivers/rtc/rtc-max8998.c b/drivers/rtc/rtc-max8998.c new file mode 100644 index 000000000000..f22dee35f330 --- /dev/null +++ b/drivers/rtc/rtc-max8998.c @@ -0,0 +1,300 @@ +/* + * RTC driver for Maxim MAX8998 + * + * Copyright (C) 2010 Samsung Electronics Co.Ltd + * Author: Minkyu Kang + * Author: Joonyoung Shim + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX8998_RTC_SEC 0x00 +#define MAX8998_RTC_MIN 0x01 +#define MAX8998_RTC_HOUR 0x02 +#define MAX8998_RTC_WEEKDAY 0x03 +#define MAX8998_RTC_DATE 0x04 +#define MAX8998_RTC_MONTH 0x05 +#define MAX8998_RTC_YEAR1 0x06 +#define MAX8998_RTC_YEAR2 0x07 +#define MAX8998_ALARM0_SEC 0x08 +#define MAX8998_ALARM0_MIN 0x09 +#define MAX8998_ALARM0_HOUR 0x0a +#define MAX8998_ALARM0_WEEKDAY 0x0b +#define MAX8998_ALARM0_DATE 0x0c +#define MAX8998_ALARM0_MONTH 0x0d +#define MAX8998_ALARM0_YEAR1 0x0e +#define MAX8998_ALARM0_YEAR2 0x0f +#define MAX8998_ALARM1_SEC 0x10 +#define MAX8998_ALARM1_MIN 0x11 +#define MAX8998_ALARM1_HOUR 0x12 +#define MAX8998_ALARM1_WEEKDAY 0x13 +#define MAX8998_ALARM1_DATE 0x14 +#define MAX8998_ALARM1_MONTH 0x15 +#define MAX8998_ALARM1_YEAR1 0x16 +#define MAX8998_ALARM1_YEAR2 0x17 +#define MAX8998_ALARM0_CONF 0x18 +#define MAX8998_ALARM1_CONF 0x19 +#define MAX8998_RTC_STATUS 0x1a +#define MAX8998_WTSR_SMPL_CNTL 0x1b +#define MAX8998_TEST 0x1f + +#define HOUR_12 (1 << 7) +#define HOUR_PM (1 << 5) +#define ALARM0_STATUS (1 << 1) +#define ALARM1_STATUS (1 << 2) + +enum { + RTC_SEC = 0, + RTC_MIN, + RTC_HOUR, + RTC_WEEKDAY, + RTC_DATE, + RTC_MONTH, + RTC_YEAR1, + RTC_YEAR2, +}; + +struct max8998_rtc_info { + struct device *dev; + struct max8998_dev *max8998; + struct i2c_client *rtc; + struct rtc_device *rtc_dev; + int irq; +}; + +static void max8998_data_to_tm(u8 *data, struct rtc_time *tm) +{ + tm->tm_sec = bcd2bin(data[RTC_SEC]); + tm->tm_min = bcd2bin(data[RTC_MIN]); + if (data[RTC_HOUR] & HOUR_12) { + tm->tm_hour = bcd2bin(data[RTC_HOUR] & 0x1f); + if (data[RTC_HOUR] & HOUR_PM) + tm->tm_hour += 12; + } else + tm->tm_hour = bcd2bin(data[RTC_HOUR] & 0x3f); + + tm->tm_wday = data[RTC_WEEKDAY] & 0x07; + tm->tm_mday = bcd2bin(data[RTC_DATE]); + tm->tm_mon = bcd2bin(data[RTC_MONTH]); + tm->tm_year = bcd2bin(data[RTC_YEAR1]) + bcd2bin(data[RTC_YEAR2]) * 100; + tm->tm_year -= 1900; +} + +static void max8998_tm_to_data(struct rtc_time *tm, u8 *data) +{ + data[RTC_SEC] = bin2bcd(tm->tm_sec); + data[RTC_MIN] = bin2bcd(tm->tm_min); + data[RTC_HOUR] = bin2bcd(tm->tm_hour); + data[RTC_WEEKDAY] = tm->tm_wday; + data[RTC_DATE] = bin2bcd(tm->tm_mday); + data[RTC_MONTH] = bin2bcd(tm->tm_mon); + data[RTC_YEAR1] = bin2bcd(tm->tm_year % 100); + data[RTC_YEAR2] = bin2bcd((tm->tm_year + 1900) / 100); +} + +static int max8998_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + struct max8998_rtc_info *info = dev_get_drvdata(dev); + u8 data[8]; + int ret; + + ret = max8998_bulk_read(info->rtc, MAX8998_RTC_SEC, 8, data); + if (ret < 0) + return ret; + + max8998_data_to_tm(data, tm); + + return rtc_valid_tm(tm); +} + +static int max8998_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + struct max8998_rtc_info *info = dev_get_drvdata(dev); + u8 data[8]; + + max8998_tm_to_data(tm, data); + + return max8998_bulk_write(info->rtc, MAX8998_RTC_SEC, 8, data); +} + +static int max8998_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct max8998_rtc_info *info = dev_get_drvdata(dev); + u8 data[8]; + u8 val; + int ret; + + ret = max8998_bulk_read(info->rtc, MAX8998_ALARM0_SEC, 8, data); + if (ret < 0) + return ret; + + max8998_data_to_tm(data, &alrm->time); + + ret = max8998_read_reg(info->rtc, MAX8998_ALARM0_CONF, &val); + if (ret < 0) + return ret; + + alrm->enabled = !!val; + + ret = max8998_read_reg(info->rtc, MAX8998_RTC_STATUS, &val); + if (ret < 0) + return ret; + + if (val & 
ALARM0_STATUS) + alrm->pending = 1; + else + alrm->pending = 0; + + return 0; +} + +static int max8998_rtc_stop_alarm(struct max8998_rtc_info *info) +{ + return max8998_write_reg(info->rtc, MAX8998_ALARM0_CONF, 0); +} + +static int max8998_rtc_start_alarm(struct max8998_rtc_info *info) +{ + return max8998_write_reg(info->rtc, MAX8998_ALARM0_CONF, 0x77); +} + +static int max8998_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct max8998_rtc_info *info = dev_get_drvdata(dev); + u8 data[8]; + int ret; + + max8998_tm_to_data(&alrm->time, data); + + ret = max8998_rtc_stop_alarm(info); + if (ret < 0) + return ret; + + ret = max8998_bulk_write(info->rtc, MAX8998_ALARM0_SEC, 8, data); + if (ret < 0) + return ret; + + if (alrm->enabled) + return max8998_rtc_start_alarm(info); + + return 0; +} + +static int max8998_rtc_alarm_irq_enable(struct device *dev, + unsigned int enabled) +{ + struct max8998_rtc_info *info = dev_get_drvdata(dev); + + if (enabled) + return max8998_rtc_start_alarm(info); + else + return max8998_rtc_stop_alarm(info); +} + +static irqreturn_t max8998_rtc_alarm_irq(int irq, void *data) +{ + struct max8998_rtc_info *info = data; + + rtc_update_irq(info->rtc_dev, 1, RTC_IRQF | RTC_AF); + + return IRQ_HANDLED; +} + +static const struct rtc_class_ops max8998_rtc_ops = { + .read_time = max8998_rtc_read_time, + .set_time = max8998_rtc_set_time, + .read_alarm = max8998_rtc_read_alarm, + .set_alarm = max8998_rtc_set_alarm, + .alarm_irq_enable = max8998_rtc_alarm_irq_enable, +}; + +static int __devinit max8998_rtc_probe(struct platform_device *pdev) +{ + struct max8998_dev *max8998 = dev_get_drvdata(pdev->dev.parent); + struct max8998_rtc_info *info; + int ret; + + info = kzalloc(sizeof(struct max8998_rtc_info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + info->dev = &pdev->dev; + info->max8998 = max8998; + info->rtc = max8998->rtc; + info->irq = max8998->irq_base + MAX8998_IRQ_ALARM0; + + info->rtc_dev = rtc_device_register("max8998-rtc", &pdev->dev, + &max8998_rtc_ops, THIS_MODULE); + + if (IS_ERR(info->rtc_dev)) { + ret = PTR_ERR(info->rtc_dev); + dev_err(&pdev->dev, "Failed to register RTC device: %d\n", ret); + goto out_rtc; + } + + platform_set_drvdata(pdev, info); + + ret = request_threaded_irq(info->irq, NULL, max8998_rtc_alarm_irq, 0, + "rtc-alarm0", info); + if (ret < 0) + dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n", + info->irq, ret); + + return 0; + +out_rtc: + kfree(info); + return ret; +} + +static int __devexit max8998_rtc_remove(struct platform_device *pdev) +{ + struct max8998_rtc_info *info = platform_get_drvdata(pdev); + + if (info) { + free_irq(info->irq, info); + rtc_device_unregister(info->rtc_dev); + kfree(info); + } + + return 0; +} + +static struct platform_driver max8998_rtc_driver = { + .driver = { + .name = "max8998-rtc", + .owner = THIS_MODULE, + }, + .probe = max8998_rtc_probe, + .remove = __devexit_p(max8998_rtc_remove), +}; + +static int __init max8998_rtc_init(void) +{ + return platform_driver_register(&max8998_rtc_driver); +} +module_init(max8998_rtc_init); + +static void __exit max8998_rtc_exit(void) +{ + platform_driver_unregister(&max8998_rtc_driver); +} +module_exit(max8998_rtc_exit); + +MODULE_AUTHOR("Minkyu Kang "); +MODULE_AUTHOR("Joonyoung Shim "); +MODULE_DESCRIPTION("Maxim MAX8998 RTC driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/max8998-private.h b/include/linux/mfd/max8998-private.h index 3bd2371a05f7..170f665c7cdd 100644 --- a/include/linux/mfd/max8998-private.h +++ 
b/include/linux/mfd/max8998-private.h @@ -126,7 +126,8 @@ enum { /** * struct max8998_dev - max8998 master device for sub-drivers * @dev: master device of the chip (can be used to access platform data) - * @i2c: i2c client private data + * @i2c: i2c client private data for regulator + * @rtc: i2c client private data for rtc * @iolock: mutex for serializing io access * @irqlock: mutex for buslock * @irq_base: base IRQ number for max8998, required for IRQs @@ -138,6 +139,7 @@ enum { struct max8998_dev { struct device *dev; struct i2c_client *i2c; + struct i2c_client *rtc; struct mutex iolock; struct mutex irqlock; @@ -155,6 +157,8 @@ extern int max8998_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest); extern int max8998_bulk_read(struct i2c_client *i2c, u8 reg, int count, u8 *buf); extern int max8998_write_reg(struct i2c_client *i2c, u8 reg, u8 value); +extern int max8998_bulk_write(struct i2c_client *i2c, u8 reg, int count, + u8 *buf); extern int max8998_update_reg(struct i2c_client *i2c, u8 reg, u8 val, u8 mask); #endif /* __LINUX_MFD_MAX8998_PRIV_H */ -- cgit v1.2.3
From 19ca7502c508595edfb963e5dbcf62854a926506 Mon Sep 17 00:00:00 2001 From: Arnd Hannemann Date: Tue, 24 Aug 2010 17:26:59 +0200 Subject: mmc: Allow the tmio_mmc mfd driver to specify get_cd handler Some controllers supported by the tmio_mmc driver do not have the card detect pin of a slot connected, so polling needs to be used and card detection is handled by other means. This patch exposes a get_cd hook for that purpose. Signed-off-by: Arnd Hannemann Signed-off-by: Samuel Ortiz --- drivers/mmc/host/tmio_mmc.c | 13 +++++++++++++ include/linux/mfd/tmio.h | 1 + 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c index 69d98e3bf6ab..1a47221d01a4 100644 --- a/drivers/mmc/host/tmio_mmc.c +++ b/drivers/mmc/host/tmio_mmc.c @@ -756,10 +756,23 @@ static int tmio_mmc_get_ro(struct mmc_host *mmc) (sd_ctrl_read32(host, CTL_STATUS) & TMIO_STAT_WRPROTECT)) ? 0 : 1; } +static int tmio_mmc_get_cd(struct mmc_host *mmc) +{ + struct tmio_mmc_host *host = mmc_priv(mmc); + struct mfd_cell *cell = host->pdev->dev.platform_data; + struct tmio_mmc_data *pdata = cell->driver_data; + + if (!pdata->get_cd) + return -ENOSYS; + else + return pdata->get_cd(host->pdev); +} + static const struct mmc_host_ops tmio_mmc_ops = { .request = tmio_mmc_request, .set_ios = tmio_mmc_set_ios, .get_ro = tmio_mmc_get_ro, + .get_cd = tmio_mmc_get_cd, }; #ifdef CONFIG_PM diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index f07425bc3dcd..24c43bbad541 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -74,6 +74,7 @@ struct tmio_mmc_data { struct tmio_mmc_dma *dma; void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); + int (*get_cd)(struct platform_device *host); }; /* -- cgit v1.2.3
From 998283e2e359249133f2f47db26669a55ff25c98 Mon Sep 17 00:00:00 2001 From: Arnd Hannemann Date: Tue, 24 Aug 2010 17:27:00 +0200 Subject: mfd: Allow the platform to specify the sh_mobile_sdhi get_cd handler On some platforms (e.g. AP4EVB) the card detect pin of a slot is not directly connected to the sdhi hardware, so polling needs to be used with tmio_mmc and card detection is handled in the platform code. This patch allows the platform to set tmio_mmc capabilities (to pass the MMC_CAP_NEEDS_POLL flag) and exposes a get_cd hook for that purpose.
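Board code can then back the hook with, for instance, a GPIO (sketch; the function name, GPIO macro and active level are invented):

	/* Sketch: platform-level card detect for one SDHI channel. */
	static int board_sdhi1_get_cd(struct platform_device *pdev)
	{
		return !gpio_get_value(BOARD_SDHI1_CD_GPIO); /* active low */
	}

	static struct sh_mobile_sdhi_info sdhi1_info = {
		.tmio_caps = MMC_CAP_NEEDS_POLL,
		.get_cd	   = board_sdhi1_get_cd,
	};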
Signed-off-by: Arnd Hannemann Signed-off-by: Samuel Ortiz --- drivers/mfd/sh_mobile_sdhi.c | 13 +++++++++++++ include/linux/mfd/sh_mobile_sdhi.h | 2 ++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/drivers/mfd/sh_mobile_sdhi.c b/drivers/mfd/sh_mobile_sdhi.c index 49b4d069cbf9..01d83a41d570 100644 --- a/drivers/mfd/sh_mobile_sdhi.c +++ b/drivers/mfd/sh_mobile_sdhi.c @@ -65,6 +65,17 @@ static void sh_mobile_sdhi_set_pwr(struct platform_device *tmio, int state) p->set_pwr(pdev, state); } +static int sh_mobile_sdhi_get_cd(struct platform_device *tmio) +{ + struct platform_device *pdev = to_platform_device(tmio->dev.parent); + struct sh_mobile_sdhi_info *p = pdev->dev.platform_data; + + if (p && p->get_cd) + return p->get_cd(pdev); + else + return -ENOSYS; +} + static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev) { struct sh_mobile_sdhi *priv; @@ -106,10 +117,12 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev) mmc_data->hclk = clk_get_rate(priv->clk); mmc_data->set_pwr = sh_mobile_sdhi_set_pwr; + mmc_data->get_cd = sh_mobile_sdhi_get_cd; mmc_data->capabilities = MMC_CAP_MMC_HIGHSPEED; if (p) { mmc_data->flags = p->tmio_flags; mmc_data->ocr_mask = p->tmio_ocr_mask; + mmc_data->capabilities |= p->tmio_caps; } if (p && p->dma_slave_tx >= 0 && p->dma_slave_rx >= 0) { diff --git a/include/linux/mfd/sh_mobile_sdhi.h b/include/linux/mfd/sh_mobile_sdhi.h index 49067802a6d7..c981b959760f 100644 --- a/include/linux/mfd/sh_mobile_sdhi.h +++ b/include/linux/mfd/sh_mobile_sdhi.h @@ -7,8 +7,10 @@ struct sh_mobile_sdhi_info { int dma_slave_tx; int dma_slave_rx; unsigned long tmio_flags; + unsigned long tmio_caps; u32 tmio_ocr_mask; /* available MMC voltages */ void (*set_pwr)(struct platform_device *pdev, int state); + int (*get_cd)(struct platform_device *pdev); }; #endif /* __SH_MOBILE_SDHI_H__ */ -- cgit v1.2.3
From 777271d0f33da306575ef776c75f66fc27246bf0 Mon Sep 17 00:00:00 2001 From: Arnd Hannemann Date: Tue, 24 Aug 2010 17:27:01 +0200 Subject: mmc: Allow the platform to specify the sh_mmcif get_cd handler On some platforms (e.g. AP4EVB) the card detect pin of a slot is not directly connected to the sh_mmcif controller, so polling needs to be used. To overcome the overhead induced by querying the controller on each poll cycle, card detection can be handled in the platform code more efficiently. This patch exposes a get_cd hook for that purpose.
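The platform-side wiring mirrors the sdhi case above (sketch; the names and GPIO macro are invented):

	/* Sketch: let sh_mmcif poll a board GPIO instead of its registers. */
	static int board_mmcif_get_cd(struct platform_device *pdev)
	{
		return !gpio_get_value(BOARD_MMCIF_CD_GPIO); /* active low */
	}

	static struct sh_mmcif_plat_data mmcif_data = {
		.caps	= MMC_CAP_NEEDS_POLL,
		.get_cd	= board_mmcif_get_cd,
	};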
Signed-off-by: Arnd Hannemann Tested-by: Yusuke Goda Signed-off-by: Samuel Ortiz --- drivers/mmc/host/sh_mmcif.c | 12 ++++++++++++ include/linux/mmc/sh_mmcif.h | 1 + 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index 0f06b8002814..ddd09840520b 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -710,9 +710,21 @@ static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->bus_width = ios->bus_width; } +static int sh_mmcif_get_cd(struct mmc_host *mmc) +{ + struct sh_mmcif_host *host = mmc_priv(mmc); + struct sh_mmcif_plat_data *p = host->pd->dev.platform_data; + + if (!p->get_cd) + return -ENOSYS; + else + return p->get_cd(host->pd); +} + static struct mmc_host_ops sh_mmcif_ops = { .request = sh_mmcif_request, .set_ios = sh_mmcif_set_ios, + .get_cd = sh_mmcif_get_cd, }; static void sh_mmcif_detect(struct mmc_host *mmc) diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h index d4a2ebbdab4b..d19e2114fd86 100644 --- a/include/linux/mmc/sh_mmcif.h +++ b/include/linux/mmc/sh_mmcif.h @@ -34,6 +34,7 @@ struct sh_mmcif_plat_data { void (*set_pwr)(struct platform_device *pdev, int state); void (*down_pwr)(struct platform_device *pdev); + int (*get_cd)(struct platform_device *pdef); u8 sup_pclk; /* 1 :SH7757, 0: SH7724/SH7372 */ unsigned long caps; u32 ocr; -- cgit v1.2.3 From f1334fb3c3006ba109886158c0ad79512f928bc1 Mon Sep 17 00:00:00 2001 From: Yusuke Goda Date: Mon, 30 Aug 2010 11:50:19 +0100 Subject: mmc: Allow 2 byte requests in 4-bit mode for tmio_mmc Adjust the tmio_mmc block size check to accept 2-byte requests in 4-bit mode if the hardware supports it. Tested with the SDHI hardware block included in sh7724. Signed-off-by: Yusuke Goda Signed-off-by: Matt Fleming Acked-by: Magnus Damm Tested-by: Arnd Hannemann Signed-off-by: Samuel Ortiz --- drivers/mfd/sh_mobile_sdhi.c | 6 ++++++ drivers/mmc/host/tmio_mmc.c | 17 ++++++++++++----- include/linux/mfd/tmio.h | 5 +++++ 3 files changed, 23 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/mfd/sh_mobile_sdhi.c b/drivers/mfd/sh_mobile_sdhi.c index 01d83a41d570..f1714f93af9d 100644 --- a/drivers/mfd/sh_mobile_sdhi.c +++ b/drivers/mfd/sh_mobile_sdhi.c @@ -125,6 +125,12 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev) mmc_data->capabilities |= p->tmio_caps; } + /* + * All SDHI blocks support 2-byte and larger block sizes in 4-bit + * bus width mode. 
+ */ + mmc_data->flags |= TMIO_MMC_BLKSZ_2BYTES; + if (p && p->dma_slave_tx >= 0 && p->dma_slave_rx >= 0) { priv->param_tx.slave_id = p->dma_slave_tx; priv->param_rx.slave_id = p->dma_slave_rx; diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c index 1a47221d01a4..e7765a89593e 100644 --- a/drivers/mmc/host/tmio_mmc.c +++ b/drivers/mmc/host/tmio_mmc.c @@ -658,14 +658,21 @@ static void tmio_mmc_release_dma(struct tmio_mmc_host *host) static int tmio_mmc_start_data(struct tmio_mmc_host *host, struct mmc_data *data) { + struct mfd_cell *cell = host->pdev->dev.platform_data; + struct tmio_mmc_data *pdata = cell->driver_data; + pr_debug("setup data transfer: blocksize %08x nr_blocks %d\n", data->blksz, data->blocks); - /* Hardware cannot perform 1 and 2 byte requests in 4 bit mode */ - if (data->blksz < 4 && host->mmc->ios.bus_width == MMC_BUS_WIDTH_4) { - pr_err("%s: %d byte block unsupported in 4 bit mode\n", - mmc_hostname(host->mmc), data->blksz); - return -EINVAL; + /* Some hardware cannot perform 2 byte requests in 4 bit mode */ + if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_4) { + int blksz_2bytes = pdata->flags & TMIO_MMC_BLKSZ_2BYTES; + + if (data->blksz < 2 || (data->blksz < 4 && !blksz_2bytes)) { + pr_err("%s: %d byte block unsupported in 4 bit mode\n", + mmc_hostname(host->mmc), data->blksz); + return -EINVAL; + } } tmio_mmc_init_sg(host, data); diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 24c43bbad541..085f041197dc 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -52,6 +52,11 @@ /* tmio MMC platform flags */ #define TMIO_MMC_WRPROTECT_DISABLE (1 << 0) +/* + * Some controllers can support a 2-byte block size when the bus width + * is configured in 4-bit mode. + */ +#define TMIO_MMC_BLKSZ_2BYTES (1 << 1) int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); -- cgit v1.2.3
From 47c1697508f2ec9f6b31ce6c825fe1017871dea6 Mon Sep 17 00:00:00 2001 From: Mattias Wallin Date: Fri, 10 Sep 2010 17:47:56 +0200 Subject: mfd: Align ab8500 with the abx500 interface This patch makes the ab8500 mixed signal chip expose the same interface for register access as the ab3100, ab3550 and ab5500 chips. ab8500_read() and ab8500_write() are removed and replaced with abx500_get_register_interruptible() and abx500_set_register_interruptible(). Signed-off-by: Mattias Wallin Acked-by: Linus Walleij Acked-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 4 +- drivers/mfd/ab8500-core.c | 281 ++++++++++++++++++++++++++------------------- drivers/regulator/ab8500.c | 86 ++++++++------ drivers/rtc/rtc-ab8500.c | 103 +++++++++-------- include/linux/mfd/ab8500.h | 28 ++++- include/linux/mfd/abx500.h | 3 +- 6 files changed, 296 insertions(+), 209 deletions(-) (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 8a463e6c22a0..ec0af47a9058 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -434,7 +434,7 @@ config PCF50633_GPIO config ABX500_CORE bool "ST-Ericsson ABX500 Mixed Signal Circuit register functions" - default y if ARCH_U300 + default y if ARCH_U300 || ARCH_U8500 help Say yes here if you have the ABX500 Mixed Signal IC family chips. This core driver expose register access functions.
@@ -475,7 +475,7 @@ config EZX_PCAP config AB8500_CORE bool "ST-Ericsson AB8500 Mixed Signal Power Management chip" - depends on SPI=y && GENERIC_HARDIRQS + depends on SPI=y && GENERIC_HARDIRQS && ABX500_CORE select MFD_CORE help Select this option to enable access to AB8500 power management diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index b8c4b80e4c43..373783146b5e 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -4,6 +4,7 @@ * License Terms: GNU General Public License v2 * Author: Srinidhi Kasagar * Author: Rabin Vincent + * Changes: Mattias Wallin */ #include @@ -15,6 +16,7 @@ #include #include #include +#include #include #include @@ -22,71 +24,71 @@ * Interrupt register offsets * Bank : 0x0E */ -#define AB8500_IT_SOURCE1_REG 0x0E00 -#define AB8500_IT_SOURCE2_REG 0x0E01 -#define AB8500_IT_SOURCE3_REG 0x0E02 -#define AB8500_IT_SOURCE4_REG 0x0E03 -#define AB8500_IT_SOURCE5_REG 0x0E04 -#define AB8500_IT_SOURCE6_REG 0x0E05 -#define AB8500_IT_SOURCE7_REG 0x0E06 -#define AB8500_IT_SOURCE8_REG 0x0E07 -#define AB8500_IT_SOURCE19_REG 0x0E12 -#define AB8500_IT_SOURCE20_REG 0x0E13 -#define AB8500_IT_SOURCE21_REG 0x0E14 -#define AB8500_IT_SOURCE22_REG 0x0E15 -#define AB8500_IT_SOURCE23_REG 0x0E16 -#define AB8500_IT_SOURCE24_REG 0x0E17 +#define AB8500_IT_SOURCE1_REG 0x00 +#define AB8500_IT_SOURCE2_REG 0x01 +#define AB8500_IT_SOURCE3_REG 0x02 +#define AB8500_IT_SOURCE4_REG 0x03 +#define AB8500_IT_SOURCE5_REG 0x04 +#define AB8500_IT_SOURCE6_REG 0x05 +#define AB8500_IT_SOURCE7_REG 0x06 +#define AB8500_IT_SOURCE8_REG 0x07 +#define AB8500_IT_SOURCE19_REG 0x12 +#define AB8500_IT_SOURCE20_REG 0x13 +#define AB8500_IT_SOURCE21_REG 0x14 +#define AB8500_IT_SOURCE22_REG 0x15 +#define AB8500_IT_SOURCE23_REG 0x16 +#define AB8500_IT_SOURCE24_REG 0x17 /* * latch registers */ -#define AB8500_IT_LATCH1_REG 0x0E20 -#define AB8500_IT_LATCH2_REG 0x0E21 -#define AB8500_IT_LATCH3_REG 0x0E22 -#define AB8500_IT_LATCH4_REG 0x0E23 -#define AB8500_IT_LATCH5_REG 0x0E24 -#define AB8500_IT_LATCH6_REG 0x0E25 -#define AB8500_IT_LATCH7_REG 0x0E26 -#define AB8500_IT_LATCH8_REG 0x0E27 -#define AB8500_IT_LATCH9_REG 0x0E28 -#define AB8500_IT_LATCH10_REG 0x0E29 -#define AB8500_IT_LATCH19_REG 0x0E32 -#define AB8500_IT_LATCH20_REG 0x0E33 -#define AB8500_IT_LATCH21_REG 0x0E34 -#define AB8500_IT_LATCH22_REG 0x0E35 -#define AB8500_IT_LATCH23_REG 0x0E36 -#define AB8500_IT_LATCH24_REG 0x0E37 +#define AB8500_IT_LATCH1_REG 0x20 +#define AB8500_IT_LATCH2_REG 0x21 +#define AB8500_IT_LATCH3_REG 0x22 +#define AB8500_IT_LATCH4_REG 0x23 +#define AB8500_IT_LATCH5_REG 0x24 +#define AB8500_IT_LATCH6_REG 0x25 +#define AB8500_IT_LATCH7_REG 0x26 +#define AB8500_IT_LATCH8_REG 0x27 +#define AB8500_IT_LATCH9_REG 0x28 +#define AB8500_IT_LATCH10_REG 0x29 +#define AB8500_IT_LATCH19_REG 0x32 +#define AB8500_IT_LATCH20_REG 0x33 +#define AB8500_IT_LATCH21_REG 0x34 +#define AB8500_IT_LATCH22_REG 0x35 +#define AB8500_IT_LATCH23_REG 0x36 +#define AB8500_IT_LATCH24_REG 0x37 /* * mask registers */ -#define AB8500_IT_MASK1_REG 0x0E40 -#define AB8500_IT_MASK2_REG 0x0E41 -#define AB8500_IT_MASK3_REG 0x0E42 -#define AB8500_IT_MASK4_REG 0x0E43 -#define AB8500_IT_MASK5_REG 0x0E44 -#define AB8500_IT_MASK6_REG 0x0E45 -#define AB8500_IT_MASK7_REG 0x0E46 -#define AB8500_IT_MASK8_REG 0x0E47 -#define AB8500_IT_MASK9_REG 0x0E48 -#define AB8500_IT_MASK10_REG 0x0E49 -#define AB8500_IT_MASK11_REG 0x0E4A -#define AB8500_IT_MASK12_REG 0x0E4B -#define AB8500_IT_MASK13_REG 0x0E4C -#define AB8500_IT_MASK14_REG 0x0E4D -#define 
AB8500_IT_MASK15_REG 0x0E4E -#define AB8500_IT_MASK16_REG 0x0E4F -#define AB8500_IT_MASK17_REG 0x0E50 -#define AB8500_IT_MASK18_REG 0x0E51 -#define AB8500_IT_MASK19_REG 0x0E52 -#define AB8500_IT_MASK20_REG 0x0E53 -#define AB8500_IT_MASK21_REG 0x0E54 -#define AB8500_IT_MASK22_REG 0x0E55 -#define AB8500_IT_MASK23_REG 0x0E56 -#define AB8500_IT_MASK24_REG 0x0E57 - -#define AB8500_REV_REG 0x1080 +#define AB8500_IT_MASK1_REG 0x40 +#define AB8500_IT_MASK2_REG 0x41 +#define AB8500_IT_MASK3_REG 0x42 +#define AB8500_IT_MASK4_REG 0x43 +#define AB8500_IT_MASK5_REG 0x44 +#define AB8500_IT_MASK6_REG 0x45 +#define AB8500_IT_MASK7_REG 0x46 +#define AB8500_IT_MASK8_REG 0x47 +#define AB8500_IT_MASK9_REG 0x48 +#define AB8500_IT_MASK10_REG 0x49 +#define AB8500_IT_MASK11_REG 0x4A +#define AB8500_IT_MASK12_REG 0x4B +#define AB8500_IT_MASK13_REG 0x4C +#define AB8500_IT_MASK14_REG 0x4D +#define AB8500_IT_MASK15_REG 0x4E +#define AB8500_IT_MASK16_REG 0x4F +#define AB8500_IT_MASK17_REG 0x50 +#define AB8500_IT_MASK18_REG 0x51 +#define AB8500_IT_MASK19_REG 0x52 +#define AB8500_IT_MASK20_REG 0x53 +#define AB8500_IT_MASK21_REG 0x54 +#define AB8500_IT_MASK22_REG 0x55 +#define AB8500_IT_MASK23_REG 0x56 +#define AB8500_IT_MASK24_REG 0x57 + +#define AB8500_REV_REG 0x80 /* * Map interrupt numbers to the LATCH and MASK register offsets, Interrupt @@ -99,96 +101,132 @@ static const int ab8500_irq_regoffset[AB8500_NUM_IRQ_REGS] = { 0, 1, 2, 3, 4, 6, 7, 8, 9, 18, 19, 20, 21, }; -static int __ab8500_write(struct ab8500 *ab8500, u16 addr, u8 data) +static int ab8500_get_chip_id(struct device *dev) +{ + struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); + return (int)ab8500->chip_id; +} + +static int set_register_interruptible(struct ab8500 *ab8500, u8 bank, + u8 reg, u8 data) { int ret; + /* + * Put the u8 bank and u8 register together into a an u16. + * The bank on higher 8 bits and register in lower 8 bits. + * */ + u16 addr = ((u16)bank) << 8 | reg; dev_vdbg(ab8500->dev, "wr: addr %#x <= %#x\n", addr, data); + ret = mutex_lock_interruptible(&ab8500->lock); + if (ret) + return ret; + ret = ab8500->write(ab8500, addr, data); if (ret < 0) dev_err(ab8500->dev, "failed to write reg %#x: %d\n", addr, ret); + mutex_unlock(&ab8500->lock); return ret; } -/** - * ab8500_write() - write an AB8500 register - * @ab8500: device to write to - * @addr: address of the register - * @data: value to write - */ -int ab8500_write(struct ab8500 *ab8500, u16 addr, u8 data) +static int ab8500_set_register(struct device *dev, u8 bank, + u8 reg, u8 value) { - int ret; - - mutex_lock(&ab8500->lock); - ret = __ab8500_write(ab8500, addr, data); - mutex_unlock(&ab8500->lock); + struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); - return ret; + return set_register_interruptible(ab8500, bank, reg, value); } -EXPORT_SYMBOL_GPL(ab8500_write); -static int __ab8500_read(struct ab8500 *ab8500, u16 addr) +static int get_register_interruptible(struct ab8500 *ab8500, u8 bank, + u8 reg, u8 *value) { int ret; + /* put the u8 bank and u8 reg together into a an u16. 
+ * bank on higher 8 bits and reg in lower */ + u16 addr = ((u16)bank) << 8 | reg; + + ret = mutex_lock_interruptible(&ab8500->lock); + if (ret) + return ret; ret = ab8500->read(ab8500, addr); if (ret < 0) dev_err(ab8500->dev, "failed to read reg %#x: %d\n", addr, ret); + else + *value = ret; + mutex_unlock(&ab8500->lock); dev_vdbg(ab8500->dev, "rd: addr %#x => data %#x\n", addr, ret); return ret; } -/** - * ab8500_read() - read an AB8500 register - * @ab8500: device to read from - * @addr: address of the register - */ -int ab8500_read(struct ab8500 *ab8500, u16 addr) +static int ab8500_get_register(struct device *dev, u8 bank, + u8 reg, u8 *value) { - int ret; - - mutex_lock(&ab8500->lock); - ret = __ab8500_read(ab8500, addr); - mutex_unlock(&ab8500->lock); + struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); - return ret; + return get_register_interruptible(ab8500, bank, reg, value); } -EXPORT_SYMBOL_GPL(ab8500_read); - -/** - * ab8500_set_bits() - set a bitfield in an AB8500 register - * @ab8500: device to read from - * @addr: address of the register - * @mask: mask of the bitfield to modify - * @data: value to set to the bitfield - */ -int ab8500_set_bits(struct ab8500 *ab8500, u16 addr, u8 mask, u8 data) + +static int mask_and_set_register_interruptible(struct ab8500 *ab8500, u8 bank, + u8 reg, u8 bitmask, u8 bitvalues) { int ret; + u8 data; + /* put the u8 bank and u8 reg together into a an u16. + * bank on higher 8 bits and reg in lower */ + u16 addr = ((u16)bank) << 8 | reg; - mutex_lock(&ab8500->lock); + ret = mutex_lock_interruptible(&ab8500->lock); + if (ret) + return ret; - ret = __ab8500_read(ab8500, addr); - if (ret < 0) + ret = ab8500->read(ab8500, addr); + if (ret < 0) { + dev_err(ab8500->dev, "failed to read reg %#x: %d\n", + addr, ret); goto out; + } - ret &= ~mask; - ret |= data; + data = (u8)ret; + data = (~bitmask & data) | (bitmask & bitvalues); - ret = __ab8500_write(ab8500, addr, ret); + ret = ab8500->write(ab8500, addr, data); + if (ret < 0) + dev_err(ab8500->dev, "failed to write reg %#x: %d\n", + addr, ret); + dev_vdbg(ab8500->dev, "mask: addr %#x => data %#x\n", addr, data); out: mutex_unlock(&ab8500->lock); return ret; } -EXPORT_SYMBOL_GPL(ab8500_set_bits); + +static int ab8500_mask_and_set_register(struct device *dev, + u8 bank, u8 reg, u8 bitmask, u8 bitvalues) +{ + struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); + + return mask_and_set_register_interruptible(ab8500, bank, reg, + bitmask, bitvalues); + +} + +static struct abx500_ops ab8500_ops = { + .get_chip_id = ab8500_get_chip_id, + .get_register = ab8500_get_register, + .set_register = ab8500_set_register, + .get_register_page = NULL, + .set_register_page = NULL, + .mask_and_set_register = ab8500_mask_and_set_register, + .event_registers_startup_state_get = NULL, + .startup_irq_enabled = NULL, +}; static void ab8500_irq_lock(unsigned int irq) { @@ -213,7 +251,7 @@ static void ab8500_irq_sync_unlock(unsigned int irq) ab8500->oldmask[i] = new; reg = AB8500_IT_MASK1_REG + ab8500_irq_regoffset[i]; - ab8500_write(ab8500, reg, new); + set_register_interruptible(ab8500, AB8500_INTERRUPT, reg, new); } mutex_unlock(&ab8500->irq_lock); @@ -257,9 +295,11 @@ static irqreturn_t ab8500_irq(int irq, void *dev) for (i = 0; i < AB8500_NUM_IRQ_REGS; i++) { int regoffset = ab8500_irq_regoffset[i]; int status; + u8 value; - status = ab8500_read(ab8500, AB8500_IT_LATCH1_REG + regoffset); - if (status <= 0) + status = get_register_interruptible(ab8500, AB8500_INTERRUPT, + AB8500_IT_LATCH1_REG + regoffset, &value); + 
if (status < 0 || value == 0) continue; do { @@ -267,8 +307,8 @@ static irqreturn_t ab8500_irq(int irq, void *dev) int line = i * 8 + bit; handle_nested_irq(ab8500->irq_base + line); - status &= ~(1 << bit); - } while (status); + value &= ~(1 << bit); + } while (value); } return IRQ_HANDLED; @@ -381,6 +421,7 @@ int __devinit ab8500_init(struct ab8500 *ab8500) struct ab8500_platform_data *plat = dev_get_platdata(ab8500->dev); int ret; int i; + u8 value; if (plat) ab8500->irq_base = plat->irq_base; @@ -388,7 +429,8 @@ int __devinit ab8500_init(struct ab8500 *ab8500) mutex_init(&ab8500->lock); mutex_init(&ab8500->irq_lock); - ret = ab8500_read(ab8500, AB8500_REV_REG); + ret = get_register_interruptible(ab8500, AB8500_MISC, + AB8500_REV_REG, &value); if (ret < 0) return ret; @@ -397,28 +439,37 @@ int __devinit ab8500_init(struct ab8500 *ab8500) * 0x10 - Cut 1.0 * 0x11 - Cut 1.1 */ - if (ret == 0x0 || ret == 0x10 || ret == 0x11) { - ab8500->revision = ret; - dev_info(ab8500->dev, "detected chip, revision: %#x\n", ret); + if (value == 0x0 || value == 0x10 || value == 0x11) { + ab8500->revision = value; + dev_info(ab8500->dev, "detected chip, revision: %#x\n", value); } else { - dev_err(ab8500->dev, "unknown chip, revision: %#x\n", ret); + dev_err(ab8500->dev, "unknown chip, revision: %#x\n", value); return -EINVAL; } + ab8500->chip_id = value; if (plat && plat->init) plat->init(ab8500); /* Clear and mask all interrupts */ for (i = 0; i < 10; i++) { - ab8500_read(ab8500, AB8500_IT_LATCH1_REG + i); - ab8500_write(ab8500, AB8500_IT_MASK1_REG + i, 0xff); + get_register_interruptible(ab8500, AB8500_INTERRUPT, + AB8500_IT_LATCH1_REG + i, &value); + set_register_interruptible(ab8500, AB8500_INTERRUPT, + AB8500_IT_MASK1_REG + i, 0xff); } for (i = 18; i < 24; i++) { - ab8500_read(ab8500, AB8500_IT_LATCH1_REG + i); - ab8500_write(ab8500, AB8500_IT_MASK1_REG + i, 0xff); + get_register_interruptible(ab8500, AB8500_INTERRUPT, + AB8500_IT_LATCH1_REG + i, &value); + set_register_interruptible(ab8500, AB8500_INTERRUPT, + AB8500_IT_MASK1_REG + i, 0xff); } + ret = abx500_register_ops(ab8500->dev, &ab8500_ops); + if (ret) + return ret; + for (i = 0; i < AB8500_NUM_IRQ_REGS; i++) ab8500->mask[i] = ab8500->oldmask[i] = 0xff; diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index 28c7ae67cec9..db6b70f20511 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -33,9 +34,11 @@ * @max_uV: maximum voltage (for variable voltage supplies) * @min_uV: minimum voltage (for variable voltage supplies) * @fixed_uV: typical voltage (for fixed voltage supplies) + * @update_bank: bank to control on/off * @update_reg: register to control on/off * @mask: mask to enable/disable regulator * @enable: bits to enable the regulator in normal(high power) mode + * @voltage_bank: bank to control regulator voltage * @voltage_reg: register to control regulator voltage * @voltage_mask: mask to control regulator voltage * @supported_voltages: supported voltage table @@ -49,11 +52,13 @@ struct ab8500_regulator_info { int max_uV; int min_uV; int fixed_uV; - int update_reg; - int mask; - int enable; - int voltage_reg; - int voltage_mask; + u8 update_bank; + u8 update_reg; + u8 mask; + u8 enable; + u8 voltage_bank; + u8 voltage_reg; + u8 voltage_mask; int const *supported_voltages; int voltages_len; }; @@ -97,8 +102,8 @@ static int ab8500_regulator_enable(struct regulator_dev *rdev) if (regulator_id >= AB8500_NUM_REGULATORS) 
return -EINVAL; - ret = ab8500_set_bits(info->ab8500, info->update_reg, - info->mask, info->enable); + ret = abx500_mask_and_set_register_interruptible(info->dev, + info->update_bank, info->update_reg, info->mask, info->enable); if (ret < 0) dev_err(rdev_get_dev(rdev), "couldn't set enable bits for regulator\n"); @@ -114,8 +119,8 @@ static int ab8500_regulator_disable(struct regulator_dev *rdev) if (regulator_id >= AB8500_NUM_REGULATORS) return -EINVAL; - ret = ab8500_set_bits(info->ab8500, info->update_reg, - info->mask, 0x0); + ret = abx500_mask_and_set_register_interruptible(info->dev, + info->update_bank, info->update_reg, info->mask, 0x0); if (ret < 0) dev_err(rdev_get_dev(rdev), "couldn't set disable bits for regulator\n"); @@ -126,19 +131,21 @@ static int ab8500_regulator_is_enabled(struct regulator_dev *rdev) { int regulator_id, ret; struct ab8500_regulator_info *info = rdev_get_drvdata(rdev); + u8 value; regulator_id = rdev_get_id(rdev); if (regulator_id >= AB8500_NUM_REGULATORS) return -EINVAL; - ret = ab8500_read(info->ab8500, info->update_reg); + ret = abx500_get_register_interruptible(info->dev, + info->update_bank, info->update_reg, &value); if (ret < 0) { dev_err(rdev_get_dev(rdev), "couldn't read 0x%x register\n", info->update_reg); return ret; } - if (ret & info->mask) + if (value & info->mask) return true; else return false; @@ -165,14 +172,16 @@ static int ab8500_list_voltage(struct regulator_dev *rdev, unsigned selector) static int ab8500_regulator_get_voltage(struct regulator_dev *rdev) { - int regulator_id, ret, val; + int regulator_id, ret; struct ab8500_regulator_info *info = rdev_get_drvdata(rdev); + u8 value; regulator_id = rdev_get_id(rdev); if (regulator_id >= AB8500_NUM_REGULATORS) return -EINVAL; - ret = ab8500_read(info->ab8500, info->voltage_reg); + ret = abx500_get_register_interruptible(info->dev, info->voltage_bank, + info->voltage_reg, &value); if (ret < 0) { dev_err(rdev_get_dev(rdev), "couldn't read voltage reg for regulator\n"); @@ -180,11 +189,11 @@ static int ab8500_regulator_get_voltage(struct regulator_dev *rdev) } /* vintcore has a different layout */ - val = ret & info->voltage_mask; + value &= info->voltage_mask; if (regulator_id == AB8500_LDO_INTCORE) - ret = info->supported_voltages[val >> 0x3]; + ret = info->supported_voltages[value >> 0x3]; else - ret = info->supported_voltages[val]; + ret = info->supported_voltages[value]; return ret; } @@ -224,8 +233,9 @@ static int ab8500_regulator_set_voltage(struct regulator_dev *rdev, } /* set the registers for the request */ - ret = ab8500_set_bits(info->ab8500, info->voltage_reg, - info->voltage_mask, ret); + ret = abx500_mask_and_set_register_interruptible(info->dev, + info->voltage_bank, info->voltage_reg, + info->voltage_mask, (u8)ret); if (ret < 0) dev_err(rdev_get_dev(rdev), "couldn't set voltage reg for regulator\n"); @@ -262,9 +272,9 @@ static struct regulator_ops ab8500_ldo_fixed_ops = { .list_voltage = ab8500_list_voltage, }; -#define AB8500_LDO(_id, min, max, reg, reg_mask, reg_enable, \ - volt_reg, volt_mask, voltages, \ - len_volts) \ +#define AB8500_LDO(_id, min, max, bank, reg, reg_mask, \ + reg_enable, volt_bank, volt_reg, volt_mask, \ + voltages, len_volts) \ { \ .desc = { \ .name = "LDO-" #_id, \ @@ -275,9 +285,11 @@ static struct regulator_ops ab8500_ldo_fixed_ops = { }, \ .min_uV = (min) * 1000, \ .max_uV = (max) * 1000, \ + .update_bank = bank, \ .update_reg = reg, \ .mask = reg_mask, \ .enable = reg_enable, \ + .voltage_bank = volt_bank, \ .voltage_reg = volt_reg, \ 
.voltage_mask = volt_mask, \ .supported_voltages = voltages, \ @@ -285,8 +297,8 @@ static struct regulator_ops ab8500_ldo_fixed_ops = { .fixed_uV = 0, \ } -#define AB8500_FIXED_LDO(_id, fixed, reg, reg_mask, \ - reg_enable) \ +#define AB8500_FIXED_LDO(_id, fixed, bank, reg, \ + reg_mask, reg_enable) \ { \ .desc = { \ .name = "LDO-" #_id, \ @@ -296,6 +308,7 @@ static struct regulator_ops ab8500_ldo_fixed_ops = { .owner = THIS_MODULE, \ }, \ .fixed_uV = fixed * 1000, \ + .update_bank = bank, \ .update_reg = reg, \ .mask = reg_mask, \ .enable = reg_enable, \ @@ -304,28 +317,29 @@ static struct regulator_ops ab8500_ldo_fixed_ops = { static struct ab8500_regulator_info ab8500_regulator_info[] = { /* * Variable Voltage LDOs - * name, min uV, max uV, ctrl reg, reg mask, enable mask, - * volt ctrl reg, volt ctrl mask, volt table, num supported volts + * name, min uV, max uV, ctrl bank, ctrl reg, reg mask, enable mask, + * volt ctrl bank, volt ctrl reg, volt ctrl mask, volt table, + * num supported volts */ - AB8500_LDO(AUX1, 1100, 3300, 0x0409, 0x3, 0x1, 0x041f, 0xf, + AB8500_LDO(AUX1, 1100, 3300, 0x04, 0x09, 0x3, 0x1, 0x04, 0x1f, 0xf, ldo_vauxn_voltages, ARRAY_SIZE(ldo_vauxn_voltages)), - AB8500_LDO(AUX2, 1100, 3300, 0x0409, 0xc, 0x4, 0x0420, 0xf, + AB8500_LDO(AUX2, 1100, 3300, 0x04, 0x09, 0xc, 0x4, 0x04, 0x20, 0xf, ldo_vauxn_voltages, ARRAY_SIZE(ldo_vauxn_voltages)), - AB8500_LDO(AUX3, 1100, 3300, 0x040a, 0x3, 0x1, 0x0421, 0xf, + AB8500_LDO(AUX3, 1100, 3300, 0x04, 0x0a, 0x3, 0x1, 0x04, 0x21, 0xf, ldo_vauxn_voltages, ARRAY_SIZE(ldo_vauxn_voltages)), - AB8500_LDO(INTCORE, 1100, 3300, 0x0380, 0x4, 0x4, 0x0380, 0x38, + AB8500_LDO(INTCORE, 1100, 3300, 0x03, 0x80, 0x4, 0x4, 0x03, 0x80, 0x38, ldo_vintcore_voltages, ARRAY_SIZE(ldo_vintcore_voltages)), /* * Fixed Voltage LDOs - * name, o/p uV, ctrl reg, enable, disable + * name, o/p uV, ctrl bank, ctrl reg, enable, disable */ - AB8500_FIXED_LDO(TVOUT, 2000, 0x0380, 0x2, 0x2), - AB8500_FIXED_LDO(AUDIO, 2000, 0x0383, 0x2, 0x2), - AB8500_FIXED_LDO(ANAMIC1, 2050, 0x0383, 0x4, 0x4), - AB8500_FIXED_LDO(ANAMIC2, 2050, 0x0383, 0x8, 0x8), - AB8500_FIXED_LDO(DMIC, 1800, 0x0383, 0x10, 0x10), - AB8500_FIXED_LDO(ANA, 1200, 0x0383, 0xc, 0x4), + AB8500_FIXED_LDO(TVOUT, 2000, 0x03, 0x80, 0x2, 0x2), + AB8500_FIXED_LDO(AUDIO, 2000, 0x03, 0x83, 0x2, 0x2), + AB8500_FIXED_LDO(ANAMIC1, 2050, 0x03, 0x83, 0x4, 0x4), + AB8500_FIXED_LDO(ANAMIC2, 2050, 0x03, 0x83, 0x8, 0x8), + AB8500_FIXED_LDO(DMIC, 1800, 0x03, 0x83, 0x10, 0x10), + AB8500_FIXED_LDO(ANA, 1200, 0x03, 0x83, 0xc, 0x4), }; static inline struct ab8500_regulator_info *find_regulator_info(int id) diff --git a/drivers/rtc/rtc-ab8500.c b/drivers/rtc/rtc-ab8500.c index 2fda03125e55..e346705aae92 100644 --- a/drivers/rtc/rtc-ab8500.c +++ b/drivers/rtc/rtc-ab8500.c @@ -14,26 +14,26 @@ #include #include #include +#include #include #include -#define AB8500_RTC_SOFF_STAT_REG 0x0F00 -#define AB8500_RTC_CC_CONF_REG 0x0F01 -#define AB8500_RTC_READ_REQ_REG 0x0F02 -#define AB8500_RTC_WATCH_TSECMID_REG 0x0F03 -#define AB8500_RTC_WATCH_TSECHI_REG 0x0F04 -#define AB8500_RTC_WATCH_TMIN_LOW_REG 0x0F05 -#define AB8500_RTC_WATCH_TMIN_MID_REG 0x0F06 -#define AB8500_RTC_WATCH_TMIN_HI_REG 0x0F07 -#define AB8500_RTC_ALRM_MIN_LOW_REG 0x0F08 -#define AB8500_RTC_ALRM_MIN_MID_REG 0x0F09 -#define AB8500_RTC_ALRM_MIN_HI_REG 0x0F0A -#define AB8500_RTC_STAT_REG 0x0F0B -#define AB8500_RTC_BKUP_CHG_REG 0x0F0C -#define AB8500_RTC_FORCE_BKUP_REG 0x0F0D -#define AB8500_RTC_CALIB_REG 0x0F0E -#define AB8500_RTC_SWITCH_STAT_REG 0x0F0F -#define AB8500_REV_REG 
0x1080 +#define AB8500_RTC_SOFF_STAT_REG 0x00 +#define AB8500_RTC_CC_CONF_REG 0x01 +#define AB8500_RTC_READ_REQ_REG 0x02 +#define AB8500_RTC_WATCH_TSECMID_REG 0x03 +#define AB8500_RTC_WATCH_TSECHI_REG 0x04 +#define AB8500_RTC_WATCH_TMIN_LOW_REG 0x05 +#define AB8500_RTC_WATCH_TMIN_MID_REG 0x06 +#define AB8500_RTC_WATCH_TMIN_HI_REG 0x07 +#define AB8500_RTC_ALRM_MIN_LOW_REG 0x08 +#define AB8500_RTC_ALRM_MIN_MID_REG 0x09 +#define AB8500_RTC_ALRM_MIN_HI_REG 0x0A +#define AB8500_RTC_STAT_REG 0x0B +#define AB8500_RTC_BKUP_CHG_REG 0x0C +#define AB8500_RTC_FORCE_BKUP_REG 0x0D +#define AB8500_RTC_CALIB_REG 0x0E +#define AB8500_RTC_SWITCH_STAT_REG 0x0F /* RtcReadRequest bits */ #define RTC_READ_REQUEST 0x01 @@ -46,13 +46,13 @@ #define COUNTS_PER_SEC (0xF000 / 60) #define AB8500_RTC_EPOCH 2000 -static const unsigned long ab8500_rtc_time_regs[] = { +static const u8 ab8500_rtc_time_regs[] = { AB8500_RTC_WATCH_TMIN_HI_REG, AB8500_RTC_WATCH_TMIN_MID_REG, AB8500_RTC_WATCH_TMIN_LOW_REG, AB8500_RTC_WATCH_TSECHI_REG, AB8500_RTC_WATCH_TSECMID_REG }; -static const unsigned long ab8500_rtc_alarm_regs[] = { +static const u8 ab8500_rtc_alarm_regs[] = { AB8500_RTC_ALRM_MIN_HI_REG, AB8500_RTC_ALRM_MIN_MID_REG, AB8500_RTC_ALRM_MIN_LOW_REG }; @@ -76,29 +76,30 @@ static unsigned long get_elapsed_seconds(int year) static int ab8500_rtc_read_time(struct device *dev, struct rtc_time *tm) { - struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); unsigned long timeout = jiffies + HZ; int retval, i; unsigned long mins, secs; unsigned char buf[ARRAY_SIZE(ab8500_rtc_time_regs)]; + u8 value; /* Request a data read */ - retval = ab8500_write(ab8500, AB8500_RTC_READ_REQ_REG, - RTC_READ_REQUEST); + retval = abx500_set_register_interruptible(dev, + AB8500_RTC, AB8500_RTC_READ_REQ_REG, RTC_READ_REQUEST); if (retval < 0) return retval; /* Early AB8500 chips will not clear the rtc read request bit */ - if (ab8500->revision == 0) { + if (abx500_get_chip_id(dev) == 0) { msleep(1); } else { /* Wait for some cycles after enabling the rtc read in ab8500 */ while (time_before(jiffies, timeout)) { - retval = ab8500_read(ab8500, AB8500_RTC_READ_REQ_REG); + retval = abx500_get_register_interruptible(dev, + AB8500_RTC, AB8500_RTC_READ_REQ_REG, &value); if (retval < 0) return retval; - if (!(retval & RTC_READ_REQUEST)) + if (!(value & RTC_READ_REQUEST)) break; msleep(1); @@ -107,10 +108,11 @@ static int ab8500_rtc_read_time(struct device *dev, struct rtc_time *tm) /* Read the Watchtime registers */ for (i = 0; i < ARRAY_SIZE(ab8500_rtc_time_regs); i++) { - retval = ab8500_read(ab8500, ab8500_rtc_time_regs[i]); + retval = abx500_get_register_interruptible(dev, + AB8500_RTC, ab8500_rtc_time_regs[i], &value); if (retval < 0) return retval; - buf[i] = retval; + buf[i] = value; } mins = (buf[0] << 16) | (buf[1] << 8) | buf[2]; @@ -128,7 +130,6 @@ static int ab8500_rtc_read_time(struct device *dev, struct rtc_time *tm) static int ab8500_rtc_set_time(struct device *dev, struct rtc_time *tm) { - struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); int retval, i; unsigned char buf[ARRAY_SIZE(ab8500_rtc_time_regs)]; unsigned long no_secs, no_mins, secs = 0; @@ -162,27 +163,29 @@ static int ab8500_rtc_set_time(struct device *dev, struct rtc_time *tm) buf[0] = (no_mins >> 16) & 0xFF; for (i = 0; i < ARRAY_SIZE(ab8500_rtc_time_regs); i++) { - retval = ab8500_write(ab8500, ab8500_rtc_time_regs[i], buf[i]); + retval = abx500_set_register_interruptible(dev, AB8500_RTC, + ab8500_rtc_time_regs[i], buf[i]); if (retval < 0) return retval; } /* Request a data write 
*/ - return ab8500_write(ab8500, AB8500_RTC_READ_REQ_REG, RTC_WRITE_REQUEST); + return abx500_set_register_interruptible(dev, AB8500_RTC, + AB8500_RTC_READ_REQ_REG, RTC_WRITE_REQUEST); } static int ab8500_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) { - struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); int retval, i; - int rtc_ctrl; + u8 rtc_ctrl, value; unsigned char buf[ARRAY_SIZE(ab8500_rtc_alarm_regs)]; unsigned long secs, mins; /* Check if the alarm is enabled or not */ - rtc_ctrl = ab8500_read(ab8500, AB8500_RTC_STAT_REG); - if (rtc_ctrl < 0) - return rtc_ctrl; + retval = abx500_get_register_interruptible(dev, AB8500_RTC, + AB8500_RTC_STAT_REG, &rtc_ctrl); + if (retval < 0) + return retval; if (rtc_ctrl & RTC_ALARM_ENA) alarm->enabled = 1; @@ -192,10 +195,11 @@ static int ab8500_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) alarm->pending = 0; for (i = 0; i < ARRAY_SIZE(ab8500_rtc_alarm_regs); i++) { - retval = ab8500_read(ab8500, ab8500_rtc_alarm_regs[i]); + retval = abx500_get_register_interruptible(dev, AB8500_RTC, + ab8500_rtc_alarm_regs[i], &value); if (retval < 0) return retval; - buf[i] = retval; + buf[i] = value; } mins = (buf[0] << 16) | (buf[1] << 8) | (buf[2]); @@ -211,15 +215,13 @@ static int ab8500_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) static int ab8500_rtc_irq_enable(struct device *dev, unsigned int enabled) { - struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); - - return ab8500_set_bits(ab8500, AB8500_RTC_STAT_REG, RTC_ALARM_ENA, - enabled ? RTC_ALARM_ENA : 0); + return abx500_mask_and_set_register_interruptible(dev, AB8500_RTC, + AB8500_RTC_STAT_REG, RTC_ALARM_ENA, + enabled ? RTC_ALARM_ENA : 0); } static int ab8500_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) { - struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); int retval, i; unsigned char buf[ARRAY_SIZE(ab8500_rtc_alarm_regs)]; unsigned long mins, secs = 0; @@ -247,7 +249,8 @@ static int ab8500_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) /* Set the alarm time */ for (i = 0; i < ARRAY_SIZE(ab8500_rtc_alarm_regs); i++) { - retval = ab8500_write(ab8500, ab8500_rtc_alarm_regs[i], buf[i]); + retval = abx500_set_register_interruptible(dev, AB8500_RTC, + ab8500_rtc_alarm_regs[i], buf[i]); if (retval < 0) return retval; } @@ -276,10 +279,9 @@ static const struct rtc_class_ops ab8500_rtc_ops = { static int __devinit ab8500_rtc_probe(struct platform_device *pdev) { - struct ab8500 *ab8500 = dev_get_drvdata(pdev->dev.parent); int err; struct rtc_device *rtc; - int rtc_ctrl; + u8 rtc_ctrl; int irq; irq = platform_get_irq_byname(pdev, "ALARM"); @@ -287,17 +289,18 @@ static int __devinit ab8500_rtc_probe(struct platform_device *pdev) return irq; /* For RTC supply test */ - err = ab8500_set_bits(ab8500, AB8500_RTC_STAT_REG, RTC_STATUS_DATA, - RTC_STATUS_DATA); + err = abx500_mask_and_set_register_interruptible(&pdev->dev, AB8500_RTC, + AB8500_RTC_STAT_REG, RTC_STATUS_DATA, RTC_STATUS_DATA); if (err < 0) return err; /* Wait for reset by the PorRtc */ msleep(1); - rtc_ctrl = ab8500_read(ab8500, AB8500_RTC_STAT_REG); - if (rtc_ctrl < 0) - return rtc_ctrl; + err = abx500_get_register_interruptible(&pdev->dev, AB8500_RTC, + AB8500_RTC_STAT_REG, &rtc_ctrl); + if (err < 0) + return err; /* Check if the RTC Supply fails */ if (!(rtc_ctrl & RTC_STATUS_DATA)) { diff --git a/include/linux/mfd/ab8500.h b/include/linux/mfd/ab8500.h index f5cec4500f38..d63b6050b183 100644 --- a/include/linux/mfd/ab8500.h +++ b/include/linux/mfd/ab8500.h @@ -9,6 
+9,29 @@ #include +/* + * AB8500 bank addresses + */ +#define AB8500_SYS_CTRL1_BLOCK 0x1 +#define AB8500_SYS_CTRL2_BLOCK 0x2 +#define AB8500_REGU_CTRL1 0x3 +#define AB8500_REGU_CTRL2 0x4 +#define AB8500_USB 0x5 +#define AB8500_TVOUT 0x6 +#define AB8500_DBI 0x7 +#define AB8500_ECI_AV_ACC 0x8 +#define AB8500_RESERVED 0x9 +#define AB8500_GPADC 0xA +#define AB8500_CHARGER 0xB +#define AB8500_GAS_GAUGE 0xC +#define AB8500_AUDIO 0xD +#define AB8500_INTERRUPT 0xE +#define AB8500_RTC 0xF +#define AB8500_MISC 0x10 +#define AB8500_DEBUG 0x12 +#define AB8500_PROD_TEST 0x13 +#define AB8500_OTP_EMUL 0x15 + /* * Interrupts */ @@ -99,6 +122,7 @@ struct ab8500 { int revision; int irq_base; int irq; + u8 chip_id; int (*write) (struct ab8500 *a8500, u16 addr, u8 data); int (*read) (struct ab8500 *a8500, u16 addr); @@ -124,10 +148,6 @@ struct ab8500_platform_data { struct regulator_init_data *regulator[AB8500_NUM_REGULATORS]; }; -extern int ab8500_write(struct ab8500 *a8500, u16 addr, u8 data); -extern int ab8500_read(struct ab8500 *a8500, u16 addr); -extern int ab8500_set_bits(struct ab8500 *a8500, u16 addr, u8 mask, u8 data); - extern int __devinit ab8500_init(struct ab8500 *ab8500); extern int __devexit ab8500_exit(struct ab8500 *ab8500); diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h index 390726fcbcb1..be7373c79bea 100644 --- a/include/linux/mfd/abx500.h +++ b/include/linux/mfd/abx500.h @@ -6,8 +6,7 @@ * * ABX500 core access functions. * The abx500 interface is used for the Analog Baseband chip - * ab3100, ab3550, ab5500 and possibly comming. It is not used for - * ab4500 and ab8500 since they are another family of chip. + * ab3100, ab3550, ab5500, and ab8500. * * Author: Mattias Wallin * Author: Mattias Nilsson -- cgit v1.2.3 From 38b340527aa44bb8d1b88ef1e5a4e26b27695c2b Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Wed, 8 Sep 2010 09:44:34 -0400 Subject: mfd: Update chip id of 88pm8607 Chipid of 88pm8607 is 0x40 or 0x50. Signed-off-by: Haojian Zhuang Signed-off-by: Samuel Ortiz --- drivers/mfd/88pm860x-core.c | 7 +++++-- include/linux/mfd/88pm860x.h | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c index 4db10a150369..20895e7a99c9 100644 --- a/drivers/mfd/88pm860x-core.c +++ b/drivers/mfd/88pm860x-core.c @@ -645,10 +645,13 @@ static void __devinit device_8607_init(struct pm860x_chip *chip, dev_err(chip->dev, "Failed to read CHIP ID: %d\n", ret); goto out; } - if ((ret & PM8607_VERSION_MASK) == PM8607_VERSION) + switch (ret & PM8607_VERSION_MASK) { + case 0x40: + case 0x50: dev_info(chip->dev, "Marvell 88PM8607 (ID: %02x) detected\n", ret); - else { + break; + default: dev_err(chip->dev, "Failed to detect Marvell 88PM8607. 
" "Chip ID: %02x\n", ret); goto out; diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h index bfd23bef7363..4db1fbd8969e 100644 --- a/include/linux/mfd/88pm860x.h +++ b/include/linux/mfd/88pm860x.h @@ -138,7 +138,7 @@ enum { PM8607_ID_RG_MAX, }; -#define PM8607_VERSION (0x40) /* 8607 chip ID */ +/* 8607 chip ID is 0x40 or 0x50 */ #define PM8607_VERSION_MASK (0xF0) /* 8607 chip ID mask */ /* Interrupt Registers */ -- cgit v1.2.3 From c26448c48448266480e1b6c371f897167060ceaf Mon Sep 17 00:00:00 2001 From: Gary King Date: Mon, 20 Sep 2010 00:18:27 +0200 Subject: mfd: Add basic tps6586x interrupt support Add support for enabling and disabling tps6586x subdevice interrupts Signed-off-by: Gary King Acked-by: Mike Rapoport Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 4 +- drivers/mfd/tps6586x.c | 200 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/tps6586x.h | 31 +++++++ 3 files changed, 233 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 01c928bca099..66779bdc9627 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -563,8 +563,8 @@ config MFD_JZ4740_ADC This driver is necessary for jz4740-battery and jz4740-hwmon driver. config MFD_TPS6586X - tristate "TPS6586x Power Management chips" - depends on I2C && GPIOLIB + bool "TPS6586x Power Management chips" + depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS select MFD_CORE help If you say yes here you get support for the TPS6586X series of diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c index 2f9336c3710c..117eb7cafe77 100644 --- a/drivers/mfd/tps6586x.c +++ b/drivers/mfd/tps6586x.c @@ -15,6 +15,8 @@ * published by the Free Software Foundation. */ +#include +#include #include #include #include @@ -29,16 +31,76 @@ #define TPS6586X_GPIOSET1 0x5d #define TPS6586X_GPIOSET2 0x5e +/* interrupt control registers */ +#define TPS6586X_INT_ACK1 0xb5 +#define TPS6586X_INT_ACK2 0xb6 +#define TPS6586X_INT_ACK3 0xb7 +#define TPS6586X_INT_ACK4 0xb8 + +/* interrupt mask registers */ +#define TPS6586X_INT_MASK1 0xb0 +#define TPS6586X_INT_MASK2 0xb1 +#define TPS6586X_INT_MASK3 0xb2 +#define TPS6586X_INT_MASK4 0xb3 +#define TPS6586X_INT_MASK5 0xb4 + /* device id */ #define TPS6586X_VERSIONCRC 0xcd #define TPS658621A_VERSIONCRC 0x15 +struct tps6586x_irq_data { + u8 mask_reg; + u8 mask_mask; +}; + +#define TPS6586X_IRQ(_reg, _mask) \ + { \ + .mask_reg = (_reg) - TPS6586X_INT_MASK1, \ + .mask_mask = (_mask), \ + } + +static const struct tps6586x_irq_data tps6586x_irqs[] = { + [TPS6586X_INT_PLDO_0] = TPS6586X_IRQ(TPS6586X_INT_MASK1, 1 << 0), + [TPS6586X_INT_PLDO_1] = TPS6586X_IRQ(TPS6586X_INT_MASK1, 1 << 1), + [TPS6586X_INT_PLDO_2] = TPS6586X_IRQ(TPS6586X_INT_MASK1, 1 << 2), + [TPS6586X_INT_PLDO_3] = TPS6586X_IRQ(TPS6586X_INT_MASK1, 1 << 3), + [TPS6586X_INT_PLDO_4] = TPS6586X_IRQ(TPS6586X_INT_MASK1, 1 << 4), + [TPS6586X_INT_PLDO_5] = TPS6586X_IRQ(TPS6586X_INT_MASK1, 1 << 5), + [TPS6586X_INT_PLDO_6] = TPS6586X_IRQ(TPS6586X_INT_MASK1, 1 << 6), + [TPS6586X_INT_PLDO_7] = TPS6586X_IRQ(TPS6586X_INT_MASK1, 1 << 7), + [TPS6586X_INT_COMP_DET] = TPS6586X_IRQ(TPS6586X_INT_MASK4, 1 << 0), + [TPS6586X_INT_ADC] = TPS6586X_IRQ(TPS6586X_INT_MASK2, 1 << 1), + [TPS6586X_INT_PLDO_8] = TPS6586X_IRQ(TPS6586X_INT_MASK2, 1 << 2), + [TPS6586X_INT_PLDO_9] = TPS6586X_IRQ(TPS6586X_INT_MASK2, 1 << 3), + [TPS6586X_INT_PSM_0] = TPS6586X_IRQ(TPS6586X_INT_MASK2, 1 << 4), + [TPS6586X_INT_PSM_1] = TPS6586X_IRQ(TPS6586X_INT_MASK2, 1 << 5), + [TPS6586X_INT_PSM_2] = 
TPS6586X_IRQ(TPS6586X_INT_MASK2, 1 << 6), + [TPS6586X_INT_PSM_3] = TPS6586X_IRQ(TPS6586X_INT_MASK2, 1 << 7), + [TPS6586X_INT_RTC_ALM1] = TPS6586X_IRQ(TPS6586X_INT_MASK5, 1 << 4), + [TPS6586X_INT_ACUSB_OVP] = TPS6586X_IRQ(TPS6586X_INT_MASK5, 0x03), + [TPS6586X_INT_USB_DET] = TPS6586X_IRQ(TPS6586X_INT_MASK5, 1 << 2), + [TPS6586X_INT_AC_DET] = TPS6586X_IRQ(TPS6586X_INT_MASK5, 1 << 3), + [TPS6586X_INT_BAT_DET] = TPS6586X_IRQ(TPS6586X_INT_MASK3, 1 << 0), + [TPS6586X_INT_CHG_STAT] = TPS6586X_IRQ(TPS6586X_INT_MASK4, 0xfc), + [TPS6586X_INT_CHG_TEMP] = TPS6586X_IRQ(TPS6586X_INT_MASK3, 0x06), + [TPS6586X_INT_PP] = TPS6586X_IRQ(TPS6586X_INT_MASK3, 0xf0), + [TPS6586X_INT_RESUME] = TPS6586X_IRQ(TPS6586X_INT_MASK5, 1 << 5), + [TPS6586X_INT_LOW_SYS] = TPS6586X_IRQ(TPS6586X_INT_MASK5, 1 << 6), + [TPS6586X_INT_RTC_ALM2] = TPS6586X_IRQ(TPS6586X_INT_MASK4, 1 << 1), +}; + struct tps6586x { struct mutex lock; struct device *dev; struct i2c_client *client; struct gpio_chip gpio; + struct irq_chip irq_chip; + struct mutex irq_lock; + int irq_base; + u32 irq_en; + u8 mask_cache[5]; + u8 mask_reg[5]; }; static inline int __tps6586x_read(struct i2c_client *client, @@ -262,6 +324,129 @@ static int tps6586x_remove_subdevs(struct tps6586x *tps6586x) return device_for_each_child(tps6586x->dev, NULL, __remove_subdev); } +static void tps6586x_irq_lock(unsigned int irq) +{ + struct tps6586x *tps6586x = get_irq_chip_data(irq); + + mutex_lock(&tps6586x->irq_lock); +} + +static void tps6586x_irq_enable(unsigned int irq) +{ + struct tps6586x *tps6586x = get_irq_chip_data(irq); + unsigned int __irq = irq - tps6586x->irq_base; + const struct tps6586x_irq_data *data = &tps6586x_irqs[__irq]; + + tps6586x->mask_reg[data->mask_reg] &= ~data->mask_mask; + tps6586x->irq_en |= (1 << __irq); +} + +static void tps6586x_irq_disable(unsigned int irq) +{ + struct tps6586x *tps6586x = get_irq_chip_data(irq); + + unsigned int __irq = irq - tps6586x->irq_base; + const struct tps6586x_irq_data *data = &tps6586x_irqs[__irq]; + + tps6586x->mask_reg[data->mask_reg] |= data->mask_mask; + tps6586x->irq_en &= ~(1 << __irq); +} + +static void tps6586x_irq_sync_unlock(unsigned int irq) +{ + struct tps6586x *tps6586x = get_irq_chip_data(irq); + int i; + + for (i = 0; i < ARRAY_SIZE(tps6586x->mask_reg); i++) { + if (tps6586x->mask_reg[i] != tps6586x->mask_cache[i]) { + if (!WARN_ON(tps6586x_write(tps6586x->dev, + TPS6586X_INT_MASK1 + i, + tps6586x->mask_reg[i]))) + tps6586x->mask_cache[i] = tps6586x->mask_reg[i]; + } + } + + mutex_unlock(&tps6586x->irq_lock); +} + +static irqreturn_t tps6586x_irq(int irq, void *data) +{ + struct tps6586x *tps6586x = data; + u32 acks; + int ret = 0; + + ret = tps6586x_reads(tps6586x->dev, TPS6586X_INT_ACK1, + sizeof(acks), (uint8_t *)&acks); + + if (ret < 0) { + dev_err(tps6586x->dev, "failed to read interrupt status\n"); + return IRQ_NONE; + } + + acks = le32_to_cpu(acks); + + while (acks) { + int i = __ffs(acks); + + if (tps6586x->irq_en & (1 << i)) + handle_nested_irq(tps6586x->irq_base + i); + + acks &= ~(1 << i); + } + + return IRQ_HANDLED; +} + +static int __devinit tps6586x_irq_init(struct tps6586x *tps6586x, int irq, + int irq_base) +{ + int i, ret; + u8 tmp[4]; + + if (!irq_base) { + dev_warn(tps6586x->dev, "No interrupt support on IRQ base\n"); + return -EINVAL; + } + + mutex_init(&tps6586x->irq_lock); + for (i = 0; i < 5; i++) { + tps6586x->mask_cache[i] = 0xff; + tps6586x->mask_reg[i] = 0xff; + tps6586x_write(tps6586x->dev, TPS6586X_INT_MASK1 + i, 0xff); + } + + tps6586x_reads(tps6586x->dev, 
TPS6586X_INT_ACK1, sizeof(tmp), tmp); + + tps6586x->irq_base = irq_base; + + tps6586x->irq_chip.name = "tps6586x"; + tps6586x->irq_chip.enable = tps6586x_irq_enable; + tps6586x->irq_chip.disable = tps6586x_irq_disable; + tps6586x->irq_chip.bus_lock = tps6586x_irq_lock; + tps6586x->irq_chip.bus_sync_unlock = tps6586x_irq_sync_unlock; + + for (i = 0; i < ARRAY_SIZE(tps6586x_irqs); i++) { + int __irq = i + tps6586x->irq_base; + set_irq_chip_data(__irq, tps6586x); + set_irq_chip_and_handler(__irq, &tps6586x->irq_chip, + handle_simple_irq); + set_irq_nested_thread(__irq, 1); +#ifdef CONFIG_ARM + set_irq_flags(__irq, IRQF_VALID); +#endif + } + + ret = request_threaded_irq(irq, NULL, tps6586x_irq, IRQF_ONESHOT, + "tps6586x", tps6586x); + + if (!ret) { + device_init_wakeup(tps6586x->dev, 1); + enable_irq_wake(irq); + } + + return ret; +} + static int __devinit tps6586x_add_subdevs(struct tps6586x *tps6586x, struct tps6586x_platform_data *pdata) { @@ -327,6 +512,15 @@ static int __devinit tps6586x_i2c_probe(struct i2c_client *client, mutex_init(&tps6586x->lock); + if (client->irq) { + ret = tps6586x_irq_init(tps6586x, client->irq, + pdata->irq_base); + if (ret) { + dev_err(&client->dev, "IRQ init failed: %d\n", ret); + goto err_irq_init; + } + } + ret = tps6586x_add_subdevs(tps6586x, pdata); if (ret) { dev_err(&client->dev, "add devices failed: %d\n", ret); @@ -338,6 +532,9 @@ static int __devinit tps6586x_i2c_probe(struct i2c_client *client, return 0; err_add_devs: + if (client->irq) + free_irq(client->irq, tps6586x); +err_irq_init: kfree(tps6586x); return ret; } @@ -348,6 +545,9 @@ static int __devexit tps6586x_i2c_remove(struct i2c_client *client) struct tps6586x_platform_data *pdata = client->dev.platform_data; int ret; + if (client->irq) + free_irq(client->irq, tps6586x); + if (pdata->gpio_base) { ret = gpiochip_remove(&tps6586x->gpio); if (ret) diff --git a/include/linux/mfd/tps6586x.h b/include/linux/mfd/tps6586x.h index 772b3ae640af..b6bab1b04e25 100644 --- a/include/linux/mfd/tps6586x.h +++ b/include/linux/mfd/tps6586x.h @@ -18,6 +18,36 @@ enum { TPS6586X_ID_LDO_RTC, }; +enum { + TPS6586X_INT_PLDO_0, + TPS6586X_INT_PLDO_1, + TPS6586X_INT_PLDO_2, + TPS6586X_INT_PLDO_3, + TPS6586X_INT_PLDO_4, + TPS6586X_INT_PLDO_5, + TPS6586X_INT_PLDO_6, + TPS6586X_INT_PLDO_7, + TPS6586X_INT_COMP_DET, + TPS6586X_INT_ADC, + TPS6586X_INT_PLDO_8, + TPS6586X_INT_PLDO_9, + TPS6586X_INT_PSM_0, + TPS6586X_INT_PSM_1, + TPS6586X_INT_PSM_2, + TPS6586X_INT_PSM_3, + TPS6586X_INT_RTC_ALM1, + TPS6586X_INT_ACUSB_OVP, + TPS6586X_INT_USB_DET, + TPS6586X_INT_AC_DET, + TPS6586X_INT_BAT_DET, + TPS6586X_INT_CHG_STAT, + TPS6586X_INT_CHG_TEMP, + TPS6586X_INT_PP, + TPS6586X_INT_RESUME, + TPS6586X_INT_LOW_SYS, + TPS6586X_INT_RTC_ALM2, +}; + struct tps6586x_subdev_info { int id; const char *name; @@ -29,6 +59,7 @@ struct tps6586x_platform_data { struct tps6586x_subdev_info *subdevs; int gpio_base; + int irq_base; }; /* -- cgit v1.2.3 From c6252e9ce7f51a2af66bd69c93afb37191467c96 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 22 Sep 2010 14:58:30 +0100 Subject: mfd: Declare abx500_remove_ops() Otherwise sparse warns about a public symbol with no declaration and the compiler can't spot if the callers and users have different signatures for the function. 
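A minimal illustration of what the shared declaration buys (the conflicting
definition below is hypothetical, not taken from this patch): once the
prototype

	void abx500_remove_ops(struct device *dev);

lives in include/linux/mfd/abx500.h and every definer and caller includes
that header, a divergent signature such as

	void abx500_remove_ops(int id) { }	/* error: conflicting types */

is rejected at compile time instead of being silently accepted inside its
own translation unit, and sparse stops warning about a public symbol with
no declaration.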
Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- include/linux/mfd/abx500.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h index be7373c79bea..67bd6f7ecf32 100644 --- a/include/linux/mfd/abx500.h +++ b/include/linux/mfd/abx500.h @@ -229,4 +229,5 @@ struct abx500_ops { }; int abx500_register_ops(struct device *core_dev, struct abx500_ops *ops); +void abx500_remove_ops(struct device *dev); #endif -- cgit v1.2.3 From 5f2545fa156f3d4d327038d7664608e146809a3c Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 30 Sep 2010 21:55:36 +0100 Subject: mfd: Allow for bypass of cell resource conflict check The upcoming VIA VX855 MFD driver needs to communicate resources to subdevices where the resources may be claimed by ACPI. Add a flag to mfd_cell to request that resources are not policed. Signed-off-by: Daniel Drake Signed-off-by: Samuel Ortiz --- drivers/mfd/mfd-core.c | 8 +++++--- include/linux/mfd/core.h | 3 +++ 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index 1823a57b7d8f..d1c8605d4ed4 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -65,9 +65,11 @@ static int mfd_add_device(struct device *parent, int id, res[r].end = cell->resources[r].end; } - ret = acpi_check_resource_conflict(res); - if (ret) - goto fail_res; + if (!cell->ignore_resource_conflicts) { + ret = acpi_check_resource_conflict(res); + if (ret) + goto fail_res; + } } ret = platform_device_add_resources(pdev, res, cell->num_resources); diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index 11d740b8831d..cb93d80aa642 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -44,6 +44,9 @@ struct mfd_cell { */ int num_resources; const struct resource *resources; + + /* don't check for resource conflicts */ + bool ignore_resource_conflicts; }; extern int mfd_add_devices(struct device *parent, int id, -- cgit v1.2.3 From b4e017e332b873133602f47ae8cacfae64ab82c5 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 28 Sep 2010 16:38:41 +0200 Subject: mfd: Remove deprecated mc13783 functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The last user is gone since v2.6.34-rc1~40 Signed-off-by: Uwe Kleine-König Signed-off-by: Samuel Ortiz --- include/linux/mfd/mc13783.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/mc13783.h b/include/linux/mfd/mc13783.h index 0fa44fb8dd26..5f6aff55eeb7 100644 --- a/include/linux/mfd/mc13783.h +++ b/include/linux/mfd/mc13783.h @@ -35,24 +35,6 @@ int mc13783_irq_status(struct mc13783 *mc13783, int irq, int *enabled, int *pending); int mc13783_irq_ack(struct mc13783 *mc13783, int irq); -static inline int mc13783_mask(struct mc13783 *mc13783, int irq) __deprecated; -static inline int mc13783_mask(struct mc13783 *mc13783, int irq) -{ - return mc13783_irq_mask(mc13783, irq); -} - -static inline int mc13783_unmask(struct mc13783 *mc13783, int irq) __deprecated; -static inline int mc13783_unmask(struct mc13783 *mc13783, int irq) -{ - return mc13783_irq_unmask(mc13783, irq); -} - -static inline int mc13783_ackirq(struct mc13783 *mc13783, int irq) __deprecated; -static inline int mc13783_ackirq(struct mc13783 *mc13783, int irq) -{ - return mc13783_irq_ack(mc13783, irq); -} - #define MC13783_ADC0 43 #define MC13783_ADC0_ADREFEN (1 << 10) #define MC13783_ADC0_ADREFMODE (1 << 11) 
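For reference, any remaining caller of the deleted wrappers migrates with a
mechanical rename; a sketch, using MC13783_IRQ_ADCDONE purely as an example
interrupt from this driver:

	/* before: deprecated one-line wrappers removed above */
	mc13783_mask(mc13783, MC13783_IRQ_ADCDONE);
	mc13783_ackirq(mc13783, MC13783_IRQ_ADCDONE);
	mc13783_unmask(mc13783, MC13783_IRQ_ADCDONE);

	/* after: call the mc13783_irq_* functions directly */
	mc13783_irq_mask(mc13783, MC13783_IRQ_ADCDONE);
	mc13783_irq_ack(mc13783, MC13783_IRQ_ADCDONE);
	mc13783_irq_unmask(mc13783, MC13783_IRQ_ADCDONE);

The wrappers were plain aliases for these calls, so behaviour is unchanged.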
-- cgit v1.2.3 From 8e00593557c3c5a7bc6f636412a1cadcf4624232 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 28 Sep 2010 16:37:20 +0200 Subject: mfd: Add mc13892 support to mc13xxx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mc13892 is the companion PMIC for Freescale's i.MX51. It's similar enough to mc13782 to support it in a single driver. This patch introduces enough compatibility cruft to keep all users of the superseded mc13783 driver unchanged. Signed-off-by: Uwe Kleine-König Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 9 +- drivers/mfd/Makefile | 2 +- drivers/mfd/mc13783-core.c | 743 --------------------------------------- drivers/mfd/mc13xxx-core.c | 840 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/mc13783.h | 247 ++++++------- include/linux/mfd/mc13xxx.h | 154 ++++++++ 6 files changed, 1113 insertions(+), 882 deletions(-) delete mode 100644 drivers/mfd/mc13783-core.c create mode 100644 drivers/mfd/mc13xxx-core.c create mode 100644 include/linux/mfd/mc13xxx.h (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 9735f581574d..6c6b9f02d177 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -409,11 +409,16 @@ config MFD_PCF50633 so that function-specific drivers can bind to them. config MFD_MC13783 - tristate "Support Freescale MC13783" + tristate + +config MFD_MC13XXX + tristate "Support Freescale MC13783 and MC13892" depends on SPI_MASTER select MFD_CORE + select MFD_MC13783 help - Support for the Freescale (Atlas) MC13783 PMIC and audio CODEC. + Support for the Freescale (Atlas) PMIC and audio CODECs + MC13783 and MC13892. This driver provides common support for accessing the device, additional drivers must be enabled in order to use the functionality of the device. diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index d18a6ab2ab58..70b26999dc81 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -39,7 +39,7 @@ obj-$(CONFIG_TWL4030_POWER) += twl4030-power.o obj-$(CONFIG_TWL4030_CODEC) += twl4030-codec.o obj-$(CONFIG_TWL6030_PWM) += twl6030-pwm.o -obj-$(CONFIG_MFD_MC13783) += mc13783-core.o +obj-$(CONFIG_MFD_MC13XXX) += mc13xxx-core.o obj-$(CONFIG_MFD_CORE) += mfd-core.o diff --git a/drivers/mfd/mc13783-core.c b/drivers/mfd/mc13783-core.c deleted file mode 100644 index 2506e6888507..000000000000 --- a/drivers/mfd/mc13783-core.c +++ /dev/null @@ -1,743 +0,0 @@ -/* - * Copyright 2009 Pengutronix - * Uwe Kleine-Koenig - * - * loosely based on an earlier driver that has - * Copyright 2009 Pengutronix, Sascha Hauer - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include - -struct mc13783 { - struct spi_device *spidev; - struct mutex lock; - int irq; - int adcflags; - - irq_handler_t irqhandler[MC13783_NUM_IRQ]; - void *irqdata[MC13783_NUM_IRQ]; -}; - -#define MC13783_REG_REVISION 7 -#define MC13783_REG_ADC_0 43 -#define MC13783_REG_ADC_1 44 -#define MC13783_REG_ADC_2 45 - -#define MC13783_IRQSTAT0 0 -#define MC13783_IRQSTAT0_ADCDONEI (1 << 0) -#define MC13783_IRQSTAT0_ADCBISDONEI (1 << 1) -#define MC13783_IRQSTAT0_TSI (1 << 2) -#define MC13783_IRQSTAT0_WHIGHI (1 << 3) -#define MC13783_IRQSTAT0_WLOWI (1 << 4) -#define MC13783_IRQSTAT0_CHGDETI (1 << 6) -#define MC13783_IRQSTAT0_CHGOVI (1 << 7) -#define MC13783_IRQSTAT0_CHGREVI (1 << 8) -#define MC13783_IRQSTAT0_CHGSHORTI (1 << 9) -#define MC13783_IRQSTAT0_CCCVI (1 << 10) -#define MC13783_IRQSTAT0_CHGCURRI (1 << 11) -#define MC13783_IRQSTAT0_BPONI (1 << 12) -#define MC13783_IRQSTAT0_LOBATLI (1 << 13) -#define MC13783_IRQSTAT0_LOBATHI (1 << 14) -#define MC13783_IRQSTAT0_UDPI (1 << 15) -#define MC13783_IRQSTAT0_USBI (1 << 16) -#define MC13783_IRQSTAT0_IDI (1 << 19) -#define MC13783_IRQSTAT0_SE1I (1 << 21) -#define MC13783_IRQSTAT0_CKDETI (1 << 22) -#define MC13783_IRQSTAT0_UDMI (1 << 23) - -#define MC13783_IRQMASK0 1 -#define MC13783_IRQMASK0_ADCDONEM MC13783_IRQSTAT0_ADCDONEI -#define MC13783_IRQMASK0_ADCBISDONEM MC13783_IRQSTAT0_ADCBISDONEI -#define MC13783_IRQMASK0_TSM MC13783_IRQSTAT0_TSI -#define MC13783_IRQMASK0_WHIGHM MC13783_IRQSTAT0_WHIGHI -#define MC13783_IRQMASK0_WLOWM MC13783_IRQSTAT0_WLOWI -#define MC13783_IRQMASK0_CHGDETM MC13783_IRQSTAT0_CHGDETI -#define MC13783_IRQMASK0_CHGOVM MC13783_IRQSTAT0_CHGOVI -#define MC13783_IRQMASK0_CHGREVM MC13783_IRQSTAT0_CHGREVI -#define MC13783_IRQMASK0_CHGSHORTM MC13783_IRQSTAT0_CHGSHORTI -#define MC13783_IRQMASK0_CCCVM MC13783_IRQSTAT0_CCCVI -#define MC13783_IRQMASK0_CHGCURRM MC13783_IRQSTAT0_CHGCURRI -#define MC13783_IRQMASK0_BPONM MC13783_IRQSTAT0_BPONI -#define MC13783_IRQMASK0_LOBATLM MC13783_IRQSTAT0_LOBATLI -#define MC13783_IRQMASK0_LOBATHM MC13783_IRQSTAT0_LOBATHI -#define MC13783_IRQMASK0_UDPM MC13783_IRQSTAT0_UDPI -#define MC13783_IRQMASK0_USBM MC13783_IRQSTAT0_USBI -#define MC13783_IRQMASK0_IDM MC13783_IRQSTAT0_IDI -#define MC13783_IRQMASK0_SE1M MC13783_IRQSTAT0_SE1I -#define MC13783_IRQMASK0_CKDETM MC13783_IRQSTAT0_CKDETI -#define MC13783_IRQMASK0_UDMM MC13783_IRQSTAT0_UDMI - -#define MC13783_IRQSTAT1 3 -#define MC13783_IRQSTAT1_1HZI (1 << 0) -#define MC13783_IRQSTAT1_TODAI (1 << 1) -#define MC13783_IRQSTAT1_ONOFD1I (1 << 3) -#define MC13783_IRQSTAT1_ONOFD2I (1 << 4) -#define MC13783_IRQSTAT1_ONOFD3I (1 << 5) -#define MC13783_IRQSTAT1_SYSRSTI (1 << 6) -#define MC13783_IRQSTAT1_RTCRSTI (1 << 7) -#define MC13783_IRQSTAT1_PCI (1 << 8) -#define MC13783_IRQSTAT1_WARMI (1 << 9) -#define MC13783_IRQSTAT1_MEMHLDI (1 << 10) -#define MC13783_IRQSTAT1_PWRRDYI (1 << 11) -#define MC13783_IRQSTAT1_THWARNLI (1 << 12) -#define MC13783_IRQSTAT1_THWARNHI (1 << 13) -#define MC13783_IRQSTAT1_CLKI (1 << 14) -#define MC13783_IRQSTAT1_SEMAFI (1 << 15) -#define MC13783_IRQSTAT1_MC2BI (1 << 17) -#define MC13783_IRQSTAT1_HSDETI (1 << 18) -#define MC13783_IRQSTAT1_HSLI (1 << 19) -#define MC13783_IRQSTAT1_ALSPTHI (1 << 20) -#define MC13783_IRQSTAT1_AHSSHORTI (1 << 21) - -#define MC13783_IRQMASK1 4 -#define MC13783_IRQMASK1_1HZM MC13783_IRQSTAT1_1HZI -#define MC13783_IRQMASK1_TODAM MC13783_IRQSTAT1_TODAI -#define MC13783_IRQMASK1_ONOFD1M MC13783_IRQSTAT1_ONOFD1I -#define MC13783_IRQMASK1_ONOFD2M 
MC13783_IRQSTAT1_ONOFD2I -#define MC13783_IRQMASK1_ONOFD3M MC13783_IRQSTAT1_ONOFD3I -#define MC13783_IRQMASK1_SYSRSTM MC13783_IRQSTAT1_SYSRSTI -#define MC13783_IRQMASK1_RTCRSTM MC13783_IRQSTAT1_RTCRSTI -#define MC13783_IRQMASK1_PCM MC13783_IRQSTAT1_PCI -#define MC13783_IRQMASK1_WARMM MC13783_IRQSTAT1_WARMI -#define MC13783_IRQMASK1_MEMHLDM MC13783_IRQSTAT1_MEMHLDI -#define MC13783_IRQMASK1_PWRRDYM MC13783_IRQSTAT1_PWRRDYI -#define MC13783_IRQMASK1_THWARNLM MC13783_IRQSTAT1_THWARNLI -#define MC13783_IRQMASK1_THWARNHM MC13783_IRQSTAT1_THWARNHI -#define MC13783_IRQMASK1_CLKM MC13783_IRQSTAT1_CLKI -#define MC13783_IRQMASK1_SEMAFM MC13783_IRQSTAT1_SEMAFI -#define MC13783_IRQMASK1_MC2BM MC13783_IRQSTAT1_MC2BI -#define MC13783_IRQMASK1_HSDETM MC13783_IRQSTAT1_HSDETI -#define MC13783_IRQMASK1_HSLM MC13783_IRQSTAT1_HSLI -#define MC13783_IRQMASK1_ALSPTHM MC13783_IRQSTAT1_ALSPTHI -#define MC13783_IRQMASK1_AHSSHORTM MC13783_IRQSTAT1_AHSSHORTI - -#define MC13783_ADC1 44 -#define MC13783_ADC1_ADEN (1 << 0) -#define MC13783_ADC1_RAND (1 << 1) -#define MC13783_ADC1_ADSEL (1 << 3) -#define MC13783_ADC1_ASC (1 << 20) -#define MC13783_ADC1_ADTRIGIGN (1 << 21) - -#define MC13783_NUMREGS 0x3f - -void mc13783_lock(struct mc13783 *mc13783) -{ - if (!mutex_trylock(&mc13783->lock)) { - dev_dbg(&mc13783->spidev->dev, "wait for %s from %pf\n", - __func__, __builtin_return_address(0)); - - mutex_lock(&mc13783->lock); - } - dev_dbg(&mc13783->spidev->dev, "%s from %pf\n", - __func__, __builtin_return_address(0)); -} -EXPORT_SYMBOL(mc13783_lock); - -void mc13783_unlock(struct mc13783 *mc13783) -{ - dev_dbg(&mc13783->spidev->dev, "%s from %pf\n", - __func__, __builtin_return_address(0)); - mutex_unlock(&mc13783->lock); -} -EXPORT_SYMBOL(mc13783_unlock); - -#define MC13783_REGOFFSET_SHIFT 25 -int mc13783_reg_read(struct mc13783 *mc13783, unsigned int offset, u32 *val) -{ - struct spi_transfer t; - struct spi_message m; - int ret; - - BUG_ON(!mutex_is_locked(&mc13783->lock)); - - if (offset > MC13783_NUMREGS) - return -EINVAL; - - *val = offset << MC13783_REGOFFSET_SHIFT; - - memset(&t, 0, sizeof(t)); - - t.tx_buf = val; - t.rx_buf = val; - t.len = sizeof(u32); - - spi_message_init(&m); - spi_message_add_tail(&t, &m); - - ret = spi_sync(mc13783->spidev, &m); - - /* error in message.status implies error return from spi_sync */ - BUG_ON(!ret && m.status); - - if (ret) - return ret; - - *val &= 0xffffff; - - dev_vdbg(&mc13783->spidev->dev, "[0x%02x] -> 0x%06x\n", offset, *val); - - return 0; -} -EXPORT_SYMBOL(mc13783_reg_read); - -int mc13783_reg_write(struct mc13783 *mc13783, unsigned int offset, u32 val) -{ - u32 buf; - struct spi_transfer t; - struct spi_message m; - int ret; - - BUG_ON(!mutex_is_locked(&mc13783->lock)); - - dev_vdbg(&mc13783->spidev->dev, "[0x%02x] <- 0x%06x\n", offset, val); - - if (offset > MC13783_NUMREGS || val > 0xffffff) - return -EINVAL; - - buf = 1 << 31 | offset << MC13783_REGOFFSET_SHIFT | val; - - memset(&t, 0, sizeof(t)); - - t.tx_buf = &buf; - t.rx_buf = &buf; - t.len = sizeof(u32); - - spi_message_init(&m); - spi_message_add_tail(&t, &m); - - ret = spi_sync(mc13783->spidev, &m); - - BUG_ON(!ret && m.status); - - if (ret) - return ret; - - return 0; -} -EXPORT_SYMBOL(mc13783_reg_write); - -int mc13783_reg_rmw(struct mc13783 *mc13783, unsigned int offset, - u32 mask, u32 val) -{ - int ret; - u32 valread; - - BUG_ON(val & ~mask); - - ret = mc13783_reg_read(mc13783, offset, &valread); - if (ret) - return ret; - - valread = (valread & ~mask) | val; - - return mc13783_reg_write(mc13783, offset, 
valread); -} -EXPORT_SYMBOL(mc13783_reg_rmw); - -int mc13783_get_flags(struct mc13783 *mc13783) -{ - struct mc13783_platform_data *pdata = - dev_get_platdata(&mc13783->spidev->dev); - - return pdata->flags; -} -EXPORT_SYMBOL(mc13783_get_flags); - -int mc13783_irq_mask(struct mc13783 *mc13783, int irq) -{ - int ret; - unsigned int offmask = irq < 24 ? MC13783_IRQMASK0 : MC13783_IRQMASK1; - u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); - u32 mask; - - if (irq < 0 || irq >= MC13783_NUM_IRQ) - return -EINVAL; - - ret = mc13783_reg_read(mc13783, offmask, &mask); - if (ret) - return ret; - - if (mask & irqbit) - /* already masked */ - return 0; - - return mc13783_reg_write(mc13783, offmask, mask | irqbit); -} -EXPORT_SYMBOL(mc13783_irq_mask); - -int mc13783_irq_unmask(struct mc13783 *mc13783, int irq) -{ - int ret; - unsigned int offmask = irq < 24 ? MC13783_IRQMASK0 : MC13783_IRQMASK1; - u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); - u32 mask; - - if (irq < 0 || irq >= MC13783_NUM_IRQ) - return -EINVAL; - - ret = mc13783_reg_read(mc13783, offmask, &mask); - if (ret) - return ret; - - if (!(mask & irqbit)) - /* already unmasked */ - return 0; - - return mc13783_reg_write(mc13783, offmask, mask & ~irqbit); -} -EXPORT_SYMBOL(mc13783_irq_unmask); - -int mc13783_irq_status(struct mc13783 *mc13783, int irq, - int *enabled, int *pending) -{ - int ret; - unsigned int offmask = irq < 24 ? MC13783_IRQMASK0 : MC13783_IRQMASK1; - unsigned int offstat = irq < 24 ? MC13783_IRQSTAT0 : MC13783_IRQSTAT1; - u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); - - if (irq < 0 || irq >= MC13783_NUM_IRQ) - return -EINVAL; - - if (enabled) { - u32 mask; - - ret = mc13783_reg_read(mc13783, offmask, &mask); - if (ret) - return ret; - - *enabled = mask & irqbit; - } - - if (pending) { - u32 stat; - - ret = mc13783_reg_read(mc13783, offstat, &stat); - if (ret) - return ret; - - *pending = stat & irqbit; - } - - return 0; -} -EXPORT_SYMBOL(mc13783_irq_status); - -int mc13783_irq_ack(struct mc13783 *mc13783, int irq) -{ - unsigned int offstat = irq < 24 ? MC13783_IRQSTAT0 : MC13783_IRQSTAT1; - unsigned int val = 1 << (irq < 24 ? 
irq : irq - 24); - - BUG_ON(irq < 0 || irq >= MC13783_NUM_IRQ); - - return mc13783_reg_write(mc13783, offstat, val); -} -EXPORT_SYMBOL(mc13783_irq_ack); - -int mc13783_irq_request_nounmask(struct mc13783 *mc13783, int irq, - irq_handler_t handler, const char *name, void *dev) -{ - BUG_ON(!mutex_is_locked(&mc13783->lock)); - BUG_ON(!handler); - - if (irq < 0 || irq >= MC13783_NUM_IRQ) - return -EINVAL; - - if (mc13783->irqhandler[irq]) - return -EBUSY; - - mc13783->irqhandler[irq] = handler; - mc13783->irqdata[irq] = dev; - - return 0; -} -EXPORT_SYMBOL(mc13783_irq_request_nounmask); - -int mc13783_irq_request(struct mc13783 *mc13783, int irq, - irq_handler_t handler, const char *name, void *dev) -{ - int ret; - - ret = mc13783_irq_request_nounmask(mc13783, irq, handler, name, dev); - if (ret) - return ret; - - ret = mc13783_irq_unmask(mc13783, irq); - if (ret) { - mc13783->irqhandler[irq] = NULL; - mc13783->irqdata[irq] = NULL; - return ret; - } - - return 0; -} -EXPORT_SYMBOL(mc13783_irq_request); - -int mc13783_irq_free(struct mc13783 *mc13783, int irq, void *dev) -{ - int ret; - BUG_ON(!mutex_is_locked(&mc13783->lock)); - - if (irq < 0 || irq >= MC13783_NUM_IRQ || !mc13783->irqhandler[irq] || - mc13783->irqdata[irq] != dev) - return -EINVAL; - - ret = mc13783_irq_mask(mc13783, irq); - if (ret) - return ret; - - mc13783->irqhandler[irq] = NULL; - mc13783->irqdata[irq] = NULL; - - return 0; -} -EXPORT_SYMBOL(mc13783_irq_free); - -static inline irqreturn_t mc13783_irqhandler(struct mc13783 *mc13783, int irq) -{ - return mc13783->irqhandler[irq](irq, mc13783->irqdata[irq]); -} - -/* - * returns: number of handled irqs or negative error - * locking: holds mc13783->lock - */ -static int mc13783_irq_handle(struct mc13783 *mc13783, - unsigned int offstat, unsigned int offmask, int baseirq) -{ - u32 stat, mask; - int ret = mc13783_reg_read(mc13783, offstat, &stat); - int num_handled = 0; - - if (ret) - return ret; - - ret = mc13783_reg_read(mc13783, offmask, &mask); - if (ret) - return ret; - - while (stat & ~mask) { - int irq = __ffs(stat & ~mask); - - stat &= ~(1 << irq); - - if (likely(mc13783->irqhandler[baseirq + irq])) { - irqreturn_t handled; - - handled = mc13783_irqhandler(mc13783, baseirq + irq); - if (handled == IRQ_HANDLED) - num_handled++; - } else { - dev_err(&mc13783->spidev->dev, - "BUG: irq %u but no handler\n", - baseirq + irq); - - mask |= 1 << irq; - - ret = mc13783_reg_write(mc13783, offmask, mask); - } - } - - return num_handled; -} - -static irqreturn_t mc13783_irq_thread(int irq, void *data) -{ - struct mc13783 *mc13783 = data; - irqreturn_t ret; - int handled = 0; - - mc13783_lock(mc13783); - - ret = mc13783_irq_handle(mc13783, MC13783_IRQSTAT0, - MC13783_IRQMASK0, MC13783_IRQ_ADCDONE); - if (ret > 0) - handled = 1; - - ret = mc13783_irq_handle(mc13783, MC13783_IRQSTAT1, - MC13783_IRQMASK1, MC13783_IRQ_1HZ); - if (ret > 0) - handled = 1; - - mc13783_unlock(mc13783); - - return IRQ_RETVAL(handled); -} - -#define MC13783_ADC1_CHAN0_SHIFT 5 -#define MC13783_ADC1_CHAN1_SHIFT 8 - -struct mc13783_adcdone_data { - struct mc13783 *mc13783; - struct completion done; -}; - -static irqreturn_t mc13783_handler_adcdone(int irq, void *data) -{ - struct mc13783_adcdone_data *adcdone_data = data; - - mc13783_irq_ack(adcdone_data->mc13783, irq); - - complete_all(&adcdone_data->done); - - return IRQ_HANDLED; -} - -#define MC13783_ADC_WORKING (1 << 0) - -int mc13783_adc_do_conversion(struct mc13783 *mc13783, unsigned int mode, - unsigned int channel, unsigned int *sample) -{ - u32 adc0, 
adc1, old_adc0; - int i, ret; - struct mc13783_adcdone_data adcdone_data = { - .mc13783 = mc13783, - }; - init_completion(&adcdone_data.done); - - dev_dbg(&mc13783->spidev->dev, "%s\n", __func__); - - mc13783_lock(mc13783); - - if (mc13783->adcflags & MC13783_ADC_WORKING) { - ret = -EBUSY; - goto out; - } - - mc13783->adcflags |= MC13783_ADC_WORKING; - - mc13783_reg_read(mc13783, MC13783_ADC0, &old_adc0); - - adc0 = MC13783_ADC0_ADINC1 | MC13783_ADC0_ADINC2; - adc1 = MC13783_ADC1_ADEN | MC13783_ADC1_ADTRIGIGN | MC13783_ADC1_ASC; - - if (channel > 7) - adc1 |= MC13783_ADC1_ADSEL; - - switch (mode) { - case MC13783_ADC_MODE_TS: - adc0 |= MC13783_ADC0_ADREFEN | MC13783_ADC0_TSMOD0 | - MC13783_ADC0_TSMOD1; - adc1 |= 4 << MC13783_ADC1_CHAN1_SHIFT; - break; - - case MC13783_ADC_MODE_SINGLE_CHAN: - adc0 |= old_adc0 & MC13783_ADC0_TSMOD_MASK; - adc1 |= (channel & 0x7) << MC13783_ADC1_CHAN0_SHIFT; - adc1 |= MC13783_ADC1_RAND; - break; - - case MC13783_ADC_MODE_MULT_CHAN: - adc0 |= old_adc0 & MC13783_ADC0_TSMOD_MASK; - adc1 |= 4 << MC13783_ADC1_CHAN1_SHIFT; - break; - - default: - mc13783_unlock(mc13783); - return -EINVAL; - } - - dev_dbg(&mc13783->spidev->dev, "%s: request irq\n", __func__); - mc13783_irq_request(mc13783, MC13783_IRQ_ADCDONE, - mc13783_handler_adcdone, __func__, &adcdone_data); - mc13783_irq_ack(mc13783, MC13783_IRQ_ADCDONE); - - mc13783_reg_write(mc13783, MC13783_REG_ADC_0, adc0); - mc13783_reg_write(mc13783, MC13783_REG_ADC_1, adc1); - - mc13783_unlock(mc13783); - - ret = wait_for_completion_interruptible_timeout(&adcdone_data.done, HZ); - - if (!ret) - ret = -ETIMEDOUT; - - mc13783_lock(mc13783); - - mc13783_irq_free(mc13783, MC13783_IRQ_ADCDONE, &adcdone_data); - - if (ret > 0) - for (i = 0; i < 4; ++i) { - ret = mc13783_reg_read(mc13783, - MC13783_REG_ADC_2, &sample[i]); - if (ret) - break; - } - - if (mode == MC13783_ADC_MODE_TS) - /* restore TSMOD */ - mc13783_reg_write(mc13783, MC13783_REG_ADC_0, old_adc0); - - mc13783->adcflags &= ~MC13783_ADC_WORKING; -out: - mc13783_unlock(mc13783); - - return ret; -} -EXPORT_SYMBOL_GPL(mc13783_adc_do_conversion); - -static int mc13783_add_subdevice_pdata(struct mc13783 *mc13783, - const char *name, void *pdata, size_t pdata_size) -{ - struct mfd_cell cell = { - .name = name, - .platform_data = pdata, - .data_size = pdata_size, - }; - - return mfd_add_devices(&mc13783->spidev->dev, -1, &cell, 1, NULL, 0); -} - -static int mc13783_add_subdevice(struct mc13783 *mc13783, const char *name) -{ - return mc13783_add_subdevice_pdata(mc13783, name, NULL, 0); -} - -static int mc13783_check_revision(struct mc13783 *mc13783) -{ - u32 rev_id, rev1, rev2, finid, icid; - - mc13783_reg_read(mc13783, MC13783_REG_REVISION, &rev_id); - - rev1 = (rev_id & 0x018) >> 3; - rev2 = (rev_id & 0x007); - icid = (rev_id & 0x01C0) >> 6; - finid = (rev_id & 0x01E00) >> 9; - - /* Ver 0.2 is actually 3.2a. 
Report as 3.2 */ - if ((rev1 == 0) && (rev2 == 2)) - rev1 = 3; - - if (rev1 == 0 || icid != 2) { - dev_err(&mc13783->spidev->dev, "No MC13783 detected.\n"); - return -ENODEV; - } - - dev_info(&mc13783->spidev->dev, - "MC13783 Rev %d.%d FinVer %x detected\n", - rev1, rev2, finid); - - return 0; -} - -static int mc13783_probe(struct spi_device *spi) -{ - struct mc13783 *mc13783; - struct mc13783_platform_data *pdata = dev_get_platdata(&spi->dev); - int ret; - - mc13783 = kzalloc(sizeof(*mc13783), GFP_KERNEL); - if (!mc13783) - return -ENOMEM; - - dev_set_drvdata(&spi->dev, mc13783); - spi->mode = SPI_MODE_0 | SPI_CS_HIGH; - spi->bits_per_word = 32; - spi_setup(spi); - - mc13783->spidev = spi; - - mutex_init(&mc13783->lock); - mc13783_lock(mc13783); - - ret = mc13783_check_revision(mc13783); - if (ret) - goto err_revision; - - /* mask all irqs */ - ret = mc13783_reg_write(mc13783, MC13783_IRQMASK0, 0x00ffffff); - if (ret) - goto err_mask; - - ret = mc13783_reg_write(mc13783, MC13783_IRQMASK1, 0x00ffffff); - if (ret) - goto err_mask; - - ret = request_threaded_irq(spi->irq, NULL, mc13783_irq_thread, - IRQF_ONESHOT | IRQF_TRIGGER_HIGH, "mc13783", mc13783); - - if (ret) { -err_mask: -err_revision: - mutex_unlock(&mc13783->lock); - dev_set_drvdata(&spi->dev, NULL); - kfree(mc13783); - return ret; - } - - mc13783_unlock(mc13783); - - if (pdata->flags & MC13783_USE_ADC) - mc13783_add_subdevice(mc13783, "mc13783-adc"); - - if (pdata->flags & MC13783_USE_CODEC) - mc13783_add_subdevice(mc13783, "mc13783-codec"); - - if (pdata->flags & MC13783_USE_REGULATOR) { - struct mc13783_regulator_platform_data regulator_pdata = { - .num_regulators = pdata->num_regulators, - .regulators = pdata->regulators, - }; - - mc13783_add_subdevice_pdata(mc13783, "mc13783-regulator", - ®ulator_pdata, sizeof(regulator_pdata)); - } - - if (pdata->flags & MC13783_USE_RTC) - mc13783_add_subdevice(mc13783, "mc13783-rtc"); - - if (pdata->flags & MC13783_USE_TOUCHSCREEN) - mc13783_add_subdevice(mc13783, "mc13783-ts"); - - if (pdata->flags & MC13783_USE_LED) - mc13783_add_subdevice_pdata(mc13783, "mc13783-led", - pdata->leds, sizeof(*pdata->leds)); - - return 0; -} - -static int __devexit mc13783_remove(struct spi_device *spi) -{ - struct mc13783 *mc13783 = dev_get_drvdata(&spi->dev); - - free_irq(mc13783->spidev->irq, mc13783); - - mfd_remove_devices(&spi->dev); - - return 0; -} - -static struct spi_driver mc13783_driver = { - .driver = { - .name = "mc13783", - .bus = &spi_bus_type, - .owner = THIS_MODULE, - }, - .probe = mc13783_probe, - .remove = __devexit_p(mc13783_remove), -}; - -static int __init mc13783_init(void) -{ - return spi_register_driver(&mc13783_driver); -} -subsys_initcall(mc13783_init); - -static void __exit mc13783_exit(void) -{ - spi_unregister_driver(&mc13783_driver); -} -module_exit(mc13783_exit); - -MODULE_DESCRIPTION("Core driver for Freescale MC13783 PMIC"); -MODULE_AUTHOR("Uwe Kleine-Koenig "); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c new file mode 100644 index 000000000000..93258adf8b63 --- /dev/null +++ b/drivers/mfd/mc13xxx-core.c @@ -0,0 +1,840 @@ +/* + * Copyright 2009-2010 Pengutronix + * Uwe Kleine-Koenig + * + * loosely based on an earlier driver that has + * Copyright 2009 Pengutronix, Sascha Hauer + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. 
+ */ +#define DEBUG +#define VERBOSE_DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include + +struct mc13xxx { + struct spi_device *spidev; + struct mutex lock; + int irq; + + irq_handler_t irqhandler[MC13XXX_NUM_IRQ]; + void *irqdata[MC13XXX_NUM_IRQ]; +}; + +struct mc13783 { + struct mc13xxx mc13xxx; + + int adcflags; +}; + +struct mc13xxx *mc13783_to_mc13xxx(struct mc13783 *mc13783) +{ + return &mc13783->mc13xxx; +} +EXPORT_SYMBOL(mc13783_to_mc13xxx); + +#define MC13XXX_IRQSTAT0 0 +#define MC13XXX_IRQSTAT0_ADCDONEI (1 << 0) +#define MC13XXX_IRQSTAT0_ADCBISDONEI (1 << 1) +#define MC13XXX_IRQSTAT0_TSI (1 << 2) +#define MC13783_IRQSTAT0_WHIGHI (1 << 3) +#define MC13783_IRQSTAT0_WLOWI (1 << 4) +#define MC13XXX_IRQSTAT0_CHGDETI (1 << 6) +#define MC13783_IRQSTAT0_CHGOVI (1 << 7) +#define MC13XXX_IRQSTAT0_CHGREVI (1 << 8) +#define MC13XXX_IRQSTAT0_CHGSHORTI (1 << 9) +#define MC13XXX_IRQSTAT0_CCCVI (1 << 10) +#define MC13XXX_IRQSTAT0_CHGCURRI (1 << 11) +#define MC13XXX_IRQSTAT0_BPONI (1 << 12) +#define MC13XXX_IRQSTAT0_LOBATLI (1 << 13) +#define MC13XXX_IRQSTAT0_LOBATHI (1 << 14) +#define MC13783_IRQSTAT0_UDPI (1 << 15) +#define MC13783_IRQSTAT0_USBI (1 << 16) +#define MC13783_IRQSTAT0_IDI (1 << 19) +#define MC13783_IRQSTAT0_SE1I (1 << 21) +#define MC13783_IRQSTAT0_CKDETI (1 << 22) +#define MC13783_IRQSTAT0_UDMI (1 << 23) + +#define MC13XXX_IRQMASK0 1 +#define MC13XXX_IRQMASK0_ADCDONEM MC13XXX_IRQSTAT0_ADCDONEI +#define MC13XXX_IRQMASK0_ADCBISDONEM MC13XXX_IRQSTAT0_ADCBISDONEI +#define MC13XXX_IRQMASK0_TSM MC13XXX_IRQSTAT0_TSI +#define MC13783_IRQMASK0_WHIGHM MC13783_IRQSTAT0_WHIGHI +#define MC13783_IRQMASK0_WLOWM MC13783_IRQSTAT0_WLOWI +#define MC13XXX_IRQMASK0_CHGDETM MC13XXX_IRQSTAT0_CHGDETI +#define MC13783_IRQMASK0_CHGOVM MC13783_IRQSTAT0_CHGOVI +#define MC13XXX_IRQMASK0_CHGREVM MC13XXX_IRQSTAT0_CHGREVI +#define MC13XXX_IRQMASK0_CHGSHORTM MC13XXX_IRQSTAT0_CHGSHORTI +#define MC13XXX_IRQMASK0_CCCVM MC13XXX_IRQSTAT0_CCCVI +#define MC13XXX_IRQMASK0_CHGCURRM MC13XXX_IRQSTAT0_CHGCURRI +#define MC13XXX_IRQMASK0_BPONM MC13XXX_IRQSTAT0_BPONI +#define MC13XXX_IRQMASK0_LOBATLM MC13XXX_IRQSTAT0_LOBATLI +#define MC13XXX_IRQMASK0_LOBATHM MC13XXX_IRQSTAT0_LOBATHI +#define MC13783_IRQMASK0_UDPM MC13783_IRQSTAT0_UDPI +#define MC13783_IRQMASK0_USBM MC13783_IRQSTAT0_USBI +#define MC13783_IRQMASK0_IDM MC13783_IRQSTAT0_IDI +#define MC13783_IRQMASK0_SE1M MC13783_IRQSTAT0_SE1I +#define MC13783_IRQMASK0_CKDETM MC13783_IRQSTAT0_CKDETI +#define MC13783_IRQMASK0_UDMM MC13783_IRQSTAT0_UDMI + +#define MC13XXX_IRQSTAT1 3 +#define MC13XXX_IRQSTAT1_1HZI (1 << 0) +#define MC13XXX_IRQSTAT1_TODAI (1 << 1) +#define MC13783_IRQSTAT1_ONOFD1I (1 << 3) +#define MC13783_IRQSTAT1_ONOFD2I (1 << 4) +#define MC13783_IRQSTAT1_ONOFD3I (1 << 5) +#define MC13XXX_IRQSTAT1_SYSRSTI (1 << 6) +#define MC13XXX_IRQSTAT1_RTCRSTI (1 << 7) +#define MC13XXX_IRQSTAT1_PCI (1 << 8) +#define MC13XXX_IRQSTAT1_WARMI (1 << 9) +#define MC13XXX_IRQSTAT1_MEMHLDI (1 << 10) +#define MC13783_IRQSTAT1_PWRRDYI (1 << 11) +#define MC13XXX_IRQSTAT1_THWARNLI (1 << 12) +#define MC13XXX_IRQSTAT1_THWARNHI (1 << 13) +#define MC13XXX_IRQSTAT1_CLKI (1 << 14) +#define MC13783_IRQSTAT1_SEMAFI (1 << 15) +#define MC13783_IRQSTAT1_MC2BI (1 << 17) +#define MC13783_IRQSTAT1_HSDETI (1 << 18) +#define MC13783_IRQSTAT1_HSLI (1 << 19) +#define MC13783_IRQSTAT1_ALSPTHI (1 << 20) +#define MC13783_IRQSTAT1_AHSSHORTI (1 << 21) + +#define MC13XXX_IRQMASK1 4 +#define MC13XXX_IRQMASK1_1HZM MC13XXX_IRQSTAT1_1HZI +#define MC13XXX_IRQMASK1_TODAM 
MC13XXX_IRQSTAT1_TODAI +#define MC13783_IRQMASK1_ONOFD1M MC13783_IRQSTAT1_ONOFD1I +#define MC13783_IRQMASK1_ONOFD2M MC13783_IRQSTAT1_ONOFD2I +#define MC13783_IRQMASK1_ONOFD3M MC13783_IRQSTAT1_ONOFD3I +#define MC13XXX_IRQMASK1_SYSRSTM MC13XXX_IRQSTAT1_SYSRSTI +#define MC13XXX_IRQMASK1_RTCRSTM MC13XXX_IRQSTAT1_RTCRSTI +#define MC13XXX_IRQMASK1_PCM MC13XXX_IRQSTAT1_PCI +#define MC13XXX_IRQMASK1_WARMM MC13XXX_IRQSTAT1_WARMI +#define MC13XXX_IRQMASK1_MEMHLDM MC13XXX_IRQSTAT1_MEMHLDI +#define MC13783_IRQMASK1_PWRRDYM MC13783_IRQSTAT1_PWRRDYI +#define MC13XXX_IRQMASK1_THWARNLM MC13XXX_IRQSTAT1_THWARNLI +#define MC13XXX_IRQMASK1_THWARNHM MC13XXX_IRQSTAT1_THWARNHI +#define MC13XXX_IRQMASK1_CLKM MC13XXX_IRQSTAT1_CLKI +#define MC13783_IRQMASK1_SEMAFM MC13783_IRQSTAT1_SEMAFI +#define MC13783_IRQMASK1_MC2BM MC13783_IRQSTAT1_MC2BI +#define MC13783_IRQMASK1_HSDETM MC13783_IRQSTAT1_HSDETI +#define MC13783_IRQMASK1_HSLM MC13783_IRQSTAT1_HSLI +#define MC13783_IRQMASK1_ALSPTHM MC13783_IRQSTAT1_ALSPTHI +#define MC13783_IRQMASK1_AHSSHORTM MC13783_IRQSTAT1_AHSSHORTI + +#define MC13XXX_REVISION 7 +#define MC13XXX_REVISION_REVMETAL (0x07 << 0) +#define MC13XXX_REVISION_REVFULL (0x03 << 3) +#define MC13XXX_REVISION_ICID (0x07 << 6) +#define MC13XXX_REVISION_FIN (0x03 << 9) +#define MC13XXX_REVISION_FAB (0x03 << 11) +#define MC13XXX_REVISION_ICIDCODE (0x3f << 13) + +#define MC13783_ADC1 44 +#define MC13783_ADC1_ADEN (1 << 0) +#define MC13783_ADC1_RAND (1 << 1) +#define MC13783_ADC1_ADSEL (1 << 3) +#define MC13783_ADC1_ASC (1 << 20) +#define MC13783_ADC1_ADTRIGIGN (1 << 21) + +#define MC13783_ADC2 45 + +#define MC13XXX_NUMREGS 0x3f + +void mc13xxx_lock(struct mc13xxx *mc13xxx) +{ + if (!mutex_trylock(&mc13xxx->lock)) { + dev_dbg(&mc13xxx->spidev->dev, "wait for %s from %pf\n", + __func__, __builtin_return_address(0)); + + mutex_lock(&mc13xxx->lock); + } + dev_dbg(&mc13xxx->spidev->dev, "%s from %pf\n", + __func__, __builtin_return_address(0)); +} +EXPORT_SYMBOL(mc13xxx_lock); + +void mc13xxx_unlock(struct mc13xxx *mc13xxx) +{ + dev_dbg(&mc13xxx->spidev->dev, "%s from %pf\n", + __func__, __builtin_return_address(0)); + mutex_unlock(&mc13xxx->lock); +} +EXPORT_SYMBOL(mc13xxx_unlock); + +#define MC13XXX_REGOFFSET_SHIFT 25 +int mc13xxx_reg_read(struct mc13xxx *mc13xxx, unsigned int offset, u32 *val) +{ + struct spi_transfer t; + struct spi_message m; + int ret; + + BUG_ON(!mutex_is_locked(&mc13xxx->lock)); + + if (offset > MC13XXX_NUMREGS) + return -EINVAL; + + *val = offset << MC13XXX_REGOFFSET_SHIFT; + + memset(&t, 0, sizeof(t)); + + t.tx_buf = val; + t.rx_buf = val; + t.len = sizeof(u32); + + spi_message_init(&m); + spi_message_add_tail(&t, &m); + + ret = spi_sync(mc13xxx->spidev, &m); + + /* error in message.status implies error return from spi_sync */ + BUG_ON(!ret && m.status); + + if (ret) + return ret; + + *val &= 0xffffff; + + dev_vdbg(&mc13xxx->spidev->dev, "[0x%02x] -> 0x%06x\n", offset, *val); + + return 0; +} +EXPORT_SYMBOL(mc13xxx_reg_read); + +int mc13xxx_reg_write(struct mc13xxx *mc13xxx, unsigned int offset, u32 val) +{ + u32 buf; + struct spi_transfer t; + struct spi_message m; + int ret; + + BUG_ON(!mutex_is_locked(&mc13xxx->lock)); + + dev_vdbg(&mc13xxx->spidev->dev, "[0x%02x] <- 0x%06x\n", offset, val); + + if (offset > MC13XXX_NUMREGS || val > 0xffffff) + return -EINVAL; + + buf = 1 << 31 | offset << MC13XXX_REGOFFSET_SHIFT | val; + + memset(&t, 0, sizeof(t)); + + t.tx_buf = &buf; + t.rx_buf = &buf; + t.len = sizeof(u32); + + spi_message_init(&m); + spi_message_add_tail(&t, &m); + + ret = 
spi_sync(mc13xxx->spidev, &m); + + BUG_ON(!ret && m.status); + + if (ret) + return ret; + + return 0; +} +EXPORT_SYMBOL(mc13xxx_reg_write); + +int mc13xxx_reg_rmw(struct mc13xxx *mc13xxx, unsigned int offset, + u32 mask, u32 val) +{ + int ret; + u32 valread; + + BUG_ON(val & ~mask); + + ret = mc13xxx_reg_read(mc13xxx, offset, &valread); + if (ret) + return ret; + + valread = (valread & ~mask) | val; + + return mc13xxx_reg_write(mc13xxx, offset, valread); +} +EXPORT_SYMBOL(mc13xxx_reg_rmw); + +int mc13xxx_irq_mask(struct mc13xxx *mc13xxx, int irq) +{ + int ret; + unsigned int offmask = irq < 24 ? MC13XXX_IRQMASK0 : MC13XXX_IRQMASK1; + u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); + u32 mask; + + if (irq < 0 || irq >= MC13XXX_NUM_IRQ) + return -EINVAL; + + ret = mc13xxx_reg_read(mc13xxx, offmask, &mask); + if (ret) + return ret; + + if (mask & irqbit) + /* already masked */ + return 0; + + return mc13xxx_reg_write(mc13xxx, offmask, mask | irqbit); +} +EXPORT_SYMBOL(mc13xxx_irq_mask); + +int mc13xxx_irq_unmask(struct mc13xxx *mc13xxx, int irq) +{ + int ret; + unsigned int offmask = irq < 24 ? MC13XXX_IRQMASK0 : MC13XXX_IRQMASK1; + u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); + u32 mask; + + if (irq < 0 || irq >= MC13XXX_NUM_IRQ) + return -EINVAL; + + ret = mc13xxx_reg_read(mc13xxx, offmask, &mask); + if (ret) + return ret; + + if (!(mask & irqbit)) + /* already unmasked */ + return 0; + + return mc13xxx_reg_write(mc13xxx, offmask, mask & ~irqbit); +} +EXPORT_SYMBOL(mc13xxx_irq_unmask); + +int mc13xxx_irq_status(struct mc13xxx *mc13xxx, int irq, + int *enabled, int *pending) +{ + int ret; + unsigned int offmask = irq < 24 ? MC13XXX_IRQMASK0 : MC13XXX_IRQMASK1; + unsigned int offstat = irq < 24 ? MC13XXX_IRQSTAT0 : MC13XXX_IRQSTAT1; + u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); + + if (irq < 0 || irq >= MC13XXX_NUM_IRQ) + return -EINVAL; + + if (enabled) { + u32 mask; + + ret = mc13xxx_reg_read(mc13xxx, offmask, &mask); + if (ret) + return ret; + + *enabled = mask & irqbit; + } + + if (pending) { + u32 stat; + + ret = mc13xxx_reg_read(mc13xxx, offstat, &stat); + if (ret) + return ret; + + *pending = stat & irqbit; + } + + return 0; +} +EXPORT_SYMBOL(mc13xxx_irq_status); + +int mc13xxx_irq_ack(struct mc13xxx *mc13xxx, int irq) +{ + unsigned int offstat = irq < 24 ? MC13XXX_IRQSTAT0 : MC13XXX_IRQSTAT1; + unsigned int val = 1 << (irq < 24 ? 
irq : irq - 24); + + BUG_ON(irq < 0 || irq >= MC13XXX_NUM_IRQ); + + return mc13xxx_reg_write(mc13xxx, offstat, val); +} +EXPORT_SYMBOL(mc13xxx_irq_ack); + +int mc13xxx_irq_request_nounmask(struct mc13xxx *mc13xxx, int irq, + irq_handler_t handler, const char *name, void *dev) +{ + BUG_ON(!mutex_is_locked(&mc13xxx->lock)); + BUG_ON(!handler); + + if (irq < 0 || irq >= MC13XXX_NUM_IRQ) + return -EINVAL; + + if (mc13xxx->irqhandler[irq]) + return -EBUSY; + + mc13xxx->irqhandler[irq] = handler; + mc13xxx->irqdata[irq] = dev; + + return 0; +} +EXPORT_SYMBOL(mc13xxx_irq_request_nounmask); + +int mc13xxx_irq_request(struct mc13xxx *mc13xxx, int irq, + irq_handler_t handler, const char *name, void *dev) +{ + int ret; + + ret = mc13xxx_irq_request_nounmask(mc13xxx, irq, handler, name, dev); + if (ret) + return ret; + + ret = mc13xxx_irq_unmask(mc13xxx, irq); + if (ret) { + mc13xxx->irqhandler[irq] = NULL; + mc13xxx->irqdata[irq] = NULL; + return ret; + } + + return 0; +} +EXPORT_SYMBOL(mc13xxx_irq_request); + +int mc13xxx_irq_free(struct mc13xxx *mc13xxx, int irq, void *dev) +{ + int ret; + BUG_ON(!mutex_is_locked(&mc13xxx->lock)); + + if (irq < 0 || irq >= MC13XXX_NUM_IRQ || !mc13xxx->irqhandler[irq] || + mc13xxx->irqdata[irq] != dev) + return -EINVAL; + + ret = mc13xxx_irq_mask(mc13xxx, irq); + if (ret) + return ret; + + mc13xxx->irqhandler[irq] = NULL; + mc13xxx->irqdata[irq] = NULL; + + return 0; +} +EXPORT_SYMBOL(mc13xxx_irq_free); + +static inline irqreturn_t mc13xxx_irqhandler(struct mc13xxx *mc13xxx, int irq) +{ + return mc13xxx->irqhandler[irq](irq, mc13xxx->irqdata[irq]); +} + +/* + * returns: number of handled irqs or negative error + * locking: holds mc13xxx->lock + */ +static int mc13xxx_irq_handle(struct mc13xxx *mc13xxx, + unsigned int offstat, unsigned int offmask, int baseirq) +{ + u32 stat, mask; + int ret = mc13xxx_reg_read(mc13xxx, offstat, &stat); + int num_handled = 0; + + if (ret) + return ret; + + ret = mc13xxx_reg_read(mc13xxx, offmask, &mask); + if (ret) + return ret; + + while (stat & ~mask) { + int irq = __ffs(stat & ~mask); + + stat &= ~(1 << irq); + + if (likely(mc13xxx->irqhandler[baseirq + irq])) { + irqreturn_t handled; + + handled = mc13xxx_irqhandler(mc13xxx, baseirq + irq); + if (handled == IRQ_HANDLED) + num_handled++; + } else { + dev_err(&mc13xxx->spidev->dev, + "BUG: irq %u but no handler\n", + baseirq + irq); + + mask |= 1 << irq; + + ret = mc13xxx_reg_write(mc13xxx, offmask, mask); + } + } + + return num_handled; +} + +static irqreturn_t mc13xxx_irq_thread(int irq, void *data) +{ + struct mc13xxx *mc13xxx = data; + irqreturn_t ret; + int handled = 0; + + mc13xxx_lock(mc13xxx); + + ret = mc13xxx_irq_handle(mc13xxx, MC13XXX_IRQSTAT0, + MC13XXX_IRQMASK0, 0); + if (ret > 0) + handled = 1; + + ret = mc13xxx_irq_handle(mc13xxx, MC13XXX_IRQSTAT1, + MC13XXX_IRQMASK1, 24); + if (ret > 0) + handled = 1; + + mc13xxx_unlock(mc13xxx); + + return IRQ_RETVAL(handled); +} + +enum mc13xxx_id { + MC13XXX_ID_MC13783, + MC13XXX_ID_MC13892, + MC13XXX_ID_INVALID, +}; + +const char *mc13xxx_chipname[] = { + [MC13XXX_ID_MC13783] = "mc13783", + [MC13XXX_ID_MC13892] = "mc13892", +}; + +#define maskval(reg, mask) (((reg) & (mask)) >> __ffs(mask)) +static int mc13xxx_identify(struct mc13xxx *mc13xxx, enum mc13xxx_id *id) +{ + u32 icid; + u32 revision; + const char *name; + int ret; + + ret = mc13xxx_reg_read(mc13xxx, 46, &icid); + if (ret) + return ret; + + icid = (icid >> 6) & 0x7; + + switch (icid) { + case 2: + *id = MC13XXX_ID_MC13783; + name = "mc13783"; + break; + case 7: + *id 
= MC13XXX_ID_MC13892; + name = "mc13892"; + break; + default: + *id = MC13XXX_ID_INVALID; + break; + } + + if (*id == MC13XXX_ID_MC13783 || *id == MC13XXX_ID_MC13892) { + ret = mc13xxx_reg_read(mc13xxx, MC13XXX_REVISION, &revision); + if (ret) + return ret; + + dev_info(&mc13xxx->spidev->dev, "%s: rev: %d.%d, " + "fin: %d, fab: %d, icid: %d/%d\n", + mc13xxx_chipname[*id], + maskval(revision, MC13XXX_REVISION_REVFULL), + maskval(revision, MC13XXX_REVISION_REVMETAL), + maskval(revision, MC13XXX_REVISION_FIN), + maskval(revision, MC13XXX_REVISION_FAB), + maskval(revision, MC13XXX_REVISION_ICID), + maskval(revision, MC13XXX_REVISION_ICIDCODE)); + } + + if (*id != MC13XXX_ID_INVALID) { + const struct spi_device_id *devid = + spi_get_device_id(mc13xxx->spidev); + if (!devid || devid->driver_data != *id) + dev_warn(&mc13xxx->spidev->dev, "device id doesn't " + "match auto detection!\n"); + } + + return 0; +} + +static const char *mc13xxx_get_chipname(struct mc13xxx *mc13xxx) +{ + const struct spi_device_id *devid = + spi_get_device_id(mc13xxx->spidev); + + if (!devid) + return NULL; + + return mc13xxx_chipname[devid->driver_data]; +} + +#include + +int mc13xxx_get_flags(struct mc13xxx *mc13xxx) +{ + struct mc13xxx_platform_data *pdata = + dev_get_platdata(&mc13xxx->spidev->dev); + + return pdata->flags; +} +EXPORT_SYMBOL(mc13xxx_get_flags); + +#define MC13783_ADC1_CHAN0_SHIFT 5 +#define MC13783_ADC1_CHAN1_SHIFT 8 + +struct mc13xxx_adcdone_data { + struct mc13xxx *mc13xxx; + struct completion done; +}; + +static irqreturn_t mc13783_handler_adcdone(int irq, void *data) +{ + struct mc13xxx_adcdone_data *adcdone_data = data; + + mc13xxx_irq_ack(adcdone_data->mc13xxx, irq); + + complete_all(&adcdone_data->done); + + return IRQ_HANDLED; +} + +#define MC13783_ADC_WORKING (1 << 0) + +int mc13783_adc_do_conversion(struct mc13783 *mc13783, unsigned int mode, + unsigned int channel, unsigned int *sample) +{ + struct mc13xxx *mc13xxx = &mc13783->mc13xxx; + u32 adc0, adc1, old_adc0; + int i, ret; + struct mc13xxx_adcdone_data adcdone_data = { + .mc13xxx = mc13xxx, + }; + init_completion(&adcdone_data.done); + + dev_dbg(&mc13xxx->spidev->dev, "%s\n", __func__); + + mc13xxx_lock(mc13xxx); + + if (mc13783->adcflags & MC13783_ADC_WORKING) { + ret = -EBUSY; + goto out; + } + + mc13783->adcflags |= MC13783_ADC_WORKING; + + mc13xxx_reg_read(mc13xxx, MC13783_ADC0, &old_adc0); + + adc0 = MC13783_ADC0_ADINC1 | MC13783_ADC0_ADINC2; + adc1 = MC13783_ADC1_ADEN | MC13783_ADC1_ADTRIGIGN | MC13783_ADC1_ASC; + + if (channel > 7) + adc1 |= MC13783_ADC1_ADSEL; + + switch (mode) { + case MC13783_ADC_MODE_TS: + adc0 |= MC13783_ADC0_ADREFEN | MC13783_ADC0_TSMOD0 | + MC13783_ADC0_TSMOD1; + adc1 |= 4 << MC13783_ADC1_CHAN1_SHIFT; + break; + + case MC13783_ADC_MODE_SINGLE_CHAN: + adc0 |= old_adc0 & MC13783_ADC0_TSMOD_MASK; + adc1 |= (channel & 0x7) << MC13783_ADC1_CHAN0_SHIFT; + adc1 |= MC13783_ADC1_RAND; + break; + + case MC13783_ADC_MODE_MULT_CHAN: + adc0 |= old_adc0 & MC13783_ADC0_TSMOD_MASK; + adc1 |= 4 << MC13783_ADC1_CHAN1_SHIFT; + break; + + default: + mc13783_unlock(mc13783); + return -EINVAL; + } + + dev_dbg(&mc13783->mc13xxx.spidev->dev, "%s: request irq\n", __func__); + mc13xxx_irq_request(mc13xxx, MC13783_IRQ_ADCDONE, + mc13783_handler_adcdone, __func__, &adcdone_data); + mc13xxx_irq_ack(mc13xxx, MC13783_IRQ_ADCDONE); + + mc13xxx_reg_write(mc13xxx, MC13783_ADC0, adc0); + mc13xxx_reg_write(mc13xxx, MC13783_ADC1, adc1); + + mc13xxx_unlock(mc13xxx); + + ret = wait_for_completion_interruptible_timeout(&adcdone_data.done, HZ); 
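At this point the conversion has been kicked off and the caller sleeps on the completion with a one-second timeout; for clients of this API the whole handshake collapses into a single call. A minimal usage sketch (hypothetical consumer code, not part of this patch; the function name and the extraction of a 10-bit result from bits 2..11 of the packed ADC2 words are illustrative assumptions):

        static int example_read_channel(struct mc13783 *mc13783,
                        unsigned int channel, unsigned int *raw)
        {
                unsigned int sample[4];
                int ret;

                /* channel selection, IRQ handling and locking all happen
                 * inside mc13783_adc_do_conversion() */
                ret = mc13783_adc_do_conversion(mc13783,
                                MC13783_ADC_MODE_SINGLE_CHAN, channel, sample);
                if (ret)
                        return ret;

                /* each ADC2 word packs two results; take the first one */
                *raw = (sample[0] >> 2) & 0x3ff;
                return 0;
        }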
+ + if (!ret) + ret = -ETIMEDOUT; + + mc13xxx_lock(mc13xxx); + + mc13xxx_irq_free(mc13xxx, MC13783_IRQ_ADCDONE, &adcdone_data); + + if (ret > 0) + for (i = 0; i < 4; ++i) { + ret = mc13xxx_reg_read(mc13xxx, + MC13783_ADC2, &sample[i]); + if (ret) + break; + } + + if (mode == MC13783_ADC_MODE_TS) + /* restore TSMOD */ + mc13xxx_reg_write(mc13xxx, MC13783_ADC0, old_adc0); + + mc13783->adcflags &= ~MC13783_ADC_WORKING; +out: + mc13xxx_unlock(mc13xxx); + + return ret; +} +EXPORT_SYMBOL_GPL(mc13783_adc_do_conversion); + +static int mc13xxx_add_subdevice_pdata(struct mc13xxx *mc13xxx, + const char *format, void *pdata, size_t pdata_size) +{ + char buf[30]; + const char *name = mc13xxx_get_chipname(mc13xxx); + + struct mfd_cell cell = { + .platform_data = pdata, + .data_size = pdata_size, + }; + + /* there is no asnprintf in the kernel :-( */ + if (snprintf(buf, sizeof(buf), format, name) > sizeof(buf)) + return -E2BIG; + + cell.name = kmemdup(buf, strlen(buf) + 1, GFP_KERNEL); + if (!cell.name) + return -ENOMEM; + + return mfd_add_devices(&mc13xxx->spidev->dev, -1, &cell, 1, NULL, 0); +} + +static int mc13xxx_add_subdevice(struct mc13xxx *mc13xxx, const char *format) +{ + return mc13xxx_add_subdevice_pdata(mc13xxx, format, NULL, 0); +} + +static int mc13xxx_probe(struct spi_device *spi) +{ + struct mc13xxx *mc13xxx; + struct mc13xxx_platform_data *pdata = dev_get_platdata(&spi->dev); + enum mc13xxx_id id; + int ret; + + mc13xxx = kzalloc(sizeof(*mc13xxx), GFP_KERNEL); + if (!mc13xxx) + return -ENOMEM; + + dev_set_drvdata(&spi->dev, mc13xxx); + spi->mode = SPI_MODE_0 | SPI_CS_HIGH; + spi->bits_per_word = 32; + spi_setup(spi); + + mc13xxx->spidev = spi; + + mutex_init(&mc13xxx->lock); + mc13xxx_lock(mc13xxx); + + ret = mc13xxx_identify(mc13xxx, &id); + if (ret || id == MC13XXX_ID_INVALID) + goto err_revision; + + /* mask all irqs */ + ret = mc13xxx_reg_write(mc13xxx, MC13XXX_IRQMASK0, 0x00ffffff); + if (ret) + goto err_mask; + + ret = mc13xxx_reg_write(mc13xxx, MC13XXX_IRQMASK1, 0x00ffffff); + if (ret) + goto err_mask; + + ret = request_threaded_irq(spi->irq, NULL, mc13xxx_irq_thread, + IRQF_ONESHOT | IRQF_TRIGGER_HIGH, "mc13xxx", mc13xxx); + + if (ret) { +err_mask: +err_revision: + mutex_unlock(&mc13xxx->lock); + dev_set_drvdata(&spi->dev, NULL); + kfree(mc13xxx); + return ret; + } + + mc13xxx_unlock(mc13xxx); + + if (pdata->flags & MC13XXX_USE_ADC) + mc13xxx_add_subdevice(mc13xxx, "%s-adc"); + + if (pdata->flags & MC13XXX_USE_CODEC) + mc13xxx_add_subdevice(mc13xxx, "%s-codec"); + + if (pdata->flags & MC13XXX_USE_REGULATOR) { + struct mc13xxx_regulator_platform_data regulator_pdata = { + .num_regulators = pdata->num_regulators, + .regulators = pdata->regulators, + }; + + mc13xxx_add_subdevice_pdata(mc13xxx, "%s-regulator", + &regulator_pdata, sizeof(regulator_pdata)); + } + + if (pdata->flags & MC13XXX_USE_RTC) + mc13xxx_add_subdevice(mc13xxx, "%s-rtc"); + + if (pdata->flags & MC13XXX_USE_TOUCHSCREEN) + mc13xxx_add_subdevice(mc13xxx, "%s-ts"); + + if (pdata->flags & MC13XXX_USE_LED) { + mc13xxx_add_subdevice_pdata(mc13xxx, "%s-led", + pdata->leds, sizeof(*pdata->leds)); + } + + return 0; +} + +static int __devexit mc13xxx_remove(struct spi_device *spi) +{ + struct mc13xxx *mc13xxx = dev_get_drvdata(&spi->dev); + + free_irq(mc13xxx->spidev->irq, mc13xxx); + + mfd_remove_devices(&spi->dev); + + return 0; +} + +static const struct spi_device_id mc13xxx_device_id[] = { + { + .name = "mc13783", + .driver_data = MC13XXX_ID_MC13783, + }, { + .name = "mc13892", + .driver_data = MC13XXX_ID_MC13892, + }, { +
/* sentinel */ + } +}; + +static struct spi_driver mc13xxx_driver = { + .id_table = mc13xxx_device_id, + .driver = { + .name = "mc13xxx", + .bus = &spi_bus_type, + .owner = THIS_MODULE, + }, + .probe = mc13xxx_probe, + .remove = __devexit_p(mc13xxx_remove), +}; + +static int __init mc13xxx_init(void) +{ + return spi_register_driver(&mc13xxx_driver); +} +subsys_initcall(mc13xxx_init); + +static void __exit mc13xxx_exit(void) +{ + spi_unregister_driver(&mc13xxx_driver); +} +module_exit(mc13xxx_exit); + +MODULE_DESCRIPTION("Core driver for Freescale MC13XXX PMIC"); +MODULE_AUTHOR("Uwe Kleine-Koenig "); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mfd/mc13783.h b/include/linux/mfd/mc13783.h index 5f6aff55eeb7..b4c741e352c2 100644 --- a/include/linux/mfd/mc13783.h +++ b/include/linux/mfd/mc13783.h @@ -1,5 +1,5 @@ /* - * Copyright 2009 Pengutronix + * Copyright 2009-2010 Pengutronix * Uwe Kleine-Koenig * * This program is free software; you can redistribute it and/or modify it under @@ -9,31 +9,84 @@ #ifndef __LINUX_MFD_MC13783_H #define __LINUX_MFD_MC13783_H -#include +#include struct mc13783; -void mc13783_lock(struct mc13783 *mc13783); -void mc13783_unlock(struct mc13783 *mc13783); - -int mc13783_reg_read(struct mc13783 *mc13783, unsigned int offset, u32 *val); -int mc13783_reg_write(struct mc13783 *mc13783, unsigned int offset, u32 val); -int mc13783_reg_rmw(struct mc13783 *mc13783, unsigned int offset, - u32 mask, u32 val); - -int mc13783_get_flags(struct mc13783 *mc13783); - -int mc13783_irq_request(struct mc13783 *mc13783, int irq, - irq_handler_t handler, const char *name, void *dev); -int mc13783_irq_request_nounmask(struct mc13783 *mc13783, int irq, - irq_handler_t handler, const char *name, void *dev); -int mc13783_irq_free(struct mc13783 *mc13783, int irq, void *dev); - -int mc13783_irq_mask(struct mc13783 *mc13783, int irq); -int mc13783_irq_unmask(struct mc13783 *mc13783, int irq); -int mc13783_irq_status(struct mc13783 *mc13783, int irq, - int *enabled, int *pending); -int mc13783_irq_ack(struct mc13783 *mc13783, int irq); +struct mc13xxx *mc13783_to_mc13xxx(struct mc13783 *mc13783); + +static inline void mc13783_lock(struct mc13783 *mc13783) +{ + mc13xxx_lock(mc13783_to_mc13xxx(mc13783)); +} + +static inline void mc13783_unlock(struct mc13783 *mc13783) +{ + mc13xxx_unlock(mc13783_to_mc13xxx(mc13783)); +} + +static inline int mc13783_reg_read(struct mc13783 *mc13783, + unsigned int offset, u32 *val) +{ + return mc13xxx_reg_read(mc13783_to_mc13xxx(mc13783), offset, val); +} + +static inline int mc13783_reg_write(struct mc13783 *mc13783, + unsigned int offset, u32 val) +{ + return mc13xxx_reg_write(mc13783_to_mc13xxx(mc13783), offset, val); +} + +static inline int mc13783_reg_rmw(struct mc13783 *mc13783, + unsigned int offset, u32 mask, u32 val) +{ + return mc13xxx_reg_rmw(mc13783_to_mc13xxx(mc13783), offset, mask, val); +} + +static inline int mc13783_get_flags(struct mc13783 *mc13783) +{ + return mc13xxx_get_flags(mc13783_to_mc13xxx(mc13783)); +} + +static inline int mc13783_irq_request(struct mc13783 *mc13783, int irq, + irq_handler_t handler, const char *name, void *dev) +{ + return mc13xxx_irq_request(mc13783_to_mc13xxx(mc13783), irq, + handler, name, dev); +} + +static inline int mc13783_irq_request_nounmask(struct mc13783 *mc13783, int irq, + irq_handler_t handler, const char *name, void *dev) +{ + return mc13xxx_irq_request_nounmask(mc13783_to_mc13xxx(mc13783), irq, + handler, name, dev); +} + +static inline int mc13783_irq_free(struct mc13783 *mc13783, int irq, 
void *dev) +{ + return mc13xxx_irq_free(mc13783_to_mc13xxx(mc13783), irq, dev); +} + +static inline int mc13783_irq_mask(struct mc13783 *mc13783, int irq) +{ + return mc13xxx_irq_mask(mc13783_to_mc13xxx(mc13783), irq); +} + +static inline int mc13783_irq_unmask(struct mc13783 *mc13783, int irq) +{ + return mc13xxx_irq_unmask(mc13783_to_mc13xxx(mc13783), irq); +} +static inline int mc13783_irq_status(struct mc13783 *mc13783, int irq, + int *enabled, int *pending) +{ + return mc13xxx_irq_status(mc13783_to_mc13xxx(mc13783), + irq, enabled, pending); +} + +static inline int mc13783_irq_ack(struct mc13783 *mc13783, int irq) +{ + return mc13xxx_irq_ack(mc13783_to_mc13xxx(mc13783), irq); +} #define MC13783_ADC0 43 #define MC13783_ADC0_ADREFEN (1 << 10) @@ -48,96 +101,18 @@ int mc13783_irq_ack(struct mc13783 *mc13783, int irq); MC13783_ADC0_TSMOD1 | \ MC13783_ADC0_TSMOD2) -struct mc13783_led_platform_data { -#define MC13783_LED_MD 0 -#define MC13783_LED_AD 1 -#define MC13783_LED_KP 2 -#define MC13783_LED_R1 3 -#define MC13783_LED_G1 4 -#define MC13783_LED_B1 5 -#define MC13783_LED_R2 6 -#define MC13783_LED_G2 7 -#define MC13783_LED_B2 8 -#define MC13783_LED_R3 9 -#define MC13783_LED_G3 10 -#define MC13783_LED_B3 11 -#define MC13783_LED_MAX MC13783_LED_B3 - int id; - const char *name; - const char *default_trigger; - -/* Three or two bits current selection depending on the led */ - char max_current; -}; - -struct mc13783_leds_platform_data { - int num_leds; - struct mc13783_led_platform_data *led; - -#define MC13783_LED_TRIODE_MD (1 << 0) -#define MC13783_LED_TRIODE_AD (1 << 1) -#define MC13783_LED_TRIODE_KP (1 << 2) -#define MC13783_LED_BOOST_EN (1 << 3) -#define MC13783_LED_TC1HALF (1 << 4) -#define MC13783_LED_SLEWLIMTC (1 << 5) -#define MC13783_LED_SLEWLIMBL (1 << 6) -#define MC13783_LED_TRIODE_TC1 (1 << 7) -#define MC13783_LED_TRIODE_TC2 (1 << 8) -#define MC13783_LED_TRIODE_TC3 (1 << 9) - int flags; - -#define MC13783_LED_AB_DISABLED 0 -#define MC13783_LED_AB_MD1 1 -#define MC13783_LED_AB_MD12 2 -#define MC13783_LED_AB_MD123 3 -#define MC13783_LED_AB_MD1234 4 -#define MC13783_LED_AB_MD1234_AD1 5 -#define MC13783_LED_AB_MD1234_AD12 6 -#define MC13783_LED_AB_MD1_AD 7 - char abmode; - -#define MC13783_LED_ABREF_200MV 0 -#define MC13783_LED_ABREF_400MV 1 -#define MC13783_LED_ABREF_600MV 2 -#define MC13783_LED_ABREF_800MV 3 - char abref; - -#define MC13783_LED_PERIOD_10MS 0 -#define MC13783_LED_PERIOD_100MS 1 -#define MC13783_LED_PERIOD_500MS 2 -#define MC13783_LED_PERIOD_2S 3 - char bl_period; - char tc1_period; - char tc2_period; - char tc3_period; -}; - -/* to be cleaned up */ -struct regulator_init_data; - -struct mc13783_regulator_init_data { - int id; - struct regulator_init_data *init_data; -}; - -struct mc13783_regulator_platform_data { - int num_regulators; - struct mc13783_regulator_init_data *regulators; -}; - -struct mc13783_platform_data { - int num_regulators; - struct mc13783_regulator_init_data *regulators; - struct mc13783_leds_platform_data *leds; - -#define MC13783_USE_TOUCHSCREEN (1 << 0) -#define MC13783_USE_CODEC (1 << 1) -#define MC13783_USE_ADC (1 << 2) -#define MC13783_USE_RTC (1 << 3) -#define MC13783_USE_REGULATOR (1 << 4) -#define MC13783_USE_LED (1 << 5) - unsigned int flags; -}; +#define mc13783_regulator_init_data mc13xxx_regulator_init_data +#define mc13783_regulator_platform_data mc13xxx_regulator_platform_data +#define mc13783_led_platform_data mc13xxx_led_platform_data +#define mc13783_leds_platform_data mc13xxx_leds_platform_data + +#define mc13783_platform_data 
mc13xxx_platform_data +#define MC13783_USE_TOUCHSCREEN MC13XXX_USE_TOUCHSCREEN +#define MC13783_USE_CODEC MC13XXX_USE_CODEC +#define MC13783_USE_ADC MC13XXX_USE_ADC +#define MC13783_USE_RTC MC13XXX_USE_RTC +#define MC13783_USE_REGULATOR MC13XXX_USE_REGULATOR +#define MC13783_USE_LED MC13XXX_USE_LED #define MC13783_ADC_MODE_TS 1 #define MC13783_ADC_MODE_SINGLE_CHAN 2 @@ -181,46 +156,46 @@ int mc13783_adc_do_conversion(struct mc13783 *mc13783, unsigned int mode, #define MC13783_REGU_PWGT1SPI 31 #define MC13783_REGU_PWGT2SPI 32 -#define MC13783_IRQ_ADCDONE 0 -#define MC13783_IRQ_ADCBISDONE 1 -#define MC13783_IRQ_TS 2 +#define MC13783_IRQ_ADCDONE MC13XXX_IRQ_ADCDONE +#define MC13783_IRQ_ADCBISDONE MC13XXX_IRQ_ADCBISDONE +#define MC13783_IRQ_TS MC13XXX_IRQ_TS #define MC13783_IRQ_WHIGH 3 #define MC13783_IRQ_WLOW 4 -#define MC13783_IRQ_CHGDET 6 +#define MC13783_IRQ_CHGDET MC13XXX_IRQ_CHGDET #define MC13783_IRQ_CHGOV 7 -#define MC13783_IRQ_CHGREV 8 -#define MC13783_IRQ_CHGSHORT 9 -#define MC13783_IRQ_CCCV 10 -#define MC13783_IRQ_CHGCURR 11 -#define MC13783_IRQ_BPON 12 -#define MC13783_IRQ_LOBATL 13 -#define MC13783_IRQ_LOBATH 14 +#define MC13783_IRQ_CHGREV MC13XXX_IRQ_CHGREV +#define MC13783_IRQ_CHGSHORT MC13XXX_IRQ_CHGSHORT +#define MC13783_IRQ_CCCV MC13XXX_IRQ_CCCV +#define MC13783_IRQ_CHGCURR MC13XXX_IRQ_CHGCURR +#define MC13783_IRQ_BPON MC13XXX_IRQ_BPON +#define MC13783_IRQ_LOBATL MC13XXX_IRQ_LOBATL +#define MC13783_IRQ_LOBATH MC13XXX_IRQ_LOBATH #define MC13783_IRQ_UDP 15 #define MC13783_IRQ_USB 16 #define MC13783_IRQ_ID 19 #define MC13783_IRQ_SE1 21 #define MC13783_IRQ_CKDET 22 #define MC13783_IRQ_UDM 23 -#define MC13783_IRQ_1HZ 24 -#define MC13783_IRQ_TODA 25 +#define MC13783_IRQ_1HZ MC13XXX_IRQ_1HZ +#define MC13783_IRQ_TODA MC13XXX_IRQ_TODA #define MC13783_IRQ_ONOFD1 27 #define MC13783_IRQ_ONOFD2 28 #define MC13783_IRQ_ONOFD3 29 -#define MC13783_IRQ_SYSRST 30 -#define MC13783_IRQ_RTCRST 31 -#define MC13783_IRQ_PC 32 -#define MC13783_IRQ_WARM 33 -#define MC13783_IRQ_MEMHLD 34 +#define MC13783_IRQ_SYSRST MC13XXX_IRQ_SYSRST +#define MC13783_IRQ_RTCRST MC13XXX_IRQ_RTCRST +#define MC13783_IRQ_PC MC13XXX_IRQ_PC +#define MC13783_IRQ_WARM MC13XXX_IRQ_WARM +#define MC13783_IRQ_MEMHLD MC13XXX_IRQ_MEMHLD #define MC13783_IRQ_PWRRDY 35 -#define MC13783_IRQ_THWARNL 36 -#define MC13783_IRQ_THWARNH 37 -#define MC13783_IRQ_CLK 38 +#define MC13783_IRQ_THWARNL MC13XXX_IRQ_THWARNL +#define MC13783_IRQ_THWARNH MC13XXX_IRQ_THWARNH +#define MC13783_IRQ_CLK MC13XXX_IRQ_CLK #define MC13783_IRQ_SEMAF 39 #define MC13783_IRQ_MC2B 41 #define MC13783_IRQ_HSDET 42 #define MC13783_IRQ_HSL 43 #define MC13783_IRQ_ALSPTH 44 #define MC13783_IRQ_AHSSHORT 45 -#define MC13783_NUM_IRQ 46 +#define MC13783_NUM_IRQ MC13XXX_NUM_IRQ -#endif /* __LINUX_MFD_MC13783_H */ +#endif /* ifndef __LINUX_MFD_MC13783_H */ diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h new file mode 100644 index 000000000000..a1d391b40e68 --- /dev/null +++ b/include/linux/mfd/mc13xxx.h @@ -0,0 +1,154 @@ +/* + * Copyright 2009-2010 Pengutronix + * Uwe Kleine-Koenig + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. 
+ */ +#ifndef __LINUX_MFD_MC13XXX_H +#define __LINUX_MFD_MC13XXX_H + +#include + +struct mc13xxx; + +void mc13xxx_lock(struct mc13xxx *mc13xxx); +void mc13xxx_unlock(struct mc13xxx *mc13xxx); + +int mc13xxx_reg_read(struct mc13xxx *mc13xxx, unsigned int offset, u32 *val); +int mc13xxx_reg_write(struct mc13xxx *mc13xxx, unsigned int offset, u32 val); +int mc13xxx_reg_rmw(struct mc13xxx *mc13xxx, unsigned int offset, + u32 mask, u32 val); + +int mc13xxx_get_flags(struct mc13xxx *mc13xxx); + +int mc13xxx_irq_request(struct mc13xxx *mc13xxx, int irq, + irq_handler_t handler, const char *name, void *dev); +int mc13xxx_irq_request_nounmask(struct mc13xxx *mc13xxx, int irq, + irq_handler_t handler, const char *name, void *dev); +int mc13xxx_irq_free(struct mc13xxx *mc13xxx, int irq, void *dev); + +int mc13xxx_irq_mask(struct mc13xxx *mc13xxx, int irq); +int mc13xxx_irq_unmask(struct mc13xxx *mc13xxx, int irq); +int mc13xxx_irq_status(struct mc13xxx *mc13xxx, int irq, + int *enabled, int *pending); +int mc13xxx_irq_ack(struct mc13xxx *mc13xxx, int irq); + +int mc13xxx_get_flags(struct mc13xxx *mc13xxx); + +#define MC13XXX_IRQ_ADCDONE 0 +#define MC13XXX_IRQ_ADCBISDONE 1 +#define MC13XXX_IRQ_TS 2 +#define MC13XXX_IRQ_CHGDET 6 +#define MC13XXX_IRQ_CHGREV 8 +#define MC13XXX_IRQ_CHGSHORT 9 +#define MC13XXX_IRQ_CCCV 10 +#define MC13XXX_IRQ_CHGCURR 11 +#define MC13XXX_IRQ_BPON 12 +#define MC13XXX_IRQ_LOBATL 13 +#define MC13XXX_IRQ_LOBATH 14 +#define MC13XXX_IRQ_1HZ 24 +#define MC13XXX_IRQ_TODA 25 +#define MC13XXX_IRQ_SYSRST 30 +#define MC13XXX_IRQ_RTCRST 31 +#define MC13XXX_IRQ_PC 32 +#define MC13XXX_IRQ_WARM 33 +#define MC13XXX_IRQ_MEMHLD 34 +#define MC13XXX_IRQ_THWARNL 36 +#define MC13XXX_IRQ_THWARNH 37 +#define MC13XXX_IRQ_CLK 38 + +#define MC13XXX_NUM_IRQ 46 + +struct regulator_init_data; + +struct mc13xxx_regulator_init_data { + int id; + struct regulator_init_data *init_data; +}; + +struct mc13xxx_regulator_platform_data { + int num_regulators; + struct mc13xxx_regulator_init_data *regulators; +}; + +struct mc13xxx_led_platform_data { +#define MC13783_LED_MD 0 +#define MC13783_LED_AD 1 +#define MC13783_LED_KP 2 +#define MC13783_LED_R1 3 +#define MC13783_LED_G1 4 +#define MC13783_LED_B1 5 +#define MC13783_LED_R2 6 +#define MC13783_LED_G2 7 +#define MC13783_LED_B2 8 +#define MC13783_LED_R3 9 +#define MC13783_LED_G3 10 +#define MC13783_LED_B3 11 +#define MC13783_LED_MAX MC13783_LED_B3 + int id; + const char *name; + const char *default_trigger; + +/* Three or two bits current selection depending on the led */ + char max_current; +}; + +struct mc13xxx_leds_platform_data { + int num_leds; + struct mc13xxx_led_platform_data *led; + +#define MC13783_LED_TRIODE_MD (1 << 0) +#define MC13783_LED_TRIODE_AD (1 << 1) +#define MC13783_LED_TRIODE_KP (1 << 2) +#define MC13783_LED_BOOST_EN (1 << 3) +#define MC13783_LED_TC1HALF (1 << 4) +#define MC13783_LED_SLEWLIMTC (1 << 5) +#define MC13783_LED_SLEWLIMBL (1 << 6) +#define MC13783_LED_TRIODE_TC1 (1 << 7) +#define MC13783_LED_TRIODE_TC2 (1 << 8) +#define MC13783_LED_TRIODE_TC3 (1 << 9) + int flags; + +#define MC13783_LED_AB_DISABLED 0 +#define MC13783_LED_AB_MD1 1 +#define MC13783_LED_AB_MD12 2 +#define MC13783_LED_AB_MD123 3 +#define MC13783_LED_AB_MD1234 4 +#define MC13783_LED_AB_MD1234_AD1 5 +#define MC13783_LED_AB_MD1234_AD12 6 +#define MC13783_LED_AB_MD1_AD 7 + char abmode; + +#define MC13783_LED_ABREF_200MV 0 +#define MC13783_LED_ABREF_400MV 1 +#define MC13783_LED_ABREF_600MV 2 +#define MC13783_LED_ABREF_800MV 3 + char abref; + +#define MC13783_LED_PERIOD_10MS 
0 +#define MC13783_LED_PERIOD_100MS 1 +#define MC13783_LED_PERIOD_500MS 2 +#define MC13783_LED_PERIOD_2S 3 + char bl_period; + char tc1_period; + char tc2_period; + char tc3_period; +}; + +struct mc13xxx_platform_data { +#define MC13XXX_USE_TOUCHSCREEN (1 << 0) +#define MC13XXX_USE_CODEC (1 << 1) +#define MC13XXX_USE_ADC (1 << 2) +#define MC13XXX_USE_RTC (1 << 3) +#define MC13XXX_USE_REGULATOR (1 << 4) +#define MC13XXX_USE_LED (1 << 5) + unsigned int flags; + + int num_regulators; + struct mc13xxx_regulator_init_data *regulators; + struct mc13xxx_leds_platform_data *leds; +}; + +#endif /* ifndef __LINUX_MFD_MC13XXX_H */ -- cgit v1.2.3 From 72f2e2c763edc41f8eead042b6ff933acb0378e2 Mon Sep 17 00:00:00 2001 From: kishore kadiyala Date: Fri, 24 Sep 2010 17:13:20 +0000 Subject: mfd: Adding twl6030 mmc card detect support for MMC1 Adding card detect callback function and card detect configuration function for MMC1 Controller on OMAP4. Card detect configuration function does initial configuration of the MMC Control & PullUp-PullDown registers of Phoenix. For MMC1 Controller, card detect interrupt source is twl6030 which is non-gpio. The card detect call back function provides card present/absent status by reading MMC Control register present on twl6030. Since OMAP4 doesn't use any GPIO line as used in OMAP3 for card detect, the suspend/resume initialization which was done in omap_hsmmc_gpio_init previously is moved to the probe thus making it generic for both OMAP3 & OMAP4. Cc: Tony Lindgren Cc: Andrew Morton Cc: Madhusudhan Chikkature Cc: Adrian Hunter Signed-off-by: Kishore Kadiyala Signed-off-by: Samuel Ortiz --- arch/arm/mach-omap2/board-4430sdp.c | 7 +++- drivers/mfd/twl6030-irq.c | 73 +++++++++++++++++++++++++++++++++++++ drivers/mmc/host/omap_hsmmc.c | 4 +- include/linux/i2c/twl.h | 31 ++++++++++++++++ 4 files changed, 112 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c index 69a4ae971e41..df5a425a49d1 100644 --- a/arch/arm/mach-omap2/board-4430sdp.c +++ b/arch/arm/mach-omap2/board-4430sdp.c @@ -269,9 +269,14 @@ static int omap4_twl6030_hsmmc_late_init(struct device *dev) struct omap_mmc_platform_data *pdata = dev->platform_data; /* Setting MMC1 Card detect Irq */ - if (pdev->id == 0) + if (pdev->id == 0) { + ret = twl6030_mmc_card_detect_config(); + if (ret) + pr_err("Failed configuring MMC1 card detect\n"); pdata->slots[0].card_detect_irq = TWL6030_IRQ_BASE + MMCDETECT_INTR_OFFSET; + pdata->slots[0].card_detect = twl6030_mmc_card_detect; + } return ret; } diff --git a/drivers/mfd/twl6030-irq.c b/drivers/mfd/twl6030-irq.c index 10bf228ad626..2d3bb82dbf24 100644 --- a/drivers/mfd/twl6030-irq.c +++ b/drivers/mfd/twl6030-irq.c @@ -36,6 +36,7 @@ #include #include #include +#include /* * TWL6030 (unlike its predecessors, which had two level interrupt handling) @@ -223,6 +224,78 @@ int twl6030_interrupt_mask(u8 bit_mask, u8 offset) } EXPORT_SYMBOL(twl6030_interrupt_mask); +int twl6030_mmc_card_detect_config(void) +{ + int ret; + u8 reg_val = 0; + + /* Unmasking the Card detect Interrupt line for MMC1 from Phoenix */ + twl6030_interrupt_unmask(TWL6030_MMCDETECT_INT_MASK, + REG_INT_MSK_LINE_B); + twl6030_interrupt_unmask(TWL6030_MMCDETECT_INT_MASK, + REG_INT_MSK_STS_B); + /* + * Initially Configuring MMC_CTRL for receiving interrupts & + * Card status on TWL6030 for MMC1 + */ + ret = twl_i2c_read_u8(TWL6030_MODULE_ID0, &reg_val, TWL6030_MMCCTRL); + if (ret < 0) { + pr_err("twl6030: Failed to read MMCCTRL, error
%d\n", ret); + return ret; + } + reg_val &= ~VMMC_AUTO_OFF; + reg_val |= SW_FC; + ret = twl_i2c_write_u8(TWL6030_MODULE_ID0, reg_val, TWL6030_MMCCTRL); + if (ret < 0) { + pr_err("twl6030: Failed to write MMCCTRL, error %d\n", ret); + return ret; + } + + /* Configuring PullUp-PullDown register */ + ret = twl_i2c_read_u8(TWL6030_MODULE_ID0, ®_val, + TWL6030_CFG_INPUT_PUPD3); + if (ret < 0) { + pr_err("twl6030: Failed to read CFG_INPUT_PUPD3, error %d\n", + ret); + return ret; + } + reg_val &= ~(MMC_PU | MMC_PD); + ret = twl_i2c_write_u8(TWL6030_MODULE_ID0, reg_val, + TWL6030_CFG_INPUT_PUPD3); + if (ret < 0) { + pr_err("twl6030: Failed to write CFG_INPUT_PUPD3, error %d\n", + ret); + return ret; + } + return 0; +} +EXPORT_SYMBOL(twl6030_mmc_card_detect_config); + +int twl6030_mmc_card_detect(struct device *dev, int slot) +{ + int ret = -EIO; + u8 read_reg = 0; + struct platform_device *pdev = to_platform_device(dev); + + if (pdev->id) { + /* TWL6030 provide's Card detect support for + * only MMC1 controller. + */ + pr_err("Unkown MMC controller %d in %s\n", pdev->id, __func__); + return ret; + } + /* + * BIT0 of MMC_CTRL on TWL6030 provides card status for MMC1 + * 0 - Card not present ,1 - Card present + */ + ret = twl_i2c_read_u8(TWL6030_MODULE_ID0, &read_reg, + TWL6030_MMCCTRL); + if (ret >= 0) + ret = read_reg & STS_MMC; + return ret; +} +EXPORT_SYMBOL(twl6030_mmc_card_detect); + int twl6030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end) { diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index e865032a52eb..82a1079bbdc7 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -483,8 +483,6 @@ static int omap_hsmmc_gpio_init(struct omap_mmc_platform_data *pdata) int ret; if (gpio_is_valid(pdata->slots[0].switch_pin)) { - pdata->suspend = omap_hsmmc_suspend_cdirq; - pdata->resume = omap_hsmmc_resume_cdirq; if (pdata->slots[0].cover) pdata->slots[0].get_cover_state = omap_hsmmc_get_cover_state; @@ -2218,6 +2216,8 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev) "Unable to grab MMC CD IRQ\n"); goto err_irq_cd; } + pdata->suspend = omap_hsmmc_suspend_cdirq; + pdata->resume = omap_hsmmc_resume_cdirq; } omap_hsmmc_disable_irq(host); diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index 53089516c17a..c760991b354a 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -141,6 +141,16 @@ #define TWL6030_CHARGER_CTRL_INT_MASK 0x10 #define TWL6030_CHARGER_FAULT_INT_MASK 0x60 +#define TWL6030_MMCCTRL 0xEE +#define VMMC_AUTO_OFF (0x1 << 3) +#define SW_FC (0x1 << 2) +#define STS_MMC 0x1 + +#define TWL6030_CFG_INPUT_PUPD3 0xF2 +#define MMC_PU (0x1 << 3) +#define MMC_PD (0x1 << 2) + + #define TWL4030_CLASS_ID 0x4030 #define TWL6030_CLASS_ID 0x6030 @@ -173,6 +183,27 @@ int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes); int twl6030_interrupt_unmask(u8 bit_mask, u8 offset); int twl6030_interrupt_mask(u8 bit_mask, u8 offset); +/* Card detect Configuration for MMC1 Controller on OMAP4 */ +#ifdef CONFIG_TWL4030_CORE +int twl6030_mmc_card_detect_config(void); +#else +static inline int twl6030_mmc_card_detect_config(void) +{ + pr_debug("twl6030_mmc_card_detect_config not supported\n"); + return 0; +} +#endif + +/* MMC1 Controller on OMAP4 uses Phoenix irq for Card detect */ +#ifdef CONFIG_TWL4030_CORE +int twl6030_mmc_card_detect(struct device *dev, int slot); +#else +static inline int twl6030_mmc_card_detect(struct device *dev, int slot) +{ + pr_debug("Call back twl6030_mmc_card_detect 
not supported\n"); + return -EIO; +} +#endif /*----------------------------------------------------------------------*/ /* -- cgit v1.2.3 From 509bd4764c110b89bb3d09a5b6621fd31dc58044 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Mon, 27 Sep 2010 14:32:24 +0200 Subject: mfd: Support for ICs compliant with max8998 Signed-off-by: Lukasz Majewski Signed-off-by: Kyungmin Park Acked-by: Mark Brown Acked-by: Liam Girdwood Signed-off-by: Samuel Ortiz --- drivers/mfd/max8998.c | 5 +++-- include/linux/mfd/max8998-private.h | 11 +++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/mfd/max8998.c b/drivers/mfd/max8998.c index 310fd8054f35..a720f412cd15 100644 --- a/drivers/mfd/max8998.c +++ b/drivers/mfd/max8998.c @@ -133,6 +133,7 @@ static int max8998_i2c_probe(struct i2c_client *i2c, max8998->dev = &i2c->dev; max8998->i2c = i2c; max8998->irq = i2c->irq; + max8998->type = id->driver_data; if (pdata) { max8998->ono = pdata->ono; max8998->irq_base = pdata->irq_base; @@ -169,8 +170,8 @@ static int max8998_i2c_remove(struct i2c_client *i2c) } static const struct i2c_device_id max8998_i2c_id[] = { - { "max8998", 0 }, - { "lp3974", 0 }, + { "max8998", TYPE_MAX8998 }, + { "lp3974", TYPE_LP3974}, { } }; MODULE_DEVICE_TABLE(i2c, max8998_i2c_id); diff --git a/include/linux/mfd/max8998-private.h b/include/linux/mfd/max8998-private.h index 170f665c7cdd..0ff42116d5dd 100644 --- a/include/linux/mfd/max8998-private.h +++ b/include/linux/mfd/max8998-private.h @@ -101,6 +101,13 @@ enum { MAX8998_IRQ_NR, }; +/* MAX8998 various variants */ +enum { + TYPE_MAX8998 = 0, /* Default */ + TYPE_LP3974, /* National version of MAX8998 */ + TYPE_LP3979, /* Added AVS */ +}; + #define MAX8998_IRQ_DCINF_MASK (1 << 2) #define MAX8998_IRQ_DCINR_MASK (1 << 3) #define MAX8998_IRQ_JIGF_MASK (1 << 4) @@ -123,6 +130,8 @@ enum { #define MAX8998_IRQ_LOBAT1_MASK (1 << 0) #define MAX8998_IRQ_LOBAT2_MASK (1 << 1) +#define MAX8998_ENRAMP (1 << 4) + /** * struct max8998_dev - max8998 master device for sub-drivers * @dev: master device of the chip (can be used to access platform data) @@ -135,6 +144,7 @@ enum { * @ono: power onoff IRQ number for max8998 * @irq_masks_cur: currently active value * @irq_masks_cache: cached hardware value + * @type: indicate which max8998 "variant" is used */ struct max8998_dev { struct device *dev; @@ -148,6 +158,7 @@ struct max8998_dev { int ono; u8 irq_masks_cur[MAX8998_NUM_IRQ_REGS]; u8 irq_masks_cache[MAX8998_NUM_IRQ_REGS]; + int type; }; int max8998_irq_init(struct max8998_dev *max8998); -- cgit v1.2.3 From 889cd5a60f880e0a56b7b769d0b74eb222e6896c Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Mon, 27 Sep 2010 14:32:25 +0200 Subject: regulator: max8998 BUCK1/2 internal voltages and indexes defined BUCK1/2 internal voltages and indexes defined in the struct max8998_data max_get_voltage_register now uses index values to chose proper register More generic BUCK1/2 registers names provided Signed-off-by: Lukasz Majewski Signed-off-by: Kyungmin Park Acked-by: Mark Brown Acked-by: Liam Girdwood Signed-off-by: Samuel Ortiz --- drivers/regulator/max8998.c | 10 ++++++++-- include/linux/mfd/max8998-private.h | 12 ++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/regulator/max8998.c b/drivers/regulator/max8998.c index 1e5bd504fbad..7ae0639c1394 100644 --- a/drivers/regulator/max8998.c +++ b/drivers/regulator/max8998.c @@ -39,6 +39,11 @@ struct max8998_data { struct max8998_dev *iodev; int 
num_regulators; struct regulator_dev **rdev; + u8 buck1_vol[4]; /* voltages for selection */ + u8 buck2_vol[2]; + unsigned int buck1_idx; /* index to last changed voltage */ + /* value in a set */ + unsigned int buck2_idx; }; struct voltage_map_desc { @@ -218,6 +223,7 @@ static int max8998_get_voltage_register(struct regulator_dev *rdev, int *_reg, int *_shift, int *_mask) { int ldo = max8998_get_ldo(rdev); + struct max8998_data *max8998 = rdev_get_drvdata(rdev); int reg, shift = 0, mask = 0xff; switch (ldo) { @@ -254,10 +260,10 @@ static int max8998_get_voltage_register(struct regulator_dev *rdev, reg = MAX8998_REG_LDO12 + (ldo - MAX8998_LDO12); break; case MAX8998_BUCK1: - reg = MAX8998_REG_BUCK1_DVSARM1; + reg = MAX8998_REG_BUCK1_VOLTAGE1 + max8998->buck1_idx; break; case MAX8998_BUCK2: - reg = MAX8998_REG_BUCK2_DVSINT1; + reg = MAX8998_REG_BUCK2_VOLTAGE1 + max8998->buck2_idx; break; case MAX8998_BUCK3: reg = MAX8998_REG_BUCK3; diff --git a/include/linux/mfd/max8998-private.h b/include/linux/mfd/max8998-private.h index 0ff42116d5dd..7363dea6bbcd 100644 --- a/include/linux/mfd/max8998-private.h +++ b/include/linux/mfd/max8998-private.h @@ -48,12 +48,12 @@ enum { MAX8998_REG_ONOFF2, MAX8998_REG_ONOFF3, MAX8998_REG_ONOFF4, - MAX8998_REG_BUCK1_DVSARM1, - MAX8998_REG_BUCK1_DVSARM2, - MAX8998_REG_BUCK1_DVSARM3, - MAX8998_REG_BUCK1_DVSARM4, - MAX8998_REG_BUCK2_DVSINT1, - MAX8998_REG_BUCK2_DVSINT2, + MAX8998_REG_BUCK1_VOLTAGE1, + MAX8998_REG_BUCK1_VOLTAGE2, + MAX8998_REG_BUCK1_VOLTAGE3, + MAX8998_REG_BUCK1_VOLTAGE4, + MAX8998_REG_BUCK2_VOLTAGE1, + MAX8998_REG_BUCK2_VOLTAGE2, MAX8998_REG_BUCK3, MAX8998_REG_BUCK4, MAX8998_REG_LDO2_LDO3, -- cgit v1.2.3 From 58aa6334fbf5cf420a47cfd2718a0b299f40a379 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Mon, 27 Sep 2010 14:32:26 +0200 Subject: mfd: Voltages and GPIOs platform_data definitions for max8998 Signed-off-by: Lukasz Majewski Signed-off-by: Kyungmin Park Acked-by: Mark Brown Acked-by: Liam Girdwood Signed-off-by: Samuel Ortiz --- include/linux/mfd/max8998.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h index d47ed4c190fe..f8c9f884aff2 100644 --- a/include/linux/mfd/max8998.h +++ b/include/linux/mfd/max8998.h @@ -70,12 +70,24 @@ struct max8998_regulator_data { * @num_regulators: number of regulators used * @irq_base: base IRQ number for max8998, required for IRQs * @ono: power onoff IRQ number for max8998 + * @buck1_max_voltage1: BUCK1 maximum allowed voltage register 1 + * @buck1_max_voltage2: BUCK1 maximum allowed voltage register 2 + * @buck2_max_voltage: BUCK2 maximum allowed voltage + * @buck1_set1: BUCK1 gpio pin 1 to set output voltage + * @buck1_set2: BUCK1 gpio pin 2 to set output voltage + * @buck2_set3: BUCK2 gpio pin to set output voltage */ struct max8998_platform_data { struct max8998_regulator_data *regulators; int num_regulators; int irq_base; int ono; + int buck1_max_voltage1; + int buck1_max_voltage2; + int buck2_max_voltage; + int buck1_set1; + int buck1_set2; + int buck2_set3; }; #endif /* __LINUX_MFD_MAX8998_H */ -- cgit v1.2.3 From e5b486841d572c5ac83c798f82f4f67cbbac5320 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Oct 2010 23:57:56 +0200 Subject: mfd: Factor out WM831x I2C I/O from the core driver In preparation for the addition of SPI support for the WM831x move the I2C specific code into a separate file with a separate Kconfig option so the I2C support can be excluded from the build.
Also update the 1133-EV1 PMIC module support for SMDK6410 to use the new symbol. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- arch/arm/mach-s3c64xx/Kconfig | 1 + drivers/mfd/Kconfig | 15 +++-- drivers/mfd/Makefile | 1 + drivers/mfd/wm831x-core.c | 138 ++------------------------------------ drivers/mfd/wm831x-i2c.c | 143 ++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/wm831x/core.h | 12 ++++ 6 files changed, 171 insertions(+), 139 deletions(-) create mode 100644 drivers/mfd/wm831x-i2c.c (limited to 'include') diff --git a/arch/arm/mach-s3c64xx/Kconfig b/arch/arm/mach-s3c64xx/Kconfig index 1e4d78af7d84..546db5cb8929 100644 --- a/arch/arm/mach-s3c64xx/Kconfig +++ b/arch/arm/mach-s3c64xx/Kconfig @@ -185,6 +185,7 @@ config SMDK6410_WM1192_EV1 select REGULATOR_WM831X select S3C24XX_GPIO_EXTRA64 select MFD_WM831X + select MFD_WM831X_I2C help The Wolfson Microelectronics 1192-EV1 is a WM831x based PMIC daughtercard for the Samsung SMDK6410 reference platform. diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 6c6b9f02d177..2fb1e892331c 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -315,14 +315,19 @@ config MFD_WM8400 the functionality of the device. config MFD_WM831X - bool "Support Wolfson Microelectronics WM831x/2x PMICs" + bool + depends on GENERIC_HARDIRQS + +config MFD_WM831X_I2C + bool "Support Wolfson Microelectronics WM831x/2x PMICs with I2C" select MFD_CORE + select MFD_WM831X depends on I2C=y && GENERIC_HARDIRQS help - Support for the Wolfson Microelectronics WM831x and WM832x PMICs. - This driver provides common support for accessing the device, - additional drivers must be enabled in order to use the - functionality of the device. + Support for the Wolfson Microelectronics WM831x and WM832x PMICs + when controlled using I2C. This driver provides common support + for accessing the device, additional drivers must be enabled in + order to use the functionality of the device. config MFD_WM8350 bool diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 70b26999dc81..c9ef41b13bf3 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_MFD_TC6393XB) += tc6393xb.o tmio_core.o obj-$(CONFIG_MFD_WM8400) += wm8400-core.o wm831x-objs := wm831x-core.o wm831x-irq.o wm831x-otp.o obj-$(CONFIG_MFD_WM831X) += wm831x.o +obj-$(CONFIG_MFD_WM831X_I2C) += wm831x-i2c.o wm8350-objs := wm8350-core.o wm8350-regmap.o wm8350-gpio.o wm8350-objs += wm8350-irq.o obj-$(CONFIG_MFD_WM8350) += wm8350.o diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c index ad36579bc815..7d2563fc15c6 100644 --- a/drivers/mfd/wm831x-core.c +++ b/drivers/mfd/wm831x-core.c @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -90,15 +89,6 @@ int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL + 1] = { }; EXPORT_SYMBOL_GPL(wm831x_isinkv_values); -enum wm831x_parent { - WM8310 = 0x8310, - WM8311 = 0x8311, - WM8312 = 0x8312, - WM8320 = 0x8320, - WM8321 = 0x8321, - WM8325 = 0x8325, -}; - static int wm831x_reg_locked(struct wm831x *wm831x, unsigned short reg) { if (!wm831x->locked) @@ -1447,7 +1437,7 @@ static struct mfd_cell backlight_devs[] = { /* * Instantiate the generic non-control parts of the device.
*/ -static int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq) +int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq) { struct wm831x_pdata *pdata = wm831x->dev->platform_data; int rev; @@ -1673,7 +1663,7 @@ err: return ret; } -static void wm831x_device_exit(struct wm831x *wm831x) +void wm831x_device_exit(struct wm831x *wm831x) { wm831x_otp_exit(wm831x); mfd_remove_devices(wm831x->dev); @@ -1683,7 +1673,7 @@ static void wm831x_device_exit(struct wm831x *wm831x) kfree(wm831x); } -static int wm831x_device_suspend(struct wm831x *wm831x) +int wm831x_device_suspend(struct wm831x *wm831x) { int reg, mask; @@ -1719,126 +1709,6 @@ static int wm831x_device_suspend(struct wm831x *wm831x) return 0; } -static int wm831x_i2c_read_device(struct wm831x *wm831x, unsigned short reg, - int bytes, void *dest) -{ - struct i2c_client *i2c = wm831x->control_data; - int ret; - u16 r = cpu_to_be16(reg); - - ret = i2c_master_send(i2c, (unsigned char *)&r, 2); - if (ret < 0) - return ret; - if (ret != 2) - return -EIO; - - ret = i2c_master_recv(i2c, dest, bytes); - if (ret < 0) - return ret; - if (ret != bytes) - return -EIO; - return 0; -} - -/* Currently we allocate the write buffer on the stack; this is OK for - * small writes - if we need to do large writes this will need to be - * revised. - */ -static int wm831x_i2c_write_device(struct wm831x *wm831x, unsigned short reg, - int bytes, void *src) -{ - struct i2c_client *i2c = wm831x->control_data; - unsigned char msg[bytes + 2]; - int ret; - - reg = cpu_to_be16(reg); - memcpy(&msg[0], &reg, 2); - memcpy(&msg[2], src, bytes); - - ret = i2c_master_send(i2c, msg, bytes + 2); - if (ret < 0) - return ret; - if (ret < bytes + 2) - return -EIO; - - return 0; -} - -static int wm831x_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) -{ - struct wm831x *wm831x; - - wm831x = kzalloc(sizeof(struct wm831x), GFP_KERNEL); - if (wm831x == NULL) - return -ENOMEM; - - i2c_set_clientdata(i2c, wm831x); - wm831x->dev = &i2c->dev; - wm831x->control_data = i2c; - wm831x->read_dev = wm831x_i2c_read_device; - wm831x->write_dev = wm831x_i2c_write_device; - - return wm831x_device_init(wm831x, id->driver_data, i2c->irq); -} - -static int wm831x_i2c_remove(struct i2c_client *i2c) -{ - struct wm831x *wm831x = i2c_get_clientdata(i2c); - - wm831x_device_exit(wm831x); - - return 0; -} - -static int wm831x_i2c_suspend(struct i2c_client *i2c, pm_message_t mesg) -{ - struct wm831x *wm831x = i2c_get_clientdata(i2c); - - return wm831x_device_suspend(wm831x); -} - -static const struct i2c_device_id wm831x_i2c_id[] = { - { "wm8310", WM8310 }, - { "wm8311", WM8311 }, - { "wm8312", WM8312 }, - { "wm8320", WM8320 }, - { "wm8321", WM8321 }, - { "wm8325", WM8325 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, wm831x_i2c_id); - - -static struct i2c_driver wm831x_i2c_driver = { - .driver = { - .name = "wm831x", - .owner = THIS_MODULE, - }, - .probe = wm831x_i2c_probe, - .remove = wm831x_i2c_remove, - .suspend = wm831x_i2c_suspend, - .id_table = wm831x_i2c_id, -}; - -static int __init wm831x_i2c_init(void) -{ - int ret; - - ret = i2c_add_driver(&wm831x_i2c_driver); - if (ret != 0) - pr_err("Failed to register wm831x I2C driver: %d\n", ret); - - return ret; -} -subsys_initcall(wm831x_i2c_init); - -static void __exit wm831x_i2c_exit(void) -{ - i2c_del_driver(&wm831x_i2c_driver); -} -module_exit(wm831x_i2c_exit); - -MODULE_DESCRIPTION("I2C support for the WM831X AudioPlus PMIC"); +MODULE_DESCRIPTION("Core support for the WM831X AudioPlus PMIC");
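With the I2C specifics gone, the core is bus-agnostic: a backend only has to fill in the read_dev/write_dev callbacks and hand control to wm831x_device_init(). As a rough sketch of what the planned SPI support could look like (hypothetical code, assuming wm831x_spi_read_device()/wm831x_spi_write_device() helpers that frame registers the way the I2C accessors in the new file below do):

        static int wm831x_spi_probe(struct spi_device *spi)
        {
                const struct spi_device_id *id = spi_get_device_id(spi);
                struct wm831x *wm831x;

                wm831x = kzalloc(sizeof(struct wm831x), GFP_KERNEL);
                if (wm831x == NULL)
                        return -ENOMEM;

                spi_set_drvdata(spi, wm831x);
                wm831x->dev = &spi->dev;
                wm831x->control_data = spi;
                /* assumed SPI equivalents of the I2C accessors */
                wm831x->read_dev = wm831x_spi_read_device;
                wm831x->write_dev = wm831x_spi_write_device;

                return wm831x_device_init(wm831x, id->driver_data, spi->irq);
        }

The only bus-specific knowledge lives behind the two function pointers; IRQ handling, OTP and subdevice registration all stay in the shared core.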
MODULE_LICENSE("GPL"); MODULE_AUTHOR("Mark Brown"); diff --git a/drivers/mfd/wm831x-i2c.c b/drivers/mfd/wm831x-i2c.c new file mode 100644 index 000000000000..156b19859e81 --- /dev/null +++ b/drivers/mfd/wm831x-i2c.c @@ -0,0 +1,143 @@ +/* + * wm831x-i2c.c -- I2C access for Wolfson WM831x PMICs + * + * Copyright 2009,2010 Wolfson Microelectronics PLC. + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +static int wm831x_i2c_read_device(struct wm831x *wm831x, unsigned short reg, + int bytes, void *dest) +{ + struct i2c_client *i2c = wm831x->control_data; + int ret; + u16 r = cpu_to_be16(reg); + + ret = i2c_master_send(i2c, (unsigned char *)&r, 2); + if (ret < 0) + return ret; + if (ret != 2) + return -EIO; + + ret = i2c_master_recv(i2c, dest, bytes); + if (ret < 0) + return ret; + if (ret != bytes) + return -EIO; + return 0; +} + +/* Currently we allocate the write buffer on the stack; this is OK for + * small writes - if we need to do large writes this will need to be + * revised. + */ +static int wm831x_i2c_write_device(struct wm831x *wm831x, unsigned short reg, + int bytes, void *src) +{ + struct i2c_client *i2c = wm831x->control_data; + unsigned char msg[bytes + 2]; + int ret; + + reg = cpu_to_be16(reg); + memcpy(&msg[0], ®, 2); + memcpy(&msg[2], src, bytes); + + ret = i2c_master_send(i2c, msg, bytes + 2); + if (ret < 0) + return ret; + if (ret < bytes + 2) + return -EIO; + + return 0; +} + +static int wm831x_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct wm831x *wm831x; + + wm831x = kzalloc(sizeof(struct wm831x), GFP_KERNEL); + if (wm831x == NULL) + return -ENOMEM; + + i2c_set_clientdata(i2c, wm831x); + wm831x->dev = &i2c->dev; + wm831x->control_data = i2c; + wm831x->read_dev = wm831x_i2c_read_device; + wm831x->write_dev = wm831x_i2c_write_device; + + return wm831x_device_init(wm831x, id->driver_data, i2c->irq); +} + +static int wm831x_i2c_remove(struct i2c_client *i2c) +{ + struct wm831x *wm831x = i2c_get_clientdata(i2c); + + wm831x_device_exit(wm831x); + + return 0; +} + +static int wm831x_i2c_suspend(struct i2c_client *i2c, pm_message_t mesg) +{ + struct wm831x *wm831x = i2c_get_clientdata(i2c); + + return wm831x_device_suspend(wm831x); +} + +static const struct i2c_device_id wm831x_i2c_id[] = { + { "wm8310", WM8310 }, + { "wm8311", WM8311 }, + { "wm8312", WM8312 }, + { "wm8320", WM8320 }, + { "wm8321", WM8321 }, + { "wm8325", WM8325 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, wm831x_i2c_id); + + +static struct i2c_driver wm831x_i2c_driver = { + .driver = { + .name = "wm831x", + .owner = THIS_MODULE, + }, + .probe = wm831x_i2c_probe, + .remove = wm831x_i2c_remove, + .suspend = wm831x_i2c_suspend, + .id_table = wm831x_i2c_id, +}; + +static int __init wm831x_i2c_init(void) +{ + int ret; + + ret = i2c_add_driver(&wm831x_i2c_driver); + if (ret != 0) + pr_err("Failed to register wm831x I2C driver: %d\n", ret); + + return ret; +} +subsys_initcall(wm831x_i2c_init); + +static void __exit wm831x_i2c_exit(void) +{ + i2c_del_driver(&wm831x_i2c_driver); +} +module_exit(wm831x_i2c_exit); diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h index eb5bd4e0e03c..a1239c48b41a 100644 --- a/include/linux/mfd/wm831x/core.h +++ 
diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h index eb5bd4e0e03c..a1239c48b41a 100644 --- a/include/linux/mfd/wm831x/core.h +++ b/include/linux/mfd/wm831x/core.h @@ -238,6 +238,15 @@ struct regulator_dev; #define WM831X_NUM_IRQ_REGS 5 +enum wm831x_parent { + WM8310 = 0x8310, + WM8311 = 0x8311, + WM8312 = 0x8312, + WM8320 = 0x8320, + WM8321 = 0x8321, + WM8325 = 0x8325, +}; + struct wm831x { struct mutex io_lock; @@ -285,6 +294,9 @@ int wm831x_set_bits(struct wm831x *wm831x, unsigned short reg, int wm831x_bulk_read(struct wm831x *wm831x, unsigned short reg, int count, u16 *buf); +int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq); +void wm831x_device_exit(struct wm831x *wm831x); +int wm831x_device_suspend(struct wm831x *wm831x); int wm831x_irq_init(struct wm831x *wm831x, int irq); void wm831x_irq_exit(struct wm831x *wm831x); -- cgit v1.2.3 From c96e41e92b4aaf11e1f9775ecf0d1c8cbff829ed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Jul 2010 00:17:56 +0400 Subject: beginning of transition: ->mount() eventual replacement for ->get_sb() - does *not* get vfsmount, returns ERR_PTR(error) or root of subtree to be mounted. Signed-off-by: Al Viro --- fs/super.c | 17 ++++++++++++++--- include/linux/fs.h | 2 ++ 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/super.c b/fs/super.c index b9c9869165db..00a2c9662b55 100644 --- a/fs/super.c +++ b/fs/super.c @@ -918,6 +918,7 @@ struct vfsmount * vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) { struct vfsmount *mnt; + struct dentry *root; char *secdata = NULL; int error; @@ -942,9 +943,19 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void goto out_free_secdata; } - error = type->get_sb(type, flags, name, data, mnt); - if (error < 0) - goto out_free_secdata; + if (type->mount) { + root = type->mount(type, flags, name, data); + if (IS_ERR(root)) { + error = PTR_ERR(root); + goto out_free_secdata; + } + mnt->mnt_root = root; + mnt->mnt_sb = root->d_sb; + } else { + error = type->get_sb(type, flags, name, data, mnt); + if (error < 0) + goto out_free_secdata; + } BUG_ON(!mnt->mnt_sb); WARN_ON(!mnt->mnt_sb->s_bdi); mnt->mnt_sb->s_flags |= MS_BORN; diff --git a/include/linux/fs.h b/include/linux/fs.h index 1c73b50e81ff..c6b474311690 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1772,6 +1772,8 @@ struct file_system_type { int fs_flags; int (*get_sb) (struct file_system_type *, int, const char *, void *, struct vfsmount *); + struct dentry *(*mount) (struct file_system_type *, int, + const char *, void *); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; -- cgit v1.2.3
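Seen from the callee's side, the new contract is: return the root dentry of the subtree to be mounted, or an ERR_PTR(), and never touch a vfsmount. A minimal sketch, not taken from any commit here - examplefs and examplefs_fill_super are invented names, while sget(), set_anon_super(), the MS_SILENT/MS_ACTIVE handling and the dget(s->s_root) return mirror the in-tree patterns of this period:

	static struct dentry *examplefs_mount(struct file_system_type *fs_type,
					      int flags, const char *dev_name,
					      void *data)
	{
		struct super_block *s;
		int err;

		s = sget(fs_type, NULL, set_anon_super, NULL);
		if (IS_ERR(s))
			return ERR_CAST(s);	/* errors travel as ERR_PTR() */

		s->s_flags = flags;
		err = examplefs_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
		if (err) {
			deactivate_locked_super(s);
			return ERR_PTR(err);
		}
		s->s_flags |= MS_ACTIVE;

		/* vfs_kern_mount() now derives mnt_root and mnt_sb itself */
		return dget(s->s_root);
	}

	static struct file_system_type examplefs_type = {
		.owner	 = THIS_MODULE,
		.name	 = "examplefs",
		.mount	 = examplefs_mount,
		.kill_sb = kill_anon_super,
	};

mount_bdev(), mount_mtd() and mount_single() in the commits that follow are exactly this pattern factored out for the common backends.

From 152a08366671080f27b32e0c411ad620c5f88b57 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Jul 2010 00:46:55 +0400 Subject: new helper: mount_bdev() ...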
and switch of the obvious get_sb_bdev() users to ->mount() Signed-off-by: Al Viro --- fs/adfs/super.c | 9 ++++----- fs/affs/super.c | 9 ++++----- fs/befs/linuxvfs.c | 11 +++++------ fs/bfs/inode.c | 8 ++++---- fs/cramfs/inode.c | 9 ++++----- fs/efs/super.c | 8 ++++---- fs/ext2/super.c | 8 ++++---- fs/ext3/super.c | 8 ++++---- fs/ext4/super.c | 16 ++++++++-------- fs/fat/namei_msdos.c | 9 ++++----- fs/fat/namei_vfat.c | 9 ++++----- fs/freevxfs/vxfs_super.c | 9 ++++----- fs/fuse/inode.c | 9 ++++----- fs/hfs/super.c | 9 ++++----- fs/hfsplus/super.c | 10 ++++------ fs/hpfs/super.c | 9 ++++----- fs/isofs/inode.c | 9 ++++----- fs/jfs/super.c | 9 ++++----- fs/minix/inode.c | 9 ++++----- fs/ntfs/super.c | 9 ++++----- fs/ocfs2/super.c | 11 ++++------- fs/omfs/inode.c | 9 ++++----- fs/qnx4/inode.c | 9 ++++----- fs/reiserfs/super.c | 9 ++++----- fs/squashfs/super.c | 10 ++++------ fs/super.c | 28 +++++++++++++++++++++------- fs/sysv/super.c | 17 ++++++++--------- fs/udf/super.c | 9 ++++----- fs/ufs/super.c | 8 ++++---- fs/xfs/linux-2.6/xfs_super.c | 12 +++++------- include/linux/fs.h | 3 +++ 31 files changed, 150 insertions(+), 161 deletions(-) (limited to 'include') diff --git a/fs/adfs/super.c b/fs/adfs/super.c index d9803f73236f..959dbff2d42d 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -490,17 +490,16 @@ error: return -EINVAL; } -static int adfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *adfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, adfs_fill_super); } static struct file_system_type adfs_fs_type = { .owner = THIS_MODULE, .name = "adfs", - .get_sb = adfs_get_sb, + .mount = adfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/affs/super.c b/fs/affs/super.c index fa4fbe1e238a..0cf7f4384cbd 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -573,17 +573,16 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static int affs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *affs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, affs_fill_super); } static struct file_system_type affs_fs_type = { .owner = THIS_MODULE, .name = "affs", - .get_sb = affs_get_sb, + .mount = affs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index dc39d2824885..aa4e7c7ae3c6 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -913,18 +913,17 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static int -befs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) +static struct dentry * +befs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, + void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, befs_fill_super); } static struct file_system_type befs_fs_type = { .owner = THIS_MODULE, .name = "befs", - .get_sb = befs_get_sb, + .mount = befs_mount, .kill_sb = kill_block_super, .fs_flags 
= FS_REQUIRES_DEV, }; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 883e77acd5a8..76db6d7d49bb 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -450,16 +450,16 @@ out: return ret; } -static int bfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *bfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, bfs_fill_super); } static struct file_system_type bfs_fs_type = { .owner = THIS_MODULE, .name = "bfs", - .get_sb = bfs_get_sb, + .mount = bfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 1e7a33028d33..32fd5fe9ca0e 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -533,17 +533,16 @@ static const struct super_operations cramfs_ops = { .statfs = cramfs_statfs, }; -static int cramfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *cramfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, cramfs_fill_super); } static struct file_system_type cramfs_fs_type = { .owner = THIS_MODULE, .name = "cramfs", - .get_sb = cramfs_get_sb, + .mount = cramfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/efs/super.c b/fs/efs/super.c index f04942810818..5073a07652cc 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -20,16 +20,16 @@ static int efs_statfs(struct dentry *dentry, struct kstatfs *buf); static int efs_fill_super(struct super_block *s, void *d, int silent); -static int efs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *efs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, efs_fill_super); } static struct file_system_type efs_fs_type = { .owner = THIS_MODULE, .name = "efs", - .get_sb = efs_get_sb, + .mount = efs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 0901320671da..d89e0b6a2d78 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1356,10 +1356,10 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) return 0; } -static int ext2_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ext2_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, ext2_fill_super); } #ifdef CONFIG_QUOTA @@ -1473,7 +1473,7 @@ out: static struct file_system_type ext2_fs_type = { .owner = THIS_MODULE, .name = "ext2", - .get_sb = ext2_get_sb, + .mount = ext2_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index db87413d3479..2fedaf8b5012 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -3020,16 +3020,16 @@ out: #endif -static int ext3_get_sb(struct 
file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ext3_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super); } static struct file_system_type ext3_fs_type = { .owner = THIS_MODULE, .name = "ext3", - .get_sb = ext3_get_sb, + .mount = ext3_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0348ce066592..40131b777af6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -73,8 +73,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); static int ext4_unfreeze(struct super_block *sb); static void ext4_write_super(struct super_block *sb); static int ext4_freeze(struct super_block *sb); -static int ext4_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt); +static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data); static void ext4_destroy_lazyinit_thread(void); static void ext4_unregister_li_request(struct super_block *sb); @@ -82,7 +82,7 @@ static void ext4_unregister_li_request(struct super_block *sb); static struct file_system_type ext3_fs_type = { .owner = THIS_MODULE, .name = "ext3", - .get_sb = ext4_get_sb, + .mount = ext4_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; @@ -4667,17 +4667,17 @@ out: #endif -static int ext4_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); + return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super); } #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static struct file_system_type ext2_fs_type = { .owner = THIS_MODULE, .name = "ext2", - .get_sb = ext4_get_sb, + .mount = ext4_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; @@ -4722,7 +4722,7 @@ static inline void unregister_as_ext3(void) { } static struct file_system_type ext4_fs_type = { .owner = THIS_MODULE, .name = "ext4", - .get_sb = ext4_get_sb, + .mount = ext4_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index bbca5c186ae7..3345aabd1dd7 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -675,18 +675,17 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static int msdos_get_sb(struct file_system_type *fs_type, +static struct dentry *msdos_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, msdos_fill_super); } static struct file_system_type msdos_fs_type = { .owner = THIS_MODULE, .name = "msdos", - .get_sb = msdos_get_sb, + .mount = msdos_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 6f0f6c9a0152..b936703b8924 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -1071,18 +1071,17 @@ static int 
vfat_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static int vfat_get_sb(struct file_system_type *fs_type, +static struct dentry *vfat_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, vfat_fill_super); } static struct file_system_type vfat_fs_type = { .owner = THIS_MODULE, .name = "vfat", - .get_sb = vfat_get_sb, + .mount = vfat_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 71b0148b8784..9d1c99558389 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -246,17 +246,16 @@ out: /* * The usual module blurb. */ -static int vxfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *vxfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, vxfs_fill_super); } static struct file_system_type vxfs_fs_type = { .owner = THIS_MODULE, .name = "vxfs", - .get_sb = vxfs_get_sb, + .mount = vxfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index da9e6e11374c..edf6a1843533 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1070,12 +1070,11 @@ static struct file_system_type fuse_fs_type = { }; #ifdef CONFIG_BLOCK -static int fuse_get_sb_blk(struct file_system_type *fs_type, +static struct dentry *fuse_mount_blk(struct file_system_type *fs_type, int flags, const char *dev_name, - void *raw_data, struct vfsmount *mnt) + void *raw_data) { - return get_sb_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super); } static void fuse_kill_sb_blk(struct super_block *sb) @@ -1094,7 +1093,7 @@ static void fuse_kill_sb_blk(struct super_block *sb) static struct file_system_type fuseblk_fs_type = { .owner = THIS_MODULE, .name = "fuseblk", - .get_sb = fuse_get_sb_blk, + .mount = fuse_mount_blk, .kill_sb = fuse_kill_sb_blk, .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 6ee1586f2334..4824c27cebb8 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -441,17 +441,16 @@ bail: return res; } -static int hfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *hfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, hfs_fill_super); } static struct file_system_type hfs_fs_type = { .owner = THIS_MODULE, .name = "hfs", - .get_sb = hfs_get_sb, + .mount = hfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 9a88d7536103..52cc746d3ba3 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -495,18 +495,16 @@ static void hfsplus_destroy_inode(struct inode *inode) #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) -static int hfsplus_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - 
struct vfsmount *mnt) +static struct dentry *hfsplus_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super); } static struct file_system_type hfsplus_fs_type = { .owner = THIS_MODULE, .name = "hfsplus", - .get_sb = hfsplus_get_sb, + .mount = hfsplus_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index c969a1aa163a..bb69389972eb 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -686,17 +686,16 @@ bail0: return -EINVAL; } -static int hpfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *hpfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, hpfs_fill_super); } static struct file_system_type hpfs_fs_type = { .owner = THIS_MODULE, .name = "hpfs", - .get_sb = hpfs_get_sb, + .mount = hpfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 79cf7f616bbe..bfdeb82a53be 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -1507,17 +1507,16 @@ struct inode *isofs_iget(struct super_block *sb, return inode; } -static int isofs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *isofs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super); } static struct file_system_type iso9660_fs_type = { .owner = THIS_MODULE, .name = "iso9660", - .get_sb = isofs_get_sb, + .mount = isofs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 68eee2bf629e..0669fc1cc3bf 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -583,11 +583,10 @@ static int jfs_unfreeze(struct super_block *sb) return 0; } -static int jfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *jfs_do_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, jfs_fill_super); } static int jfs_sync_fs(struct super_block *sb, int wait) @@ -770,7 +769,7 @@ static const struct export_operations jfs_export_operations = { static struct file_system_type jfs_fs_type = { .owner = THIS_MODULE, .name = "jfs", - .get_sb = jfs_get_sb, + .mount = jfs_do_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index e39d6bf2e8fb..fb2020858a34 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -614,17 +614,16 @@ void minix_truncate(struct inode * inode) V2_minix_truncate(inode); } -static int minix_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *minix_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, 
data, minix_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, minix_fill_super); } static struct file_system_type minix_fs_type = { .owner = THIS_MODULE, .name = "minix", - .get_sb = minix_get_sb, + .mount = minix_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index d3fbe5730bfc..a30ecacc01f2 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3059,17 +3059,16 @@ struct kmem_cache *ntfs_index_ctx_cache; /* Driver wide mutex. */ DEFINE_MUTEX(ntfs_lock); -static int ntfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ntfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, ntfs_fill_super); } static struct file_system_type ntfs_fs_type = { .owner = THIS_MODULE, .name = "ntfs", - .get_sb = ntfs_get_sb, + .mount = ntfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 56f0cb395820..f02c0ef31578 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1236,14 +1236,12 @@ read_super_error: return status; } -static int ocfs2_get_sb(struct file_system_type *fs_type, +static struct dentry *ocfs2_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, - struct vfsmount *mnt) + void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super); } static void ocfs2_kill_sb(struct super_block *sb) @@ -1267,8 +1265,7 @@ out: static struct file_system_type ocfs2_fs_type = { .owner = THIS_MODULE, .name = "ocfs2", - .get_sb = ocfs2_get_sb, /* is this called when we mount - * the fs? 
*/ + .mount = ocfs2_mount, .kill_sb = ocfs2_kill_sb, .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 14a22863291a..e043c4cb9a97 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -557,17 +557,16 @@ end: return ret; } -static int omfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data, struct vfsmount *m) +static struct dentry *omfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, omfs_fill_super, m); + return mount_bdev(fs_type, flags, dev_name, data, omfs_fill_super); } static struct file_system_type omfs_fs_type = { .owner = THIS_MODULE, .name = "omfs", - .get_sb = omfs_get_sb, + .mount = omfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 01bad30026fc..fcada42f1aa3 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -454,17 +454,16 @@ static void destroy_inodecache(void) kmem_cache_destroy(qnx4_inode_cachep); } -static int qnx4_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *qnx4_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, qnx4_fill_super); } static struct file_system_type qnx4_fs_type = { .owner = THIS_MODULE, .name = "qnx4", - .get_sb = qnx4_get_sb, + .mount = qnx4_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index e15ff612002d..3bf7a6457f4d 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2213,12 +2213,11 @@ out: #endif -static int get_super_block(struct file_system_type *fs_type, +static struct dentry *get_super_block(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super); } static int __init init_reiserfs_fs(void) @@ -2253,7 +2252,7 @@ static void __exit exit_reiserfs_fs(void) struct file_system_type reiserfs_fs_type = { .owner = THIS_MODULE, .name = "reiserfs", - .get_sb = get_super_block, + .mount = get_super_block, .kill_sb = reiserfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 07a4f1156048..24de30ba34c1 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -370,12 +370,10 @@ static void squashfs_put_super(struct super_block *sb) } -static int squashfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *squashfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, squashfs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super); } @@ -451,7 +449,7 @@ static void squashfs_destroy_inode(struct inode *inode) static struct file_system_type squashfs_fs_type = { .owner = THIS_MODULE, .name = "squashfs", - .get_sb = squashfs_get_sb, + .mount = squashfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV }; diff --git a/fs/super.c b/fs/super.c index 00a2c9662b55..40989e9a2606 100644 
--- a/fs/super.c +++ b/fs/super.c @@ -762,10 +762,9 @@ static int test_bdev_super(struct super_block *s, void *data) return (void *)s->s_bdev == data; } -int get_sb_bdev(struct file_system_type *fs_type, +struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt) + int (*fill_super)(struct super_block *, void *, int)) { struct block_device *bdev; struct super_block *s; @@ -777,7 +776,7 @@ int get_sb_bdev(struct file_system_type *fs_type, bdev = open_bdev_exclusive(dev_name, mode, fs_type); if (IS_ERR(bdev)) - return PTR_ERR(bdev); + return ERR_CAST(bdev); /* * once the super is inserted into the list by sget, s_umount @@ -829,15 +828,30 @@ int get_sb_bdev(struct file_system_type *fs_type, bdev->bd_super = s; } - simple_set_mnt(mnt, s); - return 0; + return dget(s->s_root); error_s: error = PTR_ERR(s); error_bdev: close_bdev_exclusive(bdev, mode); error: - return error; + return ERR_PTR(error); +} +EXPORT_SYMBOL(mount_bdev); + +int get_sb_bdev(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt) +{ + struct dentry *root; + + root = mount_bdev(fs_type, flags, dev_name, data, fill_super); + if (IS_ERR(root)) + return PTR_ERR(root); + mnt->mnt_root = root; + mnt->mnt_sb = root->d_sb; + return 0; } EXPORT_SYMBOL(get_sb_bdev); diff --git a/fs/sysv/super.c b/fs/sysv/super.c index a0b0cda6927e..3d9c62be0c10 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -526,23 +526,22 @@ failed: /* Every kernel module contains stuff like this. */ -static int sysv_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *sysv_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, sysv_fill_super); } -static int v7_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *v7_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, v7_fill_super); } static struct file_system_type sysv_fs_type = { .owner = THIS_MODULE, .name = "sysv", - .get_sb = sysv_get_sb, + .mount = sysv_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; @@ -550,7 +549,7 @@ static struct file_system_type sysv_fs_type = { static struct file_system_type v7_fs_type = { .owner = THIS_MODULE, .name = "v7", - .get_sb = v7_get_sb, + .mount = v7_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/udf/super.c b/fs/udf/super.c index 76f3d6d97b40..4a5c7c61836a 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -107,17 +107,16 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi) } /* UDF filesystem type */ -static int udf_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *udf_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, 
udf_fill_super); } static struct file_system_type udf_fstype = { .owner = THIS_MODULE, .name = "udf", - .get_sb = udf_get_sb, + .mount = udf_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 6b9be90dae7d..2c47daed56da 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1454,16 +1454,16 @@ static const struct super_operations ufs_super_ops = { .show_options = ufs_show_options, }; -static int ufs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ufs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, ufs_fill_super); } static struct file_system_type ufs_fs_type = { .owner = THIS_MODULE, .name = "ufs", - .get_sb = ufs_get_sb, + .mount = ufs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index cf808782c065..9f3a78fe6ae4 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1609,16 +1609,14 @@ xfs_fs_fill_super( goto out_free_sb; } -STATIC int -xfs_fs_get_sb( +STATIC struct dentry * +xfs_fs_mount( struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, - struct vfsmount *mnt) + void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); } static const struct super_operations xfs_super_operations = { @@ -1639,7 +1637,7 @@ static const struct super_operations xfs_super_operations = { static struct file_system_type xfs_fs_type = { .owner = THIS_MODULE, .name = "xfs", - .get_sb = xfs_fs_get_sb, + .mount = xfs_fs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index c6b474311690..2fab5a24ca51 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1792,6 +1792,9 @@ struct file_system_type { extern int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int), struct vfsmount *mnt); +extern struct dentry *mount_bdev(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + int (*fill_super)(struct super_block *, void *, int)); extern int get_sb_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int), -- cgit v1.2.3
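For reference, here is the other half of the mount_bdev() contract, which none of the hunks above spell out: the fill_super callback it takes. A hedged sketch with invented names and a fake magic number; a real filesystem would read and validate its on-disk superblock through sb->s_bdev, while the libfs helpers and d_alloc_root() are the APIs of this era (d_make_root() did not exist yet):

	static int examplefs_fill_super(struct super_block *sb, void *data,
					int silent)
	{
		struct inode *root;

		sb->s_magic = 0x12345678;	/* invented magic number */

		root = new_inode(sb);
		if (!root)
			return -ENOMEM;
		root->i_ino = 1;
		root->i_mode = S_IFDIR | 0755;
		root->i_op = &simple_dir_inode_operations;
		root->i_fop = &simple_dir_operations;

		sb->s_root = d_alloc_root(root);
		if (!sb->s_root) {
			iput(root);
			return -ENOMEM;
		}
		return 0;
	}

From 848b83a59b772b8f102bc5e3f1187c2fa5676959 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Jul 2010 00:56:46 +0400 Subject: convert get_sb_mtd() users to ->mount() Signed-off-by: Al Viro --- drivers/mtd/mtdsuper.c | 54 +++++++++++++++++++---------------------- fs/jffs2/super.c | 9 ++++---- fs/romfs/super.c | 17 +++++++-------- include/linux/mtd/super.h | 5 ++--- 4 files changed, 36 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c index 38e2ab07e7a3..16b02a1fc100 100644 --- a/drivers/mtd/mtdsuper.c +++ b/drivers/mtd/mtdsuper.c @@ -54,11 +54,10 @@ static int get_sb_mtd_set(struct super_block *sb, void *_mtd) /* * get a superblock on an MTD-backed filesystem */ -static int get_sb_mtd_aux(struct file_system_type *fs_type, int flags, +static struct dentry *mount_mtd_aux(struct file_system_type *fs_type, int flags,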
const char *dev_name, void *data, struct mtd_info *mtd, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt) + int (*fill_super)(struct super_block *, void *, int)) { struct super_block *sb; int ret; @@ -79,57 +78,49 @@ static int get_sb_mtd_aux(struct file_system_type *fs_type, int flags, ret = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (ret < 0) { deactivate_locked_super(sb); - return ret; + return ERR_PTR(ret); } /* go */ sb->s_flags |= MS_ACTIVE; - simple_set_mnt(mnt, sb); - - return 0; + return dget(sb->s_root); /* new mountpoint for an already mounted superblock */ already_mounted: DEBUG(1, "MTDSB: Device %d (\"%s\") is already mounted\n", mtd->index, mtd->name); - simple_set_mnt(mnt, sb); - ret = 0; - goto out_put; + put_mtd_device(mtd); + return dget(sb->s_root); out_error: - ret = PTR_ERR(sb); -out_put: put_mtd_device(mtd); - return ret; + return ERR_CAST(sb); } /* * get a superblock on an MTD-backed filesystem by MTD device number */ -static int get_sb_mtd_nr(struct file_system_type *fs_type, int flags, +static struct dentry *mount_mtd_nr(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int mtdnr, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt) + int (*fill_super)(struct super_block *, void *, int)) { struct mtd_info *mtd; mtd = get_mtd_device(NULL, mtdnr); if (IS_ERR(mtd)) { DEBUG(0, "MTDSB: Device #%u doesn't appear to exist\n", mtdnr); - return PTR_ERR(mtd); + return ERR_CAST(mtd); } - return get_sb_mtd_aux(fs_type, flags, dev_name, data, mtd, fill_super, - mnt); + return mount_mtd_aux(fs_type, flags, dev_name, data, mtd, fill_super); } /* * set up an MTD-based superblock */ -int get_sb_mtd(struct file_system_type *fs_type, int flags, +struct dentry *mount_mtd(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt) + int (*fill_super)(struct super_block *, void *, int)) { #ifdef CONFIG_BLOCK struct block_device *bdev; @@ -138,7 +129,7 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags, int mtdnr; if (!dev_name) - return -EINVAL; + return ERR_PTR(-EINVAL); DEBUG(2, "MTDSB: dev_name \"%s\"\n", dev_name); @@ -156,10 +147,10 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags, mtd = get_mtd_device_nm(dev_name + 4); if (!IS_ERR(mtd)) - return get_sb_mtd_aux( + return mount_mtd_aux( fs_type, flags, dev_name, data, mtd, - fill_super, mnt); + fill_super); printk(KERN_NOTICE "MTD:" " MTD device with name \"%s\" not found.\n", @@ -174,9 +165,9 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags, /* It was a valid number */ DEBUG(1, "MTDSB: mtd%%d, mtdnr %d\n", mtdnr); - return get_sb_mtd_nr(fs_type, flags, + return mount_mtd_nr(fs_type, flags, dev_name, data, - mtdnr, fill_super, mnt); + mtdnr, fill_super); } } } @@ -189,7 +180,7 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags, if (IS_ERR(bdev)) { ret = PTR_ERR(bdev); DEBUG(1, "MTDSB: lookup_bdev() returned %d\n", ret); - return ret; + return ERR_PTR(ret); } DEBUG(1, "MTDSB: lookup_bdev() returned 0\n"); @@ -202,8 +193,7 @@ int get_sb_mtd(struct file_system_type *fs_type, int flags, if (major != MTD_BLOCK_MAJOR) goto not_an_MTD_device; - return get_sb_mtd_nr(fs_type, flags, dev_name, data, mtdnr, fill_super, - mnt); + return mount_mtd_nr(fs_type, flags, dev_name, data, mtdnr, fill_super); not_an_MTD_device: #endif /* CONFIG_BLOCK */ @@ -212,10 +202,10 @@ not_an_MTD_device: printk(KERN_NOTICE "MTD: Attempt to mount non-MTD device \"%s\"\n", dev_name); - return -EINVAL; + return ERR_PTR(-EINVAL); } -EXPORT_SYMBOL_GPL(get_sb_mtd); +EXPORT_SYMBOL_GPL(mount_mtd); /* * destroy an MTD-based superblock diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index d1ae5dfc22b9..c86041b866a4 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -179,12 +179,11 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent) return ret; } -static int jffs2_get_sb(struct file_system_type *fs_type, +static struct dentry *jffs2_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_mtd(fs_type, flags, dev_name, data, jffs2_fill_super, - mnt); + return mount_mtd(fs_type, flags, dev_name, data, jffs2_fill_super); } static void jffs2_put_super (struct super_block *sb) @@ -229,7 +228,7 @@ static void jffs2_kill_sb(struct super_block *sb) static struct file_system_type jffs2_fs_type = { .owner = THIS_MODULE, .name = "jffs2", - .get_sb = jffs2_get_sb, + .mount = jffs2_mount, .kill_sb = jffs2_kill_sb, }; diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 268580535c92..6647f90e55cd 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -552,20 +552,19 @@ error_rsb: /* * get a superblock for mounting */ -static int romfs_get_sb(struct file_system_type *fs_type, +static struct dentry *romfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - int ret = -EINVAL; + struct dentry *ret = ERR_PTR(-EINVAL); #ifdef CONFIG_ROMFS_ON_MTD - ret = get_sb_mtd(fs_type, flags, dev_name, data, romfs_fill_super, - mnt); + ret = mount_mtd(fs_type, flags, dev_name, data, romfs_fill_super); #endif #ifdef CONFIG_ROMFS_ON_BLOCK - if (ret == -EINVAL) - ret = get_sb_bdev(fs_type, flags, dev_name, data, - romfs_fill_super, mnt); + if (ret == ERR_PTR(-EINVAL)) + ret = mount_bdev(fs_type, flags, dev_name, data, + romfs_fill_super); #endif return ret; } @@ -592,7 +591,7 @@ static void romfs_kill_sb(struct super_block *sb) static struct file_system_type romfs_fs_type = { .owner = THIS_MODULE, .name = "romfs", - .get_sb = romfs_get_sb, + .mount = romfs_mount, .kill_sb = romfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/include/linux/mtd/super.h b/include/linux/mtd/super.h index 4016dd6fe336..f456230f9330 100644 --- a/include/linux/mtd/super.h +++ b/include/linux/mtd/super.h @@ -18,10 +18,9 @@ #include <linux/fs.h> #include <linux/mount.h> -extern int get_sb_mtd(struct file_system_type *fs_type, int flags, +extern struct dentry *mount_mtd(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt); + int (*fill_super)(struct super_block *, void *, int)); extern void kill_mtd_super(struct super_block *sb); -- cgit v1.2.3
drivers/usb/gadget/inode.c | 10 +++++----- drivers/xen/xenfs/super.c | 8 ++++---- fs/binfmt_misc.c | 8 ++++---- fs/configfs/mount.c | 8 ++++---- fs/debugfs/inode.c | 8 ++++---- fs/devpts/inode.c | 32 +++++++++++++++---------------- fs/fuse/control.c | 10 ++++------ fs/nfsd/nfsctl.c | 8 ++++---- fs/openpromfs/inode.c | 8 ++++---- fs/super.c | 25 ++++++++++++++++++------ include/linux/fs.h | 3 +++ net/sunrpc/rpc_pipe.c | 18 ++++++++--------- security/inode.c | 8 ++++---- security/selinux/selinuxfs.c | 9 ++++----- security/smack/smackfs.c | 12 +++++------- 25 files changed, 147 insertions(+), 139 deletions(-) (limited to 'include') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 5dec408d6703..3532b92de983 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -798,17 +798,17 @@ spufs_fill_super(struct super_block *sb, void *data, int silent) return spufs_create_root(sb, data); } -static int -spufs_get_sb(struct file_system_type *fstype, int flags, - const char *name, void *data, struct vfsmount *mnt) +static struct dentry * +spufs_mount(struct file_system_type *fstype, int flags, + const char *name, void *data) { - return get_sb_single(fstype, flags, data, spufs_fill_super, mnt); + return mount_single(fstype, flags, data, spufs_fill_super); } static struct file_system_type spufs_type = { .owner = THIS_MODULE, .name = "spufs", - .get_sb = spufs_get_sb, + .mount = spufs_mount, .kill_sb = kill_litter_super, }; diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 74d98670be27..47cc446dab8f 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -316,10 +316,10 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static int hypfs_get_super(struct file_system_type *fst, int flags, - const char *devname, void *data, struct vfsmount *mnt) +static struct dentry *hypfs_mount(struct file_system_type *fst, int flags, + const char *devname, void *data) { - return get_sb_single(fst, flags, data, hypfs_fill_super, mnt); + return mount_single(fst, flags, data, hypfs_fill_super); } static void hypfs_kill_super(struct super_block *sb) @@ -455,7 +455,7 @@ static const struct file_operations hypfs_file_ops = { static struct file_system_type hypfs_type = { .owner = THIS_MODULE, .name = "s390_hypfs", - .get_sb = hypfs_get_super, + .mount = hypfs_mount, .kill_sb = hypfs_kill_super }; diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index af0600143d1c..82bbb5967aa9 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -29,33 +29,33 @@ static struct vfsmount *dev_mnt; #if defined CONFIG_DEVTMPFS_MOUNT -static int dev_mount = 1; +static int mount_dev = 1; #else -static int dev_mount; +static int mount_dev; #endif static DEFINE_MUTEX(dirlock); static int __init mount_param(char *str) { - dev_mount = simple_strtoul(str, NULL, 0); + mount_dev = simple_strtoul(str, NULL, 0); return 1; } __setup("devtmpfs.mount=", mount_param); -static int dev_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *dev_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) { #ifdef CONFIG_TMPFS - return get_sb_single(fs_type, flags, data, shmem_fill_super, mnt); + return mount_single(fs_type, flags, data, shmem_fill_super); #else - return get_sb_single(fs_type, flags, data, ramfs_fill_super, mnt); + return mount_single(fs_type, 
flags, data, ramfs_fill_super); #endif } static struct file_system_type dev_fs_type = { .name = "devtmpfs", - .get_sb = dev_get_sb, + .mount = dev_mount, .kill_sb = kill_litter_super, }; @@ -351,7 +351,7 @@ int devtmpfs_mount(const char *mntdir) { int err; - if (!dev_mount) + if (!mount_dev) return 0; if (!dev_mnt) diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 12d5bf76302c..8c8afc716b98 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -362,13 +362,13 @@ bail: return ret; } -static int ipathfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ipathfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - int ret = get_sb_single(fs_type, flags, data, - ipathfs_fill_super, mnt); - if (ret >= 0) - ipath_super = mnt->mnt_sb; + struct dentry *ret; + ret = mount_single(fs_type, flags, data, ipathfs_fill_super); + if (!IS_ERR(ret)) + ipath_super = ret->d_sb; return ret; } @@ -411,7 +411,7 @@ bail: static struct file_system_type ipathfs_fs_type = { .owner = THIS_MODULE, .name = "ipathfs", - .get_sb = ipathfs_get_sb, + .mount = ipathfs_mount, .kill_sb = ipathfs_kill_super, }; diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 7e433d75c775..f99bddc01716 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -555,13 +555,13 @@ bail: return ret; } -static int qibfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *qibfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) { - int ret = get_sb_single(fs_type, flags, data, - qibfs_fill_super, mnt); - if (ret >= 0) - qib_super = mnt->mnt_sb; + struct dentry *ret; + ret = mount_single(fs_type, flags, data, qibfs_fill_super); + if (!IS_ERR(ret)) + qib_super = ret->d_sb; return ret; } @@ -603,7 +603,7 @@ int qibfs_remove(struct qib_devdata *dd) static struct file_system_type qibfs_fs_type = { .owner = THIS_MODULE, .name = "ipathfs", - .get_sb = qibfs_get_sb, + .mount = qibfs_mount, .kill_sb = qibfs_kill_super, }; diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c index 2b83850997c3..b4faed7fe0d3 100644 --- a/drivers/isdn/capi/capifs.c +++ b/drivers/isdn/capi/capifs.c @@ -125,16 +125,16 @@ fail: return -ENOMEM; } -static int capifs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *capifs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, capifs_fill_super, mnt); + return mount_single(fs_type, flags, data, capifs_fill_super); } static struct file_system_type capifs_fs_type = { .owner = THIS_MODULE, .name = "capifs", - .get_sb = capifs_get_sb, + .mount = capifs_mount, .kill_sb = kill_anon_super, }; diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index 0a53500636c9..d2d5d23416dd 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -91,11 +91,10 @@ static void ibmasmfs_create_files (struct super_block *sb, struct dentry *root); static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent); -static int ibmasmfs_get_super(struct file_system_type *fst, - int flags, const char *name, void *data, - 
struct vfsmount *mnt) +static struct dentry *ibmasmfs_mount(struct file_system_type *fst, + int flags, const char *name, void *data) { - return get_sb_single(fst, flags, data, ibmasmfs_fill_super, mnt); + return mount_single(fst, flags, data, ibmasmfs_fill_super); } static const struct super_operations ibmasmfs_s_ops = { @@ -108,7 +107,7 @@ static const struct file_operations *ibmasmfs_dir_ops = &simple_dir_operations; static struct file_system_type ibmasmfs_type = { .owner = THIS_MODULE, .name = "ibmasmfs", - .get_sb = ibmasmfs_get_super, + .mount = ibmasmfs_mount, .kill_sb = kill_litter_super, }; diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index 449de59bf35b..e9ff6f7770be 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -259,17 +259,17 @@ static int oprofilefs_fill_super(struct super_block *sb, void *data, int silent) } -static int oprofilefs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *oprofilefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, oprofilefs_fill_super, mnt); + return mount_single(fs_type, flags, data, oprofilefs_fill_super); } static struct file_system_type oprofilefs_type = { .owner = THIS_MODULE, .name = "oprofilefs", - .get_sb = oprofilefs_get_sb, + .mount = oprofilefs_mount, .kill_sb = kill_litter_super, }; diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index e2f63c0ea09d..9819a4cc3b26 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -574,16 +574,16 @@ static void fs_remove_file (struct dentry *dentry) /* --------------------------------------------------------------------- */ -static int usb_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *usb_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, usbfs_fill_super, mnt); + return mount_single(fs_type, flags, data, usbfs_fill_super); } static struct file_system_type usb_fs_type = { .owner = THIS_MODULE, .name = "usbfs", - .get_sb = usb_get_sb, + .mount = usb_mount, .kill_sb = kill_litter_super, }; diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c index f276e9594f00..4a830df4fc31 100644 --- a/drivers/usb/gadget/f_fs.c +++ b/drivers/usb/gadget/f_fs.c @@ -1176,9 +1176,9 @@ invalid: /* "mount -t functionfs dev_name /dev/function" ends up here */ -static int -ffs_fs_get_sb(struct file_system_type *t, int flags, - const char *dev_name, void *opts, struct vfsmount *mnt) +static struct dentry * +ffs_fs_mount(struct file_system_type *t, int flags, + const char *dev_name, void *opts) { struct ffs_sb_fill_data data = { .perms = { @@ -1194,14 +1194,14 @@ ffs_fs_get_sb(struct file_system_type *t, int flags, ret = functionfs_check_dev_callback(dev_name); if (unlikely(ret < 0)) - return ret; + return ERR_PTR(ret); ret = ffs_fs_parse_opts(&data, opts); if (unlikely(ret < 0)) - return ret; + return ERR_PTR(ret); data.dev_name = dev_name; - return get_sb_single(t, flags, &data, ffs_sb_fill, mnt); + return mount_single(t, flags, &data, ffs_sb_fill); } static void @@ -1220,7 +1220,7 @@ ffs_fs_kill_sb(struct super_block *sb) static struct file_system_type ffs_fs_type = { .owner = THIS_MODULE, .name = "functionfs", - .get_sb = ffs_fs_get_sb, + .mount = ffs_fs_mount, .kill_sb = 
ffs_fs_kill_sb, }; diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index ba145e7fbe03..3ed73f49cf18 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -2097,11 +2097,11 @@ enomem0: } /* "mount -t gadgetfs path /dev/gadget" ends up here */ -static int -gadgetfs_get_sb (struct file_system_type *t, int flags, - const char *path, void *opts, struct vfsmount *mnt) +static struct dentry * +gadgetfs_mount (struct file_system_type *t, int flags, + const char *path, void *opts) { - return get_sb_single (t, flags, opts, gadgetfs_fill_super, mnt); + return mount_single (t, flags, opts, gadgetfs_fill_super); } static void @@ -2119,7 +2119,7 @@ gadgetfs_kill_sb (struct super_block *sb) static struct file_system_type gadgetfs_type = { .owner = THIS_MODULE, .name = shortname, - .get_sb = gadgetfs_get_sb, + .mount = gadgetfs_mount, .kill_sb = gadgetfs_kill_sb, }; diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index d6662b789b6b..f6339d11d59c 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -121,17 +121,17 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent) return rc; } -static int xenfs_get_sb(struct file_system_type *fs_type, +static struct dentry *xenfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_single(fs_type, flags, data, xenfs_fill_super, mnt); + return mount_single(fs_type, flags, data, xenfs_fill_super); } static struct file_system_type xenfs_type = { .owner = THIS_MODULE, .name = "xenfs", - .get_sb = xenfs_get_sb, + .mount = xenfs_mount, .kill_sb = kill_litter_super, }; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 29990f0eee0c..1befe2ec8186 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -706,10 +706,10 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent) return err; } -static int bm_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *bm_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, bm_fill_super, mnt); + return mount_single(fs_type, flags, data, bm_fill_super); } static struct linux_binfmt misc_format = { @@ -720,7 +720,7 @@ static struct linux_binfmt misc_format = { static struct file_system_type bm_fs_type = { .owner = THIS_MODULE, .name = "binfmt_misc", - .get_sb = bm_get_sb, + .mount = bm_mount, .kill_sb = kill_litter_super, }; diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 8c8d64230c2d..7d3607febe1c 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -104,16 +104,16 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static int configfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *configfs_do_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, configfs_fill_super, mnt); + return mount_single(fs_type, flags, data, configfs_fill_super); } static struct file_system_type configfs_fs_type = { .owner = THIS_MODULE, .name = "configfs", - .get_sb = configfs_get_sb, + .mount = configfs_do_mount, .kill_sb = kill_litter_super, }; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index a4ed8380e98a..37a8ca7c1222 100644 --- a/fs/debugfs/inode.c +++
b/fs/debugfs/inode.c @@ -135,17 +135,17 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent) return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); } -static int debug_get_sb(struct file_system_type *fs_type, +static struct dentry *debug_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_single(fs_type, flags, data, debug_fill_super, mnt); + return mount_single(fs_type, flags, data, debug_fill_super); } static struct file_system_type debug_fs_type = { .owner = THIS_MODULE, .name = "debugfs", - .get_sb = debug_get_sb, + .mount = debug_mount, .kill_sb = kill_litter_super, }; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 8b3ffd5b5235..1bb547c9cad6 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -331,7 +331,7 @@ static int compare_init_pts_sb(struct super_block *s, void *p) } /* - * devpts_get_sb() + * devpts_mount() * * If the '-o newinstance' mount option was specified, mount a new * (private) instance of devpts. PTYs created in this instance are @@ -345,20 +345,20 @@ static int compare_init_pts_sb(struct super_block *s, void *p) * semantics in devpts while preserving backward compatibility of the * current 'single-namespace' semantics. i.e all mounts of devpts * without the 'newinstance' mount option should bind to the initial - * kernel mount, like get_sb_single(). + * kernel mount, like mount_single(). * * Mounts with 'newinstance' option create a new, private namespace. * * NOTE: * - * For single-mount semantics, devpts cannot use get_sb_single(), - * because get_sb_single()/sget() find and use the super-block from + * For single-mount semantics, devpts cannot use mount_single(), + * because mount_single()/sget() find and use the super-block from * the most recent mount of devpts. But that recent mount may be a - * 'newinstance' mount and get_sb_single() would pick the newinstance + * 'newinstance' mount and mount_single() would pick the newinstance * super-block instead of the initial super-block. 
*/ -static int devpts_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *devpts_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { int error; struct pts_mount_opts opts; @@ -366,7 +366,7 @@ static int devpts_get_sb(struct file_system_type *fs_type, error = parse_mount_options(data, PARSE_MOUNT, &opts); if (error) - return error; + return ERR_PTR(error); if (opts.newinstance) s = sget(fs_type, NULL, set_anon_super, NULL); @@ -374,7 +374,7 @@ static int devpts_get_sb(struct file_system_type *fs_type, s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL); if (IS_ERR(s)) - return PTR_ERR(s); + return ERR_CAST(s); if (!s->s_root) { s->s_flags = flags; @@ -390,13 +390,11 @@ static int devpts_get_sb(struct file_system_type *fs_type, if (error) goto out_undo_sget; - simple_set_mnt(mnt, s); - - return 0; + return dget(s->s_root); out_undo_sget: deactivate_locked_super(s); - return error; + return ERR_PTR(error); } #else @@ -404,10 +402,10 @@ out_undo_sget: * This supports only the legacy single-instance semantics (no * multiple-instance semantics) */ -static int devpts_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt); + return mount_single(fs_type, flags, data, devpts_fill_super); } #endif @@ -421,7 +419,7 @@ static void devpts_kill_sb(struct super_block *sb) static struct file_system_type devpts_fs_type = { .name = "devpts", - .get_sb = devpts_get_sb, + .mount = devpts_mount, .kill_sb = devpts_kill_sb, }; diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 4eba07661e5c..85542a7daf40 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -322,12 +322,10 @@ static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static int fuse_ctl_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data, - struct vfsmount *mnt) +static struct dentry *fuse_ctl_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) { - return get_sb_single(fs_type, flags, raw_data, - fuse_ctl_fill_super, mnt); + return mount_single(fs_type, flags, raw_data, fuse_ctl_fill_super); } static void fuse_ctl_kill_sb(struct super_block *sb) @@ -346,7 +344,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb) static struct file_system_type fuse_ctl_fs_type = { .owner = THIS_MODULE, .name = "fusectl", - .get_sb = fuse_ctl_get_sb, + .mount = fuse_ctl_mount, .kill_sb = fuse_ctl_kill_sb, }; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index d6dc3f61f8ba..4514ebbee4d6 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1405,16 +1405,16 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) return simple_fill_super(sb, 0x6e667364, nfsd_files); } -static int nfsd_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *nfsd_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, nfsd_fill_super, mnt); + return mount_single(fs_type, flags, data, nfsd_fill_super); } static struct file_system_type nfsd_fs_type = { .owner = THIS_MODULE, .name = "nfsd", - .get_sb = nfsd_get_sb, + 
.mount = nfsd_mount, .kill_sb = kill_litter_super, }; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index ffcd04f0012c..ddb1f41376e5 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -415,16 +415,16 @@ out_no_root: return ret; } -static int openprom_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *openprom_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, openprom_fill_super, mnt); + return mount_single(fs_type, flags, data, openprom_fill_super); } static struct file_system_type openprom_fs_type = { .owner = THIS_MODULE, .name = "openpromfs", - .get_sb = openprom_get_sb, + .mount = openprom_mount, .kill_sb = kill_anon_super, }; diff --git a/fs/super.c b/fs/super.c index 40989e9a2606..6f021a171ac6 100644 --- a/fs/super.c +++ b/fs/super.c @@ -900,29 +900,42 @@ static int compare_single(struct super_block *s, void *p) return 1; } -int get_sb_single(struct file_system_type *fs_type, +struct dentry *mount_single(struct file_system_type *fs_type, int flags, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt) + int (*fill_super)(struct super_block *, void *, int)) { struct super_block *s; int error; s = sget(fs_type, compare_single, set_anon_super, NULL); if (IS_ERR(s)) - return PTR_ERR(s); + return ERR_CAST(s); if (!s->s_root) { s->s_flags = flags; error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); - return error; + return ERR_PTR(error); } s->s_flags |= MS_ACTIVE; } else { do_remount_sb(s, flags, data, 0); } - simple_set_mnt(mnt, s); + return dget(s->s_root); +} +EXPORT_SYMBOL(mount_single); + +int get_sb_single(struct file_system_type *fs_type, + int flags, void *data, + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt) +{ + struct dentry *root; + root = mount_single(fs_type, flags, data, fill_super); + if (IS_ERR(root)) + return PTR_ERR(root); + mnt->mnt_root = root; + mnt->mnt_sb = root->d_sb; return 0; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 2fab5a24ca51..0aa2f1202afa 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1799,6 +1799,9 @@ extern int get_sb_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int), struct vfsmount *mnt); +extern struct dentry *mount_single(struct file_system_type *fs_type, + int flags, void *data, + int (*fill_super)(struct super_block *, void *, int)); extern int get_sb_single(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int), struct vfsmount *mnt); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 7df92d237cb8..10a17a37ec4e 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -28,7 +28,7 @@ #include #include -static struct vfsmount *rpc_mount __read_mostly; +static struct vfsmount *rpc_mnt __read_mostly; static int rpc_mount_count; static struct file_system_type rpc_pipe_fs_type; @@ -417,16 +417,16 @@ struct vfsmount *rpc_get_mount(void) { int err; - err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mount, &rpc_mount_count); + err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mnt, &rpc_mount_count); if (err != 0) return ERR_PTR(err); - return rpc_mount; + return rpc_mnt; } EXPORT_SYMBOL_GPL(rpc_get_mount); void rpc_put_mount(void) { - simple_release_fs(&rpc_mount, &rpc_mount_count); +
simple_release_fs(&rpc_mnt, &rpc_mount_count); } EXPORT_SYMBOL_GPL(rpc_put_mount); @@ -1018,17 +1018,17 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static int -rpc_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry * +rpc_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, rpc_fill_super, mnt); + return mount_single(fs_type, flags, data, rpc_fill_super); } static struct file_system_type rpc_pipe_fs_type = { .owner = THIS_MODULE, .name = "rpc_pipefs", - .get_sb = rpc_get_sb, + .mount = rpc_mount, .kill_sb = kill_litter_super, }; diff --git a/security/inode.c b/security/inode.c index cb8f47c66a58..c4df2fbebe6b 100644 --- a/security/inode.c +++ b/security/inode.c @@ -131,17 +131,17 @@ static int fill_super(struct super_block *sb, void *data, int silent) return simple_fill_super(sb, SECURITYFS_MAGIC, files); } -static int get_sb(struct file_system_type *fs_type, +static struct dentry *get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_single(fs_type, flags, data, fill_super, mnt); + return mount_single(fs_type, flags, data, fill_super); } static struct file_system_type fs_type = { .owner = THIS_MODULE, .name = "securityfs", - .get_sb = get_sb, + .mount = get_sb, .kill_sb = kill_litter_super, }; diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 55a755c1a1bd..073fd5b0a53a 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1909,16 +1909,15 @@ err: goto out; } -static int sel_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *sel_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, sel_fill_super, mnt); + return mount_single(fs_type, flags, data, sel_fill_super); } static struct file_system_type sel_fs_type = { .name = "selinuxfs", - .get_sb = sel_get_sb, + .mount = sel_mount, .kill_sb = kill_litter_super, }; diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 7512502d0162..dc1fd6239f24 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -1310,27 +1310,25 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent) } /** - * smk_get_sb - get the smackfs superblock + * smk_mount - get the smackfs superblock * @fs_type: passed along without comment * @flags: passed along without comment * @dev_name: passed along without comment * @data: passed along without comment - * @mnt: passed along without comment * * Just passes everything along. * * Returns what the lower level code does. 
*/ -static int smk_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *smk_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, smk_fill_super, mnt); + return mount_single(fs_type, flags, data, smk_fill_super); } static struct file_system_type smk_fs_type = { .name = "smackfs", - .get_sb = smk_get_sb, + .mount = smk_mount, .kill_sb = kill_litter_super, }; -- cgit v1.2.3 From 3c26ff6e499ee7e6f9f2bc7da5f2f30d80862ecf Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Jul 2010 11:46:36 +0400 Subject: convert get_sb_nodev() users Signed-off-by: Al Viro --- drivers/staging/autofs/init.c | 8 ++++---- drivers/staging/pohmelfs/inode.c | 9 ++++----- drivers/staging/smbfs/inode.c | 8 ++++---- fs/autofs4/init.c | 8 ++++---- fs/coda/inode.c | 8 ++++---- fs/exofs/super.c | 10 +++++----- fs/fuse/inode.c | 8 ++++---- fs/hostfs/hostfs_kern.c | 8 ++++---- fs/hppfs/hppfs.c | 8 ++++---- fs/hugetlbfs/inode.c | 8 ++++---- fs/ncpfs/inode.c | 8 ++++---- fs/ocfs2/dlmfs/dlmfs.c | 8 ++++---- fs/ramfs/inode.c | 17 ++++++++--------- fs/super.c | 27 ++++++++++++++++++++------- include/linux/fs.h | 3 +++ include/linux/ramfs.h | 4 ++-- mm/shmem.c | 10 +++++----- 17 files changed, 87 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/drivers/staging/autofs/init.c b/drivers/staging/autofs/init.c index 765c72f42976..5e4b372ea663 100644 --- a/drivers/staging/autofs/init.c +++ b/drivers/staging/autofs/init.c @@ -14,16 +14,16 @@ #include #include "autofs_i.h" -static int autofs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *autofs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, autofs_fill_super, mnt); + return mount_nodev(fs_type, flags, data, autofs_fill_super); } static struct file_system_type autofs_fs_type = { .owner = THIS_MODULE, .name = "autofs", - .get_sb = autofs_get_sb, + .mount = autofs_mount, .kill_sb = autofs_kill_sb, }; diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c index c62d30017c07..61685ccceda8 100644 --- a/drivers/staging/pohmelfs/inode.c +++ b/drivers/staging/pohmelfs/inode.c @@ -1937,11 +1937,10 @@ err_out_exit: /* * Some VFS magic here... 
*/ -static int pohmelfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *pohmelfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, pohmelfs_fill_super, - mnt); + return mount_nodev(fs_type, flags, data, pohmelfs_fill_super); } /* @@ -1958,7 +1957,7 @@ static void pohmelfs_kill_super(struct super_block *sb) static struct file_system_type pohmel_fs_type = { .owner = THIS_MODULE, .name = "pohmel", - .get_sb = pohmelfs_get_sb, + .mount = pohmelfs_mount, .kill_sb = pohmelfs_kill_super, }; diff --git a/drivers/staging/smbfs/inode.c b/drivers/staging/smbfs/inode.c index 552951aa7498..f9c493591ce7 100644 --- a/drivers/staging/smbfs/inode.c +++ b/drivers/staging/smbfs/inode.c @@ -793,16 +793,16 @@ out: return error; } -static int smb_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *smb_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, smb_fill_super, mnt); + return mount_nodev(fs_type, flags, data, smb_fill_super); } static struct file_system_type smb_fs_type = { .owner = THIS_MODULE, .name = "smbfs", - .get_sb = smb_get_sb, + .mount = smb_mount, .kill_sb = kill_anon_super, .fs_flags = FS_BINARY_MOUNTDATA, }; diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index 9722e4bd8957..c038727b4050 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c @@ -14,16 +14,16 @@ #include #include "autofs_i.h" -static int autofs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *autofs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, autofs4_fill_super, mnt); + return mount_nodev(fs_type, flags, data, autofs4_fill_super); } static struct file_system_type autofs_fs_type = { .owner = THIS_MODULE, .name = "autofs", - .get_sb = autofs_get_sb, + .mount = autofs_mount, .kill_sb = autofs4_kill_sb, }; diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 7993b96ca348..5ea57c8c7f97 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -306,16 +306,16 @@ static int coda_statfs(struct dentry *dentry, struct kstatfs *buf) /* init_coda: used by filesystems.c to register coda */ -static int coda_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *coda_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, coda_fill_super, mnt); + return mount_nodev(fs_type, flags, data, coda_fill_super); } struct file_system_type coda_fs_type = { .owner = THIS_MODULE, .name = "coda", - .get_sb = coda_get_sb, + .mount = coda_mount, .kill_sb = kill_anon_super, .fs_flags = FS_BINARY_MOUNTDATA, }; diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 047e92fa3af8..79c3ae6e0456 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -659,19 +659,19 @@ free_bdi: /* * Set up the superblock (calls exofs_fill_super eventually) */ -static int exofs_get_sb(struct file_system_type *type, +static struct dentry *exofs_mount(struct file_system_type *type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { struct exofs_mountopt opts; int ret; ret = parse_options(data, &opts); if 
(ret) - return ret; + return ERR_PTR(ret); opts.dev_name = dev_name; - return get_sb_nodev(type, flags, &opts, exofs_fill_super, mnt); + return mount_nodev(type, flags, &opts, exofs_fill_super); } /* @@ -809,7 +809,7 @@ static const struct export_operations exofs_export_ops = { static struct file_system_type exofs_type = { .owner = THIS_MODULE, .name = "exofs", - .get_sb = exofs_get_sb, + .mount = exofs_mount, .kill_sb = generic_shutdown_super, }; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index edf6a1843533..cfce3ad86a92 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1041,11 +1041,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) return err; } -static int fuse_get_sb(struct file_system_type *fs_type, +static struct dentry *fuse_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *raw_data, struct vfsmount *mnt) + void *raw_data) { - return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt); + return mount_nodev(fs_type, flags, raw_data, fuse_fill_super); } static void fuse_kill_sb_anon(struct super_block *sb) @@ -1065,7 +1065,7 @@ static struct file_system_type fuse_fs_type = { .owner = THIS_MODULE, .name = "fuse", .fs_flags = FS_HAS_SUBTYPE, - .get_sb = fuse_get_sb, + .mount = fuse_mount, .kill_sb = fuse_kill_sb_anon, }; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index cd7c93917cc7..2c0f148a49e6 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -962,11 +962,11 @@ out: return err; } -static int hostfs_read_sb(struct file_system_type *type, +static struct dentry *hostfs_read_sb(struct file_system_type *type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt); + return mount_nodev(type, flags, data, hostfs_fill_sb_common); } static void hostfs_kill_sb(struct super_block *s) @@ -978,7 +978,7 @@ static void hostfs_kill_sb(struct super_block *s) static struct file_system_type hostfs_type = { .owner = THIS_MODULE, .name = "hostfs", - .get_sb = hostfs_read_sb, + .mount = hostfs_read_sb, .kill_sb = hostfs_kill_sb, .fs_flags = 0, }; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 4e2a45ea6140..f702b5f713fc 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -748,17 +748,17 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent) return(err); } -static int hppfs_read_super(struct file_system_type *type, +static struct dentry *hppfs_read_super(struct file_system_type *type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { - return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt); + return mount_nodev(type, flags, data, hppfs_fill_super); } static struct file_system_type hppfs_type = { .owner = THIS_MODULE, .name = "hppfs", - .get_sb = hppfs_read_super, + .mount = hppfs_read_super, .kill_sb = kill_anon_super, .fs_flags = 0, }; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index b14be3f781c7..d6cfac1f0a40 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -896,15 +896,15 @@ void hugetlb_put_quota(struct address_space *mapping, long delta) } } -static int hugetlbfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt); + return 
mount_nodev(fs_type, flags, data, hugetlbfs_fill_super); } static struct file_system_type hugetlbfs_fs_type = { .name = "hugetlbfs", - .get_sb = hugetlbfs_get_sb, + .mount = hugetlbfs_mount, .kill_sb = kill_litter_super, }; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 985fabb26aca..d290545aa0c4 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -1020,16 +1020,16 @@ out: return result; } -static int ncp_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ncp_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, ncp_fill_super, mnt); + return mount_nodev(fs_type, flags, data, ncp_fill_super); } static struct file_system_type ncp_fs_type = { .owner = THIS_MODULE, .name = "ncpfs", - .get_sb = ncp_get_sb, + .mount = ncp_mount, .kill_sb = kill_anon_super, .fs_flags = FS_BINARY_MOUNTDATA, }; diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 75e115f1bd73..b2df490a19ed 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -643,16 +643,16 @@ static const struct inode_operations dlmfs_file_inode_operations = { .setattr = dlmfs_file_setattr, }; -static int dlmfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *dlmfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt); + return mount_nodev(fs_type, flags, data, dlmfs_fill_super); } static struct file_system_type dlmfs_fs_type = { .owner = THIS_MODULE, .name = "ocfs2_dlmfs", - .get_sb = dlmfs_get_sb, + .mount = dlmfs_mount, .kill_sb = kill_litter_super, }; diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 67fadb1ad2c1..eacb166fb259 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -255,17 +255,16 @@ fail: return err; } -int ramfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +struct dentry *ramfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, ramfs_fill_super, mnt); + return mount_nodev(fs_type, flags, data, ramfs_fill_super); } -static int rootfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *rootfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super, - mnt); + return mount_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super); } static void ramfs_kill_sb(struct super_block *sb) @@ -276,12 +275,12 @@ static void ramfs_kill_sb(struct super_block *sb) static struct file_system_type ramfs_fs_type = { .name = "ramfs", - .get_sb = ramfs_get_sb, + .mount = ramfs_mount, .kill_sb = ramfs_kill_sb, }; static struct file_system_type rootfs_fs_type = { .name = "rootfs", - .get_sb = rootfs_get_sb, + .mount = rootfs_mount, .kill_sb = kill_litter_super, }; diff --git a/fs/super.c b/fs/super.c index 6f021a171ac6..f6a7bf1fff2b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -870,29 +870,42 @@ void kill_block_super(struct super_block *sb) EXPORT_SYMBOL(kill_block_super); #endif -int get_sb_nodev(struct file_system_type *fs_type, +struct dentry *mount_nodev(struct file_system_type *fs_type, int flags, void *data, - int 
(*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt) + int (*fill_super)(struct super_block *, void *, int)) { int error; struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); if (IS_ERR(s)) - return PTR_ERR(s); + return ERR_CAST(s); s->s_flags = flags; error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); - return error; + return ERR_PTR(error); } s->s_flags |= MS_ACTIVE; - simple_set_mnt(mnt, s); - return 0; + return dget(s->s_root); } +EXPORT_SYMBOL(mount_nodev); + +int get_sb_nodev(struct file_system_type *fs_type, + int flags, void *data, + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt) +{ + struct dentry *root; + root = mount_nodev(fs_type, flags, data, fill_super); + if (IS_ERR(root)) + return PTR_ERR(root); + mnt->mnt_root = root; + mnt->mnt_sb = root->d_sb; + return 0; +} EXPORT_SYMBOL(get_sb_nodev); static int compare_single(struct super_block *s, void *p) diff --git a/include/linux/fs.h b/include/linux/fs.h index 0aa2f1202afa..4c3a29ddcacb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1806,6 +1806,9 @@ extern int get_sb_single(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int), struct vfsmount *mnt); +extern struct dentry *mount_nodev(struct file_system_type *fs_type, + int flags, void *data, + int (*fill_super)(struct super_block *, void *, int)); extern int get_sb_nodev(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int), diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index e7320b5e82fb..3a8f0c9b2933 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -3,8 +3,8 @@ struct inode *ramfs_get_inode(struct super_block *sb, const struct inode *dir, int mode, dev_t dev); -extern int ramfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt); +extern struct dentry *ramfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data); #ifndef CONFIG_MMU extern int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize); diff --git a/mm/shmem.c b/mm/shmem.c index f6d350e8adc5..47fdeeb9d636 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2538,16 +2538,16 @@ static const struct vm_operations_struct shmem_vm_ops = { }; -static int shmem_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *shmem_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_nodev(fs_type, flags, data, shmem_fill_super, mnt); + return mount_nodev(fs_type, flags, data, shmem_fill_super); } static struct file_system_type tmpfs_fs_type = { .owner = THIS_MODULE, .name = "tmpfs", - .get_sb = shmem_get_sb, + .mount = shmem_mount, .kill_sb = kill_litter_super, }; @@ -2643,7 +2643,7 @@ out: static struct file_system_type tmpfs_fs_type = { .name = "tmpfs", - .get_sb = ramfs_get_sb, + .mount = ramfs_mount, .kill_sb = kill_litter_super, }; -- cgit v1.2.3 From 51139adac92f7160ad3ca1cab2de1b4b8d19dc96 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Jul 2010 23:47:46 +0400 Subject: convert get_sb_pseudo() users Signed-off-by: Al Viro --- arch/ia64/kernel/perfmon.c | 9 ++++----- drivers/mtd/mtdchar.c | 10 ++++------ fs/anon_inodes.c | 10 ++++------ fs/block_dev.c | 8 ++++---- fs/libfs.c | 14 ++++++-------- fs/pipe.c | 9 ++++----- include/linux/fs.h | 
5 ++--- net/socket.c | 10 ++++------ 8 files changed, 32 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 6b1852f7f972..39e534f5a3b0 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -618,16 +618,15 @@ pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, } -static int -pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry * +pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt); + return mount_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC); } static struct file_system_type pfm_fs_type = { .name = "pfmfs", - .get_sb = pfmfs_get_sb, + .mount = pfmfs_mount, .kill_sb = kill_anon_super, }; diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 5ef45487b65f..a34a0fe14884 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1030,17 +1030,15 @@ static const struct file_operations mtd_fops = { #endif }; -static int mtd_inodefs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *mtd_inodefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "mtd_inode:", NULL, MTD_INODE_FS_MAGIC, - mnt); + return mount_pseudo(fs_type, "mtd_inode:", NULL, MTD_INODE_FS_MAGIC); } static struct file_system_type mtd_inodefs_type = { .name = "mtd_inodefs", - .get_sb = mtd_inodefs_get_sb, + .mount = mtd_inodefs_mount, .kill_sb = kill_anon_super, }; diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 5365527ca43f..57ce55b2564c 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -26,12 +26,10 @@ static struct vfsmount *anon_inode_mnt __read_mostly; static struct inode *anon_inode_inode; static const struct file_operations anon_inode_fops; -static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC, - mnt); + return mount_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC); } /* @@ -45,7 +43,7 @@ static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen) static struct file_system_type anon_inode_fs_type = { .name = "anon_inodefs", - .get_sb = anon_inodefs_get_sb, + .mount = anon_inodefs_mount, .kill_sb = kill_anon_super, }; static const struct dentry_operations anon_inodefs_dentry_operations = { diff --git a/fs/block_dev.c b/fs/block_dev.c index dea3b628a6ce..06e8ff12b97c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -464,15 +464,15 @@ static const struct super_operations bdev_sops = { .evict_inode = bdev_evict_inode, }; -static int bd_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *bd_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt); + return mount_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576); } static struct file_system_type bd_type = { .name = "bdev", - .get_sb = bd_get_sb, + .mount = bd_mount, .kill_sb = kill_anon_super, }; diff 
--git a/fs/libfs.c b/fs/libfs.c index 304a5132ca27..a3accdf528ad 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -201,9 +201,8 @@ static const struct super_operations simple_super_operations = { * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that * will never be mountable) */ -int get_sb_pseudo(struct file_system_type *fs_type, char *name, - const struct super_operations *ops, unsigned long magic, - struct vfsmount *mnt) +struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name, + const struct super_operations *ops, unsigned long magic) { struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); struct dentry *dentry; @@ -211,7 +210,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, struct qstr d_name = {.name = name, .len = strlen(name)}; if (IS_ERR(s)) - return PTR_ERR(s); + return ERR_CAST(s); s->s_flags = MS_NOUSER; s->s_maxbytes = MAX_LFS_FILESIZE; @@ -241,12 +240,11 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, d_instantiate(dentry, root); s->s_root = dentry; s->s_flags |= MS_ACTIVE; - simple_set_mnt(mnt, s); - return 0; + return dget(s->s_root); Enomem: deactivate_locked_super(s); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) @@ -951,7 +949,7 @@ EXPORT_SYMBOL(dcache_dir_lseek); EXPORT_SYMBOL(dcache_dir_open); EXPORT_SYMBOL(dcache_readdir); EXPORT_SYMBOL(generic_read_dir); -EXPORT_SYMBOL(get_sb_pseudo); +EXPORT_SYMBOL(mount_pseudo); EXPORT_SYMBOL(simple_write_begin); EXPORT_SYMBOL(simple_write_end); EXPORT_SYMBOL(simple_dir_inode_operations); diff --git a/fs/pipe.c b/fs/pipe.c index d2d7566ce68e..a8012a955720 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1247,16 +1247,15 @@ out: * any operations on the root directory. However, we need a non-trivial * d_name - pipe: will go nicely and kill the special-casing in procfs. 
*/ -static int pipefs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *pipefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt); + return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); } static struct file_system_type pipe_fs_type = { .name = "pipefs", - .get_sb = pipefs_get_sb, + .mount = pipefs_mount, .kill_sb = kill_anon_super, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4c3a29ddcacb..43e6cfb5cbb3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1824,9 +1824,8 @@ struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), void *data); -extern int get_sb_pseudo(struct file_system_type *, char *, - const struct super_operations *ops, unsigned long, - struct vfsmount *mnt); +extern struct dentry *mount_pseudo(struct file_system_type *, char *, + const struct super_operations *ops, unsigned long); extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); static inline void sb_mark_dirty(struct super_block *sb) diff --git a/net/socket.c b/net/socket.c index ee3cd280c76e..5247ae10f374 100644 --- a/net/socket.c +++ b/net/socket.c @@ -305,19 +305,17 @@ static const struct super_operations sockfs_ops = { .statfs = simple_statfs, }; -static int sockfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry *sockfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, - mnt); + return mount_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC); } static struct vfsmount *sock_mnt __read_mostly; static struct file_system_type sock_fs_type = { .name = "sockfs", - .get_sb = sockfs_get_sb, + .mount = sockfs_mount, .kill_sb = kill_anon_super, }; -- cgit v1.2.3 From ceefda6931806972ecf550bd8231dce4a4178953 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 26 Jul 2010 13:16:50 +0400 Subject: switch get_sb_ns() users Signed-off-by: Al Viro --- fs/super.c | 14 ++++++-------- include/linux/fs.h | 5 ++--- ipc/mqueue.c | 8 ++++---- 3 files changed, 12 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/fs/super.c b/fs/super.c index f6a7bf1fff2b..ca696155cd9a 100644 --- a/fs/super.c +++ b/fs/super.c @@ -715,15 +715,14 @@ static int ns_set_super(struct super_block *sb, void *data) return set_anon_super(sb, NULL); } -int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt) +struct dentry *mount_ns(struct file_system_type *fs_type, int flags, + void *data, int (*fill_super)(struct super_block *, void *, int)) { struct super_block *sb; sb = sget(fs_type, ns_test_super, ns_set_super, data); if (IS_ERR(sb)) - return PTR_ERR(sb); + return ERR_CAST(sb); if (!sb->s_root) { int err; @@ -731,17 +730,16 @@ int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, err = fill_super(sb, data, flags & MS_SILENT ? 
1 : 0); if (err) { deactivate_locked_super(sb); - return err; + return ERR_PTR(err); } sb->s_flags |= MS_ACTIVE; } - simple_set_mnt(mnt, sb); - return 0; + return dget(sb->s_root); } -EXPORT_SYMBOL(get_sb_ns); +EXPORT_SYMBOL(mount_ns); #ifdef CONFIG_BLOCK static int set_bdev_super(struct super_block *s, void *data) diff --git a/include/linux/fs.h b/include/linux/fs.h index 43e6cfb5cbb3..4d07902bc50c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1789,9 +1789,8 @@ struct file_system_type { struct lock_class_key i_alloc_sem_key; }; -extern int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt); +extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, + void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 3a61ffefe884..035f4399edbc 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -211,13 +211,13 @@ out: return error; } -static int mqueue_get_sb(struct file_system_type *fs_type, +static struct dentry *mqueue_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, struct vfsmount *mnt) + void *data) { if (!(flags & MS_KERNMOUNT)) data = current->nsproxy->ipc_ns; - return get_sb_ns(fs_type, flags, data, mqueue_fill_super, mnt); + return mount_ns(fs_type, flags, data, mqueue_fill_super); } static void init_once(void *foo) @@ -1232,7 +1232,7 @@ static const struct super_operations mqueue_super_ops = { static struct file_system_type mqueue_fs_type = { .name = "mqueue", - .get_sb = mqueue_get_sb, + .mount = mqueue_mount, .kill_sb = kill_litter_super, }; -- cgit v1.2.3 From 3259f8bed2f0f57c2fdcdac1b510c3fa319ef97e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 29 Oct 2010 11:16:17 -0400 Subject: Add new functions for triggering inode writeback When btrfs is running low on metadata space, it needs to force delayed allocation pages to disk. It currently does this with a suboptimal walk of a private list of inodes with delayed allocation, and it would be much better if we used the generic flusher threads. writeback_inodes_sb_if_idle would be ideal, but it waits for the flusher thread to start IO on all the dirty pages in the FS before it returns. This adds variants of writeback_inodes_sb* that allow the caller to control how many pages get sent down. Signed-off-by: Chris Mason --- fs/fs-writeback.c | 52 ++++++++++++++++++++++++++++++++++++++--------- include/linux/writeback.h | 2 ++ 2 files changed, 44 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ab38fef1c9a1..1e23c33ea5cf 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1069,33 +1069,44 @@ static void wait_sb_inodes(struct super_block *sb) } /** - * writeback_inodes_sb - writeback dirty inodes from given super_block + * writeback_inodes_sb_nr - writeback dirty inodes from given super_block * @sb: the superblock + * @nr: the number of pages to write * * Start writeback on some inodes on this super_block. No guarantees are made * on how many (if any) will be written, and this function does not wait - * for IO completion of submitted IO. The number of pages submitted is - * returned. + * for IO completion of submitted IO. 
*/ -void writeback_inodes_sb(struct super_block *sb) +void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr) { - unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); - unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); DECLARE_COMPLETION_ONSTACK(done); struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_NONE, .done = &done, + .nr_pages = nr, }; WARN_ON(!rwsem_is_locked(&sb->s_umount)); - - work.nr_pages = nr_dirty + nr_unstable + - (inodes_stat.nr_inodes - inodes_stat.nr_unused); - bdi_queue_work(sb->s_bdi, &work); wait_for_completion(&done); } +EXPORT_SYMBOL(writeback_inodes_sb_nr); + +/** + * writeback_inodes_sb - writeback dirty inodes from given super_block + * @sb: the superblock + * + * Start writeback on some inodes on this super_block. No guarantees are made + * on how many (if any) will be written, and this function does not wait + * for IO completion of submitted IO. + */ +void writeback_inodes_sb(struct super_block *sb) +{ + return writeback_inodes_sb_nr(sb, global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS) + + (inodes_stat.nr_inodes - inodes_stat.nr_unused)); +} EXPORT_SYMBOL(writeback_inodes_sb); /** @@ -1117,6 +1128,27 @@ int writeback_inodes_sb_if_idle(struct super_block *sb) } EXPORT_SYMBOL(writeback_inodes_sb_if_idle); +/** + * writeback_inodes_sb_nr_if_idle - start writeback if none underway + * @sb: the superblock + * @nr: the number of pages to write + * + * Invoke writeback_inodes_sb_nr if no writeback is currently underway. + * Returns 1 if writeback was started, 0 if not. + */ +int writeback_inodes_sb_nr_if_idle(struct super_block *sb, + unsigned long nr) +{ + if (!writeback_in_progress(sb->s_bdi)) { + down_read(&sb->s_umount); + writeback_inodes_sb_nr(sb, nr); + up_read(&sb->s_umount); + return 1; + } else + return 0; +} +EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle); + /** * sync_inodes_sb - sync sb inode pages * @sb: the superblock diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..a4cf84511e79 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -60,7 +60,9 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *); +void writeback_inodes_sb_nr(struct super_block *, unsigned long nr); int writeback_inodes_sb_if_idle(struct super_block *); +int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr); void sync_inodes_sb(struct super_block *); void writeback_inodes_wb(struct bdi_writeback *wb, struct writeback_control *wbc); -- cgit v1.2.3 From 435f49a518c78eec8e2edbbadd912737246cbe20 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 29 Oct 2010 10:36:49 -0700 Subject: readv/writev: do the same MAX_RW_COUNT truncation that read/write does We used to protect against overflow, but rather than return an error, do what read/write does, namely to limit the total size to MAX_RW_COUNT. This is not only more consistent, but it also means that any broken low-level read/write routine that still keeps counts in 'int' can't break.
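The new per-segment clamp is easy to demonstrate outside the kernel. A minimal userspace sketch of the arithmetic (MAX_RW_COUNT as defined in the patch, with an assumed 4K page size; cap_iov_total() is an illustrative name, not a kernel function):

    #include <errno.h>
    #include <limits.h>
    #include <stdio.h>
    #include <sys/uio.h>

    #define PAGE_CACHE_MASK (~4095L)                  /* assumes 4K pages */
    #define MAX_RW_COUNT    (INT_MAX & PAGE_CACHE_MASK)

    /* Mimic the patched rw_copy_check_uvector() arithmetic: reject negative
     * lengths, shrink the segment that crosses MAX_RW_COUNT, return the total. */
    static ssize_t cap_iov_total(struct iovec *iov, unsigned long nr_segs)
    {
            ssize_t ret = 0;
            unsigned long seg;

            for (seg = 0; seg < nr_segs; seg++) {
                    ssize_t len = (ssize_t)iov[seg].iov_len;

                    if (len < 0)
                            return -EINVAL;
                    if (len > MAX_RW_COUNT - ret) {
                            len = MAX_RW_COUNT - ret;
                            iov[seg].iov_len = len;   /* truncate instead of failing */
                    }
                    ret += len;
            }
            return ret;
    }

    int main(void)
    {
            char a, b;
            struct iovec iov[2] = {
                    { .iov_base = &a, .iov_len = MAX_RW_COUNT },
                    { .iov_base = &b, .iov_len = 100 },
            };

            printf("capped total: %zd (MAX_RW_COUNT = %ld)\n",
                   cap_iov_total(iov, 2), (long)MAX_RW_COUNT);
            return 0;
    }

On a 32-bit box the old "ret + len < ret" test would have failed this pair of segments with -EINVAL, and on 64-bit it would have let a total above INT_MAX through; capping the total keeps readv/writev consistent with plain read/write.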
Signed-off-by: Linus Torvalds --- fs/compat.c | 12 +++++------ fs/read_write.c | 62 +++++++++++++++++++++++++++++------------------------- include/linux/fs.h | 1 + 3 files changed, 40 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/fs/compat.c b/fs/compat.c index 52cfeb61da77..ff66c0d7583d 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -606,14 +606,14 @@ ssize_t compat_rw_copy_check_uvector(int type, /* * Single unix specification: * We should -EINVAL if an element length is not >= 0 and fitting an - * ssize_t. The total length is fitting an ssize_t + * ssize_t. * - * Be careful here because iov_len is a size_t not an ssize_t + * In Linux, the total length is limited to MAX_RW_COUNT, there is + * no overflow possibility. */ tot_len = 0; ret = -EINVAL; for (seg = 0; seg < nr_segs; seg++) { - compat_ssize_t tmp = tot_len; compat_uptr_t buf; compat_ssize_t len; @@ -624,13 +624,13 @@ ssize_t compat_rw_copy_check_uvector(int type, } if (len < 0) /* size_t not fitting in compat_ssize_t .. */ goto out; - tot_len += len; - if (tot_len < tmp) /* maths overflow on the compat_ssize_t */ - goto out; if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) { ret = -EFAULT; goto out; } + if (len > MAX_RW_COUNT - tot_len) + len = MAX_RW_COUNT - tot_len; + tot_len += len; iov->iov_base = compat_ptr(buf); iov->iov_len = (compat_size_t) len; uvector++; diff --git a/fs/read_write.c b/fs/read_write.c index 9cd9d148105d..431a0ed610c8 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -243,8 +243,6 @@ bad: * them to something that fits in "int" so that others * won't have to do range checks all the time. */ -#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) - int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) { struct inode *inode; @@ -584,65 +582,71 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, struct iovec **ret_pointer) - { +{ unsigned long seg; - ssize_t ret; + ssize_t ret; struct iovec *iov = fast_pointer; - /* - * SuS says "The readv() function *may* fail if the iovcnt argument - * was less than or equal to 0, or greater than {IOV_MAX}. Linux has - * traditionally returned zero for zero segments, so... - */ + /* + * SuS says "The readv() function *may* fail if the iovcnt argument + * was less than or equal to 0, or greater than {IOV_MAX}. Linux has + * traditionally returned zero for zero segments, so... + */ if (nr_segs == 0) { ret = 0; - goto out; + goto out; } - /* - * First get the "struct iovec" from user memory and - * verify all the pointers - */ + /* + * First get the "struct iovec" from user memory and + * verify all the pointers + */ if (nr_segs > UIO_MAXIOV) { ret = -EINVAL; - goto out; + goto out; } if (nr_segs > fast_segs) { - iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); + iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); if (iov == NULL) { ret = -ENOMEM; - goto out; + goto out; } - } + } if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { ret = -EFAULT; - goto out; + goto out; } - /* + /* * According to the Single Unix Specification we should return EINVAL * if an element length is < 0 when cast to ssize_t or if the * total length would overflow the ssize_t return value of the * system call. - */ + * + * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the + * overflow case. 
+ */ ret = 0; - for (seg = 0; seg < nr_segs; seg++) { - void __user *buf = iov[seg].iov_base; - ssize_t len = (ssize_t)iov[seg].iov_len; + for (seg = 0; seg < nr_segs; seg++) { + void __user *buf = iov[seg].iov_base; + ssize_t len = (ssize_t)iov[seg].iov_len; /* see if we we're about to use an invalid len or if * it's about to overflow ssize_t */ - if (len < 0 || (ret + len < ret)) { + if (len < 0) { ret = -EINVAL; - goto out; + goto out; } if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { ret = -EFAULT; - goto out; + goto out; + } + if (len > MAX_RW_COUNT - ret) { + len = MAX_RW_COUNT - ret; + iov[seg].iov_len = len; } - ret += len; - } + } out: *ret_pointer = iov; return ret; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4d07902bc50c..7b7b507ffa1c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1867,6 +1867,7 @@ extern int current_umask(void); /* /sys/fs */ extern struct kobject *fs_kobj; +#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) extern int rw_verify_area(int, struct file *, loff_t *, size_t); #define FLOCK_VERIFY_READ 1 -- cgit v1.2.3 From d7ba979d45272385ce0fdf141d922e61ff48e07b Mon Sep 17 00:00:00 2001 From: Dongdong Deng Date: Wed, 18 Aug 2010 06:02:00 -0500 Subject: debug_core,x86,blackfin: Clean up hw debug disable API The kgdb_disable_hw_debug() was an architecture-specific function for disabling all hardware breakpoints on a per-cpu basis when entering the debug core. This patch will remove the weak function kgdb_disable_hw_debug() and change it into a callback which lives with the rest of the hw breakpoint callbacks in struct kgdb_arch. Signed-off-by: Dongdong Deng Signed-off-by: Jason Wessel --- arch/blackfin/kernel/kgdb.c | 3 ++- arch/x86/kernel/kgdb.c | 3 ++- include/linux/kgdb.h | 13 +++---------- kernel/debug/debug_core.c | 16 +++------------- 4 files changed, 10 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/arch/blackfin/kernel/kgdb.c b/arch/blackfin/kernel/kgdb.c index 08bc44ea6883..edae461b1c54 100644 --- a/arch/blackfin/kernel/kgdb.c +++ b/arch/blackfin/kernel/kgdb.c @@ -320,7 +320,7 @@ static void bfin_correct_hw_break(void) } } -void kgdb_disable_hw_debug(struct pt_regs *regs) +static void bfin_disable_hw_debug(struct pt_regs *regs) { /* Disable hardware debugging while we are in kgdb */ bfin_write_WPIACTL(0); @@ -406,6 +406,7 @@ struct kgdb_arch arch_kgdb_ops = { #endif .set_hw_breakpoint = bfin_set_hw_break, .remove_hw_breakpoint = bfin_remove_hw_break, + .disable_hw_break = bfin_disable_hw_debug, .remove_all_hw_break = bfin_remove_all_hw_break, .correct_hw_break = bfin_correct_hw_break, }; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index d81cfebb848f..ec592caac4b4 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -387,7 +387,7 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) * disable hardware debugging while it is processing gdb packets or * handling exception.
*/ -void kgdb_disable_hw_debug(struct pt_regs *regs) +static void kgdb_disable_hw_debug(struct pt_regs *regs) { int i; int cpu = raw_smp_processor_id(); @@ -724,6 +724,7 @@ struct kgdb_arch arch_kgdb_ops = { .flags = KGDB_HW_BREAKPOINT, .set_hw_breakpoint = kgdb_set_hw_break, .remove_hw_breakpoint = kgdb_remove_hw_break, + .disable_hw_break = kgdb_disable_hw_debug, .remove_all_hw_break = kgdb_remove_all_hw_break, .correct_hw_break = kgdb_correct_hw_break, }; diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index cc96f0f23e04..092e4250a458 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -35,16 +35,6 @@ struct pt_regs; */ extern int kgdb_skipexception(int exception, struct pt_regs *regs); -/** - * kgdb_disable_hw_debug - (optional) Disable hardware debugging hook - * @regs: Current &struct pt_regs. - * - * This function will be called if the particular architecture must - * disable hardware debugging while it is processing gdb packets or - * handling exception. - */ -extern void kgdb_disable_hw_debug(struct pt_regs *regs); - struct tasklet_struct; struct task_struct; struct uart_port; @@ -243,6 +233,8 @@ extern void kgdb_arch_late(void); * breakpoint. * @remove_hw_breakpoint: Allow an architecture to specify how to remove a * hardware breakpoint. + * @disable_hw_break: Allow an architecture to specify how to disable + * hardware breakpoints for a single cpu. * @remove_all_hw_break: Allow an architecture to specify how to remove all * hardware breakpoints. * @correct_hw_break: Allow an architecture to specify how to correct the @@ -256,6 +248,7 @@ struct kgdb_arch { int (*remove_breakpoint)(unsigned long, char *); int (*set_hw_breakpoint)(unsigned long, int, enum kgdb_bptype); int (*remove_hw_breakpoint)(unsigned long, int, enum kgdb_bptype); + void (*disable_hw_break)(struct pt_regs *regs); void (*remove_all_hw_break)(void); void (*correct_hw_break)(void); }; diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index fec596da9bd0..cefd4a11f6d9 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -209,18 +209,6 @@ int __weak kgdb_skipexception(int exception, struct pt_regs *regs) return 0; } -/** - * kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb. - * @regs: Current &struct pt_regs. - * - * This function will be called if the particular architecture must - * disable hardware debugging while it is processing gdb packets or - * handling exception. - */ -void __weak kgdb_disable_hw_debug(struct pt_regs *regs) -{ -} - /* * Some architectures need cache flushes when we set/clear a * breakpoint: @@ -484,7 +472,9 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs, atomic_inc(&masters_in_kgdb); else atomic_inc(&slaves_in_kgdb); - kgdb_disable_hw_debug(ks->linux_regs); + + if (arch_kgdb_ops.disable_hw_break) + arch_kgdb_ops.disable_hw_break(regs); acquirelock: /* -- cgit v1.2.3 From 45f81b1c96d9793e47ce925d257ea693ce0b193e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 29 Oct 2010 12:33:43 -0400 Subject: jump label: Add work around to i386 gcc asm goto bug On i386 (not x86_64) early implementations of gcc would have a bug with asm goto causing it to produce code like the following: (This was noticed by Peter Zijlstra) 56 pushl 0 67 nopl jmp 0x6f popl jmp 0x8c 6f mov test je 0x8c 8c mov call *(%esp) The jump added in the asm goto skipped over the popl that matched the pushl 0, which led to a quick crash of the system when the jump was enabled.
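For reference, the miscompiled construct is the asm goto based static branch. A stripped-down sketch of the pattern (illustrative only: the kernel's real jump label macro also records a key pointer in each __jump_table entry, and it requires a gcc new enough to support asm goto):

    static inline __attribute__((always_inline)) int trace_enabled(void)
    {
            asm goto("1: nop\n\t"                 /* patched to "jmp %l[do_trace]" when enabled */
                     ".pushsection __jump_table, \"aw\"\n\t"
                     ".long 1b, %l[do_trace]\n\t" /* record nop address and jump target */
                     ".popsection"
                     : : : : do_trace);
            return 0;                             /* disabled: fall straight through */
    do_trace:
            return 1;                             /* reached only after the nop is patched */
    }

gcc is told that the asm may transfer control to do_trace; the i386 bug is that the compiler's stack bookkeeping around such a label can go wrong unless -maccumulate-outgoing-args is in effect, hence the Makefile change below.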
The nopl is defined in the asm goto () statement and when tracepoints are enabled, the nop changes to a jump to the label that was specified by the asm goto. asm goto is supposed to tell gcc that the code in the asm might jump to an external label. Here gcc obviously fails to make that work. The bug report for gcc is here: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226 The bug only appears on x86 when not compiled with -maccumulate-outgoing-args. This option is always set on x86_64 and it is also the work around for a function graph tracer i386 bug. (See commit: 746357d6a526d6da9d89a2ec645b28406e959c2e) This explains why the bug only showed up on i386 when function graph tracer was not enabled. This patch now adds a CONFIG_JUMP_LABEL option that is default off instead of using jump labels by default. When jump labels are enabled, the -maccumulate-outgoing-args flag will be used (causing a slightly larger kernel image on i386). This option will exist until we have a way to detect if the gcc compiler in use is safe to use on all configurations without the work around. Note, there exists such a test, but for now we will keep the enabling of jump label as a manual option. Archs that know the compiler is safe with asm goto may choose to select JUMP_LABEL and enable it by default. Reported-by: Ingo Molnar Cause-discovered-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Jason Baron Cc: H. Peter Anvin Cc: David Daney Cc: Mathieu Desnoyers Cc: Masami Hiramatsu Cc: David Miller Cc: Richard Henderson LKML-Reference: <1288028746.3673.11.camel@laptop> Signed-off-by: Steven Rostedt --- arch/Kconfig | 14 ++++++++++++++ arch/x86/Makefile_32.cpu | 13 ++++++++++++- include/linux/jump_label.h | 2 +- 3 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/Kconfig b/arch/Kconfig index 53d7f619a1b9..8bf0fa652eb6 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -42,6 +42,20 @@ config KPROBES for kernel debugging, non-intrusive instrumentation and testing. If in doubt, say "N". +config JUMP_LABEL + bool "Optimize trace point call sites" + depends on HAVE_ARCH_JUMP_LABEL + help + If it is detected that the compiler has support for "asm goto", + the kernel will compile trace point locations with just a + nop instruction. When trace points are enabled, the nop will + be converted to a jump to the trace function. This technique + lowers overhead and stress on the branch prediction of the + processor. + + On i386, options added to the compiler flags may increase + the size of the kernel slightly. + config OPTPROBES def_bool y depends on KPROBES && HAVE_OPTPROBES diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 1255d953c65d..f2ee1abb1df9 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -51,7 +51,18 @@ cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) # prologue (push %ebp, mov %esp, %ebp) which breaks the function graph # tracer assumptions. For i686, generic, core2 this is set by the # compiler anyway -cflags-$(CONFIG_FUNCTION_GRAPH_TRACER) += $(call cc-option,-maccumulate-outgoing-args) +ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y) +ADD_ACCUMULATE_OUTGOING_ARGS := y +endif + +# Work around to a bug with asm goto with first implementations of it +# in gcc causing gcc to mess up the push and pop of the stack in some +# uses of asm goto.
+ifeq ($(CONFIG_JUMP_LABEL), y) +ADD_ACCUMULATE_OUTGOING_ARGS := y +endif + +cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args) # Bug fix for binutils: this option is required in order to keep # binutils from generating NOPL instructions against our will. diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 1947a1212678..7880f18e4b86 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -1,7 +1,7 @@ #ifndef _LINUX_JUMP_LABEL_H #define _LINUX_JUMP_LABEL_H -#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL) +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) # include # define HAVE_JUMP_LABEL #endif -- cgit v1.2.3 From 337ac9d5218cc19f40fca13fa4deb3c658c4241b Mon Sep 17 00:00:00 2001 From: Cyril Chemparathy Date: Fri, 29 Oct 2010 13:50:25 -0700 Subject: phy/marvell: rename 88ec048 to 88e1318s and fix mscr1 addr The marvell 88ec048's official part number is 88e1318s. This patch renames definitions in the driver to reflect this. In addition, a minor bug fix has been added to write back the MSCR1 register value properly. Signed-off-by: Cyril Chemparathy Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 18 +++++++++--------- include/linux/marvell_phy.h | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index e2afdce0a437..f0bd1a1aba3a 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -74,8 +74,8 @@ #define MII_88E1121_PHY_MSCR_TX_DELAY BIT(4) #define MII_88E1121_PHY_MSCR_DELAY_MASK (~(0x3 << 4)) -#define MII_88EC048_PHY_MSCR1_REG 16 -#define MII_88EC048_PHY_MSCR1_PAD_ODD BIT(6) +#define MII_88E1318S_PHY_MSCR1_REG 16 +#define MII_88E1318S_PHY_MSCR1_PAD_ODD BIT(6) #define MII_88E1121_PHY_LED_CTRL 16 #define MII_88E1121_PHY_LED_PAGE 3 @@ -240,7 +240,7 @@ static int m88e1121_config_aneg(struct phy_device *phydev) return err; } -static int m88ec048_config_aneg(struct phy_device *phydev) +static int m88e1318_config_aneg(struct phy_device *phydev) { int err, oldpage, mscr; @@ -251,10 +251,10 @@ static int m88ec048_config_aneg(struct phy_device *phydev) if (err < 0) return err; - mscr = phy_read(phydev, MII_88EC048_PHY_MSCR1_REG); - mscr |= MII_88EC048_PHY_MSCR1_PAD_ODD; + mscr = phy_read(phydev, MII_88E1318S_PHY_MSCR1_REG); + mscr |= MII_88E1318S_PHY_MSCR1_PAD_ODD; - err = phy_write(phydev, MII_88E1121_PHY_MSCR_REG, mscr); + err = phy_write(phydev, MII_88E1318S_PHY_MSCR1_REG, mscr); if (err < 0) return err; @@ -659,12 +659,12 @@ static struct phy_driver marvell_drivers[] = { .driver = { .owner = THIS_MODULE }, }, { - .phy_id = MARVELL_PHY_ID_88EC048, + .phy_id = MARVELL_PHY_ID_88E1318S, .phy_id_mask = MARVELL_PHY_ID_MASK, - .name = "Marvell 88EC048", + .name = "Marvell 88E1318S", .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_aneg = &m88ec048_config_aneg, + .config_aneg = &m88e1318_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index d0f08018335d..1ff81b51b656 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -12,7 +12,7 @@ #define MARVELL_PHY_ID_88E1121R 0x01410cb0 #define MARVELL_PHY_ID_88E1145 0x01410cd0 #define MARVELL_PHY_ID_88E1240 0x01410e30 -#define MARVELL_PHY_ID_88EC048 0x01410e90 +#define MARVELL_PHY_ID_88E1318S 0x01410e90 /* struct phy_device dev_flags 
definitions */ #define MARVELL_PHY_M1145_FLAGS_RESISTANCE 0x00000001 -- cgit v1.2.3 From 4882720b267b7b1d1b0ce08334b205f0329d4615 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Sep 2010 14:34:01 +0000 Subject: semaphore: Remove mutex emulation Semaphores used as mutexes have been deprecated for years. Now that all users are either converted to real semaphores or to mutexes, remove the cruft. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Christoph Hellwig LKML-Reference: <20100907125057.562399240@linutronix.de> --- include/linux/semaphore.h | 6 ------ scripts/checkpatch.pl | 8 ++------ 2 files changed, 2 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 5310d27abd2a..39fa04966aa8 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -29,9 +29,6 @@ struct semaphore { #define DEFINE_SEMAPHORE(name) \ struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) -#define DECLARE_MUTEX(name) \ - struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) - static inline void sema_init(struct semaphore *sem, int val) { static struct lock_class_key __key; @@ -39,9 +36,6 @@ static inline void sema_init(struct semaphore *sem, int val) lockdep_init_map(&sem->lock.dep_map, "semaphore->lock", &__key, 0); } -#define init_MUTEX(sem) sema_init(sem, 1) -#define init_MUTEX_LOCKED(sem) sema_init(sem, 0) - extern void down(struct semaphore *sem); extern int __must_check down_interruptible(struct semaphore *sem); extern int __must_check down_killable(struct semaphore *sem); diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 90b54d4697fd..e3c7fc0dca38 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2794,12 +2794,8 @@ sub process { WARN("__func__ should be used instead of gcc specific __FUNCTION__\n" . $herecurr); } -# check for semaphores used as mutexes - if ($line =~ /^.\s*(DECLARE_MUTEX|init_MUTEX)\s*\(/) { - WARN("mutexes are preferred for single holder semaphores\n" . $herecurr); - } -# check for semaphores used as mutexes - if ($line =~ /^.\s*init_MUTEX_LOCKED\s*\(/) { +# check for semaphores initialized locked + if ($line =~ /^.\s*sema_init.+,\W?0\W?\)/) { WARN("consider using a completion\n" . $herecurr); } -- cgit v1.2.3 From 3c80fe4ac9cfb13b1bfa4edf1544e8b656716694 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 9 Dec 2009 14:19:31 +0000 Subject: audit: Call tty_audit_push_task() outside preempt disabled While auditing all tasklist_lock read_lock sites I stumbled over the following call chain: audit_prepare_user_tty() read_lock(&tasklist_lock); tty_audit_push_task(); mutex_lock(&buf->mutex); --> buf->mutex is locked with preemption disabled. Solve this by acquiring a reference to the task struct under rcu_read_lock and calling tty_audit_push_task outside of the preempt-disabled region. Move all code which needs to be protected by sighand lock into tty_audit_push_task() and use lock/unlock_sighand as we do not hold tasklist_lock.
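In outline, the caller side then takes this shape (a sketch of the reworked audit_prepare_user_tty(), not a verbatim copy of the patch):

    static int audit_prepare_user_tty(pid_t pid, uid_t loginuid, u32 sessionid)
    {
            struct task_struct *tsk;
            int err;

            rcu_read_lock();
            tsk = find_task_by_vpid(pid);
            if (!tsk) {
                    rcu_read_unlock();
                    return -ESRCH;
            }
            get_task_struct(tsk);   /* pin the task before leaving the RCU section */
            rcu_read_unlock();      /* preemption no longer disabled */
            err = tty_audit_push_task(tsk, loginuid, sessionid);  /* may take buf->mutex */
            put_task_struct(tsk);
            return err;
    }

The buffer mutex in tty_audit_push_task() is then taken with preemption enabled and only a task reference held.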
Signed-off-by: Thomas Gleixner Cc: Al Viro Cc: Eric Paris Cc: Oleg Nesterov Signed-off-by: Al Viro --- drivers/char/tty_audit.c | 38 ++++++++++++++++++++++++++++---------- include/linux/tty.h | 9 +++++---- kernel/audit.c | 25 +++++++++---------------- 3 files changed, 42 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c index 1b8ee590b4ca..f64582b0f623 100644 --- a/drivers/char/tty_audit.c +++ b/drivers/char/tty_audit.c @@ -188,25 +188,43 @@ void tty_audit_tiocsti(struct tty_struct *tty, char ch) } /** - * tty_audit_push_task - Flush task's pending audit data + * tty_audit_push_task - Flush task's pending audit data + * @tsk: task pointer + * @loginuid: sender login uid + * @sessionid: sender session id + * + * Called with a ref on @tsk held. Try to lock sighand and get a + * reference to the tty audit buffer if available. + * Flush the buffer or return an appropriate error code. */ -void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid, u32 sessionid) +int tty_audit_push_task(struct task_struct *tsk, uid_t loginuid, u32 sessionid) { - struct tty_audit_buf *buf; + struct tty_audit_buf *buf = ERR_PTR(-EPERM); + unsigned long flags; - spin_lock_irq(&tsk->sighand->siglock); - buf = tsk->signal->tty_audit_buf; - if (buf) - atomic_inc(&buf->count); - spin_unlock_irq(&tsk->sighand->siglock); - if (!buf) - return; + if (!lock_task_sighand(tsk, &flags)) + return -ESRCH; + + if (tsk->signal->audit_tty) { + buf = tsk->signal->tty_audit_buf; + if (buf) + atomic_inc(&buf->count); + } + unlock_task_sighand(tsk, &flags); + + /* + * Return 0 when signal->audit_tty set + * but tsk->signal->tty_audit_buf == NULL. + */ + if (!buf || IS_ERR(buf)) + return PTR_ERR(buf); mutex_lock(&buf->mutex); tty_audit_buf_push(tsk, loginuid, sessionid, buf); mutex_unlock(&buf->mutex); tty_audit_buf_put(buf); + return 0; } /** diff --git a/include/linux/tty.h b/include/linux/tty.h index e500171c745f..2a754748dd5f 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -541,8 +541,8 @@ extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern void tty_audit_push(struct tty_struct *tty); -extern void tty_audit_push_task(struct task_struct *tsk, - uid_t loginuid, u32 sessionid); +extern int tty_audit_push_task(struct task_struct *tsk, + uid_t loginuid, u32 sessionid); #else static inline void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, size_t size) @@ -560,9 +560,10 @@ static inline void tty_audit_fork(struct signal_struct *sig) static inline void tty_audit_push(struct tty_struct *tty) { } -static inline void tty_audit_push_task(struct task_struct *tsk, - uid_t loginuid, u32 sessionid) +static inline int tty_audit_push_task(struct task_struct *tsk, + uid_t loginuid, u32 sessionid) { + return 0; } #endif diff --git a/kernel/audit.c b/kernel/audit.c index a300931fc45f..8429afea37bf 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -467,23 +467,16 @@ static int audit_prepare_user_tty(pid_t pid, uid_t loginuid, u32 sessionid) struct task_struct *tsk; int err; - read_lock(&tasklist_lock); + rcu_read_lock(); tsk = find_task_by_vpid(pid); - err = -ESRCH; - if (!tsk) - goto out; - err = 0; - - spin_lock_irq(&tsk->sighand->siglock); - if (!tsk->signal->audit_tty) - err = -EPERM; - spin_unlock_irq(&tsk->sighand->siglock); - if (err) - goto out; - - tty_audit_push_task(tsk, loginuid, sessionid); -out: - read_unlock(&tasklist_lock); 
+ if (!tsk) { + rcu_read_unlock(); + return -ESRCH; + } + get_task_struct(tsk); + rcu_read_unlock(); + err = tty_audit_push_task(tsk, loginuid, sessionid); + put_task_struct(tsk); return err; } -- cgit v1.2.3 From af2951325bd6c26cb2c91943c7b11aed53504056 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 11 Nov 2009 10:09:41 -0500 Subject: audit: make link()/linkat() match "attribute change" predicate Signed-off-by: Al Viro --- include/asm-generic/audit_change_attr.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/asm-generic/audit_change_attr.h b/include/asm-generic/audit_change_attr.h index 50764550a60c..bcbab3e4a3be 100644 --- a/include/asm-generic/audit_change_attr.h +++ b/include/asm-generic/audit_change_attr.h @@ -20,3 +20,7 @@ __NR_chown32, __NR_fchown32, __NR_lchown32, #endif +__NR_link, +#ifdef __NR_linkat +__NR_linkat, +#endif -- cgit v1.2.3 From 120a795da07c9a02221ca23464c28a7c6ad7de1d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Oct 2010 02:54:44 -0400 Subject: audit mmap Normal syscall audit doesn't catch the 5th argument of a syscall. It also doesn't catch the contents of userland structures pointed to by a syscall argument, so for both the old and new mmap(2) ABIs it doesn't record the descriptor we are mapping. For the old one it also misses the flags. Signed-off-by: Al Viro --- include/linux/audit.h | 9 +++++++++ kernel/auditsc.c | 16 ++++++++++++++++ mm/mmap.c | 2 ++ mm/nommu.c | 2 ++ 4 files changed, 29 insertions(+) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index e24afabc548f..8b5c0620abf9 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -102,6 +102,7 @@ #define AUDIT_EOE 1320 /* End of multi-record event */ #define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */ #define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */ +#define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ @@ -478,6 +479,7 @@ extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old); extern void __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old); +extern void __audit_mmap_fd(int fd, int flags); static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -531,6 +533,12 @@ static inline void audit_log_capset(pid_t pid, const struct cred *new, __audit_log_capset(pid, new, old); } +static inline void audit_mmap_fd(int fd, int flags) +{ + if (unlikely(!audit_dummy_context())) + __audit_mmap_fd(fd, flags); +} + extern int audit_n_rules; extern int audit_signals; #else @@ -564,6 +572,7 @@ extern int audit_signals; #define audit_mq_getsetattr(d,s) ((void)0) #define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; }) #define audit_log_capset(pid, ncr, ocr) ((void)0) +#define audit_mmap_fd(fd, flags) ((void)0) #define audit_ptrace(t) ((void)0) #define audit_n_rules 0 #define audit_signals 0 diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 1b31c130d034..f49a0318c2ed 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -241,6 +241,10 @@ struct audit_context { pid_t pid; struct audit_cap_data cap; } capset; + struct { + int fd; + int flags; + } mmap; }; int fds[2]; @@ -1305,6 +1309,10 @@ static void show_special(struct audit_context *context, int *call_panic) audit_log_cap(ab, "cap_pp", &context->capset.cap.permitted); audit_log_cap(ab, "cap_pe", &context->capset.cap.effective);
break; } + case AUDIT_MMAP: { + audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd, + context->mmap.flags); + break; } } audit_log_end(ab); } @@ -2476,6 +2484,14 @@ void __audit_log_capset(pid_t pid, context->type = AUDIT_CAPSET; } +void __audit_mmap_fd(int fd, int flags) +{ + struct audit_context *context = current->audit_context; + context->mmap.fd = fd; + context->mmap.flags = flags; + context->type = AUDIT_MMAP; +} + /** * audit_core_dumps - record information about processes that end abnormally * @signr: signal value diff --git a/mm/mmap.c b/mm/mmap.c index 00161a48a451..b179abb1474a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -1108,6 +1109,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, unsigned long retval = -EBADF; if (!(flags & MAP_ANONYMOUS)) { + audit_mmap_fd(fd, flags); if (unlikely(flags & MAP_HUGETLB)) return -EINVAL; file = fget(fd); diff --git a/mm/nommu.c b/mm/nommu.c index 30b5c20eec15..3613517c7592 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -1458,6 +1459,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, struct file *file = NULL; unsigned long retval = -EBADF; + audit_mmap_fd(fd, flags); if (!(flags & MAP_ANONYMOUS)) { file = fget(fd); if (!file) -- cgit v1.2.3 From 6bff7eccb0d9bdef4123aad5399e73cbc26683a6 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 29 Oct 2010 12:02:17 +0200 Subject: Ensure FMODE_NONOTIFY is not set by userspace In fsnotify_open() ensure that FMODE_NONOTIFY is never set by userspace. Also always call fsnotify_parent and fsnotify. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fsnotify.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index ecb43b33d181..5c185fa27089 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -235,10 +235,11 @@ static inline void fsnotify_open(struct file *file) if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; - if (!(file->f_mode & FMODE_NONOTIFY)) { - fsnotify_parent(path, NULL, mask); - fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); - } + /* FMODE_NONOTIFY must never be set from user */ + file->f_mode &= ~FMODE_NONOTIFY; + + fsnotify_parent(path, NULL, mask); + fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); } /* -- cgit v1.2.3 From 05fa3135fdc7b9b510b502a35b6b97d2b38c6f48 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 30 Oct 2010 17:31:15 -0400 Subject: locks: fix setlease methods to free passed-in lock We modified setlease to require the caller to allocate the new lease in the case of creating a new lease, but forgot to fix up the filesystem methods. Cc: Steven Whitehouse Cc: Steve French Cc: Trond Myklebust Signed-off-by: J. 
Bruce Fields Acked-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- fs/cifs/cifsfs.c | 5 ++++- fs/gfs2/file.c | 2 ++ fs/locks.c | 3 ++- fs/nfs/file.c | 3 ++- include/linux/fs.h | 1 + 5 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 75c4eaa79588..54745b6c3db9 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -625,8 +625,11 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) knows that the file won't be changed on the server by anyone else */ return generic_setlease(file, arg, lease); - else + else { + if (arg != F_UNLCK) + locks_free_lock(*lease); return -EAGAIN; + } } struct file_system_type cifs_fs_type = { diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index aa996471ec5c..ac943c1307b5 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -629,6 +629,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl) { + if (arg != F_UNLCK) + locks_free_lock(*fl); return -EINVAL; } diff --git a/fs/locks.c b/fs/locks.c index 63fbc41cc573..5b526a977882 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -186,7 +186,7 @@ void locks_release_private(struct file_lock *fl) EXPORT_SYMBOL_GPL(locks_release_private); /* Free a lock which is not in use. */ -static void locks_free_lock(struct file_lock *fl) +void locks_free_lock(struct file_lock *fl) { BUG_ON(waitqueue_active(&fl->fl_wait)); BUG_ON(!list_empty(&fl->fl_block)); @@ -195,6 +195,7 @@ static void locks_free_lock(struct file_lock *fl) locks_release_private(fl); kmem_cache_free(filelock_cache, fl); } +EXPORT_SYMBOL(locks_free_lock); void locks_init_lock(struct file_lock *fl) { diff --git a/fs/nfs/file.c b/fs/nfs/file.c index e756075637b0..1e524fb73ba5 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -884,6 +884,7 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl) dprintk("NFS: setlease(%s/%s, arg=%ld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, arg); - + if (arg != F_UNLCK) + locks_free_lock(*fl); return -EINVAL; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 7b7b507ffa1c..1eb29399a4ff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1129,6 +1129,7 @@ extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); extern int fcntl_getlease(struct file *filp); /* fs/locks.c */ +void locks_free_lock(struct file_lock *fl); extern void locks_init_lock(struct file_lock *); extern struct file_lock * locks_alloc_lock(void); extern void locks_copy_lock(struct file_lock *, struct file_lock *); -- cgit v1.2.3 From bb8430a2c8fe2b726033017daadf73c69b0348ea Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 31 Oct 2010 08:35:31 -0400 Subject: locks: remove fl_copy_lock lock_manager operation This one was only used for a nasty hack in nfsd, which has recently been removed. 
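The setlease fixes above come down to a simple contract: for F_RDLCK/F_WRLCK the VFS now hands ->setlease a pre-allocated lock, and a method that does not consume it must free it; F_UNLCK passes no new lock in. A minimal sketch of a method honoring that contract, modeled on the gfs2 and nfs hunks; examplefs_setlease() is an illustrative name for a filesystem that declines all leases:

static int examplefs_setlease(struct file *file, long arg,
			      struct file_lock **fl)
{
	/* the VFS allocated *fl for us; since we fail, we must free it */
	if (arg != F_UNLCK)
		locks_free_lock(*fl);
	return -EINVAL;
}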
Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 2 -- fs/locks.c | 5 +---- include/linux/fs.h | 1 - 3 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 8a817f656f0a..a91f30890011 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -322,7 +322,6 @@ fl_release_private: yes yes prototypes: int (*fl_compare_owner)(struct file_lock *, struct file_lock *); void (*fl_notify)(struct file_lock *); /* unblock callback */ - void (*fl_copy_lock)(struct file_lock *, struct file_lock *); void (*fl_release_private)(struct file_lock *); void (*fl_break)(struct file_lock *); /* break_lease callback */ @@ -330,7 +329,6 @@ locking rules: BKL may block fl_compare_owner: yes no fl_notify: yes no -fl_copy_lock: yes no fl_release_private: yes yes fl_break: yes no diff --git a/fs/locks.c b/fs/locks.c index a2ab790471b5..65765cb6afed 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -235,11 +235,8 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl) fl->fl_ops->fl_copy_lock(new, fl); new->fl_ops = fl->fl_ops; } - if (fl->fl_lmops) { - if (fl->fl_lmops->fl_copy_lock) - fl->fl_lmops->fl_copy_lock(new, fl); + if (fl->fl_lmops) new->fl_lmops = fl->fl_lmops; - } } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 1eb29399a4ff..334d68a17108 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1056,7 +1056,6 @@ struct lock_manager_operations { int (*fl_compare_owner)(struct file_lock *, struct file_lock *); void (*fl_notify)(struct file_lock *); /* unblock callback */ int (*fl_grant)(struct file_lock *, struct file_lock *, int); - void (*fl_copy_lock)(struct file_lock *, struct file_lock *); void (*fl_release_private)(struct file_lock *); void (*fl_break)(struct file_lock *); int (*fl_mylease)(struct file_lock *, struct file_lock *); -- cgit v1.2.3 From cbf4bd380a9caa72118525eabe7b82c6a3c8da78 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 31 Oct 2010 21:06:59 +0100 Subject: i2c: Drop unused I2C_CLASS_TV flags There are no users left for I2C_CLASS_TV_ANALOG and I2C_CLASS_TV_DIGITAL, so we can get rid of them. Signed-off-by: Jean Delvare --- drivers/staging/tm6000/tm6000-i2c.c | 1 - include/linux/i2c.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/drivers/staging/tm6000/tm6000-i2c.c b/drivers/staging/tm6000/tm6000-i2c.c index 3e46866dd279..93f625fc852b 100644 --- a/drivers/staging/tm6000/tm6000-i2c.c +++ b/drivers/staging/tm6000/tm6000-i2c.c @@ -320,7 +320,6 @@ static struct i2c_algorithm tm6000_algo = { static struct i2c_adapter tm6000_adap_template = { .owner = THIS_MODULE, - .class = I2C_CLASS_TV_ANALOG | I2C_CLASS_TV_DIGITAL, .name = "tm6000", .algo = &tm6000_algo, }; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 1f66fa06a97c..889b35abaeda 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -407,8 +407,6 @@ void i2c_unlock_adapter(struct i2c_adapter *); /* i2c adapter classes (bitmask) */ #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... 
*/ -#define I2C_CLASS_TV_ANALOG (1<<1) /* bttv + friends */ -#define I2C_CLASS_TV_DIGITAL (1<<2) /* dvb cards */ #define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ #define I2C_CLASS_SPD (1<<7) /* SPD EEPROMs and similar */ -- cgit v1.2.3 From e30d9859cf08920ae711f57ecd9726804451d29f Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Sun, 31 Oct 2010 21:06:59 +0100 Subject: i2c-i801: Add Intel Patsburg device ID Add support for the Intel Patsburg PCH SMBus Controller. Signed-off-by: Seth Heasley Signed-off-by: Jean Delvare --- Documentation/i2c/busses/i2c-i801 | 3 ++- drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-i801.c | 2 ++ include/linux/pci_ids.h | 1 + 4 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801 index e307914a3eda..a417cb13943d 100644 --- a/Documentation/i2c/busses/i2c-i801 +++ b/Documentation/i2c/busses/i2c-i801 @@ -15,8 +15,9 @@ Supported adapters: * Intel 82801I (ICH9) * Intel EP80579 (Tolapai) * Intel 82801JI (ICH10) - * Intel 3400/5 Series (PCH) + * Intel 5/3400 Series (PCH) * Intel Cougar Point (PCH) + * Intel Patsburg (PCH) Datasheets: Publicly available at the Intel website Authors: diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index c950be3cce21..3a6321cb8030 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -99,6 +99,7 @@ config I2C_I801 ICH10 5/3400 Series (PCH) Cougar Point (PCH) + Patsburg (PCH) This driver can also be built as a module. If so, the module will be called i2c-i801. diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 59d65981eed7..1fe30da653c3 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -43,6 +43,7 @@ ICH10 0x3a60 32 hard yes yes yes 5/3400 Series (PCH) 0x3b30 32 hard yes yes yes Cougar Point (PCH) 0x1c22 32 hard yes yes yes + Patsburg (PCH) 0x1d22 32 hard yes yes yes Features supported by this driver: Software PEC no @@ -592,6 +593,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH10_5) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS) }, { 0, } }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 87e2c2e7aed3..c6bcfe93b9ca 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2465,6 +2465,7 @@ #define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN 0x1c41 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f +#define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC 0x1d40 #define PCI_DEVICE_ID_INTEL_82801AA_0 0x2410 #define PCI_DEVICE_ID_INTEL_82801AA_1 0x2411 -- cgit v1.2.3
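The Patsburg patch above shows how little is needed when a new PCH keeps the same SMBus programming interface: the driver only grows a PCI device-ID table entry, plus the matching constant in pci_ids.h. A minimal sketch of that ID-table mechanism, assuming the usual pci_driver boilerplate around it; example_ids is an illustrative name:

static const struct pci_device_id example_ids[] = {
	/* 0x1d22 is the Patsburg PCH SMBus controller */
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL,
		     PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS) },
	{ 0, }				/* sentinel */
};
MODULE_DEVICE_TABLE(pci, example_ids);	/* enables module autoloading */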